diff options
author | Linus Torvalds <torvalds@linux-foundation.org> | 2017-09-06 15:17:17 -0700 |
---|---|---|
committer | Linus Torvalds <torvalds@linux-foundation.org> | 2017-09-06 15:17:17 -0700 |
commit | 80cee03bf1d626db0278271b505d7f5febb37bba (patch) | |
tree | 6fc86272106f526a9d07343c524612aa493539e6 | |
parent | aae3dbb4776e7916b6cd442d00159bea27a695c1 (diff) | |
parent | 2d45a7e89833f88b38112292ff227af437f81f2f (diff) | |
download | linux-80cee03bf1d626db0278271b505d7f5febb37bba.tar.bz2 |
Merge branch 'linus' of git://git.kernel.org/pub/scm/linux/kernel/git/herbert/crypto-2.6
Pull crypto updates from Herbert Xu:
"Here is the crypto update for 4.14:
API:
- Defer scompress scratch buffer allocation to first use.
- Add __crypto_xor that takes separate src and dst operands.
- Add ahash multiple registration interface.
- Revamped aead/skcipher algif code to fix async IO properly.
Drivers:
- Add non-SIMD fallback code path on ARM for SVE.
- Add AMD Security Processor framework for ccp.
- Add support for RSA in ccp.
- Add XTS-AES-256 support for CCP version 5.
- Add support for PRNG in sun4i-ss.
- Add support for DPAA2 in caam.
- Add ARTPEC crypto support.
- Add Freescale RNGC hwrng support.
- Add Microchip / Atmel ECC driver.
- Add support for STM32 HASH module"
* 'linus' of git://git.kernel.org/pub/scm/linux/kernel/git/herbert/crypto-2.6: (116 commits)
crypto: af_alg - get_page upon reassignment to TX SGL
crypto: cavium/nitrox - Fix an error handling path in 'nitrox_probe()'
crypto: inside-secure - fix an error handling path in safexcel_probe()
crypto: rockchip - Don't dequeue the request when device is busy
crypto: cavium - add release_firmware to all return case
crypto: sahara - constify platform_device_id
MAINTAINERS: Add ARTPEC crypto maintainer
crypto: axis - add ARTPEC-6/7 crypto accelerator driver
crypto: hash - add crypto_(un)register_ahashes()
dt-bindings: crypto: add ARTPEC crypto
crypto: algif_aead - fix comment regarding memory layout
crypto: ccp - use dma_mapping_error to check map error
lib/mpi: fix build with clang
crypto: sahara - Remove leftover from previous used spinlock
crypto: sahara - Fix dma unmap direction
crypto: af_alg - consolidation of duplicate code
crypto: caam - Remove unused dentry members
crypto: ccp - select CONFIG_CRYPTO_RSA
crypto: ccp - avoid uninitialized variable warning
crypto: serpent - improve __serpent_setkey with UBSAN
...
136 files changed, 11758 insertions, 3169 deletions
diff --git a/Documentation/devicetree/bindings/crypto/artpec6-crypto.txt b/Documentation/devicetree/bindings/crypto/artpec6-crypto.txt new file mode 100644 index 000000000000..d9cca4875bd6 --- /dev/null +++ b/Documentation/devicetree/bindings/crypto/artpec6-crypto.txt @@ -0,0 +1,16 @@ +Axis crypto engine with PDMA interface. + +Required properties: +- compatible : Should be one of the following strings: + "axis,artpec6-crypto" for the version in the Axis ARTPEC-6 SoC + "axis,artpec7-crypto" for the version in the Axis ARTPEC-7 SoC. +- reg: Base address and size for the PDMA register area. +- interrupts: Interrupt handle for the PDMA interrupt line. + +Example: + +crypto@f4264000 { + compatible = "axis,artpec6-crypto"; + reg = <0xf4264000 0x1000>; + interrupts = <GIC_SPI 19 IRQ_TYPE_LEVEL_HIGH>; +}; diff --git a/Documentation/devicetree/bindings/crypto/atmel-crypto.txt b/Documentation/devicetree/bindings/crypto/atmel-crypto.txt index f2aab3dc2b52..7de1a9674c70 100644 --- a/Documentation/devicetree/bindings/crypto/atmel-crypto.txt +++ b/Documentation/devicetree/bindings/crypto/atmel-crypto.txt @@ -66,3 +66,16 @@ sha@f8034000 { dmas = <&dma1 2 17>; dma-names = "tx"; }; + +* Eliptic Curve Cryptography (I2C) + +Required properties: +- compatible : must be "atmel,atecc508a". +- reg: I2C bus address of the device. +- clock-frequency: must be present in the i2c controller node. + +Example: +atecc508a@C0 { + compatible = "atmel,atecc508a"; + reg = <0xC0>; +}; diff --git a/Documentation/devicetree/bindings/crypto/st,stm32-hash.txt b/Documentation/devicetree/bindings/crypto/st,stm32-hash.txt new file mode 100644 index 000000000000..04fc246f02f7 --- /dev/null +++ b/Documentation/devicetree/bindings/crypto/st,stm32-hash.txt @@ -0,0 +1,30 @@ +* STMicroelectronics STM32 HASH + +Required properties: +- compatible: Should contain entries for this and backward compatible + HASH versions: + - "st,stm32f456-hash" for stm32 F456. + - "st,stm32f756-hash" for stm32 F756. 
+- reg: The address and length of the peripheral registers space +- interrupts: the interrupt specifier for the HASH +- clocks: The input clock of the HASH instance + +Optional properties: +- resets: The input reset of the HASH instance +- dmas: DMA specifiers for the HASH. See the DMA client binding, + Documentation/devicetree/bindings/dma/dma.txt +- dma-names: DMA request name. Should be "in" if a dma is present. +- dma-maxburst: Set number of maximum dma burst supported + +Example: + +hash1: hash@50060400 { + compatible = "st,stm32f756-hash"; + reg = <0x50060400 0x400>; + interrupts = <80>; + clocks = <&rcc 0 STM32F7_AHB2_CLOCK(HASH)>; + resets = <&rcc STM32F7_AHB2_RESET(HASH)>; + dmas = <&dma2 7 2 0x400 0x0>; + dma-names = "in"; + dma-maxburst = <0>; +}; diff --git a/Documentation/devicetree/bindings/rng/imx-rngc.txt b/Documentation/devicetree/bindings/rng/imx-rngc.txt new file mode 100644 index 000000000000..93c7174a7bed --- /dev/null +++ b/Documentation/devicetree/bindings/rng/imx-rngc.txt @@ -0,0 +1,21 @@ +Freescale RNGC (Random Number Generator Version C) + +The driver also supports version B, which is mostly compatible +to version C. 
+ +Required properties: +- compatible : should be one of + "fsl,imx25-rngb" + "fsl,imx35-rngc" +- reg : offset and length of the register set of this block +- interrupts : the interrupt number for the RNGC block +- clocks : the RNGC clk source + +Example: + +rng@53fb0000 { + compatible = "fsl,imx25-rngb"; + reg = <0x53fb0000 0x4000>; + interrupts = <22>; + clocks = <&trng_clk>; +}; diff --git a/MAINTAINERS b/MAINTAINERS index 961423bac24c..11dde284a426 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -1162,6 +1162,7 @@ L: linux-arm-kernel@axis.com F: arch/arm/mach-artpec F: arch/arm/boot/dts/artpec6* F: drivers/clk/axis +F: drivers/crypto/axis F: drivers/pinctrl/pinctrl-artpec* F: Documentation/devicetree/bindings/pinctrl/axis,artpec6-pinctrl.txt @@ -8770,6 +8771,12 @@ F: drivers/dma/at_hdmac.c F: drivers/dma/at_hdmac_regs.h F: include/linux/platform_data/dma-atmel.h +MICROCHIP / ATMEL ECC DRIVER +M: Tudor Ambarus <tudor.ambarus@microchip.com> +L: linux-crypto@vger.kernel.org +S: Maintained +F: drivers/crypto/atmel-ecc.* + MICROCHIP / ATMEL ISC DRIVER M: Songjun Wu <songjun.wu@microchip.com> L: linux-media@vger.kernel.org diff --git a/arch/arm/crypto/Kconfig b/arch/arm/crypto/Kconfig index b9adedcc5b2e..ec72752d5668 100644 --- a/arch/arm/crypto/Kconfig +++ b/arch/arm/crypto/Kconfig @@ -94,14 +94,15 @@ config CRYPTO_AES_ARM_CE ARMv8 Crypto Extensions config CRYPTO_GHASH_ARM_CE - tristate "PMULL-accelerated GHASH using ARMv8 Crypto Extensions" + tristate "PMULL-accelerated GHASH using NEON/ARMv8 Crypto Extensions" depends on KERNEL_MODE_NEON select CRYPTO_HASH select CRYPTO_CRYPTD help Use an implementation of GHASH (used by the GCM AEAD chaining mode) that uses the 64x64 to 128 bit polynomial multiplication (vmull.p64) - that is part of the ARMv8 Crypto Extensions + that is part of the ARMv8 Crypto Extensions, or a slower variant that + uses the vmull.p8 instruction that is part of the basic NEON ISA. 
config CRYPTO_CRCT10DIF_ARM_CE tristate "CRCT10DIF digest algorithm using PMULL instructions" diff --git a/arch/arm/crypto/aes-ce-glue.c b/arch/arm/crypto/aes-ce-glue.c index 0f966a8ca1ce..d0a9cec73707 100644 --- a/arch/arm/crypto/aes-ce-glue.c +++ b/arch/arm/crypto/aes-ce-glue.c @@ -285,9 +285,7 @@ static int ctr_encrypt(struct skcipher_request *req) ce_aes_ctr_encrypt(tail, NULL, (u8 *)ctx->key_enc, num_rounds(ctx), blocks, walk.iv); - if (tdst != tsrc) - memcpy(tdst, tsrc, nbytes); - crypto_xor(tdst, tail, nbytes); + crypto_xor_cpy(tdst, tsrc, tail, nbytes); err = skcipher_walk_done(&walk, 0); } kernel_neon_end(); diff --git a/arch/arm/crypto/aes-cipher-core.S b/arch/arm/crypto/aes-cipher-core.S index c817a86c4ca8..54b384084637 100644 --- a/arch/arm/crypto/aes-cipher-core.S +++ b/arch/arm/crypto/aes-cipher-core.S @@ -10,6 +10,7 @@ */ #include <linux/linkage.h> +#include <asm/cache.h> .text .align 5 @@ -32,19 +33,19 @@ .endif .endm - .macro __load, out, in, idx + .macro __load, out, in, idx, sz, op .if __LINUX_ARM_ARCH__ < 7 && \idx > 0 - ldr \out, [ttab, \in, lsr #(8 * \idx) - 2] + ldr\op \out, [ttab, \in, lsr #(8 * \idx) - \sz] .else - ldr \out, [ttab, \in, lsl #2] + ldr\op \out, [ttab, \in, lsl #\sz] .endif .endm - .macro __hround, out0, out1, in0, in1, in2, in3, t3, t4, enc + .macro __hround, out0, out1, in0, in1, in2, in3, t3, t4, enc, sz, op __select \out0, \in0, 0 __select t0, \in1, 1 - __load \out0, \out0, 0 - __load t0, t0, 1 + __load \out0, \out0, 0, \sz, \op + __load t0, t0, 1, \sz, \op .if \enc __select \out1, \in1, 0 @@ -53,10 +54,10 @@ __select \out1, \in3, 0 __select t1, \in0, 1 .endif - __load \out1, \out1, 0 + __load \out1, \out1, 0, \sz, \op __select t2, \in2, 2 - __load t1, t1, 1 - __load t2, t2, 2 + __load t1, t1, 1, \sz, \op + __load t2, t2, 2, \sz, \op eor \out0, \out0, t0, ror #24 @@ -68,9 +69,9 @@ __select \t3, \in1, 2 __select \t4, \in2, 3 .endif - __load \t3, \t3, 2 - __load t0, t0, 3 - __load \t4, \t4, 3 + __load \t3, \t3, 2, \sz, \op + 
__load t0, t0, 3, \sz, \op + __load \t4, \t4, 3, \sz, \op eor \out1, \out1, t1, ror #24 eor \out0, \out0, t2, ror #16 @@ -82,14 +83,14 @@ eor \out1, \out1, t2 .endm - .macro fround, out0, out1, out2, out3, in0, in1, in2, in3 - __hround \out0, \out1, \in0, \in1, \in2, \in3, \out2, \out3, 1 - __hround \out2, \out3, \in2, \in3, \in0, \in1, \in1, \in2, 1 + .macro fround, out0, out1, out2, out3, in0, in1, in2, in3, sz=2, op + __hround \out0, \out1, \in0, \in1, \in2, \in3, \out2, \out3, 1, \sz, \op + __hround \out2, \out3, \in2, \in3, \in0, \in1, \in1, \in2, 1, \sz, \op .endm - .macro iround, out0, out1, out2, out3, in0, in1, in2, in3 - __hround \out0, \out1, \in0, \in3, \in2, \in1, \out2, \out3, 0 - __hround \out2, \out3, \in2, \in1, \in0, \in3, \in1, \in0, 0 + .macro iround, out0, out1, out2, out3, in0, in1, in2, in3, sz=2, op + __hround \out0, \out1, \in0, \in3, \in2, \in1, \out2, \out3, 0, \sz, \op + __hround \out2, \out3, \in2, \in1, \in0, \in3, \in1, \in0, 0, \sz, \op .endm .macro __rev, out, in @@ -114,7 +115,7 @@ .endif .endm - .macro do_crypt, round, ttab, ltab + .macro do_crypt, round, ttab, ltab, bsz push {r3-r11, lr} ldr r4, [in] @@ -146,9 +147,12 @@ 1: subs rounds, rounds, #4 \round r8, r9, r10, r11, r4, r5, r6, r7 - __adrl ttab, \ltab, ls + bls 2f \round r4, r5, r6, r7, r8, r9, r10, r11 - bhi 0b + b 0b + +2: __adrl ttab, \ltab + \round r4, r5, r6, r7, r8, r9, r10, r11, \bsz, b #ifdef CONFIG_CPU_BIG_ENDIAN __rev r4, r4 @@ -170,10 +174,48 @@ .ltorg .endm + .align L1_CACHE_SHIFT + .type __aes_arm_inverse_sbox, %object +__aes_arm_inverse_sbox: + .byte 0x52, 0x09, 0x6a, 0xd5, 0x30, 0x36, 0xa5, 0x38 + .byte 0xbf, 0x40, 0xa3, 0x9e, 0x81, 0xf3, 0xd7, 0xfb + .byte 0x7c, 0xe3, 0x39, 0x82, 0x9b, 0x2f, 0xff, 0x87 + .byte 0x34, 0x8e, 0x43, 0x44, 0xc4, 0xde, 0xe9, 0xcb + .byte 0x54, 0x7b, 0x94, 0x32, 0xa6, 0xc2, 0x23, 0x3d + .byte 0xee, 0x4c, 0x95, 0x0b, 0x42, 0xfa, 0xc3, 0x4e + .byte 0x08, 0x2e, 0xa1, 0x66, 0x28, 0xd9, 0x24, 0xb2 + .byte 0x76, 0x5b, 0xa2, 0x49, 0x6d, 
0x8b, 0xd1, 0x25 + .byte 0x72, 0xf8, 0xf6, 0x64, 0x86, 0x68, 0x98, 0x16 + .byte 0xd4, 0xa4, 0x5c, 0xcc, 0x5d, 0x65, 0xb6, 0x92 + .byte 0x6c, 0x70, 0x48, 0x50, 0xfd, 0xed, 0xb9, 0xda + .byte 0x5e, 0x15, 0x46, 0x57, 0xa7, 0x8d, 0x9d, 0x84 + .byte 0x90, 0xd8, 0xab, 0x00, 0x8c, 0xbc, 0xd3, 0x0a + .byte 0xf7, 0xe4, 0x58, 0x05, 0xb8, 0xb3, 0x45, 0x06 + .byte 0xd0, 0x2c, 0x1e, 0x8f, 0xca, 0x3f, 0x0f, 0x02 + .byte 0xc1, 0xaf, 0xbd, 0x03, 0x01, 0x13, 0x8a, 0x6b + .byte 0x3a, 0x91, 0x11, 0x41, 0x4f, 0x67, 0xdc, 0xea + .byte 0x97, 0xf2, 0xcf, 0xce, 0xf0, 0xb4, 0xe6, 0x73 + .byte 0x96, 0xac, 0x74, 0x22, 0xe7, 0xad, 0x35, 0x85 + .byte 0xe2, 0xf9, 0x37, 0xe8, 0x1c, 0x75, 0xdf, 0x6e + .byte 0x47, 0xf1, 0x1a, 0x71, 0x1d, 0x29, 0xc5, 0x89 + .byte 0x6f, 0xb7, 0x62, 0x0e, 0xaa, 0x18, 0xbe, 0x1b + .byte 0xfc, 0x56, 0x3e, 0x4b, 0xc6, 0xd2, 0x79, 0x20 + .byte 0x9a, 0xdb, 0xc0, 0xfe, 0x78, 0xcd, 0x5a, 0xf4 + .byte 0x1f, 0xdd, 0xa8, 0x33, 0x88, 0x07, 0xc7, 0x31 + .byte 0xb1, 0x12, 0x10, 0x59, 0x27, 0x80, 0xec, 0x5f + .byte 0x60, 0x51, 0x7f, 0xa9, 0x19, 0xb5, 0x4a, 0x0d + .byte 0x2d, 0xe5, 0x7a, 0x9f, 0x93, 0xc9, 0x9c, 0xef + .byte 0xa0, 0xe0, 0x3b, 0x4d, 0xae, 0x2a, 0xf5, 0xb0 + .byte 0xc8, 0xeb, 0xbb, 0x3c, 0x83, 0x53, 0x99, 0x61 + .byte 0x17, 0x2b, 0x04, 0x7e, 0xba, 0x77, 0xd6, 0x26 + .byte 0xe1, 0x69, 0x14, 0x63, 0x55, 0x21, 0x0c, 0x7d + .size __aes_arm_inverse_sbox, . 
- __aes_arm_inverse_sbox + ENTRY(__aes_arm_encrypt) - do_crypt fround, crypto_ft_tab, crypto_fl_tab + do_crypt fround, crypto_ft_tab, crypto_ft_tab + 1, 2 ENDPROC(__aes_arm_encrypt) + .align 5 ENTRY(__aes_arm_decrypt) - do_crypt iround, crypto_it_tab, crypto_il_tab + do_crypt iround, crypto_it_tab, __aes_arm_inverse_sbox, 0 ENDPROC(__aes_arm_decrypt) diff --git a/arch/arm/crypto/aes-neonbs-glue.c b/arch/arm/crypto/aes-neonbs-glue.c index c76377961444..18768f330449 100644 --- a/arch/arm/crypto/aes-neonbs-glue.c +++ b/arch/arm/crypto/aes-neonbs-glue.c @@ -221,9 +221,8 @@ static int ctr_encrypt(struct skcipher_request *req) u8 *dst = walk.dst.virt.addr + blocks * AES_BLOCK_SIZE; u8 *src = walk.src.virt.addr + blocks * AES_BLOCK_SIZE; - if (dst != src) - memcpy(dst, src, walk.total % AES_BLOCK_SIZE); - crypto_xor(dst, final, walk.total % AES_BLOCK_SIZE); + crypto_xor_cpy(dst, src, final, + walk.total % AES_BLOCK_SIZE); err = skcipher_walk_done(&walk, 0); break; diff --git a/arch/arm/crypto/ghash-ce-core.S b/arch/arm/crypto/ghash-ce-core.S index f6ab8bcc9efe..2f78c10b1881 100644 --- a/arch/arm/crypto/ghash-ce-core.S +++ b/arch/arm/crypto/ghash-ce-core.S @@ -1,7 +1,7 @@ /* - * Accelerated GHASH implementation with ARMv8 vmull.p64 instructions. + * Accelerated GHASH implementation with NEON/ARMv8 vmull.p8/64 instructions. * - * Copyright (C) 2015 Linaro Ltd. <ard.biesheuvel@linaro.org> + * Copyright (C) 2015 - 2017 Linaro Ltd. 
<ard.biesheuvel@linaro.org> * * This program is free software; you can redistribute it and/or modify it * under the terms of the GNU General Public License version 2 as published @@ -12,40 +12,162 @@ #include <asm/assembler.h> SHASH .req q0 - SHASH2 .req q1 - T1 .req q2 - T2 .req q3 - MASK .req q4 - XL .req q5 - XM .req q6 - XH .req q7 - IN1 .req q7 + T1 .req q1 + XL .req q2 + XM .req q3 + XH .req q4 + IN1 .req q4 SHASH_L .req d0 SHASH_H .req d1 - SHASH2_L .req d2 - T1_L .req d4 - MASK_L .req d8 - XL_L .req d10 - XL_H .req d11 - XM_L .req d12 - XM_H .req d13 - XH_L .req d14 + T1_L .req d2 + T1_H .req d3 + XL_L .req d4 + XL_H .req d5 + XM_L .req d6 + XM_H .req d7 + XH_L .req d8 + + t0l .req d10 + t0h .req d11 + t1l .req d12 + t1h .req d13 + t2l .req d14 + t2h .req d15 + t3l .req d16 + t3h .req d17 + t4l .req d18 + t4h .req d19 + + t0q .req q5 + t1q .req q6 + t2q .req q7 + t3q .req q8 + t4q .req q9 + T2 .req q9 + + s1l .req d20 + s1h .req d21 + s2l .req d22 + s2h .req d23 + s3l .req d24 + s3h .req d25 + s4l .req d26 + s4h .req d27 + + MASK .req d28 + SHASH2_p8 .req d28 + + k16 .req d29 + k32 .req d30 + k48 .req d31 + SHASH2_p64 .req d31 .text .fpu crypto-neon-fp-armv8 + .macro __pmull_p64, rd, rn, rm, b1, b2, b3, b4 + vmull.p64 \rd, \rn, \rm + .endm + /* - * void pmull_ghash_update(int blocks, u64 dg[], const char *src, - * struct ghash_key const *k, const char *head) + * This implementation of 64x64 -> 128 bit polynomial multiplication + * using vmull.p8 instructions (8x8 -> 16) is taken from the paper + * "Fast Software Polynomial Multiplication on ARM Processors Using + * the NEON Engine" by Danilo Camara, Conrado Gouvea, Julio Lopez and + * Ricardo Dahab (https://hal.inria.fr/hal-01506572) + * + * It has been slightly tweaked for in-order performance, and to allow + * 'rq' to overlap with 'ad' or 'bd'. 
*/ -ENTRY(pmull_ghash_update) - vld1.64 {SHASH}, [r3] + .macro __pmull_p8, rq, ad, bd, b1=t4l, b2=t3l, b3=t4l, b4=t3l + vext.8 t0l, \ad, \ad, #1 @ A1 + .ifc \b1, t4l + vext.8 t4l, \bd, \bd, #1 @ B1 + .endif + vmull.p8 t0q, t0l, \bd @ F = A1*B + vext.8 t1l, \ad, \ad, #2 @ A2 + vmull.p8 t4q, \ad, \b1 @ E = A*B1 + .ifc \b2, t3l + vext.8 t3l, \bd, \bd, #2 @ B2 + .endif + vmull.p8 t1q, t1l, \bd @ H = A2*B + vext.8 t2l, \ad, \ad, #3 @ A3 + vmull.p8 t3q, \ad, \b2 @ G = A*B2 + veor t0q, t0q, t4q @ L = E + F + .ifc \b3, t4l + vext.8 t4l, \bd, \bd, #3 @ B3 + .endif + vmull.p8 t2q, t2l, \bd @ J = A3*B + veor t0l, t0l, t0h @ t0 = (L) (P0 + P1) << 8 + veor t1q, t1q, t3q @ M = G + H + .ifc \b4, t3l + vext.8 t3l, \bd, \bd, #4 @ B4 + .endif + vmull.p8 t4q, \ad, \b3 @ I = A*B3 + veor t1l, t1l, t1h @ t1 = (M) (P2 + P3) << 16 + vmull.p8 t3q, \ad, \b4 @ K = A*B4 + vand t0h, t0h, k48 + vand t1h, t1h, k32 + veor t2q, t2q, t4q @ N = I + J + veor t0l, t0l, t0h + veor t1l, t1l, t1h + veor t2l, t2l, t2h @ t2 = (N) (P4 + P5) << 24 + vand t2h, t2h, k16 + veor t3l, t3l, t3h @ t3 = (K) (P6 + P7) << 32 + vmov.i64 t3h, #0 + vext.8 t0q, t0q, t0q, #15 + veor t2l, t2l, t2h + vext.8 t1q, t1q, t1q, #14 + vmull.p8 \rq, \ad, \bd @ D = A*B + vext.8 t2q, t2q, t2q, #13 + vext.8 t3q, t3q, t3q, #12 + veor t0q, t0q, t1q + veor t2q, t2q, t3q + veor \rq, \rq, t0q + veor \rq, \rq, t2q + .endm + + // + // PMULL (64x64->128) based reduction for CPUs that can do + // it in a single instruction. 
+ // + .macro __pmull_reduce_p64 + vmull.p64 T1, XL_L, MASK + + veor XH_L, XH_L, XM_H + vext.8 T1, T1, T1, #8 + veor XL_H, XL_H, XM_L + veor T1, T1, XL + + vmull.p64 XL, T1_H, MASK + .endm + + // + // Alternative reduction for CPUs that lack support for the + // 64x64->128 PMULL instruction + // + .macro __pmull_reduce_p8 + veor XL_H, XL_H, XM_L + veor XH_L, XH_L, XM_H + + vshl.i64 T1, XL, #57 + vshl.i64 T2, XL, #62 + veor T1, T1, T2 + vshl.i64 T2, XL, #63 + veor T1, T1, T2 + veor XL_H, XL_H, T1_L + veor XH_L, XH_L, T1_H + + vshr.u64 T1, XL, #1 + veor XH, XH, XL + veor XL, XL, T1 + vshr.u64 T1, T1, #6 + vshr.u64 XL, XL, #1 + .endm + + .macro ghash_update, pn vld1.64 {XL}, [r1] - vmov.i8 MASK, #0xe1 - vext.8 SHASH2, SHASH, SHASH, #8 - vshl.u64 MASK, MASK, #57 - veor SHASH2, SHASH2, SHASH /* do the head block first, if supplied */ ldr ip, [sp] @@ -62,33 +184,59 @@ ENTRY(pmull_ghash_update) #ifndef CONFIG_CPU_BIG_ENDIAN vrev64.8 T1, T1 #endif - vext.8 T2, XL, XL, #8 vext.8 IN1, T1, T1, #8 - veor T1, T1, T2 + veor T1_L, T1_L, XL_H veor XL, XL, IN1 - vmull.p64 XH, SHASH_H, XL_H @ a1 * b1 + __pmull_\pn XH, XL_H, SHASH_H, s1h, s2h, s3h, s4h @ a1 * b1 veor T1, T1, XL - vmull.p64 XL, SHASH_L, XL_L @ a0 * b0 - vmull.p64 XM, SHASH2_L, T1_L @ (a1 + a0)(b1 + b0) + __pmull_\pn XL, XL_L, SHASH_L, s1l, s2l, s3l, s4l @ a0 * b0 + __pmull_\pn XM, T1_L, SHASH2_\pn @ (a1+a0)(b1+b0) - vext.8 T1, XL, XH, #8 - veor T2, XL, XH + veor T1, XL, XH veor XM, XM, T1 - veor XM, XM, T2 - vmull.p64 T2, XL_L, MASK_L - vmov XH_L, XM_H - vmov XM_H, XL_L + __pmull_reduce_\pn - veor XL, XM, T2 - vext.8 T2, XL, XL, #8 - vmull.p64 XL, XL_L, MASK_L - veor T2, T2, XH - veor XL, XL, T2 + veor T1, T1, XH + veor XL, XL, T1 bne 0b vst1.64 {XL}, [r1] bx lr -ENDPROC(pmull_ghash_update) + .endm + + /* + * void pmull_ghash_update(int blocks, u64 dg[], const char *src, + * struct ghash_key const *k, const char *head) + */ +ENTRY(pmull_ghash_update_p64) + vld1.64 {SHASH}, [r3] + veor SHASH2_p64, SHASH_L, SHASH_H + + 
vmov.i8 MASK, #0xe1 + vshl.u64 MASK, MASK, #57 + + ghash_update p64 +ENDPROC(pmull_ghash_update_p64) + +ENTRY(pmull_ghash_update_p8) + vld1.64 {SHASH}, [r3] + veor SHASH2_p8, SHASH_L, SHASH_H + + vext.8 s1l, SHASH_L, SHASH_L, #1 + vext.8 s2l, SHASH_L, SHASH_L, #2 + vext.8 s3l, SHASH_L, SHASH_L, #3 + vext.8 s4l, SHASH_L, SHASH_L, #4 + vext.8 s1h, SHASH_H, SHASH_H, #1 + vext.8 s2h, SHASH_H, SHASH_H, #2 + vext.8 s3h, SHASH_H, SHASH_H, #3 + vext.8 s4h, SHASH_H, SHASH_H, #4 + + vmov.i64 k16, #0xffff + vmov.i64 k32, #0xffffffff + vmov.i64 k48, #0xffffffffffff + + ghash_update p8 +ENDPROC(pmull_ghash_update_p8) diff --git a/arch/arm/crypto/ghash-ce-glue.c b/arch/arm/crypto/ghash-ce-glue.c index 6bac8bea9f1e..d9bb52cae2ac 100644 --- a/arch/arm/crypto/ghash-ce-glue.c +++ b/arch/arm/crypto/ghash-ce-glue.c @@ -22,6 +22,7 @@ MODULE_DESCRIPTION("GHASH secure hash using ARMv8 Crypto Extensions"); MODULE_AUTHOR("Ard Biesheuvel <ard.biesheuvel@linaro.org>"); MODULE_LICENSE("GPL v2"); +MODULE_ALIAS_CRYPTO("ghash"); #define GHASH_BLOCK_SIZE 16 #define GHASH_DIGEST_SIZE 16 @@ -41,8 +42,17 @@ struct ghash_async_ctx { struct cryptd_ahash *cryptd_tfm; }; -asmlinkage void pmull_ghash_update(int blocks, u64 dg[], const char *src, - struct ghash_key const *k, const char *head); +asmlinkage void pmull_ghash_update_p64(int blocks, u64 dg[], const char *src, + struct ghash_key const *k, + const char *head); + +asmlinkage void pmull_ghash_update_p8(int blocks, u64 dg[], const char *src, + struct ghash_key const *k, + const char *head); + +static void (*pmull_ghash_update)(int blocks, u64 dg[], const char *src, + struct ghash_key const *k, + const char *head); static int ghash_init(struct shash_desc *desc) { @@ -312,6 +322,14 @@ static int __init ghash_ce_mod_init(void) { int err; + if (!(elf_hwcap & HWCAP_NEON)) + return -ENODEV; + + if (elf_hwcap2 & HWCAP2_PMULL) + pmull_ghash_update = pmull_ghash_update_p64; + else + pmull_ghash_update = pmull_ghash_update_p8; + err = 
crypto_register_shash(&ghash_alg); if (err) return err; @@ -332,5 +350,5 @@ static void __exit ghash_ce_mod_exit(void) crypto_unregister_shash(&ghash_alg); } -module_cpu_feature_match(PMULL, ghash_ce_mod_init); +module_init(ghash_ce_mod_init); module_exit(ghash_ce_mod_exit); diff --git a/arch/arm64/crypto/Kconfig b/arch/arm64/crypto/Kconfig index d92293747d63..7ca54a76f6b9 100644 --- a/arch/arm64/crypto/Kconfig +++ b/arch/arm64/crypto/Kconfig @@ -18,18 +18,23 @@ config CRYPTO_SHA512_ARM64 config CRYPTO_SHA1_ARM64_CE tristate "SHA-1 digest algorithm (ARMv8 Crypto Extensions)" - depends on ARM64 && KERNEL_MODE_NEON + depends on KERNEL_MODE_NEON select CRYPTO_HASH + select CRYPTO_SHA1 config CRYPTO_SHA2_ARM64_CE tristate "SHA-224/SHA-256 digest algorithm (ARMv8 Crypto Extensions)" - depends on ARM64 && KERNEL_MODE_NEON + depends on KERNEL_MODE_NEON select CRYPTO_HASH + select CRYPTO_SHA256_ARM64 config CRYPTO_GHASH_ARM64_CE - tristate "GHASH (for GCM chaining mode) using ARMv8 Crypto Extensions" - depends on ARM64 && KERNEL_MODE_NEON + tristate "GHASH/AES-GCM using ARMv8 Crypto Extensions" + depends on KERNEL_MODE_NEON select CRYPTO_HASH + select CRYPTO_GF128MUL + select CRYPTO_AES + select CRYPTO_AES_ARM64 config CRYPTO_CRCT10DIF_ARM64_CE tristate "CRCT10DIF digest algorithm using PMULL instructions" @@ -49,25 +54,29 @@ config CRYPTO_AES_ARM64_CE tristate "AES core cipher using ARMv8 Crypto Extensions" depends on ARM64 && KERNEL_MODE_NEON select CRYPTO_ALGAPI + select CRYPTO_AES_ARM64 config CRYPTO_AES_ARM64_CE_CCM tristate "AES in CCM mode using ARMv8 Crypto Extensions" depends on ARM64 && KERNEL_MODE_NEON select CRYPTO_ALGAPI select CRYPTO_AES_ARM64_CE + select CRYPTO_AES_ARM64 select CRYPTO_AEAD config CRYPTO_AES_ARM64_CE_BLK tristate "AES in ECB/CBC/CTR/XTS modes using ARMv8 Crypto Extensions" - depends on ARM64 && KERNEL_MODE_NEON + depends on KERNEL_MODE_NEON select CRYPTO_BLKCIPHER select CRYPTO_AES_ARM64_CE + select CRYPTO_AES_ARM64 select CRYPTO_SIMD config 
CRYPTO_AES_ARM64_NEON_BLK tristate "AES in ECB/CBC/CTR/XTS modes using NEON instructions" - depends on ARM64 && KERNEL_MODE_NEON + depends on KERNEL_MODE_NEON select CRYPTO_BLKCIPHER + select CRYPTO_AES_ARM64 select CRYPTO_AES select CRYPTO_SIMD @@ -82,6 +91,7 @@ config CRYPTO_AES_ARM64_BS depends on KERNEL_MODE_NEON select CRYPTO_BLKCIPHER select CRYPTO_AES_ARM64_NEON_BLK + select CRYPTO_AES_ARM64 select CRYPTO_SIMD endif diff --git a/arch/arm64/crypto/aes-ce-ccm-core.S b/arch/arm64/crypto/aes-ce-ccm-core.S index 3363560c79b7..e3a375c4cb83 100644 --- a/arch/arm64/crypto/aes-ce-ccm-core.S +++ b/arch/arm64/crypto/aes-ce-ccm-core.S @@ -1,7 +1,7 @@ /* * aesce-ccm-core.S - AES-CCM transform for ARMv8 with Crypto Extensions * - * Copyright (C) 2013 - 2014 Linaro Ltd <ard.biesheuvel@linaro.org> + * Copyright (C) 2013 - 2017 Linaro Ltd <ard.biesheuvel@linaro.org> * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License version 2 as @@ -32,7 +32,7 @@ ENTRY(ce_aes_ccm_auth_data) beq 8f /* out of input? */ cbnz w8, 0b eor v0.16b, v0.16b, v1.16b -1: ld1 {v3.16b}, [x4] /* load first round key */ +1: ld1 {v3.4s}, [x4] /* load first round key */ prfm pldl1strm, [x1] cmp w5, #12 /* which key size? 
*/ add x6, x4, #16 @@ -42,17 +42,17 @@ ENTRY(ce_aes_ccm_auth_data) mov v5.16b, v3.16b b 4f 2: mov v4.16b, v3.16b - ld1 {v5.16b}, [x6], #16 /* load 2nd round key */ + ld1 {v5.4s}, [x6], #16 /* load 2nd round key */ 3: aese v0.16b, v4.16b aesmc v0.16b, v0.16b -4: ld1 {v3.16b}, [x6], #16 /* load next round key */ +4: ld1 {v3.4s}, [x6], #16 /* load next round key */ aese v0.16b, v5.16b aesmc v0.16b, v0.16b -5: ld1 {v4.16b}, [x6], #16 /* load next round key */ +5: ld1 {v4.4s}, [x6], #16 /* load next round key */ subs w7, w7, #3 aese v0.16b, v3.16b aesmc v0.16b, v0.16b - ld1 {v5.16b}, [x6], #16 /* load next round key */ + ld1 {v5.4s}, [x6], #16 /* load next round key */ bpl 3b aese v0.16b, v4.16b subs w2, w2, #16 /* last data? */ @@ -90,7 +90,7 @@ ENDPROC(ce_aes_ccm_auth_data) * u32 rounds); */ ENTRY(ce_aes_ccm_final) - ld1 {v3.16b}, [x2], #16 /* load first round key */ + ld1 {v3.4s}, [x2], #16 /* load first round key */ ld1 {v0.16b}, [x0] /* load mac */ cmp w3, #12 /* which key size? */ sub w3, w3, #2 /* modified # of rounds */ @@ -100,17 +100,17 @@ ENTRY(ce_aes_ccm_final) mov v5.16b, v3.16b b 2f 0: mov v4.16b, v3.16b -1: ld1 {v5.16b}, [x2], #16 /* load next round key */ +1: ld1 {v5.4s}, [x2], #16 /* load next round key */ aese v0.16b, v4.16b aesmc v0.16b, v0.16b aese v1.16b, v4.16b aesmc v1.16b, v1.16b -2: ld1 {v3.16b}, [x2], #16 /* load next round key */ +2: ld1 {v3.4s}, [x2], #16 /* load next round key */ aese v0.16b, v5.16b aesmc v0.16b, v0.16b aese v1.16b, v5.16b aesmc v1.16b, v1.16b -3: ld1 {v4.16b}, [x2], #16 /* load next round key */ +3: ld1 {v4.4s}, [x2], #16 /* load next round key */ subs w3, w3, #3 aese v0.16b, v3.16b aesmc v0.16b, v0.16b @@ -137,31 +137,31 @@ CPU_LE( rev x8, x8 ) /* keep swabbed ctr in reg */ cmp w4, #12 /* which key size? 
*/ sub w7, w4, #2 /* get modified # of rounds */ ins v1.d[1], x9 /* no carry in lower ctr */ - ld1 {v3.16b}, [x3] /* load first round key */ + ld1 {v3.4s}, [x3] /* load first round key */ add x10, x3, #16 bmi 1f bne 4f mov v5.16b, v3.16b b 3f 1: mov v4.16b, v3.16b - ld1 {v5.16b}, [x10], #16 /* load 2nd round key */ + ld1 {v5.4s}, [x10], #16 /* load 2nd round key */ 2: /* inner loop: 3 rounds, 2x interleaved */ aese v0.16b, v4.16b aesmc v0.16b, v0.16b aese v1.16b, v4.16b aesmc v1.16b, v1.16b -3: ld1 {v3.16b}, [x10], #16 /* load next round key */ +3: ld1 {v3.4s}, [x10], #16 /* load next round key */ aese v0.16b, v5.16b aesmc v0.16b, v0.16b aese v1.16b, v5.16b aesmc v1.16b, v1.16b -4: ld1 {v4.16b}, [x10], #16 /* load next round key */ +4: ld1 {v4.4s}, [x10], #16 /* load next round key */ subs w7, w7, #3 aese v0.16b, v3.16b aesmc v0.16b, v0.16b aese v1.16b, v3.16b aesmc v1.16b, v1.16b - ld1 {v5.16b}, [x10], #16 /* load next round key */ + ld1 {v5.4s}, [x10], #16 /* load next round key */ bpl 2b aese v0.16b, v4.16b aese v1.16b, v4.16b diff --git a/arch/arm64/crypto/aes-ce-ccm-glue.c b/arch/arm64/crypto/aes-ce-ccm-glue.c index 6a7dbc7c83a6..a1254036f2b1 100644 --- a/arch/arm64/crypto/aes-ce-ccm-glue.c +++ b/arch/arm64/crypto/aes-ce-ccm-glue.c @@ -1,7 +1,7 @@ /* * aes-ccm-glue.c - AES-CCM transform for ARMv8 with Crypto Extensions * - * Copyright (C) 2013 - 2014 Linaro Ltd <ard.biesheuvel@linaro.org> + * Copyright (C) 2013 - 2017 Linaro Ltd <ard.biesheuvel@linaro.org> * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License version 2 as @@ -9,6 +9,7 @@ */ #include <asm/neon.h> +#include <asm/simd.h> #include <asm/unaligned.h> #include <crypto/aes.h> #include <crypto/scatterwalk.h> @@ -44,6 +45,8 @@ asmlinkage void ce_aes_ccm_decrypt(u8 out[], u8 const in[], u32 cbytes, asmlinkage void ce_aes_ccm_final(u8 mac[], u8 const ctr[], u32 const rk[], u32 rounds); +asmlinkage void __aes_arm64_encrypt(u32 *rk, 
u8 *out, const u8 *in, int rounds); + static int ccm_setkey(struct crypto_aead *tfm, const u8 *in_key, unsigned int key_len) { @@ -103,7 +106,45 @@ static int ccm_init_mac(struct aead_request *req, u8 maciv[], u32 msglen) return 0; } -static void ccm_calculate_auth_mac(struct aead_request *req, u8 mac[]) +static void ccm_update_mac(struct crypto_aes_ctx *key, u8 mac[], u8 const in[], + u32 abytes, u32 *macp, bool use_neon) +{ + if (likely(use_neon)) { + ce_aes_ccm_auth_data(mac, in, abytes, macp, key->key_enc, + num_rounds(key)); + } else { + if (*macp > 0 && *macp < AES_BLOCK_SIZE) { + int added = min(abytes, AES_BLOCK_SIZE - *macp); + + crypto_xor(&mac[*macp], in, added); + + *macp += added; + in += added; + abytes -= added; + } + + while (abytes > AES_BLOCK_SIZE) { + __aes_arm64_encrypt(key->key_enc, mac, mac, + num_rounds(key)); + crypto_xor(mac, in, AES_BLOCK_SIZE); + + in += AES_BLOCK_SIZE; + abytes -= AES_BLOCK_SIZE; + } + + if (abytes > 0) { + __aes_arm64_encrypt(key->key_enc, mac, mac, + num_rounds(key)); + crypto_xor(mac, in, abytes); + *macp = abytes; + } else { + *macp = 0; + } + } +} + +static void ccm_calculate_auth_mac(struct aead_request *req, u8 mac[], + bool use_neon) { struct crypto_aead *aead = crypto_aead_reqtfm(req); struct crypto_aes_ctx *ctx = crypto_aead_ctx(aead); @@ -122,8 +163,7 @@ static void ccm_calculate_auth_mac(struct aead_request *req, u8 mac[]) ltag.len = 6; } - ce_aes_ccm_auth_data(mac, (u8 *)&ltag, ltag.len, &macp, ctx->key_enc, - num_rounds(ctx)); + ccm_update_mac(ctx, mac, (u8 *)&ltag, ltag.len, &macp, use_neon); scatterwalk_start(&walk, req->src); do { @@ -135,8 +175,7 @@ static void ccm_calculate_auth_mac(struct aead_request *req, u8 mac[]) n = scatterwalk_clamp(&walk, len); } p = scatterwalk_map(&walk); - ce_aes_ccm_auth_data(mac, p, n, &macp, ctx->key_enc, - num_rounds(ctx)); + ccm_update_mac(ctx, mac, p, n, &macp, use_neon); len -= n; scatterwalk_unmap(p); @@ -145,6 +184,56 @@ static void ccm_calculate_auth_mac(struct 
aead_request *req, u8 mac[]) } while (len); } +static int ccm_crypt_fallback(struct skcipher_walk *walk, u8 mac[], u8 iv0[], + struct crypto_aes_ctx *ctx, bool enc) +{ + u8 buf[AES_BLOCK_SIZE]; + int err = 0; + + while (walk->nbytes) { + int blocks = walk->nbytes / AES_BLOCK_SIZE; + u32 tail = walk->nbytes % AES_BLOCK_SIZE; + u8 *dst = walk->dst.virt.addr; + u8 *src = walk->src.virt.addr; + u32 nbytes = walk->nbytes; + + if (nbytes == walk->total && tail > 0) { + blocks++; + tail = 0; + } + + do { + u32 bsize = AES_BLOCK_SIZE; + + if (nbytes < AES_BLOCK_SIZE) + bsize = nbytes; + + crypto_inc(walk->iv, AES_BLOCK_SIZE); + __aes_arm64_encrypt(ctx->key_enc, buf, walk->iv, + num_rounds(ctx)); + __aes_arm64_encrypt(ctx->key_enc, mac, mac, + num_rounds(ctx)); + if (enc) + crypto_xor(mac, src, bsize); + crypto_xor_cpy(dst, src, buf, bsize); + if (!enc) + crypto_xor(mac, dst, bsize); + dst += bsize; + src += bsize; + nbytes -= bsize; + } while (--blocks); + + err = skcipher_walk_done(walk, tail); + } + + if (!err) { + __aes_arm64_encrypt(ctx->key_enc, buf, iv0, num_rounds(ctx)); + __aes_arm64_encrypt(ctx->key_enc, mac, mac, num_rounds(ctx)); + crypto_xor(mac, buf, AES_BLOCK_SIZE); + } + return err; +} + static int ccm_encrypt(struct aead_request *req) { struct crypto_aead *aead = crypto_aead_reqtfm(req); @@ -153,39 +242,46 @@ static int ccm_encrypt(struct aead_request *req) u8 __aligned(8) mac[AES_BLOCK_SIZE]; u8 buf[AES_BLOCK_SIZE]; u32 len = req->cryptlen; + bool use_neon = may_use_simd(); int err; err = ccm_init_mac(req, mac, len); if (err) return err; - kernel_neon_begin_partial(6); + if (likely(use_neon)) + kernel_neon_begin(); if (req->assoclen) - ccm_calculate_auth_mac(req, mac); + ccm_calculate_auth_mac(req, mac, use_neon); /* preserve the original iv for the final round */ memcpy(buf, req->iv, AES_BLOCK_SIZE); err = skcipher_walk_aead_encrypt(&walk, req, true); - while (walk.nbytes) { - u32 tail = walk.nbytes % AES_BLOCK_SIZE; - - if (walk.nbytes == walk.total) - 
tail = 0; + if (likely(use_neon)) { + while (walk.nbytes) { + u32 tail = walk.nbytes % AES_BLOCK_SIZE; - ce_aes_ccm_encrypt(walk.dst.virt.addr, walk.src.virt.addr, - walk.nbytes - tail, ctx->key_enc, - num_rounds(ctx), mac, walk.iv); + if (walk.nbytes == walk.total) + tail = 0; - err = skcipher_walk_done(&walk, tail); - } - if (!err) - ce_aes_ccm_final(mac, buf, ctx->key_enc, num_rounds(ctx)); + ce_aes_ccm_encrypt(walk.dst.virt.addr, + walk.src.virt.addr, + walk.nbytes - tail, ctx->key_enc, + num_rounds(ctx), mac, walk.iv); - kernel_neon_end(); + err = skcipher_walk_done(&walk, tail); + } + if (!err) + ce_aes_ccm_final(mac, buf, ctx->key_enc, + num_rounds(ctx)); + kernel_neon_end(); + } else { + err = ccm_crypt_fallback(&walk, mac, buf, ctx, true); + } if (err) return err; @@ -205,38 +301,46 @@ static int ccm_decrypt(struct aead_request *req) u8 __aligned(8) mac[AES_BLOCK_SIZE]; u8 buf[AES_BLOCK_SIZE]; u32 len = req->cryptlen - authsize; + bool use_neon = may_use_simd(); int err; err = ccm_init_mac(req, mac, len); if (err) return err; - kernel_neon_begin_partial(6); + if (likely(use_neon)) + kernel_neon_begin(); if (req->assoclen) - ccm_calculate_auth_mac(req, mac); + ccm_calculate_auth_mac(req, mac, use_neon); /* preserve the original iv for the final round */ memcpy(buf, req->iv, AES_BLOCK_SIZE); err = skcipher_walk_aead_decrypt(&walk, req, true); - while (walk.nbytes) { - u32 tail = walk.nbytes % AES_BLOCK_SIZE; + if (likely(use_neon)) { + while (walk.nbytes) { + u32 tail = walk.nbytes % AES_BLOCK_SIZE; - if (walk.nbytes == walk.total) - tail = 0; + if (walk.nbytes == walk.total) + tail = 0; - ce_aes_ccm_decrypt(walk.dst.virt.addr, walk.src.virt.addr, - walk.nbytes - tail, ctx->key_enc, - num_rounds(ctx), mac, walk.iv); + ce_aes_ccm_decrypt(walk.dst.virt.addr, + walk.src.virt.addr, + walk.nbytes - tail, ctx->key_enc, + num_rounds(ctx), mac, walk.iv); - err = skcipher_walk_done(&walk, tail); - } - if (!err) - ce_aes_ccm_final(mac, buf, ctx->key_enc, 
num_rounds(ctx)); + err = skcipher_walk_done(&walk, tail); + } + if (!err) + ce_aes_ccm_final(mac, buf, ctx->key_enc, + num_rounds(ctx)); - kernel_neon_end(); + kernel_neon_end(); + } else { + err = ccm_crypt_fallback(&walk, mac, buf, ctx, false); + } if (err) return err; diff --git a/arch/arm64/crypto/aes-ce-cipher.c b/arch/arm64/crypto/aes-ce-cipher.c index 50d9fe11d0c8..6a75cd75ed11 100644 --- a/arch/arm64/crypto/aes-ce-cipher.c +++ b/arch/arm64/crypto/aes-ce-cipher.c @@ -1,7 +1,7 @@ /* * aes-ce-cipher.c - core AES cipher using ARMv8 Crypto Extensions * - * Copyright (C) 2013 - 2014 Linaro Ltd <ard.biesheuvel@linaro.org> + * Copyright (C) 2013 - 2017 Linaro Ltd <ard.biesheuvel@linaro.org> * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License version 2 as @@ -9,6 +9,8 @@ */ #include <asm/neon.h> +#include <asm/simd.h> +#include <asm/unaligned.h> #include <crypto/aes.h> #include <linux/cpufeature.h> #include <linux/crypto.h> @@ -20,6 +22,9 @@ MODULE_DESCRIPTION("Synchronous AES cipher using ARMv8 Crypto Extensions"); MODULE_AUTHOR("Ard Biesheuvel <ard.biesheuvel@linaro.org>"); MODULE_LICENSE("GPL v2"); +asmlinkage void __aes_arm64_encrypt(u32 *rk, u8 *out, const u8 *in, int rounds); +asmlinkage void __aes_arm64_decrypt(u32 *rk, u8 *out, const u8 *in, int rounds); + struct aes_block { u8 b[AES_BLOCK_SIZE]; }; @@ -44,27 +49,32 @@ static void aes_cipher_encrypt(struct crypto_tfm *tfm, u8 dst[], u8 const src[]) void *dummy0; int dummy1; - kernel_neon_begin_partial(4); + if (!may_use_simd()) { + __aes_arm64_encrypt(ctx->key_enc, dst, src, num_rounds(ctx)); + return; + } + + kernel_neon_begin(); __asm__(" ld1 {v0.16b}, %[in] ;" - " ld1 {v1.16b}, [%[key]], #16 ;" + " ld1 {v1.4s}, [%[key]], #16 ;" " cmp %w[rounds], #10 ;" " bmi 0f ;" " bne 3f ;" " mov v3.16b, v1.16b ;" " b 2f ;" "0: mov v2.16b, v1.16b ;" - " ld1 {v3.16b}, [%[key]], #16 ;" + " ld1 {v3.4s}, [%[key]], #16 ;" "1: aese v0.16b, v2.16b ;" 
" aesmc v0.16b, v0.16b ;" - "2: ld1 {v1.16b}, [%[key]], #16 ;" + "2: ld1 {v1.4s}, [%[key]], #16 ;" " aese v0.16b, v3.16b ;" " aesmc v0.16b, v0.16b ;" - "3: ld1 {v2.16b}, [%[key]], #16 ;" + "3: ld1 {v2.4s}, [%[key]], #16 ;" " subs %w[rounds], %w[rounds], #3 ;" " aese v0.16b, v1.16b ;" " aesmc v0.16b, v0.16b ;" - " ld1 {v3.16b}, [%[key]], #16 ;" + " ld1 {v3.4s}, [%[key]], #16 ;" " bpl 1b ;" " aese v0.16b, v2.16b ;" " eor v0.16b, v0.16b, v3.16b ;" @@ -89,27 +99,32 @@ static void aes_cipher_decrypt(struct crypto_tfm *tfm, u8 dst[], u8 const src[]) void *dummy0; int dummy1; - kernel_neon_begin_partial(4); + if (!may_use_simd()) { + __aes_arm64_decrypt(ctx->key_dec, dst, src, num_rounds(ctx)); + return; + } + + kernel_neon_begin(); __asm__(" ld1 {v0.16b}, %[in] ;" - " ld1 {v1.16b}, [%[key]], #16 ;" + " ld1 {v1.4s}, [%[key]], #16 ;" " cmp %w[rounds], #10 ;" " bmi 0f ;" " bne 3f ;" " mov v3.16b, v1.16b ;" " b 2f ;" "0: mov v2.16b, v1.16b ;" - " ld1 {v3.16b}, [%[key]], #16 ;" + " ld1 {v3.4s}, [%[key]], #16 ;" "1: aesd v0.16b, v2.16b ;" " aesimc v0.16b, v0.16b ;" - "2: ld1 {v1.16b}, [%[key]], #16 ;" + "2: ld1 {v1.4s}, [%[key]], #16 ;" " aesd v0.16b, v3.16b ;" " aesimc v0.16b, v0.16b ;" - "3: ld1 {v2.16b}, [%[key]], #16 ;" + "3: ld1 {v2.4s}, [%[key]], #16 ;" " subs %w[rounds], %w[rounds], #3 ;" " aesd v0.16b, v1.16b ;" " aesimc v0.16b, v0.16b ;" - " ld1 {v3.16b}, [%[key]], #16 ;" + " ld1 {v3.4s}, [%[key]], #16 ;" " bpl 1b ;" " aesd v0.16b, v2.16b ;" " eor v0.16b, v0.16b, v3.16b ;" @@ -165,20 +180,16 @@ int ce_aes_expandkey(struct crypto_aes_ctx *ctx, const u8 *in_key, key_len != AES_KEYSIZE_256) return -EINVAL; - memcpy(ctx->key_enc, in_key, key_len); ctx->key_length = key_len; + for (i = 0; i < kwords; i++) + ctx->key_enc[i] = get_unaligned_le32(in_key + i * sizeof(u32)); - kernel_neon_begin_partial(2); + kernel_neon_begin(); for (i = 0; i < sizeof(rcon); i++) { u32 *rki = ctx->key_enc + (i * kwords); u32 *rko = rki + kwords; -#ifndef CONFIG_CPU_BIG_ENDIAN rko[0] = 
ror32(aes_sub(rki[kwords - 1]), 8) ^ rcon[i] ^ rki[0]; -#else - rko[0] = rol32(aes_sub(rki[kwords - 1]), 8) ^ (rcon[i] << 24) ^ - rki[0]; -#endif rko[1] = rko[0] ^ rki[1]; rko[2] = rko[1] ^ rki[2]; rko[3] = rko[2] ^ rki[3]; @@ -210,9 +221,9 @@ int ce_aes_expandkey(struct crypto_aes_ctx *ctx, const u8 *in_key, key_dec[0] = key_enc[j]; for (i = 1, j--; j > 0; i++, j--) - __asm__("ld1 {v0.16b}, %[in] ;" + __asm__("ld1 {v0.4s}, %[in] ;" "aesimc v1.16b, v0.16b ;" - "st1 {v1.16b}, %[out] ;" + "st1 {v1.4s}, %[out] ;" : [out] "=Q"(key_dec[i]) : [in] "Q"(key_enc[j]) diff --git a/arch/arm64/crypto/aes-ce.S b/arch/arm64/crypto/aes-ce.S index b46093d567e5..50330f5c3adc 100644 --- a/arch/arm64/crypto/aes-ce.S +++ b/arch/arm64/crypto/aes-ce.S @@ -2,7 +2,7 @@ * linux/arch/arm64/crypto/aes-ce.S - AES cipher for ARMv8 with * Crypto Extensions * - * Copyright (C) 2013 Linaro Ltd <ard.biesheuvel@linaro.org> + * Copyright (C) 2013 - 2017 Linaro Ltd <ard.biesheuvel@linaro.org> * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License version 2 as @@ -22,11 +22,11 @@ cmp \rounds, #12 blo 2222f /* 128 bits */ beq 1111f /* 192 bits */ - ld1 {v17.16b-v18.16b}, [\rk], #32 -1111: ld1 {v19.16b-v20.16b}, [\rk], #32 -2222: ld1 {v21.16b-v24.16b}, [\rk], #64 - ld1 {v25.16b-v28.16b}, [\rk], #64 - ld1 {v29.16b-v31.16b}, [\rk] + ld1 {v17.4s-v18.4s}, [\rk], #32 +1111: ld1 {v19.4s-v20.4s}, [\rk], #32 +2222: ld1 {v21.4s-v24.4s}, [\rk], #64 + ld1 {v25.4s-v28.4s}, [\rk], #64 + ld1 {v29.4s-v31.4s}, [\rk] .endm /* prepare for encryption with key in rk[] */ diff --git a/arch/arm64/crypto/aes-cipher-core.S b/arch/arm64/crypto/aes-cipher-core.S index f2f9cc519309..6d2445d603cc 100644 --- a/arch/arm64/crypto/aes-cipher-core.S +++ b/arch/arm64/crypto/aes-cipher-core.S @@ -10,6 +10,7 @@ #include <linux/linkage.h> #include <asm/assembler.h> +#include <asm/cache.h> .text @@ -17,94 +18,155 @@ out .req x1 in .req x2 rounds .req x3 - tt .req x4 - 
lt .req x2 + tt .req x2 - .macro __pair, enc, reg0, reg1, in0, in1e, in1d, shift + .macro __pair1, sz, op, reg0, reg1, in0, in1e, in1d, shift + .ifc \op\shift, b0 + ubfiz \reg0, \in0, #2, #8 + ubfiz \reg1, \in1e, #2, #8 + .else ubfx \reg0, \in0, #\shift, #8 - .if \enc ubfx \reg1, \in1e, #\shift, #8 - .else - ubfx \reg1, \in1d, #\shift, #8 .endif + + /* + * AArch64 cannot do byte size indexed loads from a table containing + * 32-bit quantities, i.e., 'ldrb w12, [tt, w12, uxtw #2]' is not a + * valid instruction. So perform the shift explicitly first for the + * high bytes (the low byte is shifted implicitly by using ubfiz rather + * than ubfx above) + */ + .ifnc \op, b ldr \reg0, [tt, \reg0, uxtw #2] ldr \reg1, [tt, \reg1, uxtw #2] + .else + .if \shift > 0 + lsl \reg0, \reg0, #2 + lsl \reg1, \reg1, #2 + .endif + ldrb \reg0, [tt, \reg0, uxtw] + ldrb \reg1, [tt, \reg1, uxtw] + .endif .endm - .macro __hround, out0, out1, in0, in1, in2, in3, t0, t1, enc + .macro __pair0, sz, op, reg0, reg1, in0, in1e, in1d, shift + ubfx \reg0, \in0, #\shift, #8 + ubfx \reg1, \in1d, #\shift, #8 + ldr\op \reg0, [tt, \reg0, uxtw #\sz] + ldr\op \reg1, [tt, \reg1, uxtw #\sz] + .endm + + .macro __hround, out0, out1, in0, in1, in2, in3, t0, t1, enc, sz, op ldp \out0, \out1, [rk], #8 - __pair \enc, w13, w14, \in0, \in1, \in3, 0 - __pair \enc, w15, w16, \in1, \in2, \in0, 8 - __pair \enc, w17, w18, \in2, \in3, \in1, 16 - __pair \enc, \t0, \t1, \in3, \in0, \in2, 24 - - eor \out0, \out0, w13 - eor \out1, \out1, w14 - eor \out0, \out0, w15, ror #24 - eor \out1, \out1, w16, ror #24 - eor \out0, \out0, w17, ror #16 - eor \out1, \out1, w18, ror #16 + __pair\enc \sz, \op, w12, w13, \in0, \in1, \in3, 0 + __pair\enc \sz, \op, w14, w15, \in1, \in2, \in0, 8 + __pair\enc \sz, \op, w16, w17, \in2, \in3, \in1, 16 + __pair\enc \sz, \op, \t0, \t1, \in3, \in0, \in2, 24 + + eor \out0, \out0, w12 + eor \out1, \out1, w13 + eor \out0, \out0, w14, ror #24 + eor \out1, \out1, w15, ror #24 + eor \out0, \out0, w16, ror 
#16 + eor \out1, \out1, w17, ror #16 eor \out0, \out0, \t0, ror #8 eor \out1, \out1, \t1, ror #8 .endm - .macro fround, out0, out1, out2, out3, in0, in1, in2, in3 - __hround \out0, \out1, \in0, \in1, \in2, \in3, \out2, \out3, 1 - __hround \out2, \out3, \in2, \in3, \in0, \in1, \in1, \in2, 1 + .macro fround, out0, out1, out2, out3, in0, in1, in2, in3, sz=2, op + __hround \out0, \out1, \in0, \in1, \in2, \in3, \out2, \out3, 1, \sz, \op + __hround \out2, \out3, \in2, \in3, \in0, \in1, \in1, \in2, 1, \sz, \op .endm - .macro iround, out0, out1, out2, out3, in0, in1, in2, in3 - __hround \out0, \out1, \in0, \in3, \in2, \in1, \out2, \out3, 0 - __hround \out2, \out3, \in2, \in1, \in0, \in3, \in1, \in0, 0 + .macro iround, out0, out1, out2, out3, in0, in1, in2, in3, sz=2, op + __hround \out0, \out1, \in0, \in3, \in2, \in1, \out2, \out3, 0, \sz, \op + __hround \out2, \out3, \in2, \in1, \in0, \in3, \in1, \in0, 0, \sz, \op .endm - .macro do_crypt, round, ttab, ltab - ldp w5, w6, [in] - ldp w7, w8, [in, #8] - ldp w9, w10, [rk], #16 - ldp w11, w12, [rk, #-8] + .macro do_crypt, round, ttab, ltab, bsz + ldp w4, w5, [in] + ldp w6, w7, [in, #8] + ldp w8, w9, [rk], #16 + ldp w10, w11, [rk, #-8] +CPU_BE( rev w4, w4 ) CPU_BE( rev w5, w5 ) CPU_BE( rev w6, w6 ) CPU_BE( rev w7, w7 ) -CPU_BE( rev w8, w8 ) + eor w4, w4, w8 eor w5, w5, w9 eor w6, w6, w10 eor w7, w7, w11 - eor w8, w8, w12 adr_l tt, \ttab - adr_l lt, \ltab tbnz rounds, #1, 1f -0: \round w9, w10, w11, w12, w5, w6, w7, w8 - \round w5, w6, w7, w8, w9, w10, w11, w12 +0: \round w8, w9, w10, w11, w4, w5, w6, w7 + \round w4, w5, w6, w7, w8, w9, w10, w11 1: subs rounds, rounds, #4 - \round w9, w10, w11, w12, w5, w6, w7, w8 - csel tt, tt, lt, hi - \round w5, w6, w7, w8, w9, w10, w11, w12 - b.hi 0b - + \round w8, w9, w10, w11, w4, w5, w6, w7 + b.ls 3f +2: \round w4, w5, w6, w7, w8, w9, w10, w11 + b 0b +3: adr_l tt, \ltab + \round w4, w5, w6, w7, w8, w9, w10, w11, \bsz, b + +CPU_BE( rev w4, w4 ) CPU_BE( rev w5, w5 ) CPU_BE( rev w6, w6 ) 
CPU_BE( rev w7, w7 ) -CPU_BE( rev w8, w8 ) - stp w5, w6, [out] - stp w7, w8, [out, #8] + stp w4, w5, [out] + stp w6, w7, [out, #8] ret .endm - .align 5 + .align L1_CACHE_SHIFT + .type __aes_arm64_inverse_sbox, %object +__aes_arm64_inverse_sbox: + .byte 0x52, 0x09, 0x6a, 0xd5, 0x30, 0x36, 0xa5, 0x38 + .byte 0xbf, 0x40, 0xa3, 0x9e, 0x81, 0xf3, 0xd7, 0xfb + .byte 0x7c, 0xe3, 0x39, 0x82, 0x9b, 0x2f, 0xff, 0x87 + .byte 0x34, 0x8e, 0x43, 0x44, 0xc4, 0xde, 0xe9, 0xcb + .byte 0x54, 0x7b, 0x94, 0x32, 0xa6, 0xc2, 0x23, 0x3d + .byte 0xee, 0x4c, 0x95, 0x0b, 0x42, 0xfa, 0xc3, 0x4e + .byte 0x08, 0x2e, 0xa1, 0x66, 0x28, 0xd9, 0x24, 0xb2 + .byte 0x76, 0x5b, 0xa2, 0x49, 0x6d, 0x8b, 0xd1, 0x25 + .byte 0x72, 0xf8, 0xf6, 0x64, 0x86, 0x68, 0x98, 0x16 + .byte 0xd4, 0xa4, 0x5c, 0xcc, 0x5d, 0x65, 0xb6, 0x92 + .byte 0x6c, 0x70, 0x48, 0x50, 0xfd, 0xed, 0xb9, 0xda + .byte 0x5e, 0x15, 0x46, 0x57, 0xa7, 0x8d, 0x9d, 0x84 + .byte 0x90, 0xd8, 0xab, 0x00, 0x8c, 0xbc, 0xd3, 0x0a + .byte 0xf7, 0xe4, 0x58, 0x05, 0xb8, 0xb3, 0x45, 0x06 + .byte 0xd0, 0x2c, 0x1e, 0x8f, 0xca, 0x3f, 0x0f, 0x02 + .byte 0xc1, 0xaf, 0xbd, 0x03, 0x01, 0x13, 0x8a, 0x6b + .byte 0x3a, 0x91, 0x11, 0x41, 0x4f, 0x67, 0xdc, 0xea + .byte 0x97, 0xf2, 0xcf, 0xce, 0xf0, 0xb4, 0xe6, 0x73 + .byte 0x96, 0xac, 0x74, 0x22, 0xe7, 0xad, 0x35, 0x85 + .byte 0xe2, 0xf9, 0x37, 0xe8, 0x1c, 0x75, 0xdf, 0x6e + .byte 0x47, 0xf1, 0x1a, 0x71, 0x1d, 0x29, 0xc5, 0x89 + .byte 0x6f, 0xb7, 0x62, 0x0e, 0xaa, 0x18, 0xbe, 0x1b + .byte 0xfc, 0x56, 0x3e, 0x4b, 0xc6, 0xd2, 0x79, 0x20 + .byte 0x9a, 0xdb, 0xc0, 0xfe, 0x78, 0xcd, 0x5a, 0xf4 + .byte 0x1f, 0xdd, 0xa8, 0x33, 0x88, 0x07, 0xc7, 0x31 + .byte 0xb1, 0x12, 0x10, 0x59, 0x27, 0x80, 0xec, 0x5f + .byte 0x60, 0x51, 0x7f, 0xa9, 0x19, 0xb5, 0x4a, 0x0d + .byte 0x2d, 0xe5, 0x7a, 0x9f, 0x93, 0xc9, 0x9c, 0xef + .byte 0xa0, 0xe0, 0x3b, 0x4d, 0xae, 0x2a, 0xf5, 0xb0 + .byte 0xc8, 0xeb, 0xbb, 0x3c, 0x83, 0x53, 0x99, 0x61 + .byte 0x17, 0x2b, 0x04, 0x7e, 0xba, 0x77, 0xd6, 0x26 + .byte 0xe1, 0x69, 0x14, 0x63, 0x55, 0x21, 0x0c, 
0x7d + .size __aes_arm64_inverse_sbox, . - __aes_arm64_inverse_sbox + ENTRY(__aes_arm64_encrypt) - do_crypt fround, crypto_ft_tab, crypto_fl_tab + do_crypt fround, crypto_ft_tab, crypto_ft_tab + 1, 2 ENDPROC(__aes_arm64_encrypt) .align 5 ENTRY(__aes_arm64_decrypt) - do_crypt iround, crypto_it_tab, crypto_il_tab + do_crypt iround, crypto_it_tab, __aes_arm64_inverse_sbox, 0 ENDPROC(__aes_arm64_decrypt) diff --git a/arch/arm64/crypto/aes-ctr-fallback.h b/arch/arm64/crypto/aes-ctr-fallback.h new file mode 100644 index 000000000000..c9285717b6b5 --- /dev/null +++ b/arch/arm64/crypto/aes-ctr-fallback.h @@ -0,0 +1,53 @@ +/* + * Fallback for sync aes(ctr) in contexts where kernel mode NEON + * is not allowed + * + * Copyright (C) 2017 Linaro Ltd <ard.biesheuvel@linaro.org> + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ + +#include <crypto/aes.h> +#include <crypto/internal/skcipher.h> + +asmlinkage void __aes_arm64_encrypt(u32 *rk, u8 *out, const u8 *in, int rounds); + +static inline int aes_ctr_encrypt_fallback(struct crypto_aes_ctx *ctx, + struct skcipher_request *req) +{ + struct skcipher_walk walk; + u8 buf[AES_BLOCK_SIZE]; + int err; + + err = skcipher_walk_virt(&walk, req, true); + + while (walk.nbytes > 0) { + u8 *dst = walk.dst.virt.addr; + u8 *src = walk.src.virt.addr; + int nbytes = walk.nbytes; + int tail = 0; + + if (nbytes < walk.total) { + nbytes = round_down(nbytes, AES_BLOCK_SIZE); + tail = walk.nbytes % AES_BLOCK_SIZE; + } + + do { + int bsize = min(nbytes, AES_BLOCK_SIZE); + + __aes_arm64_encrypt(ctx->key_enc, buf, walk.iv, + 6 + ctx->key_length / 4); + crypto_xor_cpy(dst, src, buf, bsize); + crypto_inc(walk.iv, AES_BLOCK_SIZE); + + dst += AES_BLOCK_SIZE; + src += AES_BLOCK_SIZE; + nbytes -= AES_BLOCK_SIZE; + } while (nbytes > 0); + + err = skcipher_walk_done(&walk, tail); + } + return err; +} diff --git 
a/arch/arm64/crypto/aes-glue.c b/arch/arm64/crypto/aes-glue.c index bcf596b0197e..998ba519a026 100644 --- a/arch/arm64/crypto/aes-glue.c +++ b/arch/arm64/crypto/aes-glue.c @@ -10,6 +10,7 @@ #include <asm/neon.h> #include <asm/hwcap.h> +#include <asm/simd.h> #include <crypto/aes.h> #include <crypto/internal/hash.h> #include <crypto/internal/simd.h> @@ -19,6 +20,7 @@ #include <crypto/xts.h> #include "aes-ce-setkey.h" +#include "aes-ctr-fallback.h" #ifdef USE_V8_CRYPTO_EXTENSIONS #define MODE "ce" @@ -241,9 +243,7 @@ static int ctr_encrypt(struct skcipher_request *req) aes_ctr_encrypt(tail, NULL, (u8 *)ctx->key_enc, rounds, blocks, walk.iv, first); - if (tdst != tsrc) - memcpy(tdst, tsrc, nbytes); - crypto_xor(tdst, tail, nbytes); + crypto_xor_cpy(tdst, tsrc, tail, nbytes); err = skcipher_walk_done(&walk, 0); } kernel_neon_end(); @@ -251,6 +251,17 @@ static int ctr_encrypt(struct skcipher_request *req) return err; } +static int ctr_encrypt_sync(struct skcipher_request *req) +{ + struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req); + struct crypto_aes_ctx *ctx = crypto_skcipher_ctx(tfm); + + if (!may_use_simd()) + return aes_ctr_encrypt_fallback(ctx, req); + + return ctr_encrypt(req); +} + static int xts_encrypt(struct skcipher_request *req) { struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req); @@ -357,8 +368,8 @@ static struct skcipher_alg aes_algs[] = { { .ivsize = AES_BLOCK_SIZE, .chunksize = AES_BLOCK_SIZE, .setkey = skcipher_aes_setkey, - .encrypt = ctr_encrypt, - .decrypt = ctr_encrypt, + .encrypt = ctr_encrypt_sync, + .decrypt = ctr_encrypt_sync, }, { .base = { .cra_name = "__xts(aes)", @@ -460,11 +471,35 @@ static int mac_init(struct shash_desc *desc) return 0; } +static void mac_do_update(struct crypto_aes_ctx *ctx, u8 const in[], int blocks, + u8 dg[], int enc_before, int enc_after) +{ + int rounds = 6 + ctx->key_length / 4; + + if (may_use_simd()) { + kernel_neon_begin(); + aes_mac_update(in, ctx->key_enc, rounds, blocks, dg, enc_before, + 
enc_after); + kernel_neon_end(); + } else { + if (enc_before) + __aes_arm64_encrypt(ctx->key_enc, dg, dg, rounds); + + while (blocks--) { + crypto_xor(dg, in, AES_BLOCK_SIZE); + in += AES_BLOCK_SIZE; + + if (blocks || enc_after) + __aes_arm64_encrypt(ctx->key_enc, dg, dg, + rounds); + } + } +} + static int mac_update(struct shash_desc *desc, const u8 *p, unsigned int len) { struct mac_tfm_ctx *tctx = crypto_shash_ctx(desc->tfm); struct mac_desc_ctx *ctx = shash_desc_ctx(desc); - int rounds = 6 + tctx->key.key_length / 4; while (len > 0) { unsigned int l; @@ -476,10 +511,8 @@ static int mac_update(struct shash_desc *desc, const u8 *p, unsigned int len) len %= AES_BLOCK_SIZE; - kernel_neon_begin(); - aes_mac_update(p, tctx->key.key_enc, rounds, blocks, - ctx->dg, (ctx->len != 0), (len != 0)); - kernel_neon_end(); + mac_do_update(&tctx->key, p, blocks, ctx->dg, + (ctx->len != 0), (len != 0)); p += blocks * AES_BLOCK_SIZE; @@ -507,11 +540,8 @@ static int cbcmac_final(struct shash_desc *desc, u8 *out) { struct mac_tfm_ctx *tctx = crypto_shash_ctx(desc->tfm); struct mac_desc_ctx *ctx = shash_desc_ctx(desc); - int rounds = 6 + tctx->key.key_length / 4; - kernel_neon_begin(); - aes_mac_update(NULL, tctx->key.key_enc, rounds, 0, ctx->dg, 1, 0); - kernel_neon_end(); + mac_do_update(&tctx->key, NULL, 0, ctx->dg, 1, 0); memcpy(out, ctx->dg, AES_BLOCK_SIZE); @@ -522,7 +552,6 @@ static int cmac_final(struct shash_desc *desc, u8 *out) { struct mac_tfm_ctx *tctx = crypto_shash_ctx(desc->tfm); struct mac_desc_ctx *ctx = shash_desc_ctx(desc); - int rounds = 6 + tctx->key.key_length / 4; u8 *consts = tctx->consts; if (ctx->len != AES_BLOCK_SIZE) { @@ -530,9 +559,7 @@ static int cmac_final(struct shash_desc *desc, u8 *out) consts += AES_BLOCK_SIZE; } - kernel_neon_begin(); - aes_mac_update(consts, tctx->key.key_enc, rounds, 1, ctx->dg, 0, 1); - kernel_neon_end(); + mac_do_update(&tctx->key, consts, 1, ctx->dg, 0, 1); memcpy(out, ctx->dg, AES_BLOCK_SIZE); diff --git 
a/arch/arm64/crypto/aes-neonbs-glue.c b/arch/arm64/crypto/aes-neonbs-glue.c index db2501d93550..c55d68ccb89f 100644 --- a/arch/arm64/crypto/aes-neonbs-glue.c +++ b/arch/arm64/crypto/aes-neonbs-glue.c @@ -1,7 +1,7 @@ /* * Bit sliced AES using NEON instructions * - * Copyright (C) 2016 Linaro Ltd <ard.biesheuvel@linaro.org> + * Copyright (C) 2016 - 2017 Linaro Ltd <ard.biesheuvel@linaro.org> * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License version 2 as @@ -9,12 +9,15 @@ */ #include <asm/neon.h> +#include <asm/simd.h> #include <crypto/aes.h> #include <crypto/internal/simd.h> #include <crypto/internal/skcipher.h> #include <crypto/xts.h> #include <linux/module.h> +#include "aes-ctr-fallback.h" + MODULE_AUTHOR("Ard Biesheuvel <ard.biesheuvel@linaro.org>"); MODULE_LICENSE("GPL v2"); @@ -58,6 +61,11 @@ struct aesbs_cbc_ctx { u32 enc[AES_MAX_KEYLENGTH_U32]; }; +struct aesbs_ctr_ctx { + struct aesbs_ctx key; /* must be first member */ + struct crypto_aes_ctx fallback; +}; + struct aesbs_xts_ctx { struct aesbs_ctx key; u32 twkey[AES_MAX_KEYLENGTH_U32]; @@ -196,6 +204,25 @@ static int cbc_decrypt(struct skcipher_request *req) return err; } +static int aesbs_ctr_setkey_sync(struct crypto_skcipher *tfm, const u8 *in_key, + unsigned int key_len) +{ + struct aesbs_ctr_ctx *ctx = crypto_skcipher_ctx(tfm); + int err; + + err = crypto_aes_expand_key(&ctx->fallback, in_key, key_len); + if (err) + return err; + + ctx->key.rounds = 6 + key_len / 4; + + kernel_neon_begin(); + aesbs_convert_key(ctx->key.rk, ctx->fallback.key_enc, ctx->key.rounds); + kernel_neon_end(); + + return 0; +} + static int ctr_encrypt(struct skcipher_request *req) { struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req); @@ -224,9 +251,8 @@ static int ctr_encrypt(struct skcipher_request *req) u8 *dst = walk.dst.virt.addr + blocks * AES_BLOCK_SIZE; u8 *src = walk.src.virt.addr + blocks * AES_BLOCK_SIZE; - if (dst != src) - memcpy(dst, 
src, walk.total % AES_BLOCK_SIZE); - crypto_xor(dst, final, walk.total % AES_BLOCK_SIZE); + crypto_xor_cpy(dst, src, final, + walk.total % AES_BLOCK_SIZE); err = skcipher_walk_done(&walk, 0); break; @@ -260,6 +286,17 @@ static int aesbs_xts_setkey(struct crypto_skcipher *tfm, const u8 *in_key, return aesbs_setkey(tfm, in_key, key_len); } +static int ctr_encrypt_sync(struct skcipher_request *req) +{ + struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req); + struct aesbs_ctr_ctx *ctx = crypto_skcipher_ctx(tfm); + + if (!may_use_simd()) + return aes_ctr_encrypt_fallback(&ctx->fallback, req); + + return ctr_encrypt(req); +} + static int __xts_crypt(struct skcipher_request *req, void (*fn)(u8 out[], u8 const in[], u8 const rk[], int rounds, int blocks, u8 iv[])) @@ -356,7 +393,7 @@ static struct skcipher_alg aes_algs[] = { { .base.cra_driver_name = "ctr-aes-neonbs", .base.cra_priority = 250 - 1, .base.cra_blocksize = 1, - .base.cra_ctxsize = sizeof(struct aesbs_ctx), + .base.cra_ctxsize = sizeof(struct aesbs_ctr_ctx), .base.cra_module = THIS_MODULE, .min_keysize = AES_MIN_KEY_SIZE, @@ -364,9 +401,9 @@ static struct skcipher_alg aes_algs[] = { { .chunksize = AES_BLOCK_SIZE, .walksize = 8 * AES_BLOCK_SIZE, .ivsize = AES_BLOCK_SIZE, - .setkey = aesbs_setkey, - .encrypt = ctr_encrypt, - .decrypt = ctr_encrypt, + .setkey = aesbs_ctr_setkey_sync, + .encrypt = ctr_encrypt_sync, + .decrypt = ctr_encrypt_sync, }, { .base.cra_name = "__xts(aes)", .base.cra_driver_name = "__xts-aes-neonbs", diff --git a/arch/arm64/crypto/chacha20-neon-glue.c b/arch/arm64/crypto/chacha20-neon-glue.c index a7cd575ea223..cbdb75d15cd0 100644 --- a/arch/arm64/crypto/chacha20-neon-glue.c +++ b/arch/arm64/crypto/chacha20-neon-glue.c @@ -1,7 +1,7 @@ /* * ChaCha20 256-bit cipher algorithm, RFC7539, arm64 NEON functions * - * Copyright (C) 2016 Linaro, Ltd. <ard.biesheuvel@linaro.org> + * Copyright (C) 2016 - 2017 Linaro, Ltd. 
<ard.biesheuvel@linaro.org> * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License version 2 as @@ -26,6 +26,7 @@ #include <asm/hwcap.h> #include <asm/neon.h> +#include <asm/simd.h> asmlinkage void chacha20_block_xor_neon(u32 *state, u8 *dst, const u8 *src); asmlinkage void chacha20_4block_xor_neon(u32 *state, u8 *dst, const u8 *src); @@ -64,7 +65,7 @@ static int chacha20_neon(struct skcipher_request *req) u32 state[16]; int err; - if (req->cryptlen <= CHACHA20_BLOCK_SIZE) + if (!may_use_simd() || req->cryptlen <= CHACHA20_BLOCK_SIZE) return crypto_chacha20_crypt(req); err = skcipher_walk_virt(&walk, req, true); diff --git a/arch/arm64/crypto/crc32-ce-glue.c b/arch/arm64/crypto/crc32-ce-glue.c index eccb1ae90064..624f4137918c 100644 --- a/arch/arm64/crypto/crc32-ce-glue.c +++ b/arch/arm64/crypto/crc32-ce-glue.c @@ -1,7 +1,7 @@ /* * Accelerated CRC32(C) using arm64 NEON and Crypto Extensions instructions * - * Copyright (C) 2016 Linaro Ltd <ard.biesheuvel@linaro.org> + * Copyright (C) 2016 - 2017 Linaro Ltd <ard.biesheuvel@linaro.org> * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License version 2 as @@ -19,6 +19,7 @@ #include <asm/hwcap.h> #include <asm/neon.h> +#include <asm/simd.h> #include <asm/unaligned.h> #define PMULL_MIN_LEN 64L /* minimum size of buffer @@ -105,10 +106,10 @@ static int crc32_pmull_update(struct shash_desc *desc, const u8 *data, length -= l; } - if (length >= PMULL_MIN_LEN) { + if (length >= PMULL_MIN_LEN && may_use_simd()) { l = round_down(length, SCALE_F); - kernel_neon_begin_partial(10); + kernel_neon_begin(); *crc = crc32_pmull_le(data, l, *crc); kernel_neon_end(); @@ -137,10 +138,10 @@ static int crc32c_pmull_update(struct shash_desc *desc, const u8 *data, length -= l; } - if (length >= PMULL_MIN_LEN) { + if (length >= PMULL_MIN_LEN && may_use_simd()) { l = round_down(length, SCALE_F); - 
kernel_neon_begin_partial(10); + kernel_neon_begin(); *crc = crc32c_pmull_le(data, l, *crc); kernel_neon_end(); diff --git a/arch/arm64/crypto/crct10dif-ce-glue.c b/arch/arm64/crypto/crct10dif-ce-glue.c index 60cb590c2590..96f0cae4a022 100644 --- a/arch/arm64/crypto/crct10dif-ce-glue.c +++ b/arch/arm64/crypto/crct10dif-ce-glue.c @@ -1,7 +1,7 @@ /* * Accelerated CRC-T10DIF using arm64 NEON and Crypto Extensions instructions * - * Copyright (C) 2016 Linaro Ltd <ard.biesheuvel@linaro.org> + * Copyright (C) 2016 - 2017 Linaro Ltd <ard.biesheuvel@linaro.org> * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License version 2 as @@ -18,6 +18,7 @@ #include <crypto/internal/hash.h> #include <asm/neon.h> +#include <asm/simd.h> #define CRC_T10DIF_PMULL_CHUNK_SIZE 16U @@ -48,9 +49,13 @@ static int crct10dif_update(struct shash_desc *desc, const u8 *data, } if (length > 0) { - kernel_neon_begin_partial(14); - *crc = crc_t10dif_pmull(*crc, data, length); - kernel_neon_end(); + if (may_use_simd()) { + kernel_neon_begin(); + *crc = crc_t10dif_pmull(*crc, data, length); + kernel_neon_end(); + } else { + *crc = crc_t10dif_generic(*crc, data, length); + } } return 0; diff --git a/arch/arm64/crypto/ghash-ce-core.S b/arch/arm64/crypto/ghash-ce-core.S index f0bb9f0b524f..11ebf1ae248a 100644 --- a/arch/arm64/crypto/ghash-ce-core.S +++ b/arch/arm64/crypto/ghash-ce-core.S @@ -1,7 +1,7 @@ /* * Accelerated GHASH implementation with ARMv8 PMULL instructions. * - * Copyright (C) 2014 Linaro Ltd. <ard.biesheuvel@linaro.org> + * Copyright (C) 2014 - 2017 Linaro Ltd. 
<ard.biesheuvel@linaro.org> * * This program is free software; you can redistribute it and/or modify it * under the terms of the GNU General Public License version 2 as published @@ -11,31 +11,215 @@ #include <linux/linkage.h> #include <asm/assembler.h> - SHASH .req v0 - SHASH2 .req v1 - T1 .req v2 - T2 .req v3 - MASK .req v4 - XL .req v5 - XM .req v6 - XH .req v7 - IN1 .req v7 + SHASH .req v0 + SHASH2 .req v1 + T1 .req v2 + T2 .req v3 + MASK .req v4 + XL .req v5 + XM .req v6 + XH .req v7 + IN1 .req v7 + + k00_16 .req v8 + k32_48 .req v9 + + t3 .req v10 + t4 .req v11 + t5 .req v12 + t6 .req v13 + t7 .req v14 + t8 .req v15 + t9 .req v16 + + perm1 .req v17 + perm2 .req v18 + perm3 .req v19 + + sh1 .req v20 + sh2 .req v21 + sh3 .req v22 + sh4 .req v23 + + ss1 .req v24 + ss2 .req v25 + ss3 .req v26 + ss4 .req v27 .text .arch armv8-a+crypto - /* - * void pmull_ghash_update(int blocks, u64 dg[], const char *src, - * struct ghash_key const *k, const char *head) - */ -ENTRY(pmull_ghash_update) + .macro __pmull_p64, rd, rn, rm + pmull \rd\().1q, \rn\().1d, \rm\().1d + .endm + + .macro __pmull2_p64, rd, rn, rm + pmull2 \rd\().1q, \rn\().2d, \rm\().2d + .endm + + .macro __pmull_p8, rq, ad, bd + ext t3.8b, \ad\().8b, \ad\().8b, #1 // A1 + ext t5.8b, \ad\().8b, \ad\().8b, #2 // A2 + ext t7.8b, \ad\().8b, \ad\().8b, #3 // A3 + + __pmull_p8_\bd \rq, \ad + .endm + + .macro __pmull2_p8, rq, ad, bd + tbl t3.16b, {\ad\().16b}, perm1.16b // A1 + tbl t5.16b, {\ad\().16b}, perm2.16b // A2 + tbl t7.16b, {\ad\().16b}, perm3.16b // A3 + + __pmull2_p8_\bd \rq, \ad + .endm + + .macro __pmull_p8_SHASH, rq, ad + __pmull_p8_tail \rq, \ad\().8b, SHASH.8b, 8b,, sh1, sh2, sh3, sh4 + .endm + + .macro __pmull_p8_SHASH2, rq, ad + __pmull_p8_tail \rq, \ad\().8b, SHASH2.8b, 8b,, ss1, ss2, ss3, ss4 + .endm + + .macro __pmull2_p8_SHASH, rq, ad + __pmull_p8_tail \rq, \ad\().16b, SHASH.16b, 16b, 2, sh1, sh2, sh3, sh4 + .endm + + .macro __pmull_p8_tail, rq, ad, bd, nb, t, b1, b2, b3, b4 + pmull\t t3.8h, 
t3.\nb, \bd // F = A1*B + pmull\t t4.8h, \ad, \b1\().\nb // E = A*B1 + pmull\t t5.8h, t5.\nb, \bd // H = A2*B + pmull\t t6.8h, \ad, \b2\().\nb // G = A*B2 + pmull\t t7.8h, t7.\nb, \bd // J = A3*B + pmull\t t8.8h, \ad, \b3\().\nb // I = A*B3 + pmull\t t9.8h, \ad, \b4\().\nb // K = A*B4 + pmull\t \rq\().8h, \ad, \bd // D = A*B + + eor t3.16b, t3.16b, t4.16b // L = E + F + eor t5.16b, t5.16b, t6.16b // M = G + H + eor t7.16b, t7.16b, t8.16b // N = I + J + + uzp1 t4.2d, t3.2d, t5.2d + uzp2 t3.2d, t3.2d, t5.2d + uzp1 t6.2d, t7.2d, t9.2d + uzp2 t7.2d, t7.2d, t9.2d + + // t3 = (L) (P0 + P1) << 8 + // t5 = (M) (P2 + P3) << 16 + eor t4.16b, t4.16b, t3.16b + and t3.16b, t3.16b, k32_48.16b + + // t7 = (N) (P4 + P5) << 24 + // t9 = (K) (P6 + P7) << 32 + eor t6.16b, t6.16b, t7.16b + and t7.16b, t7.16b, k00_16.16b + + eor t4.16b, t4.16b, t3.16b + eor t6.16b, t6.16b, t7.16b + + zip2 t5.2d, t4.2d, t3.2d + zip1 t3.2d, t4.2d, t3.2d + zip2 t9.2d, t6.2d, t7.2d + zip1 t7.2d, t6.2d, t7.2d + + ext t3.16b, t3.16b, t3.16b, #15 + ext t5.16b, t5.16b, t5.16b, #14 + ext t7.16b, t7.16b, t7.16b, #13 + ext t9.16b, t9.16b, t9.16b, #12 + + eor t3.16b, t3.16b, t5.16b + eor t7.16b, t7.16b, t9.16b + eor \rq\().16b, \rq\().16b, t3.16b + eor \rq\().16b, \rq\().16b, t7.16b + .endm + + .macro __pmull_pre_p64 + movi MASK.16b, #0xe1 + shl MASK.2d, MASK.2d, #57 + .endm + + .macro __pmull_pre_p8 + // k00_16 := 0x0000000000000000_000000000000ffff + // k32_48 := 0x00000000ffffffff_0000ffffffffffff + movi k32_48.2d, #0xffffffff + mov k32_48.h[2], k32_48.h[0] + ushr k00_16.2d, k32_48.2d, #32 + + // prepare the permutation vectors + mov_q x5, 0x080f0e0d0c0b0a09 + movi T1.8b, #8 + dup perm1.2d, x5 + eor perm1.16b, perm1.16b, T1.16b + ushr perm2.2d, perm1.2d, #8 + ushr perm3.2d, perm1.2d, #16 + ushr T1.2d, perm1.2d, #24 + sli perm2.2d, perm1.2d, #56 + sli perm3.2d, perm1.2d, #48 + sli T1.2d, perm1.2d, #40 + + // precompute loop invariants + tbl sh1.16b, {SHASH.16b}, perm1.16b + tbl sh2.16b, {SHASH.16b}, perm2.16b + 
tbl sh3.16b, {SHASH.16b}, perm3.16b + tbl sh4.16b, {SHASH.16b}, T1.16b + ext ss1.8b, SHASH2.8b, SHASH2.8b, #1 + ext ss2.8b, SHASH2.8b, SHASH2.8b, #2 + ext ss3.8b, SHASH2.8b, SHASH2.8b, #3 + ext ss4.8b, SHASH2.8b, SHASH2.8b, #4 + .endm + + // + // PMULL (64x64->128) based reduction for CPUs that can do + // it in a single instruction. + // + .macro __pmull_reduce_p64 + pmull T2.1q, XL.1d, MASK.1d + eor XM.16b, XM.16b, T1.16b + + mov XH.d[0], XM.d[1] + mov XM.d[1], XL.d[0] + + eor XL.16b, XM.16b, T2.16b + ext T2.16b, XL.16b, XL.16b, #8 + pmull XL.1q, XL.1d, MASK.1d + .endm + + // + // Alternative reduction for CPUs that lack support for the + // 64x64->128 PMULL instruction + // + .macro __pmull_reduce_p8 + eor XM.16b, XM.16b, T1.16b + + mov XL.d[1], XM.d[0] + mov XH.d[0], XM.d[1] + + shl T1.2d, XL.2d, #57 + shl T2.2d, XL.2d, #62 + eor T2.16b, T2.16b, T1.16b + shl T1.2d, XL.2d, #63 + eor T2.16b, T2.16b, T1.16b + ext T1.16b, XL.16b, XH.16b, #8 + eor T2.16b, T2.16b, T1.16b + + mov XL.d[1], T2.d[0] + mov XH.d[0], T2.d[1] + + ushr T2.2d, XL.2d, #1 + eor XH.16b, XH.16b, XL.16b + eor XL.16b, XL.16b, T2.16b + ushr T2.2d, T2.2d, #6 + ushr XL.2d, XL.2d, #1 + .endm + + .macro __pmull_ghash, pn ld1 {SHASH.2d}, [x3] ld1 {XL.2d}, [x1] - movi MASK.16b, #0xe1 ext SHASH2.16b, SHASH.16b, SHASH.16b, #8 - shl MASK.2d, MASK.2d, #57 eor SHASH2.16b, SHASH2.16b, SHASH.16b + __pmull_pre_\pn + /* do the head block first, if supplied */ cbz x4, 0f ld1 {T1.2d}, [x4] @@ -52,28 +236,209 @@ CPU_LE( rev64 T1.16b, T1.16b ) eor T1.16b, T1.16b, T2.16b eor XL.16b, XL.16b, IN1.16b + __pmull2_\pn XH, XL, SHASH // a1 * b1 + eor T1.16b, T1.16b, XL.16b + __pmull_\pn XL, XL, SHASH // a0 * b0 + __pmull_\pn XM, T1, SHASH2 // (a1 + a0)(b1 + b0) + + eor T2.16b, XL.16b, XH.16b + ext T1.16b, XL.16b, XH.16b, #8 + eor XM.16b, XM.16b, T2.16b + + __pmull_reduce_\pn + + eor T2.16b, T2.16b, XH.16b + eor XL.16b, XL.16b, T2.16b + + cbnz w0, 0b + + st1 {XL.2d}, [x1] + ret + .endm + + /* + * void pmull_ghash_update(int 
blocks, u64 dg[], const char *src, + * struct ghash_key const *k, const char *head) + */ +ENTRY(pmull_ghash_update_p64) + __pmull_ghash p64 +ENDPROC(pmull_ghash_update_p64) + +ENTRY(pmull_ghash_update_p8) + __pmull_ghash p8 +ENDPROC(pmull_ghash_update_p8) + + KS .req v8 + CTR .req v9 + INP .req v10 + + .macro load_round_keys, rounds, rk + cmp \rounds, #12 + blo 2222f /* 128 bits */ + beq 1111f /* 192 bits */ + ld1 {v17.4s-v18.4s}, [\rk], #32 +1111: ld1 {v19.4s-v20.4s}, [\rk], #32 +2222: ld1 {v21.4s-v24.4s}, [\rk], #64 + ld1 {v25.4s-v28.4s}, [\rk], #64 + ld1 {v29.4s-v31.4s}, [\rk] + .endm + + .macro enc_round, state, key + aese \state\().16b, \key\().16b + aesmc \state\().16b, \state\().16b + .endm + + .macro enc_block, state, rounds + cmp \rounds, #12 + b.lo 2222f /* 128 bits */ + b.eq 1111f /* 192 bits */ + enc_round \state, v17 + enc_round \state, v18 +1111: enc_round \state, v19 + enc_round \state, v20 +2222: .irp key, v21, v22, v23, v24, v25, v26, v27, v28, v29 + enc_round \state, \key + .endr + aese \state\().16b, v30.16b + eor \state\().16b, \state\().16b, v31.16b + .endm + + .macro pmull_gcm_do_crypt, enc + ld1 {SHASH.2d}, [x4] + ld1 {XL.2d}, [x1] + ldr x8, [x5, #8] // load lower counter + + movi MASK.16b, #0xe1 + ext SHASH2.16b, SHASH.16b, SHASH.16b, #8 +CPU_LE( rev x8, x8 ) + shl MASK.2d, MASK.2d, #57 + eor SHASH2.16b, SHASH2.16b, SHASH.16b + + .if \enc == 1 + ld1 {KS.16b}, [x7] + .endif + +0: ld1 {CTR.8b}, [x5] // load upper counter + ld1 {INP.16b}, [x3], #16 + rev x9, x8 + add x8, x8, #1 + sub w0, w0, #1 + ins CTR.d[1], x9 // set lower counter + + .if \enc == 1 + eor INP.16b, INP.16b, KS.16b // encrypt input + st1 {INP.16b}, [x2], #16 + .endif + + rev64 T1.16b, INP.16b + + cmp w6, #12 + b.ge 2f // AES-192/256? 
+ +1: enc_round CTR, v21 + + ext T2.16b, XL.16b, XL.16b, #8 + ext IN1.16b, T1.16b, T1.16b, #8 + + enc_round CTR, v22 + + eor T1.16b, T1.16b, T2.16b + eor XL.16b, XL.16b, IN1.16b + + enc_round CTR, v23 + pmull2 XH.1q, SHASH.2d, XL.2d // a1 * b1 eor T1.16b, T1.16b, XL.16b + + enc_round CTR, v24 + pmull XL.1q, SHASH.1d, XL.1d // a0 * b0 pmull XM.1q, SHASH2.1d, T1.1d // (a1 + a0)(b1 + b0) + enc_round CTR, v25 + ext T1.16b, XL.16b, XH.16b, #8 eor T2.16b, XL.16b, XH.16b eor XM.16b, XM.16b, T1.16b + + enc_round CTR, v26 + eor XM.16b, XM.16b, T2.16b pmull T2.1q, XL.1d, MASK.1d + enc_round CTR, v27 + mov XH.d[0], XM.d[1] mov XM.d[1], XL.d[0] + enc_round CTR, v28 + eor XL.16b, XM.16b, T2.16b + + enc_round CTR, v29 + ext T2.16b, XL.16b, XL.16b, #8 + + aese CTR.16b, v30.16b + pmull XL.1q, XL.1d, MASK.1d eor T2.16b, T2.16b, XH.16b + + eor KS.16b, CTR.16b, v31.16b + eor XL.16b, XL.16b, T2.16b + .if \enc == 0 + eor INP.16b, INP.16b, KS.16b + st1 {INP.16b}, [x2], #16 + .endif + cbnz w0, 0b +CPU_LE( rev x8, x8 ) st1 {XL.2d}, [x1] + str x8, [x5, #8] // store lower counter + + .if \enc == 1 + st1 {KS.16b}, [x7] + .endif + + ret + +2: b.eq 3f // AES-192? 
+ enc_round CTR, v17 + enc_round CTR, v18 +3: enc_round CTR, v19 + enc_round CTR, v20 + b 1b + .endm + + /* + * void pmull_gcm_encrypt(int blocks, u64 dg[], u8 dst[], const u8 src[], + * struct ghash_key const *k, u8 ctr[], + * int rounds, u8 ks[]) + */ +ENTRY(pmull_gcm_encrypt) + pmull_gcm_do_crypt 1 +ENDPROC(pmull_gcm_encrypt) + + /* + * void pmull_gcm_decrypt(int blocks, u64 dg[], u8 dst[], const u8 src[], + * struct ghash_key const *k, u8 ctr[], + * int rounds) + */ +ENTRY(pmull_gcm_decrypt) + pmull_gcm_do_crypt 0 +ENDPROC(pmull_gcm_decrypt) + + /* + * void pmull_gcm_encrypt_block(u8 dst[], u8 src[], u8 rk[], int rounds) + */ +ENTRY(pmull_gcm_encrypt_block) + cbz x2, 0f + load_round_keys w3, x2 +0: ld1 {v0.16b}, [x1] + enc_block v0, w3 + st1 {v0.16b}, [x0] ret -ENDPROC(pmull_ghash_update) +ENDPROC(pmull_gcm_encrypt_block) diff --git a/arch/arm64/crypto/ghash-ce-glue.c b/arch/arm64/crypto/ghash-ce-glue.c index 833ec1e3f3e9..cfc9c92814fd 100644 --- a/arch/arm64/crypto/ghash-ce-glue.c +++ b/arch/arm64/crypto/ghash-ce-glue.c @@ -1,7 +1,7 @@ /* * Accelerated GHASH implementation with ARMv8 PMULL instructions. * - * Copyright (C) 2014 Linaro Ltd. <ard.biesheuvel@linaro.org> + * Copyright (C) 2014 - 2017 Linaro Ltd. 
<ard.biesheuvel@linaro.org> * * This program is free software; you can redistribute it and/or modify it * under the terms of the GNU General Public License version 2 as published @@ -9,22 +9,33 @@ */ #include <asm/neon.h> +#include <asm/simd.h> #include <asm/unaligned.h> +#include <crypto/aes.h> +#include <crypto/algapi.h> +#include <crypto/b128ops.h> +#include <crypto/gf128mul.h> +#include <crypto/internal/aead.h> #include <crypto/internal/hash.h> +#include <crypto/internal/skcipher.h> +#include <crypto/scatterwalk.h> #include <linux/cpufeature.h> #include <linux/crypto.h> #include <linux/module.h> -MODULE_DESCRIPTION("GHASH secure hash using ARMv8 Crypto Extensions"); +MODULE_DESCRIPTION("GHASH and AES-GCM using ARMv8 Crypto Extensions"); MODULE_AUTHOR("Ard Biesheuvel <ard.biesheuvel@linaro.org>"); MODULE_LICENSE("GPL v2"); +MODULE_ALIAS_CRYPTO("ghash"); #define GHASH_BLOCK_SIZE 16 #define GHASH_DIGEST_SIZE 16 +#define GCM_IV_SIZE 12 struct ghash_key { u64 a; u64 b; + be128 k; }; struct ghash_desc_ctx { @@ -33,8 +44,35 @@ struct ghash_desc_ctx { u32 count; }; -asmlinkage void pmull_ghash_update(int blocks, u64 dg[], const char *src, - struct ghash_key const *k, const char *head); +struct gcm_aes_ctx { + struct crypto_aes_ctx aes_key; + struct ghash_key ghash_key; +}; + +asmlinkage void pmull_ghash_update_p64(int blocks, u64 dg[], const char *src, + struct ghash_key const *k, + const char *head); + +asmlinkage void pmull_ghash_update_p8(int blocks, u64 dg[], const char *src, + struct ghash_key const *k, + const char *head); + +static void (*pmull_ghash_update)(int blocks, u64 dg[], const char *src, + struct ghash_key const *k, + const char *head); + +asmlinkage void pmull_gcm_encrypt(int blocks, u64 dg[], u8 dst[], + const u8 src[], struct ghash_key const *k, + u8 ctr[], int rounds, u8 ks[]); + +asmlinkage void pmull_gcm_decrypt(int blocks, u64 dg[], u8 dst[], + const u8 src[], struct ghash_key const *k, + u8 ctr[], int rounds); + +asmlinkage void 
pmull_gcm_encrypt_block(u8 dst[], u8 const src[], + u32 const rk[], int rounds); + +asmlinkage void __aes_arm64_encrypt(u32 *rk, u8 *out, const u8 *in, int rounds); static int ghash_init(struct shash_desc *desc) { @@ -44,6 +82,36 @@ static int ghash_init(struct shash_desc *desc) return 0; } +static void ghash_do_update(int blocks, u64 dg[], const char *src, + struct ghash_key *key, const char *head) +{ + if (likely(may_use_simd())) { + kernel_neon_begin(); + pmull_ghash_update(blocks, dg, src, key, head); + kernel_neon_end(); + } else { + be128 dst = { cpu_to_be64(dg[1]), cpu_to_be64(dg[0]) }; + + do { + const u8 *in = src; + + if (head) { + in = head; + blocks++; + head = NULL; + } else { + src += GHASH_BLOCK_SIZE; + } + + crypto_xor((u8 *)&dst, in, GHASH_BLOCK_SIZE); + gf128mul_lle(&dst, &key->k); + } while (--blocks); + + dg[0] = be64_to_cpu(dst.b); + dg[1] = be64_to_cpu(dst.a); + } +} + static int ghash_update(struct shash_desc *desc, const u8 *src, unsigned int len) { @@ -67,10 +135,9 @@ static int ghash_update(struct shash_desc *desc, const u8 *src, blocks = len / GHASH_BLOCK_SIZE; len %= GHASH_BLOCK_SIZE; - kernel_neon_begin_partial(8); - pmull_ghash_update(blocks, ctx->digest, src, key, - partial ? ctx->buf : NULL); - kernel_neon_end(); + ghash_do_update(blocks, ctx->digest, src, key, + partial ? 
ctx->buf : NULL); + src += blocks * GHASH_BLOCK_SIZE; partial = 0; } @@ -89,9 +156,7 @@ static int ghash_final(struct shash_desc *desc, u8 *dst) memset(ctx->buf + partial, 0, GHASH_BLOCK_SIZE - partial); - kernel_neon_begin_partial(8); - pmull_ghash_update(1, ctx->digest, ctx->buf, key, NULL); - kernel_neon_end(); + ghash_do_update(1, ctx->digest, ctx->buf, key, NULL); } put_unaligned_be64(ctx->digest[1], dst); put_unaligned_be64(ctx->digest[0], dst + 8); @@ -100,16 +165,13 @@ static int ghash_final(struct shash_desc *desc, u8 *dst) return 0; } -static int ghash_setkey(struct crypto_shash *tfm, - const u8 *inkey, unsigned int keylen) +static int __ghash_setkey(struct ghash_key *key, + const u8 *inkey, unsigned int keylen) { - struct ghash_key *key = crypto_shash_ctx(tfm); u64 a, b; - if (keylen != GHASH_BLOCK_SIZE) { - crypto_shash_set_flags(tfm, CRYPTO_TFM_RES_BAD_KEY_LEN); - return -EINVAL; - } + /* needed for the fallback */ + memcpy(&key->k, inkey, GHASH_BLOCK_SIZE); /* perform multiplication by 'x' in GF(2^128) */ b = get_unaligned_be64(inkey); @@ -124,33 +186,418 @@ static int ghash_setkey(struct crypto_shash *tfm, return 0; } +static int ghash_setkey(struct crypto_shash *tfm, + const u8 *inkey, unsigned int keylen) +{ + struct ghash_key *key = crypto_shash_ctx(tfm); + + if (keylen != GHASH_BLOCK_SIZE) { + crypto_shash_set_flags(tfm, CRYPTO_TFM_RES_BAD_KEY_LEN); + return -EINVAL; + } + + return __ghash_setkey(key, inkey, keylen); +} + static struct shash_alg ghash_alg = { - .digestsize = GHASH_DIGEST_SIZE, - .init = ghash_init, - .update = ghash_update, - .final = ghash_final, - .setkey = ghash_setkey, - .descsize = sizeof(struct ghash_desc_ctx), - .base = { - .cra_name = "ghash", - .cra_driver_name = "ghash-ce", - .cra_priority = 200, - .cra_flags = CRYPTO_ALG_TYPE_SHASH, - .cra_blocksize = GHASH_BLOCK_SIZE, - .cra_ctxsize = sizeof(struct ghash_key), - .cra_module = THIS_MODULE, - }, + .base.cra_name = "ghash", + .base.cra_driver_name = "ghash-ce", + 
.base.cra_priority = 200, + .base.cra_flags = CRYPTO_ALG_TYPE_SHASH, + .base.cra_blocksize = GHASH_BLOCK_SIZE, + .base.cra_ctxsize = sizeof(struct ghash_key), + .base.cra_module = THIS_MODULE, + + .digestsize = GHASH_DIGEST_SIZE, + .init = ghash_init, + .update = ghash_update, + .final = ghash_final, + .setkey = ghash_setkey, + .descsize = sizeof(struct ghash_desc_ctx), +}; + +static int num_rounds(struct crypto_aes_ctx *ctx) +{ + /* + * # of rounds specified by AES: + * 128 bit key 10 rounds + * 192 bit key 12 rounds + * 256 bit key 14 rounds + * => n byte key => 6 + (n/4) rounds + */ + return 6 + ctx->key_length / 4; +} + +static int gcm_setkey(struct crypto_aead *tfm, const u8 *inkey, + unsigned int keylen) +{ + struct gcm_aes_ctx *ctx = crypto_aead_ctx(tfm); + u8 key[GHASH_BLOCK_SIZE]; + int ret; + + ret = crypto_aes_expand_key(&ctx->aes_key, inkey, keylen); + if (ret) { + tfm->base.crt_flags |= CRYPTO_TFM_RES_BAD_KEY_LEN; + return -EINVAL; + } + + __aes_arm64_encrypt(ctx->aes_key.key_enc, key, (u8[AES_BLOCK_SIZE]){}, + num_rounds(&ctx->aes_key)); + + return __ghash_setkey(&ctx->ghash_key, key, sizeof(key)); +} + +static int gcm_setauthsize(struct crypto_aead *tfm, unsigned int authsize) +{ + switch (authsize) { + case 4: + case 8: + case 12 ... 16: + break; + default: + return -EINVAL; + } + return 0; +} + +static void gcm_update_mac(u64 dg[], const u8 *src, int count, u8 buf[], + int *buf_count, struct gcm_aes_ctx *ctx) +{ + if (*buf_count > 0) { + int buf_added = min(count, GHASH_BLOCK_SIZE - *buf_count); + + memcpy(&buf[*buf_count], src, buf_added); + + *buf_count += buf_added; + src += buf_added; + count -= buf_added; + } + + if (count >= GHASH_BLOCK_SIZE || *buf_count == GHASH_BLOCK_SIZE) { + int blocks = count / GHASH_BLOCK_SIZE; + + ghash_do_update(blocks, dg, src, &ctx->ghash_key, + *buf_count ? 
buf : NULL); + + src += blocks * GHASH_BLOCK_SIZE; + count %= GHASH_BLOCK_SIZE; + *buf_count = 0; + } + + if (count > 0) { + memcpy(buf, src, count); + *buf_count = count; + } +} + +static void gcm_calculate_auth_mac(struct aead_request *req, u64 dg[]) +{ + struct crypto_aead *aead = crypto_aead_reqtfm(req); + struct gcm_aes_ctx *ctx = crypto_aead_ctx(aead); + u8 buf[GHASH_BLOCK_SIZE]; + struct scatter_walk walk; + u32 len = req->assoclen; + int buf_count = 0; + + scatterwalk_start(&walk, req->src); + + do { + u32 n = scatterwalk_clamp(&walk, len); + u8 *p; + + if (!n) { + scatterwalk_start(&walk, sg_next(walk.sg)); + n = scatterwalk_clamp(&walk, len); + } + p = scatterwalk_map(&walk); + + gcm_update_mac(dg, p, n, buf, &buf_count, ctx); + len -= n; + + scatterwalk_unmap(p); + scatterwalk_advance(&walk, n); + scatterwalk_done(&walk, 0, len); + } while (len); + + if (buf_count) { + memset(&buf[buf_count], 0, GHASH_BLOCK_SIZE - buf_count); + ghash_do_update(1, dg, buf, &ctx->ghash_key, NULL); + } +} + +static void gcm_final(struct aead_request *req, struct gcm_aes_ctx *ctx, + u64 dg[], u8 tag[], int cryptlen) +{ + u8 mac[AES_BLOCK_SIZE]; + u128 lengths; + + lengths.a = cpu_to_be64(req->assoclen * 8); + lengths.b = cpu_to_be64(cryptlen * 8); + + ghash_do_update(1, dg, (void *)&lengths, &ctx->ghash_key, NULL); + + put_unaligned_be64(dg[1], mac); + put_unaligned_be64(dg[0], mac + 8); + + crypto_xor(tag, mac, AES_BLOCK_SIZE); +} + +static int gcm_encrypt(struct aead_request *req) +{ + struct crypto_aead *aead = crypto_aead_reqtfm(req); + struct gcm_aes_ctx *ctx = crypto_aead_ctx(aead); + struct skcipher_walk walk; + u8 iv[AES_BLOCK_SIZE]; + u8 ks[AES_BLOCK_SIZE]; + u8 tag[AES_BLOCK_SIZE]; + u64 dg[2] = {}; + int err; + + if (req->assoclen) + gcm_calculate_auth_mac(req, dg); + + memcpy(iv, req->iv, GCM_IV_SIZE); + put_unaligned_be32(1, iv + GCM_IV_SIZE); + + if (likely(may_use_simd())) { + kernel_neon_begin(); + + pmull_gcm_encrypt_block(tag, iv, ctx->aes_key.key_enc, + 
num_rounds(&ctx->aes_key)); + put_unaligned_be32(2, iv + GCM_IV_SIZE); + pmull_gcm_encrypt_block(ks, iv, NULL, + num_rounds(&ctx->aes_key)); + put_unaligned_be32(3, iv + GCM_IV_SIZE); + + err = skcipher_walk_aead_encrypt(&walk, req, true); + + while (walk.nbytes >= AES_BLOCK_SIZE) { + int blocks = walk.nbytes / AES_BLOCK_SIZE; + + pmull_gcm_encrypt(blocks, dg, walk.dst.virt.addr, + walk.src.virt.addr, &ctx->ghash_key, + iv, num_rounds(&ctx->aes_key), ks); + + err = skcipher_walk_done(&walk, + walk.nbytes % AES_BLOCK_SIZE); + } + kernel_neon_end(); + } else { + __aes_arm64_encrypt(ctx->aes_key.key_enc, tag, iv, + num_rounds(&ctx->aes_key)); + put_unaligned_be32(2, iv + GCM_IV_SIZE); + + err = skcipher_walk_aead_encrypt(&walk, req, true); + + while (walk.nbytes >= AES_BLOCK_SIZE) { + int blocks = walk.nbytes / AES_BLOCK_SIZE; + u8 *dst = walk.dst.virt.addr; + u8 *src = walk.src.virt.addr; + + do { + __aes_arm64_encrypt(ctx->aes_key.key_enc, + ks, iv, + num_rounds(&ctx->aes_key)); + crypto_xor_cpy(dst, src, ks, AES_BLOCK_SIZE); + crypto_inc(iv, AES_BLOCK_SIZE); + + dst += AES_BLOCK_SIZE; + src += AES_BLOCK_SIZE; + } while (--blocks > 0); + + ghash_do_update(walk.nbytes / AES_BLOCK_SIZE, dg, + walk.dst.virt.addr, &ctx->ghash_key, + NULL); + + err = skcipher_walk_done(&walk, + walk.nbytes % AES_BLOCK_SIZE); + } + if (walk.nbytes) + __aes_arm64_encrypt(ctx->aes_key.key_enc, ks, iv, + num_rounds(&ctx->aes_key)); + } + + /* handle the tail */ + if (walk.nbytes) { + u8 buf[GHASH_BLOCK_SIZE]; + + crypto_xor_cpy(walk.dst.virt.addr, walk.src.virt.addr, ks, + walk.nbytes); + + memcpy(buf, walk.dst.virt.addr, walk.nbytes); + memset(buf + walk.nbytes, 0, GHASH_BLOCK_SIZE - walk.nbytes); + ghash_do_update(1, dg, buf, &ctx->ghash_key, NULL); + + err = skcipher_walk_done(&walk, 0); + } + + if (err) + return err; + + gcm_final(req, ctx, dg, tag, req->cryptlen); + + /* copy authtag to end of dst */ + scatterwalk_map_and_copy(tag, req->dst, req->assoclen + req->cryptlen, + 
crypto_aead_authsize(aead), 1); + + return 0; +} + +static int gcm_decrypt(struct aead_request *req) +{ + struct crypto_aead *aead = crypto_aead_reqtfm(req); + struct gcm_aes_ctx *ctx = crypto_aead_ctx(aead); + unsigned int authsize = crypto_aead_authsize(aead); + struct skcipher_walk walk; + u8 iv[AES_BLOCK_SIZE]; + u8 tag[AES_BLOCK_SIZE]; + u8 buf[GHASH_BLOCK_SIZE]; + u64 dg[2] = {}; + int err; + + if (req->assoclen) + gcm_calculate_auth_mac(req, dg); + + memcpy(iv, req->iv, GCM_IV_SIZE); + put_unaligned_be32(1, iv + GCM_IV_SIZE); + + if (likely(may_use_simd())) { + kernel_neon_begin(); + + pmull_gcm_encrypt_block(tag, iv, ctx->aes_key.key_enc, + num_rounds(&ctx->aes_key)); + put_unaligned_be32(2, iv + GCM_IV_SIZE); + + err = skcipher_walk_aead_decrypt(&walk, req, true); + + while (walk.nbytes >= AES_BLOCK_SIZE) { + int blocks = walk.nbytes / AES_BLOCK_SIZE; + + pmull_gcm_decrypt(blocks, dg, walk.dst.virt.addr, + walk.src.virt.addr, &ctx->ghash_key, + iv, num_rounds(&ctx->aes_key)); + + err = skcipher_walk_done(&walk, + walk.nbytes % AES_BLOCK_SIZE); + } + if (walk.nbytes) + pmull_gcm_encrypt_block(iv, iv, NULL, + num_rounds(&ctx->aes_key)); + + kernel_neon_end(); + } else { + __aes_arm64_encrypt(ctx->aes_key.key_enc, tag, iv, + num_rounds(&ctx->aes_key)); + put_unaligned_be32(2, iv + GCM_IV_SIZE); + + err = skcipher_walk_aead_decrypt(&walk, req, true); + + while (walk.nbytes >= AES_BLOCK_SIZE) { + int blocks = walk.nbytes / AES_BLOCK_SIZE; + u8 *dst = walk.dst.virt.addr; + u8 *src = walk.src.virt.addr; + + ghash_do_update(blocks, dg, walk.src.virt.addr, + &ctx->ghash_key, NULL); + + do { + __aes_arm64_encrypt(ctx->aes_key.key_enc, + buf, iv, + num_rounds(&ctx->aes_key)); + crypto_xor_cpy(dst, src, buf, AES_BLOCK_SIZE); + crypto_inc(iv, AES_BLOCK_SIZE); + + dst += AES_BLOCK_SIZE; + src += AES_BLOCK_SIZE; + } while (--blocks > 0); + + err = skcipher_walk_done(&walk, + walk.nbytes % AES_BLOCK_SIZE); + } + if (walk.nbytes) + __aes_arm64_encrypt(ctx->aes_key.key_enc, 
iv, iv, + num_rounds(&ctx->aes_key)); + } + + /* handle the tail */ + if (walk.nbytes) { + memcpy(buf, walk.src.virt.addr, walk.nbytes); + memset(buf + walk.nbytes, 0, GHASH_BLOCK_SIZE - walk.nbytes); + ghash_do_update(1, dg, buf, &ctx->ghash_key, NULL); + + crypto_xor_cpy(walk.dst.virt.addr, walk.src.virt.addr, iv, + walk.nbytes); + + err = skcipher_walk_done(&walk, 0); + } + + if (err) + return err; + + gcm_final(req, ctx, dg, tag, req->cryptlen - authsize); + + /* compare calculated auth tag with the stored one */ + scatterwalk_map_and_copy(buf, req->src, + req->assoclen + req->cryptlen - authsize, + authsize, 0); + + if (crypto_memneq(tag, buf, authsize)) + return -EBADMSG; + return 0; +} + +static struct aead_alg gcm_aes_alg = { + .ivsize = GCM_IV_SIZE, + .chunksize = AES_BLOCK_SIZE, + .maxauthsize = AES_BLOCK_SIZE, + .setkey = gcm_setkey, + .setauthsize = gcm_setauthsize, + .encrypt = gcm_encrypt, + .decrypt = gcm_decrypt, + + .base.cra_name = "gcm(aes)", + .base.cra_driver_name = "gcm-aes-ce", + .base.cra_priority = 300, + .base.cra_blocksize = 1, + .base.cra_ctxsize = sizeof(struct gcm_aes_ctx), + .base.cra_module = THIS_MODULE, }; static int __init ghash_ce_mod_init(void) { - return crypto_register_shash(&ghash_alg); + int ret; + + if (!(elf_hwcap & HWCAP_ASIMD)) + return -ENODEV; + + if (elf_hwcap & HWCAP_PMULL) + pmull_ghash_update = pmull_ghash_update_p64; + + else + pmull_ghash_update = pmull_ghash_update_p8; + + ret = crypto_register_shash(&ghash_alg); + if (ret) + return ret; + + if (elf_hwcap & HWCAP_PMULL) { + ret = crypto_register_aead(&gcm_aes_alg); + if (ret) + crypto_unregister_shash(&ghash_alg); + } + return ret; } static void __exit ghash_ce_mod_exit(void) { crypto_unregister_shash(&ghash_alg); + crypto_unregister_aead(&gcm_aes_alg); } -module_cpu_feature_match(PMULL, ghash_ce_mod_init); +static const struct cpu_feature ghash_cpu_feature[] = { + { cpu_feature(PMULL) }, { } +}; +MODULE_DEVICE_TABLE(cpu, ghash_cpu_feature); + 
+module_init(ghash_ce_mod_init); module_exit(ghash_ce_mod_exit); diff --git a/arch/arm64/crypto/sha1-ce-glue.c b/arch/arm64/crypto/sha1-ce-glue.c index ea319c055f5d..efbeb3e0dcfb 100644 --- a/arch/arm64/crypto/sha1-ce-glue.c +++ b/arch/arm64/crypto/sha1-ce-glue.c @@ -1,7 +1,7 @@ /* * sha1-ce-glue.c - SHA-1 secure hash using ARMv8 Crypto Extensions * - * Copyright (C) 2014 Linaro Ltd <ard.biesheuvel@linaro.org> + * Copyright (C) 2014 - 2017 Linaro Ltd <ard.biesheuvel@linaro.org> * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License version 2 as @@ -9,6 +9,7 @@ */ #include <asm/neon.h> +#include <asm/simd.h> #include <asm/unaligned.h> #include <crypto/internal/hash.h> #include <crypto/sha.h> @@ -37,8 +38,11 @@ static int sha1_ce_update(struct shash_desc *desc, const u8 *data, { struct sha1_ce_state *sctx = shash_desc_ctx(desc); + if (!may_use_simd()) + return crypto_sha1_update(desc, data, len); + sctx->finalize = 0; - kernel_neon_begin_partial(16); + kernel_neon_begin(); sha1_base_do_update(desc, data, len, (sha1_block_fn *)sha1_ce_transform); kernel_neon_end(); @@ -52,13 +56,16 @@ static int sha1_ce_finup(struct shash_desc *desc, const u8 *data, struct sha1_ce_state *sctx = shash_desc_ctx(desc); bool finalize = !sctx->sst.count && !(len % SHA1_BLOCK_SIZE); + if (!may_use_simd()) + return crypto_sha1_finup(desc, data, len, out); + /* * Allow the asm code to perform the finalization if there is no * partial data and the input is a round multiple of the block size. 
*/ sctx->finalize = finalize; - kernel_neon_begin_partial(16); + kernel_neon_begin(); sha1_base_do_update(desc, data, len, (sha1_block_fn *)sha1_ce_transform); if (!finalize) @@ -71,8 +78,11 @@ static int sha1_ce_final(struct shash_desc *desc, u8 *out) { struct sha1_ce_state *sctx = shash_desc_ctx(desc); + if (!may_use_simd()) + return crypto_sha1_finup(desc, NULL, 0, out); + sctx->finalize = 0; - kernel_neon_begin_partial(16); + kernel_neon_begin(); sha1_base_do_finalize(desc, (sha1_block_fn *)sha1_ce_transform); kernel_neon_end(); return sha1_base_finish(desc, out); diff --git a/arch/arm64/crypto/sha2-ce-glue.c b/arch/arm64/crypto/sha2-ce-glue.c index 0ed9486f75dd..fd1ff2b13dfa 100644 --- a/arch/arm64/crypto/sha2-ce-glue.c +++ b/arch/arm64/crypto/sha2-ce-glue.c @@ -1,7 +1,7 @@ /* * sha2-ce-glue.c - SHA-224/SHA-256 using ARMv8 Crypto Extensions * - * Copyright (C) 2014 Linaro Ltd <ard.biesheuvel@linaro.org> + * Copyright (C) 2014 - 2017 Linaro Ltd <ard.biesheuvel@linaro.org> * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License version 2 as @@ -9,6 +9,7 @@ */ #include <asm/neon.h> +#include <asm/simd.h> #include <asm/unaligned.h> #include <crypto/internal/hash.h> #include <crypto/sha.h> @@ -34,13 +35,19 @@ const u32 sha256_ce_offsetof_count = offsetof(struct sha256_ce_state, const u32 sha256_ce_offsetof_finalize = offsetof(struct sha256_ce_state, finalize); +asmlinkage void sha256_block_data_order(u32 *digest, u8 const *src, int blocks); + static int sha256_ce_update(struct shash_desc *desc, const u8 *data, unsigned int len) { struct sha256_ce_state *sctx = shash_desc_ctx(desc); + if (!may_use_simd()) + return sha256_base_do_update(desc, data, len, + (sha256_block_fn *)sha256_block_data_order); + sctx->finalize = 0; - kernel_neon_begin_partial(28); + kernel_neon_begin(); sha256_base_do_update(desc, data, len, (sha256_block_fn *)sha2_ce_transform); kernel_neon_end(); @@ -54,13 +61,22 @@ 
static int sha256_ce_finup(struct shash_desc *desc, const u8 *data, struct sha256_ce_state *sctx = shash_desc_ctx(desc); bool finalize = !sctx->sst.count && !(len % SHA256_BLOCK_SIZE); + if (!may_use_simd()) { + if (len) + sha256_base_do_update(desc, data, len, + (sha256_block_fn *)sha256_block_data_order); + sha256_base_do_finalize(desc, + (sha256_block_fn *)sha256_block_data_order); + return sha256_base_finish(desc, out); + } + /* * Allow the asm code to perform the finalization if there is no * partial data and the input is a round multiple of the block size. */ sctx->finalize = finalize; - kernel_neon_begin_partial(28); + kernel_neon_begin(); sha256_base_do_update(desc, data, len, (sha256_block_fn *)sha2_ce_transform); if (!finalize) @@ -74,8 +90,14 @@ static int sha256_ce_final(struct shash_desc *desc, u8 *out) { struct sha256_ce_state *sctx = shash_desc_ctx(desc); + if (!may_use_simd()) { + sha256_base_do_finalize(desc, + (sha256_block_fn *)sha256_block_data_order); + return sha256_base_finish(desc, out); + } + sctx->finalize = 0; - kernel_neon_begin_partial(28); + kernel_neon_begin(); sha256_base_do_finalize(desc, (sha256_block_fn *)sha2_ce_transform); kernel_neon_end(); return sha256_base_finish(desc, out); diff --git a/arch/arm64/crypto/sha256-glue.c b/arch/arm64/crypto/sha256-glue.c index a2226f841960..b064d925fe2a 100644 --- a/arch/arm64/crypto/sha256-glue.c +++ b/arch/arm64/crypto/sha256-glue.c @@ -29,6 +29,7 @@ MODULE_ALIAS_CRYPTO("sha256"); asmlinkage void sha256_block_data_order(u32 *digest, const void *data, unsigned int num_blks); +EXPORT_SYMBOL(sha256_block_data_order); asmlinkage void sha256_block_neon(u32 *digest, const void *data, unsigned int num_blks); diff --git a/arch/sparc/crypto/aes_glue.c b/arch/sparc/crypto/aes_glue.c index c90930de76ba..3cd4f6b198b6 100644 --- a/arch/sparc/crypto/aes_glue.c +++ b/arch/sparc/crypto/aes_glue.c @@ -344,8 +344,7 @@ static void ctr_crypt_final(struct crypto_sparc64_aes_ctx *ctx, 
ctx->ops->ecb_encrypt(&ctx->key[0], (const u64 *)ctrblk, keystream, AES_BLOCK_SIZE); - crypto_xor((u8 *) keystream, src, nbytes); - memcpy(dst, keystream, nbytes); + crypto_xor_cpy(dst, (u8 *) keystream, src, nbytes); crypto_inc(ctrblk, AES_BLOCK_SIZE); } diff --git a/arch/x86/crypto/aesni-intel_glue.c b/arch/x86/crypto/aesni-intel_glue.c index 4a55cdcdc008..5c15d6b57329 100644 --- a/arch/x86/crypto/aesni-intel_glue.c +++ b/arch/x86/crypto/aesni-intel_glue.c @@ -475,8 +475,8 @@ static void ctr_crypt_final(struct crypto_aes_ctx *ctx, unsigned int nbytes = walk->nbytes; aesni_enc(ctx, keystream, ctrblk); - crypto_xor(keystream, src, nbytes); - memcpy(dst, keystream, nbytes); + crypto_xor_cpy(dst, keystream, src, nbytes); + crypto_inc(ctrblk, AES_BLOCK_SIZE); } diff --git a/arch/x86/crypto/blowfish_glue.c b/arch/x86/crypto/blowfish_glue.c index 17c05531dfd1..f9eca34301e2 100644 --- a/arch/x86/crypto/blowfish_glue.c +++ b/arch/x86/crypto/blowfish_glue.c @@ -271,8 +271,7 @@ static void ctr_crypt_final(struct bf_ctx *ctx, struct blkcipher_walk *walk) unsigned int nbytes = walk->nbytes; blowfish_enc_blk(ctx, keystream, ctrblk); - crypto_xor(keystream, src, nbytes); - memcpy(dst, keystream, nbytes); + crypto_xor_cpy(dst, keystream, src, nbytes); crypto_inc(ctrblk, BF_BLOCK_SIZE); } diff --git a/arch/x86/crypto/cast5_avx_glue.c b/arch/x86/crypto/cast5_avx_glue.c index 8648158f3916..dbea6020ffe7 100644 --- a/arch/x86/crypto/cast5_avx_glue.c +++ b/arch/x86/crypto/cast5_avx_glue.c @@ -256,8 +256,7 @@ static void ctr_crypt_final(struct blkcipher_desc *desc, unsigned int nbytes = walk->nbytes; __cast5_encrypt(ctx, keystream, ctrblk); - crypto_xor(keystream, src, nbytes); - memcpy(dst, keystream, nbytes); + crypto_xor_cpy(dst, keystream, src, nbytes); crypto_inc(ctrblk, CAST5_BLOCK_SIZE); } diff --git a/arch/x86/crypto/des3_ede_glue.c b/arch/x86/crypto/des3_ede_glue.c index d6fc59aaaadf..30c0a37f4882 100644 --- a/arch/x86/crypto/des3_ede_glue.c +++ 
b/arch/x86/crypto/des3_ede_glue.c @@ -277,8 +277,7 @@ static void ctr_crypt_final(struct des3_ede_x86_ctx *ctx, unsigned int nbytes = walk->nbytes; des3_ede_enc_blk(ctx, keystream, ctrblk); - crypto_xor(keystream, src, nbytes); - memcpy(dst, keystream, nbytes); + crypto_xor_cpy(dst, keystream, src, nbytes); crypto_inc(ctrblk, DES3_EDE_BLOCK_SIZE); } diff --git a/crypto/Kconfig b/crypto/Kconfig index caa770e535a2..0a121f9ddf8e 100644 --- a/crypto/Kconfig +++ b/crypto/Kconfig @@ -1753,6 +1753,8 @@ config CRYPTO_USER_API_AEAD tristate "User-space interface for AEAD cipher algorithms" depends on NET select CRYPTO_AEAD + select CRYPTO_BLKCIPHER + select CRYPTO_NULL select CRYPTO_USER_API help This option enables the user-spaces interface for AEAD diff --git a/crypto/af_alg.c b/crypto/af_alg.c index 92a3d540d920..ffa9f4ccd9b4 100644 --- a/crypto/af_alg.c +++ b/crypto/af_alg.c @@ -21,6 +21,7 @@ #include <linux/module.h> #include <linux/net.h> #include <linux/rwsem.h> +#include <linux/sched/signal.h> #include <linux/security.h> struct alg_type_list { @@ -507,6 +508,696 @@ void af_alg_complete(struct crypto_async_request *req, int err) } EXPORT_SYMBOL_GPL(af_alg_complete); +/** + * af_alg_alloc_tsgl - allocate the TX SGL + * + * @sk socket of connection to user space + * @return: 0 upon success, < 0 upon error + */ +int af_alg_alloc_tsgl(struct sock *sk) +{ + struct alg_sock *ask = alg_sk(sk); + struct af_alg_ctx *ctx = ask->private; + struct af_alg_tsgl *sgl; + struct scatterlist *sg = NULL; + + sgl = list_entry(ctx->tsgl_list.prev, struct af_alg_tsgl, list); + if (!list_empty(&ctx->tsgl_list)) + sg = sgl->sg; + + if (!sg || sgl->cur >= MAX_SGL_ENTS) { + sgl = sock_kmalloc(sk, sizeof(*sgl) + + sizeof(sgl->sg[0]) * (MAX_SGL_ENTS + 1), + GFP_KERNEL); + if (!sgl) + return -ENOMEM; + + sg_init_table(sgl->sg, MAX_SGL_ENTS + 1); + sgl->cur = 0; + + if (sg) + sg_chain(sg, MAX_SGL_ENTS + 1, sgl->sg); + + list_add_tail(&sgl->list, &ctx->tsgl_list); + } + + return 0; +} 
+EXPORT_SYMBOL_GPL(af_alg_alloc_tsgl); + +/** + * aead_count_tsgl - Count number of TX SG entries + * + * The counting starts from the beginning of the SGL to @bytes. If + * an offset is provided, the counting of the SG entries starts at the offset. + * + * @sk socket of connection to user space + * @bytes Count the number of SG entries holding given number of bytes. + * @offset Start the counting of SG entries from the given offset. + * @return Number of TX SG entries found given the constraints + */ +unsigned int af_alg_count_tsgl(struct sock *sk, size_t bytes, size_t offset) +{ + struct alg_sock *ask = alg_sk(sk); + struct af_alg_ctx *ctx = ask->private; + struct af_alg_tsgl *sgl, *tmp; + unsigned int i; + unsigned int sgl_count = 0; + + if (!bytes) + return 0; + + list_for_each_entry_safe(sgl, tmp, &ctx->tsgl_list, list) { + struct scatterlist *sg = sgl->sg; + + for (i = 0; i < sgl->cur; i++) { + size_t bytes_count; + + /* Skip offset */ + if (offset >= sg[i].length) { + offset -= sg[i].length; + bytes -= sg[i].length; + continue; + } + + bytes_count = sg[i].length - offset; + + offset = 0; + sgl_count++; + + /* If we have seen requested number of bytes, stop */ + if (bytes_count >= bytes) + return sgl_count; + + bytes -= bytes_count; + } + } + + return sgl_count; +} +EXPORT_SYMBOL_GPL(af_alg_count_tsgl); + +/** + * aead_pull_tsgl - Release the specified buffers from TX SGL + * + * If @dst is non-null, reassign the pages to dst. The caller must release + * the pages. If @dst_offset is given only reassign the pages to @dst starting + * at the @dst_offset (byte). The caller must ensure that @dst is large + * enough (e.g. by using af_alg_count_tsgl with the same offset). + * + * @sk socket of connection to user space + * @used Number of bytes to pull from TX SGL + * @dst If non-NULL, buffer is reassigned to dst SGL instead of releasing. The + * caller must release the buffers in dst. + * @dst_offset Reassign the TX SGL from given offset. 
All buffers before + * reaching the offset is released. + */ +void af_alg_pull_tsgl(struct sock *sk, size_t used, struct scatterlist *dst, + size_t dst_offset) +{ + struct alg_sock *ask = alg_sk(sk); + struct af_alg_ctx *ctx = ask->private; + struct af_alg_tsgl *sgl; + struct scatterlist *sg; + unsigned int i, j; + + while (!list_empty(&ctx->tsgl_list)) { + sgl = list_first_entry(&ctx->tsgl_list, struct af_alg_tsgl, + list); + sg = sgl->sg; + + for (i = 0, j = 0; i < sgl->cur; i++) { + size_t plen = min_t(size_t, used, sg[i].length); + struct page *page = sg_page(sg + i); + + if (!page) + continue; + + /* + * Assumption: caller created af_alg_count_tsgl(len) + * SG entries in dst. + */ + if (dst) { + if (dst_offset >= plen) { + /* discard page before offset */ + dst_offset -= plen; + } else { + /* reassign page to dst after offset */ + get_page(page); + sg_set_page(dst + j, page, + plen - dst_offset, + sg[i].offset + dst_offset); + dst_offset = 0; + j++; + } + } + + sg[i].length -= plen; + sg[i].offset += plen; + + used -= plen; + ctx->used -= plen; + + if (sg[i].length) + return; + + put_page(page); + sg_assign_page(sg + i, NULL); + } + + list_del(&sgl->list); + sock_kfree_s(sk, sgl, sizeof(*sgl) + sizeof(sgl->sg[0]) * + (MAX_SGL_ENTS + 1)); + } + + if (!ctx->used) + ctx->merge = 0; +} +EXPORT_SYMBOL_GPL(af_alg_pull_tsgl); + +/** + * af_alg_free_areq_sgls - Release TX and RX SGLs of the request + * + * @areq Request holding the TX and RX SGL + */ +void af_alg_free_areq_sgls(struct af_alg_async_req *areq) +{ + struct sock *sk = areq->sk; + struct alg_sock *ask = alg_sk(sk); + struct af_alg_ctx *ctx = ask->private; + struct af_alg_rsgl *rsgl, *tmp; + struct scatterlist *tsgl; + struct scatterlist *sg; + unsigned int i; + + list_for_each_entry_safe(rsgl, tmp, &areq->rsgl_list, list) { + ctx->rcvused -= rsgl->sg_num_bytes; + af_alg_free_sg(&rsgl->sgl); + list_del(&rsgl->list); + if (rsgl != &areq->first_rsgl) + sock_kfree_s(sk, rsgl, sizeof(*rsgl)); + } + + tsgl = 
areq->tsgl; + for_each_sg(tsgl, sg, areq->tsgl_entries, i) { + if (!sg_page(sg)) + continue; + put_page(sg_page(sg)); + } + + if (areq->tsgl && areq->tsgl_entries) + sock_kfree_s(sk, tsgl, areq->tsgl_entries * sizeof(*tsgl)); +} +EXPORT_SYMBOL_GPL(af_alg_free_areq_sgls); + +/** + * af_alg_wait_for_wmem - wait for availability of writable memory + * + * @sk socket of connection to user space + * @flags If MSG_DONTWAIT is set, then only report if function would sleep + * @return 0 when writable memory is available, < 0 upon error + */ +int af_alg_wait_for_wmem(struct sock *sk, unsigned int flags) +{ + DEFINE_WAIT_FUNC(wait, woken_wake_function); + int err = -ERESTARTSYS; + long timeout; + + if (flags & MSG_DONTWAIT) + return -EAGAIN; + + sk_set_bit(SOCKWQ_ASYNC_NOSPACE, sk); + + add_wait_queue(sk_sleep(sk), &wait); + for (;;) { + if (signal_pending(current)) + break; + timeout = MAX_SCHEDULE_TIMEOUT; + if (sk_wait_event(sk, &timeout, af_alg_writable(sk), &wait)) { + err = 0; + break; + } + } + remove_wait_queue(sk_sleep(sk), &wait); + + return err; +} +EXPORT_SYMBOL_GPL(af_alg_wait_for_wmem); + +/** + * af_alg_wmem_wakeup - wakeup caller when writable memory is available + * + * @sk socket of connection to user space + */ +void af_alg_wmem_wakeup(struct sock *sk) +{ + struct socket_wq *wq; + + if (!af_alg_writable(sk)) + return; + + rcu_read_lock(); + wq = rcu_dereference(sk->sk_wq); + if (skwq_has_sleeper(wq)) + wake_up_interruptible_sync_poll(&wq->wait, POLLIN | + POLLRDNORM | + POLLRDBAND); + sk_wake_async(sk, SOCK_WAKE_WAITD, POLL_IN); + rcu_read_unlock(); +} +EXPORT_SYMBOL_GPL(af_alg_wmem_wakeup); + +/** + * af_alg_wait_for_data - wait for availability of TX data + * + * @sk socket of connection to user space + * @flags If MSG_DONTWAIT is set, then only report if function would sleep + * @return 0 when writable memory is available, < 0 upon error + */ +int af_alg_wait_for_data(struct sock *sk, unsigned flags) +{ + DEFINE_WAIT_FUNC(wait, woken_wake_function); + 
struct alg_sock *ask = alg_sk(sk); + struct af_alg_ctx *ctx = ask->private; + long timeout; + int err = -ERESTARTSYS; + + if (flags & MSG_DONTWAIT) + return -EAGAIN; + + sk_set_bit(SOCKWQ_ASYNC_WAITDATA, sk); + + add_wait_queue(sk_sleep(sk), &wait); + for (;;) { + if (signal_pending(current)) + break; + timeout = MAX_SCHEDULE_TIMEOUT; + if (sk_wait_event(sk, &timeout, (ctx->used || !ctx->more), + &wait)) { + err = 0; + break; + } + } + remove_wait_queue(sk_sleep(sk), &wait); + + sk_clear_bit(SOCKWQ_ASYNC_WAITDATA, sk); + + return err; +} +EXPORT_SYMBOL_GPL(af_alg_wait_for_data); + +/** + * af_alg_data_wakeup - wakeup caller when new data can be sent to kernel + * + * @sk socket of connection to user space + */ + +void af_alg_data_wakeup(struct sock *sk) +{ + struct alg_sock *ask = alg_sk(sk); + struct af_alg_ctx *ctx = ask->private; + struct socket_wq *wq; + + if (!ctx->used) + return; + + rcu_read_lock(); + wq = rcu_dereference(sk->sk_wq); + if (skwq_has_sleeper(wq)) + wake_up_interruptible_sync_poll(&wq->wait, POLLOUT | + POLLRDNORM | + POLLRDBAND); + sk_wake_async(sk, SOCK_WAKE_SPACE, POLL_OUT); + rcu_read_unlock(); +} +EXPORT_SYMBOL_GPL(af_alg_data_wakeup); + +/** + * af_alg_sendmsg - implementation of sendmsg system call handler + * + * The sendmsg system call handler obtains the user data and stores it + * in ctx->tsgl_list. This implies allocation of the required numbers of + * struct af_alg_tsgl. + * + * In addition, the ctx is filled with the information sent via CMSG. 
+ * + * @sock socket of connection to user space + * @msg message from user space + * @size size of message from user space + * @ivsize the size of the IV for the cipher operation to verify that the + * user-space-provided IV has the right size + * @return the number of copied data upon success, < 0 upon error + */ +int af_alg_sendmsg(struct socket *sock, struct msghdr *msg, size_t size, + unsigned int ivsize) +{ + struct sock *sk = sock->sk; + struct alg_sock *ask = alg_sk(sk); + struct af_alg_ctx *ctx = ask->private; + struct af_alg_tsgl *sgl; + struct af_alg_control con = {}; + long copied = 0; + bool enc = 0; + bool init = 0; + int err = 0; + + if (msg->msg_controllen) { + err = af_alg_cmsg_send(msg, &con); + if (err) + return err; + + init = 1; + switch (con.op) { + case ALG_OP_ENCRYPT: + enc = 1; + break; + case ALG_OP_DECRYPT: + enc = 0; + break; + default: + return -EINVAL; + } + + if (con.iv && con.iv->ivlen != ivsize) + return -EINVAL; + } + + lock_sock(sk); + if (!ctx->more && ctx->used) { + err = -EINVAL; + goto unlock; + } + + if (init) { + ctx->enc = enc; + if (con.iv) + memcpy(ctx->iv, con.iv->iv, ivsize); + + ctx->aead_assoclen = con.aead_assoclen; + } + + while (size) { + struct scatterlist *sg; + size_t len = size; + size_t plen; + + /* use the existing memory in an allocated page */ + if (ctx->merge) { + sgl = list_entry(ctx->tsgl_list.prev, + struct af_alg_tsgl, list); + sg = sgl->sg + sgl->cur - 1; + len = min_t(size_t, len, + PAGE_SIZE - sg->offset - sg->length); + + err = memcpy_from_msg(page_address(sg_page(sg)) + + sg->offset + sg->length, + msg, len); + if (err) + goto unlock; + + sg->length += len; + ctx->merge = (sg->offset + sg->length) & + (PAGE_SIZE - 1); + + ctx->used += len; + copied += len; + size -= len; + continue; + } + + if (!af_alg_writable(sk)) { + err = af_alg_wait_for_wmem(sk, msg->msg_flags); + if (err) + goto unlock; + } + + /* allocate a new page */ + len = min_t(unsigned long, len, af_alg_sndbuf(sk)); + + err = 
af_alg_alloc_tsgl(sk); + if (err) + goto unlock; + + sgl = list_entry(ctx->tsgl_list.prev, struct af_alg_tsgl, + list); + sg = sgl->sg; + if (sgl->cur) + sg_unmark_end(sg + sgl->cur - 1); + + do { + unsigned int i = sgl->cur; + + plen = min_t(size_t, len, PAGE_SIZE); + + sg_assign_page(sg + i, alloc_page(GFP_KERNEL)); + if (!sg_page(sg + i)) { + err = -ENOMEM; + goto unlock; + } + + err = memcpy_from_msg(page_address(sg_page(sg + i)), + msg, plen); + if (err) { + __free_page(sg_page(sg + i)); + sg_assign_page(sg + i, NULL); + goto unlock; + } + + sg[i].length = plen; + len -= plen; + ctx->used += plen; + copied += plen; + size -= plen; + sgl->cur++; + } while (len && sgl->cur < MAX_SGL_ENTS); + + if (!size) + sg_mark_end(sg + sgl->cur - 1); + + ctx->merge = plen & (PAGE_SIZE - 1); + } + + err = 0; + + ctx->more = msg->msg_flags & MSG_MORE; + +unlock: + af_alg_data_wakeup(sk); + release_sock(sk); + + return copied ?: err; +} +EXPORT_SYMBOL_GPL(af_alg_sendmsg); + +/** + * af_alg_sendpage - sendpage system call handler + * + * This is a generic implementation of sendpage to fill ctx->tsgl_list. 
+ */ +ssize_t af_alg_sendpage(struct socket *sock, struct page *page, + int offset, size_t size, int flags) +{ + struct sock *sk = sock->sk; + struct alg_sock *ask = alg_sk(sk); + struct af_alg_ctx *ctx = ask->private; + struct af_alg_tsgl *sgl; + int err = -EINVAL; + + if (flags & MSG_SENDPAGE_NOTLAST) + flags |= MSG_MORE; + + lock_sock(sk); + if (!ctx->more && ctx->used) + goto unlock; + + if (!size) + goto done; + + if (!af_alg_writable(sk)) { + err = af_alg_wait_for_wmem(sk, flags); + if (err) + goto unlock; + } + + err = af_alg_alloc_tsgl(sk); + if (err) + goto unlock; + + ctx->merge = 0; + sgl = list_entry(ctx->tsgl_list.prev, struct af_alg_tsgl, list); + + if (sgl->cur) + sg_unmark_end(sgl->sg + sgl->cur - 1); + + sg_mark_end(sgl->sg + sgl->cur); + + get_page(page); + sg_set_page(sgl->sg + sgl->cur, page, size, offset); + sgl->cur++; + ctx->used += size; + +done: + ctx->more = flags & MSG_MORE; + +unlock: + af_alg_data_wakeup(sk); + release_sock(sk); + + return err ?: size; +} +EXPORT_SYMBOL_GPL(af_alg_sendpage); + +/** + * af_alg_async_cb - AIO callback handler + * + * This handler cleans up the struct af_alg_async_req upon completion of the + * AIO operation. + * + * The number of bytes to be generated with the AIO operation must be set + * in areq->outlen before the AIO callback handler is invoked. + */ +void af_alg_async_cb(struct crypto_async_request *_req, int err) +{ + struct af_alg_async_req *areq = _req->data; + struct sock *sk = areq->sk; + struct kiocb *iocb = areq->iocb; + unsigned int resultlen; + + lock_sock(sk); + + /* Buffer size written by crypto operation. */ + resultlen = areq->outlen; + + af_alg_free_areq_sgls(areq); + sock_kfree_s(sk, areq, areq->areqlen); + __sock_put(sk); + + iocb->ki_complete(iocb, err ? 
err : resultlen, 0); + + release_sock(sk); +} +EXPORT_SYMBOL_GPL(af_alg_async_cb); + +/** + * af_alg_poll - poll system call handler + */ +unsigned int af_alg_poll(struct file *file, struct socket *sock, + poll_table *wait) +{ + struct sock *sk = sock->sk; + struct alg_sock *ask = alg_sk(sk); + struct af_alg_ctx *ctx = ask->private; + unsigned int mask; + + sock_poll_wait(file, sk_sleep(sk), wait); + mask = 0; + + if (!ctx->more || ctx->used) + mask |= POLLIN | POLLRDNORM; + + if (af_alg_writable(sk)) + mask |= POLLOUT | POLLWRNORM | POLLWRBAND; + + return mask; +} +EXPORT_SYMBOL_GPL(af_alg_poll); + +/** + * af_alg_alloc_areq - allocate struct af_alg_async_req + * + * @sk socket of connection to user space + * @areqlen size of struct af_alg_async_req + crypto_*_reqsize + * @return allocated data structure or ERR_PTR upon error + */ +struct af_alg_async_req *af_alg_alloc_areq(struct sock *sk, + unsigned int areqlen) +{ + struct af_alg_async_req *areq = sock_kmalloc(sk, areqlen, GFP_KERNEL); + + if (unlikely(!areq)) + return ERR_PTR(-ENOMEM); + + areq->areqlen = areqlen; + areq->sk = sk; + areq->last_rsgl = NULL; + INIT_LIST_HEAD(&areq->rsgl_list); + areq->tsgl = NULL; + areq->tsgl_entries = 0; + + return areq; +} +EXPORT_SYMBOL_GPL(af_alg_alloc_areq); + +/** + * af_alg_get_rsgl - create the RX SGL for the output data from the crypto + * operation + * + * @sk socket of connection to user space + * @msg user space message + * @flags flags used to invoke recvmsg with + * @areq instance of the cryptographic request that will hold the RX SGL + * @maxsize maximum number of bytes to be pulled from user space + * @outlen number of bytes in the RX SGL + * @return 0 on success, < 0 upon error + */ +int af_alg_get_rsgl(struct sock *sk, struct msghdr *msg, int flags, + struct af_alg_async_req *areq, size_t maxsize, + size_t *outlen) +{ + struct alg_sock *ask = alg_sk(sk); + struct af_alg_ctx *ctx = ask->private; + size_t len = 0; + + while (maxsize > len && msg_data_left(msg)) 
{ + struct af_alg_rsgl *rsgl; + size_t seglen; + int err; + + /* limit the amount of readable buffers */ + if (!af_alg_readable(sk)) + break; + + if (!ctx->used) { + err = af_alg_wait_for_data(sk, flags); + if (err) + return err; + } + + seglen = min_t(size_t, (maxsize - len), + msg_data_left(msg)); + + if (list_empty(&areq->rsgl_list)) { + rsgl = &areq->first_rsgl; + } else { + rsgl = sock_kmalloc(sk, sizeof(*rsgl), GFP_KERNEL); + if (unlikely(!rsgl)) + return -ENOMEM; + } + + rsgl->sgl.npages = 0; + list_add_tail(&rsgl->list, &areq->rsgl_list); + + /* make one iovec available as scatterlist */ + err = af_alg_make_sg(&rsgl->sgl, &msg->msg_iter, seglen); + if (err < 0) + return err; + + /* chain the new scatterlist with previous one */ + if (areq->last_rsgl) + af_alg_link_sg(&areq->last_rsgl->sgl, &rsgl->sgl); + + areq->last_rsgl = rsgl; + len += err; + ctx->rcvused += err; + rsgl->sg_num_bytes = err; + iov_iter_advance(&msg->msg_iter, err); + } + + *outlen = len; + return 0; +} +EXPORT_SYMBOL_GPL(af_alg_get_rsgl); + static int __init af_alg_init(void) { int err = proto_register(&alg_proto, 0); diff --git a/crypto/ahash.c b/crypto/ahash.c index 826cd7ab4d4a..5e8666e6ccae 100644 --- a/crypto/ahash.c +++ b/crypto/ahash.c @@ -588,6 +588,35 @@ int crypto_unregister_ahash(struct ahash_alg *alg) } EXPORT_SYMBOL_GPL(crypto_unregister_ahash); +int crypto_register_ahashes(struct ahash_alg *algs, int count) +{ + int i, ret; + + for (i = 0; i < count; i++) { + ret = crypto_register_ahash(&algs[i]); + if (ret) + goto err; + } + + return 0; + +err: + for (--i; i >= 0; --i) + crypto_unregister_ahash(&algs[i]); + + return ret; +} +EXPORT_SYMBOL_GPL(crypto_register_ahashes); + +void crypto_unregister_ahashes(struct ahash_alg *algs, int count) +{ + int i; + + for (i = count - 1; i >= 0; --i) + crypto_unregister_ahash(&algs[i]); +} +EXPORT_SYMBOL_GPL(crypto_unregister_ahashes); + int ahash_register_instance(struct crypto_template *tmpl, struct ahash_instance *inst) { diff --git 
a/crypto/algapi.c b/crypto/algapi.c index e4cc7615a139..aa699ff6c876 100644 --- a/crypto/algapi.c +++ b/crypto/algapi.c @@ -975,13 +975,15 @@ void crypto_inc(u8 *a, unsigned int size) } EXPORT_SYMBOL_GPL(crypto_inc); -void __crypto_xor(u8 *dst, const u8 *src, unsigned int len) +void __crypto_xor(u8 *dst, const u8 *src1, const u8 *src2, unsigned int len) { int relalign = 0; if (!IS_ENABLED(CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS)) { int size = sizeof(unsigned long); - int d = ((unsigned long)dst ^ (unsigned long)src) & (size - 1); + int d = (((unsigned long)dst ^ (unsigned long)src1) | + ((unsigned long)dst ^ (unsigned long)src2)) & + (size - 1); relalign = d ? 1 << __ffs(d) : size; @@ -992,34 +994,37 @@ void __crypto_xor(u8 *dst, const u8 *src, unsigned int len) * process the remainder of the input using optimal strides. */ while (((unsigned long)dst & (relalign - 1)) && len > 0) { - *dst++ ^= *src++; + *dst++ = *src1++ ^ *src2++; len--; } } while (IS_ENABLED(CONFIG_64BIT) && len >= 8 && !(relalign & 7)) { - *(u64 *)dst ^= *(u64 *)src; + *(u64 *)dst = *(u64 *)src1 ^ *(u64 *)src2; dst += 8; - src += 8; + src1 += 8; + src2 += 8; len -= 8; } while (len >= 4 && !(relalign & 3)) { - *(u32 *)dst ^= *(u32 *)src; + *(u32 *)dst = *(u32 *)src1 ^ *(u32 *)src2; dst += 4; - src += 4; + src1 += 4; + src2 += 4; len -= 4; } while (len >= 2 && !(relalign & 1)) { - *(u16 *)dst ^= *(u16 *)src; + *(u16 *)dst = *(u16 *)src1 ^ *(u16 *)src2; dst += 2; - src += 2; + src1 += 2; + src2 += 2; len -= 2; } while (len--) - *dst++ ^= *src++; + *dst++ = *src1++ ^ *src2++; } EXPORT_SYMBOL_GPL(__crypto_xor); diff --git a/crypto/algif_aead.c b/crypto/algif_aead.c index be117495eb43..516b38c3a169 100644 --- a/crypto/algif_aead.c +++ b/crypto/algif_aead.c @@ -5,88 +5,56 @@ * * This file provides the user-space API for AEAD ciphers. * - * This file is derived from algif_skcipher.c. 
- * * This program is free software; you can redistribute it and/or modify it * under the terms of the GNU General Public License as published by the Free * Software Foundation; either version 2 of the License, or (at your option) * any later version. + * + * The following concept of the memory management is used: + * + * The kernel maintains two SGLs, the TX SGL and the RX SGL. The TX SGL is + * filled by user space with the data submitted via sendpage/sendmsg. Filling + * up the TX SGL does not cause a crypto operation -- the data will only be + * tracked by the kernel. Upon receipt of one recvmsg call, the caller must + * provide a buffer which is tracked with the RX SGL. + * + * During the processing of the recvmsg operation, the cipher request is + * allocated and prepared. As part of the recvmsg operation, the processed + * TX buffers are extracted from the TX SGL into a separate SGL. + * + * After the completion of the crypto operation, the RX SGL and the cipher + * request is released. The extracted TX SGL parts are released together with + * the RX SGL release. 
*/ #include <crypto/internal/aead.h> #include <crypto/scatterwalk.h> #include <crypto/if_alg.h> +#include <crypto/skcipher.h> +#include <crypto/null.h> #include <linux/init.h> #include <linux/list.h> #include <linux/kernel.h> -#include <linux/sched/signal.h> #include <linux/mm.h> #include <linux/module.h> #include <linux/net.h> #include <net/sock.h> -struct aead_sg_list { - unsigned int cur; - struct scatterlist sg[ALG_MAX_PAGES]; -}; - -struct aead_async_rsgl { - struct af_alg_sgl sgl; - struct list_head list; -}; - -struct aead_async_req { - struct scatterlist *tsgl; - struct aead_async_rsgl first_rsgl; - struct list_head list; - struct kiocb *iocb; - struct sock *sk; - unsigned int tsgls; - char iv[]; -}; - struct aead_tfm { struct crypto_aead *aead; bool has_key; + struct crypto_skcipher *null_tfm; }; -struct aead_ctx { - struct aead_sg_list tsgl; - struct aead_async_rsgl first_rsgl; - struct list_head list; - - void *iv; - - struct af_alg_completion completion; - - unsigned long used; - - unsigned int len; - bool more; - bool merge; - bool enc; - - size_t aead_assoclen; - struct aead_request aead_req; -}; - -static inline int aead_sndbuf(struct sock *sk) +static inline bool aead_sufficient_data(struct sock *sk) { struct alg_sock *ask = alg_sk(sk); - struct aead_ctx *ctx = ask->private; - - return max_t(int, max_t(int, sk->sk_sndbuf & PAGE_MASK, PAGE_SIZE) - - ctx->used, 0); -} - -static inline bool aead_writable(struct sock *sk) -{ - return PAGE_SIZE <= aead_sndbuf(sk); -} - -static inline bool aead_sufficient_data(struct aead_ctx *ctx) -{ - unsigned as = crypto_aead_authsize(crypto_aead_reqtfm(&ctx->aead_req)); + struct sock *psk = ask->parent; + struct alg_sock *pask = alg_sk(psk); + struct af_alg_ctx *ctx = ask->private; + struct aead_tfm *aeadc = pask->private; + struct crypto_aead *tfm = aeadc->aead; + unsigned int as = crypto_aead_authsize(tfm); /* * The minimum amount of memory needed for an AEAD cipher is @@ -95,484 +63,58 @@ static inline bool 
aead_sufficient_data(struct aead_ctx *ctx) return ctx->used >= ctx->aead_assoclen + (ctx->enc ? 0 : as); } -static void aead_reset_ctx(struct aead_ctx *ctx) -{ - struct aead_sg_list *sgl = &ctx->tsgl; - - sg_init_table(sgl->sg, ALG_MAX_PAGES); - sgl->cur = 0; - ctx->used = 0; - ctx->more = 0; - ctx->merge = 0; -} - -static void aead_put_sgl(struct sock *sk) -{ - struct alg_sock *ask = alg_sk(sk); - struct aead_ctx *ctx = ask->private; - struct aead_sg_list *sgl = &ctx->tsgl; - struct scatterlist *sg = sgl->sg; - unsigned int i; - - for (i = 0; i < sgl->cur; i++) { - if (!sg_page(sg + i)) - continue; - - put_page(sg_page(sg + i)); - sg_assign_page(sg + i, NULL); - } - aead_reset_ctx(ctx); -} - -static void aead_wmem_wakeup(struct sock *sk) -{ - struct socket_wq *wq; - - if (!aead_writable(sk)) - return; - - rcu_read_lock(); - wq = rcu_dereference(sk->sk_wq); - if (skwq_has_sleeper(wq)) - wake_up_interruptible_sync_poll(&wq->wait, POLLIN | - POLLRDNORM | - POLLRDBAND); - sk_wake_async(sk, SOCK_WAKE_WAITD, POLL_IN); - rcu_read_unlock(); -} - -static int aead_wait_for_data(struct sock *sk, unsigned flags) -{ - DEFINE_WAIT_FUNC(wait, woken_wake_function); - struct alg_sock *ask = alg_sk(sk); - struct aead_ctx *ctx = ask->private; - long timeout; - int err = -ERESTARTSYS; - - if (flags & MSG_DONTWAIT) - return -EAGAIN; - - sk_set_bit(SOCKWQ_ASYNC_WAITDATA, sk); - add_wait_queue(sk_sleep(sk), &wait); - for (;;) { - if (signal_pending(current)) - break; - timeout = MAX_SCHEDULE_TIMEOUT; - if (sk_wait_event(sk, &timeout, !ctx->more, &wait)) { - err = 0; - break; - } - } - remove_wait_queue(sk_sleep(sk), &wait); - - sk_clear_bit(SOCKWQ_ASYNC_WAITDATA, sk); - - return err; -} - -static void aead_data_wakeup(struct sock *sk) -{ - struct alg_sock *ask = alg_sk(sk); - struct aead_ctx *ctx = ask->private; - struct socket_wq *wq; - - if (ctx->more) - return; - if (!ctx->used) - return; - - rcu_read_lock(); - wq = rcu_dereference(sk->sk_wq); - if (skwq_has_sleeper(wq)) - 
wake_up_interruptible_sync_poll(&wq->wait, POLLOUT | - POLLRDNORM | - POLLRDBAND); - sk_wake_async(sk, SOCK_WAKE_SPACE, POLL_OUT); - rcu_read_unlock(); -} - static int aead_sendmsg(struct socket *sock, struct msghdr *msg, size_t size) { struct sock *sk = sock->sk; struct alg_sock *ask = alg_sk(sk); - struct aead_ctx *ctx = ask->private; - unsigned ivsize = - crypto_aead_ivsize(crypto_aead_reqtfm(&ctx->aead_req)); - struct aead_sg_list *sgl = &ctx->tsgl; - struct af_alg_control con = {}; - long copied = 0; - bool enc = 0; - bool init = 0; - int err = -EINVAL; - - if (msg->msg_controllen) { - err = af_alg_cmsg_send(msg, &con); - if (err) - return err; - - init = 1; - switch (con.op) { - case ALG_OP_ENCRYPT: - enc = 1; - break; - case ALG_OP_DECRYPT: - enc = 0; - break; - default: - return -EINVAL; - } - - if (con.iv && con.iv->ivlen != ivsize) - return -EINVAL; - } - - lock_sock(sk); - if (!ctx->more && ctx->used) - goto unlock; - - if (init) { - ctx->enc = enc; - if (con.iv) - memcpy(ctx->iv, con.iv->iv, ivsize); - - ctx->aead_assoclen = con.aead_assoclen; - } - - while (size) { - size_t len = size; - struct scatterlist *sg = NULL; - - /* use the existing memory in an allocated page */ - if (ctx->merge) { - sg = sgl->sg + sgl->cur - 1; - len = min_t(unsigned long, len, - PAGE_SIZE - sg->offset - sg->length); - err = memcpy_from_msg(page_address(sg_page(sg)) + - sg->offset + sg->length, - msg, len); - if (err) - goto unlock; - - sg->length += len; - ctx->merge = (sg->offset + sg->length) & - (PAGE_SIZE - 1); - - ctx->used += len; - copied += len; - size -= len; - continue; - } - - if (!aead_writable(sk)) { - /* user space sent too much data */ - aead_put_sgl(sk); - err = -EMSGSIZE; - goto unlock; - } - - /* allocate a new page */ - len = min_t(unsigned long, size, aead_sndbuf(sk)); - while (len) { - size_t plen = 0; - - if (sgl->cur >= ALG_MAX_PAGES) { - aead_put_sgl(sk); - err = -E2BIG; - goto unlock; - } - - sg = sgl->sg + sgl->cur; - plen = min_t(size_t, len, 
PAGE_SIZE); - - sg_assign_page(sg, alloc_page(GFP_KERNEL)); - err = -ENOMEM; - if (!sg_page(sg)) - goto unlock; - - err = memcpy_from_msg(page_address(sg_page(sg)), - msg, plen); - if (err) { - __free_page(sg_page(sg)); - sg_assign_page(sg, NULL); - goto unlock; - } - - sg->offset = 0; - sg->length = plen; - len -= plen; - ctx->used += plen; - copied += plen; - sgl->cur++; - size -= plen; - ctx->merge = plen & (PAGE_SIZE - 1); - } - } - - err = 0; - - ctx->more = msg->msg_flags & MSG_MORE; - if (!ctx->more && !aead_sufficient_data(ctx)) { - aead_put_sgl(sk); - err = -EMSGSIZE; - } - -unlock: - aead_data_wakeup(sk); - release_sock(sk); - - return err ?: copied; -} - -static ssize_t aead_sendpage(struct socket *sock, struct page *page, - int offset, size_t size, int flags) -{ - struct sock *sk = sock->sk; - struct alg_sock *ask = alg_sk(sk); - struct aead_ctx *ctx = ask->private; - struct aead_sg_list *sgl = &ctx->tsgl; - int err = -EINVAL; - - if (flags & MSG_SENDPAGE_NOTLAST) - flags |= MSG_MORE; - - if (sgl->cur >= ALG_MAX_PAGES) - return -E2BIG; - - lock_sock(sk); - if (!ctx->more && ctx->used) - goto unlock; - - if (!size) - goto done; - - if (!aead_writable(sk)) { - /* user space sent too much data */ - aead_put_sgl(sk); - err = -EMSGSIZE; - goto unlock; - } - - ctx->merge = 0; - - get_page(page); - sg_set_page(sgl->sg + sgl->cur, page, size, offset); - sgl->cur++; - ctx->used += size; - - err = 0; - -done: - ctx->more = flags & MSG_MORE; - if (!ctx->more && !aead_sufficient_data(ctx)) { - aead_put_sgl(sk); - err = -EMSGSIZE; - } + struct sock *psk = ask->parent; + struct alg_sock *pask = alg_sk(psk); + struct aead_tfm *aeadc = pask->private; + struct crypto_aead *tfm = aeadc->aead; + unsigned int ivsize = crypto_aead_ivsize(tfm); -unlock: - aead_data_wakeup(sk); - release_sock(sk); - - return err ?: size; + return af_alg_sendmsg(sock, msg, size, ivsize); } -#define GET_ASYM_REQ(req, tfm) (struct aead_async_req *) \ - ((char *)req + sizeof(struct aead_request) 
+ \ - crypto_aead_reqsize(tfm)) - - #define GET_REQ_SIZE(tfm) sizeof(struct aead_async_req) + \ - crypto_aead_reqsize(tfm) + crypto_aead_ivsize(tfm) + \ - sizeof(struct aead_request) - -static void aead_async_cb(struct crypto_async_request *_req, int err) +static int crypto_aead_copy_sgl(struct crypto_skcipher *null_tfm, + struct scatterlist *src, + struct scatterlist *dst, unsigned int len) { - struct aead_request *req = _req->data; - struct crypto_aead *tfm = crypto_aead_reqtfm(req); - struct aead_async_req *areq = GET_ASYM_REQ(req, tfm); - struct sock *sk = areq->sk; - struct scatterlist *sg = areq->tsgl; - struct aead_async_rsgl *rsgl; - struct kiocb *iocb = areq->iocb; - unsigned int i, reqlen = GET_REQ_SIZE(tfm); - - list_for_each_entry(rsgl, &areq->list, list) { - af_alg_free_sg(&rsgl->sgl); - if (rsgl != &areq->first_rsgl) - sock_kfree_s(sk, rsgl, sizeof(*rsgl)); - } + SKCIPHER_REQUEST_ON_STACK(skreq, null_tfm); - for (i = 0; i < areq->tsgls; i++) - put_page(sg_page(sg + i)); + skcipher_request_set_tfm(skreq, null_tfm); + skcipher_request_set_callback(skreq, CRYPTO_TFM_REQ_MAY_BACKLOG, + NULL, NULL); + skcipher_request_set_crypt(skreq, src, dst, len, NULL); - sock_kfree_s(sk, areq->tsgl, sizeof(*areq->tsgl) * areq->tsgls); - sock_kfree_s(sk, req, reqlen); - __sock_put(sk); - iocb->ki_complete(iocb, err, err); + return crypto_skcipher_encrypt(skreq); } -static int aead_recvmsg_async(struct socket *sock, struct msghdr *msg, - int flags) +static int _aead_recvmsg(struct socket *sock, struct msghdr *msg, + size_t ignored, int flags) { struct sock *sk = sock->sk; struct alg_sock *ask = alg_sk(sk); - struct aead_ctx *ctx = ask->private; - struct crypto_aead *tfm = crypto_aead_reqtfm(&ctx->aead_req); - struct aead_async_req *areq; - struct aead_request *req = NULL; - struct aead_sg_list *sgl = &ctx->tsgl; - struct aead_async_rsgl *last_rsgl = NULL, *rsgl; + struct sock *psk = ask->parent; + struct alg_sock *pask = alg_sk(psk); + struct af_alg_ctx *ctx = 
ask->private; + struct aead_tfm *aeadc = pask->private; + struct crypto_aead *tfm = aeadc->aead; + struct crypto_skcipher *null_tfm = aeadc->null_tfm; unsigned int as = crypto_aead_authsize(tfm); - unsigned int i, reqlen = GET_REQ_SIZE(tfm); - int err = -ENOMEM; - unsigned long used; - size_t outlen = 0; - size_t usedpages = 0; - - lock_sock(sk); - if (ctx->more) { - err = aead_wait_for_data(sk, flags); - if (err) - goto unlock; - } - - if (!aead_sufficient_data(ctx)) - goto unlock; - - used = ctx->used; - if (ctx->enc) - outlen = used + as; - else - outlen = used - as; - - req = sock_kmalloc(sk, reqlen, GFP_KERNEL); - if (unlikely(!req)) - goto unlock; - - areq = GET_ASYM_REQ(req, tfm); - memset(&areq->first_rsgl, '\0', sizeof(areq->first_rsgl)); - INIT_LIST_HEAD(&areq->list); - areq->iocb = msg->msg_iocb; - areq->sk = sk; - memcpy(areq->iv, ctx->iv, crypto_aead_ivsize(tfm)); - aead_request_set_tfm(req, tfm); - aead_request_set_ad(req, ctx->aead_assoclen); - aead_request_set_callback(req, CRYPTO_TFM_REQ_MAY_BACKLOG, - aead_async_cb, req); - used -= ctx->aead_assoclen; - - /* take over all tx sgls from ctx */ - areq->tsgl = sock_kmalloc(sk, - sizeof(*areq->tsgl) * max_t(u32, sgl->cur, 1), - GFP_KERNEL); - if (unlikely(!areq->tsgl)) - goto free; - - sg_init_table(areq->tsgl, max_t(u32, sgl->cur, 1)); - for (i = 0; i < sgl->cur; i++) - sg_set_page(&areq->tsgl[i], sg_page(&sgl->sg[i]), - sgl->sg[i].length, sgl->sg[i].offset); - - areq->tsgls = sgl->cur; - - /* create rx sgls */ - while (outlen > usedpages && iov_iter_count(&msg->msg_iter)) { - size_t seglen = min_t(size_t, iov_iter_count(&msg->msg_iter), - (outlen - usedpages)); - - if (list_empty(&areq->list)) { - rsgl = &areq->first_rsgl; - - } else { - rsgl = sock_kmalloc(sk, sizeof(*rsgl), GFP_KERNEL); - if (unlikely(!rsgl)) { - err = -ENOMEM; - goto free; - } - } - rsgl->sgl.npages = 0; - list_add_tail(&rsgl->list, &areq->list); - - /* make one iovec available as scatterlist */ - err = af_alg_make_sg(&rsgl->sgl, 
&msg->msg_iter, seglen); - if (err < 0) - goto free; - - usedpages += err; - - /* chain the new scatterlist with previous one */ - if (last_rsgl) - af_alg_link_sg(&last_rsgl->sgl, &rsgl->sgl); - - last_rsgl = rsgl; - - iov_iter_advance(&msg->msg_iter, err); - } - - /* ensure output buffer is sufficiently large */ - if (usedpages < outlen) { - err = -EINVAL; - goto unlock; - } - - aead_request_set_crypt(req, areq->tsgl, areq->first_rsgl.sgl.sg, used, - areq->iv); - err = ctx->enc ? crypto_aead_encrypt(req) : crypto_aead_decrypt(req); - if (err) { - if (err == -EINPROGRESS) { - sock_hold(sk); - err = -EIOCBQUEUED; - aead_reset_ctx(ctx); - goto unlock; - } else if (err == -EBADMSG) { - aead_put_sgl(sk); - } - goto free; - } - aead_put_sgl(sk); - -free: - list_for_each_entry(rsgl, &areq->list, list) { - af_alg_free_sg(&rsgl->sgl); - if (rsgl != &areq->first_rsgl) - sock_kfree_s(sk, rsgl, sizeof(*rsgl)); - } - if (areq->tsgl) - sock_kfree_s(sk, areq->tsgl, sizeof(*areq->tsgl) * areq->tsgls); - if (req) - sock_kfree_s(sk, req, reqlen); -unlock: - aead_wmem_wakeup(sk); - release_sock(sk); - return err ? 
err : outlen; -} - -static int aead_recvmsg_sync(struct socket *sock, struct msghdr *msg, int flags) -{ - struct sock *sk = sock->sk; - struct alg_sock *ask = alg_sk(sk); - struct aead_ctx *ctx = ask->private; - unsigned as = crypto_aead_authsize(crypto_aead_reqtfm(&ctx->aead_req)); - struct aead_sg_list *sgl = &ctx->tsgl; - struct aead_async_rsgl *last_rsgl = NULL; - struct aead_async_rsgl *rsgl, *tmp; - int err = -EINVAL; - unsigned long used = 0; - size_t outlen = 0; - size_t usedpages = 0; - - lock_sock(sk); + struct af_alg_async_req *areq; + struct af_alg_tsgl *tsgl; + struct scatterlist *src; + int err = 0; + size_t used = 0; /* [in] TX bufs to be en/decrypted */ + size_t outlen = 0; /* [out] RX bufs produced by kernel */ + size_t usedpages = 0; /* [in] RX bufs to be used from user */ + size_t processed = 0; /* [in] TX bufs to be consumed */ /* - * Please see documentation of aead_request_set_crypt for the - * description of the AEAD memory structure expected from the caller. + * Data length provided by caller via sendmsg/sendpage that has not + * yet been processed. */ - - if (ctx->more) { - err = aead_wait_for_data(sk, flags); - if (err) - goto unlock; - } - - /* data length provided by caller via sendmsg/sendpage */ used = ctx->used; /* @@ -584,8 +126,8 @@ static int aead_recvmsg_sync(struct socket *sock, struct msghdr *msg, int flags) * the error message in sendmsg/sendpage and still call recvmsg. This * check here protects the kernel integrity. 
*/ - if (!aead_sufficient_data(ctx)) - goto unlock; + if (!aead_sufficient_data(sk)) + return -EINVAL; /* * Calculate the minimum output buffer size holding the result of the @@ -606,104 +148,191 @@ static int aead_recvmsg_sync(struct socket *sock, struct msghdr *msg, int flags) */ used -= ctx->aead_assoclen; - /* convert iovecs of output buffers into scatterlists */ - while (outlen > usedpages && iov_iter_count(&msg->msg_iter)) { - size_t seglen = min_t(size_t, iov_iter_count(&msg->msg_iter), - (outlen - usedpages)); - - if (list_empty(&ctx->list)) { - rsgl = &ctx->first_rsgl; - } else { - rsgl = sock_kmalloc(sk, sizeof(*rsgl), GFP_KERNEL); - if (unlikely(!rsgl)) { - err = -ENOMEM; - goto unlock; - } - } - rsgl->sgl.npages = 0; - list_add_tail(&rsgl->list, &ctx->list); + /* Allocate cipher request for current operation. */ + areq = af_alg_alloc_areq(sk, sizeof(struct af_alg_async_req) + + crypto_aead_reqsize(tfm)); + if (IS_ERR(areq)) + return PTR_ERR(areq); - /* make one iovec available as scatterlist */ - err = af_alg_make_sg(&rsgl->sgl, &msg->msg_iter, seglen); - if (err < 0) - goto unlock; - usedpages += err; - /* chain the new scatterlist with previous one */ - if (last_rsgl) - af_alg_link_sg(&last_rsgl->sgl, &rsgl->sgl); + /* convert iovecs of output buffers into RX SGL */ + err = af_alg_get_rsgl(sk, msg, flags, areq, outlen, &usedpages); + if (err) + goto free; - last_rsgl = rsgl; + /* + * Ensure output buffer is sufficiently large. If the caller provides + * less buffer space, only use the relative required input size. This + * allows AIO operation where the caller sent all data to be processed + * and the AIO operation performs the operation on the different chunks + * of the input data. 
+ */ + if (usedpages < outlen) { + size_t less = outlen - usedpages; - iov_iter_advance(&msg->msg_iter, err); + if (used < less) { + err = -EINVAL; + goto free; + } + used -= less; + outlen -= less; } - /* ensure output buffer is sufficiently large */ - if (usedpages < outlen) { - err = -EINVAL; - goto unlock; - } + processed = used + ctx->aead_assoclen; + tsgl = list_first_entry(&ctx->tsgl_list, struct af_alg_tsgl, list); - sg_mark_end(sgl->sg + sgl->cur - 1); - aead_request_set_crypt(&ctx->aead_req, sgl->sg, ctx->first_rsgl.sgl.sg, - used, ctx->iv); - aead_request_set_ad(&ctx->aead_req, ctx->aead_assoclen); + /* + * Copy of AAD from source to destination + * + * The AAD is copied to the destination buffer without change. Even + * when user space uses an in-place cipher operation, the kernel + * will copy the data as it does not see whether such in-place operation + * is initiated. + * + * To ensure efficiency, the following implementation ensure that the + * ciphers are invoked to perform a crypto operation in-place. This + * is achieved by memory management specified as follows. + */ - err = af_alg_wait_for_completion(ctx->enc ? - crypto_aead_encrypt(&ctx->aead_req) : - crypto_aead_decrypt(&ctx->aead_req), - &ctx->completion); + /* Use the RX SGL as source (and destination) for crypto op. */ + src = areq->first_rsgl.sgl.sg; + + if (ctx->enc) { + /* + * Encryption operation - The in-place cipher operation is + * achieved by the following operation: + * + * TX SGL: AAD || PT + * | | + * | copy | + * v v + * RX SGL: AAD || PT || Tag + */ + err = crypto_aead_copy_sgl(null_tfm, tsgl->sg, + areq->first_rsgl.sgl.sg, processed); + if (err) + goto free; + af_alg_pull_tsgl(sk, processed, NULL, 0); + } else { + /* + * Decryption operation - To achieve an in-place cipher + * operation, the following SGL structure is used: + * + * TX SGL: AAD || CT || Tag + * | | ^ + * | copy | | Create SGL link. 
+ * v v | + * RX SGL: AAD || CT ----+ + */ + + /* Copy AAD || CT to RX SGL buffer for in-place operation. */ + err = crypto_aead_copy_sgl(null_tfm, tsgl->sg, + areq->first_rsgl.sgl.sg, outlen); + if (err) + goto free; - if (err) { - /* EBADMSG implies a valid cipher operation took place */ - if (err == -EBADMSG) - aead_put_sgl(sk); + /* Create TX SGL for tag and chain it to RX SGL. */ + areq->tsgl_entries = af_alg_count_tsgl(sk, processed, + processed - as); + if (!areq->tsgl_entries) + areq->tsgl_entries = 1; + areq->tsgl = sock_kmalloc(sk, sizeof(*areq->tsgl) * + areq->tsgl_entries, + GFP_KERNEL); + if (!areq->tsgl) { + err = -ENOMEM; + goto free; + } + sg_init_table(areq->tsgl, areq->tsgl_entries); + + /* Release TX SGL, except for tag data and reassign tag data. */ + af_alg_pull_tsgl(sk, processed, areq->tsgl, processed - as); + + /* chain the areq TX SGL holding the tag with RX SGL */ + if (usedpages) { + /* RX SGL present */ + struct af_alg_sgl *sgl_prev = &areq->last_rsgl->sgl; + + sg_unmark_end(sgl_prev->sg + sgl_prev->npages - 1); + sg_chain(sgl_prev->sg, sgl_prev->npages + 1, + areq->tsgl); + } else + /* no RX SGL present (e.g. authentication only) */ + src = areq->tsgl; + } - goto unlock; + /* Initialize the crypto operation */ + aead_request_set_crypt(&areq->cra_u.aead_req, src, + areq->first_rsgl.sgl.sg, used, ctx->iv); + aead_request_set_ad(&areq->cra_u.aead_req, ctx->aead_assoclen); + aead_request_set_tfm(&areq->cra_u.aead_req, tfm); + + if (msg->msg_iocb && !is_sync_kiocb(msg->msg_iocb)) { + /* AIO operation */ + areq->iocb = msg->msg_iocb; + aead_request_set_callback(&areq->cra_u.aead_req, + CRYPTO_TFM_REQ_MAY_BACKLOG, + af_alg_async_cb, areq); + err = ctx->enc ? 
crypto_aead_encrypt(&areq->cra_u.aead_req) : + crypto_aead_decrypt(&areq->cra_u.aead_req); + } else { + /* Synchronous operation */ + aead_request_set_callback(&areq->cra_u.aead_req, + CRYPTO_TFM_REQ_MAY_BACKLOG, + af_alg_complete, &ctx->completion); + err = af_alg_wait_for_completion(ctx->enc ? + crypto_aead_encrypt(&areq->cra_u.aead_req) : + crypto_aead_decrypt(&areq->cra_u.aead_req), + &ctx->completion); } - aead_put_sgl(sk); - err = 0; + /* AIO operation in progress */ + if (err == -EINPROGRESS) { + sock_hold(sk); -unlock: - list_for_each_entry_safe(rsgl, tmp, &ctx->list, list) { - af_alg_free_sg(&rsgl->sgl); - list_del(&rsgl->list); - if (rsgl != &ctx->first_rsgl) - sock_kfree_s(sk, rsgl, sizeof(*rsgl)); + /* Remember output size that will be generated. */ + areq->outlen = outlen; + + return -EIOCBQUEUED; } - INIT_LIST_HEAD(&ctx->list); - aead_wmem_wakeup(sk); - release_sock(sk); - return err ? err : outlen; -} +free: + af_alg_free_areq_sgls(areq); + sock_kfree_s(sk, areq, areq->areqlen); -static int aead_recvmsg(struct socket *sock, struct msghdr *msg, size_t ignored, - int flags) -{ - return (msg->msg_iocb && !is_sync_kiocb(msg->msg_iocb)) ? - aead_recvmsg_async(sock, msg, flags) : - aead_recvmsg_sync(sock, msg, flags); + return err ? err : outlen; } -static unsigned int aead_poll(struct file *file, struct socket *sock, - poll_table *wait) +static int aead_recvmsg(struct socket *sock, struct msghdr *msg, + size_t ignored, int flags) { struct sock *sk = sock->sk; - struct alg_sock *ask = alg_sk(sk); - struct aead_ctx *ctx = ask->private; - unsigned int mask; - - sock_poll_wait(file, sk_sleep(sk), wait); - mask = 0; + int ret = 0; - if (!ctx->more) - mask |= POLLIN | POLLRDNORM; + lock_sock(sk); + while (msg_data_left(msg)) { + int err = _aead_recvmsg(sock, msg, ignored, flags); + + /* + * This error covers -EIOCBQUEUED which implies that we can + * only handle one AIO request. 
If the caller wants to have + * multiple AIO requests in parallel, he must make multiple + * separate AIO calls. + * + * Also return the error if no data has been processed so far. + */ + if (err <= 0) { + if (err == -EIOCBQUEUED || err == -EBADMSG || !ret) + ret = err; + goto out; + } - if (aead_writable(sk)) - mask |= POLLOUT | POLLWRNORM | POLLWRBAND; + ret += err; + } - return mask; +out: + af_alg_wmem_wakeup(sk); + release_sock(sk); + return ret; } static struct proto_ops algif_aead_ops = { @@ -723,9 +352,9 @@ static struct proto_ops algif_aead_ops = { .release = af_alg_release, .sendmsg = aead_sendmsg, - .sendpage = aead_sendpage, + .sendpage = af_alg_sendpage, .recvmsg = aead_recvmsg, - .poll = aead_poll, + .poll = af_alg_poll, }; static int aead_check_key(struct socket *sock) @@ -787,7 +416,7 @@ static ssize_t aead_sendpage_nokey(struct socket *sock, struct page *page, if (err) return err; - return aead_sendpage(sock, page, offset, size, flags); + return af_alg_sendpage(sock, page, offset, size, flags); } static int aead_recvmsg_nokey(struct socket *sock, struct msghdr *msg, @@ -821,13 +450,14 @@ static struct proto_ops algif_aead_ops_nokey = { .sendmsg = aead_sendmsg_nokey, .sendpage = aead_sendpage_nokey, .recvmsg = aead_recvmsg_nokey, - .poll = aead_poll, + .poll = af_alg_poll, }; static void *aead_bind(const char *name, u32 type, u32 mask) { struct aead_tfm *tfm; struct crypto_aead *aead; + struct crypto_skcipher *null_tfm; tfm = kzalloc(sizeof(*tfm), GFP_KERNEL); if (!tfm) @@ -839,7 +469,15 @@ static void *aead_bind(const char *name, u32 type, u32 mask) return ERR_CAST(aead); } + null_tfm = crypto_get_default_null_skcipher2(); + if (IS_ERR(null_tfm)) { + crypto_free_aead(aead); + kfree(tfm); + return ERR_CAST(null_tfm); + } + tfm->aead = aead; + tfm->null_tfm = null_tfm; return tfm; } @@ -873,12 +511,15 @@ static int aead_setkey(void *private, const u8 *key, unsigned int keylen) static void aead_sock_destruct(struct sock *sk) { struct alg_sock *ask = 
alg_sk(sk); - struct aead_ctx *ctx = ask->private; - unsigned int ivlen = crypto_aead_ivsize( - crypto_aead_reqtfm(&ctx->aead_req)); - - WARN_ON(refcount_read(&sk->sk_refcnt) != 0); - aead_put_sgl(sk); + struct af_alg_ctx *ctx = ask->private; + struct sock *psk = ask->parent; + struct alg_sock *pask = alg_sk(psk); + struct aead_tfm *aeadc = pask->private; + struct crypto_aead *tfm = aeadc->aead; + unsigned int ivlen = crypto_aead_ivsize(tfm); + + af_alg_pull_tsgl(sk, ctx->used, NULL, 0); + crypto_put_default_null_skcipher2(); sock_kzfree_s(sk, ctx->iv, ivlen); sock_kfree_s(sk, ctx, ctx->len); af_alg_release_parent(sk); @@ -886,11 +527,11 @@ static void aead_sock_destruct(struct sock *sk) static int aead_accept_parent_nokey(void *private, struct sock *sk) { - struct aead_ctx *ctx; + struct af_alg_ctx *ctx; struct alg_sock *ask = alg_sk(sk); struct aead_tfm *tfm = private; struct crypto_aead *aead = tfm->aead; - unsigned int len = sizeof(*ctx) + crypto_aead_reqsize(aead); + unsigned int len = sizeof(*ctx); unsigned int ivlen = crypto_aead_ivsize(aead); ctx = sock_kmalloc(sk, len, GFP_KERNEL); @@ -905,23 +546,18 @@ static int aead_accept_parent_nokey(void *private, struct sock *sk) } memset(ctx->iv, 0, ivlen); + INIT_LIST_HEAD(&ctx->tsgl_list); ctx->len = len; ctx->used = 0; + ctx->rcvused = 0; ctx->more = 0; ctx->merge = 0; ctx->enc = 0; - ctx->tsgl.cur = 0; ctx->aead_assoclen = 0; af_alg_init_completion(&ctx->completion); - sg_init_table(ctx->tsgl.sg, ALG_MAX_PAGES); - INIT_LIST_HEAD(&ctx->list); ask->private = ctx; - aead_request_set_tfm(&ctx->aead_req, aead); - aead_request_set_callback(&ctx->aead_req, CRYPTO_TFM_REQ_MAY_BACKLOG, - af_alg_complete, &ctx->completion); - sk->sk_destruct = aead_sock_destruct; return 0; diff --git a/crypto/algif_skcipher.c b/crypto/algif_skcipher.c index 903605dbc1a5..8ae4170aaeb4 100644 --- a/crypto/algif_skcipher.c +++ b/crypto/algif_skcipher.c @@ -10,6 +10,21 @@ * Software Foundation; either version 2 of the License, or (at your 
option) * any later version. * + * The following concept of the memory management is used: + * + * The kernel maintains two SGLs, the TX SGL and the RX SGL. The TX SGL is + * filled by user space with the data submitted via sendpage/sendmsg. Filling + * up the TX SGL does not cause a crypto operation -- the data will only be + * tracked by the kernel. Upon receipt of one recvmsg call, the caller must + * provide a buffer which is tracked with the RX SGL. + * + * During the processing of the recvmsg operation, the cipher request is + * allocated and prepared. As part of the recvmsg operation, the processed + * TX buffers are extracted from the TX SGL into a separate SGL. + * + * After the completion of the crypto operation, the RX SGL and the cipher + * request is released. The extracted TX SGL parts are released together with + * the RX SGL release. */ #include <crypto/scatterwalk.h> @@ -18,284 +33,16 @@ #include <linux/init.h> #include <linux/list.h> #include <linux/kernel.h> -#include <linux/sched/signal.h> #include <linux/mm.h> #include <linux/module.h> #include <linux/net.h> #include <net/sock.h> -struct skcipher_sg_list { - struct list_head list; - - int cur; - - struct scatterlist sg[0]; -}; - struct skcipher_tfm { struct crypto_skcipher *skcipher; bool has_key; }; -struct skcipher_ctx { - struct list_head tsgl; - struct af_alg_sgl rsgl; - - void *iv; - - struct af_alg_completion completion; - - atomic_t inflight; - size_t used; - - unsigned int len; - bool more; - bool merge; - bool enc; - - struct skcipher_request req; -}; - -struct skcipher_async_rsgl { - struct af_alg_sgl sgl; - struct list_head list; -}; - -struct skcipher_async_req { - struct kiocb *iocb; - struct skcipher_async_rsgl first_sgl; - struct list_head list; - struct scatterlist *tsg; - atomic_t *inflight; - struct skcipher_request req; -}; - -#define MAX_SGL_ENTS ((4096 - sizeof(struct skcipher_sg_list)) / \ - sizeof(struct scatterlist) - 1) - -static void skcipher_free_async_sgls(struct 
skcipher_async_req *sreq) -{ - struct skcipher_async_rsgl *rsgl, *tmp; - struct scatterlist *sgl; - struct scatterlist *sg; - int i, n; - - list_for_each_entry_safe(rsgl, tmp, &sreq->list, list) { - af_alg_free_sg(&rsgl->sgl); - if (rsgl != &sreq->first_sgl) - kfree(rsgl); - } - sgl = sreq->tsg; - n = sg_nents(sgl); - for_each_sg(sgl, sg, n, i) { - struct page *page = sg_page(sg); - - /* some SGs may not have a page mapped */ - if (page && page_ref_count(page)) - put_page(page); - } - - kfree(sreq->tsg); -} - -static void skcipher_async_cb(struct crypto_async_request *req, int err) -{ - struct skcipher_async_req *sreq = req->data; - struct kiocb *iocb = sreq->iocb; - - atomic_dec(sreq->inflight); - skcipher_free_async_sgls(sreq); - kzfree(sreq); - iocb->ki_complete(iocb, err, err); -} - -static inline int skcipher_sndbuf(struct sock *sk) -{ - struct alg_sock *ask = alg_sk(sk); - struct skcipher_ctx *ctx = ask->private; - - return max_t(int, max_t(int, sk->sk_sndbuf & PAGE_MASK, PAGE_SIZE) - - ctx->used, 0); -} - -static inline bool skcipher_writable(struct sock *sk) -{ - return PAGE_SIZE <= skcipher_sndbuf(sk); -} - -static int skcipher_alloc_sgl(struct sock *sk) -{ - struct alg_sock *ask = alg_sk(sk); - struct skcipher_ctx *ctx = ask->private; - struct skcipher_sg_list *sgl; - struct scatterlist *sg = NULL; - - sgl = list_entry(ctx->tsgl.prev, struct skcipher_sg_list, list); - if (!list_empty(&ctx->tsgl)) - sg = sgl->sg; - - if (!sg || sgl->cur >= MAX_SGL_ENTS) { - sgl = sock_kmalloc(sk, sizeof(*sgl) + - sizeof(sgl->sg[0]) * (MAX_SGL_ENTS + 1), - GFP_KERNEL); - if (!sgl) - return -ENOMEM; - - sg_init_table(sgl->sg, MAX_SGL_ENTS + 1); - sgl->cur = 0; - - if (sg) - sg_chain(sg, MAX_SGL_ENTS + 1, sgl->sg); - - list_add_tail(&sgl->list, &ctx->tsgl); - } - - return 0; -} - -static void skcipher_pull_sgl(struct sock *sk, size_t used, int put) -{ - struct alg_sock *ask = alg_sk(sk); - struct skcipher_ctx *ctx = ask->private; - struct skcipher_sg_list *sgl; - struct 
scatterlist *sg; - int i; - - while (!list_empty(&ctx->tsgl)) { - sgl = list_first_entry(&ctx->tsgl, struct skcipher_sg_list, - list); - sg = sgl->sg; - - for (i = 0; i < sgl->cur; i++) { - size_t plen = min_t(size_t, used, sg[i].length); - - if (!sg_page(sg + i)) - continue; - - sg[i].length -= plen; - sg[i].offset += plen; - - used -= plen; - ctx->used -= plen; - - if (sg[i].length) - return; - if (put) - put_page(sg_page(sg + i)); - sg_assign_page(sg + i, NULL); - } - - list_del(&sgl->list); - sock_kfree_s(sk, sgl, - sizeof(*sgl) + sizeof(sgl->sg[0]) * - (MAX_SGL_ENTS + 1)); - } - - if (!ctx->used) - ctx->merge = 0; -} - -static void skcipher_free_sgl(struct sock *sk) -{ - struct alg_sock *ask = alg_sk(sk); - struct skcipher_ctx *ctx = ask->private; - - skcipher_pull_sgl(sk, ctx->used, 1); -} - -static int skcipher_wait_for_wmem(struct sock *sk, unsigned flags) -{ - DEFINE_WAIT_FUNC(wait, woken_wake_function); - int err = -ERESTARTSYS; - long timeout; - - if (flags & MSG_DONTWAIT) - return -EAGAIN; - - sk_set_bit(SOCKWQ_ASYNC_NOSPACE, sk); - - add_wait_queue(sk_sleep(sk), &wait); - for (;;) { - if (signal_pending(current)) - break; - timeout = MAX_SCHEDULE_TIMEOUT; - if (sk_wait_event(sk, &timeout, skcipher_writable(sk), &wait)) { - err = 0; - break; - } - } - remove_wait_queue(sk_sleep(sk), &wait); - - return err; -} - -static void skcipher_wmem_wakeup(struct sock *sk) -{ - struct socket_wq *wq; - - if (!skcipher_writable(sk)) - return; - - rcu_read_lock(); - wq = rcu_dereference(sk->sk_wq); - if (skwq_has_sleeper(wq)) - wake_up_interruptible_sync_poll(&wq->wait, POLLIN | - POLLRDNORM | - POLLRDBAND); - sk_wake_async(sk, SOCK_WAKE_WAITD, POLL_IN); - rcu_read_unlock(); -} - -static int skcipher_wait_for_data(struct sock *sk, unsigned flags) -{ - DEFINE_WAIT_FUNC(wait, woken_wake_function); - struct alg_sock *ask = alg_sk(sk); - struct skcipher_ctx *ctx = ask->private; - long timeout; - int err = -ERESTARTSYS; - - if (flags & MSG_DONTWAIT) { - return -EAGAIN; - } 
- - sk_set_bit(SOCKWQ_ASYNC_WAITDATA, sk); - - add_wait_queue(sk_sleep(sk), &wait); - for (;;) { - if (signal_pending(current)) - break; - timeout = MAX_SCHEDULE_TIMEOUT; - if (sk_wait_event(sk, &timeout, ctx->used, &wait)) { - err = 0; - break; - } - } - remove_wait_queue(sk_sleep(sk), &wait); - - sk_clear_bit(SOCKWQ_ASYNC_WAITDATA, sk); - - return err; -} - -static void skcipher_data_wakeup(struct sock *sk) -{ - struct alg_sock *ask = alg_sk(sk); - struct skcipher_ctx *ctx = ask->private; - struct socket_wq *wq; - - if (!ctx->used) - return; - - rcu_read_lock(); - wq = rcu_dereference(sk->sk_wq); - if (skwq_has_sleeper(wq)) - wake_up_interruptible_sync_poll(&wq->wait, POLLOUT | - POLLRDNORM | - POLLRDBAND); - sk_wake_async(sk, SOCK_WAKE_SPACE, POLL_OUT); - rcu_read_unlock(); -} - static int skcipher_sendmsg(struct socket *sock, struct msghdr *msg, size_t size) { @@ -303,445 +50,143 @@ static int skcipher_sendmsg(struct socket *sock, struct msghdr *msg, struct alg_sock *ask = alg_sk(sk); struct sock *psk = ask->parent; struct alg_sock *pask = alg_sk(psk); - struct skcipher_ctx *ctx = ask->private; struct skcipher_tfm *skc = pask->private; struct crypto_skcipher *tfm = skc->skcipher; unsigned ivsize = crypto_skcipher_ivsize(tfm); - struct skcipher_sg_list *sgl; - struct af_alg_control con = {}; - long copied = 0; - bool enc = 0; - bool init = 0; - int err; - int i; - - if (msg->msg_controllen) { - err = af_alg_cmsg_send(msg, &con); - if (err) - return err; - - init = 1; - switch (con.op) { - case ALG_OP_ENCRYPT: - enc = 1; - break; - case ALG_OP_DECRYPT: - enc = 0; - break; - default: - return -EINVAL; - } - - if (con.iv && con.iv->ivlen != ivsize) - return -EINVAL; - } - - err = -EINVAL; - - lock_sock(sk); - if (!ctx->more && ctx->used) - goto unlock; - - if (init) { - ctx->enc = enc; - if (con.iv) - memcpy(ctx->iv, con.iv->iv, ivsize); - } - - while (size) { - struct scatterlist *sg; - unsigned long len = size; - size_t plen; - - if (ctx->merge) { - sgl = 
list_entry(ctx->tsgl.prev, - struct skcipher_sg_list, list); - sg = sgl->sg + sgl->cur - 1; - len = min_t(unsigned long, len, - PAGE_SIZE - sg->offset - sg->length); - - err = memcpy_from_msg(page_address(sg_page(sg)) + - sg->offset + sg->length, - msg, len); - if (err) - goto unlock; - - sg->length += len; - ctx->merge = (sg->offset + sg->length) & - (PAGE_SIZE - 1); - - ctx->used += len; - copied += len; - size -= len; - continue; - } - if (!skcipher_writable(sk)) { - err = skcipher_wait_for_wmem(sk, msg->msg_flags); - if (err) - goto unlock; - } - - len = min_t(unsigned long, len, skcipher_sndbuf(sk)); - - err = skcipher_alloc_sgl(sk); - if (err) - goto unlock; - - sgl = list_entry(ctx->tsgl.prev, struct skcipher_sg_list, list); - sg = sgl->sg; - if (sgl->cur) - sg_unmark_end(sg + sgl->cur - 1); - do { - i = sgl->cur; - plen = min_t(size_t, len, PAGE_SIZE); - - sg_assign_page(sg + i, alloc_page(GFP_KERNEL)); - err = -ENOMEM; - if (!sg_page(sg + i)) - goto unlock; - - err = memcpy_from_msg(page_address(sg_page(sg + i)), - msg, plen); - if (err) { - __free_page(sg_page(sg + i)); - sg_assign_page(sg + i, NULL); - goto unlock; - } - - sg[i].length = plen; - len -= plen; - ctx->used += plen; - copied += plen; - size -= plen; - sgl->cur++; - } while (len && sgl->cur < MAX_SGL_ENTS); - - if (!size) - sg_mark_end(sg + sgl->cur - 1); - - ctx->merge = plen & (PAGE_SIZE - 1); - } - - err = 0; - - ctx->more = msg->msg_flags & MSG_MORE; - -unlock: - skcipher_data_wakeup(sk); - release_sock(sk); - - return copied ?: err; + return af_alg_sendmsg(sock, msg, size, ivsize); } -static ssize_t skcipher_sendpage(struct socket *sock, struct page *page, - int offset, size_t size, int flags) -{ - struct sock *sk = sock->sk; - struct alg_sock *ask = alg_sk(sk); - struct skcipher_ctx *ctx = ask->private; - struct skcipher_sg_list *sgl; - int err = -EINVAL; - - if (flags & MSG_SENDPAGE_NOTLAST) - flags |= MSG_MORE; - - lock_sock(sk); - if (!ctx->more && ctx->used) - goto unlock; - - if 
(!size) - goto done; - - if (!skcipher_writable(sk)) { - err = skcipher_wait_for_wmem(sk, flags); - if (err) - goto unlock; - } - - err = skcipher_alloc_sgl(sk); - if (err) - goto unlock; - - ctx->merge = 0; - sgl = list_entry(ctx->tsgl.prev, struct skcipher_sg_list, list); - - if (sgl->cur) - sg_unmark_end(sgl->sg + sgl->cur - 1); - - sg_mark_end(sgl->sg + sgl->cur); - get_page(page); - sg_set_page(sgl->sg + sgl->cur, page, size, offset); - sgl->cur++; - ctx->used += size; - -done: - ctx->more = flags & MSG_MORE; - -unlock: - skcipher_data_wakeup(sk); - release_sock(sk); - - return err ?: size; -} - -static int skcipher_all_sg_nents(struct skcipher_ctx *ctx) -{ - struct skcipher_sg_list *sgl; - struct scatterlist *sg; - int nents = 0; - - list_for_each_entry(sgl, &ctx->tsgl, list) { - sg = sgl->sg; - - while (!sg->length) - sg++; - - nents += sg_nents(sg); - } - return nents; -} - -static int skcipher_recvmsg_async(struct socket *sock, struct msghdr *msg, - int flags) +static int _skcipher_recvmsg(struct socket *sock, struct msghdr *msg, + size_t ignored, int flags) { struct sock *sk = sock->sk; struct alg_sock *ask = alg_sk(sk); struct sock *psk = ask->parent; struct alg_sock *pask = alg_sk(psk); - struct skcipher_ctx *ctx = ask->private; + struct af_alg_ctx *ctx = ask->private; struct skcipher_tfm *skc = pask->private; struct crypto_skcipher *tfm = skc->skcipher; - struct skcipher_sg_list *sgl; - struct scatterlist *sg; - struct skcipher_async_req *sreq; - struct skcipher_request *req; - struct skcipher_async_rsgl *last_rsgl = NULL; - unsigned int txbufs = 0, len = 0, tx_nents; - unsigned int reqsize = crypto_skcipher_reqsize(tfm); - unsigned int ivsize = crypto_skcipher_ivsize(tfm); - int err = -ENOMEM; - bool mark = false; - char *iv; - - sreq = kzalloc(sizeof(*sreq) + reqsize + ivsize, GFP_KERNEL); - if (unlikely(!sreq)) - goto out; - - req = &sreq->req; - iv = (char *)(req + 1) + reqsize; - sreq->iocb = msg->msg_iocb; - INIT_LIST_HEAD(&sreq->list); - 
sreq->inflight = &ctx->inflight; + unsigned int bs = crypto_skcipher_blocksize(tfm); + struct af_alg_async_req *areq; + int err = 0; + size_t len = 0; - lock_sock(sk); - tx_nents = skcipher_all_sg_nents(ctx); - sreq->tsg = kcalloc(tx_nents, sizeof(*sg), GFP_KERNEL); - if (unlikely(!sreq->tsg)) - goto unlock; - sg_init_table(sreq->tsg, tx_nents); - memcpy(iv, ctx->iv, ivsize); - skcipher_request_set_tfm(req, tfm); - skcipher_request_set_callback(req, CRYPTO_TFM_REQ_MAY_SLEEP, - skcipher_async_cb, sreq); - - while (iov_iter_count(&msg->msg_iter)) { - struct skcipher_async_rsgl *rsgl; - int used; - - if (!ctx->used) { - err = skcipher_wait_for_data(sk, flags); - if (err) - goto free; - } - sgl = list_first_entry(&ctx->tsgl, - struct skcipher_sg_list, list); - sg = sgl->sg; - - while (!sg->length) - sg++; - - used = min_t(unsigned long, ctx->used, - iov_iter_count(&msg->msg_iter)); - used = min_t(unsigned long, used, sg->length); - - if (txbufs == tx_nents) { - struct scatterlist *tmp; - int x; - /* Ran out of tx slots in async request - * need to expand */ - tmp = kcalloc(tx_nents * 2, sizeof(*tmp), - GFP_KERNEL); - if (!tmp) { - err = -ENOMEM; - goto free; - } - - sg_init_table(tmp, tx_nents * 2); - for (x = 0; x < tx_nents; x++) - sg_set_page(&tmp[x], sg_page(&sreq->tsg[x]), - sreq->tsg[x].length, - sreq->tsg[x].offset); - kfree(sreq->tsg); - sreq->tsg = tmp; - tx_nents *= 2; - mark = true; - } - /* Need to take over the tx sgl from ctx - * to the asynch req - these sgls will be freed later */ - sg_set_page(sreq->tsg + txbufs++, sg_page(sg), sg->length, - sg->offset); - - if (list_empty(&sreq->list)) { - rsgl = &sreq->first_sgl; - list_add_tail(&rsgl->list, &sreq->list); - } else { - rsgl = kmalloc(sizeof(*rsgl), GFP_KERNEL); - if (!rsgl) { - err = -ENOMEM; - goto free; - } - list_add_tail(&rsgl->list, &sreq->list); - } + /* Allocate cipher request for current operation. 
*/ + areq = af_alg_alloc_areq(sk, sizeof(struct af_alg_async_req) + + crypto_skcipher_reqsize(tfm)); + if (IS_ERR(areq)) + return PTR_ERR(areq); - used = af_alg_make_sg(&rsgl->sgl, &msg->msg_iter, used); - err = used; - if (used < 0) - goto free; - if (last_rsgl) - af_alg_link_sg(&last_rsgl->sgl, &rsgl->sgl); - - last_rsgl = rsgl; - len += used; - skcipher_pull_sgl(sk, used, 0); - iov_iter_advance(&msg->msg_iter, used); + /* convert iovecs of output buffers into RX SGL */ + err = af_alg_get_rsgl(sk, msg, flags, areq, -1, &len); + if (err) + goto free; + + /* Process only as much RX buffers for which we have TX data */ + if (len > ctx->used) + len = ctx->used; + + /* + * If more buffers are to be expected to be processed, process only + * full block size buffers. + */ + if (ctx->more || len < ctx->used) + len -= len % bs; + + /* + * Create a per request TX SGL for this request which tracks the + * SG entries from the global TX SGL. + */ + areq->tsgl_entries = af_alg_count_tsgl(sk, len, 0); + if (!areq->tsgl_entries) + areq->tsgl_entries = 1; + areq->tsgl = sock_kmalloc(sk, sizeof(*areq->tsgl) * areq->tsgl_entries, + GFP_KERNEL); + if (!areq->tsgl) { + err = -ENOMEM; + goto free; + } + sg_init_table(areq->tsgl, areq->tsgl_entries); + af_alg_pull_tsgl(sk, len, areq->tsgl, 0); + + /* Initialize the crypto operation */ + skcipher_request_set_tfm(&areq->cra_u.skcipher_req, tfm); + skcipher_request_set_crypt(&areq->cra_u.skcipher_req, areq->tsgl, + areq->first_rsgl.sgl.sg, len, ctx->iv); + + if (msg->msg_iocb && !is_sync_kiocb(msg->msg_iocb)) { + /* AIO operation */ + areq->iocb = msg->msg_iocb; + skcipher_request_set_callback(&areq->cra_u.skcipher_req, + CRYPTO_TFM_REQ_MAY_SLEEP, + af_alg_async_cb, areq); + err = ctx->enc ? 
+ crypto_skcipher_encrypt(&areq->cra_u.skcipher_req) : + crypto_skcipher_decrypt(&areq->cra_u.skcipher_req); + } else { + /* Synchronous operation */ + skcipher_request_set_callback(&areq->cra_u.skcipher_req, + CRYPTO_TFM_REQ_MAY_SLEEP | + CRYPTO_TFM_REQ_MAY_BACKLOG, + af_alg_complete, + &ctx->completion); + err = af_alg_wait_for_completion(ctx->enc ? + crypto_skcipher_encrypt(&areq->cra_u.skcipher_req) : + crypto_skcipher_decrypt(&areq->cra_u.skcipher_req), + &ctx->completion); } - if (mark) - sg_mark_end(sreq->tsg + txbufs - 1); - - skcipher_request_set_crypt(req, sreq->tsg, sreq->first_sgl.sgl.sg, - len, iv); - err = ctx->enc ? crypto_skcipher_encrypt(req) : - crypto_skcipher_decrypt(req); + /* AIO operation in progress */ if (err == -EINPROGRESS) { - atomic_inc(&ctx->inflight); - err = -EIOCBQUEUED; - sreq = NULL; - goto unlock; + sock_hold(sk); + + /* Remember output size that will be generated. */ + areq->outlen = len; + + return -EIOCBQUEUED; } + free: - skcipher_free_async_sgls(sreq); -unlock: - skcipher_wmem_wakeup(sk); - release_sock(sk); - kzfree(sreq); -out: - return err; + af_alg_free_areq_sgls(areq); + sock_kfree_s(sk, areq, areq->areqlen); + + return err ? 
err : len; } -static int skcipher_recvmsg_sync(struct socket *sock, struct msghdr *msg, - int flags) +static int skcipher_recvmsg(struct socket *sock, struct msghdr *msg, + size_t ignored, int flags) { struct sock *sk = sock->sk; - struct alg_sock *ask = alg_sk(sk); - struct sock *psk = ask->parent; - struct alg_sock *pask = alg_sk(psk); - struct skcipher_ctx *ctx = ask->private; - struct skcipher_tfm *skc = pask->private; - struct crypto_skcipher *tfm = skc->skcipher; - unsigned bs = crypto_skcipher_blocksize(tfm); - struct skcipher_sg_list *sgl; - struct scatterlist *sg; - int err = -EAGAIN; - int used; - long copied = 0; + int ret = 0; lock_sock(sk); while (msg_data_left(msg)) { - if (!ctx->used) { - err = skcipher_wait_for_data(sk, flags); - if (err) - goto unlock; + int err = _skcipher_recvmsg(sock, msg, ignored, flags); + + /* + * This error covers -EIOCBQUEUED which implies that we can + * only handle one AIO request. If the caller wants to have + * multiple AIO requests in parallel, he must make multiple + * separate AIO calls. + * + * Also return the error if no data has been processed so far. + */ + if (err <= 0) { + if (err == -EIOCBQUEUED || !ret) + ret = err; + goto out; } - used = min_t(unsigned long, ctx->used, msg_data_left(msg)); - - used = af_alg_make_sg(&ctx->rsgl, &msg->msg_iter, used); - err = used; - if (err < 0) - goto unlock; - - if (ctx->more || used < ctx->used) - used -= used % bs; - - err = -EINVAL; - if (!used) - goto free; - - sgl = list_first_entry(&ctx->tsgl, - struct skcipher_sg_list, list); - sg = sgl->sg; - - while (!sg->length) - sg++; - - skcipher_request_set_crypt(&ctx->req, sg, ctx->rsgl.sg, used, - ctx->iv); - - err = af_alg_wait_for_completion( - ctx->enc ? 
- crypto_skcipher_encrypt(&ctx->req) : - crypto_skcipher_decrypt(&ctx->req), - &ctx->completion); - -free: - af_alg_free_sg(&ctx->rsgl); - - if (err) - goto unlock; - - copied += used; - skcipher_pull_sgl(sk, used, 1); - iov_iter_advance(&msg->msg_iter, used); + ret += err; } - err = 0; - -unlock: - skcipher_wmem_wakeup(sk); +out: + af_alg_wmem_wakeup(sk); release_sock(sk); - - return copied ?: err; -} - -static int skcipher_recvmsg(struct socket *sock, struct msghdr *msg, - size_t ignored, int flags) -{ - return (msg->msg_iocb && !is_sync_kiocb(msg->msg_iocb)) ? - skcipher_recvmsg_async(sock, msg, flags) : - skcipher_recvmsg_sync(sock, msg, flags); + return ret; } -static unsigned int skcipher_poll(struct file *file, struct socket *sock, - poll_table *wait) -{ - struct sock *sk = sock->sk; - struct alg_sock *ask = alg_sk(sk); - struct skcipher_ctx *ctx = ask->private; - unsigned int mask; - - sock_poll_wait(file, sk_sleep(sk), wait); - mask = 0; - - if (ctx->used) - mask |= POLLIN | POLLRDNORM; - - if (skcipher_writable(sk)) - mask |= POLLOUT | POLLWRNORM | POLLWRBAND; - - return mask; -} static struct proto_ops algif_skcipher_ops = { .family = PF_ALG, @@ -760,9 +205,9 @@ static struct proto_ops algif_skcipher_ops = { .release = af_alg_release, .sendmsg = skcipher_sendmsg, - .sendpage = skcipher_sendpage, + .sendpage = af_alg_sendpage, .recvmsg = skcipher_recvmsg, - .poll = skcipher_poll, + .poll = af_alg_poll, }; static int skcipher_check_key(struct socket *sock) @@ -824,7 +269,7 @@ static ssize_t skcipher_sendpage_nokey(struct socket *sock, struct page *page, if (err) return err; - return skcipher_sendpage(sock, page, offset, size, flags); + return af_alg_sendpage(sock, page, offset, size, flags); } static int skcipher_recvmsg_nokey(struct socket *sock, struct msghdr *msg, @@ -858,7 +303,7 @@ static struct proto_ops algif_skcipher_ops_nokey = { .sendmsg = skcipher_sendmsg_nokey, .sendpage = skcipher_sendpage_nokey, .recvmsg = skcipher_recvmsg_nokey, - .poll = 
skcipher_poll, + .poll = af_alg_poll, }; static void *skcipher_bind(const char *name, u32 type, u32 mask) @@ -900,26 +345,16 @@ static int skcipher_setkey(void *private, const u8 *key, unsigned int keylen) return err; } -static void skcipher_wait(struct sock *sk) -{ - struct alg_sock *ask = alg_sk(sk); - struct skcipher_ctx *ctx = ask->private; - int ctr = 0; - - while (atomic_read(&ctx->inflight) && ctr++ < 100) - msleep(100); -} - static void skcipher_sock_destruct(struct sock *sk) { struct alg_sock *ask = alg_sk(sk); - struct skcipher_ctx *ctx = ask->private; - struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(&ctx->req); - - if (atomic_read(&ctx->inflight)) - skcipher_wait(sk); + struct af_alg_ctx *ctx = ask->private; + struct sock *psk = ask->parent; + struct alg_sock *pask = alg_sk(psk); + struct skcipher_tfm *skc = pask->private; + struct crypto_skcipher *tfm = skc->skcipher; - skcipher_free_sgl(sk); + af_alg_pull_tsgl(sk, ctx->used, NULL, 0); sock_kzfree_s(sk, ctx->iv, crypto_skcipher_ivsize(tfm)); sock_kfree_s(sk, ctx, ctx->len); af_alg_release_parent(sk); @@ -927,11 +362,11 @@ static void skcipher_sock_destruct(struct sock *sk) static int skcipher_accept_parent_nokey(void *private, struct sock *sk) { - struct skcipher_ctx *ctx; + struct af_alg_ctx *ctx; struct alg_sock *ask = alg_sk(sk); struct skcipher_tfm *tfm = private; struct crypto_skcipher *skcipher = tfm->skcipher; - unsigned int len = sizeof(*ctx) + crypto_skcipher_reqsize(skcipher); + unsigned int len = sizeof(*ctx); ctx = sock_kmalloc(sk, len, GFP_KERNEL); if (!ctx) @@ -946,22 +381,17 @@ static int skcipher_accept_parent_nokey(void *private, struct sock *sk) memset(ctx->iv, 0, crypto_skcipher_ivsize(skcipher)); - INIT_LIST_HEAD(&ctx->tsgl); + INIT_LIST_HEAD(&ctx->tsgl_list); ctx->len = len; ctx->used = 0; + ctx->rcvused = 0; ctx->more = 0; ctx->merge = 0; ctx->enc = 0; - atomic_set(&ctx->inflight, 0); af_alg_init_completion(&ctx->completion); ask->private = ctx; - 
skcipher_request_set_tfm(&ctx->req, skcipher); - skcipher_request_set_callback(&ctx->req, CRYPTO_TFM_REQ_MAY_SLEEP | - CRYPTO_TFM_REQ_MAY_BACKLOG, - af_alg_complete, &ctx->completion); - sk->sk_destruct = skcipher_sock_destruct; return 0; diff --git a/crypto/ctr.c b/crypto/ctr.c index 477d9226ccaa..854d924f9d8e 100644 --- a/crypto/ctr.c +++ b/crypto/ctr.c @@ -65,8 +65,7 @@ static void crypto_ctr_crypt_final(struct blkcipher_walk *walk, unsigned int nbytes = walk->nbytes; crypto_cipher_encrypt_one(tfm, keystream, ctrblk); - crypto_xor(keystream, src, nbytes); - memcpy(dst, keystream, nbytes); + crypto_xor_cpy(dst, keystream, src, nbytes); crypto_inc(ctrblk, bsize); } diff --git a/crypto/ecdh.c b/crypto/ecdh.c index 61c7708905d0..4271fc77d261 100644 --- a/crypto/ecdh.c +++ b/crypto/ecdh.c @@ -20,8 +20,6 @@ struct ecdh_ctx { unsigned int curve_id; unsigned int ndigits; u64 private_key[ECC_MAX_DIGITS]; - u64 public_key[2 * ECC_MAX_DIGITS]; - u64 shared_secret[ECC_MAX_DIGITS]; }; static inline struct ecdh_ctx *ecdh_get_ctx(struct crypto_kpp *tfm) @@ -70,41 +68,58 @@ static int ecdh_set_secret(struct crypto_kpp *tfm, const void *buf, static int ecdh_compute_value(struct kpp_request *req) { - int ret = 0; struct crypto_kpp *tfm = crypto_kpp_reqtfm(req); struct ecdh_ctx *ctx = ecdh_get_ctx(tfm); - size_t copied, nbytes; + u64 *public_key; + u64 *shared_secret = NULL; void *buf; + size_t copied, nbytes, public_key_sz; + int ret = -ENOMEM; nbytes = ctx->ndigits << ECC_DIGITS_TO_BYTES_SHIFT; + /* Public part is a point thus it has both coordinates */ + public_key_sz = 2 * nbytes; + + public_key = kmalloc(public_key_sz, GFP_KERNEL); + if (!public_key) + return -ENOMEM; if (req->src) { - copied = sg_copy_to_buffer(req->src, 1, ctx->public_key, - 2 * nbytes); - if (copied != 2 * nbytes) - return -EINVAL; + shared_secret = kmalloc(nbytes, GFP_KERNEL); + if (!shared_secret) + goto free_pubkey; + + copied = sg_copy_to_buffer(req->src, 1, public_key, + public_key_sz); + if (copied 
!= public_key_sz) { + ret = -EINVAL; + goto free_all; + } ret = crypto_ecdh_shared_secret(ctx->curve_id, ctx->ndigits, - ctx->private_key, - ctx->public_key, - ctx->shared_secret); + ctx->private_key, public_key, + shared_secret); - buf = ctx->shared_secret; + buf = shared_secret; } else { ret = ecc_make_pub_key(ctx->curve_id, ctx->ndigits, - ctx->private_key, ctx->public_key); - buf = ctx->public_key; - /* Public part is a point thus it has both coordinates */ - nbytes *= 2; + ctx->private_key, public_key); + buf = public_key; + nbytes = public_key_sz; } if (ret < 0) - return ret; + goto free_all; copied = sg_copy_from_buffer(req->dst, 1, buf, nbytes); if (copied != nbytes) - return -EINVAL; + ret = -EINVAL; + /* fall through */ +free_all: + kzfree(shared_secret); +free_pubkey: + kfree(public_key); return ret; } diff --git a/crypto/pcbc.c b/crypto/pcbc.c index 29dd2b4a3b85..d9e45a958720 100644 --- a/crypto/pcbc.c +++ b/crypto/pcbc.c @@ -55,8 +55,7 @@ static int crypto_pcbc_encrypt_segment(struct skcipher_request *req, do { crypto_xor(iv, src, bsize); crypto_cipher_encrypt_one(tfm, dst, iv); - memcpy(iv, dst, bsize); - crypto_xor(iv, src, bsize); + crypto_xor_cpy(iv, dst, src, bsize); src += bsize; dst += bsize; @@ -79,8 +78,7 @@ static int crypto_pcbc_encrypt_inplace(struct skcipher_request *req, memcpy(tmpbuf, src, bsize); crypto_xor(iv, src, bsize); crypto_cipher_encrypt_one(tfm, src, iv); - memcpy(iv, tmpbuf, bsize); - crypto_xor(iv, src, bsize); + crypto_xor_cpy(iv, tmpbuf, src, bsize); src += bsize; } while ((nbytes -= bsize) >= bsize); @@ -127,8 +125,7 @@ static int crypto_pcbc_decrypt_segment(struct skcipher_request *req, do { crypto_cipher_decrypt_one(tfm, dst, src); crypto_xor(dst, iv, bsize); - memcpy(iv, src, bsize); - crypto_xor(iv, dst, bsize); + crypto_xor_cpy(iv, dst, src, bsize); src += bsize; dst += bsize; @@ -153,8 +150,7 @@ static int crypto_pcbc_decrypt_inplace(struct skcipher_request *req, memcpy(tmpbuf, src, bsize); 
crypto_cipher_decrypt_one(tfm, src, src); crypto_xor(src, iv, bsize); - memcpy(iv, tmpbuf, bsize); - crypto_xor(iv, src, bsize); + crypto_xor_cpy(iv, src, tmpbuf, bsize); src += bsize; } while ((nbytes -= bsize) >= bsize); diff --git a/crypto/rng.c b/crypto/rng.c index 5e8469244960..b4a618668161 100644 --- a/crypto/rng.c +++ b/crypto/rng.c @@ -43,12 +43,14 @@ int crypto_rng_reset(struct crypto_rng *tfm, const u8 *seed, unsigned int slen) if (!buf) return -ENOMEM; - get_random_bytes(buf, slen); + err = get_random_bytes_wait(buf, slen); + if (err) + goto out; seed = buf; } err = crypto_rng_alg(tfm)->seed(tfm, seed, slen); - +out: kzfree(buf); return err; } diff --git a/crypto/scompress.c b/crypto/scompress.c index ae1d3cf209e4..2075e2c4e7df 100644 --- a/crypto/scompress.c +++ b/crypto/scompress.c @@ -65,11 +65,6 @@ static void crypto_scomp_show(struct seq_file *m, struct crypto_alg *alg) seq_puts(m, "type : scomp\n"); } -static int crypto_scomp_init_tfm(struct crypto_tfm *tfm) -{ - return 0; -} - static void crypto_scomp_free_scratches(void * __percpu *scratches) { int i; @@ -125,12 +120,26 @@ static int crypto_scomp_alloc_all_scratches(void) if (!scomp_src_scratches) return -ENOMEM; scomp_dst_scratches = crypto_scomp_alloc_scratches(); - if (!scomp_dst_scratches) + if (!scomp_dst_scratches) { + crypto_scomp_free_scratches(scomp_src_scratches); + scomp_src_scratches = NULL; return -ENOMEM; + } } return 0; } +static int crypto_scomp_init_tfm(struct crypto_tfm *tfm) +{ + int ret; + + mutex_lock(&scomp_lock); + ret = crypto_scomp_alloc_all_scratches(); + mutex_unlock(&scomp_lock); + + return ret; +} + static void crypto_scomp_sg_free(struct scatterlist *sgl) { int i, n; @@ -211,9 +220,7 @@ static int scomp_acomp_comp_decomp(struct acomp_req *req, int dir) scratch_dst, &req->dlen, *ctx); if (!ret) { if (!req->dst) { - req->dst = crypto_scomp_sg_alloc(req->dlen, - req->base.flags & CRYPTO_TFM_REQ_MAY_SLEEP ? 
- GFP_KERNEL : GFP_ATOMIC); + req->dst = crypto_scomp_sg_alloc(req->dlen, GFP_ATOMIC); if (!req->dst) goto out; } @@ -240,6 +247,10 @@ static void crypto_exit_scomp_ops_async(struct crypto_tfm *tfm) struct crypto_scomp **ctx = crypto_tfm_ctx(tfm); crypto_free_scomp(*ctx); + + mutex_lock(&scomp_lock); + crypto_scomp_free_all_scratches(); + mutex_unlock(&scomp_lock); } int crypto_init_scomp_ops_async(struct crypto_tfm *tfm) @@ -316,40 +327,18 @@ static const struct crypto_type crypto_scomp_type = { int crypto_register_scomp(struct scomp_alg *alg) { struct crypto_alg *base = &alg->base; - int ret = -ENOMEM; - - mutex_lock(&scomp_lock); - if (crypto_scomp_alloc_all_scratches()) - goto error; base->cra_type = &crypto_scomp_type; base->cra_flags &= ~CRYPTO_ALG_TYPE_MASK; base->cra_flags |= CRYPTO_ALG_TYPE_SCOMPRESS; - ret = crypto_register_alg(base); - if (ret) - goto error; - - mutex_unlock(&scomp_lock); - return ret; - -error: - crypto_scomp_free_all_scratches(); - mutex_unlock(&scomp_lock); - return ret; + return crypto_register_alg(base); } EXPORT_SYMBOL_GPL(crypto_register_scomp); int crypto_unregister_scomp(struct scomp_alg *alg) { - int ret; - - mutex_lock(&scomp_lock); - ret = crypto_unregister_alg(&alg->base); - crypto_scomp_free_all_scratches(); - mutex_unlock(&scomp_lock); - - return ret; + return crypto_unregister_alg(&alg->base); } EXPORT_SYMBOL_GPL(crypto_unregister_scomp); diff --git a/crypto/serpent_generic.c b/crypto/serpent_generic.c index 94970a794975..7c3382facc82 100644 --- a/crypto/serpent_generic.c +++ b/crypto/serpent_generic.c @@ -229,6 +229,46 @@ x4 ^= x2; \ }) +static void __serpent_setkey_sbox(u32 r0, u32 r1, u32 r2, u32 r3, u32 r4, u32 *k) +{ + k += 100; + S3(r3, r4, r0, r1, r2); store_and_load_keys(r1, r2, r4, r3, 28, 24); + S4(r1, r2, r4, r3, r0); store_and_load_keys(r2, r4, r3, r0, 24, 20); + S5(r2, r4, r3, r0, r1); store_and_load_keys(r1, r2, r4, r0, 20, 16); + S6(r1, r2, r4, r0, r3); store_and_load_keys(r4, r3, r2, r0, 16, 12); + S7(r4, 
r3, r2, r0, r1); store_and_load_keys(r1, r2, r0, r4, 12, 8); + S0(r1, r2, r0, r4, r3); store_and_load_keys(r0, r2, r4, r1, 8, 4); + S1(r0, r2, r4, r1, r3); store_and_load_keys(r3, r4, r1, r0, 4, 0); + S2(r3, r4, r1, r0, r2); store_and_load_keys(r2, r4, r3, r0, 0, -4); + S3(r2, r4, r3, r0, r1); store_and_load_keys(r0, r1, r4, r2, -4, -8); + S4(r0, r1, r4, r2, r3); store_and_load_keys(r1, r4, r2, r3, -8, -12); + S5(r1, r4, r2, r3, r0); store_and_load_keys(r0, r1, r4, r3, -12, -16); + S6(r0, r1, r4, r3, r2); store_and_load_keys(r4, r2, r1, r3, -16, -20); + S7(r4, r2, r1, r3, r0); store_and_load_keys(r0, r1, r3, r4, -20, -24); + S0(r0, r1, r3, r4, r2); store_and_load_keys(r3, r1, r4, r0, -24, -28); + k -= 50; + S1(r3, r1, r4, r0, r2); store_and_load_keys(r2, r4, r0, r3, 22, 18); + S2(r2, r4, r0, r3, r1); store_and_load_keys(r1, r4, r2, r3, 18, 14); + S3(r1, r4, r2, r3, r0); store_and_load_keys(r3, r0, r4, r1, 14, 10); + S4(r3, r0, r4, r1, r2); store_and_load_keys(r0, r4, r1, r2, 10, 6); + S5(r0, r4, r1, r2, r3); store_and_load_keys(r3, r0, r4, r2, 6, 2); + S6(r3, r0, r4, r2, r1); store_and_load_keys(r4, r1, r0, r2, 2, -2); + S7(r4, r1, r0, r2, r3); store_and_load_keys(r3, r0, r2, r4, -2, -6); + S0(r3, r0, r2, r4, r1); store_and_load_keys(r2, r0, r4, r3, -6, -10); + S1(r2, r0, r4, r3, r1); store_and_load_keys(r1, r4, r3, r2, -10, -14); + S2(r1, r4, r3, r2, r0); store_and_load_keys(r0, r4, r1, r2, -14, -18); + S3(r0, r4, r1, r2, r3); store_and_load_keys(r2, r3, r4, r0, -18, -22); + k -= 50; + S4(r2, r3, r4, r0, r1); store_and_load_keys(r3, r4, r0, r1, 28, 24); + S5(r3, r4, r0, r1, r2); store_and_load_keys(r2, r3, r4, r1, 24, 20); + S6(r2, r3, r4, r1, r0); store_and_load_keys(r4, r0, r3, r1, 20, 16); + S7(r4, r0, r3, r1, r2); store_and_load_keys(r2, r3, r1, r4, 16, 12); + S0(r2, r3, r1, r4, r0); store_and_load_keys(r1, r3, r4, r2, 12, 8); + S1(r1, r3, r4, r2, r0); store_and_load_keys(r0, r4, r2, r1, 8, 4); + S2(r0, r4, r2, r1, r3); store_and_load_keys(r3, r4, r0, r1, 4, 
0); + S3(r3, r4, r0, r1, r2); storekeys(r1, r2, r4, r3, 0); +} + int __serpent_setkey(struct serpent_ctx *ctx, const u8 *key, unsigned int keylen) { @@ -395,42 +435,7 @@ int __serpent_setkey(struct serpent_ctx *ctx, const u8 *key, keyiter(k[23], r1, r0, r3, 131, 31); /* Apply S-boxes */ - - S3(r3, r4, r0, r1, r2); store_and_load_keys(r1, r2, r4, r3, 28, 24); - S4(r1, r2, r4, r3, r0); store_and_load_keys(r2, r4, r3, r0, 24, 20); - S5(r2, r4, r3, r0, r1); store_and_load_keys(r1, r2, r4, r0, 20, 16); - S6(r1, r2, r4, r0, r3); store_and_load_keys(r4, r3, r2, r0, 16, 12); - S7(r4, r3, r2, r0, r1); store_and_load_keys(r1, r2, r0, r4, 12, 8); - S0(r1, r2, r0, r4, r3); store_and_load_keys(r0, r2, r4, r1, 8, 4); - S1(r0, r2, r4, r1, r3); store_and_load_keys(r3, r4, r1, r0, 4, 0); - S2(r3, r4, r1, r0, r2); store_and_load_keys(r2, r4, r3, r0, 0, -4); - S3(r2, r4, r3, r0, r1); store_and_load_keys(r0, r1, r4, r2, -4, -8); - S4(r0, r1, r4, r2, r3); store_and_load_keys(r1, r4, r2, r3, -8, -12); - S5(r1, r4, r2, r3, r0); store_and_load_keys(r0, r1, r4, r3, -12, -16); - S6(r0, r1, r4, r3, r2); store_and_load_keys(r4, r2, r1, r3, -16, -20); - S7(r4, r2, r1, r3, r0); store_and_load_keys(r0, r1, r3, r4, -20, -24); - S0(r0, r1, r3, r4, r2); store_and_load_keys(r3, r1, r4, r0, -24, -28); - k -= 50; - S1(r3, r1, r4, r0, r2); store_and_load_keys(r2, r4, r0, r3, 22, 18); - S2(r2, r4, r0, r3, r1); store_and_load_keys(r1, r4, r2, r3, 18, 14); - S3(r1, r4, r2, r3, r0); store_and_load_keys(r3, r0, r4, r1, 14, 10); - S4(r3, r0, r4, r1, r2); store_and_load_keys(r0, r4, r1, r2, 10, 6); - S5(r0, r4, r1, r2, r3); store_and_load_keys(r3, r0, r4, r2, 6, 2); - S6(r3, r0, r4, r2, r1); store_and_load_keys(r4, r1, r0, r2, 2, -2); - S7(r4, r1, r0, r2, r3); store_and_load_keys(r3, r0, r2, r4, -2, -6); - S0(r3, r0, r2, r4, r1); store_and_load_keys(r2, r0, r4, r3, -6, -10); - S1(r2, r0, r4, r3, r1); store_and_load_keys(r1, r4, r3, r2, -10, -14); - S2(r1, r4, r3, r2, r0); store_and_load_keys(r0, r4, r1, r2, 
-14, -18); - S3(r0, r4, r1, r2, r3); store_and_load_keys(r2, r3, r4, r0, -18, -22); - k -= 50; - S4(r2, r3, r4, r0, r1); store_and_load_keys(r3, r4, r0, r1, 28, 24); - S5(r3, r4, r0, r1, r2); store_and_load_keys(r2, r3, r4, r1, 24, 20); - S6(r2, r3, r4, r1, r0); store_and_load_keys(r4, r0, r3, r1, 20, 16); - S7(r4, r0, r3, r1, r2); store_and_load_keys(r2, r3, r1, r4, 16, 12); - S0(r2, r3, r1, r4, r0); store_and_load_keys(r1, r3, r4, r2, 12, 8); - S1(r1, r3, r4, r2, r0); store_and_load_keys(r0, r4, r2, r1, 8, 4); - S2(r0, r4, r2, r1, r3); store_and_load_keys(r3, r4, r0, r1, 4, 0); - S3(r3, r4, r0, r1, r2); storekeys(r1, r2, r4, r3, 0); + __serpent_setkey_sbox(r0, r1, r2, r3, r4, ctx->expkey); return 0; } diff --git a/crypto/tcrypt.c b/crypto/tcrypt.c index 0dd6a432d6ca..0022a18d36ee 100644 --- a/crypto/tcrypt.c +++ b/crypto/tcrypt.c @@ -1404,9 +1404,9 @@ static int do_test(const char *alg, u32 type, u32 mask, int m) test_cipher_speed("lrw(aes)", DECRYPT, sec, NULL, 0, speed_template_32_40_48); test_cipher_speed("xts(aes)", ENCRYPT, sec, NULL, 0, - speed_template_32_48_64); + speed_template_32_64); test_cipher_speed("xts(aes)", DECRYPT, sec, NULL, 0, - speed_template_32_48_64); + speed_template_32_64); test_cipher_speed("cts(cbc(aes))", ENCRYPT, sec, NULL, 0, speed_template_16_24_32); test_cipher_speed("cts(cbc(aes))", DECRYPT, sec, NULL, 0, @@ -1837,9 +1837,9 @@ static int do_test(const char *alg, u32 type, u32 mask, int m) test_acipher_speed("lrw(aes)", DECRYPT, sec, NULL, 0, speed_template_32_40_48); test_acipher_speed("xts(aes)", ENCRYPT, sec, NULL, 0, - speed_template_32_48_64); + speed_template_32_64); test_acipher_speed("xts(aes)", DECRYPT, sec, NULL, 0, - speed_template_32_48_64); + speed_template_32_64); test_acipher_speed("cts(cbc(aes))", ENCRYPT, sec, NULL, 0, speed_template_16_24_32); test_acipher_speed("cts(cbc(aes))", DECRYPT, sec, NULL, 0, diff --git a/drivers/char/hw_random/Kconfig b/drivers/char/hw_random/Kconfig index 1b223c32a8ae..95a031e9eced 
100644 --- a/drivers/char/hw_random/Kconfig +++ b/drivers/char/hw_random/Kconfig @@ -13,10 +13,8 @@ menuconfig HW_RANDOM that's usually called /dev/hwrng, and which exposes one of possibly several hardware random number generators. - These hardware random number generators do not feed directly - into the kernel's random number generator. That is usually - handled by the "rngd" daemon. Documentation/hw_random.txt - has more information. + These hardware random number generators do feed into the + kernel's random number generator entropy pool. If unsure, say Y. @@ -255,6 +253,20 @@ config HW_RANDOM_MXC_RNGA If unsure, say Y. +config HW_RANDOM_IMX_RNGC + tristate "Freescale i.MX RNGC Random Number Generator" + depends on ARCH_MXC + default HW_RANDOM + ---help--- + This driver provides kernel-side support for the Random Number + Generator Version C hardware found on some Freescale i.MX + processors. Version B is also supported by this driver. + + To compile this driver as a module, choose M here: the + module will be called imx-rngc. + + If unsure, say Y. 
+ config HW_RANDOM_NOMADIK tristate "ST-Ericsson Nomadik Random Number Generator support" depends on ARCH_NOMADIK diff --git a/drivers/char/hw_random/Makefile b/drivers/char/hw_random/Makefile index b085975ec1d2..39a67defac67 100644 --- a/drivers/char/hw_random/Makefile +++ b/drivers/char/hw_random/Makefile @@ -20,6 +20,7 @@ obj-$(CONFIG_HW_RANDOM_PASEMI) += pasemi-rng.o obj-$(CONFIG_HW_RANDOM_VIRTIO) += virtio-rng.o obj-$(CONFIG_HW_RANDOM_TX4939) += tx4939-rng.o obj-$(CONFIG_HW_RANDOM_MXC_RNGA) += mxc-rnga.o +obj-$(CONFIG_HW_RANDOM_IMX_RNGC) += imx-rngc.o obj-$(CONFIG_HW_RANDOM_OCTEON) += octeon-rng.o obj-$(CONFIG_HW_RANDOM_NOMADIK) += nomadik-rng.o obj-$(CONFIG_HW_RANDOM_PSERIES) += pseries-rng.o diff --git a/drivers/char/hw_random/core.c b/drivers/char/hw_random/core.c index 503a41dfa193..9701ac7d8b47 100644 --- a/drivers/char/hw_random/core.c +++ b/drivers/char/hw_random/core.c @@ -28,7 +28,10 @@ #define RNG_MODULE_NAME "hw_random" static struct hwrng *current_rng; +/* the current rng has been explicitly chosen by user via sysfs */ +static int cur_rng_set_by_user; static struct task_struct *hwrng_fill; +/* list of registered rngs, sorted descending by quality */ static LIST_HEAD(rng_list); /* Protects rng_list and current_rng */ static DEFINE_MUTEX(rng_mutex); @@ -303,6 +306,7 @@ static ssize_t hwrng_attr_current_store(struct device *dev, list_for_each_entry(rng, &rng_list, list) { if (sysfs_streq(rng->name, buf)) { err = 0; + cur_rng_set_by_user = 1; if (rng != current_rng) err = set_current_rng(rng); break; @@ -351,16 +355,27 @@ static ssize_t hwrng_attr_available_show(struct device *dev, return strlen(buf); } +static ssize_t hwrng_attr_selected_show(struct device *dev, + struct device_attribute *attr, + char *buf) +{ + return snprintf(buf, PAGE_SIZE, "%d\n", cur_rng_set_by_user); +} + static DEVICE_ATTR(rng_current, S_IRUGO | S_IWUSR, hwrng_attr_current_show, hwrng_attr_current_store); static DEVICE_ATTR(rng_available, S_IRUGO, hwrng_attr_available_show, 
NULL); +static DEVICE_ATTR(rng_selected, S_IRUGO, + hwrng_attr_selected_show, + NULL); static struct attribute *rng_dev_attrs[] = { &dev_attr_rng_current.attr, &dev_attr_rng_available.attr, + &dev_attr_rng_selected.attr, NULL }; @@ -417,6 +432,7 @@ int hwrng_register(struct hwrng *rng) { int err = -EINVAL; struct hwrng *old_rng, *tmp; + struct list_head *rng_list_ptr; if (!rng->name || (!rng->data_read && !rng->read)) goto out; @@ -432,14 +448,27 @@ int hwrng_register(struct hwrng *rng) init_completion(&rng->cleanup_done); complete(&rng->cleanup_done); + /* rng_list is sorted by decreasing quality */ + list_for_each(rng_list_ptr, &rng_list) { + tmp = list_entry(rng_list_ptr, struct hwrng, list); + if (tmp->quality < rng->quality) + break; + } + list_add_tail(&rng->list, rng_list_ptr); + old_rng = current_rng; err = 0; - if (!old_rng) { + if (!old_rng || + (!cur_rng_set_by_user && rng->quality > old_rng->quality)) { + /* + * Set new rng as current as the new rng source + * provides better entropy quality and was not + * chosen by userspace. + */ err = set_current_rng(rng); if (err) goto out_unlock; } - list_add_tail(&rng->list, &rng_list); if (old_rng && !rng->init) { /* @@ -466,12 +495,13 @@ void hwrng_unregister(struct hwrng *rng) list_del(&rng->list); if (current_rng == rng) { drop_current_rng(); + cur_rng_set_by_user = 0; + /* rng_list is sorted by quality, use the best (=first) one */ if (!list_empty(&rng_list)) { - struct hwrng *tail; - - tail = list_entry(rng_list.prev, struct hwrng, list); + struct hwrng *new_rng; - set_current_rng(tail); + new_rng = list_entry(rng_list.next, struct hwrng, list); + set_current_rng(new_rng); } } diff --git a/drivers/char/hw_random/imx-rngc.c b/drivers/char/hw_random/imx-rngc.c new file mode 100644 index 000000000000..88db42d30760 --- /dev/null +++ b/drivers/char/hw_random/imx-rngc.c @@ -0,0 +1,331 @@ +/* + * RNG driver for Freescale RNGC + * + * Copyright (C) 2008-2012 Freescale Semiconductor, Inc. 
+ * Copyright (C) 2017 Martin Kaiser <martin@kaiser.cx> + * + * The code contained herein is licensed under the GNU General Public + * License. You may obtain a copy of the GNU General Public License + * Version 2 or later at the following locations: + * + * http://www.opensource.org/licenses/gpl-license.html + * http://www.gnu.org/copyleft/gpl.html + */ + +#include <linux/module.h> +#include <linux/init.h> +#include <linux/kernel.h> +#include <linux/clk.h> +#include <linux/err.h> +#include <linux/platform_device.h> +#include <linux/interrupt.h> +#include <linux/hw_random.h> +#include <linux/completion.h> +#include <linux/io.h> + +#define RNGC_COMMAND 0x0004 +#define RNGC_CONTROL 0x0008 +#define RNGC_STATUS 0x000C +#define RNGC_ERROR 0x0010 +#define RNGC_FIFO 0x0014 + +#define RNGC_CMD_CLR_ERR 0x00000020 +#define RNGC_CMD_CLR_INT 0x00000010 +#define RNGC_CMD_SEED 0x00000002 +#define RNGC_CMD_SELF_TEST 0x00000001 + +#define RNGC_CTRL_MASK_ERROR 0x00000040 +#define RNGC_CTRL_MASK_DONE 0x00000020 + +#define RNGC_STATUS_ERROR 0x00010000 +#define RNGC_STATUS_FIFO_LEVEL_MASK 0x00000f00 +#define RNGC_STATUS_FIFO_LEVEL_SHIFT 8 +#define RNGC_STATUS_SEED_DONE 0x00000020 +#define RNGC_STATUS_ST_DONE 0x00000010 + +#define RNGC_ERROR_STATUS_STAT_ERR 0x00000008 + +#define RNGC_TIMEOUT 3000 /* 3 sec */ + + +static bool self_test = true; +module_param(self_test, bool, 0); + +struct imx_rngc { + struct device *dev; + struct clk *clk; + void __iomem *base; + struct hwrng rng; + struct completion rng_op_done; + /* + * err_reg is written only by the irq handler and read only + * when interrupts are masked, we need no spinlock + */ + u32 err_reg; +}; + + +static inline void imx_rngc_irq_mask_clear(struct imx_rngc *rngc) +{ + u32 ctrl, cmd; + + /* mask interrupts */ + ctrl = readl(rngc->base + RNGC_CONTROL); + ctrl |= RNGC_CTRL_MASK_DONE | RNGC_CTRL_MASK_ERROR; + writel(ctrl, rngc->base + RNGC_CONTROL); + + /* + * CLR_INT clears the interrupt only if there's no error + * CLR_ERR clears 
the interrupt and the error register if there + * is an error + */ + cmd = readl(rngc->base + RNGC_COMMAND); + cmd |= RNGC_CMD_CLR_INT | RNGC_CMD_CLR_ERR; + writel(cmd, rngc->base + RNGC_COMMAND); +} + +static inline void imx_rngc_irq_unmask(struct imx_rngc *rngc) +{ + u32 ctrl; + + ctrl = readl(rngc->base + RNGC_CONTROL); + ctrl &= ~(RNGC_CTRL_MASK_DONE | RNGC_CTRL_MASK_ERROR); + writel(ctrl, rngc->base + RNGC_CONTROL); +} + +static int imx_rngc_self_test(struct imx_rngc *rngc) +{ + u32 cmd; + int ret; + + imx_rngc_irq_unmask(rngc); + + /* run self test */ + cmd = readl(rngc->base + RNGC_COMMAND); + writel(cmd | RNGC_CMD_SELF_TEST, rngc->base + RNGC_COMMAND); + + ret = wait_for_completion_timeout(&rngc->rng_op_done, RNGC_TIMEOUT); + if (!ret) { + imx_rngc_irq_mask_clear(rngc); + return -ETIMEDOUT; + } + + if (rngc->err_reg != 0) + return -EIO; + + return 0; +} + +static int imx_rngc_read(struct hwrng *rng, void *data, size_t max, bool wait) +{ + struct imx_rngc *rngc = container_of(rng, struct imx_rngc, rng); + unsigned int status; + unsigned int level; + int retval = 0; + + while (max >= sizeof(u32)) { + status = readl(rngc->base + RNGC_STATUS); + + /* is there some error while reading this random number? */ + if (status & RNGC_STATUS_ERROR) + break; + + /* how many random numbers are in FIFO? [0-16] */ + level = (status & RNGC_STATUS_FIFO_LEVEL_MASK) >> + RNGC_STATUS_FIFO_LEVEL_SHIFT; + + if (level) { + /* retrieve a random number from FIFO */ + *(u32 *)data = readl(rngc->base + RNGC_FIFO); + + retval += sizeof(u32); + data += sizeof(u32); + max -= sizeof(u32); + } + } + + return retval ? 
retval : -EIO; +} + +static irqreturn_t imx_rngc_irq(int irq, void *priv) +{ + struct imx_rngc *rngc = (struct imx_rngc *)priv; + u32 status; + + /* + * clearing the interrupt will also clear the error register + * read error and status before clearing + */ + status = readl(rngc->base + RNGC_STATUS); + rngc->err_reg = readl(rngc->base + RNGC_ERROR); + + imx_rngc_irq_mask_clear(rngc); + + if (status & (RNGC_STATUS_SEED_DONE | RNGC_STATUS_ST_DONE)) + complete(&rngc->rng_op_done); + + return IRQ_HANDLED; +} + +static int imx_rngc_init(struct hwrng *rng) +{ + struct imx_rngc *rngc = container_of(rng, struct imx_rngc, rng); + u32 cmd; + int ret; + + /* clear error */ + cmd = readl(rngc->base + RNGC_COMMAND); + writel(cmd | RNGC_CMD_CLR_ERR, rngc->base + RNGC_COMMAND); + + /* create seed, repeat while there is some statistical error */ + do { + imx_rngc_irq_unmask(rngc); + + /* seed creation */ + cmd = readl(rngc->base + RNGC_COMMAND); + writel(cmd | RNGC_CMD_SEED, rngc->base + RNGC_COMMAND); + + ret = wait_for_completion_timeout(&rngc->rng_op_done, + RNGC_TIMEOUT); + + if (!ret) { + imx_rngc_irq_mask_clear(rngc); + return -ETIMEDOUT; + } + + } while (rngc->err_reg == RNGC_ERROR_STATUS_STAT_ERR); + + return rngc->err_reg ? 
-EIO : 0; +} + +static int imx_rngc_probe(struct platform_device *pdev) +{ + struct imx_rngc *rngc; + struct resource *res; + int ret; + int irq; + + rngc = devm_kzalloc(&pdev->dev, sizeof(*rngc), GFP_KERNEL); + if (!rngc) + return -ENOMEM; + + res = platform_get_resource(pdev, IORESOURCE_MEM, 0); + rngc->base = devm_ioremap_resource(&pdev->dev, res); + if (IS_ERR(rngc->base)) + return PTR_ERR(rngc->base); + + rngc->clk = devm_clk_get(&pdev->dev, NULL); + if (IS_ERR(rngc->clk)) { + dev_err(&pdev->dev, "Can not get rng_clk\n"); + return PTR_ERR(rngc->clk); + } + + irq = platform_get_irq(pdev, 0); + if (irq <= 0) { + dev_err(&pdev->dev, "Couldn't get irq %d\n", irq); + return irq; + } + + ret = clk_prepare_enable(rngc->clk); + if (ret) + return ret; + + ret = devm_request_irq(&pdev->dev, + irq, imx_rngc_irq, 0, pdev->name, (void *)rngc); + if (ret) { + dev_err(rngc->dev, "Can't get interrupt working.\n"); + goto err; + } + + init_completion(&rngc->rng_op_done); + + rngc->rng.name = pdev->name; + rngc->rng.init = imx_rngc_init; + rngc->rng.read = imx_rngc_read; + + rngc->dev = &pdev->dev; + platform_set_drvdata(pdev, rngc); + + imx_rngc_irq_mask_clear(rngc); + + if (self_test) { + ret = imx_rngc_self_test(rngc); + if (ret) { + dev_err(rngc->dev, "FSL RNGC self test failed.\n"); + goto err; + } + } + + ret = hwrng_register(&rngc->rng); + if (ret) { + dev_err(&pdev->dev, "FSL RNGC registering failed (%d)\n", ret); + goto err; + } + + dev_info(&pdev->dev, "Freescale RNGC registered.\n"); + return 0; + +err: + clk_disable_unprepare(rngc->clk); + + return ret; +} + +static int __exit imx_rngc_remove(struct platform_device *pdev) +{ + struct imx_rngc *rngc = platform_get_drvdata(pdev); + + hwrng_unregister(&rngc->rng); + + clk_disable_unprepare(rngc->clk); + + return 0; +} + +#ifdef CONFIG_PM +static int imx_rngc_suspend(struct device *dev) +{ + struct imx_rngc *rngc = dev_get_drvdata(dev); + + clk_disable_unprepare(rngc->clk); + + return 0; +} + +static int 
imx_rngc_resume(struct device *dev) +{ + struct imx_rngc *rngc = dev_get_drvdata(dev); + + clk_prepare_enable(rngc->clk); + + return 0; +} + +static const struct dev_pm_ops imx_rngc_pm_ops = { + .suspend = imx_rngc_suspend, + .resume = imx_rngc_resume, +}; +#endif + +static const struct of_device_id imx_rngc_dt_ids[] = { + { .compatible = "fsl,imx25-rngb", .data = NULL, }, + { /* sentinel */ } +}; +MODULE_DEVICE_TABLE(of, imx_rngc_dt_ids); + +static struct platform_driver imx_rngc_driver = { + .driver = { + .name = "imx_rngc", +#ifdef CONFIG_PM + .pm = &imx_rngc_pm_ops, +#endif + .of_match_table = imx_rngc_dt_ids, + }, + .remove = __exit_p(imx_rngc_remove), +}; + +module_platform_driver_probe(imx_rngc_driver, imx_rngc_probe); + +MODULE_AUTHOR("Freescale Semiconductor, Inc."); +MODULE_DESCRIPTION("H/W RNGC driver for i.MX"); +MODULE_LICENSE("GPL"); diff --git a/drivers/crypto/Kconfig b/drivers/crypto/Kconfig index 4b75084fabad..fe33c199fc1a 100644 --- a/drivers/crypto/Kconfig +++ b/drivers/crypto/Kconfig @@ -525,12 +525,26 @@ config CRYPTO_DEV_ATMEL_SHA To compile this driver as a module, choose M here: the module will be called atmel-sha. +config CRYPTO_DEV_ATMEL_ECC + tristate "Support for Microchip / Atmel ECC hw accelerator" + depends on ARCH_AT91 || COMPILE_TEST + depends on I2C + select CRYPTO_ECDH + select CRC16 + help + Microchip / Atmel ECC hw accelerator. + Select this if you want to use the Microchip / Atmel module for + ECDH algorithm. + + To compile this driver as a module, choose M here: the module + will be called atmel-ecc. + config CRYPTO_DEV_CCP - bool "Support for AMD Cryptographic Coprocessor" + bool "Support for AMD Secure Processor" depends on ((X86 && PCI) || (ARM64 && (OF_ADDRESS || ACPI))) && HAS_IOMEM help - The AMD Cryptographic Coprocessor provides hardware offload support - for encryption, hashing and related operations. 
+ The AMD Secure Processor provides support for the Cryptographic Coprocessor + (CCP) and the Platform Security Processor (PSP) devices. if CRYPTO_DEV_CCP source "drivers/crypto/ccp/Kconfig" @@ -616,6 +630,14 @@ config CRYPTO_DEV_SUN4I_SS To compile this driver as a module, choose M here: the module will be called sun4i-ss. +config CRYPTO_DEV_SUN4I_SS_PRNG + bool "Support for Allwinner Security System PRNG" + depends on CRYPTO_DEV_SUN4I_SS + select CRYPTO_RNG + help + Select this option if you want to provide kernel-side support for + the Pseudo-Random Number Generator found in the Security System. + config CRYPTO_DEV_ROCKCHIP tristate "Rockchip's Cryptographic Engine driver" depends on OF && ARCH_ROCKCHIP @@ -686,4 +708,25 @@ config CRYPTO_DEV_SAFEXCEL chain mode, AES cipher mode and SHA1/SHA224/SHA256/SHA512 hash algorithms. +config CRYPTO_DEV_ARTPEC6 + tristate "Support for Axis ARTPEC-6/7 hardware crypto acceleration." + depends on ARM && (ARCH_ARTPEC || COMPILE_TEST) + depends on HAS_DMA + depends on OF + select CRYPTO_AEAD + select CRYPTO_AES + select CRYPTO_ALGAPI + select CRYPTO_BLKCIPHER + select CRYPTO_CTR + select CRYPTO_HASH + select CRYPTO_SHA1 + select CRYPTO_SHA256 + select CRYPTO_SHA384 + select CRYPTO_SHA512 + help + Enables the driver for the on-chip crypto accelerator + of Axis ARTPEC SoCs. + + To compile this driver as a module, choose M here. 
+ endif # CRYPTO_HW diff --git a/drivers/crypto/Makefile b/drivers/crypto/Makefile index 2c555a3393b2..808432b44c6b 100644 --- a/drivers/crypto/Makefile +++ b/drivers/crypto/Makefile @@ -1,6 +1,7 @@ obj-$(CONFIG_CRYPTO_DEV_ATMEL_AES) += atmel-aes.o obj-$(CONFIG_CRYPTO_DEV_ATMEL_SHA) += atmel-sha.o obj-$(CONFIG_CRYPTO_DEV_ATMEL_TDES) += atmel-tdes.o +obj-$(CONFIG_CRYPTO_DEV_ATMEL_ECC) += atmel-ecc.o obj-$(CONFIG_CRYPTO_DEV_BFIN_CRC) += bfin_crc.o obj-$(CONFIG_CRYPTO_DEV_CAVIUM_ZIP) += cavium/ obj-$(CONFIG_CRYPTO_DEV_CCP) += ccp/ @@ -35,7 +36,7 @@ obj-$(CONFIG_CRYPTO_DEV_QCE) += qce/ obj-$(CONFIG_CRYPTO_DEV_ROCKCHIP) += rockchip/ obj-$(CONFIG_CRYPTO_DEV_S5P) += s5p-sss.o obj-$(CONFIG_CRYPTO_DEV_SAHARA) += sahara.o -obj-$(CONFIG_CRYPTO_DEV_STM32) += stm32/ +obj-$(CONFIG_ARCH_STM32) += stm32/ obj-$(CONFIG_CRYPTO_DEV_SUN4I_SS) += sunxi-ss/ obj-$(CONFIG_CRYPTO_DEV_TALITOS) += talitos.o obj-$(CONFIG_CRYPTO_DEV_UX500) += ux500/ @@ -43,3 +44,4 @@ obj-$(CONFIG_CRYPTO_DEV_VIRTIO) += virtio/ obj-$(CONFIG_CRYPTO_DEV_VMX) += vmx/ obj-$(CONFIG_CRYPTO_DEV_BCM_SPU) += bcm/ obj-$(CONFIG_CRYPTO_DEV_SAFEXCEL) += inside-secure/ +obj-$(CONFIG_CRYPTO_DEV_ARTPEC6) += axis/ diff --git a/drivers/crypto/atmel-ecc.c b/drivers/crypto/atmel-ecc.c new file mode 100644 index 000000000000..e66f18a0ddd0 --- /dev/null +++ b/drivers/crypto/atmel-ecc.c @@ -0,0 +1,781 @@ +/* + * Microchip / Atmel ECC (I2C) driver. + * + * Copyright (c) 2017, Microchip Technology Inc. + * Author: Tudor Ambarus <tudor.ambarus@microchip.com> + * + * This software is licensed under the terms of the GNU General Public + * License version 2, as published by the Free Software Foundation, and + * may be copied, distributed, and modified under those terms. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. 
+ * + */ + +#include <linux/bitrev.h> +#include <linux/crc16.h> +#include <linux/delay.h> +#include <linux/device.h> +#include <linux/err.h> +#include <linux/errno.h> +#include <linux/i2c.h> +#include <linux/init.h> +#include <linux/kernel.h> +#include <linux/module.h> +#include <linux/of_device.h> +#include <linux/scatterlist.h> +#include <linux/slab.h> +#include <linux/workqueue.h> +#include <crypto/internal/kpp.h> +#include <crypto/ecdh.h> +#include <crypto/kpp.h> +#include "atmel-ecc.h" + +/* Used for binding tfm objects to i2c clients. */ +struct atmel_ecc_driver_data { + struct list_head i2c_client_list; + spinlock_t i2c_list_lock; +} ____cacheline_aligned; + +static struct atmel_ecc_driver_data driver_data; + +/** + * atmel_ecc_i2c_client_priv - i2c_client private data + * @client : pointer to i2c client device + * @i2c_client_list_node: part of i2c_client_list + * @lock : lock for sending i2c commands + * @wake_token : wake token array of zeros + * @wake_token_sz : size in bytes of the wake_token + * @tfm_count : number of active crypto transformations on i2c client + * + * Reads and writes from/to the i2c client are sequential. The first byte + * transmitted to the device is treated as the byte size. Any attempt to send + * more than this number of bytes will cause the device to not ACK those bytes. + * After the host writes a single command byte to the input buffer, reads are + * prohibited until after the device completes command execution. Use a mutex + * when sending i2c commands. + */ +struct atmel_ecc_i2c_client_priv { + struct i2c_client *client; + struct list_head i2c_client_list_node; + struct mutex lock; + u8 wake_token[WAKE_TOKEN_MAX_SIZE]; + size_t wake_token_sz; + atomic_t tfm_count ____cacheline_aligned; +}; + +/** + * atmel_ecdh_ctx - transformation context + * @client : pointer to i2c client device + * @fallback : used for unsupported curves or when user wants to use its own + * private key. 
+ * @public_key : generated when calling set_secret(). It's the responsibility + * of the user to not call set_secret() while + * generate_public_key() or compute_shared_secret() are in flight. + * @curve_id : elliptic curve id + * @n_sz : size in bytes of the n prime + * @do_fallback: true when the device doesn't support the curve or when the user + * wants to use its own private key. + */ +struct atmel_ecdh_ctx { + struct i2c_client *client; + struct crypto_kpp *fallback; + const u8 *public_key; + unsigned int curve_id; + size_t n_sz; + bool do_fallback; +}; + +/** + * atmel_ecc_work_data - data structure representing the work + * @ctx : transformation context. + * @cbk : pointer to a callback function to be invoked upon completion of this + * request. This has the form: + * callback(struct atmel_ecc_work_data *work_data, void *areq, u8 status) + * where: + * @work_data: data structure representing the work + * @areq : optional pointer to an argument passed with the original + * request. + * @status : status returned from the i2c client device or i2c error. + * @areq: optional pointer to a user argument for use at callback time. + * @work: describes the task to be executed. + * @cmd : structure used for communicating with the device. + */ +struct atmel_ecc_work_data { + struct atmel_ecdh_ctx *ctx; + void (*cbk)(struct atmel_ecc_work_data *work_data, void *areq, + int status); + void *areq; + struct work_struct work; + struct atmel_ecc_cmd cmd; +}; + +static u16 atmel_ecc_crc16(u16 crc, const u8 *buffer, size_t len) +{ + return cpu_to_le16(bitrev16(crc16(crc, buffer, len))); +} + +/** + * atmel_ecc_checksum() - Generate 16-bit CRC as required by ATMEL ECC. + * CRC16 verification of the count, opcode, param1, param2 and data bytes. + * The checksum is saved in little-endian format in the least significant + * two bytes of the command. CRC polynomial is 0x8005 and the initial register + * value should be zero. 
+ * + * @cmd : structure used for communicating with the device. + */ +static void atmel_ecc_checksum(struct atmel_ecc_cmd *cmd) +{ + u8 *data = &cmd->count; + size_t len = cmd->count - CRC_SIZE; + u16 *crc16 = (u16 *)(data + len); + + *crc16 = atmel_ecc_crc16(0, data, len); +} + +static void atmel_ecc_init_read_cmd(struct atmel_ecc_cmd *cmd) +{ + cmd->word_addr = COMMAND; + cmd->opcode = OPCODE_READ; + /* + * Read the word from Configuration zone that contains the lock bytes + * (UserExtra, Selector, LockValue, LockConfig). + */ + cmd->param1 = CONFIG_ZONE; + cmd->param2 = DEVICE_LOCK_ADDR; + cmd->count = READ_COUNT; + + atmel_ecc_checksum(cmd); + + cmd->msecs = MAX_EXEC_TIME_READ; + cmd->rxsize = READ_RSP_SIZE; +} + +static void atmel_ecc_init_genkey_cmd(struct atmel_ecc_cmd *cmd, u16 keyid) +{ + cmd->word_addr = COMMAND; + cmd->count = GENKEY_COUNT; + cmd->opcode = OPCODE_GENKEY; + cmd->param1 = GENKEY_MODE_PRIVATE; + /* a random private key will be generated and stored in slot keyID */ + cmd->param2 = cpu_to_le16(keyid); + + atmel_ecc_checksum(cmd); + + cmd->msecs = MAX_EXEC_TIME_GENKEY; + cmd->rxsize = GENKEY_RSP_SIZE; +} + +static int atmel_ecc_init_ecdh_cmd(struct atmel_ecc_cmd *cmd, + struct scatterlist *pubkey) +{ + size_t copied; + + cmd->word_addr = COMMAND; + cmd->count = ECDH_COUNT; + cmd->opcode = OPCODE_ECDH; + cmd->param1 = ECDH_PREFIX_MODE; + /* private key slot */ + cmd->param2 = cpu_to_le16(DATA_SLOT_2); + + /* + * The device only supports NIST P256 ECC keys. The public key size will + * always be the same. Use a macro for the key size to avoid unnecessary + * computations. 
+ */ + copied = sg_copy_to_buffer(pubkey, 1, cmd->data, ATMEL_ECC_PUBKEY_SIZE); + if (copied != ATMEL_ECC_PUBKEY_SIZE) + return -EINVAL; + + atmel_ecc_checksum(cmd); + + cmd->msecs = MAX_EXEC_TIME_ECDH; + cmd->rxsize = ECDH_RSP_SIZE; + + return 0; +} + +/* + * After wake and after execution of a command, there will be error, status, or + * result bytes in the device's output register that can be retrieved by the + * system. When the length of that group is four bytes, the codes returned are + * detailed in error_list. + */ +static int atmel_ecc_status(struct device *dev, u8 *status) +{ + size_t err_list_len = ARRAY_SIZE(error_list); + int i; + u8 err_id = status[1]; + + if (*status != STATUS_SIZE) + return 0; + + if (err_id == STATUS_WAKE_SUCCESSFUL || err_id == STATUS_NOERR) + return 0; + + for (i = 0; i < err_list_len; i++) + if (error_list[i].value == err_id) + break; + + /* if err_id is not in the error_list then ignore it */ + if (i != err_list_len) { + dev_err(dev, "%02x: %s:\n", err_id, error_list[i].error_text); + return err_id; + } + + return 0; +} + +static int atmel_ecc_wakeup(struct i2c_client *client) +{ + struct atmel_ecc_i2c_client_priv *i2c_priv = i2c_get_clientdata(client); + u8 status[STATUS_RSP_SIZE]; + int ret; + + /* + * The device ignores any levels or transitions on the SCL pin when the + * device is idle, asleep or during waking up. Don't check for error + * when waking up the device. + */ + i2c_master_send(client, i2c_priv->wake_token, i2c_priv->wake_token_sz); + + /* + * Wait to wake the device. Typical execution times for ecdh and genkey + * are around tens of milliseconds. Delta is chosen to 50 microseconds. 
+ */ + usleep_range(TWHI_MIN, TWHI_MAX); + + ret = i2c_master_recv(client, status, STATUS_SIZE); + if (ret < 0) + return ret; + + return atmel_ecc_status(&client->dev, status); +} + +static int atmel_ecc_sleep(struct i2c_client *client) +{ + u8 sleep = SLEEP_TOKEN; + + return i2c_master_send(client, &sleep, 1); +} + +static void atmel_ecdh_done(struct atmel_ecc_work_data *work_data, void *areq, + int status) +{ + struct kpp_request *req = areq; + struct atmel_ecdh_ctx *ctx = work_data->ctx; + struct atmel_ecc_cmd *cmd = &work_data->cmd; + size_t copied; + size_t n_sz = ctx->n_sz; + + if (status) + goto free_work_data; + + /* copy the shared secret */ + copied = sg_copy_from_buffer(req->dst, 1, &cmd->data[RSP_DATA_IDX], + n_sz); + if (copied != n_sz) + status = -EINVAL; + + /* fall through */ +free_work_data: + kzfree(work_data); + kpp_request_complete(req, status); +} + +/* + * atmel_ecc_send_receive() - send a command to the device and receive its + * response. + * @client: i2c client device + * @cmd : structure used to communicate with the device + * + * After the device receives a Wake token, a watchdog counter starts within the + * device. After the watchdog timer expires, the device enters sleep mode + * regardless of whether some I/O transmission or command execution is in + * progress. If a command is attempted when insufficient time remains prior to + * watchdog timer execution, the device will return the watchdog timeout error + * code without attempting to execute the command. There is no way to reset the + * counter other than to put the device into sleep or idle mode and then + * wake it up again. 
+ */ +static int atmel_ecc_send_receive(struct i2c_client *client, + struct atmel_ecc_cmd *cmd) +{ + struct atmel_ecc_i2c_client_priv *i2c_priv = i2c_get_clientdata(client); + int ret; + + mutex_lock(&i2c_priv->lock); + + ret = atmel_ecc_wakeup(client); + if (ret) + goto err; + + /* send the command */ + ret = i2c_master_send(client, (u8 *)cmd, cmd->count + WORD_ADDR_SIZE); + if (ret < 0) + goto err; + + /* delay the appropriate amount of time for command to execute */ + msleep(cmd->msecs); + + /* receive the response */ + ret = i2c_master_recv(client, cmd->data, cmd->rxsize); + if (ret < 0) + goto err; + + /* put the device into low-power mode */ + ret = atmel_ecc_sleep(client); + if (ret < 0) + goto err; + + mutex_unlock(&i2c_priv->lock); + return atmel_ecc_status(&client->dev, cmd->data); +err: + mutex_unlock(&i2c_priv->lock); + return ret; +} + +static void atmel_ecc_work_handler(struct work_struct *work) +{ + struct atmel_ecc_work_data *work_data = + container_of(work, struct atmel_ecc_work_data, work); + struct atmel_ecc_cmd *cmd = &work_data->cmd; + struct i2c_client *client = work_data->ctx->client; + int status; + + status = atmel_ecc_send_receive(client, cmd); + work_data->cbk(work_data, work_data->areq, status); +} + +static void atmel_ecc_enqueue(struct atmel_ecc_work_data *work_data, + void (*cbk)(struct atmel_ecc_work_data *work_data, + void *areq, int status), + void *areq) +{ + work_data->cbk = (void *)cbk; + work_data->areq = areq; + + INIT_WORK(&work_data->work, atmel_ecc_work_handler); + schedule_work(&work_data->work); +} + +static unsigned int atmel_ecdh_supported_curve(unsigned int curve_id) +{ + if (curve_id == ECC_CURVE_NIST_P256) + return ATMEL_ECC_NIST_P256_N_SIZE; + + return 0; +} + +/* + * A random private key is generated and stored in the device. The device + * returns the matching public key. 
+ */ +static int atmel_ecdh_set_secret(struct crypto_kpp *tfm, const void *buf, + unsigned int len) +{ + struct atmel_ecdh_ctx *ctx = kpp_tfm_ctx(tfm); + struct atmel_ecc_cmd *cmd; + void *public_key; + struct ecdh params; + int ret = -ENOMEM; + + /* free the old public key, if any */ + kfree(ctx->public_key); + /* make sure you don't free the old public key twice */ + ctx->public_key = NULL; + + if (crypto_ecdh_decode_key(buf, len, &params) < 0) { + dev_err(&ctx->client->dev, "crypto_ecdh_decode_key failed\n"); + return -EINVAL; + } + + ctx->n_sz = atmel_ecdh_supported_curve(params.curve_id); + if (!ctx->n_sz || params.key_size) { + /* fallback to ecdh software implementation */ + ctx->do_fallback = true; + return crypto_kpp_set_secret(ctx->fallback, buf, len); + } + + cmd = kmalloc(sizeof(*cmd), GFP_KERNEL); + if (!cmd) + return -ENOMEM; + + /* + * The device only supports NIST P256 ECC keys. The public key size will + * always be the same. Use a macro for the key size to avoid unnecessary + * computations. 
+ */ + public_key = kmalloc(ATMEL_ECC_PUBKEY_SIZE, GFP_KERNEL); + if (!public_key) + goto free_cmd; + + ctx->do_fallback = false; + ctx->curve_id = params.curve_id; + + atmel_ecc_init_genkey_cmd(cmd, DATA_SLOT_2); + + ret = atmel_ecc_send_receive(ctx->client, cmd); + if (ret) + goto free_public_key; + + /* save the public key */ + memcpy(public_key, &cmd->data[RSP_DATA_IDX], ATMEL_ECC_PUBKEY_SIZE); + ctx->public_key = public_key; + + kfree(cmd); + return 0; + +free_public_key: + kfree(public_key); +free_cmd: + kfree(cmd); + return ret; +} + +static int atmel_ecdh_generate_public_key(struct kpp_request *req) +{ + struct crypto_kpp *tfm = crypto_kpp_reqtfm(req); + struct atmel_ecdh_ctx *ctx = kpp_tfm_ctx(tfm); + size_t copied; + int ret = 0; + + if (ctx->do_fallback) { + kpp_request_set_tfm(req, ctx->fallback); + return crypto_kpp_generate_public_key(req); + } + + /* public key was saved at private key generation */ + copied = sg_copy_from_buffer(req->dst, 1, ctx->public_key, + ATMEL_ECC_PUBKEY_SIZE); + if (copied != ATMEL_ECC_PUBKEY_SIZE) + ret = -EINVAL; + + return ret; +} + +static int atmel_ecdh_compute_shared_secret(struct kpp_request *req) +{ + struct crypto_kpp *tfm = crypto_kpp_reqtfm(req); + struct atmel_ecdh_ctx *ctx = kpp_tfm_ctx(tfm); + struct atmel_ecc_work_data *work_data; + gfp_t gfp; + int ret; + + if (ctx->do_fallback) { + kpp_request_set_tfm(req, ctx->fallback); + return crypto_kpp_compute_shared_secret(req); + } + + gfp = (req->base.flags & CRYPTO_TFM_REQ_MAY_SLEEP) ? 
GFP_KERNEL : + GFP_ATOMIC; + + work_data = kmalloc(sizeof(*work_data), gfp); + if (!work_data) + return -ENOMEM; + + work_data->ctx = ctx; + + ret = atmel_ecc_init_ecdh_cmd(&work_data->cmd, req->src); + if (ret) + goto free_work_data; + + atmel_ecc_enqueue(work_data, atmel_ecdh_done, req); + + return -EINPROGRESS; + +free_work_data: + kfree(work_data); + return ret; +} + +static struct i2c_client *atmel_ecc_i2c_client_alloc(void) +{ + struct atmel_ecc_i2c_client_priv *i2c_priv, *min_i2c_priv = NULL; + struct i2c_client *client = ERR_PTR(-ENODEV); + int min_tfm_cnt = INT_MAX; + int tfm_cnt; + + spin_lock(&driver_data.i2c_list_lock); + + if (list_empty(&driver_data.i2c_client_list)) { + spin_unlock(&driver_data.i2c_list_lock); + return ERR_PTR(-ENODEV); + } + + list_for_each_entry(i2c_priv, &driver_data.i2c_client_list, + i2c_client_list_node) { + tfm_cnt = atomic_read(&i2c_priv->tfm_count); + if (tfm_cnt < min_tfm_cnt) { + min_tfm_cnt = tfm_cnt; + min_i2c_priv = i2c_priv; + } + if (!min_tfm_cnt) + break; + } + + if (min_i2c_priv) { + atomic_inc(&min_i2c_priv->tfm_count); + client = min_i2c_priv->client; + } + + spin_unlock(&driver_data.i2c_list_lock); + + return client; +} + +static void atmel_ecc_i2c_client_free(struct i2c_client *client) +{ + struct atmel_ecc_i2c_client_priv *i2c_priv = i2c_get_clientdata(client); + + atomic_dec(&i2c_priv->tfm_count); +} + +static int atmel_ecdh_init_tfm(struct crypto_kpp *tfm) +{ + const char *alg = kpp_alg_name(tfm); + struct crypto_kpp *fallback; + struct atmel_ecdh_ctx *ctx = kpp_tfm_ctx(tfm); + + ctx->client = atmel_ecc_i2c_client_alloc(); + if (IS_ERR(ctx->client)) { + pr_err("tfm - i2c_client binding failed\n"); + return PTR_ERR(ctx->client); + } + + fallback = crypto_alloc_kpp(alg, 0, CRYPTO_ALG_NEED_FALLBACK); + if (IS_ERR(fallback)) { + dev_err(&ctx->client->dev, "Failed to allocate transformation for '%s': %ld\n", + alg, PTR_ERR(fallback)); + return PTR_ERR(fallback); + } + + crypto_kpp_set_flags(fallback, 
crypto_kpp_get_flags(tfm)); + + dev_info(&ctx->client->dev, "Using '%s' as fallback implementation.\n", + crypto_tfm_alg_driver_name(crypto_kpp_tfm(fallback))); + + ctx->fallback = fallback; + + return 0; +} + +static void atmel_ecdh_exit_tfm(struct crypto_kpp *tfm) +{ + struct atmel_ecdh_ctx *ctx = kpp_tfm_ctx(tfm); + + kfree(ctx->public_key); + crypto_free_kpp(ctx->fallback); + atmel_ecc_i2c_client_free(ctx->client); +} + +static unsigned int atmel_ecdh_max_size(struct crypto_kpp *tfm) +{ + struct atmel_ecdh_ctx *ctx = kpp_tfm_ctx(tfm); + + if (ctx->fallback) + return crypto_kpp_maxsize(ctx->fallback); + + /* + * The device only supports NIST P256 ECC keys. The public key size will + * always be the same. Use a macro for the key size to avoid unnecessary + * computations. + */ + return ATMEL_ECC_PUBKEY_SIZE; +} + +static struct kpp_alg atmel_ecdh = { + .set_secret = atmel_ecdh_set_secret, + .generate_public_key = atmel_ecdh_generate_public_key, + .compute_shared_secret = atmel_ecdh_compute_shared_secret, + .init = atmel_ecdh_init_tfm, + .exit = atmel_ecdh_exit_tfm, + .max_size = atmel_ecdh_max_size, + .base = { + .cra_flags = CRYPTO_ALG_NEED_FALLBACK, + .cra_name = "ecdh", + .cra_driver_name = "atmel-ecdh", + .cra_priority = ATMEL_ECC_PRIORITY, + .cra_module = THIS_MODULE, + .cra_ctxsize = sizeof(struct atmel_ecdh_ctx), + }, +}; + +static inline size_t atmel_ecc_wake_token_sz(u32 bus_clk_rate) +{ + u32 no_of_bits = DIV_ROUND_UP(TWLO_USEC * bus_clk_rate, USEC_PER_SEC); + + /* return the size of the wake_token in bytes */ + return DIV_ROUND_UP(no_of_bits, 8); +} + +static int device_sanity_check(struct i2c_client *client) +{ + struct atmel_ecc_cmd *cmd; + int ret; + + cmd = kmalloc(sizeof(*cmd), GFP_KERNEL); + if (!cmd) + return -ENOMEM; + + atmel_ecc_init_read_cmd(cmd); + + ret = atmel_ecc_send_receive(client, cmd); + if (ret) + goto free_cmd; + + /* + * It is vital that the Configuration, Data and OTP zones be locked + * prior to release into the field of the 
system containing the device. + * Failure to lock these zones may permit modification of any secret + * keys and may lead to other security problems. + */ + if (cmd->data[LOCK_CONFIG_IDX] || cmd->data[LOCK_VALUE_IDX]) { + dev_err(&client->dev, "Configuration or Data and OTP zones are unlocked!\n"); + ret = -ENOTSUPP; + } + + /* fall through */ +free_cmd: + kfree(cmd); + return ret; +} + +static int atmel_ecc_probe(struct i2c_client *client, + const struct i2c_device_id *id) +{ + struct atmel_ecc_i2c_client_priv *i2c_priv; + struct device *dev = &client->dev; + int ret; + u32 bus_clk_rate; + + if (!i2c_check_functionality(client->adapter, I2C_FUNC_I2C)) { + dev_err(dev, "I2C_FUNC_I2C not supported\n"); + return -ENODEV; + } + + ret = of_property_read_u32(client->adapter->dev.of_node, + "clock-frequency", &bus_clk_rate); + if (ret) { + dev_err(dev, "of: failed to read clock-frequency property\n"); + return ret; + } + + if (bus_clk_rate > 1000000L) { + dev_err(dev, "%d exceeds maximum supported clock frequency (1MHz)\n", + bus_clk_rate); + return -EINVAL; + } + + i2c_priv = devm_kmalloc(dev, sizeof(*i2c_priv), GFP_KERNEL); + if (!i2c_priv) + return -ENOMEM; + + i2c_priv->client = client; + mutex_init(&i2c_priv->lock); + + /* + * WAKE_TOKEN_MAX_SIZE was calculated for the maximum bus_clk_rate - + * 1MHz. The previous bus_clk_rate check ensures us that wake_token_sz + * will always be smaller than or equal to WAKE_TOKEN_MAX_SIZE. 
+ */ + i2c_priv->wake_token_sz = atmel_ecc_wake_token_sz(bus_clk_rate); + + memset(i2c_priv->wake_token, 0, sizeof(i2c_priv->wake_token)); + + atomic_set(&i2c_priv->tfm_count, 0); + + i2c_set_clientdata(client, i2c_priv); + + ret = device_sanity_check(client); + if (ret) + return ret; + + spin_lock(&driver_data.i2c_list_lock); + list_add_tail(&i2c_priv->i2c_client_list_node, + &driver_data.i2c_client_list); + spin_unlock(&driver_data.i2c_list_lock); + + ret = crypto_register_kpp(&atmel_ecdh); + if (ret) { + spin_lock(&driver_data.i2c_list_lock); + list_del(&i2c_priv->i2c_client_list_node); + spin_unlock(&driver_data.i2c_list_lock); + + dev_err(dev, "%s alg registration failed\n", + atmel_ecdh.base.cra_driver_name); + } else { + dev_info(dev, "atmel ecc algorithms registered in /proc/crypto\n"); + } + + return ret; +} + +static int atmel_ecc_remove(struct i2c_client *client) +{ + struct atmel_ecc_i2c_client_priv *i2c_priv = i2c_get_clientdata(client); + + /* Return EBUSY if i2c client already allocated. 
*/ + if (atomic_read(&i2c_priv->tfm_count)) { + dev_err(&client->dev, "Device is busy\n"); + return -EBUSY; + } + + crypto_unregister_kpp(&atmel_ecdh); + + spin_lock(&driver_data.i2c_list_lock); + list_del(&i2c_priv->i2c_client_list_node); + spin_unlock(&driver_data.i2c_list_lock); + + return 0; +} + +#ifdef CONFIG_OF +static const struct of_device_id atmel_ecc_dt_ids[] = { + { + .compatible = "atmel,atecc508a", + }, { + /* sentinel */ + } +}; +MODULE_DEVICE_TABLE(of, atmel_ecc_dt_ids); +#endif + +static const struct i2c_device_id atmel_ecc_id[] = { + { "atecc508a", 0 }, + { } +}; +MODULE_DEVICE_TABLE(i2c, atmel_ecc_id); + +static struct i2c_driver atmel_ecc_driver = { + .driver = { + .name = "atmel-ecc", + .of_match_table = of_match_ptr(atmel_ecc_dt_ids), + }, + .probe = atmel_ecc_probe, + .remove = atmel_ecc_remove, + .id_table = atmel_ecc_id, +}; + +static int __init atmel_ecc_init(void) +{ + spin_lock_init(&driver_data.i2c_list_lock); + INIT_LIST_HEAD(&driver_data.i2c_client_list); + return i2c_add_driver(&atmel_ecc_driver); +} + +static void __exit atmel_ecc_exit(void) +{ + flush_scheduled_work(); + i2c_del_driver(&atmel_ecc_driver); +} + +module_init(atmel_ecc_init); +module_exit(atmel_ecc_exit); + +MODULE_AUTHOR("Tudor Ambarus <tudor.ambarus@microchip.com>"); +MODULE_DESCRIPTION("Microchip / Atmel ECC (I2C) driver"); +MODULE_LICENSE("GPL v2"); diff --git a/drivers/crypto/atmel-ecc.h b/drivers/crypto/atmel-ecc.h new file mode 100644 index 000000000000..25232c8abcc2 --- /dev/null +++ b/drivers/crypto/atmel-ecc.h @@ -0,0 +1,128 @@ +/* + * Copyright (c) 2017, Microchip Technology Inc. + * Author: Tudor Ambarus <tudor.ambarus@microchip.com> + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. 
+ * + * This program is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + * + * You should have received a copy of the GNU General Public License along with + * this program. If not, see <http://www.gnu.org/licenses/>. + * + */ + +#ifndef __ATMEL_ECC_H__ +#define __ATMEL_ECC_H__ + +#define ATMEL_ECC_PRIORITY 300 + +#define COMMAND 0x03 /* packet function */ +#define SLEEP_TOKEN 0x01 +#define WAKE_TOKEN_MAX_SIZE 8 + +/* Definitions of Data and Command sizes */ +#define WORD_ADDR_SIZE 1 +#define COUNT_SIZE 1 +#define CRC_SIZE 2 +#define CMD_OVERHEAD_SIZE (COUNT_SIZE + CRC_SIZE) + +/* size in bytes of the n prime */ +#define ATMEL_ECC_NIST_P256_N_SIZE 32 +#define ATMEL_ECC_PUBKEY_SIZE (2 * ATMEL_ECC_NIST_P256_N_SIZE) + +#define STATUS_RSP_SIZE 4 +#define ECDH_RSP_SIZE (32 + CMD_OVERHEAD_SIZE) +#define GENKEY_RSP_SIZE (ATMEL_ECC_PUBKEY_SIZE + \ + CMD_OVERHEAD_SIZE) +#define READ_RSP_SIZE (4 + CMD_OVERHEAD_SIZE) +#define MAX_RSP_SIZE GENKEY_RSP_SIZE + +/** + * atmel_ecc_cmd - structure used for communicating with the device. + * @word_addr: indicates the function of the packet sent to the device. This + * byte should have a value of COMMAND for normal operation. + * @count : number of bytes to be transferred to (or from) the device. + * @opcode : the command code. + * @param1 : the first parameter; always present. + * @param2 : the second parameter; always present. + * @data : optional remaining input data. Includes a 2-byte CRC. + * @rxsize : size of the data received from i2c client. 
+ * @msecs : command execution time in milliseconds + */ +struct atmel_ecc_cmd { + u8 word_addr; + u8 count; + u8 opcode; + u8 param1; + u16 param2; + u8 data[MAX_RSP_SIZE]; + u8 msecs; + u16 rxsize; +} __packed; + +/* Status/Error codes */ +#define STATUS_SIZE 0x04 +#define STATUS_NOERR 0x00 +#define STATUS_WAKE_SUCCESSFUL 0x11 + +static const struct { + u8 value; + const char *error_text; +} error_list[] = { + { 0x01, "CheckMac or Verify miscompare" }, + { 0x03, "Parse Error" }, + { 0x05, "ECC Fault" }, + { 0x0F, "Execution Error" }, + { 0xEE, "Watchdog about to expire" }, + { 0xFF, "CRC or other communication error" }, +}; + +/* Definitions for eeprom organization */ +#define CONFIG_ZONE 0 + +/* Definitions for Indexes common to all commands */ +#define RSP_DATA_IDX 1 /* buffer index of data in response */ +#define DATA_SLOT_2 2 /* used for ECDH private key */ + +/* Definitions for the device lock state */ +#define DEVICE_LOCK_ADDR 0x15 +#define LOCK_VALUE_IDX (RSP_DATA_IDX + 2) +#define LOCK_CONFIG_IDX (RSP_DATA_IDX + 3) + +/* + * Wake High delay to data communication (microseconds). SDA should be stable + * high for this entire duration. 
+ */ +#define TWHI_MIN 1500 +#define TWHI_MAX 1550 + +/* Wake Low duration */ +#define TWLO_USEC 60 + +/* Command execution time (milliseconds) */ +#define MAX_EXEC_TIME_ECDH 58 +#define MAX_EXEC_TIME_GENKEY 115 +#define MAX_EXEC_TIME_READ 1 + +/* Command opcode */ +#define OPCODE_ECDH 0x43 +#define OPCODE_GENKEY 0x40 +#define OPCODE_READ 0x02 + +/* Definitions for the READ Command */ +#define READ_COUNT 7 + +/* Definitions for the GenKey Command */ +#define GENKEY_COUNT 7 +#define GENKEY_MODE_PRIVATE 0x04 + +/* Definitions for the ECDH Command */ +#define ECDH_COUNT 71 +#define ECDH_PREFIX_MODE 0x00 + +#endif /* __ATMEL_ECC_H__ */ diff --git a/drivers/crypto/atmel-sha.c b/drivers/crypto/atmel-sha.c index dad4e5bad827..3e2f41b3eaf3 100644 --- a/drivers/crypto/atmel-sha.c +++ b/drivers/crypto/atmel-sha.c @@ -2883,7 +2883,7 @@ sha_dd_err: static int atmel_sha_remove(struct platform_device *pdev) { - static struct atmel_sha_dev *sha_dd; + struct atmel_sha_dev *sha_dd; sha_dd = platform_get_drvdata(pdev); if (!sha_dd) diff --git a/drivers/crypto/atmel-tdes.c b/drivers/crypto/atmel-tdes.c index b25f1b3c981f..f4b335dda568 100644 --- a/drivers/crypto/atmel-tdes.c +++ b/drivers/crypto/atmel-tdes.c @@ -1487,7 +1487,7 @@ tdes_dd_err: static int atmel_tdes_remove(struct platform_device *pdev) { - static struct atmel_tdes_dev *tdes_dd; + struct atmel_tdes_dev *tdes_dd; tdes_dd = platform_get_drvdata(pdev); if (!tdes_dd) diff --git a/drivers/crypto/axis/Makefile b/drivers/crypto/axis/Makefile new file mode 100644 index 000000000000..be9a84a4b667 --- /dev/null +++ b/drivers/crypto/axis/Makefile @@ -0,0 +1 @@ +obj-$(CONFIG_CRYPTO_DEV_ARTPEC6) := artpec6_crypto.o diff --git a/drivers/crypto/axis/artpec6_crypto.c b/drivers/crypto/axis/artpec6_crypto.c new file mode 100644 index 000000000000..d9fbbf01062b --- /dev/null +++ b/drivers/crypto/axis/artpec6_crypto.c @@ -0,0 +1,3192 @@ +/* + * Driver for ARTPEC-6 crypto block using the kernel asynchronous crypto api. 
+ * + * Copyright (C) 2014-2017 Axis Communications AB + */ +#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt + +#include <linux/bitfield.h> +#include <linux/crypto.h> +#include <linux/debugfs.h> +#include <linux/delay.h> +#include <linux/dma-mapping.h> +#include <linux/fault-inject.h> +#include <linux/init.h> +#include <linux/interrupt.h> +#include <linux/kernel.h> +#include <linux/list.h> +#include <linux/module.h> +#include <linux/of.h> +#include <linux/platform_device.h> +#include <linux/scatterlist.h> +#include <linux/slab.h> + +#include <crypto/aes.h> +#include <crypto/internal/aead.h> +#include <crypto/internal/hash.h> +#include <crypto/internal/skcipher.h> +#include <crypto/scatterwalk.h> +#include <crypto/sha.h> +#include <crypto/xts.h> + +/* Max length of a line in all cache levels for Artpec SoCs. */ +#define ARTPEC_CACHE_LINE_MAX 32 + +#define PDMA_OUT_CFG 0x0000 +#define PDMA_OUT_BUF_CFG 0x0004 +#define PDMA_OUT_CMD 0x0008 +#define PDMA_OUT_DESCRQ_PUSH 0x0010 +#define PDMA_OUT_DESCRQ_STAT 0x0014 + +#define A6_PDMA_IN_CFG 0x0028 +#define A6_PDMA_IN_BUF_CFG 0x002c +#define A6_PDMA_IN_CMD 0x0030 +#define A6_PDMA_IN_STATQ_PUSH 0x0038 +#define A6_PDMA_IN_DESCRQ_PUSH 0x0044 +#define A6_PDMA_IN_DESCRQ_STAT 0x0048 +#define A6_PDMA_INTR_MASK 0x0068 +#define A6_PDMA_ACK_INTR 0x006c +#define A6_PDMA_MASKED_INTR 0x0074 + +#define A7_PDMA_IN_CFG 0x002c +#define A7_PDMA_IN_BUF_CFG 0x0030 +#define A7_PDMA_IN_CMD 0x0034 +#define A7_PDMA_IN_STATQ_PUSH 0x003c +#define A7_PDMA_IN_DESCRQ_PUSH 0x0048 +#define A7_PDMA_IN_DESCRQ_STAT 0x004C +#define A7_PDMA_INTR_MASK 0x006c +#define A7_PDMA_ACK_INTR 0x0070 +#define A7_PDMA_MASKED_INTR 0x0078 + +#define PDMA_OUT_CFG_EN BIT(0) + +#define PDMA_OUT_BUF_CFG_DATA_BUF_SIZE GENMASK(4, 0) +#define PDMA_OUT_BUF_CFG_DESCR_BUF_SIZE GENMASK(9, 5) + +#define PDMA_OUT_CMD_START BIT(0) +#define A6_PDMA_OUT_CMD_STOP BIT(3) +#define A7_PDMA_OUT_CMD_STOP BIT(2) + +#define PDMA_OUT_DESCRQ_PUSH_LEN GENMASK(5, 0) +#define PDMA_OUT_DESCRQ_PUSH_ADDR 
GENMASK(31, 6) + +#define PDMA_OUT_DESCRQ_STAT_LEVEL GENMASK(3, 0) +#define PDMA_OUT_DESCRQ_STAT_SIZE GENMASK(7, 4) + +#define PDMA_IN_CFG_EN BIT(0) + +#define PDMA_IN_BUF_CFG_DATA_BUF_SIZE GENMASK(4, 0) +#define PDMA_IN_BUF_CFG_DESCR_BUF_SIZE GENMASK(9, 5) +#define PDMA_IN_BUF_CFG_STAT_BUF_SIZE GENMASK(14, 10) + +#define PDMA_IN_CMD_START BIT(0) +#define A6_PDMA_IN_CMD_FLUSH_STAT BIT(2) +#define A6_PDMA_IN_CMD_STOP BIT(3) +#define A7_PDMA_IN_CMD_FLUSH_STAT BIT(1) +#define A7_PDMA_IN_CMD_STOP BIT(2) + +#define PDMA_IN_STATQ_PUSH_LEN GENMASK(5, 0) +#define PDMA_IN_STATQ_PUSH_ADDR GENMASK(31, 6) + +#define PDMA_IN_DESCRQ_PUSH_LEN GENMASK(5, 0) +#define PDMA_IN_DESCRQ_PUSH_ADDR GENMASK(31, 6) + +#define PDMA_IN_DESCRQ_STAT_LEVEL GENMASK(3, 0) +#define PDMA_IN_DESCRQ_STAT_SIZE GENMASK(7, 4) + +#define A6_PDMA_INTR_MASK_IN_DATA BIT(2) +#define A6_PDMA_INTR_MASK_IN_EOP BIT(3) +#define A6_PDMA_INTR_MASK_IN_EOP_FLUSH BIT(4) + +#define A7_PDMA_INTR_MASK_IN_DATA BIT(3) +#define A7_PDMA_INTR_MASK_IN_EOP BIT(4) +#define A7_PDMA_INTR_MASK_IN_EOP_FLUSH BIT(5) + +#define A6_CRY_MD_OPER GENMASK(19, 16) + +#define A6_CRY_MD_HASH_SEL_CTX GENMASK(21, 20) +#define A6_CRY_MD_HASH_HMAC_FIN BIT(23) + +#define A6_CRY_MD_CIPHER_LEN GENMASK(21, 20) +#define A6_CRY_MD_CIPHER_DECR BIT(22) +#define A6_CRY_MD_CIPHER_TWEAK BIT(23) +#define A6_CRY_MD_CIPHER_DSEQ BIT(24) + +#define A7_CRY_MD_OPER GENMASK(11, 8) + +#define A7_CRY_MD_HASH_SEL_CTX GENMASK(13, 12) +#define A7_CRY_MD_HASH_HMAC_FIN BIT(15) + +#define A7_CRY_MD_CIPHER_LEN GENMASK(13, 12) +#define A7_CRY_MD_CIPHER_DECR BIT(14) +#define A7_CRY_MD_CIPHER_TWEAK BIT(15) +#define A7_CRY_MD_CIPHER_DSEQ BIT(16) + +/* DMA metadata constants */ +#define regk_crypto_aes_cbc 0x00000002 +#define regk_crypto_aes_ctr 0x00000003 +#define regk_crypto_aes_ecb 0x00000001 +#define regk_crypto_aes_gcm 0x00000004 +#define regk_crypto_aes_xts 0x00000005 +#define regk_crypto_cache 0x00000002 +#define a6_regk_crypto_dlkey 0x0000000a +#define a7_regk_crypto_dlkey 
0x0000000e +#define regk_crypto_ext 0x00000001 +#define regk_crypto_hmac_sha1 0x00000007 +#define regk_crypto_hmac_sha256 0x00000009 +#define regk_crypto_hmac_sha384 0x0000000b +#define regk_crypto_hmac_sha512 0x0000000d +#define regk_crypto_init 0x00000000 +#define regk_crypto_key_128 0x00000000 +#define regk_crypto_key_192 0x00000001 +#define regk_crypto_key_256 0x00000002 +#define regk_crypto_null 0x00000000 +#define regk_crypto_sha1 0x00000006 +#define regk_crypto_sha256 0x00000008 +#define regk_crypto_sha384 0x0000000a +#define regk_crypto_sha512 0x0000000c + +/* DMA descriptor structures */ +struct pdma_descr_ctrl { + unsigned char short_descr : 1; + unsigned char pad1 : 1; + unsigned char eop : 1; + unsigned char intr : 1; + unsigned char short_len : 3; + unsigned char pad2 : 1; +} __packed; + +struct pdma_data_descr { + unsigned int len : 24; + unsigned int buf : 32; +} __packed; + +struct pdma_short_descr { + unsigned char data[7]; +} __packed; + +struct pdma_descr { + struct pdma_descr_ctrl ctrl; + union { + struct pdma_data_descr data; + struct pdma_short_descr shrt; + }; +}; + +struct pdma_stat_descr { + unsigned char pad1 : 1; + unsigned char pad2 : 1; + unsigned char eop : 1; + unsigned char pad3 : 5; + unsigned int len : 24; +}; + +/* Each descriptor array can hold max 64 entries */ +#define PDMA_DESCR_COUNT 64 + +#define MODULE_NAME "Artpec-6 CA" + +/* Hash modes (including HMAC variants) */ +#define ARTPEC6_CRYPTO_HASH_SHA1 1 +#define ARTPEC6_CRYPTO_HASH_SHA256 2 +#define ARTPEC6_CRYPTO_HASH_SHA384 3 +#define ARTPEC6_CRYPTO_HASH_SHA512 4 + +/* Crypto modes */ +#define ARTPEC6_CRYPTO_CIPHER_AES_ECB 1 +#define ARTPEC6_CRYPTO_CIPHER_AES_CBC 2 +#define ARTPEC6_CRYPTO_CIPHER_AES_CTR 3 +#define ARTPEC6_CRYPTO_CIPHER_AES_XTS 5 + +/* The PDMA is a DMA-engine tightly coupled with a ciphering engine. + * It operates on a descriptor array with up to 64 descriptor entries. + * The arrays must be 64 byte aligned in memory. 
+ * + * The ciphering unit has no registers and is completely controlled by + * a 4-byte metadata that is inserted at the beginning of each dma packet. + * + * A dma packet is a sequence of descriptors terminated by setting the .eop + * field in the final descriptor of the packet. + * + * Multiple packets are used for providing context data, key data and + * the plain/ciphertext. + * + * PDMA Descriptors (Array) + * +------+------+------+~~+-------+------+---- + * | 0 | 1 | 2 |~~| 11 EOP| 12 | .... + * +--+---+--+---+----+-+~~+-------+----+-+---- + * | | | | | + * | | | | | + * __|__ +-------++-------++-------+ +----+ + * | MD | |Payload||Payload||Payload| | MD | + * +-----+ +-------++-------++-------+ +----+ + */ + +struct artpec6_crypto_bounce_buffer { + struct list_head list; + size_t length; + struct scatterlist *sg; + size_t offset; + /* buf is aligned to ARTPEC_CACHE_LINE_MAX and + * holds up to ARTPEC_CACHE_LINE_MAX bytes data. + */ + void *buf; +}; + +struct artpec6_crypto_dma_map { + dma_addr_t dma_addr; + size_t size; + enum dma_data_direction dir; +}; + +struct artpec6_crypto_dma_descriptors { + struct pdma_descr out[PDMA_DESCR_COUNT] __aligned(64); + struct pdma_descr in[PDMA_DESCR_COUNT] __aligned(64); + u32 stat[PDMA_DESCR_COUNT] __aligned(64); + struct list_head bounce_buffers; + /* Enough maps for all out/in buffers, and all three descr. 
arrays */ + struct artpec6_crypto_dma_map maps[PDMA_DESCR_COUNT * 2 + 2]; + dma_addr_t out_dma_addr; + dma_addr_t in_dma_addr; + dma_addr_t stat_dma_addr; + size_t out_cnt; + size_t in_cnt; + size_t map_count; +}; + +enum artpec6_crypto_variant { + ARTPEC6_CRYPTO, + ARTPEC7_CRYPTO, +}; + +struct artpec6_crypto { + void __iomem *base; + spinlock_t queue_lock; + struct list_head queue; /* waiting for pdma fifo space */ + struct list_head pending; /* submitted to pdma fifo */ + struct tasklet_struct task; + struct kmem_cache *dma_cache; + int pending_count; + struct timer_list timer; + enum artpec6_crypto_variant variant; + void *pad_buffer; /* cache-aligned block padding buffer */ + void *zero_buffer; +}; + +enum artpec6_crypto_hash_flags { + HASH_FLAG_INIT_CTX = 2, + HASH_FLAG_UPDATE = 4, + HASH_FLAG_FINALIZE = 8, + HASH_FLAG_HMAC = 16, + HASH_FLAG_UPDATE_KEY = 32, +}; + +struct artpec6_crypto_req_common { + struct list_head list; + struct artpec6_crypto_dma_descriptors *dma; + struct crypto_async_request *req; + void (*complete)(struct crypto_async_request *req); + gfp_t gfp_flags; +}; + +struct artpec6_hash_request_context { + char partial_buffer[SHA512_BLOCK_SIZE]; + char partial_buffer_out[SHA512_BLOCK_SIZE]; + char key_buffer[SHA512_BLOCK_SIZE]; + char pad_buffer[SHA512_BLOCK_SIZE + 32]; + unsigned char digeststate[SHA512_DIGEST_SIZE]; + size_t partial_bytes; + u64 digcnt; + u32 key_md; + u32 hash_md; + enum artpec6_crypto_hash_flags hash_flags; + struct artpec6_crypto_req_common common; +}; + +struct artpec6_hash_export_state { + char partial_buffer[SHA512_BLOCK_SIZE]; + unsigned char digeststate[SHA512_DIGEST_SIZE]; + size_t partial_bytes; + u64 digcnt; + int oper; + unsigned int hash_flags; +}; + +struct artpec6_hashalg_context { + char hmac_key[SHA512_BLOCK_SIZE]; + size_t hmac_key_length; + struct crypto_shash *child_hash; +}; + +struct artpec6_crypto_request_context { + u32 cipher_md; + bool decrypt; + struct artpec6_crypto_req_common common; +}; + 
+struct artpec6_cryptotfm_context { + unsigned char aes_key[2*AES_MAX_KEY_SIZE]; + size_t key_length; + u32 key_md; + int crypto_type; + struct crypto_skcipher *fallback; +}; + +struct artpec6_crypto_aead_hw_ctx { + __be64 aad_length_bits; + __be64 text_length_bits; + __u8 J0[AES_BLOCK_SIZE]; +}; + +struct artpec6_crypto_aead_req_ctx { + struct artpec6_crypto_aead_hw_ctx hw_ctx; + u32 cipher_md; + bool decrypt; + struct artpec6_crypto_req_common common; + __u8 decryption_tag[AES_BLOCK_SIZE] ____cacheline_aligned; +}; + +/* The crypto framework makes it hard to avoid this global. */ +static struct device *artpec6_crypto_dev; + +static struct dentry *dbgfs_root; + +#ifdef CONFIG_FAULT_INJECTION +static DECLARE_FAULT_ATTR(artpec6_crypto_fail_status_read); +static DECLARE_FAULT_ATTR(artpec6_crypto_fail_dma_array_full); +#endif + +enum { + ARTPEC6_CRYPTO_PREPARE_HASH_NO_START, + ARTPEC6_CRYPTO_PREPARE_HASH_START, +}; + +static int artpec6_crypto_prepare_aead(struct aead_request *areq); +static int artpec6_crypto_prepare_crypto(struct skcipher_request *areq); +static int artpec6_crypto_prepare_hash(struct ahash_request *areq); + +static void +artpec6_crypto_complete_crypto(struct crypto_async_request *req); +static void +artpec6_crypto_complete_cbc_encrypt(struct crypto_async_request *req); +static void +artpec6_crypto_complete_cbc_decrypt(struct crypto_async_request *req); +static void +artpec6_crypto_complete_aead(struct crypto_async_request *req); +static void +artpec6_crypto_complete_hash(struct crypto_async_request *req); + +static int +artpec6_crypto_common_destroy(struct artpec6_crypto_req_common *common); + +static void +artpec6_crypto_start_dma(struct artpec6_crypto_req_common *common); + +struct artpec6_crypto_walk { + struct scatterlist *sg; + size_t offset; +}; + +static void artpec6_crypto_walk_init(struct artpec6_crypto_walk *awalk, + struct scatterlist *sg) +{ + awalk->sg = sg; + awalk->offset = 0; +} + +static size_t artpec6_crypto_walk_advance(struct 
artpec6_crypto_walk *awalk, + size_t nbytes) +{ + while (nbytes && awalk->sg) { + size_t piece; + + WARN_ON(awalk->offset > awalk->sg->length); + + piece = min(nbytes, (size_t)awalk->sg->length - awalk->offset); + nbytes -= piece; + awalk->offset += piece; + if (awalk->offset == awalk->sg->length) { + awalk->sg = sg_next(awalk->sg); + awalk->offset = 0; + } + + } + + return nbytes; +} + +static size_t +artpec6_crypto_walk_chunklen(const struct artpec6_crypto_walk *awalk) +{ + WARN_ON(awalk->sg->length == awalk->offset); + + return awalk->sg->length - awalk->offset; +} + +static dma_addr_t +artpec6_crypto_walk_chunk_phys(const struct artpec6_crypto_walk *awalk) +{ + return sg_phys(awalk->sg) + awalk->offset; +} + +static void +artpec6_crypto_copy_bounce_buffers(struct artpec6_crypto_req_common *common) +{ + struct artpec6_crypto_dma_descriptors *dma = common->dma; + struct artpec6_crypto_bounce_buffer *b; + struct artpec6_crypto_bounce_buffer *next; + + list_for_each_entry_safe(b, next, &dma->bounce_buffers, list) { + pr_debug("bounce entry %p: %zu bytes @ %zu from %p\n", + b, b->length, b->offset, b->buf); + sg_pcopy_from_buffer(b->sg, + 1, + b->buf, + b->length, + b->offset); + + list_del(&b->list); + kfree(b); + } +} + +static inline bool artpec6_crypto_busy(void) +{ + struct artpec6_crypto *ac = dev_get_drvdata(artpec6_crypto_dev); + int fifo_count = ac->pending_count; + + return fifo_count > 6; +} + +static int artpec6_crypto_submit(struct artpec6_crypto_req_common *req) +{ + struct artpec6_crypto *ac = dev_get_drvdata(artpec6_crypto_dev); + int ret = -EBUSY; + + spin_lock_bh(&ac->queue_lock); + + if (!artpec6_crypto_busy()) { + list_add_tail(&req->list, &ac->pending); + artpec6_crypto_start_dma(req); + ret = -EINPROGRESS; + } else if (req->req->flags & CRYPTO_TFM_REQ_MAY_BACKLOG) { + list_add_tail(&req->list, &ac->queue); + } else { + artpec6_crypto_common_destroy(req); + } + + spin_unlock_bh(&ac->queue_lock); + + return ret; +} + +static void 
artpec6_crypto_start_dma(struct artpec6_crypto_req_common *common) +{ + struct artpec6_crypto *ac = dev_get_drvdata(artpec6_crypto_dev); + enum artpec6_crypto_variant variant = ac->variant; + void __iomem *base = ac->base; + struct artpec6_crypto_dma_descriptors *dma = common->dma; + u32 ind, statd, outd; + + /* Make descriptor content visible to the DMA before starting it. */ + wmb(); + + ind = FIELD_PREP(PDMA_IN_DESCRQ_PUSH_LEN, dma->in_cnt - 1) | + FIELD_PREP(PDMA_IN_DESCRQ_PUSH_ADDR, dma->in_dma_addr >> 6); + + statd = FIELD_PREP(PDMA_IN_STATQ_PUSH_LEN, dma->in_cnt - 1) | + FIELD_PREP(PDMA_IN_STATQ_PUSH_ADDR, dma->stat_dma_addr >> 6); + + outd = FIELD_PREP(PDMA_OUT_DESCRQ_PUSH_LEN, dma->out_cnt - 1) | + FIELD_PREP(PDMA_OUT_DESCRQ_PUSH_ADDR, dma->out_dma_addr >> 6); + + if (variant == ARTPEC6_CRYPTO) { + writel_relaxed(ind, base + A6_PDMA_IN_DESCRQ_PUSH); + writel_relaxed(statd, base + A6_PDMA_IN_STATQ_PUSH); + writel_relaxed(PDMA_IN_CMD_START, base + A6_PDMA_IN_CMD); + } else { + writel_relaxed(ind, base + A7_PDMA_IN_DESCRQ_PUSH); + writel_relaxed(statd, base + A7_PDMA_IN_STATQ_PUSH); + writel_relaxed(PDMA_IN_CMD_START, base + A7_PDMA_IN_CMD); + } + + writel_relaxed(outd, base + PDMA_OUT_DESCRQ_PUSH); + writel_relaxed(PDMA_OUT_CMD_START, base + PDMA_OUT_CMD); + + ac->pending_count++; +} + +static void +artpec6_crypto_init_dma_operation(struct artpec6_crypto_req_common *common) +{ + struct artpec6_crypto_dma_descriptors *dma = common->dma; + + dma->out_cnt = 0; + dma->in_cnt = 0; + dma->map_count = 0; + INIT_LIST_HEAD(&dma->bounce_buffers); +} + +static bool fault_inject_dma_descr(void) +{ +#ifdef CONFIG_FAULT_INJECTION + return should_fail(&artpec6_crypto_fail_dma_array_full, 1); +#else + return false; +#endif +} + +/** artpec6_crypto_setup_out_descr_phys - Setup an out channel with a + * physical address + * + * @addr: The physical address of the data buffer + * @len: The length of the data buffer + * @eop: True if this is the last buffer in the packet + * + * 
@return 0 on success or -ENOSPC if there are no more descriptors available + */ +static int +artpec6_crypto_setup_out_descr_phys(struct artpec6_crypto_req_common *common, + dma_addr_t addr, size_t len, bool eop) +{ + struct artpec6_crypto_dma_descriptors *dma = common->dma; + struct pdma_descr *d; + + if (dma->out_cnt >= PDMA_DESCR_COUNT || + fault_inject_dma_descr()) { + pr_err("No free OUT DMA descriptors available!\n"); + return -ENOSPC; + } + + d = &dma->out[dma->out_cnt++]; + memset(d, 0, sizeof(*d)); + + d->ctrl.short_descr = 0; + d->ctrl.eop = eop; + d->data.len = len; + d->data.buf = addr; + return 0; +} + +/** artpec6_crypto_setup_out_descr_short - Setup a short out descriptor + * + * @dst: The virtual address of the data + * @len: The length of the data, must be between 1 to 7 bytes + * @eop: True if this is the last buffer in the packet + * + * @return 0 on success + * -ENOSPC if no more descriptors are available + * -EINVAL if the data length exceeds 7 bytes + */ +static int +artpec6_crypto_setup_out_descr_short(struct artpec6_crypto_req_common *common, + void *dst, unsigned int len, bool eop) +{ + struct artpec6_crypto_dma_descriptors *dma = common->dma; + struct pdma_descr *d; + + if (dma->out_cnt >= PDMA_DESCR_COUNT || + fault_inject_dma_descr()) { + pr_err("No free OUT DMA descriptors available!\n"); + return -ENOSPC; + } else if (len > 7 || len < 1) { + return -EINVAL; + } + d = &dma->out[dma->out_cnt++]; + memset(d, 0, sizeof(*d)); + + d->ctrl.short_descr = 1; + d->ctrl.short_len = len; + d->ctrl.eop = eop; + memcpy(d->shrt.data, dst, len); + return 0; +} + +static int artpec6_crypto_dma_map_page(struct artpec6_crypto_req_common *common, + struct page *page, size_t offset, + size_t size, + enum dma_data_direction dir, + dma_addr_t *dma_addr_out) +{ + struct artpec6_crypto_dma_descriptors *dma = common->dma; + struct device *dev = artpec6_crypto_dev; + struct artpec6_crypto_dma_map *map; + dma_addr_t dma_addr; + + *dma_addr_out = 0; + + if 
(dma->map_count >= ARRAY_SIZE(dma->maps)) + return -ENOMEM; + + dma_addr = dma_map_page(dev, page, offset, size, dir); + if (dma_mapping_error(dev, dma_addr)) + return -ENOMEM; + + map = &dma->maps[dma->map_count++]; + map->size = size; + map->dma_addr = dma_addr; + map->dir = dir; + + *dma_addr_out = dma_addr; + + return 0; +} + +static int +artpec6_crypto_dma_map_single(struct artpec6_crypto_req_common *common, + void *ptr, size_t size, + enum dma_data_direction dir, + dma_addr_t *dma_addr_out) +{ + struct page *page = virt_to_page(ptr); + size_t offset = (uintptr_t)ptr & ~PAGE_MASK; + + return artpec6_crypto_dma_map_page(common, page, offset, size, dir, + dma_addr_out); +} + +static int +artpec6_crypto_dma_map_descs(struct artpec6_crypto_req_common *common) +{ + struct artpec6_crypto_dma_descriptors *dma = common->dma; + int ret; + + ret = artpec6_crypto_dma_map_single(common, dma->in, + sizeof(dma->in[0]) * dma->in_cnt, + DMA_TO_DEVICE, &dma->in_dma_addr); + if (ret) + return ret; + + ret = artpec6_crypto_dma_map_single(common, dma->out, + sizeof(dma->out[0]) * dma->out_cnt, + DMA_TO_DEVICE, &dma->out_dma_addr); + if (ret) + return ret; + + /* We only read one stat descriptor */ + dma->stat[dma->in_cnt - 1] = 0; + + /* + * DMA_BIDIRECTIONAL since we need our zeroing of the stat descriptor + * to be written. 
+ */ + return artpec6_crypto_dma_map_single(common, + dma->stat + dma->in_cnt - 1, + sizeof(dma->stat[0]), + DMA_BIDIRECTIONAL, + &dma->stat_dma_addr); +} +/* Undo every DMA mapping recorded in dma->maps and reset the map count */ +static void +artpec6_crypto_dma_unmap_all(struct artpec6_crypto_req_common *common) +{ + struct artpec6_crypto_dma_descriptors *dma = common->dma; + struct device *dev = artpec6_crypto_dev; + int i; + + for (i = 0; i < dma->map_count; i++) { + struct artpec6_crypto_dma_map *map = &dma->maps[i]; + + dma_unmap_page(dev, map->dma_addr, map->size, map->dir); + } + + dma->map_count = 0; +} + +/** artpec6_crypto_setup_out_descr - Setup an out descriptor + * + * @common: The request whose out descriptor list is extended + * @dst: The virtual address of the data + * @len: The length of the data + * @eop: True if this is the last buffer in the packet + * @use_short: If this is true and the data length is less than 7 bytes then + * a short descriptor will be used; otherwise the data is DMA-mapped + * and a regular descriptor is used + * + * @return 0 on success + * Any errors from artpec6_crypto_setup_out_descr_short(), + * artpec6_crypto_dma_map_single() or + * artpec6_crypto_setup_out_descr_phys() + */ +static int +artpec6_crypto_setup_out_descr(struct artpec6_crypto_req_common *common, + void *dst, unsigned int len, bool eop, + bool use_short) +{ + if (use_short && len < 7) { + return artpec6_crypto_setup_out_descr_short(common, dst, len, + eop); + } else { + int ret; + dma_addr_t dma_addr; + + ret = artpec6_crypto_dma_map_single(common, dst, len, + DMA_TO_DEVICE, + &dma_addr); + if (ret) + return ret; + + return artpec6_crypto_setup_out_descr_phys(common, dma_addr, + len, eop); + } +} + +/** artpec6_crypto_setup_in_descr_phys - Setup an in channel with a + * physical address + * + * @common: The request whose in descriptor list is extended + * @addr: The physical address of the data buffer + * @len: The length of the data buffer + * @intr: True if an interrupt should be fired after HW processing of this + * descriptor + * + * @return 0 on success or -ENOSPC if there are no more descriptors available + */ +static int +artpec6_crypto_setup_in_descr_phys(struct artpec6_crypto_req_common *common, + dma_addr_t addr, unsigned int len, bool intr) +{ + struct artpec6_crypto_dma_descriptors *dma = common->dma; + struct pdma_descr *d; + + if 
(dma->in_cnt >= PDMA_DESCR_COUNT || + fault_inject_dma_descr()) { + pr_err("No free IN DMA descriptors available!\n"); + return -ENOSPC; + } + d = &dma->in[dma->in_cnt++]; + memset(d, 0, sizeof(*d)); + + d->ctrl.intr = intr; + d->data.len = len; + d->data.buf = addr; + return 0; +} + +/** artpec6_crypto_setup_in_descr - Setup an in channel descriptor + * + * @common: The request whose in descriptor list is extended + * @buffer: The virtual address of the data buffer + * @len: The length of the data buffer + * @last: If this is the last data buffer in the request (i.e. an interrupt + * is needed) + * + * The buffer is DMA-mapped for DMA_FROM_DEVICE before the descriptor is set up. + * Short descriptors are not used for the in channel + * + * @return 0 on success + * Any errors from artpec6_crypto_dma_map_single() or + * artpec6_crypto_setup_in_descr_phys() + */ +static int +artpec6_crypto_setup_in_descr(struct artpec6_crypto_req_common *common, + void *buffer, unsigned int len, bool last) +{ + dma_addr_t dma_addr; + int ret; + + ret = artpec6_crypto_dma_map_single(common, buffer, len, + DMA_FROM_DEVICE, &dma_addr); + if (ret) + return ret; + + return artpec6_crypto_setup_in_descr_phys(common, dma_addr, len, last); +} +/* + * Allocate a zeroed bounce buffer header followed by its data area. The extra + * 2 * ARTPEC_CACHE_LINE_MAX bytes leave room to align ->buf, which points just + * past the header, up to ARTPEC_CACHE_LINE_MAX. Returns NULL on allocation + * failure. + */ +static struct artpec6_crypto_bounce_buffer * +artpec6_crypto_alloc_bounce(gfp_t flags) +{ + void *base; + size_t alloc_size = sizeof(struct artpec6_crypto_bounce_buffer) + + 2 * ARTPEC_CACHE_LINE_MAX; + struct artpec6_crypto_bounce_buffer *bbuf = kzalloc(alloc_size, flags); + + if (!bbuf) + return NULL; + + base = bbuf + 1; + bbuf->buf = PTR_ALIGN(base, ARTPEC_CACHE_LINE_MAX); + return bbuf; +} +/* + * Receive @size bytes into a freshly allocated bounce buffer instead of + * directly into the scatterlist. The current walk position is recorded in the + * bounce buffer and the buffer is queued on the request's bounce_buffers list. + * Returns 0 on success, -ENOMEM if the allocation fails, or the error from + * artpec6_crypto_setup_in_descr() (the buffer is freed in that case). + */ +static int setup_bounce_buffer_in(struct artpec6_crypto_req_common *common, + struct artpec6_crypto_walk *walk, size_t size) +{ + struct artpec6_crypto_bounce_buffer *bbuf; + int ret; + + bbuf = artpec6_crypto_alloc_bounce(common->gfp_flags); + if (!bbuf) + return -ENOMEM; + + bbuf->length = size; + bbuf->sg = walk->sg; + bbuf->offset = walk->offset; + + ret = artpec6_crypto_setup_in_descr(common, bbuf->buf, size, false); + if (ret) { + kfree(bbuf); + return ret; + } + + pr_debug("BOUNCE %zu offset %zu\n", size, walk->offset); + list_add_tail(&bbuf->list, &common->dma->bounce_buffers); + return 0; +} + +static int 
+artpec6_crypto_setup_sg_descrs_in(struct artpec6_crypto_req_common *common, + struct artpec6_crypto_walk *walk, + size_t count) +{ + size_t chunk; + int ret; + dma_addr_t addr; + + while (walk->sg && count) { + chunk = min(count, artpec6_crypto_walk_chunklen(walk)); + addr = artpec6_crypto_walk_chunk_phys(walk); + + /* When destination buffers are not aligned to the cache line + * size we need bounce buffers. The DMA-API requires that the + * entire line is owned by the DMA buffer and this holds also + * for the case when coherent DMA is used. + */ + if (!IS_ALIGNED(addr, ARTPEC_CACHE_LINE_MAX)) { + chunk = min_t(dma_addr_t, chunk, + ALIGN(addr, ARTPEC_CACHE_LINE_MAX) - + addr); + + pr_debug("CHUNK-b %pad:%zu\n", &addr, chunk); + ret = setup_bounce_buffer_in(common, walk, chunk); + } else if (chunk < ARTPEC_CACHE_LINE_MAX) { + pr_debug("CHUNK-b %pad:%zu\n", &addr, chunk); + ret = setup_bounce_buffer_in(common, walk, chunk); + } else { + dma_addr_t dma_addr; + + chunk = chunk & ~(ARTPEC_CACHE_LINE_MAX-1); + + pr_debug("CHUNK %pad:%zu\n", &addr, chunk); + + ret = artpec6_crypto_dma_map_page(common, + sg_page(walk->sg), + walk->sg->offset + + walk->offset, + chunk, + DMA_FROM_DEVICE, + &dma_addr); + if (ret) + return ret; + + ret = artpec6_crypto_setup_in_descr_phys(common, + dma_addr, + chunk, false); + } + + if (ret) + return ret; + + count = count - chunk; + artpec6_crypto_walk_advance(walk, chunk); + } + + if (count) + pr_err("EOL unexpected %zu bytes left\n", count); + + return count ? 
-EINVAL : 0; +} + +/** artpec6_crypto_setup_sg_descrs_out - Setup out descriptors for scatterlist data + * + * @common: The request whose out descriptor list is extended + * @walk: Current position in the scatterlist; advanced past the consumed data + * @count: Number of bytes to describe + * + * Data starting at an address that is not 4-byte aligned is copied into a + * short descriptor up to the next aligned address; everything else is + * DMA-mapped one scatterlist chunk at a time. + * + * @return 0 on success + * -EINVAL if the scatterlist ends before @count bytes are consumed + * Any errors from the descriptor setup and DMA mapping helpers + */ +static int +artpec6_crypto_setup_sg_descrs_out(struct artpec6_crypto_req_common *common, + struct artpec6_crypto_walk *walk, + size_t count) +{ + size_t chunk; + int ret; + dma_addr_t addr; + + while (walk->sg && count) { + chunk = min(count, artpec6_crypto_walk_chunklen(walk)); + addr = artpec6_crypto_walk_chunk_phys(walk); + + pr_debug("OUT-CHUNK %pad:%zu\n", &addr, chunk); + + if (addr & 3) { + char buf[3]; + + chunk = min_t(size_t, chunk, (4-(addr&3))); + + sg_pcopy_to_buffer(walk->sg, 1, buf, chunk, + walk->offset); + + ret = artpec6_crypto_setup_out_descr_short(common, buf, + chunk, + false); + } else { + dma_addr_t dma_addr; + + ret = artpec6_crypto_dma_map_page(common, + sg_page(walk->sg), + walk->sg->offset + + walk->offset, + chunk, + DMA_TO_DEVICE, + &dma_addr); + if (ret) + return ret; + + ret = artpec6_crypto_setup_out_descr_phys(common, + dma_addr, + chunk, false); + } + + if (ret) + return ret; + + count = count - chunk; + artpec6_crypto_walk_advance(walk, chunk); + } + + if (count) + pr_err("EOL unexpected %zu bytes left\n", count); + + return count ? -EINVAL : 0; +} + + +/** artpec6_crypto_terminate_out_descrs - Set the EOP on the last out descriptor + * + * @common: The request whose out descriptor list is terminated + * + * If the out descriptor list is non-empty, then the eop flag on the + * last used out descriptor will be set. + * + * @return 0 on success + * -EINVAL if the out descriptor list is empty or has overflown + */ +static int +artpec6_crypto_terminate_out_descrs(struct artpec6_crypto_req_common *common) +{ + struct artpec6_crypto_dma_descriptors *dma = common->dma; + struct pdma_descr *d; + + if (!dma->out_cnt || dma->out_cnt > PDMA_DESCR_COUNT) { + /* A non-zero count here means the list has overflown */ + pr_err("%s: OUT descriptor list is %s\n", + MODULE_NAME, dma->out_cnt ? 
"full" : "empty"); + return -EINVAL; + + } + + d = &dma->out[dma->out_cnt-1]; + d->ctrl.eop = 1; + + return 0; +} + +/** artpec6_crypto_terminate_in_descrs - Set the interrupt flag on the last + * in descriptor + * + * @common: The request whose in descriptor list is terminated + * + * @return 0 on success + * -EINVAL if the in descriptor list is empty or has overflown + */ +static int +artpec6_crypto_terminate_in_descrs(struct artpec6_crypto_req_common *common) +{ + struct artpec6_crypto_dma_descriptors *dma = common->dma; + struct pdma_descr *d; + + if (!dma->in_cnt || dma->in_cnt > PDMA_DESCR_COUNT) { + /* A non-zero count here means the list has overflown */ + pr_err("%s: IN descriptor list is %s\n", + MODULE_NAME, dma->in_cnt ? "full" : "empty"); + return -EINVAL; + } + + d = &dma->in[dma->in_cnt-1]; + d->ctrl.intr = 1; + return 0; +} + +/** create_hash_pad - Create a Secure Hash conformant pad + * + * @oper: The hash operation. SHA-1, SHA-256 and their HMAC variants use a + * 64-byte block with an 8-byte length field; every other value is + * treated as a 128-byte block with a 16-byte length field + * @dst: The destination buffer to write the pad. The pad can be up to + * 72 bytes long in the 64-byte block case and up to 144 bytes + * otherwise + * @dgstlen: The length in bytes of the data being padded; only its value + * modulo the block size is used + * @bitcount: The length in bits that is stored big-endian at the end of the + * pad + * + * @return The total number of padding bytes written to @dst + */ +static size_t +create_hash_pad(int oper, unsigned char *dst, u64 dgstlen, u64 bitcount) +{ + unsigned int mod, target, diff, pad_bytes, size_bytes; + __be64 bits = __cpu_to_be64(bitcount); + + switch (oper) { + case regk_crypto_sha1: + case regk_crypto_sha256: + case regk_crypto_hmac_sha1: + case regk_crypto_hmac_sha256: + target = 448 / 8; + mod = 512 / 8; + size_bytes = 8; + break; + default: + target = 896 / 8; + mod = 1024 / 8; + size_bytes = 16; + break; + } + + target -= 1; + diff = dgstlen & (mod - 1); + pad_bytes = diff > target ? 
target + mod - diff : target - diff; + + memset(dst + 1, 0, pad_bytes); + dst[0] = 0x80; + + if (size_bytes == 16) { + memset(dst + 1 + pad_bytes, 0, 8); + memcpy(dst + 1 + pad_bytes + 8, &bits, 8); + } else { + memcpy(dst + 1 + pad_bytes, &bits, 8); + } + + return pad_bytes + size_bytes + 1; +} + +static int artpec6_crypto_common_init(struct artpec6_crypto_req_common *common, + struct crypto_async_request *parent, + void (*complete)(struct crypto_async_request *req), + struct scatterlist *dstsg, unsigned int nbytes) +{ + gfp_t flags; + struct artpec6_crypto *ac = dev_get_drvdata(artpec6_crypto_dev); + + flags = (parent->flags & CRYPTO_TFM_REQ_MAY_SLEEP) ? + GFP_KERNEL : GFP_ATOMIC; + + common->gfp_flags = flags; + common->dma = kmem_cache_alloc(ac->dma_cache, flags); + if (!common->dma) + return -ENOMEM; + + common->req = parent; + common->complete = complete; + return 0; +} + +static void +artpec6_crypto_bounce_destroy(struct artpec6_crypto_dma_descriptors *dma) +{ + struct artpec6_crypto_bounce_buffer *b; + struct artpec6_crypto_bounce_buffer *next; + + list_for_each_entry_safe(b, next, &dma->bounce_buffers, list) { + kfree(b); + } +} + +static int +artpec6_crypto_common_destroy(struct artpec6_crypto_req_common *common) +{ + struct artpec6_crypto *ac = dev_get_drvdata(artpec6_crypto_dev); + + artpec6_crypto_dma_unmap_all(common); + artpec6_crypto_bounce_destroy(common->dma); + kmem_cache_free(ac->dma_cache, common->dma); + common->dma = NULL; + return 0; +} + +/* + * Ciphering functions. 
+ */ +static int artpec6_crypto_encrypt(struct skcipher_request *req) +{ + struct crypto_skcipher *cipher = crypto_skcipher_reqtfm(req); + struct artpec6_cryptotfm_context *ctx = crypto_skcipher_ctx(cipher); + struct artpec6_crypto_request_context *req_ctx = NULL; + void (*complete)(struct crypto_async_request *req); + int ret; + + req_ctx = skcipher_request_ctx(req); + + switch (ctx->crypto_type) { + case ARTPEC6_CRYPTO_CIPHER_AES_CBC: + case ARTPEC6_CRYPTO_CIPHER_AES_ECB: + case ARTPEC6_CRYPTO_CIPHER_AES_XTS: + req_ctx->decrypt = 0; + break; + default: + break; + } + + switch (ctx->crypto_type) { + case ARTPEC6_CRYPTO_CIPHER_AES_CBC: + complete = artpec6_crypto_complete_cbc_encrypt; + break; + default: + complete = artpec6_crypto_complete_crypto; + break; + } + + ret = artpec6_crypto_common_init(&req_ctx->common, + &req->base, + complete, + req->dst, req->cryptlen); + if (ret) + return ret; + + ret = artpec6_crypto_prepare_crypto(req); + if (ret) { + artpec6_crypto_common_destroy(&req_ctx->common); + return ret; + } + + return artpec6_crypto_submit(&req_ctx->common); +} + +static int artpec6_crypto_decrypt(struct skcipher_request *req) +{ + int ret; + struct crypto_skcipher *cipher = crypto_skcipher_reqtfm(req); + struct artpec6_cryptotfm_context *ctx = crypto_skcipher_ctx(cipher); + struct artpec6_crypto_request_context *req_ctx = NULL; + void (*complete)(struct crypto_async_request *req); + + req_ctx = skcipher_request_ctx(req); + + switch (ctx->crypto_type) { + case ARTPEC6_CRYPTO_CIPHER_AES_CBC: + case ARTPEC6_CRYPTO_CIPHER_AES_ECB: + case ARTPEC6_CRYPTO_CIPHER_AES_XTS: + req_ctx->decrypt = 1; + break; + default: + break; + } + + + switch (ctx->crypto_type) { + case ARTPEC6_CRYPTO_CIPHER_AES_CBC: + complete = artpec6_crypto_complete_cbc_decrypt; + break; + default: + complete = artpec6_crypto_complete_crypto; + break; + } + + ret = artpec6_crypto_common_init(&req_ctx->common, &req->base, + complete, + req->dst, req->cryptlen); + if (ret) + return ret; + + ret 
= artpec6_crypto_prepare_crypto(req); + if (ret) { + artpec6_crypto_common_destroy(&req_ctx->common); + return ret; + } + + return artpec6_crypto_submit(&req_ctx->common); +} + +/* + * Run an AES-CTR request on the hardware, or on the software fallback + * cipher when the 32-bit counter held in the last four IV bytes would wrap + * while processing the request. + */ +static int +artpec6_crypto_ctr_crypt(struct skcipher_request *req, bool encrypt) +{ + struct crypto_skcipher *cipher = crypto_skcipher_reqtfm(req); + struct artpec6_cryptotfm_context *ctx = crypto_skcipher_ctx(cipher); + size_t iv_len = crypto_skcipher_ivsize(cipher); + unsigned int counter = be32_to_cpup((__be32 *) + (req->iv + iv_len - 4)); + unsigned int nblks = ALIGN(req->cryptlen, AES_BLOCK_SIZE) / + AES_BLOCK_SIZE; + + /* + * The hardware uses only the last 32-bits as the counter while the + * kernel tests (aes_ctr_enc_tv_template[4] for example) expect that + * the whole IV is a counter. So fall back if the counter is going to + * overflow. + */ + if (counter + nblks < counter) { + int ret; + + pr_debug("counter %x will overflow (nblks %u), falling back\n", + counter, nblks); + + ret = crypto_skcipher_setkey(ctx->fallback, ctx->aes_key, + ctx->key_length); + if (ret) + return ret; + + { + SKCIPHER_REQUEST_ON_STACK(subreq, ctx->fallback); + + skcipher_request_set_tfm(subreq, ctx->fallback); + skcipher_request_set_callback(subreq, req->base.flags, + NULL, NULL); + skcipher_request_set_crypt(subreq, req->src, req->dst, + req->cryptlen, req->iv); + ret = encrypt ? crypto_skcipher_encrypt(subreq) + : crypto_skcipher_decrypt(subreq); + skcipher_request_zero(subreq); + } + return ret; + } + + return encrypt ? 
artpec6_crypto_encrypt(req) + : artpec6_crypto_decrypt(req); +} + +static int artpec6_crypto_ctr_encrypt(struct skcipher_request *req) +{ + return artpec6_crypto_ctr_crypt(req, true); +} + +static int artpec6_crypto_ctr_decrypt(struct skcipher_request *req) +{ + return artpec6_crypto_ctr_crypt(req, false); +} + +/* + * AEAD functions + */ +static int artpec6_crypto_aead_init(struct crypto_aead *tfm) +{ + struct artpec6_cryptotfm_context *tfm_ctx = crypto_aead_ctx(tfm); + + memset(tfm_ctx, 0, sizeof(*tfm_ctx)); + + crypto_aead_set_reqsize(tfm, + sizeof(struct artpec6_crypto_aead_req_ctx)); + + return 0; +} + +/* + * Store the AES key for the AEAD transform. Only 16, 24 and 32 byte keys are + * accepted; any other length flags CRYPTO_TFM_RES_BAD_KEY_LEN on the + * transform and returns -EINVAL, matching artpec6_crypto_cipher_set_key(). + */ +static int artpec6_crypto_aead_set_key(struct crypto_aead *tfm, const u8 *key, + unsigned int len) +{ + struct artpec6_cryptotfm_context *ctx = crypto_tfm_ctx(&tfm->base); + + if (len != 16 && len != 24 && len != 32) { + crypto_aead_set_flags(tfm, CRYPTO_TFM_RES_BAD_KEY_LEN); + return -EINVAL; + } + + ctx->key_length = len; + + memcpy(ctx->aes_key, key, len); + return 0; +} + +static int artpec6_crypto_aead_encrypt(struct aead_request *req) +{ + int ret; + struct artpec6_crypto_aead_req_ctx *req_ctx = aead_request_ctx(req); + + req_ctx->decrypt = false; + ret = artpec6_crypto_common_init(&req_ctx->common, &req->base, + artpec6_crypto_complete_aead, + NULL, 0); + if (ret) + return ret; + + ret = artpec6_crypto_prepare_aead(req); + if (ret) { + artpec6_crypto_common_destroy(&req_ctx->common); + return ret; + } + + return artpec6_crypto_submit(&req_ctx->common); +} + +static int artpec6_crypto_aead_decrypt(struct aead_request *req) +{ + int ret; + struct artpec6_crypto_aead_req_ctx *req_ctx = aead_request_ctx(req); + + req_ctx->decrypt = true; + if (req->cryptlen < AES_BLOCK_SIZE) + return -EINVAL; + + ret = artpec6_crypto_common_init(&req_ctx->common, + &req->base, + artpec6_crypto_complete_aead, + NULL, 0); + if (ret) + return ret; + + ret = artpec6_crypto_prepare_aead(req); + if (ret) { + artpec6_crypto_common_destroy(&req_ctx->common); + return ret; + } + + return 
artpec6_crypto_submit(&req_ctx->common); +} +/* + * Build the PDMA descriptor lists for a hash request according to the + * HASH_FLAG_* bits set in the request context. + * + * Returns ARTPEC6_CRYPTO_PREPARE_HASH_START when descriptors were set up, + * ARTPEC6_CRYPTO_PREPARE_HASH_NO_START when this is not the final operation + * and no complete block is available for the hardware, or a negative errno + * from the descriptor setup and DMA mapping helpers. + */ +static int artpec6_crypto_prepare_hash(struct ahash_request *areq) +{ + struct artpec6_hashalg_context *ctx = crypto_tfm_ctx(areq->base.tfm); + struct artpec6_hash_request_context *req_ctx = ahash_request_ctx(areq); + size_t digestsize = crypto_ahash_digestsize(crypto_ahash_reqtfm(areq)); + size_t contextsize = digestsize == SHA384_DIGEST_SIZE ? + SHA512_DIGEST_SIZE : digestsize; + size_t blocksize = crypto_tfm_alg_blocksize( + crypto_ahash_tfm(crypto_ahash_reqtfm(areq))); + struct artpec6_crypto_req_common *common = &req_ctx->common; + struct artpec6_crypto *ac = dev_get_drvdata(artpec6_crypto_dev); + enum artpec6_crypto_variant variant = ac->variant; + u32 sel_ctx; + bool ext_ctx = false; + bool run_hw = false; + int error = 0; + + artpec6_crypto_init_dma_operation(common); + + /* Upload HMAC key, must be the first packet */ + if (req_ctx->hash_flags & HASH_FLAG_HMAC) { + if (variant == ARTPEC6_CRYPTO) { + req_ctx->key_md = FIELD_PREP(A6_CRY_MD_OPER, + a6_regk_crypto_dlkey); + } else { + req_ctx->key_md = FIELD_PREP(A7_CRY_MD_OPER, + a7_regk_crypto_dlkey); + } + + /* Copy the key and zero-pad it up to the block size */ + memcpy(req_ctx->key_buffer, ctx->hmac_key, + ctx->hmac_key_length); + memset(req_ctx->key_buffer + ctx->hmac_key_length, 0, + blocksize - ctx->hmac_key_length); + + error = artpec6_crypto_setup_out_descr(common, + (void *)&req_ctx->key_md, + sizeof(req_ctx->key_md), false, false); + if (error) + return error; + + error = artpec6_crypto_setup_out_descr(common, + req_ctx->key_buffer, blocksize, + true, false); + if (error) + return error; + } + + if (!(req_ctx->hash_flags & HASH_FLAG_INIT_CTX)) { + /* Restore context */ + sel_ctx = regk_crypto_ext; + ext_ctx = true; + } else { + sel_ctx = regk_crypto_init; + } + + if (variant == ARTPEC6_CRYPTO) { + req_ctx->hash_md &= ~A6_CRY_MD_HASH_SEL_CTX; + req_ctx->hash_md |= FIELD_PREP(A6_CRY_MD_HASH_SEL_CTX, sel_ctx); + + /* If this is the final round, set the final flag */ + if 
(req_ctx->hash_flags & HASH_FLAG_FINALIZE) + req_ctx->hash_md |= A6_CRY_MD_HASH_HMAC_FIN; + } else { + req_ctx->hash_md &= ~A7_CRY_MD_HASH_SEL_CTX; + req_ctx->hash_md |= FIELD_PREP(A7_CRY_MD_HASH_SEL_CTX, sel_ctx); + + /* If this is the final round, set the final flag */ + if (req_ctx->hash_flags & HASH_FLAG_FINALIZE) + req_ctx->hash_md |= A7_CRY_MD_HASH_HMAC_FIN; + } + + /* Setup up metadata descriptors */ + error = artpec6_crypto_setup_out_descr(common, + (void *)&req_ctx->hash_md, + sizeof(req_ctx->hash_md), false, false); + if (error) + return error; + + error = artpec6_crypto_setup_in_descr(common, ac->pad_buffer, 4, false); + if (error) + return error; + + if (ext_ctx) { + error = artpec6_crypto_setup_out_descr(common, + req_ctx->digeststate, + contextsize, false, false); + + if (error) + return error; + } + + if (req_ctx->hash_flags & HASH_FLAG_UPDATE) { + size_t done_bytes = 0; + size_t total_bytes = areq->nbytes + req_ctx->partial_bytes; + size_t ready_bytes = round_down(total_bytes, blocksize); + struct artpec6_crypto_walk walk; + + run_hw = ready_bytes > 0; + if (req_ctx->partial_bytes && ready_bytes) { + /* We have a partial buffer and will at least some bytes + * to the HW. 
Empty this partial buffer before tackling + * the SG lists + */ + memcpy(req_ctx->partial_buffer_out, + req_ctx->partial_buffer, + req_ctx->partial_bytes); + + error = artpec6_crypto_setup_out_descr(common, + req_ctx->partial_buffer_out, + req_ctx->partial_bytes, + false, true); + if (error) + return error; + + /* Reset partial buffer */ + done_bytes += req_ctx->partial_bytes; + req_ctx->partial_bytes = 0; + } + + artpec6_crypto_walk_init(&walk, areq->src); + + error = artpec6_crypto_setup_sg_descrs_out(common, &walk, + ready_bytes - + done_bytes); + if (error) + return error; + + if (walk.sg) { + size_t sg_skip = ready_bytes - done_bytes; + size_t sg_rem = areq->nbytes - sg_skip; + + sg_pcopy_to_buffer(areq->src, sg_nents(areq->src), + req_ctx->partial_buffer + + req_ctx->partial_bytes, + sg_rem, sg_skip); + + req_ctx->partial_bytes += sg_rem; + } + + req_ctx->digcnt += ready_bytes; + req_ctx->hash_flags &= ~(HASH_FLAG_UPDATE); + } + + /* Finalize */ + if (req_ctx->hash_flags & HASH_FLAG_FINALIZE) { + bool needtrim = contextsize != digestsize; + size_t hash_pad_len; + u64 digest_bits; + u32 oper; + + if (variant == ARTPEC6_CRYPTO) + oper = FIELD_GET(A6_CRY_MD_OPER, req_ctx->hash_md); + else + oper = FIELD_GET(A7_CRY_MD_OPER, req_ctx->hash_md); + + /* Write out the partial buffer if present */ + if (req_ctx->partial_bytes) { + memcpy(req_ctx->partial_buffer_out, + req_ctx->partial_buffer, + req_ctx->partial_bytes); + error = artpec6_crypto_setup_out_descr(common, + req_ctx->partial_buffer_out, + req_ctx->partial_bytes, + false, true); + if (error) + return error; + + req_ctx->digcnt += req_ctx->partial_bytes; + req_ctx->partial_bytes = 0; + } + + if (req_ctx->hash_flags & HASH_FLAG_HMAC) + digest_bits = 8 * (req_ctx->digcnt + blocksize); + else + digest_bits = 8 * req_ctx->digcnt; + + /* Add the hash pad */ + hash_pad_len = create_hash_pad(oper, req_ctx->pad_buffer, + req_ctx->digcnt, digest_bits); + error = artpec6_crypto_setup_out_descr(common, + 
req_ctx->pad_buffer, + hash_pad_len, false, + true); + req_ctx->digcnt = 0; + + if (error) + return error; + + /* Descriptor for the final result */ + error = artpec6_crypto_setup_in_descr(common, areq->result, + digestsize, + !needtrim); + if (error) + return error; + + if (needtrim) { + /* Discard the extra context bytes for SHA-384 */ + error = artpec6_crypto_setup_in_descr(common, + req_ctx->partial_buffer, + digestsize - contextsize, true); + if (error) + return error; + } + + } else { /* This is not the final operation for this request */ + if (!run_hw) + return ARTPEC6_CRYPTO_PREPARE_HASH_NO_START; + + /* Save the result to the context */ + error = artpec6_crypto_setup_in_descr(common, + req_ctx->digeststate, + contextsize, false); + if (error) + return error; + /* fall through */ + } + + req_ctx->hash_flags &= ~(HASH_FLAG_INIT_CTX | HASH_FLAG_UPDATE | + HASH_FLAG_FINALIZE); + + error = artpec6_crypto_terminate_in_descrs(common); + if (error) + return error; + + error = artpec6_crypto_terminate_out_descrs(common); + if (error) + return error; + + error = artpec6_crypto_dma_map_descs(common); + if (error) + return error; + + return ARTPEC6_CRYPTO_PREPARE_HASH_START; +} + + +static int artpec6_crypto_aes_ecb_init(struct crypto_skcipher *tfm) +{ + struct artpec6_cryptotfm_context *ctx = crypto_skcipher_ctx(tfm); + + tfm->reqsize = sizeof(struct artpec6_crypto_request_context); + ctx->crypto_type = ARTPEC6_CRYPTO_CIPHER_AES_ECB; + + return 0; +} + +static int artpec6_crypto_aes_ctr_init(struct crypto_skcipher *tfm) +{ + struct artpec6_cryptotfm_context *ctx = crypto_skcipher_ctx(tfm); + + ctx->fallback = crypto_alloc_skcipher(crypto_tfm_alg_name(&tfm->base), + 0, + CRYPTO_ALG_ASYNC | + CRYPTO_ALG_NEED_FALLBACK); + if (IS_ERR(ctx->fallback)) + return PTR_ERR(ctx->fallback); + + tfm->reqsize = sizeof(struct artpec6_crypto_request_context); + ctx->crypto_type = ARTPEC6_CRYPTO_CIPHER_AES_CTR; + + return 0; +} + +static int artpec6_crypto_aes_cbc_init(struct 
crypto_skcipher *tfm) +{ + struct artpec6_cryptotfm_context *ctx = crypto_skcipher_ctx(tfm); + + tfm->reqsize = sizeof(struct artpec6_crypto_request_context); + ctx->crypto_type = ARTPEC6_CRYPTO_CIPHER_AES_CBC; + + return 0; +} + +static int artpec6_crypto_aes_xts_init(struct crypto_skcipher *tfm) +{ + struct artpec6_cryptotfm_context *ctx = crypto_skcipher_ctx(tfm); + + tfm->reqsize = sizeof(struct artpec6_crypto_request_context); + ctx->crypto_type = ARTPEC6_CRYPTO_CIPHER_AES_XTS; + + return 0; +} + +static void artpec6_crypto_aes_exit(struct crypto_skcipher *tfm) +{ + struct artpec6_cryptotfm_context *ctx = crypto_skcipher_ctx(tfm); + + memset(ctx, 0, sizeof(*ctx)); +} + +static void artpec6_crypto_aes_ctr_exit(struct crypto_skcipher *tfm) +{ + struct artpec6_cryptotfm_context *ctx = crypto_skcipher_ctx(tfm); + + crypto_free_skcipher(ctx->fallback); + artpec6_crypto_aes_exit(tfm); +} + +static int +artpec6_crypto_cipher_set_key(struct crypto_skcipher *cipher, const u8 *key, + unsigned int keylen) +{ + struct artpec6_cryptotfm_context *ctx = + crypto_skcipher_ctx(cipher); + + switch (keylen) { + case 16: + case 24: + case 32: + break; + default: + crypto_skcipher_set_flags(cipher, + CRYPTO_TFM_RES_BAD_KEY_LEN); + return -EINVAL; + } + + memcpy(ctx->aes_key, key, keylen); + ctx->key_length = keylen; + return 0; +} + +static int +artpec6_crypto_xts_set_key(struct crypto_skcipher *cipher, const u8 *key, + unsigned int keylen) +{ + struct artpec6_cryptotfm_context *ctx = + crypto_skcipher_ctx(cipher); + int ret; + + ret = xts_check_key(&cipher->base, key, keylen); + if (ret) + return ret; + + switch (keylen) { + case 32: + case 48: + case 64: + break; + default: + crypto_skcipher_set_flags(cipher, + CRYPTO_TFM_RES_BAD_KEY_LEN); + return -EINVAL; + } + + memcpy(ctx->aes_key, key, keylen); + ctx->key_length = keylen; + return 0; +} + +/** artpec6_crypto_process_crypto - Prepare an async block cipher crypto request + * + * @req: The asynch request to process + * + * 
@return 0 if the dma job was successfully prepared + * <0 on error + * + * This function sets up the PDMA descriptors for a block cipher request. + * + * The required padding is added for AES-CTR using a statically defined + * buffer. + * + * The PDMA descriptor list will be as follows: + * + * OUT: [KEY_MD][KEY][EOP]<CIPHER_MD>[IV]<data_0>...[data_n][AES-CTR_pad]<eop> + * IN: <CIPHER_MD><data_0>...[data_n]<intr> + * + */ +static int artpec6_crypto_prepare_crypto(struct skcipher_request *areq) +{ + int ret; + struct artpec6_crypto_walk walk; + struct crypto_skcipher *cipher = crypto_skcipher_reqtfm(areq); + struct artpec6_cryptotfm_context *ctx = crypto_skcipher_ctx(cipher); + struct artpec6_crypto_request_context *req_ctx = NULL; + size_t iv_len = crypto_skcipher_ivsize(cipher); + struct artpec6_crypto *ac = dev_get_drvdata(artpec6_crypto_dev); + enum artpec6_crypto_variant variant = ac->variant; + struct artpec6_crypto_req_common *common; + bool cipher_decr = false; + size_t cipher_klen; + u32 cipher_len = 0; /* Same as regk_crypto_key_128 for NULL crypto */ + u32 oper; + + req_ctx = skcipher_request_ctx(areq); + common = &req_ctx->common; + + artpec6_crypto_init_dma_operation(common); + + if (variant == ARTPEC6_CRYPTO) + ctx->key_md = FIELD_PREP(A6_CRY_MD_OPER, a6_regk_crypto_dlkey); + else + ctx->key_md = FIELD_PREP(A7_CRY_MD_OPER, a7_regk_crypto_dlkey); + + ret = artpec6_crypto_setup_out_descr(common, (void *)&ctx->key_md, + sizeof(ctx->key_md), false, false); + if (ret) + return ret; + + ret = artpec6_crypto_setup_out_descr(common, ctx->aes_key, + ctx->key_length, true, false); + if (ret) + return ret; + + req_ctx->cipher_md = 0; + + if (ctx->crypto_type == ARTPEC6_CRYPTO_CIPHER_AES_XTS) + cipher_klen = ctx->key_length/2; + else + cipher_klen = ctx->key_length; + + /* Metadata */ + switch (cipher_klen) { + case 16: + cipher_len = regk_crypto_key_128; + break; + case 24: + cipher_len = regk_crypto_key_192; + break; + case 32: + cipher_len = 
regk_crypto_key_256; + break; + default: + pr_err("%s: Invalid key length %d!\n", + MODULE_NAME, ctx->key_length); + return -EINVAL; + } + + switch (ctx->crypto_type) { + case ARTPEC6_CRYPTO_CIPHER_AES_ECB: + oper = regk_crypto_aes_ecb; + cipher_decr = req_ctx->decrypt; + break; + + case ARTPEC6_CRYPTO_CIPHER_AES_CBC: + oper = regk_crypto_aes_cbc; + cipher_decr = req_ctx->decrypt; + break; + + case ARTPEC6_CRYPTO_CIPHER_AES_CTR: + oper = regk_crypto_aes_ctr; + cipher_decr = false; + break; + + case ARTPEC6_CRYPTO_CIPHER_AES_XTS: + oper = regk_crypto_aes_xts; + cipher_decr = req_ctx->decrypt; + + if (variant == ARTPEC6_CRYPTO) + req_ctx->cipher_md |= A6_CRY_MD_CIPHER_DSEQ; + else + req_ctx->cipher_md |= A7_CRY_MD_CIPHER_DSEQ; + break; + + default: + pr_err("%s: Invalid cipher mode %d!\n", + MODULE_NAME, ctx->crypto_type); + return -EINVAL; + } + + if (variant == ARTPEC6_CRYPTO) { + req_ctx->cipher_md |= FIELD_PREP(A6_CRY_MD_OPER, oper); + req_ctx->cipher_md |= FIELD_PREP(A6_CRY_MD_CIPHER_LEN, + cipher_len); + if (cipher_decr) + req_ctx->cipher_md |= A6_CRY_MD_CIPHER_DECR; + } else { + req_ctx->cipher_md |= FIELD_PREP(A7_CRY_MD_OPER, oper); + req_ctx->cipher_md |= FIELD_PREP(A7_CRY_MD_CIPHER_LEN, + cipher_len); + if (cipher_decr) + req_ctx->cipher_md |= A7_CRY_MD_CIPHER_DECR; + } + + ret = artpec6_crypto_setup_out_descr(common, + &req_ctx->cipher_md, + sizeof(req_ctx->cipher_md), + false, false); + if (ret) + return ret; + + ret = artpec6_crypto_setup_in_descr(common, ac->pad_buffer, 4, false); + if (ret) + return ret; + + if (iv_len) { + ret = artpec6_crypto_setup_out_descr(common, areq->iv, iv_len, + false, false); + if (ret) + return ret; + } + /* Data out */ + artpec6_crypto_walk_init(&walk, areq->src); + ret = artpec6_crypto_setup_sg_descrs_out(common, &walk, areq->cryptlen); + if (ret) + return ret; + + /* Data in */ + artpec6_crypto_walk_init(&walk, areq->dst); + ret = artpec6_crypto_setup_sg_descrs_in(common, &walk, areq->cryptlen); + if (ret) + return ret; + 
+ /* CTR-mode padding required by the HW. */ + if (ctx->crypto_type == ARTPEC6_CRYPTO_CIPHER_AES_CTR || + ctx->crypto_type == ARTPEC6_CRYPTO_CIPHER_AES_XTS) { + size_t pad = ALIGN(areq->cryptlen, AES_BLOCK_SIZE) - + areq->cryptlen; + + if (pad) { + ret = artpec6_crypto_setup_out_descr(common, + ac->pad_buffer, + pad, false, false); + if (ret) + return ret; + + ret = artpec6_crypto_setup_in_descr(common, + ac->pad_buffer, pad, + false); + if (ret) + return ret; + } + } + + ret = artpec6_crypto_terminate_out_descrs(common); + if (ret) + return ret; + + ret = artpec6_crypto_terminate_in_descrs(common); + if (ret) + return ret; + + return artpec6_crypto_dma_map_descs(common); +} + +static int artpec6_crypto_prepare_aead(struct aead_request *areq) +{ + size_t count; + int ret; + size_t input_length; + struct artpec6_cryptotfm_context *ctx = crypto_tfm_ctx(areq->base.tfm); + struct artpec6_crypto_aead_req_ctx *req_ctx = aead_request_ctx(areq); + struct crypto_aead *cipher = crypto_aead_reqtfm(areq); + struct artpec6_crypto_req_common *common = &req_ctx->common; + struct artpec6_crypto *ac = dev_get_drvdata(artpec6_crypto_dev); + enum artpec6_crypto_variant variant = ac->variant; + u32 md_cipher_len; + + artpec6_crypto_init_dma_operation(common); + + /* Key */ + if (variant == ARTPEC6_CRYPTO) { + ctx->key_md = FIELD_PREP(A6_CRY_MD_OPER, + a6_regk_crypto_dlkey); + } else { + ctx->key_md = FIELD_PREP(A7_CRY_MD_OPER, + a7_regk_crypto_dlkey); + } + ret = artpec6_crypto_setup_out_descr(common, (void *)&ctx->key_md, + sizeof(ctx->key_md), false, false); + if (ret) + return ret; + + ret = artpec6_crypto_setup_out_descr(common, ctx->aes_key, + ctx->key_length, true, false); + if (ret) + return ret; + + req_ctx->cipher_md = 0; + + switch (ctx->key_length) { + case 16: + md_cipher_len = regk_crypto_key_128; + break; + case 24: + md_cipher_len = regk_crypto_key_192; + break; + case 32: + md_cipher_len = regk_crypto_key_256; + break; + default: + return -EINVAL; + } + + if (variant 
== ARTPEC6_CRYPTO) { + req_ctx->cipher_md |= FIELD_PREP(A6_CRY_MD_OPER, + regk_crypto_aes_gcm); + req_ctx->cipher_md |= FIELD_PREP(A6_CRY_MD_CIPHER_LEN, + md_cipher_len); + if (req_ctx->decrypt) + req_ctx->cipher_md |= A6_CRY_MD_CIPHER_DECR; + } else { + req_ctx->cipher_md |= FIELD_PREP(A7_CRY_MD_OPER, + regk_crypto_aes_gcm); + req_ctx->cipher_md |= FIELD_PREP(A7_CRY_MD_CIPHER_LEN, + md_cipher_len); + if (req_ctx->decrypt) + req_ctx->cipher_md |= A7_CRY_MD_CIPHER_DECR; + } + + ret = artpec6_crypto_setup_out_descr(common, + (void *) &req_ctx->cipher_md, + sizeof(req_ctx->cipher_md), false, + false); + if (ret) + return ret; + + ret = artpec6_crypto_setup_in_descr(common, ac->pad_buffer, 4, false); + if (ret) + return ret; + + /* For the decryption, cryptlen includes the tag. */ + input_length = areq->cryptlen; + if (req_ctx->decrypt) + input_length -= AES_BLOCK_SIZE; + + /* Prepare the context buffer */ + req_ctx->hw_ctx.aad_length_bits = + __cpu_to_be64(8*areq->assoclen); + + req_ctx->hw_ctx.text_length_bits = + __cpu_to_be64(8*input_length); + + memcpy(req_ctx->hw_ctx.J0, areq->iv, crypto_aead_ivsize(cipher)); + // The HW omits the initial increment of the counter field. 
+ crypto_inc(req_ctx->hw_ctx.J0+12, 4); + + ret = artpec6_crypto_setup_out_descr(common, &req_ctx->hw_ctx, + sizeof(struct artpec6_crypto_aead_hw_ctx), false, false); + if (ret) + return ret; + + { + struct artpec6_crypto_walk walk; + + artpec6_crypto_walk_init(&walk, areq->src); + + /* Associated data */ + count = areq->assoclen; + ret = artpec6_crypto_setup_sg_descrs_out(common, &walk, count); + if (ret) + return ret; + + if (!IS_ALIGNED(areq->assoclen, 16)) { + size_t assoc_pad = 16 - (areq->assoclen % 16); + /* The HW mandates zero padding here */ + ret = artpec6_crypto_setup_out_descr(common, + ac->zero_buffer, + assoc_pad, false, + false); + if (ret) + return ret; + } + + /* Data to crypto */ + count = input_length; + ret = artpec6_crypto_setup_sg_descrs_out(common, &walk, count); + if (ret) + return ret; + + if (!IS_ALIGNED(input_length, 16)) { + size_t crypto_pad = 16 - (input_length % 16); + /* The HW mandates zero padding here */ + ret = artpec6_crypto_setup_out_descr(common, + ac->zero_buffer, + crypto_pad, + false, + false); + if (ret) + return ret; + } + } + + /* Data from crypto */ + { + struct artpec6_crypto_walk walk; + size_t output_len = areq->cryptlen; + + if (req_ctx->decrypt) + output_len -= AES_BLOCK_SIZE; + + artpec6_crypto_walk_init(&walk, areq->dst); + + /* skip associated data in the output */ + count = artpec6_crypto_walk_advance(&walk, areq->assoclen); + if (count) + return -EINVAL; + + count = output_len; + ret = artpec6_crypto_setup_sg_descrs_in(common, &walk, count); + if (ret) + return ret; + + /* Put padding between the cryptotext and the auth tag */ + if (!IS_ALIGNED(output_len, 16)) { + size_t crypto_pad = 16 - (output_len % 16); + + ret = artpec6_crypto_setup_in_descr(common, + ac->pad_buffer, + crypto_pad, false); + if (ret) + return ret; + } + + /* The authentication tag shall follow immediately after + * the output ciphertext. For decryption it is put in a context + * buffer for later compare against the input tag. 
+ */ + count = AES_BLOCK_SIZE; + + if (req_ctx->decrypt) { + ret = artpec6_crypto_setup_in_descr(common, + req_ctx->decryption_tag, count, false); + if (ret) + return ret; + + } else { + ret = artpec6_crypto_setup_sg_descrs_in(common, &walk, + count); + if (ret) + return ret; + } + + } + + ret = artpec6_crypto_terminate_in_descrs(common); + if (ret) + return ret; + + ret = artpec6_crypto_terminate_out_descrs(common); + if (ret) + return ret; + + return artpec6_crypto_dma_map_descs(common); +} + +static void artpec6_crypto_process_queue(struct artpec6_crypto *ac) +{ + struct artpec6_crypto_req_common *req; + + while (!list_empty(&ac->queue) && !artpec6_crypto_busy()) { + req = list_first_entry(&ac->queue, + struct artpec6_crypto_req_common, + list); + list_move_tail(&req->list, &ac->pending); + artpec6_crypto_start_dma(req); + + req->req->complete(req->req, -EINPROGRESS); + } + + /* + * In some cases, the hardware can raise an in_eop_flush interrupt + * before actually updating the status, so we have an timer which will + * recheck the status on timeout. Since the cases are expected to be + * very rare, we use a relatively large timeout value. There should be + * no noticeable negative effect if we timeout spuriously. 
+ */ + if (ac->pending_count) + mod_timer(&ac->timer, jiffies + msecs_to_jiffies(100)); + else + del_timer(&ac->timer); +} + +static void artpec6_crypto_timeout(unsigned long data) +{ + struct artpec6_crypto *ac = (struct artpec6_crypto *) data; + + dev_info_ratelimited(artpec6_crypto_dev, "timeout\n"); + + tasklet_schedule(&ac->task); +} + +static void artpec6_crypto_task(unsigned long data) +{ + struct artpec6_crypto *ac = (struct artpec6_crypto *)data; + struct artpec6_crypto_req_common *req; + struct artpec6_crypto_req_common *n; + + if (list_empty(&ac->pending)) { + pr_debug("Spurious IRQ\n"); + return; + } + + spin_lock_bh(&ac->queue_lock); + + list_for_each_entry_safe(req, n, &ac->pending, list) { + struct artpec6_crypto_dma_descriptors *dma = req->dma; + u32 stat; + + dma_sync_single_for_cpu(artpec6_crypto_dev, dma->stat_dma_addr, + sizeof(dma->stat[0]), + DMA_BIDIRECTIONAL); + + stat = req->dma->stat[req->dma->in_cnt-1]; + + /* A non-zero final status descriptor indicates + * this job has finished. 
+ */ + pr_debug("Request %p status is %X\n", req, stat); + if (!stat) + break; + + /* Allow testing of timeout handling with fault injection */ +#ifdef CONFIG_FAULT_INJECTION + if (should_fail(&artpec6_crypto_fail_status_read, 1)) + continue; +#endif + + pr_debug("Completing request %p\n", req); + + list_del(&req->list); + + artpec6_crypto_dma_unmap_all(req); + artpec6_crypto_copy_bounce_buffers(req); + + ac->pending_count--; + artpec6_crypto_common_destroy(req); + req->complete(req->req); + } + + artpec6_crypto_process_queue(ac); + + spin_unlock_bh(&ac->queue_lock); +} + +static void artpec6_crypto_complete_crypto(struct crypto_async_request *req) +{ + req->complete(req, 0); +} + +static void +artpec6_crypto_complete_cbc_decrypt(struct crypto_async_request *req) +{ + struct skcipher_request *cipher_req = container_of(req, + struct skcipher_request, base); + + scatterwalk_map_and_copy(cipher_req->iv, cipher_req->src, + cipher_req->cryptlen - AES_BLOCK_SIZE, + AES_BLOCK_SIZE, 0); + req->complete(req, 0); +} + +static void +artpec6_crypto_complete_cbc_encrypt(struct crypto_async_request *req) +{ + struct skcipher_request *cipher_req = container_of(req, + struct skcipher_request, base); + + scatterwalk_map_and_copy(cipher_req->iv, cipher_req->dst, + cipher_req->cryptlen - AES_BLOCK_SIZE, + AES_BLOCK_SIZE, 0); + req->complete(req, 0); +} + +static void artpec6_crypto_complete_aead(struct crypto_async_request *req) +{ + int result = 0; + + /* Verify GCM hashtag. 
*/ + struct aead_request *areq = container_of(req, + struct aead_request, base); + struct artpec6_crypto_aead_req_ctx *req_ctx = aead_request_ctx(areq); + + if (req_ctx->decrypt) { + u8 input_tag[AES_BLOCK_SIZE]; + + sg_pcopy_to_buffer(areq->src, + sg_nents(areq->src), + input_tag, + AES_BLOCK_SIZE, + areq->assoclen + areq->cryptlen - + AES_BLOCK_SIZE); + + if (memcmp(req_ctx->decryption_tag, + input_tag, + AES_BLOCK_SIZE)) { + pr_debug("***EBADMSG:\n"); + print_hex_dump_debug("ref:", DUMP_PREFIX_ADDRESS, 32, 1, + input_tag, AES_BLOCK_SIZE, true); + print_hex_dump_debug("out:", DUMP_PREFIX_ADDRESS, 32, 1, + req_ctx->decryption_tag, + AES_BLOCK_SIZE, true); + + result = -EBADMSG; + } + } + + req->complete(req, result); +} + +static void artpec6_crypto_complete_hash(struct crypto_async_request *req) +{ + req->complete(req, 0); +} + + +/*------------------- Hash functions -----------------------------------------*/ +static int +artpec6_crypto_hash_set_key(struct crypto_ahash *tfm, + const u8 *key, unsigned int keylen) +{ + struct artpec6_hashalg_context *tfm_ctx = crypto_tfm_ctx(&tfm->base); + size_t blocksize; + int ret; + + if (!keylen) { + pr_err("Invalid length (%d) of HMAC key\n", + keylen); + return -EINVAL; + } + + memset(tfm_ctx->hmac_key, 0, sizeof(tfm_ctx->hmac_key)); + + blocksize = crypto_tfm_alg_blocksize(crypto_ahash_tfm(tfm)); + + if (keylen > blocksize) { + SHASH_DESC_ON_STACK(hdesc, tfm_ctx->child_hash); + + hdesc->tfm = tfm_ctx->child_hash; + hdesc->flags = crypto_ahash_get_flags(tfm) & + CRYPTO_TFM_REQ_MAY_SLEEP; + + tfm_ctx->hmac_key_length = blocksize; + ret = crypto_shash_digest(hdesc, key, keylen, + tfm_ctx->hmac_key); + if (ret) + return ret; + + } else { + memcpy(tfm_ctx->hmac_key, key, keylen); + tfm_ctx->hmac_key_length = keylen; + } + + return 0; +} + +static int +artpec6_crypto_init_hash(struct ahash_request *req, u8 type, int hmac) +{ + struct artpec6_crypto *ac = dev_get_drvdata(artpec6_crypto_dev); + enum artpec6_crypto_variant variant 
= ac->variant; + struct artpec6_hash_request_context *req_ctx = ahash_request_ctx(req); + u32 oper; + + memset(req_ctx, 0, sizeof(*req_ctx)); + + req_ctx->hash_flags = HASH_FLAG_INIT_CTX; + if (hmac) + req_ctx->hash_flags |= (HASH_FLAG_HMAC | HASH_FLAG_UPDATE_KEY); + + switch (type) { + case ARTPEC6_CRYPTO_HASH_SHA1: + oper = hmac ? regk_crypto_hmac_sha1 : regk_crypto_sha1; + break; + case ARTPEC6_CRYPTO_HASH_SHA256: + oper = hmac ? regk_crypto_hmac_sha256 : regk_crypto_sha256; + break; + case ARTPEC6_CRYPTO_HASH_SHA384: + oper = hmac ? regk_crypto_hmac_sha384 : regk_crypto_sha384; + break; + case ARTPEC6_CRYPTO_HASH_SHA512: + oper = hmac ? regk_crypto_hmac_sha512 : regk_crypto_sha512; + break; + + default: + pr_err("%s: Unsupported hash type 0x%x\n", MODULE_NAME, type); + return -EINVAL; + } + + if (variant == ARTPEC6_CRYPTO) + req_ctx->hash_md = FIELD_PREP(A6_CRY_MD_OPER, oper); + else + req_ctx->hash_md = FIELD_PREP(A7_CRY_MD_OPER, oper); + + return 0; +} + +static int artpec6_crypto_prepare_submit_hash(struct ahash_request *req) +{ + struct artpec6_hash_request_context *req_ctx = ahash_request_ctx(req); + int ret; + + if (!req_ctx->common.dma) { + ret = artpec6_crypto_common_init(&req_ctx->common, + &req->base, + artpec6_crypto_complete_hash, + NULL, 0); + + if (ret) + return ret; + } + + ret = artpec6_crypto_prepare_hash(req); + switch (ret) { + case ARTPEC6_CRYPTO_PREPARE_HASH_START: + ret = artpec6_crypto_submit(&req_ctx->common); + break; + + case ARTPEC6_CRYPTO_PREPARE_HASH_NO_START: + ret = 0; + /* Fallthrough */ + + default: + artpec6_crypto_common_destroy(&req_ctx->common); + break; + } + + return ret; +} + +static int artpec6_crypto_hash_final(struct ahash_request *req) +{ + struct artpec6_hash_request_context *req_ctx = ahash_request_ctx(req); + + req_ctx->hash_flags |= HASH_FLAG_FINALIZE; + + return artpec6_crypto_prepare_submit_hash(req); +} + +static int artpec6_crypto_hash_update(struct ahash_request *req) +{ + struct artpec6_hash_request_context 
*req_ctx = ahash_request_ctx(req); + + req_ctx->hash_flags |= HASH_FLAG_UPDATE; + + return artpec6_crypto_prepare_submit_hash(req); +} + +static int artpec6_crypto_sha1_init(struct ahash_request *req) +{ + return artpec6_crypto_init_hash(req, ARTPEC6_CRYPTO_HASH_SHA1, 0); +} + +static int artpec6_crypto_sha1_digest(struct ahash_request *req) +{ + struct artpec6_hash_request_context *req_ctx = ahash_request_ctx(req); + + artpec6_crypto_init_hash(req, ARTPEC6_CRYPTO_HASH_SHA1, 0); + + req_ctx->hash_flags |= HASH_FLAG_UPDATE | HASH_FLAG_FINALIZE; + + return artpec6_crypto_prepare_submit_hash(req); +} + +static int artpec6_crypto_sha256_init(struct ahash_request *req) +{ + return artpec6_crypto_init_hash(req, ARTPEC6_CRYPTO_HASH_SHA256, 0); +} + +static int artpec6_crypto_sha256_digest(struct ahash_request *req) +{ + struct artpec6_hash_request_context *req_ctx = ahash_request_ctx(req); + + artpec6_crypto_init_hash(req, ARTPEC6_CRYPTO_HASH_SHA256, 0); + req_ctx->hash_flags |= HASH_FLAG_UPDATE | HASH_FLAG_FINALIZE; + + return artpec6_crypto_prepare_submit_hash(req); +} + +static int __maybe_unused artpec6_crypto_sha384_init(struct ahash_request *req) +{ + return artpec6_crypto_init_hash(req, ARTPEC6_CRYPTO_HASH_SHA384, 0); +} + +static int __maybe_unused +artpec6_crypto_sha384_digest(struct ahash_request *req) +{ + struct artpec6_hash_request_context *req_ctx = ahash_request_ctx(req); + + artpec6_crypto_init_hash(req, ARTPEC6_CRYPTO_HASH_SHA384, 0); + req_ctx->hash_flags |= HASH_FLAG_UPDATE | HASH_FLAG_FINALIZE; + + return artpec6_crypto_prepare_submit_hash(req); +} + +static int artpec6_crypto_sha512_init(struct ahash_request *req) +{ + return artpec6_crypto_init_hash(req, ARTPEC6_CRYPTO_HASH_SHA512, 0); +} + +static int artpec6_crypto_sha512_digest(struct ahash_request *req) +{ + struct artpec6_hash_request_context *req_ctx = ahash_request_ctx(req); + + artpec6_crypto_init_hash(req, ARTPEC6_CRYPTO_HASH_SHA512, 0); + req_ctx->hash_flags |= HASH_FLAG_UPDATE | 
HASH_FLAG_FINALIZE; + + return artpec6_crypto_prepare_submit_hash(req); +} + +static int artpec6_crypto_hmac_sha256_init(struct ahash_request *req) +{ + return artpec6_crypto_init_hash(req, ARTPEC6_CRYPTO_HASH_SHA256, 1); +} + +static int __maybe_unused +artpec6_crypto_hmac_sha384_init(struct ahash_request *req) +{ + return artpec6_crypto_init_hash(req, ARTPEC6_CRYPTO_HASH_SHA384, 1); +} + +static int artpec6_crypto_hmac_sha512_init(struct ahash_request *req) +{ + return artpec6_crypto_init_hash(req, ARTPEC6_CRYPTO_HASH_SHA512, 1); +} + +static int artpec6_crypto_hmac_sha256_digest(struct ahash_request *req) +{ + struct artpec6_hash_request_context *req_ctx = ahash_request_ctx(req); + + artpec6_crypto_init_hash(req, ARTPEC6_CRYPTO_HASH_SHA256, 1); + req_ctx->hash_flags |= HASH_FLAG_UPDATE | HASH_FLAG_FINALIZE; + + return artpec6_crypto_prepare_submit_hash(req); +} + +static int __maybe_unused +artpec6_crypto_hmac_sha384_digest(struct ahash_request *req) +{ + struct artpec6_hash_request_context *req_ctx = ahash_request_ctx(req); + + artpec6_crypto_init_hash(req, ARTPEC6_CRYPTO_HASH_SHA384, 1); + req_ctx->hash_flags |= HASH_FLAG_UPDATE | HASH_FLAG_FINALIZE; + + return artpec6_crypto_prepare_submit_hash(req); +} + +static int artpec6_crypto_hmac_sha512_digest(struct ahash_request *req) +{ + struct artpec6_hash_request_context *req_ctx = ahash_request_ctx(req); + + artpec6_crypto_init_hash(req, ARTPEC6_CRYPTO_HASH_SHA512, 1); + req_ctx->hash_flags |= HASH_FLAG_UPDATE | HASH_FLAG_FINALIZE; + + return artpec6_crypto_prepare_submit_hash(req); +} + +static int artpec6_crypto_ahash_init_common(struct crypto_tfm *tfm, + const char *base_hash_name) +{ + struct artpec6_hashalg_context *tfm_ctx = crypto_tfm_ctx(tfm); + + crypto_ahash_set_reqsize(__crypto_ahash_cast(tfm), + sizeof(struct artpec6_hash_request_context)); + memset(tfm_ctx, 0, sizeof(*tfm_ctx)); + + if (base_hash_name) { + struct crypto_shash *child; + + child = crypto_alloc_shash(base_hash_name, 0, + 
CRYPTO_ALG_NEED_FALLBACK); + + if (IS_ERR(child)) + return PTR_ERR(child); + + tfm_ctx->child_hash = child; + } + + return 0; +} + +static int artpec6_crypto_ahash_init(struct crypto_tfm *tfm) +{ + return artpec6_crypto_ahash_init_common(tfm, NULL); +} + +static int artpec6_crypto_ahash_init_hmac_sha256(struct crypto_tfm *tfm) +{ + return artpec6_crypto_ahash_init_common(tfm, "sha256"); +} + +static int __maybe_unused +artpec6_crypto_ahash_init_hmac_sha384(struct crypto_tfm *tfm) +{ + return artpec6_crypto_ahash_init_common(tfm, "sha384"); +} + +static int artpec6_crypto_ahash_init_hmac_sha512(struct crypto_tfm *tfm) +{ + return artpec6_crypto_ahash_init_common(tfm, "sha512"); +} + +static void artpec6_crypto_ahash_exit(struct crypto_tfm *tfm) +{ + struct artpec6_hashalg_context *tfm_ctx = crypto_tfm_ctx(tfm); + + if (tfm_ctx->child_hash) + crypto_free_shash(tfm_ctx->child_hash); + + memset(tfm_ctx->hmac_key, 0, sizeof(tfm_ctx->hmac_key)); + tfm_ctx->hmac_key_length = 0; +} + +static int artpec6_crypto_hash_export(struct ahash_request *req, void *out) +{ + const struct artpec6_hash_request_context *ctx = ahash_request_ctx(req); + struct artpec6_hash_export_state *state = out; + struct artpec6_crypto *ac = dev_get_drvdata(artpec6_crypto_dev); + enum artpec6_crypto_variant variant = ac->variant; + + BUILD_BUG_ON(sizeof(state->partial_buffer) != + sizeof(ctx->partial_buffer)); + BUILD_BUG_ON(sizeof(state->digeststate) != sizeof(ctx->digeststate)); + + state->digcnt = ctx->digcnt; + state->partial_bytes = ctx->partial_bytes; + state->hash_flags = ctx->hash_flags; + + if (variant == ARTPEC6_CRYPTO) + state->oper = FIELD_GET(A6_CRY_MD_OPER, ctx->hash_md); + else + state->oper = FIELD_GET(A7_CRY_MD_OPER, ctx->hash_md); + + memcpy(state->partial_buffer, ctx->partial_buffer, + sizeof(state->partial_buffer)); + memcpy(state->digeststate, ctx->digeststate, + sizeof(state->digeststate)); + + return 0; +} + +static int artpec6_crypto_hash_import(struct ahash_request *req, const 
void *in) +{ + struct artpec6_hash_request_context *ctx = ahash_request_ctx(req); + const struct artpec6_hash_export_state *state = in; + struct artpec6_crypto *ac = dev_get_drvdata(artpec6_crypto_dev); + enum artpec6_crypto_variant variant = ac->variant; + + memset(ctx, 0, sizeof(*ctx)); + + ctx->digcnt = state->digcnt; + ctx->partial_bytes = state->partial_bytes; + ctx->hash_flags = state->hash_flags; + + if (variant == ARTPEC6_CRYPTO) + ctx->hash_md = FIELD_PREP(A6_CRY_MD_OPER, state->oper); + else + ctx->hash_md = FIELD_PREP(A7_CRY_MD_OPER, state->oper); + + memcpy(ctx->partial_buffer, state->partial_buffer, + sizeof(state->partial_buffer)); + memcpy(ctx->digeststate, state->digeststate, + sizeof(state->digeststate)); + + return 0; +} + +static int init_crypto_hw(struct artpec6_crypto *ac) +{ + enum artpec6_crypto_variant variant = ac->variant; + void __iomem *base = ac->base; + u32 out_descr_buf_size; + u32 out_data_buf_size; + u32 in_data_buf_size; + u32 in_descr_buf_size; + u32 in_stat_buf_size; + u32 in, out; + + /* + * The PDMA unit contains 1984 bytes of internal memory for the OUT + * channels and 1024 bytes for the IN channel. This is an elastic + * memory used to internally store the descriptors and data. The values + * ares specified in 64 byte incremements. Trustzone buffers are not + * used at this stage. 
+ */ + out_data_buf_size = 16; /* 1024 bytes for data */ + out_descr_buf_size = 15; /* 960 bytes for descriptors */ + in_data_buf_size = 8; /* 512 bytes for data */ + in_descr_buf_size = 4; /* 256 bytes for descriptors */ + in_stat_buf_size = 4; /* 256 bytes for stat descrs */ + + BUILD_BUG_ON_MSG((out_data_buf_size + + out_descr_buf_size) * 64 > 1984, + "Invalid OUT configuration"); + + BUILD_BUG_ON_MSG((in_data_buf_size + + in_descr_buf_size + + in_stat_buf_size) * 64 > 1024, + "Invalid IN configuration"); + + in = FIELD_PREP(PDMA_IN_BUF_CFG_DATA_BUF_SIZE, in_data_buf_size) | + FIELD_PREP(PDMA_IN_BUF_CFG_DESCR_BUF_SIZE, in_descr_buf_size) | + FIELD_PREP(PDMA_IN_BUF_CFG_STAT_BUF_SIZE, in_stat_buf_size); + + out = FIELD_PREP(PDMA_OUT_BUF_CFG_DATA_BUF_SIZE, out_data_buf_size) | + FIELD_PREP(PDMA_OUT_BUF_CFG_DESCR_BUF_SIZE, out_descr_buf_size); + + writel_relaxed(out, base + PDMA_OUT_BUF_CFG); + writel_relaxed(PDMA_OUT_CFG_EN, base + PDMA_OUT_CFG); + + if (variant == ARTPEC6_CRYPTO) { + writel_relaxed(in, base + A6_PDMA_IN_BUF_CFG); + writel_relaxed(PDMA_IN_CFG_EN, base + A6_PDMA_IN_CFG); + writel_relaxed(A6_PDMA_INTR_MASK_IN_DATA | + A6_PDMA_INTR_MASK_IN_EOP_FLUSH, + base + A6_PDMA_INTR_MASK); + } else { + writel_relaxed(in, base + A7_PDMA_IN_BUF_CFG); + writel_relaxed(PDMA_IN_CFG_EN, base + A7_PDMA_IN_CFG); + writel_relaxed(A7_PDMA_INTR_MASK_IN_DATA | + A7_PDMA_INTR_MASK_IN_EOP_FLUSH, + base + A7_PDMA_INTR_MASK); + } + + return 0; +} + +static void artpec6_crypto_disable_hw(struct artpec6_crypto *ac) +{ + enum artpec6_crypto_variant variant = ac->variant; + void __iomem *base = ac->base; + + if (variant == ARTPEC6_CRYPTO) { + writel_relaxed(A6_PDMA_IN_CMD_STOP, base + A6_PDMA_IN_CMD); + writel_relaxed(0, base + A6_PDMA_IN_CFG); + writel_relaxed(A6_PDMA_OUT_CMD_STOP, base + PDMA_OUT_CMD); + } else { + writel_relaxed(A7_PDMA_IN_CMD_STOP, base + A7_PDMA_IN_CMD); + writel_relaxed(0, base + A7_PDMA_IN_CFG); + writel_relaxed(A7_PDMA_OUT_CMD_STOP, base + PDMA_OUT_CMD); + 
} + + writel_relaxed(0, base + PDMA_OUT_CFG); + +} + +static irqreturn_t artpec6_crypto_irq(int irq, void *dev_id) +{ + struct artpec6_crypto *ac = dev_id; + enum artpec6_crypto_variant variant = ac->variant; + void __iomem *base = ac->base; + u32 mask_in_data, mask_in_eop_flush; + u32 in_cmd_flush_stat, in_cmd_reg; + u32 ack_intr_reg; + u32 ack = 0; + u32 intr; + + if (variant == ARTPEC6_CRYPTO) { + intr = readl_relaxed(base + A6_PDMA_MASKED_INTR); + mask_in_data = A6_PDMA_INTR_MASK_IN_DATA; + mask_in_eop_flush = A6_PDMA_INTR_MASK_IN_EOP_FLUSH; + in_cmd_flush_stat = A6_PDMA_IN_CMD_FLUSH_STAT; + in_cmd_reg = A6_PDMA_IN_CMD; + ack_intr_reg = A6_PDMA_ACK_INTR; + } else { + intr = readl_relaxed(base + A7_PDMA_MASKED_INTR); + mask_in_data = A7_PDMA_INTR_MASK_IN_DATA; + mask_in_eop_flush = A7_PDMA_INTR_MASK_IN_EOP_FLUSH; + in_cmd_flush_stat = A7_PDMA_IN_CMD_FLUSH_STAT; + in_cmd_reg = A7_PDMA_IN_CMD; + ack_intr_reg = A7_PDMA_ACK_INTR; + } + + /* We get two interrupt notifications from each job. + * The in_data means all data was sent to memory and then + * we request a status flush command to write the per-job + * status to its status vector. This ensures that the + * tasklet can detect exactly how many submitted jobs + * that have finished. 
+ */ + if (intr & mask_in_data) + ack |= mask_in_data; + + if (intr & mask_in_eop_flush) + ack |= mask_in_eop_flush; + else + writel_relaxed(in_cmd_flush_stat, base + in_cmd_reg); + + writel_relaxed(ack, base + ack_intr_reg); + + if (intr & mask_in_eop_flush) + tasklet_schedule(&ac->task); + + return IRQ_HANDLED; +} + +/*------------------- Algorithm definitions ----------------------------------*/ + +/* Hashes */ +static struct ahash_alg hash_algos[] = { + /* SHA-1 */ + { + .init = artpec6_crypto_sha1_init, + .update = artpec6_crypto_hash_update, + .final = artpec6_crypto_hash_final, + .digest = artpec6_crypto_sha1_digest, + .import = artpec6_crypto_hash_import, + .export = artpec6_crypto_hash_export, + .halg.digestsize = SHA1_DIGEST_SIZE, + .halg.statesize = sizeof(struct artpec6_hash_export_state), + .halg.base = { + .cra_name = "sha1", + .cra_driver_name = "artpec-sha1", + .cra_priority = 300, + .cra_flags = CRYPTO_ALG_TYPE_AHASH | CRYPTO_ALG_ASYNC, + .cra_blocksize = SHA1_BLOCK_SIZE, + .cra_ctxsize = sizeof(struct artpec6_hashalg_context), + .cra_alignmask = 3, + .cra_module = THIS_MODULE, + .cra_init = artpec6_crypto_ahash_init, + .cra_exit = artpec6_crypto_ahash_exit, + } + }, + /* SHA-256 */ + { + .init = artpec6_crypto_sha256_init, + .update = artpec6_crypto_hash_update, + .final = artpec6_crypto_hash_final, + .digest = artpec6_crypto_sha256_digest, + .import = artpec6_crypto_hash_import, + .export = artpec6_crypto_hash_export, + .halg.digestsize = SHA256_DIGEST_SIZE, + .halg.statesize = sizeof(struct artpec6_hash_export_state), + .halg.base = { + .cra_name = "sha256", + .cra_driver_name = "artpec-sha256", + .cra_priority = 300, + .cra_flags = CRYPTO_ALG_TYPE_AHASH | CRYPTO_ALG_ASYNC, + .cra_blocksize = SHA256_BLOCK_SIZE, + .cra_ctxsize = sizeof(struct artpec6_hashalg_context), + .cra_alignmask = 3, + .cra_module = THIS_MODULE, + .cra_init = artpec6_crypto_ahash_init, + .cra_exit = artpec6_crypto_ahash_exit, + } + }, + /* HMAC SHA-256 */ + { + .init = 
artpec6_crypto_hmac_sha256_init, + .update = artpec6_crypto_hash_update, + .final = artpec6_crypto_hash_final, + .digest = artpec6_crypto_hmac_sha256_digest, + .import = artpec6_crypto_hash_import, + .export = artpec6_crypto_hash_export, + .setkey = artpec6_crypto_hash_set_key, + .halg.digestsize = SHA256_DIGEST_SIZE, + .halg.statesize = sizeof(struct artpec6_hash_export_state), + .halg.base = { + .cra_name = "hmac(sha256)", + .cra_driver_name = "artpec-hmac-sha256", + .cra_priority = 300, + .cra_flags = CRYPTO_ALG_TYPE_AHASH | CRYPTO_ALG_ASYNC, + .cra_blocksize = SHA256_BLOCK_SIZE, + .cra_ctxsize = sizeof(struct artpec6_hashalg_context), + .cra_alignmask = 3, + .cra_module = THIS_MODULE, + .cra_init = artpec6_crypto_ahash_init_hmac_sha256, + .cra_exit = artpec6_crypto_ahash_exit, + } + }, +}; + +static struct ahash_alg artpec7_hash_algos[] = { + /* SHA-384 */ + { + .init = artpec6_crypto_sha384_init, + .update = artpec6_crypto_hash_update, + .final = artpec6_crypto_hash_final, + .digest = artpec6_crypto_sha384_digest, + .import = artpec6_crypto_hash_import, + .export = artpec6_crypto_hash_export, + .halg.digestsize = SHA384_DIGEST_SIZE, + .halg.statesize = sizeof(struct artpec6_hash_export_state), + .halg.base = { + .cra_name = "sha384", + .cra_driver_name = "artpec-sha384", + .cra_priority = 300, + .cra_flags = CRYPTO_ALG_TYPE_AHASH | CRYPTO_ALG_ASYNC, + .cra_blocksize = SHA384_BLOCK_SIZE, + .cra_ctxsize = sizeof(struct artpec6_hashalg_context), + .cra_alignmask = 3, + .cra_module = THIS_MODULE, + .cra_init = artpec6_crypto_ahash_init, + .cra_exit = artpec6_crypto_ahash_exit, + } + }, + /* HMAC SHA-384 */ + { + .init = artpec6_crypto_hmac_sha384_init, + .update = artpec6_crypto_hash_update, + .final = artpec6_crypto_hash_final, + .digest = artpec6_crypto_hmac_sha384_digest, + .import = artpec6_crypto_hash_import, + .export = artpec6_crypto_hash_export, + .setkey = artpec6_crypto_hash_set_key, + .halg.digestsize = SHA384_DIGEST_SIZE, + .halg.statesize = 
sizeof(struct artpec6_hash_export_state), + .halg.base = { + .cra_name = "hmac(sha384)", + .cra_driver_name = "artpec-hmac-sha384", + .cra_priority = 300, + .cra_flags = CRYPTO_ALG_TYPE_AHASH | CRYPTO_ALG_ASYNC, + .cra_blocksize = SHA384_BLOCK_SIZE, + .cra_ctxsize = sizeof(struct artpec6_hashalg_context), + .cra_alignmask = 3, + .cra_module = THIS_MODULE, + .cra_init = artpec6_crypto_ahash_init_hmac_sha384, + .cra_exit = artpec6_crypto_ahash_exit, + } + }, + /* SHA-512 */ + { + .init = artpec6_crypto_sha512_init, + .update = artpec6_crypto_hash_update, + .final = artpec6_crypto_hash_final, + .digest = artpec6_crypto_sha512_digest, + .import = artpec6_crypto_hash_import, + .export = artpec6_crypto_hash_export, + .halg.digestsize = SHA512_DIGEST_SIZE, + .halg.statesize = sizeof(struct artpec6_hash_export_state), + .halg.base = { + .cra_name = "sha512", + .cra_driver_name = "artpec-sha512", + .cra_priority = 300, + .cra_flags = CRYPTO_ALG_TYPE_AHASH | CRYPTO_ALG_ASYNC, + .cra_blocksize = SHA512_BLOCK_SIZE, + .cra_ctxsize = sizeof(struct artpec6_hashalg_context), + .cra_alignmask = 3, + .cra_module = THIS_MODULE, + .cra_init = artpec6_crypto_ahash_init, + .cra_exit = artpec6_crypto_ahash_exit, + } + }, + /* HMAC SHA-512 */ + { + .init = artpec6_crypto_hmac_sha512_init, + .update = artpec6_crypto_hash_update, + .final = artpec6_crypto_hash_final, + .digest = artpec6_crypto_hmac_sha512_digest, + .import = artpec6_crypto_hash_import, + .export = artpec6_crypto_hash_export, + .setkey = artpec6_crypto_hash_set_key, + .halg.digestsize = SHA512_DIGEST_SIZE, + .halg.statesize = sizeof(struct artpec6_hash_export_state), + .halg.base = { + .cra_name = "hmac(sha512)", + .cra_driver_name = "artpec-hmac-sha512", + .cra_priority = 300, + .cra_flags = CRYPTO_ALG_TYPE_AHASH | CRYPTO_ALG_ASYNC, + .cra_blocksize = SHA512_BLOCK_SIZE, + .cra_ctxsize = sizeof(struct artpec6_hashalg_context), + .cra_alignmask = 3, + .cra_module = THIS_MODULE, + .cra_init = 
artpec6_crypto_ahash_init_hmac_sha512, + .cra_exit = artpec6_crypto_ahash_exit, + } + }, +}; + +/* Crypto */ +static struct skcipher_alg crypto_algos[] = { + /* AES - ECB */ + { + .base = { + .cra_name = "ecb(aes)", + .cra_driver_name = "artpec6-ecb-aes", + .cra_priority = 300, + .cra_flags = CRYPTO_ALG_TYPE_SKCIPHER | + CRYPTO_ALG_ASYNC, + .cra_blocksize = AES_BLOCK_SIZE, + .cra_ctxsize = sizeof(struct artpec6_cryptotfm_context), + .cra_alignmask = 3, + .cra_module = THIS_MODULE, + }, + .min_keysize = AES_MIN_KEY_SIZE, + .max_keysize = AES_MAX_KEY_SIZE, + .setkey = artpec6_crypto_cipher_set_key, + .encrypt = artpec6_crypto_encrypt, + .decrypt = artpec6_crypto_decrypt, + .init = artpec6_crypto_aes_ecb_init, + .exit = artpec6_crypto_aes_exit, + }, + /* AES - CTR */ + { + .base = { + .cra_name = "ctr(aes)", + .cra_driver_name = "artpec6-ctr-aes", + .cra_priority = 300, + .cra_flags = CRYPTO_ALG_TYPE_SKCIPHER | + CRYPTO_ALG_ASYNC | + CRYPTO_ALG_NEED_FALLBACK, + .cra_blocksize = 1, + .cra_ctxsize = sizeof(struct artpec6_cryptotfm_context), + .cra_alignmask = 3, + .cra_module = THIS_MODULE, + }, + .min_keysize = AES_MIN_KEY_SIZE, + .max_keysize = AES_MAX_KEY_SIZE, + .ivsize = AES_BLOCK_SIZE, + .setkey = artpec6_crypto_cipher_set_key, + .encrypt = artpec6_crypto_ctr_encrypt, + .decrypt = artpec6_crypto_ctr_decrypt, + .init = artpec6_crypto_aes_ctr_init, + .exit = artpec6_crypto_aes_ctr_exit, + }, + /* AES - CBC */ + { + .base = { + .cra_name = "cbc(aes)", + .cra_driver_name = "artpec6-cbc-aes", + .cra_priority = 300, + .cra_flags = CRYPTO_ALG_TYPE_SKCIPHER | + CRYPTO_ALG_ASYNC, + .cra_blocksize = AES_BLOCK_SIZE, + .cra_ctxsize = sizeof(struct artpec6_cryptotfm_context), + .cra_alignmask = 3, + .cra_module = THIS_MODULE, + }, + .min_keysize = AES_MIN_KEY_SIZE, + .max_keysize = AES_MAX_KEY_SIZE, + .ivsize = AES_BLOCK_SIZE, + .setkey = artpec6_crypto_cipher_set_key, + .encrypt = artpec6_crypto_encrypt, + .decrypt = artpec6_crypto_decrypt, + .init = 
artpec6_crypto_aes_cbc_init, + .exit = artpec6_crypto_aes_exit + }, + /* AES - XTS */ + { + .base = { + .cra_name = "xts(aes)", + .cra_driver_name = "artpec6-xts-aes", + .cra_priority = 300, + .cra_flags = CRYPTO_ALG_TYPE_SKCIPHER | + CRYPTO_ALG_ASYNC, + .cra_blocksize = 1, + .cra_ctxsize = sizeof(struct artpec6_cryptotfm_context), + .cra_alignmask = 3, + .cra_module = THIS_MODULE, + }, + .min_keysize = 2*AES_MIN_KEY_SIZE, + .max_keysize = 2*AES_MAX_KEY_SIZE, + .ivsize = 16, + .setkey = artpec6_crypto_xts_set_key, + .encrypt = artpec6_crypto_encrypt, + .decrypt = artpec6_crypto_decrypt, + .init = artpec6_crypto_aes_xts_init, + .exit = artpec6_crypto_aes_exit, + }, +}; + +static struct aead_alg aead_algos[] = { + { + .init = artpec6_crypto_aead_init, + .setkey = artpec6_crypto_aead_set_key, + .encrypt = artpec6_crypto_aead_encrypt, + .decrypt = artpec6_crypto_aead_decrypt, + .ivsize = AES_BLOCK_SIZE, + .maxauthsize = AES_BLOCK_SIZE, + + .base = { + .cra_name = "gcm(aes)", + .cra_driver_name = "artpec-gcm-aes", + .cra_priority = 300, + .cra_flags = CRYPTO_ALG_TYPE_AEAD | CRYPTO_ALG_ASYNC | + CRYPTO_ALG_KERN_DRIVER_ONLY, + .cra_blocksize = 1, + .cra_ctxsize = sizeof(struct artpec6_cryptotfm_context), + .cra_alignmask = 3, + .cra_module = THIS_MODULE, + }, + } +}; + +#ifdef CONFIG_DEBUG_FS + +struct dbgfs_u32 { + char *name; + mode_t mode; + u32 *flag; + char *desc; +}; + +static void artpec6_crypto_init_debugfs(void) +{ + dbgfs_root = debugfs_create_dir("artpec6_crypto", NULL); + + if (!dbgfs_root || IS_ERR(dbgfs_root)) { + dbgfs_root = NULL; + pr_err("%s: Could not initialise debugfs!\n", MODULE_NAME); + return; + } + +#ifdef CONFIG_FAULT_INJECTION + fault_create_debugfs_attr("fail_status_read", dbgfs_root, + &artpec6_crypto_fail_status_read); + + fault_create_debugfs_attr("fail_dma_array_full", dbgfs_root, + &artpec6_crypto_fail_dma_array_full); +#endif +} + +static void artpec6_crypto_free_debugfs(void) +{ + if (!dbgfs_root) + return; + + 
debugfs_remove_recursive(dbgfs_root); + dbgfs_root = NULL; +} +#endif + +static const struct of_device_id artpec6_crypto_of_match[] = { + { .compatible = "axis,artpec6-crypto", .data = (void *)ARTPEC6_CRYPTO }, + { .compatible = "axis,artpec7-crypto", .data = (void *)ARTPEC7_CRYPTO }, + {} +}; +MODULE_DEVICE_TABLE(of, artpec6_crypto_of_match); + +static int artpec6_crypto_probe(struct platform_device *pdev) +{ + const struct of_device_id *match; + enum artpec6_crypto_variant variant; + struct artpec6_crypto *ac; + struct device *dev = &pdev->dev; + void __iomem *base; + struct resource *res; + int irq; + int err; + + if (artpec6_crypto_dev) + return -ENODEV; + + match = of_match_node(artpec6_crypto_of_match, dev->of_node); + if (!match) + return -EINVAL; + + variant = (enum artpec6_crypto_variant)match->data; + + res = platform_get_resource(pdev, IORESOURCE_MEM, 0); + if (!res) + return -ENODEV; + + base = devm_ioremap_resource(&pdev->dev, res); + if (IS_ERR(base)) + return PTR_ERR(base); + + irq = platform_get_irq(pdev, 0); + if (irq < 0) + return -ENODEV; + + ac = devm_kzalloc(&pdev->dev, sizeof(struct artpec6_crypto), + GFP_KERNEL); + if (!ac) + return -ENOMEM; + + platform_set_drvdata(pdev, ac); + ac->variant = variant; + + spin_lock_init(&ac->queue_lock); + INIT_LIST_HEAD(&ac->queue); + INIT_LIST_HEAD(&ac->pending); + setup_timer(&ac->timer, artpec6_crypto_timeout, (unsigned long) ac); + + ac->base = base; + + ac->dma_cache = kmem_cache_create("artpec6_crypto_dma", + sizeof(struct artpec6_crypto_dma_descriptors), + 64, + 0, + NULL); + if (!ac->dma_cache) + return -ENOMEM; + +#ifdef CONFIG_DEBUG_FS + artpec6_crypto_init_debugfs(); +#endif + + tasklet_init(&ac->task, artpec6_crypto_task, + (unsigned long)ac); + + ac->pad_buffer = devm_kzalloc(&pdev->dev, 2 * ARTPEC_CACHE_LINE_MAX, + GFP_KERNEL); + if (!ac->pad_buffer) + return -ENOMEM; + ac->pad_buffer = PTR_ALIGN(ac->pad_buffer, ARTPEC_CACHE_LINE_MAX); + + ac->zero_buffer = devm_kzalloc(&pdev->dev, 2 * 
ARTPEC_CACHE_LINE_MAX, + GFP_KERNEL); + if (!ac->zero_buffer) + return -ENOMEM; + ac->zero_buffer = PTR_ALIGN(ac->zero_buffer, ARTPEC_CACHE_LINE_MAX); + + err = init_crypto_hw(ac); + if (err) + goto free_cache; + + err = devm_request_irq(&pdev->dev, irq, artpec6_crypto_irq, 0, + "artpec6-crypto", ac); + if (err) + goto disable_hw; + + artpec6_crypto_dev = &pdev->dev; + + err = crypto_register_ahashes(hash_algos, ARRAY_SIZE(hash_algos)); + if (err) { + dev_err(dev, "Failed to register ahashes\n"); + goto disable_hw; + } + + if (variant != ARTPEC6_CRYPTO) { + err = crypto_register_ahashes(artpec7_hash_algos, + ARRAY_SIZE(artpec7_hash_algos)); + if (err) { + dev_err(dev, "Failed to register ahashes\n"); + goto unregister_ahashes; + } + } + + err = crypto_register_skciphers(crypto_algos, ARRAY_SIZE(crypto_algos)); + if (err) { + dev_err(dev, "Failed to register ciphers\n"); + goto unregister_a7_ahashes; + } + + err = crypto_register_aeads(aead_algos, ARRAY_SIZE(aead_algos)); + if (err) { + dev_err(dev, "Failed to register aeads\n"); + goto unregister_algs; + } + + return 0; + +unregister_algs: + crypto_unregister_skciphers(crypto_algos, ARRAY_SIZE(crypto_algos)); +unregister_a7_ahashes: + if (variant != ARTPEC6_CRYPTO) + crypto_unregister_ahashes(artpec7_hash_algos, + ARRAY_SIZE(artpec7_hash_algos)); +unregister_ahashes: + crypto_unregister_ahashes(hash_algos, ARRAY_SIZE(hash_algos)); +disable_hw: + artpec6_crypto_disable_hw(ac); +free_cache: + kmem_cache_destroy(ac->dma_cache); + return err; +} + +static int artpec6_crypto_remove(struct platform_device *pdev) +{ + struct artpec6_crypto *ac = platform_get_drvdata(pdev); + int irq = platform_get_irq(pdev, 0); + + crypto_unregister_ahashes(hash_algos, ARRAY_SIZE(hash_algos)); + if (ac->variant != ARTPEC6_CRYPTO) + crypto_unregister_ahashes(artpec7_hash_algos, + ARRAY_SIZE(artpec7_hash_algos)); + crypto_unregister_skciphers(crypto_algos, ARRAY_SIZE(crypto_algos)); + crypto_unregister_aeads(aead_algos, 
ARRAY_SIZE(aead_algos)); + + tasklet_disable(&ac->task); + devm_free_irq(&pdev->dev, irq, ac); + tasklet_kill(&ac->task); + del_timer_sync(&ac->timer); + + artpec6_crypto_disable_hw(ac); + + kmem_cache_destroy(ac->dma_cache); +#ifdef CONFIG_DEBUG_FS + artpec6_crypto_free_debugfs(); +#endif + return 0; +} + +static struct platform_driver artpec6_crypto_driver = { + .probe = artpec6_crypto_probe, + .remove = artpec6_crypto_remove, + .driver = { + .name = "artpec6-crypto", + .owner = THIS_MODULE, + .of_match_table = artpec6_crypto_of_match, + }, +}; + +module_platform_driver(artpec6_crypto_driver); + +MODULE_AUTHOR("Axis Communications AB"); +MODULE_DESCRIPTION("ARTPEC-6 Crypto driver"); +MODULE_LICENSE("GPL"); diff --git a/drivers/crypto/bcm/cipher.c b/drivers/crypto/bcm/cipher.c index 9cfd36c1bcb6..8685c7e4debd 100644 --- a/drivers/crypto/bcm/cipher.c +++ b/drivers/crypto/bcm/cipher.c @@ -90,8 +90,6 @@ static int aead_pri = 150; module_param(aead_pri, int, 0644); MODULE_PARM_DESC(aead_pri, "Priority for AEAD algos"); -#define MAX_SPUS 16 - /* A type 3 BCM header, expected to precede the SPU header for SPU-M. * Bits 3 and 4 in the first byte encode the channel number (the dma ringset). 
* 0x60 - ring 0 @@ -120,7 +118,7 @@ static u8 select_channel(void) { u8 chan_idx = atomic_inc_return(&iproc_priv.next_chan); - return chan_idx % iproc_priv.spu.num_spu; + return chan_idx % iproc_priv.spu.num_chan; } /** @@ -4528,8 +4526,13 @@ static void spu_functions_register(struct device *dev, */ static int spu_mb_init(struct device *dev) { - struct mbox_client *mcl = &iproc_priv.mcl[iproc_priv.spu.num_spu]; - int err; + struct mbox_client *mcl = &iproc_priv.mcl; + int err, i; + + iproc_priv.mbox = devm_kcalloc(dev, iproc_priv.spu.num_chan, + sizeof(struct mbox_chan *), GFP_KERNEL); + if (!iproc_priv.mbox) + return -ENOMEM; mcl->dev = dev; mcl->tx_block = false; @@ -4538,25 +4541,33 @@ static int spu_mb_init(struct device *dev) mcl->rx_callback = spu_rx_callback; mcl->tx_done = NULL; - iproc_priv.mbox[iproc_priv.spu.num_spu] = - mbox_request_channel(mcl, 0); - if (IS_ERR(iproc_priv.mbox[iproc_priv.spu.num_spu])) { - err = (int)PTR_ERR(iproc_priv.mbox[iproc_priv.spu.num_spu]); - dev_err(dev, - "Mbox channel %d request failed with err %d", - iproc_priv.spu.num_spu, err); - iproc_priv.mbox[iproc_priv.spu.num_spu] = NULL; - return err; + for (i = 0; i < iproc_priv.spu.num_chan; i++) { + iproc_priv.mbox[i] = mbox_request_channel(mcl, i); + if (IS_ERR(iproc_priv.mbox[i])) { + err = (int)PTR_ERR(iproc_priv.mbox[i]); + dev_err(dev, + "Mbox channel %d request failed with err %d", + i, err); + iproc_priv.mbox[i] = NULL; + goto free_channels; + } } return 0; +free_channels: + for (i = 0; i < iproc_priv.spu.num_chan; i++) { + if (iproc_priv.mbox[i]) + mbox_free_channel(iproc_priv.mbox[i]); + } + + return err; } static void spu_mb_release(struct platform_device *pdev) { int i; - for (i = 0; i < iproc_priv.spu.num_spu; i++) + for (i = 0; i < iproc_priv.spu.num_chan; i++) mbox_free_channel(iproc_priv.mbox[i]); } @@ -4567,7 +4578,7 @@ static void spu_counters_init(void) atomic_set(&iproc_priv.session_count, 0); atomic_set(&iproc_priv.stream_count, 0); - 
atomic_set(&iproc_priv.next_chan, (int)iproc_priv.spu.num_spu); + atomic_set(&iproc_priv.next_chan, (int)iproc_priv.spu.num_chan); atomic64_set(&iproc_priv.bytes_in, 0); atomic64_set(&iproc_priv.bytes_out, 0); for (i = 0; i < SPU_OP_NUM; i++) { @@ -4809,47 +4820,38 @@ static int spu_dt_read(struct platform_device *pdev) struct resource *spu_ctrl_regs; const struct of_device_id *match; const struct spu_type_subtype *matched_spu_type; - void __iomem *spu_reg_vbase[MAX_SPUS]; - int err; + struct device_node *dn = pdev->dev.of_node; + int err, i; - match = of_match_device(of_match_ptr(bcm_spu_dt_ids), dev); - matched_spu_type = match->data; + /* Count number of mailbox channels */ + spu->num_chan = of_count_phandle_with_args(dn, "mboxes", "#mbox-cells"); - if (iproc_priv.spu.num_spu > 1) { - /* If this is 2nd or later SPU, make sure it's same type */ - if ((spu->spu_type != matched_spu_type->type) || - (spu->spu_subtype != matched_spu_type->subtype)) { - err = -EINVAL; - dev_err(&pdev->dev, "Multiple SPU types not allowed"); - return err; - } - } else { - /* Record type of first SPU */ - spu->spu_type = matched_spu_type->type; - spu->spu_subtype = matched_spu_type->subtype; + match = of_match_device(of_match_ptr(bcm_spu_dt_ids), dev); + if (!match) { + dev_err(&pdev->dev, "Failed to match device\n"); + return -ENODEV; } - /* Get and map SPU registers */ - spu_ctrl_regs = platform_get_resource(pdev, IORESOURCE_MEM, 0); - if (!spu_ctrl_regs) { - err = -EINVAL; - dev_err(&pdev->dev, "Invalid/missing registers for SPU\n"); - return err; - } + matched_spu_type = match->data; - spu_reg_vbase[iproc_priv.spu.num_spu] = - devm_ioremap_resource(dev, spu_ctrl_regs); - if (IS_ERR(spu_reg_vbase[iproc_priv.spu.num_spu])) { - err = PTR_ERR(spu_reg_vbase[iproc_priv.spu.num_spu]); - dev_err(&pdev->dev, "Failed to map registers: %d\n", - err); - spu_reg_vbase[iproc_priv.spu.num_spu] = NULL; - return err; - } + spu->spu_type = matched_spu_type->type; + spu->spu_subtype = 
matched_spu_type->subtype; - dev_dbg(dev, "SPU %d detected.", iproc_priv.spu.num_spu); + i = 0; + for (i = 0; (i < MAX_SPUS) && ((spu_ctrl_regs = + platform_get_resource(pdev, IORESOURCE_MEM, i)) != NULL); i++) { - spu->reg_vbase[iproc_priv.spu.num_spu] = spu_reg_vbase; + spu->reg_vbase[i] = devm_ioremap_resource(dev, spu_ctrl_regs); + if (IS_ERR(spu->reg_vbase[i])) { + err = PTR_ERR(spu->reg_vbase[i]); + dev_err(&pdev->dev, "Failed to map registers: %d\n", + err); + spu->reg_vbase[i] = NULL; + return err; + } + } + spu->num_spu = i; + dev_dbg(dev, "Device has %d SPUs", spu->num_spu); return 0; } @@ -4860,8 +4862,8 @@ int bcm_spu_probe(struct platform_device *pdev) struct spu_hw *spu = &iproc_priv.spu; int err = 0; - iproc_priv.pdev[iproc_priv.spu.num_spu] = pdev; - platform_set_drvdata(iproc_priv.pdev[iproc_priv.spu.num_spu], + iproc_priv.pdev = pdev; + platform_set_drvdata(iproc_priv.pdev, &iproc_priv); err = spu_dt_read(pdev); @@ -4872,12 +4874,6 @@ int bcm_spu_probe(struct platform_device *pdev) if (err < 0) goto failure; - iproc_priv.spu.num_spu++; - - /* If already initialized, we've just added another SPU and are done */ - if (iproc_priv.inited) - return 0; - if (spu->spu_type == SPU_TYPE_SPUM) iproc_priv.bcm_hdr_len = 8; else if (spu->spu_type == SPU_TYPE_SPU2) @@ -4893,8 +4889,6 @@ int bcm_spu_probe(struct platform_device *pdev) if (err < 0) goto fail_reg; - iproc_priv.inited = true; - return 0; fail_reg: diff --git a/drivers/crypto/bcm/cipher.h b/drivers/crypto/bcm/cipher.h index 51dca529ce8f..57a55eb2a255 100644 --- a/drivers/crypto/bcm/cipher.h +++ b/drivers/crypto/bcm/cipher.h @@ -427,10 +427,13 @@ struct spu_hw { /* The number of SPUs on this platform */ u32 num_spu; + + /* The number of SPU channels on this platform */ + u32 num_chan; }; struct device_private { - struct platform_device *pdev[MAX_SPUS]; + struct platform_device *pdev; struct spu_hw spu; @@ -470,12 +473,10 @@ struct device_private { /* Number of ICV check failures for AEAD messages */ 
atomic_t bad_icv; - struct mbox_client mcl[MAX_SPUS]; - /* Array of mailbox channel pointers, one for each channel */ - struct mbox_chan *mbox[MAX_SPUS]; + struct mbox_client mcl; - /* Driver initialized */ - bool inited; + /* Array of mailbox channel pointers, one for each channel */ + struct mbox_chan **mbox; }; extern struct device_private iproc_priv; diff --git a/drivers/crypto/caam/caamalg.c b/drivers/crypto/caam/caamalg.c index 0488b7f81dcf..54f3b375a453 100644 --- a/drivers/crypto/caam/caamalg.c +++ b/drivers/crypto/caam/caamalg.c @@ -81,40 +81,6 @@ #define debug(format, arg...) #endif -#ifdef DEBUG -#include <linux/highmem.h> - -static void dbg_dump_sg(const char *level, const char *prefix_str, - int prefix_type, int rowsize, int groupsize, - struct scatterlist *sg, size_t tlen, bool ascii) -{ - struct scatterlist *it; - void *it_page; - size_t len; - void *buf; - - for (it = sg; it != NULL && tlen > 0 ; it = sg_next(sg)) { - /* - * make sure the scatterlist's page - * has a valid virtual memory mapping - */ - it_page = kmap_atomic(sg_page(it)); - if (unlikely(!it_page)) { - printk(KERN_ERR "dbg_dump_sg: kmap failed\n"); - return; - } - - buf = it_page + it->offset; - len = min_t(size_t, tlen, it->length); - print_hex_dump(level, prefix_str, prefix_type, rowsize, - groupsize, buf, len, ascii); - tlen -= len; - - kunmap_atomic(it_page); - } -} -#endif - static struct list_head alg_list; struct caam_alg_entry { @@ -898,10 +864,10 @@ static void ablkcipher_encrypt_done(struct device *jrdev, u32 *desc, u32 err, print_hex_dump(KERN_ERR, "dstiv @"__stringify(__LINE__)": ", DUMP_PREFIX_ADDRESS, 16, 4, req->info, edesc->src_nents > 1 ? 100 : ivsize, 1); - dbg_dump_sg(KERN_ERR, "dst @"__stringify(__LINE__)": ", - DUMP_PREFIX_ADDRESS, 16, 4, req->dst, - edesc->dst_nents > 1 ? 100 : req->nbytes, 1); #endif + caam_dump_sg(KERN_ERR, "dst @" __stringify(__LINE__)": ", + DUMP_PREFIX_ADDRESS, 16, 4, req->dst, + edesc->dst_nents > 1 ? 
100 : req->nbytes, 1); ablkcipher_unmap(jrdev, edesc, req); @@ -937,10 +903,10 @@ static void ablkcipher_decrypt_done(struct device *jrdev, u32 *desc, u32 err, print_hex_dump(KERN_ERR, "dstiv @"__stringify(__LINE__)": ", DUMP_PREFIX_ADDRESS, 16, 4, req->info, ivsize, 1); - dbg_dump_sg(KERN_ERR, "dst @"__stringify(__LINE__)": ", - DUMP_PREFIX_ADDRESS, 16, 4, req->dst, - edesc->dst_nents > 1 ? 100 : req->nbytes, 1); #endif + caam_dump_sg(KERN_ERR, "dst @" __stringify(__LINE__)": ", + DUMP_PREFIX_ADDRESS, 16, 4, req->dst, + edesc->dst_nents > 1 ? 100 : req->nbytes, 1); ablkcipher_unmap(jrdev, edesc, req); @@ -1107,10 +1073,10 @@ static void init_ablkcipher_job(u32 *sh_desc, dma_addr_t ptr, ivsize, 1); pr_err("asked=%d, nbytes%d\n", (int)edesc->src_nents > 1 ? 100 : req->nbytes, req->nbytes); - dbg_dump_sg(KERN_ERR, "src @"__stringify(__LINE__)": ", - DUMP_PREFIX_ADDRESS, 16, 4, req->src, - edesc->src_nents > 1 ? 100 : req->nbytes, 1); #endif + caam_dump_sg(KERN_ERR, "src @" __stringify(__LINE__)": ", + DUMP_PREFIX_ADDRESS, 16, 4, req->src, + edesc->src_nents > 1 ? 100 : req->nbytes, 1); len = desc_len(sh_desc); init_job_desc_shared(desc, ptr, len, HDR_SHARE_DEFER | HDR_REVERSE); @@ -1164,10 +1130,10 @@ static void init_ablkcipher_giv_job(u32 *sh_desc, dma_addr_t ptr, print_hex_dump(KERN_ERR, "presciv@" __stringify(__LINE__) ": ", DUMP_PREFIX_ADDRESS, 16, 4, req->info, ivsize, 1); - dbg_dump_sg(KERN_ERR, "src @" __stringify(__LINE__) ": ", - DUMP_PREFIX_ADDRESS, 16, 4, req->src, - edesc->src_nents > 1 ? 100 : req->nbytes, 1); #endif + caam_dump_sg(KERN_ERR, "src @" __stringify(__LINE__) ": ", + DUMP_PREFIX_ADDRESS, 16, 4, req->src, + edesc->src_nents > 1 ? 
100 : req->nbytes, 1); len = desc_len(sh_desc); init_job_desc_shared(desc, ptr, len, HDR_SHARE_DEFER | HDR_REVERSE); @@ -1449,11 +1415,9 @@ static int aead_decrypt(struct aead_request *req) u32 *desc; int ret = 0; -#ifdef DEBUG - dbg_dump_sg(KERN_ERR, "dec src@"__stringify(__LINE__)": ", - DUMP_PREFIX_ADDRESS, 16, 4, req->src, - req->assoclen + req->cryptlen, 1); -#endif + caam_dump_sg(KERN_ERR, "dec src@" __stringify(__LINE__)": ", + DUMP_PREFIX_ADDRESS, 16, 4, req->src, + req->assoclen + req->cryptlen, 1); /* allocate extended descriptor */ edesc = aead_edesc_alloc(req, AUTHENC_DESC_JOB_IO_LEN, diff --git a/drivers/crypto/caam/caamalg_desc.c b/drivers/crypto/caam/caamalg_desc.c index 6f9c7ec0e339..530c14ee32de 100644 --- a/drivers/crypto/caam/caamalg_desc.c +++ b/drivers/crypto/caam/caamalg_desc.c @@ -599,7 +599,7 @@ void cnstr_shdsc_gcm_encap(u32 * const desc, struct alginfo *cdata, /* skip key loading if they are loaded due to sharing */ key_jump_cmd = append_jump(desc, JUMP_JSL | JUMP_TEST_ALL | - JUMP_COND_SHRD | JUMP_COND_SELF); + JUMP_COND_SHRD); if (cdata->key_inline) append_key_as_imm(desc, cdata->key_virt, cdata->keylen, cdata->keylen, CLASS_1 | KEY_DEST_CLASS_REG); @@ -688,8 +688,7 @@ void cnstr_shdsc_gcm_decap(u32 * const desc, struct alginfo *cdata, /* skip key loading if they are loaded due to sharing */ key_jump_cmd = append_jump(desc, JUMP_JSL | - JUMP_TEST_ALL | JUMP_COND_SHRD | - JUMP_COND_SELF); + JUMP_TEST_ALL | JUMP_COND_SHRD); if (cdata->key_inline) append_key_as_imm(desc, cdata->key_virt, cdata->keylen, cdata->keylen, CLASS_1 | KEY_DEST_CLASS_REG); diff --git a/drivers/crypto/caam/caamalg_qi.c b/drivers/crypto/caam/caamalg_qi.c index 78c4c0485c58..2eefc4a26bc2 100644 --- a/drivers/crypto/caam/caamalg_qi.c +++ b/drivers/crypto/caam/caamalg_qi.c @@ -12,7 +12,6 @@ #include "intern.h" #include "desc_constr.h" #include "error.h" -#include "sg_sw_sec4.h" #include "sg_sw_qm.h" #include "key_gen.h" #include "qi.h" @@ -399,6 +398,7 @@ badkey: * 
@iv_dma: dma address of iv for checking continuity and link table * @qm_sg_bytes: length of dma mapped h/w link table * @qm_sg_dma: bus physical mapped address of h/w link table + * @assoclen: associated data length, in CAAM endianness * @assoclen_dma: bus physical mapped address of req->assoclen * @drv_req: driver-specific request structure * @sgt: the h/w link table @@ -409,8 +409,12 @@ struct aead_edesc { dma_addr_t iv_dma; int qm_sg_bytes; dma_addr_t qm_sg_dma; + unsigned int assoclen; dma_addr_t assoclen_dma; struct caam_drv_req drv_req; +#define CAAM_QI_MAX_AEAD_SG \ + ((CAAM_QI_MEMCACHE_SIZE - offsetof(struct aead_edesc, sgt)) / \ + sizeof(struct qm_sg_entry)) struct qm_sg_entry sgt[0]; }; @@ -431,6 +435,9 @@ struct ablkcipher_edesc { int qm_sg_bytes; dma_addr_t qm_sg_dma; struct caam_drv_req drv_req; +#define CAAM_QI_MAX_ABLKCIPHER_SG \ + ((CAAM_QI_MEMCACHE_SIZE - offsetof(struct ablkcipher_edesc, sgt)) / \ + sizeof(struct qm_sg_entry)) struct qm_sg_entry sgt[0]; }; @@ -660,6 +667,14 @@ static struct aead_edesc *aead_edesc_alloc(struct aead_request *req, */ qm_sg_ents = 1 + !!ivsize + mapped_src_nents + (mapped_dst_nents > 1 ? 
mapped_dst_nents : 0); + if (unlikely(qm_sg_ents > CAAM_QI_MAX_AEAD_SG)) { + dev_err(qidev, "Insufficient S/G entries: %d > %lu\n", + qm_sg_ents, CAAM_QI_MAX_AEAD_SG); + caam_unmap(qidev, req->src, req->dst, src_nents, dst_nents, + iv_dma, ivsize, op_type, 0, 0); + qi_cache_free(edesc); + return ERR_PTR(-ENOMEM); + } sg_table = &edesc->sgt[0]; qm_sg_bytes = qm_sg_ents * sizeof(*sg_table); @@ -670,7 +685,8 @@ static struct aead_edesc *aead_edesc_alloc(struct aead_request *req, edesc->drv_req.cbk = aead_done; edesc->drv_req.drv_ctx = drv_ctx; - edesc->assoclen_dma = dma_map_single(qidev, &req->assoclen, 4, + edesc->assoclen = cpu_to_caam32(req->assoclen); + edesc->assoclen_dma = dma_map_single(qidev, &edesc->assoclen, 4, DMA_TO_DEVICE); if (dma_mapping_error(qidev, edesc->assoclen_dma)) { dev_err(qidev, "unable to map assoclen\n"); @@ -776,9 +792,9 @@ static void ablkcipher_done(struct caam_drv_req *drv_req, u32 status) struct crypto_ablkcipher *ablkcipher = crypto_ablkcipher_reqtfm(req); struct caam_ctx *caam_ctx = crypto_ablkcipher_ctx(ablkcipher); struct device *qidev = caam_ctx->qidev; -#ifdef DEBUG int ivsize = crypto_ablkcipher_ivsize(ablkcipher); +#ifdef DEBUG dev_err(qidev, "%s %d: status 0x%x\n", __func__, __LINE__, status); #endif @@ -791,14 +807,21 @@ static void ablkcipher_done(struct caam_drv_req *drv_req, u32 status) print_hex_dump(KERN_ERR, "dstiv @" __stringify(__LINE__)": ", DUMP_PREFIX_ADDRESS, 16, 4, req->info, edesc->src_nents > 1 ? 100 : ivsize, 1); - dbg_dump_sg(KERN_ERR, "dst @" __stringify(__LINE__)": ", - DUMP_PREFIX_ADDRESS, 16, 4, req->dst, - edesc->dst_nents > 1 ? 100 : req->nbytes, 1); + caam_dump_sg(KERN_ERR, "dst @" __stringify(__LINE__)": ", + DUMP_PREFIX_ADDRESS, 16, 4, req->dst, + edesc->dst_nents > 1 ? 100 : req->nbytes, 1); #endif ablkcipher_unmap(qidev, edesc, req); qi_cache_free(edesc); + /* + * The crypto API expects us to set the IV (req->info) to the last + * ciphertext block. This is used e.g. by the CTS mode. 
+ */ + scatterwalk_map_and_copy(req->info, req->dst, req->nbytes - ivsize, + ivsize, 0); + ablkcipher_request_complete(req, status); } @@ -880,6 +903,15 @@ static struct ablkcipher_edesc *ablkcipher_edesc_alloc(struct ablkcipher_request } dst_sg_idx = qm_sg_ents; + qm_sg_ents += mapped_dst_nents > 1 ? mapped_dst_nents : 0; + if (unlikely(qm_sg_ents > CAAM_QI_MAX_ABLKCIPHER_SG)) { + dev_err(qidev, "Insufficient S/G entries: %d > %lu\n", + qm_sg_ents, CAAM_QI_MAX_ABLKCIPHER_SG); + caam_unmap(qidev, req->src, req->dst, src_nents, dst_nents, + iv_dma, ivsize, op_type, 0, 0); + return ERR_PTR(-ENOMEM); + } + /* allocate space for base edesc and link tables */ edesc = qi_cache_alloc(GFP_DMA | flags); if (unlikely(!edesc)) { @@ -892,7 +924,6 @@ static struct ablkcipher_edesc *ablkcipher_edesc_alloc(struct ablkcipher_request edesc->src_nents = src_nents; edesc->dst_nents = dst_nents; edesc->iv_dma = iv_dma; - qm_sg_ents += mapped_dst_nents > 1 ? mapped_dst_nents : 0; sg_table = &edesc->sgt[0]; edesc->qm_sg_bytes = qm_sg_ents * sizeof(*sg_table); edesc->drv_req.app_ctx = req; @@ -1026,6 +1057,14 @@ static struct ablkcipher_edesc *ablkcipher_giv_edesc_alloc( qm_sg_ents += 1 + mapped_dst_nents; } + if (unlikely(qm_sg_ents > CAAM_QI_MAX_ABLKCIPHER_SG)) { + dev_err(qidev, "Insufficient S/G entries: %d > %lu\n", + qm_sg_ents, CAAM_QI_MAX_ABLKCIPHER_SG); + caam_unmap(qidev, req->src, req->dst, src_nents, dst_nents, + iv_dma, ivsize, GIVENCRYPT, 0, 0); + return ERR_PTR(-ENOMEM); + } + /* allocate space for base edesc and link tables */ edesc = qi_cache_alloc(GFP_DMA | flags); if (!edesc) { @@ -1968,7 +2007,7 @@ static struct caam_aead_alg driver_aeads[] = { .cra_name = "echainiv(authenc(hmac(sha256)," "cbc(des)))", .cra_driver_name = "echainiv-authenc-" - "hmac-sha256-cbc-desi-" + "hmac-sha256-cbc-des-" "caam-qi", .cra_blocksize = DES_BLOCK_SIZE, }, diff --git a/drivers/crypto/caam/caamhash.c b/drivers/crypto/caam/caamhash.c index 910ec61cae09..698580b60b2f 100644 --- 
a/drivers/crypto/caam/caamhash.c +++ b/drivers/crypto/caam/caamhash.c @@ -791,8 +791,8 @@ static int ahash_update_ctx(struct ahash_request *req) to_hash - *buflen, *next_buflen, 0); } else { - (edesc->sec4_sg + sec4_sg_src_index - 1)->len |= - cpu_to_caam32(SEC4_SG_LEN_FIN); + sg_to_sec4_set_last(edesc->sec4_sg + sec4_sg_src_index - + 1); } desc = edesc->hw_desc; @@ -882,8 +882,7 @@ static int ahash_final_ctx(struct ahash_request *req) if (ret) goto unmap_ctx; - (edesc->sec4_sg + sec4_sg_src_index - 1)->len |= - cpu_to_caam32(SEC4_SG_LEN_FIN); + sg_to_sec4_set_last(edesc->sec4_sg + sec4_sg_src_index - 1); edesc->sec4_sg_dma = dma_map_single(jrdev, edesc->sec4_sg, sec4_sg_bytes, DMA_TO_DEVICE); diff --git a/drivers/crypto/caam/caamrng.c b/drivers/crypto/caam/caamrng.c index 41398da3edf4..fde07d4ff019 100644 --- a/drivers/crypto/caam/caamrng.c +++ b/drivers/crypto/caam/caamrng.c @@ -285,11 +285,7 @@ static int caam_init_rng(struct caam_rng_ctx *ctx, struct device *jrdev) if (err) return err; - err = caam_init_buf(ctx, 1); - if (err) - return err; - - return 0; + return caam_init_buf(ctx, 1); } static struct hwrng caam_rng = { diff --git a/drivers/crypto/caam/ctrl.c b/drivers/crypto/caam/ctrl.c index dd353e342c12..dacb53fb690e 100644 --- a/drivers/crypto/caam/ctrl.c +++ b/drivers/crypto/caam/ctrl.c @@ -17,6 +17,8 @@ bool caam_little_end; EXPORT_SYMBOL(caam_little_end); +bool caam_dpaa2; +EXPORT_SYMBOL(caam_dpaa2); #ifdef CONFIG_CAAM_QI #include "qi.h" @@ -319,8 +321,11 @@ static int caam_remove(struct platform_device *pdev) caam_qi_shutdown(ctrlpriv->qidev); #endif - /* De-initialize RNG state handles initialized by this driver. */ - if (ctrlpriv->rng4_sh_init) + /* + * De-initialize RNG state handles initialized by this driver. + * In case of DPAA 2.x, RNG is managed by MC firmware. 
+ */ + if (!caam_dpaa2 && ctrlpriv->rng4_sh_init) deinstantiate_rng(ctrldev, ctrlpriv->rng4_sh_init); /* Shut down debug views */ @@ -444,7 +449,6 @@ static int caam_probe(struct platform_device *pdev) dev = &pdev->dev; dev_set_drvdata(dev, ctrlpriv); - ctrlpriv->pdev = pdev; nprop = pdev->dev.of_node; /* Enable clocking */ @@ -553,12 +557,17 @@ static int caam_probe(struct platform_device *pdev) /* * Enable DECO watchdogs and, if this is a PHYS_ADDR_T_64BIT kernel, - * long pointers in master configuration register + * long pointers in master configuration register. + * In case of DPAA 2.x, Management Complex firmware performs + * the configuration. */ - clrsetbits_32(&ctrl->mcr, MCFGR_AWCACHE_MASK | MCFGR_LONG_PTR, - MCFGR_AWCACHE_CACH | MCFGR_AWCACHE_BUFF | - MCFGR_WDENABLE | MCFGR_LARGE_BURST | - (sizeof(dma_addr_t) == sizeof(u64) ? MCFGR_LONG_PTR : 0)); + caam_dpaa2 = !!(comp_params & CTPR_MS_DPAA2); + if (!caam_dpaa2) + clrsetbits_32(&ctrl->mcr, MCFGR_AWCACHE_MASK | MCFGR_LONG_PTR, + MCFGR_AWCACHE_CACH | MCFGR_AWCACHE_BUFF | + MCFGR_WDENABLE | MCFGR_LARGE_BURST | + (sizeof(dma_addr_t) == sizeof(u64) ? + MCFGR_LONG_PTR : 0)); /* * Read the Compile Time paramters and SCFGR to determine @@ -587,7 +596,9 @@ static int caam_probe(struct platform_device *pdev) JRSTART_JR3_START); if (sizeof(dma_addr_t) == sizeof(u64)) { - if (of_device_is_compatible(nprop, "fsl,sec-v5.0")) + if (caam_dpaa2) + ret = dma_set_mask_and_coherent(dev, DMA_BIT_MASK(49)); + else if (of_device_is_compatible(nprop, "fsl,sec-v5.0")) ret = dma_set_mask_and_coherent(dev, DMA_BIT_MASK(40)); else ret = dma_set_mask_and_coherent(dev, DMA_BIT_MASK(36)); @@ -630,11 +641,9 @@ static int caam_probe(struct platform_device *pdev) ring++; } - /* Check to see if QI present. If so, enable */ - ctrlpriv->qi_present = - !!(rd_reg32(&ctrl->perfmon.comp_parms_ms) & - CTPR_MS_QI_MASK); - if (ctrlpriv->qi_present) { + /* Check to see if (DPAA 1.x) QI present. 
If so, enable */ + ctrlpriv->qi_present = !!(comp_params & CTPR_MS_QI_MASK); + if (ctrlpriv->qi_present && !caam_dpaa2) { ctrlpriv->qi = (struct caam_queue_if __iomem __force *) ((__force uint8_t *)ctrl + BLOCK_OFFSET * QI_BLOCK_NUMBER @@ -662,8 +671,10 @@ static int caam_probe(struct platform_device *pdev) /* * If SEC has RNG version >= 4 and RNG state handle has not been * already instantiated, do RNG instantiation + * In case of DPAA 2.x, RNG is managed by MC firmware. */ - if ((cha_vid_ls & CHA_ID_LS_RNG_MASK) >> CHA_ID_LS_RNG_SHIFT >= 4) { + if (!caam_dpaa2 && + (cha_vid_ls & CHA_ID_LS_RNG_MASK) >> CHA_ID_LS_RNG_SHIFT >= 4) { ctrlpriv->rng4_sh_init = rd_reg32(&ctrl->r4tst[0].rdsta); /* @@ -731,63 +742,43 @@ static int caam_probe(struct platform_device *pdev) /* Report "alive" for developer to see */ dev_info(dev, "device ID = 0x%016llx (Era %d)\n", caam_id, caam_get_era()); - dev_info(dev, "job rings = %d, qi = %d\n", - ctrlpriv->total_jobrs, ctrlpriv->qi_present); + dev_info(dev, "job rings = %d, qi = %d, dpaa2 = %s\n", + ctrlpriv->total_jobrs, ctrlpriv->qi_present, + caam_dpaa2 ? 
"yes" : "no"); #ifdef CONFIG_DEBUG_FS - - ctrlpriv->ctl_rq_dequeued = - debugfs_create_file("rq_dequeued", - S_IRUSR | S_IRGRP | S_IROTH, - ctrlpriv->ctl, &perfmon->req_dequeued, - &caam_fops_u64_ro); - ctrlpriv->ctl_ob_enc_req = - debugfs_create_file("ob_rq_encrypted", - S_IRUSR | S_IRGRP | S_IROTH, - ctrlpriv->ctl, &perfmon->ob_enc_req, - &caam_fops_u64_ro); - ctrlpriv->ctl_ib_dec_req = - debugfs_create_file("ib_rq_decrypted", - S_IRUSR | S_IRGRP | S_IROTH, - ctrlpriv->ctl, &perfmon->ib_dec_req, - &caam_fops_u64_ro); - ctrlpriv->ctl_ob_enc_bytes = - debugfs_create_file("ob_bytes_encrypted", - S_IRUSR | S_IRGRP | S_IROTH, - ctrlpriv->ctl, &perfmon->ob_enc_bytes, - &caam_fops_u64_ro); - ctrlpriv->ctl_ob_prot_bytes = - debugfs_create_file("ob_bytes_protected", - S_IRUSR | S_IRGRP | S_IROTH, - ctrlpriv->ctl, &perfmon->ob_prot_bytes, - &caam_fops_u64_ro); - ctrlpriv->ctl_ib_dec_bytes = - debugfs_create_file("ib_bytes_decrypted", - S_IRUSR | S_IRGRP | S_IROTH, - ctrlpriv->ctl, &perfmon->ib_dec_bytes, - &caam_fops_u64_ro); - ctrlpriv->ctl_ib_valid_bytes = - debugfs_create_file("ib_bytes_validated", - S_IRUSR | S_IRGRP | S_IROTH, - ctrlpriv->ctl, &perfmon->ib_valid_bytes, - &caam_fops_u64_ro); + debugfs_create_file("rq_dequeued", S_IRUSR | S_IRGRP | S_IROTH, + ctrlpriv->ctl, &perfmon->req_dequeued, + &caam_fops_u64_ro); + debugfs_create_file("ob_rq_encrypted", S_IRUSR | S_IRGRP | S_IROTH, + ctrlpriv->ctl, &perfmon->ob_enc_req, + &caam_fops_u64_ro); + debugfs_create_file("ib_rq_decrypted", S_IRUSR | S_IRGRP | S_IROTH, + ctrlpriv->ctl, &perfmon->ib_dec_req, + &caam_fops_u64_ro); + debugfs_create_file("ob_bytes_encrypted", S_IRUSR | S_IRGRP | S_IROTH, + ctrlpriv->ctl, &perfmon->ob_enc_bytes, + &caam_fops_u64_ro); + debugfs_create_file("ob_bytes_protected", S_IRUSR | S_IRGRP | S_IROTH, + ctrlpriv->ctl, &perfmon->ob_prot_bytes, + &caam_fops_u64_ro); + debugfs_create_file("ib_bytes_decrypted", S_IRUSR | S_IRGRP | S_IROTH, + ctrlpriv->ctl, &perfmon->ib_dec_bytes, + 
&caam_fops_u64_ro); + debugfs_create_file("ib_bytes_validated", S_IRUSR | S_IRGRP | S_IROTH, + ctrlpriv->ctl, &perfmon->ib_valid_bytes, + &caam_fops_u64_ro); /* Controller level - global status values */ - ctrlpriv->ctl_faultaddr = - debugfs_create_file("fault_addr", - S_IRUSR | S_IRGRP | S_IROTH, - ctrlpriv->ctl, &perfmon->faultaddr, - &caam_fops_u32_ro); - ctrlpriv->ctl_faultdetail = - debugfs_create_file("fault_detail", - S_IRUSR | S_IRGRP | S_IROTH, - ctrlpriv->ctl, &perfmon->faultdetail, - &caam_fops_u32_ro); - ctrlpriv->ctl_faultstatus = - debugfs_create_file("fault_status", - S_IRUSR | S_IRGRP | S_IROTH, - ctrlpriv->ctl, &perfmon->status, - &caam_fops_u32_ro); + debugfs_create_file("fault_addr", S_IRUSR | S_IRGRP | S_IROTH, + ctrlpriv->ctl, &perfmon->faultaddr, + &caam_fops_u32_ro); + debugfs_create_file("fault_detail", S_IRUSR | S_IRGRP | S_IROTH, + ctrlpriv->ctl, &perfmon->faultdetail, + &caam_fops_u32_ro); + debugfs_create_file("fault_status", S_IRUSR | S_IRGRP | S_IROTH, + ctrlpriv->ctl, &perfmon->status, + &caam_fops_u32_ro); /* Internal covering keys (useful in non-secure mode only) */ ctrlpriv->ctl_kek_wrap.data = (__force void *)&ctrlpriv->ctrl->kek[0]; diff --git a/drivers/crypto/caam/ctrl.h b/drivers/crypto/caam/ctrl.h index cac5402a46eb..7e7bf68c9ef5 100644 --- a/drivers/crypto/caam/ctrl.h +++ b/drivers/crypto/caam/ctrl.h @@ -10,4 +10,6 @@ /* Prototypes for backend-level services exposed to APIs */ int caam_get_era(void); +extern bool caam_dpaa2; + #endif /* CTRL_H */ diff --git a/drivers/crypto/caam/error.c b/drivers/crypto/caam/error.c index 6f44ccb55c63..3d639f3b45aa 100644 --- a/drivers/crypto/caam/error.c +++ b/drivers/crypto/caam/error.c @@ -9,6 +9,46 @@ #include "desc.h" #include "error.h" +#ifdef DEBUG +#include <linux/highmem.h> + +void caam_dump_sg(const char *level, const char *prefix_str, int prefix_type, + int rowsize, int groupsize, struct scatterlist *sg, + size_t tlen, bool ascii) +{ + struct scatterlist *it; + void *it_page; + 
size_t len; + void *buf; + + for (it = sg; it && tlen > 0 ; it = sg_next(sg)) { + /* + * make sure the scatterlist's page + * has a valid virtual memory mapping + */ + it_page = kmap_atomic(sg_page(it)); + if (unlikely(!it_page)) { + pr_err("caam_dump_sg: kmap failed\n"); + return; + } + + buf = it_page + it->offset; + len = min_t(size_t, tlen, it->length); + print_hex_dump(level, prefix_str, prefix_type, rowsize, + groupsize, buf, len, ascii); + tlen -= len; + + kunmap_atomic(it_page); + } +} +#else +void caam_dump_sg(const char *level, const char *prefix_str, int prefix_type, + int rowsize, int groupsize, struct scatterlist *sg, + size_t tlen, bool ascii) +{} +#endif /* DEBUG */ +EXPORT_SYMBOL(caam_dump_sg); + static const struct { u8 value; const char *error_text; diff --git a/drivers/crypto/caam/error.h b/drivers/crypto/caam/error.h index b6350b0d9153..250e1a21c473 100644 --- a/drivers/crypto/caam/error.h +++ b/drivers/crypto/caam/error.h @@ -8,4 +8,8 @@ #define CAAM_ERROR_H #define CAAM_ERROR_STR_MAX 302 void caam_jr_strstatus(struct device *jrdev, u32 status); + +void caam_dump_sg(const char *level, const char *prefix_str, int prefix_type, + int rowsize, int groupsize, struct scatterlist *sg, + size_t tlen, bool ascii); #endif /* CAAM_ERROR_H */ diff --git a/drivers/crypto/caam/intern.h b/drivers/crypto/caam/intern.h index 85b6c5835b8f..a52361258d3a 100644 --- a/drivers/crypto/caam/intern.h +++ b/drivers/crypto/caam/intern.h @@ -64,12 +64,9 @@ struct caam_drv_private_jr { * Driver-private storage for a single CAAM block instance */ struct caam_drv_private { - - struct device *dev; #ifdef CONFIG_CAAM_QI struct device *qidev; #endif - struct platform_device *pdev; /* Physical-presence section */ struct caam_ctrl __iomem *ctrl; /* controller region */ @@ -105,16 +102,8 @@ struct caam_drv_private { #ifdef CONFIG_DEBUG_FS struct dentry *dfs_root; struct dentry *ctl; /* controller dir */ - struct dentry *ctl_rq_dequeued, *ctl_ob_enc_req, *ctl_ib_dec_req; - struct 
dentry *ctl_ob_enc_bytes, *ctl_ob_prot_bytes; - struct dentry *ctl_ib_dec_bytes, *ctl_ib_valid_bytes; - struct dentry *ctl_faultaddr, *ctl_faultdetail, *ctl_faultstatus; - struct debugfs_blob_wrapper ctl_kek_wrap, ctl_tkek_wrap, ctl_tdsk_wrap; struct dentry *ctl_kek, *ctl_tkek, *ctl_tdsk; -#ifdef CONFIG_CAAM_QI - struct dentry *qi_congested; -#endif #endif }; diff --git a/drivers/crypto/caam/jr.c b/drivers/crypto/caam/jr.c index 1ccfb317d468..d258953ff488 100644 --- a/drivers/crypto/caam/jr.c +++ b/drivers/crypto/caam/jr.c @@ -9,6 +9,7 @@ #include <linux/of_address.h> #include "compat.h" +#include "ctrl.h" #include "regs.h" #include "jr.h" #include "desc.h" @@ -499,7 +500,11 @@ static int caam_jr_probe(struct platform_device *pdev) jrpriv->rregs = (struct caam_job_ring __iomem __force *)ctrl; if (sizeof(dma_addr_t) == sizeof(u64)) { - if (of_device_is_compatible(nprop, "fsl,sec-v5.0-job-ring")) + if (caam_dpaa2) + error = dma_set_mask_and_coherent(jrdev, + DMA_BIT_MASK(49)); + else if (of_device_is_compatible(nprop, + "fsl,sec-v5.0-job-ring")) error = dma_set_mask_and_coherent(jrdev, DMA_BIT_MASK(40)); else diff --git a/drivers/crypto/caam/qi.c b/drivers/crypto/caam/qi.c index 1990ed460c46..e4cf00014233 100644 --- a/drivers/crypto/caam/qi.c +++ b/drivers/crypto/caam/qi.c @@ -24,9 +24,6 @@ */ #define MAX_RSP_FQ_BACKLOG_PER_CPU 256 -/* Length of a single buffer in the QI driver memory cache */ -#define CAAM_QI_MEMCACHE_SIZE 512 - #define CAAM_QI_ENQUEUE_RETRIES 10000 #define CAAM_NAPI_WEIGHT 63 @@ -55,6 +52,7 @@ struct caam_qi_pcpu_priv { } ____cacheline_aligned; static DEFINE_PER_CPU(struct caam_qi_pcpu_priv, pcpu_qipriv); +static DEFINE_PER_CPU(int, last_cpu); /* * caam_qi_priv - CAAM QI backend private params @@ -203,8 +201,8 @@ static struct qman_fq *create_caam_req_fq(struct device *qidev, goto init_req_fq_fail; } - dev_info(qidev, "Allocated request FQ %u for CPU %u\n", req_fq->fqid, - smp_processor_id()); + dev_dbg(qidev, "Allocated request FQ %u for CPU 
%u\n", req_fq->fqid, + smp_processor_id()); return req_fq; init_req_fq_fail: @@ -277,6 +275,7 @@ empty_fq: dev_err(qidev, "OOS of FQID: %u failed\n", fq->fqid); qman_destroy_fq(fq); + kfree(fq); return ret; } @@ -342,8 +341,7 @@ int caam_drv_ctx_update(struct caam_drv_ctx *drv_ctx, u32 *sh_desc) drv_ctx->req_fq = old_fq; if (kill_fq(qidev, new_fq)) - dev_warn(qidev, "New CAAM FQ: %u kill failed\n", - new_fq->fqid); + dev_warn(qidev, "New CAAM FQ kill failed\n"); return ret; } @@ -373,10 +371,9 @@ int caam_drv_ctx_update(struct caam_drv_ctx *drv_ctx, u32 *sh_desc) drv_ctx->req_fq = old_fq; if (kill_fq(qidev, new_fq)) - dev_warn(qidev, "New CAAM FQ: %u kill failed\n", - new_fq->fqid); + dev_warn(qidev, "New CAAM FQ kill failed\n"); } else if (kill_fq(qidev, old_fq)) { - dev_warn(qidev, "Old CAAM FQ: %u kill failed\n", old_fq->fqid); + dev_warn(qidev, "Old CAAM FQ kill failed\n"); } return 0; @@ -392,7 +389,6 @@ struct caam_drv_ctx *caam_drv_ctx_init(struct device *qidev, dma_addr_t hwdesc; struct caam_drv_ctx *drv_ctx; const cpumask_t *cpus = qman_affine_cpus(); - static DEFINE_PER_CPU(int, last_cpu); num_words = desc_len(sh_desc); if (num_words > MAX_SDLEN) { @@ -511,7 +507,6 @@ int caam_qi_shutdown(struct device *qidev) if (kill_fq(qidev, per_cpu(pcpu_qipriv.rsp_fq, i))) dev_err(qidev, "Rsp FQ kill failed, cpu: %d\n", i); - kfree(per_cpu(pcpu_qipriv.rsp_fq, i)); } /* @@ -646,7 +641,7 @@ static int alloc_rsp_fq_cpu(struct device *qidev, unsigned int cpu) per_cpu(pcpu_qipriv.rsp_fq, cpu) = fq; - dev_info(qidev, "Allocated response FQ %u for CPU %u", fq->fqid, cpu); + dev_dbg(qidev, "Allocated response FQ %u for CPU %u", fq->fqid, cpu); return 0; } @@ -679,7 +674,7 @@ static int init_cgr(struct device *qidev) return ret; } - dev_info(qidev, "Congestion threshold set to %llu\n", val); + dev_dbg(qidev, "Congestion threshold set to %llu\n", val); return 0; } @@ -737,6 +732,7 @@ int caam_qi_init(struct platform_device *caam_pdev) qi_pdev = 
platform_device_register_full(&qi_pdev_info); if (IS_ERR(qi_pdev)) return PTR_ERR(qi_pdev); + set_dma_ops(&qi_pdev->dev, get_dma_ops(ctrldev)); ctrlpriv = dev_get_drvdata(ctrldev); qidev = &qi_pdev->dev; @@ -795,10 +791,8 @@ int caam_qi_init(struct platform_device *caam_pdev) /* Done with the CGRs; restore the cpus allowed mask */ set_cpus_allowed_ptr(current, &old_cpumask); #ifdef CONFIG_DEBUG_FS - ctrlpriv->qi_congested = debugfs_create_file("qi_congested", 0444, - ctrlpriv->ctl, - &times_congested, - &caam_fops_u64_ro); + debugfs_create_file("qi_congested", 0444, ctrlpriv->ctl, + &times_congested, &caam_fops_u64_ro); #endif dev_info(qidev, "Linux CAAM Queue I/F driver initialised\n"); return 0; diff --git a/drivers/crypto/caam/qi.h b/drivers/crypto/caam/qi.h index 33b0433f5f22..ecb21f207637 100644 --- a/drivers/crypto/caam/qi.h +++ b/drivers/crypto/caam/qi.h @@ -39,6 +39,9 @@ */ #define MAX_SDLEN ((CAAM_DESC_BYTES_MAX - DESC_JOB_IO_LEN) / CAAM_CMD_SZ) +/* Length of a single buffer in the QI driver memory cache */ +#define CAAM_QI_MEMCACHE_SIZE 768 + extern bool caam_congested __read_mostly; /* diff --git a/drivers/crypto/caam/regs.h b/drivers/crypto/caam/regs.h index 84d2f838a063..2b5efff9ec3c 100644 --- a/drivers/crypto/caam/regs.h +++ b/drivers/crypto/caam/regs.h @@ -293,6 +293,7 @@ struct caam_perfmon { u32 cha_rev_ls; /* CRNR - CHA Rev No. Least significant half*/ #define CTPR_MS_QI_SHIFT 25 #define CTPR_MS_QI_MASK (0x1ull << CTPR_MS_QI_SHIFT) +#define CTPR_MS_DPAA2 BIT(13) #define CTPR_MS_VIRT_EN_INCL 0x00000001 #define CTPR_MS_VIRT_EN_POR 0x00000002 #define CTPR_MS_PG_SZ_MASK 0x10 diff --git a/drivers/crypto/caam/sg_sw_qm2.h b/drivers/crypto/caam/sg_sw_qm2.h new file mode 100644 index 000000000000..31b440757146 --- /dev/null +++ b/drivers/crypto/caam/sg_sw_qm2.h @@ -0,0 +1,81 @@ +/* + * Copyright 2015-2016 Freescale Semiconductor, Inc. 
+ * Copyright 2017 NXP + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * * Neither the names of the above-listed copyright holders nor the + * names of any contributors may be used to endorse or promote products + * derived from this software without specific prior written permission. + * + * + * ALTERNATIVELY, this software may be distributed under the terms of the + * GNU General Public License ("GPL") as published by the Free Software + * Foundation, either version 2 of that License or (at your option) any + * later version. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDERS OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. 
+ */ + +#ifndef _SG_SW_QM2_H_ +#define _SG_SW_QM2_H_ + +#include "../../../drivers/staging/fsl-mc/include/dpaa2-fd.h" + +static inline void dma_to_qm_sg_one(struct dpaa2_sg_entry *qm_sg_ptr, + dma_addr_t dma, u32 len, u16 offset) +{ + dpaa2_sg_set_addr(qm_sg_ptr, dma); + dpaa2_sg_set_format(qm_sg_ptr, dpaa2_sg_single); + dpaa2_sg_set_final(qm_sg_ptr, false); + dpaa2_sg_set_len(qm_sg_ptr, len); + dpaa2_sg_set_bpid(qm_sg_ptr, 0); + dpaa2_sg_set_offset(qm_sg_ptr, offset); +} + +/* + * convert scatterlist to h/w link table format + * but does not have final bit; instead, returns last entry + */ +static inline struct dpaa2_sg_entry * +sg_to_qm_sg(struct scatterlist *sg, int sg_count, + struct dpaa2_sg_entry *qm_sg_ptr, u16 offset) +{ + while (sg_count && sg) { + dma_to_qm_sg_one(qm_sg_ptr, sg_dma_address(sg), + sg_dma_len(sg), offset); + qm_sg_ptr++; + sg = sg_next(sg); + sg_count--; + } + return qm_sg_ptr - 1; +} + +/* + * convert scatterlist to h/w link table format + * scatterlist must have been previously dma mapped + */ +static inline void sg_to_qm_sg_last(struct scatterlist *sg, int sg_count, + struct dpaa2_sg_entry *qm_sg_ptr, + u16 offset) +{ + qm_sg_ptr = sg_to_qm_sg(sg, sg_count, qm_sg_ptr, offset); + dpaa2_sg_set_final(qm_sg_ptr, true); +} + +#endif /* _SG_SW_QM2_H_ */ diff --git a/drivers/crypto/caam/sg_sw_sec4.h b/drivers/crypto/caam/sg_sw_sec4.h index c6adad09c972..936b1b630058 100644 --- a/drivers/crypto/caam/sg_sw_sec4.h +++ b/drivers/crypto/caam/sg_sw_sec4.h @@ -5,7 +5,13 @@ * */ +#ifndef _SG_SW_SEC4_H_ +#define _SG_SW_SEC4_H_ + +#include "ctrl.h" #include "regs.h" +#include "sg_sw_qm2.h" +#include "../../../drivers/staging/fsl-mc/include/dpaa2-fd.h" struct sec4_sg_entry { u64 ptr; @@ -19,9 +25,15 @@ struct sec4_sg_entry { static inline void dma_to_sec4_sg_one(struct sec4_sg_entry *sec4_sg_ptr, dma_addr_t dma, u32 len, u16 offset) { - sec4_sg_ptr->ptr = cpu_to_caam_dma64(dma); - sec4_sg_ptr->len = cpu_to_caam32(len); - sec4_sg_ptr->bpid_offset = 
cpu_to_caam32(offset & SEC4_SG_OFFSET_MASK); + if (caam_dpaa2) { + dma_to_qm_sg_one((struct dpaa2_sg_entry *)sec4_sg_ptr, dma, len, + offset); + } else { + sec4_sg_ptr->ptr = cpu_to_caam_dma64(dma); + sec4_sg_ptr->len = cpu_to_caam32(len); + sec4_sg_ptr->bpid_offset = cpu_to_caam32(offset & + SEC4_SG_OFFSET_MASK); + } #ifdef DEBUG print_hex_dump(KERN_ERR, "sec4_sg_ptr@: ", DUMP_PREFIX_ADDRESS, 16, 4, sec4_sg_ptr, @@ -47,6 +59,14 @@ sg_to_sec4_sg(struct scatterlist *sg, int sg_count, return sec4_sg_ptr - 1; } +static inline void sg_to_sec4_set_last(struct sec4_sg_entry *sec4_sg_ptr) +{ + if (caam_dpaa2) + dpaa2_sg_set_final((struct dpaa2_sg_entry *)sec4_sg_ptr, true); + else + sec4_sg_ptr->len |= cpu_to_caam32(SEC4_SG_LEN_FIN); +} + /* * convert scatterlist to h/w link table format * scatterlist must have been previously dma mapped @@ -56,20 +76,7 @@ static inline void sg_to_sec4_sg_last(struct scatterlist *sg, int sg_count, u16 offset) { sec4_sg_ptr = sg_to_sec4_sg(sg, sg_count, sec4_sg_ptr, offset); - sec4_sg_ptr->len |= cpu_to_caam32(SEC4_SG_LEN_FIN); + sg_to_sec4_set_last(sec4_sg_ptr); } -static inline struct sec4_sg_entry *sg_to_sec4_sg_len( - struct scatterlist *sg, unsigned int total, - struct sec4_sg_entry *sec4_sg_ptr) -{ - do { - unsigned int len = min(sg_dma_len(sg), total); - - dma_to_sec4_sg_one(sec4_sg_ptr, sg_dma_address(sg), len, 0); - sec4_sg_ptr++; - sg = sg_next(sg); - total -= len; - } while (total); - return sec4_sg_ptr - 1; -} +#endif /* _SG_SW_SEC4_H_ */ diff --git a/drivers/crypto/cavium/cpt/cptpf_main.c b/drivers/crypto/cavium/cpt/cptpf_main.c index 4119c40e7c4b..34a6d8bf229e 100644 --- a/drivers/crypto/cavium/cpt/cptpf_main.c +++ b/drivers/crypto/cavium/cpt/cptpf_main.c @@ -268,8 +268,10 @@ static int cpt_ucode_load_fw(struct cpt_device *cpt, const u8 *fw, bool is_ae) mcode = &cpt->mcode[cpt->next_mc_idx]; memcpy(mcode->version, (u8 *)fw_entry->data, CPT_UCODE_VERSION_SZ); mcode->code_size = ntohl(ucode->code_length) * 2; - if 
(!mcode->code_size) - return -EINVAL; + if (!mcode->code_size) { + ret = -EINVAL; + goto fw_release; + } mcode->is_ae = is_ae; mcode->core_mask = 0ULL; @@ -280,7 +282,8 @@ static int cpt_ucode_load_fw(struct cpt_device *cpt, const u8 *fw, bool is_ae) &mcode->phys_base, GFP_KERNEL); if (!mcode->code) { dev_err(dev, "Unable to allocate space for microcode"); - return -ENOMEM; + ret = -ENOMEM; + goto fw_release; } memcpy((void *)mcode->code, (void *)(fw_entry->data + sizeof(*ucode)), @@ -302,12 +305,14 @@ static int cpt_ucode_load_fw(struct cpt_device *cpt, const u8 *fw, bool is_ae) ret = do_cpt_init(cpt, mcode); if (ret) { dev_err(dev, "do_cpt_init failed with ret: %d\n", ret); - return ret; + goto fw_release; } dev_info(dev, "Microcode Loaded %s\n", mcode->version); mcode->is_mc_valid = 1; cpt->next_mc_idx++; + +fw_release: release_firmware(fw_entry); return ret; diff --git a/drivers/crypto/cavium/nitrox/nitrox_main.c b/drivers/crypto/cavium/nitrox/nitrox_main.c index 9ccefb9b7232..fee7cb2ce747 100644 --- a/drivers/crypto/cavium/nitrox/nitrox_main.c +++ b/drivers/crypto/cavium/nitrox/nitrox_main.c @@ -513,8 +513,10 @@ static int nitrox_probe(struct pci_dev *pdev, pci_set_master(pdev); ndev = kzalloc(sizeof(*ndev), GFP_KERNEL); - if (!ndev) + if (!ndev) { + err = -ENOMEM; goto ndev_fail; + } pci_set_drvdata(pdev, ndev); ndev->pdev = pdev; diff --git a/drivers/crypto/ccp/Kconfig b/drivers/crypto/ccp/Kconfig index 2238f77aa248..6d626606b9c5 100644 --- a/drivers/crypto/ccp/Kconfig +++ b/drivers/crypto/ccp/Kconfig @@ -1,25 +1,33 @@ config CRYPTO_DEV_CCP_DD - tristate "Cryptographic Coprocessor device driver" - depends on CRYPTO_DEV_CCP + tristate "Secure Processor device driver" default m + help + Provides AMD Secure Processor device driver. + If you choose 'M' here, this module will be called ccp. 
+ +config CRYPTO_DEV_SP_CCP + bool "Cryptographic Coprocessor device" + default y + depends on CRYPTO_DEV_CCP_DD select HW_RANDOM select DMA_ENGINE select DMADEVICES select CRYPTO_SHA1 select CRYPTO_SHA256 help - Provides the interface to use the AMD Cryptographic Coprocessor - which can be used to offload encryption operations such as SHA, - AES and more. If you choose 'M' here, this module will be called - ccp. + Provides the support for AMD Cryptographic Coprocessor (CCP) device + which can be used to offload encryption operations such as SHA, AES + and more. config CRYPTO_DEV_CCP_CRYPTO tristate "Encryption and hashing offload support" - depends on CRYPTO_DEV_CCP_DD default m + depends on CRYPTO_DEV_CCP_DD + depends on CRYPTO_DEV_SP_CCP select CRYPTO_HASH select CRYPTO_BLKCIPHER select CRYPTO_AUTHENC + select CRYPTO_RSA help Support for using the cryptographic API with the AMD Cryptographic Coprocessor. This module supports offload of SHA and AES algorithms. diff --git a/drivers/crypto/ccp/Makefile b/drivers/crypto/ccp/Makefile index 59493fd3a751..57f8debfcfb3 100644 --- a/drivers/crypto/ccp/Makefile +++ b/drivers/crypto/ccp/Makefile @@ -1,12 +1,12 @@ obj-$(CONFIG_CRYPTO_DEV_CCP_DD) += ccp.o -ccp-objs := ccp-dev.o \ +ccp-objs := sp-dev.o sp-platform.o +ccp-$(CONFIG_CRYPTO_DEV_SP_CCP) += ccp-dev.o \ ccp-ops.o \ ccp-dev-v3.o \ ccp-dev-v5.o \ - ccp-platform.o \ ccp-dmaengine.o \ ccp-debugfs.o -ccp-$(CONFIG_PCI) += ccp-pci.o +ccp-$(CONFIG_PCI) += sp-pci.o obj-$(CONFIG_CRYPTO_DEV_CCP_CRYPTO) += ccp-crypto.o ccp-crypto-objs := ccp-crypto-main.o \ @@ -15,4 +15,5 @@ ccp-crypto-objs := ccp-crypto-main.o \ ccp-crypto-aes-xts.o \ ccp-crypto-aes-galois.o \ ccp-crypto-des3.o \ + ccp-crypto-rsa.o \ ccp-crypto-sha.o diff --git a/drivers/crypto/ccp/ccp-crypto-aes-galois.c b/drivers/crypto/ccp/ccp-crypto-aes-galois.c index 38ee6f348ea9..52313524a4dd 100644 --- a/drivers/crypto/ccp/ccp-crypto-aes-galois.c +++ b/drivers/crypto/ccp/ccp-crypto-aes-galois.c @@ -1,7 +1,7 @@ /* * AMD 
Cryptographic Coprocessor (CCP) AES GCM crypto API support * - * Copyright (C) 2016 Advanced Micro Devices, Inc. + * Copyright (C) 2016,2017 Advanced Micro Devices, Inc. * * Author: Gary R Hook <gary.hook@amd.com> * diff --git a/drivers/crypto/ccp/ccp-crypto-aes-xts.c b/drivers/crypto/ccp/ccp-crypto-aes-xts.c index 58a4244b4752..94b5bcf5b628 100644 --- a/drivers/crypto/ccp/ccp-crypto-aes-xts.c +++ b/drivers/crypto/ccp/ccp-crypto-aes-xts.c @@ -1,8 +1,9 @@ /* * AMD Cryptographic Coprocessor (CCP) AES XTS crypto API support * - * Copyright (C) 2013 Advanced Micro Devices, Inc. + * Copyright (C) 2013,2017 Advanced Micro Devices, Inc. * + * Author: Gary R Hook <gary.hook@amd.com> * Author: Tom Lendacky <thomas.lendacky@amd.com> * * This program is free software; you can redistribute it and/or modify @@ -15,6 +16,7 @@ #include <linux/delay.h> #include <linux/scatterlist.h> #include <crypto/aes.h> +#include <crypto/xts.h> #include <crypto/internal/skcipher.h> #include <crypto/scatterwalk.h> @@ -37,46 +39,26 @@ struct ccp_unit_size_map { u32 value; }; -static struct ccp_unit_size_map unit_size_map[] = { +static struct ccp_unit_size_map xts_unit_sizes[] = { { - .size = 4096, - .value = CCP_XTS_AES_UNIT_SIZE_4096, - }, - { - .size = 2048, - .value = CCP_XTS_AES_UNIT_SIZE_2048, - }, - { - .size = 1024, - .value = CCP_XTS_AES_UNIT_SIZE_1024, + .size = 16, + .value = CCP_XTS_AES_UNIT_SIZE_16, }, { - .size = 512, + .size = 512, .value = CCP_XTS_AES_UNIT_SIZE_512, }, { - .size = 256, - .value = CCP_XTS_AES_UNIT_SIZE__LAST, - }, - { - .size = 128, - .value = CCP_XTS_AES_UNIT_SIZE__LAST, - }, - { - .size = 64, - .value = CCP_XTS_AES_UNIT_SIZE__LAST, - }, - { - .size = 32, - .value = CCP_XTS_AES_UNIT_SIZE__LAST, + .size = 1024, + .value = CCP_XTS_AES_UNIT_SIZE_1024, }, { - .size = 16, - .value = CCP_XTS_AES_UNIT_SIZE_16, + .size = 2048, + .value = CCP_XTS_AES_UNIT_SIZE_2048, }, { - .size = 1, - .value = CCP_XTS_AES_UNIT_SIZE__LAST, + .size = 4096, + .value = 
CCP_XTS_AES_UNIT_SIZE_4096, }, }; @@ -96,15 +78,26 @@ static int ccp_aes_xts_complete(struct crypto_async_request *async_req, int ret) static int ccp_aes_xts_setkey(struct crypto_ablkcipher *tfm, const u8 *key, unsigned int key_len) { - struct ccp_ctx *ctx = crypto_tfm_ctx(crypto_ablkcipher_tfm(tfm)); + struct crypto_tfm *xfm = crypto_ablkcipher_tfm(tfm); + struct ccp_ctx *ctx = crypto_tfm_ctx(xfm); + unsigned int ccpversion = ccp_version(); + int ret; - /* Only support 128-bit AES key with a 128-bit Tweak key, - * otherwise use the fallback + ret = xts_check_key(xfm, key, key_len); + if (ret) + return ret; + + /* Version 3 devices support 128-bit keys; version 5 devices can + * accommodate 128- and 256-bit keys. */ switch (key_len) { case AES_KEYSIZE_128 * 2: memcpy(ctx->u.aes.key, key, key_len); break; + case AES_KEYSIZE_256 * 2: + if (ccpversion > CCP_VERSION(3, 0)) + memcpy(ctx->u.aes.key, key, key_len); + break; } ctx->u.aes.key_len = key_len / 2; sg_init_one(&ctx->u.aes.key_sg, ctx->u.aes.key, key_len); @@ -117,6 +110,8 @@ static int ccp_aes_xts_crypt(struct ablkcipher_request *req, { struct ccp_ctx *ctx = crypto_tfm_ctx(req->base.tfm); struct ccp_aes_req_ctx *rctx = ablkcipher_request_ctx(req); + unsigned int ccpversion = ccp_version(); + unsigned int fallback = 0; unsigned int unit; u32 unit_size; int ret; @@ -130,18 +125,32 @@ static int ccp_aes_xts_crypt(struct ablkcipher_request *req, if (!req->info) return -EINVAL; + /* Check conditions under which the CCP can fulfill a request. The + * device can handle input plaintext of a length that is a multiple + * of the unit_size, bug the crypto implementation only supports + * the unit_size being equal to the input length. This limits the + * number of scenarios we can handle. 
+ */ unit_size = CCP_XTS_AES_UNIT_SIZE__LAST; - if (req->nbytes <= unit_size_map[0].size) { - for (unit = 0; unit < ARRAY_SIZE(unit_size_map); unit++) { - if (!(req->nbytes & (unit_size_map[unit].size - 1))) { - unit_size = unit_size_map[unit].value; - break; - } + for (unit = 0; unit < ARRAY_SIZE(xts_unit_sizes); unit++) { + if (req->nbytes == xts_unit_sizes[unit].size) { + unit_size = unit; + break; } } - - if ((unit_size == CCP_XTS_AES_UNIT_SIZE__LAST) || - (ctx->u.aes.key_len != AES_KEYSIZE_128)) { + /* The CCP has restrictions on block sizes. Also, a version 3 device + * only supports AES-128 operations; version 5 CCPs support both + * AES-128 and -256 operations. + */ + if (unit_size == CCP_XTS_AES_UNIT_SIZE__LAST) + fallback = 1; + if ((ccpversion < CCP_VERSION(5, 0)) && + (ctx->u.aes.key_len != AES_KEYSIZE_128)) + fallback = 1; + if ((ctx->u.aes.key_len != AES_KEYSIZE_128) && + (ctx->u.aes.key_len != AES_KEYSIZE_256)) + fallback = 1; + if (fallback) { SKCIPHER_REQUEST_ON_STACK(subreq, ctx->u.aes.tfm_skcipher); /* Use the fallback to process the request for any @@ -164,6 +173,7 @@ static int ccp_aes_xts_crypt(struct ablkcipher_request *req, memset(&rctx->cmd, 0, sizeof(rctx->cmd)); INIT_LIST_HEAD(&rctx->cmd.entry); rctx->cmd.engine = CCP_ENGINE_XTS_AES_128; + rctx->cmd.u.xts.type = CCP_AES_TYPE_128; rctx->cmd.u.xts.action = (encrypt) ? CCP_AES_ACTION_ENCRYPT : CCP_AES_ACTION_DECRYPT; rctx->cmd.u.xts.unit_size = unit_size; diff --git a/drivers/crypto/ccp/ccp-crypto-des3.c b/drivers/crypto/ccp/ccp-crypto-des3.c index 5af7347ae03c..ae87b741f9d5 100644 --- a/drivers/crypto/ccp/ccp-crypto-des3.c +++ b/drivers/crypto/ccp/ccp-crypto-des3.c @@ -1,7 +1,7 @@ /* * AMD Cryptographic Coprocessor (CCP) DES3 crypto API support * - * Copyright (C) 2016 Advanced Micro Devices, Inc. + * Copyright (C) 2016,2017 Advanced Micro Devices, Inc. 
* * Author: Gary R Hook <ghook@amd.com> * diff --git a/drivers/crypto/ccp/ccp-crypto-main.c b/drivers/crypto/ccp/ccp-crypto-main.c index 8dccbddabef1..35a9de7fd475 100644 --- a/drivers/crypto/ccp/ccp-crypto-main.c +++ b/drivers/crypto/ccp/ccp-crypto-main.c @@ -1,7 +1,7 @@ /* * AMD Cryptographic Coprocessor (CCP) crypto API support * - * Copyright (C) 2013 Advanced Micro Devices, Inc. + * Copyright (C) 2013,2017 Advanced Micro Devices, Inc. * * Author: Tom Lendacky <thomas.lendacky@amd.com> * @@ -17,6 +17,7 @@ #include <linux/ccp.h> #include <linux/scatterlist.h> #include <crypto/internal/hash.h> +#include <crypto/internal/akcipher.h> #include "ccp-crypto.h" @@ -37,10 +38,15 @@ static unsigned int des3_disable; module_param(des3_disable, uint, 0444); MODULE_PARM_DESC(des3_disable, "Disable use of 3DES - any non-zero value"); +static unsigned int rsa_disable; +module_param(rsa_disable, uint, 0444); +MODULE_PARM_DESC(rsa_disable, "Disable use of RSA - any non-zero value"); + /* List heads for the supported algorithms */ static LIST_HEAD(hash_algs); static LIST_HEAD(cipher_algs); static LIST_HEAD(aead_algs); +static LIST_HEAD(akcipher_algs); /* For any tfm, requests for that tfm must be returned on the order * received. 
With multiple queues available, the CCP can process more @@ -358,6 +364,12 @@ static int ccp_register_algs(void) return ret; } + if (!rsa_disable) { + ret = ccp_register_rsa_algs(&akcipher_algs); + if (ret) + return ret; + } + return 0; } @@ -366,6 +378,7 @@ static void ccp_unregister_algs(void) struct ccp_crypto_ahash_alg *ahash_alg, *ahash_tmp; struct ccp_crypto_ablkcipher_alg *ablk_alg, *ablk_tmp; struct ccp_crypto_aead *aead_alg, *aead_tmp; + struct ccp_crypto_akcipher_alg *akc_alg, *akc_tmp; list_for_each_entry_safe(ahash_alg, ahash_tmp, &hash_algs, entry) { crypto_unregister_ahash(&ahash_alg->alg); @@ -384,6 +397,12 @@ static void ccp_unregister_algs(void) list_del(&aead_alg->entry); kfree(aead_alg); } + + list_for_each_entry_safe(akc_alg, akc_tmp, &akcipher_algs, entry) { + crypto_unregister_akcipher(&akc_alg->alg); + list_del(&akc_alg->entry); + kfree(akc_alg); + } } static int ccp_crypto_init(void) diff --git a/drivers/crypto/ccp/ccp-crypto-rsa.c b/drivers/crypto/ccp/ccp-crypto-rsa.c new file mode 100644 index 000000000000..e6db8672d89c --- /dev/null +++ b/drivers/crypto/ccp/ccp-crypto-rsa.c @@ -0,0 +1,299 @@ +/* + * AMD Cryptographic Coprocessor (CCP) RSA crypto API support + * + * Copyright (C) 2017 Advanced Micro Devices, Inc. + * + * Author: Gary R Hook <gary.hook@amd.com> + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. 
+ */ + +#include <linux/module.h> +#include <linux/sched.h> +#include <linux/scatterlist.h> +#include <linux/crypto.h> +#include <crypto/algapi.h> +#include <crypto/internal/rsa.h> +#include <crypto/internal/akcipher.h> +#include <crypto/akcipher.h> +#include <crypto/scatterwalk.h> + +#include "ccp-crypto.h" + +static inline struct akcipher_request *akcipher_request_cast( + struct crypto_async_request *req) +{ + return container_of(req, struct akcipher_request, base); +} + +static inline int ccp_copy_and_save_keypart(u8 **kpbuf, unsigned int *kplen, + const u8 *buf, size_t sz) +{ + int nskip; + + for (nskip = 0; nskip < sz; nskip++) + if (buf[nskip]) + break; + *kplen = sz - nskip; + *kpbuf = kzalloc(*kplen, GFP_KERNEL); + if (!*kpbuf) + return -ENOMEM; + memcpy(*kpbuf, buf + nskip, *kplen); + + return 0; +} + +static int ccp_rsa_complete(struct crypto_async_request *async_req, int ret) +{ + struct akcipher_request *req = akcipher_request_cast(async_req); + struct ccp_rsa_req_ctx *rctx = akcipher_request_ctx(req); + + if (ret) + return ret; + + req->dst_len = rctx->cmd.u.rsa.key_size >> 3; + + return 0; +} + +static unsigned int ccp_rsa_maxsize(struct crypto_akcipher *tfm) +{ + if (ccp_version() > CCP_VERSION(3, 0)) + return CCP5_RSA_MAXMOD; + else + return CCP_RSA_MAXMOD; +} + +static int ccp_rsa_crypt(struct akcipher_request *req, bool encrypt) +{ + struct crypto_akcipher *tfm = crypto_akcipher_reqtfm(req); + struct ccp_ctx *ctx = akcipher_tfm_ctx(tfm); + struct ccp_rsa_req_ctx *rctx = akcipher_request_ctx(req); + int ret = 0; + + memset(&rctx->cmd, 0, sizeof(rctx->cmd)); + INIT_LIST_HEAD(&rctx->cmd.entry); + rctx->cmd.engine = CCP_ENGINE_RSA; + + rctx->cmd.u.rsa.key_size = ctx->u.rsa.key_len; /* in bits */ + if (encrypt) { + rctx->cmd.u.rsa.exp = &ctx->u.rsa.e_sg; + rctx->cmd.u.rsa.exp_len = ctx->u.rsa.e_len; + } else { + rctx->cmd.u.rsa.exp = &ctx->u.rsa.d_sg; + rctx->cmd.u.rsa.exp_len = ctx->u.rsa.d_len; + } + rctx->cmd.u.rsa.mod = &ctx->u.rsa.n_sg; + 
rctx->cmd.u.rsa.mod_len = ctx->u.rsa.n_len; + rctx->cmd.u.rsa.src = req->src; + rctx->cmd.u.rsa.src_len = req->src_len; + rctx->cmd.u.rsa.dst = req->dst; + + ret = ccp_crypto_enqueue_request(&req->base, &rctx->cmd); + + return ret; +} + +static int ccp_rsa_encrypt(struct akcipher_request *req) +{ + return ccp_rsa_crypt(req, true); +} + +static int ccp_rsa_decrypt(struct akcipher_request *req) +{ + return ccp_rsa_crypt(req, false); +} + +static int ccp_check_key_length(unsigned int len) +{ + /* In bits */ + if (len < 8 || len > 4096) + return -EINVAL; + return 0; +} + +static void ccp_rsa_free_key_bufs(struct ccp_ctx *ctx) +{ + /* Clean up old key data */ + kzfree(ctx->u.rsa.e_buf); + ctx->u.rsa.e_buf = NULL; + ctx->u.rsa.e_len = 0; + kzfree(ctx->u.rsa.n_buf); + ctx->u.rsa.n_buf = NULL; + ctx->u.rsa.n_len = 0; + kzfree(ctx->u.rsa.d_buf); + ctx->u.rsa.d_buf = NULL; + ctx->u.rsa.d_len = 0; +} + +static int ccp_rsa_setkey(struct crypto_akcipher *tfm, const void *key, + unsigned int keylen, bool private) +{ + struct ccp_ctx *ctx = akcipher_tfm_ctx(tfm); + struct rsa_key raw_key; + int ret; + + ccp_rsa_free_key_bufs(ctx); + memset(&raw_key, 0, sizeof(raw_key)); + + /* Code borrowed from crypto/rsa.c */ + if (private) + ret = rsa_parse_priv_key(&raw_key, key, keylen); + else + ret = rsa_parse_pub_key(&raw_key, key, keylen); + if (ret) + goto n_key; + + ret = ccp_copy_and_save_keypart(&ctx->u.rsa.n_buf, &ctx->u.rsa.n_len, + raw_key.n, raw_key.n_sz); + if (ret) + goto key_err; + sg_init_one(&ctx->u.rsa.n_sg, ctx->u.rsa.n_buf, ctx->u.rsa.n_len); + + ctx->u.rsa.key_len = ctx->u.rsa.n_len << 3; /* convert to bits */ + if (ccp_check_key_length(ctx->u.rsa.key_len)) { + ret = -EINVAL; + goto key_err; + } + + ret = ccp_copy_and_save_keypart(&ctx->u.rsa.e_buf, &ctx->u.rsa.e_len, + raw_key.e, raw_key.e_sz); + if (ret) + goto key_err; + sg_init_one(&ctx->u.rsa.e_sg, ctx->u.rsa.e_buf, ctx->u.rsa.e_len); + + if (private) { + ret = ccp_copy_and_save_keypart(&ctx->u.rsa.d_buf, + 
&ctx->u.rsa.d_len, + raw_key.d, raw_key.d_sz); + if (ret) + goto key_err; + sg_init_one(&ctx->u.rsa.d_sg, + ctx->u.rsa.d_buf, ctx->u.rsa.d_len); + } + + return 0; + +key_err: + ccp_rsa_free_key_bufs(ctx); + +n_key: + return ret; +} + +static int ccp_rsa_setprivkey(struct crypto_akcipher *tfm, const void *key, + unsigned int keylen) +{ + return ccp_rsa_setkey(tfm, key, keylen, true); +} + +static int ccp_rsa_setpubkey(struct crypto_akcipher *tfm, const void *key, + unsigned int keylen) +{ + return ccp_rsa_setkey(tfm, key, keylen, false); +} + +static int ccp_rsa_init_tfm(struct crypto_akcipher *tfm) +{ + struct ccp_ctx *ctx = akcipher_tfm_ctx(tfm); + + akcipher_set_reqsize(tfm, sizeof(struct ccp_rsa_req_ctx)); + ctx->complete = ccp_rsa_complete; + + return 0; +} + +static void ccp_rsa_exit_tfm(struct crypto_akcipher *tfm) +{ + struct ccp_ctx *ctx = crypto_tfm_ctx(&tfm->base); + + ccp_rsa_free_key_bufs(ctx); +} + +static struct akcipher_alg ccp_rsa_defaults = { + .encrypt = ccp_rsa_encrypt, + .decrypt = ccp_rsa_decrypt, + .sign = ccp_rsa_decrypt, + .verify = ccp_rsa_encrypt, + .set_pub_key = ccp_rsa_setpubkey, + .set_priv_key = ccp_rsa_setprivkey, + .max_size = ccp_rsa_maxsize, + .init = ccp_rsa_init_tfm, + .exit = ccp_rsa_exit_tfm, + .base = { + .cra_name = "rsa", + .cra_driver_name = "rsa-ccp", + .cra_priority = CCP_CRA_PRIORITY, + .cra_module = THIS_MODULE, + .cra_ctxsize = 2 * sizeof(struct ccp_ctx), + }, +}; + +struct ccp_rsa_def { + unsigned int version; + const char *name; + const char *driver_name; + unsigned int reqsize; + struct akcipher_alg *alg_defaults; +}; + +static struct ccp_rsa_def rsa_algs[] = { + { + .version = CCP_VERSION(3, 0), + .name = "rsa", + .driver_name = "rsa-ccp", + .reqsize = sizeof(struct ccp_rsa_req_ctx), + .alg_defaults = &ccp_rsa_defaults, + } +}; + +int ccp_register_rsa_alg(struct list_head *head, const struct ccp_rsa_def *def) +{ + struct ccp_crypto_akcipher_alg *ccp_alg; + struct akcipher_alg *alg; + int ret; + + ccp_alg = 
kzalloc(sizeof(*ccp_alg), GFP_KERNEL); + if (!ccp_alg) + return -ENOMEM; + + INIT_LIST_HEAD(&ccp_alg->entry); + + alg = &ccp_alg->alg; + *alg = *def->alg_defaults; + snprintf(alg->base.cra_name, CRYPTO_MAX_ALG_NAME, "%s", def->name); + snprintf(alg->base.cra_driver_name, CRYPTO_MAX_ALG_NAME, "%s", + def->driver_name); + ret = crypto_register_akcipher(alg); + if (ret) { + pr_err("%s akcipher algorithm registration error (%d)\n", + alg->base.cra_name, ret); + kfree(ccp_alg); + return ret; + } + + list_add(&ccp_alg->entry, head); + + return 0; +} + +int ccp_register_rsa_algs(struct list_head *head) +{ + int i, ret; + unsigned int ccpversion = ccp_version(); + + /* Register the RSA algorithm in standard mode + * This works for CCP v3 and later + */ + for (i = 0; i < ARRAY_SIZE(rsa_algs); i++) { + if (rsa_algs[i].version > ccpversion) + continue; + ret = ccp_register_rsa_alg(head, &rsa_algs[i]); + if (ret) + return ret; + } + + return 0; +} diff --git a/drivers/crypto/ccp/ccp-crypto-sha.c b/drivers/crypto/ccp/ccp-crypto-sha.c index ce97b3868f4a..8b9b16d433f7 100644 --- a/drivers/crypto/ccp/ccp-crypto-sha.c +++ b/drivers/crypto/ccp/ccp-crypto-sha.c @@ -1,7 +1,7 @@ /* * AMD Cryptographic Coprocessor (CCP) SHA crypto API support * - * Copyright (C) 2013,2016 Advanced Micro Devices, Inc. + * Copyright (C) 2013,2017 Advanced Micro Devices, Inc. * * Author: Tom Lendacky <thomas.lendacky@amd.com> * Author: Gary R Hook <gary.hook@amd.com> diff --git a/drivers/crypto/ccp/ccp-crypto.h b/drivers/crypto/ccp/ccp-crypto.h index dd5bf15f06e5..b9fd090c46c2 100644 --- a/drivers/crypto/ccp/ccp-crypto.h +++ b/drivers/crypto/ccp/ccp-crypto.h @@ -1,7 +1,7 @@ /* * AMD Cryptographic Coprocessor (CCP) crypto API support * - * Copyright (C) 2013 Advanced Micro Devices, Inc. + * Copyright (C) 2013,2017 Advanced Micro Devices, Inc. 
* * Author: Tom Lendacky <thomas.lendacky@amd.com> * @@ -24,6 +24,8 @@ #include <crypto/ctr.h> #include <crypto/hash.h> #include <crypto/sha.h> +#include <crypto/akcipher.h> +#include <crypto/internal/rsa.h> #define CCP_LOG_LEVEL KERN_INFO @@ -58,6 +60,12 @@ struct ccp_crypto_ahash_alg { struct ahash_alg alg; }; +struct ccp_crypto_akcipher_alg { + struct list_head entry; + + struct akcipher_alg alg; +}; + static inline struct ccp_crypto_ablkcipher_alg * ccp_crypto_ablkcipher_alg(struct crypto_tfm *tfm) { @@ -91,7 +99,7 @@ struct ccp_aes_ctx { struct scatterlist key_sg; unsigned int key_len; - u8 key[AES_MAX_KEY_SIZE]; + u8 key[AES_MAX_KEY_SIZE * 2]; u8 nonce[CTR_RFC3686_NONCE_SIZE]; @@ -227,12 +235,35 @@ struct ccp_sha_exp_ctx { u8 buf[MAX_SHA_BLOCK_SIZE]; }; +/***** RSA related defines *****/ + +struct ccp_rsa_ctx { + unsigned int key_len; /* in bits */ + struct scatterlist e_sg; + u8 *e_buf; + unsigned int e_len; + struct scatterlist n_sg; + u8 *n_buf; + unsigned int n_len; + struct scatterlist d_sg; + u8 *d_buf; + unsigned int d_len; +}; + +struct ccp_rsa_req_ctx { + struct ccp_cmd cmd; +}; + +#define CCP_RSA_MAXMOD (4 * 1024 / 8) +#define CCP5_RSA_MAXMOD (16 * 1024 / 8) + /***** Common Context Structure *****/ struct ccp_ctx { int (*complete)(struct crypto_async_request *req, int ret); union { struct ccp_aes_ctx aes; + struct ccp_rsa_ctx rsa; struct ccp_sha_ctx sha; struct ccp_des3_ctx des3; } u; @@ -249,5 +280,6 @@ int ccp_register_aes_xts_algs(struct list_head *head); int ccp_register_aes_aeads(struct list_head *head); int ccp_register_sha_algs(struct list_head *head); int ccp_register_des3_algs(struct list_head *head); +int ccp_register_rsa_algs(struct list_head *head); #endif diff --git a/drivers/crypto/ccp/ccp-debugfs.c b/drivers/crypto/ccp/ccp-debugfs.c index 3cd6c83754e0..59d4ca4e72d8 100644 --- a/drivers/crypto/ccp/ccp-debugfs.c +++ b/drivers/crypto/ccp/ccp-debugfs.c @@ -305,19 +305,19 @@ void ccp5_debugfs_setup(struct ccp_device *ccp) 
ccp->debugfs_instance = debugfs_create_dir(ccp->name, ccp_debugfs_dir); if (!ccp->debugfs_instance) - return; + goto err; debugfs_info = debugfs_create_file("info", 0400, ccp->debugfs_instance, ccp, &ccp_debugfs_info_ops); if (!debugfs_info) - return; + goto err; debugfs_stats = debugfs_create_file("stats", 0600, ccp->debugfs_instance, ccp, &ccp_debugfs_stats_ops); if (!debugfs_stats) - return; + goto err; for (i = 0; i < ccp->cmd_q_count; i++) { cmd_q = &ccp->cmd_q[i]; @@ -327,15 +327,20 @@ void ccp5_debugfs_setup(struct ccp_device *ccp) debugfs_q_instance = debugfs_create_dir(name, ccp->debugfs_instance); if (!debugfs_q_instance) - return; + goto err; debugfs_q_stats = debugfs_create_file("stats", 0600, debugfs_q_instance, cmd_q, &ccp_debugfs_queue_ops); if (!debugfs_q_stats) - return; + goto err; } + + return; + +err: + debugfs_remove_recursive(ccp->debugfs_instance); } void ccp5_debugfs_destroy(void) diff --git a/drivers/crypto/ccp/ccp-dev-v3.c b/drivers/crypto/ccp/ccp-dev-v3.c index 367c2e30656f..240bebbcb8ac 100644 --- a/drivers/crypto/ccp/ccp-dev-v3.c +++ b/drivers/crypto/ccp/ccp-dev-v3.c @@ -1,7 +1,7 @@ /* * AMD Cryptographic Coprocessor (CCP) driver * - * Copyright (C) 2013,2016 Advanced Micro Devices, Inc. + * Copyright (C) 2013,2017 Advanced Micro Devices, Inc. 
* * Author: Tom Lendacky <thomas.lendacky@amd.com> * Author: Gary R Hook <gary.hook@amd.com> @@ -359,8 +359,7 @@ static void ccp_irq_bh(unsigned long data) static irqreturn_t ccp_irq_handler(int irq, void *data) { - struct device *dev = data; - struct ccp_device *ccp = dev_get_drvdata(dev); + struct ccp_device *ccp = (struct ccp_device *)data; ccp_disable_queue_interrupts(ccp); if (ccp->use_tasklet) @@ -454,7 +453,7 @@ static int ccp_init(struct ccp_device *ccp) iowrite32(ccp->qim, ccp->io_regs + IRQ_STATUS_REG); /* Request an irq */ - ret = ccp->get_irq(ccp); + ret = sp_request_ccp_irq(ccp->sp, ccp_irq_handler, ccp->name, ccp); if (ret) { dev_err(dev, "unable to allocate an IRQ\n"); goto e_pool; @@ -511,7 +510,7 @@ e_kthread: if (ccp->cmd_q[i].kthread) kthread_stop(ccp->cmd_q[i].kthread); - ccp->free_irq(ccp); + sp_free_ccp_irq(ccp->sp, ccp); e_pool: for (i = 0; i < ccp->cmd_q_count; i++) @@ -550,7 +549,7 @@ static void ccp_destroy(struct ccp_device *ccp) if (ccp->cmd_q[i].kthread) kthread_stop(ccp->cmd_q[i].kthread); - ccp->free_irq(ccp); + sp_free_ccp_irq(ccp->sp, ccp); for (i = 0; i < ccp->cmd_q_count; i++) dma_pool_destroy(ccp->cmd_q[i].dma_pool); @@ -586,10 +585,17 @@ static const struct ccp_actions ccp3_actions = { .irqhandler = ccp_irq_handler, }; +const struct ccp_vdata ccpv3_platform = { + .version = CCP_VERSION(3, 0), + .setup = NULL, + .perform = &ccp3_actions, + .offset = 0, +}; + const struct ccp_vdata ccpv3 = { .version = CCP_VERSION(3, 0), .setup = NULL, .perform = &ccp3_actions, - .bar = 2, .offset = 0x20000, + .rsamax = CCP_RSA_MAX_WIDTH, }; diff --git a/drivers/crypto/ccp/ccp-dev-v5.c b/drivers/crypto/ccp/ccp-dev-v5.c index b10d2d2075cb..65604fc65e8f 100644 --- a/drivers/crypto/ccp/ccp-dev-v5.c +++ b/drivers/crypto/ccp/ccp-dev-v5.c @@ -1,7 +1,7 @@ /* * AMD Cryptographic Coprocessor (CCP) driver * - * Copyright (C) 2016 Advanced Micro Devices, Inc. + * Copyright (C) 2016,2017 Advanced Micro Devices, Inc. 
* * Author: Gary R Hook <gary.hook@amd.com> * @@ -145,6 +145,7 @@ union ccp_function { #define CCP_AES_MODE(p) ((p)->aes.mode) #define CCP_AES_TYPE(p) ((p)->aes.type) #define CCP_XTS_SIZE(p) ((p)->aes_xts.size) +#define CCP_XTS_TYPE(p) ((p)->aes_xts.type) #define CCP_XTS_ENCRYPT(p) ((p)->aes_xts.encrypt) #define CCP_DES3_SIZE(p) ((p)->des3.size) #define CCP_DES3_ENCRYPT(p) ((p)->des3.encrypt) @@ -344,6 +345,7 @@ static int ccp5_perform_xts_aes(struct ccp_op *op) CCP5_CMD_PROT(&desc) = 0; function.raw = 0; + CCP_XTS_TYPE(&function) = op->u.xts.type; CCP_XTS_ENCRYPT(&function) = op->u.xts.action; CCP_XTS_SIZE(&function) = op->u.xts.unit_size; CCP5_CMD_FUNCTION(&desc) = function.raw; @@ -469,7 +471,7 @@ static int ccp5_perform_rsa(struct ccp_op *op) CCP5_CMD_PROT(&desc) = 0; function.raw = 0; - CCP_RSA_SIZE(&function) = op->u.rsa.mod_size >> 3; + CCP_RSA_SIZE(&function) = (op->u.rsa.mod_size + 7) >> 3; CCP5_CMD_FUNCTION(&desc) = function.raw; CCP5_CMD_LEN(&desc) = op->u.rsa.input_len; @@ -484,10 +486,10 @@ static int ccp5_perform_rsa(struct ccp_op *op) CCP5_CMD_DST_HI(&desc) = ccp_addr_hi(&op->dst.u.dma); CCP5_CMD_DST_MEM(&desc) = CCP_MEMTYPE_SYSTEM; - /* Exponent is in LSB memory */ - CCP5_CMD_KEY_LO(&desc) = op->sb_key * LSB_ITEM_SIZE; - CCP5_CMD_KEY_HI(&desc) = 0; - CCP5_CMD_KEY_MEM(&desc) = CCP_MEMTYPE_SB; + /* Key (Exponent) is in external memory */ + CCP5_CMD_KEY_LO(&desc) = ccp_addr_lo(&op->exp.u.dma); + CCP5_CMD_KEY_HI(&desc) = ccp_addr_hi(&op->exp.u.dma); + CCP5_CMD_KEY_MEM(&desc) = CCP_MEMTYPE_SYSTEM; return ccp5_do_cmd(&desc, op->cmd_q); } @@ -769,8 +771,7 @@ static void ccp5_irq_bh(unsigned long data) static irqreturn_t ccp5_irq_handler(int irq, void *data) { - struct device *dev = data; - struct ccp_device *ccp = dev_get_drvdata(dev); + struct ccp_device *ccp = (struct ccp_device *)data; ccp5_disable_queue_interrupts(ccp); ccp->total_interrupts++; @@ -881,7 +882,7 @@ static int ccp5_init(struct ccp_device *ccp) dev_dbg(dev, "Requesting an IRQ...\n"); /* 
Request an irq */ - ret = ccp->get_irq(ccp); + ret = sp_request_ccp_irq(ccp->sp, ccp5_irq_handler, ccp->name, ccp); if (ret) { dev_err(dev, "unable to allocate an IRQ\n"); goto e_pool; @@ -987,7 +988,7 @@ e_kthread: kthread_stop(ccp->cmd_q[i].kthread); e_irq: - ccp->free_irq(ccp); + sp_free_ccp_irq(ccp->sp, ccp); e_pool: for (i = 0; i < ccp->cmd_q_count; i++) @@ -1037,7 +1038,7 @@ static void ccp5_destroy(struct ccp_device *ccp) if (ccp->cmd_q[i].kthread) kthread_stop(ccp->cmd_q[i].kthread); - ccp->free_irq(ccp); + sp_free_ccp_irq(ccp->sp, ccp); for (i = 0; i < ccp->cmd_q_count; i++) { cmd_q = &ccp->cmd_q[i]; @@ -1106,15 +1107,14 @@ static const struct ccp_actions ccp5_actions = { .init = ccp5_init, .destroy = ccp5_destroy, .get_free_slots = ccp5_get_free_slots, - .irqhandler = ccp5_irq_handler, }; const struct ccp_vdata ccpv5a = { .version = CCP_VERSION(5, 0), .setup = ccp5_config, .perform = &ccp5_actions, - .bar = 2, .offset = 0x0, + .rsamax = CCP5_RSA_MAX_WIDTH, }; const struct ccp_vdata ccpv5b = { @@ -1122,6 +1122,6 @@ const struct ccp_vdata ccpv5b = { .dma_chan_attr = DMA_PRIVATE, .setup = ccp5other_config, .perform = &ccp5_actions, - .bar = 2, .offset = 0x0, + .rsamax = CCP5_RSA_MAX_WIDTH, }; diff --git a/drivers/crypto/ccp/ccp-dev.c b/drivers/crypto/ccp/ccp-dev.c index 2506b5025700..4e029b176641 100644 --- a/drivers/crypto/ccp/ccp-dev.c +++ b/drivers/crypto/ccp/ccp-dev.c @@ -1,7 +1,7 @@ /* * AMD Cryptographic Coprocessor (CCP) driver * - * Copyright (C) 2013,2016 Advanced Micro Devices, Inc. + * Copyright (C) 2013,2017 Advanced Micro Devices, Inc. * * Author: Tom Lendacky <thomas.lendacky@amd.com> * Author: Gary R Hook <gary.hook@amd.com> @@ -11,7 +11,6 @@ * published by the Free Software Foundation. 
*/ -#include <linux/module.h> #include <linux/kernel.h> #include <linux/kthread.h> #include <linux/sched.h> @@ -30,12 +29,6 @@ #include "ccp-dev.h" -MODULE_AUTHOR("Tom Lendacky <thomas.lendacky@amd.com>"); -MODULE_AUTHOR("Gary R Hook <gary.hook@amd.com>"); -MODULE_LICENSE("GPL"); -MODULE_VERSION("1.1.0"); -MODULE_DESCRIPTION("AMD Cryptographic Coprocessor driver"); - struct ccp_tasklet_data { struct completion completion; struct ccp_cmd *cmd; @@ -111,13 +104,6 @@ static LIST_HEAD(ccp_units); static DEFINE_SPINLOCK(ccp_rr_lock); static struct ccp_device *ccp_rr; -/* Ever-increasing value to produce unique unit numbers */ -static atomic_t ccp_unit_ordinal; -static unsigned int ccp_increment_unit_ordinal(void) -{ - return atomic_inc_return(&ccp_unit_ordinal); -} - /** * ccp_add_device - add a CCP device to the list * @@ -415,6 +401,7 @@ static void ccp_do_cmd_complete(unsigned long data) struct ccp_cmd *cmd = tdata->cmd; cmd->callback(cmd->data, cmd->ret); + complete(&tdata->completion); } @@ -464,14 +451,17 @@ int ccp_cmd_queue_thread(void *data) * * @dev: device struct of the CCP */ -struct ccp_device *ccp_alloc_struct(struct device *dev) +struct ccp_device *ccp_alloc_struct(struct sp_device *sp) { + struct device *dev = sp->dev; struct ccp_device *ccp; ccp = devm_kzalloc(dev, sizeof(*ccp), GFP_KERNEL); if (!ccp) return NULL; ccp->dev = dev; + ccp->sp = sp; + ccp->axcache = sp->axcache; INIT_LIST_HEAD(&ccp->cmd); INIT_LIST_HEAD(&ccp->backlog); @@ -486,9 +476,8 @@ struct ccp_device *ccp_alloc_struct(struct device *dev) init_waitqueue_head(&ccp->sb_queue); init_waitqueue_head(&ccp->suspend_queue); - ccp->ord = ccp_increment_unit_ordinal(); - snprintf(ccp->name, MAX_CCP_NAME_LEN, "ccp-%u", ccp->ord); - snprintf(ccp->rngname, MAX_CCP_NAME_LEN, "ccp-%u-rng", ccp->ord); + snprintf(ccp->name, MAX_CCP_NAME_LEN, "ccp-%u", sp->ord); + snprintf(ccp->rngname, MAX_CCP_NAME_LEN, "ccp-%u-rng", sp->ord); return ccp; } @@ -538,55 +527,100 @@ bool ccp_queues_suspended(struct 
ccp_device *ccp) return ccp->cmd_q_count == suspended; } -#endif -static int __init ccp_mod_init(void) +int ccp_dev_suspend(struct sp_device *sp, pm_message_t state) { -#ifdef CONFIG_X86 - int ret; + struct ccp_device *ccp = sp->ccp_data; + unsigned long flags; + unsigned int i; - ret = ccp_pci_init(); - if (ret) - return ret; + spin_lock_irqsave(&ccp->cmd_lock, flags); - /* Don't leave the driver loaded if init failed */ - if (ccp_present() != 0) { - ccp_pci_exit(); - return -ENODEV; + ccp->suspending = 1; + + /* Wake all the queue kthreads to prepare for suspend */ + for (i = 0; i < ccp->cmd_q_count; i++) + wake_up_process(ccp->cmd_q[i].kthread); + + spin_unlock_irqrestore(&ccp->cmd_lock, flags); + + /* Wait for all queue kthreads to say they're done */ + while (!ccp_queues_suspended(ccp)) + wait_event_interruptible(ccp->suspend_queue, + ccp_queues_suspended(ccp)); + + return 0; +} + +int ccp_dev_resume(struct sp_device *sp) +{ + struct ccp_device *ccp = sp->ccp_data; + unsigned long flags; + unsigned int i; + + spin_lock_irqsave(&ccp->cmd_lock, flags); + + ccp->suspending = 0; + + /* Wake up all the kthreads */ + for (i = 0; i < ccp->cmd_q_count; i++) { + ccp->cmd_q[i].suspended = 0; + wake_up_process(ccp->cmd_q[i].kthread); } + spin_unlock_irqrestore(&ccp->cmd_lock, flags); + return 0; +} #endif -#ifdef CONFIG_ARM64 +int ccp_dev_init(struct sp_device *sp) +{ + struct device *dev = sp->dev; + struct ccp_device *ccp; int ret; - ret = ccp_platform_init(); + ret = -ENOMEM; + ccp = ccp_alloc_struct(sp); + if (!ccp) + goto e_err; + sp->ccp_data = ccp; + + ccp->vdata = (struct ccp_vdata *)sp->dev_vdata->ccp_vdata; + if (!ccp->vdata || !ccp->vdata->version) { + ret = -ENODEV; + dev_err(dev, "missing driver data\n"); + goto e_err; + } + + ccp->use_tasklet = sp->use_tasklet; + + ccp->io_regs = sp->io_map + ccp->vdata->offset; + if (ccp->vdata->setup) + ccp->vdata->setup(ccp); + + ret = ccp->vdata->perform->init(ccp); if (ret) - return ret; + goto e_err; - /* Don't leave 
the driver loaded if init failed */ - if (ccp_present() != 0) { - ccp_platform_exit(); - return -ENODEV; - } + dev_notice(dev, "ccp enabled\n"); return 0; -#endif - return -ENODEV; +e_err: + sp->ccp_data = NULL; + + dev_notice(dev, "ccp initialization failed\n"); + + return ret; } -static void __exit ccp_mod_exit(void) +void ccp_dev_destroy(struct sp_device *sp) { -#ifdef CONFIG_X86 - ccp_pci_exit(); -#endif + struct ccp_device *ccp = sp->ccp_data; -#ifdef CONFIG_ARM64 - ccp_platform_exit(); -#endif -} + if (!ccp) + return; -module_init(ccp_mod_init); -module_exit(ccp_mod_exit); + ccp->vdata->perform->destroy(ccp); +} diff --git a/drivers/crypto/ccp/ccp-dev.h b/drivers/crypto/ccp/ccp-dev.h index a70154ac7405..6810b65c1939 100644 --- a/drivers/crypto/ccp/ccp-dev.h +++ b/drivers/crypto/ccp/ccp-dev.h @@ -1,7 +1,7 @@ /* * AMD Cryptographic Coprocessor (CCP) driver * - * Copyright (C) 2013,2016 Advanced Micro Devices, Inc. + * Copyright (C) 2013,2017 Advanced Micro Devices, Inc. * * Author: Tom Lendacky <thomas.lendacky@amd.com> * Author: Gary R Hook <gary.hook@amd.com> @@ -27,6 +27,8 @@ #include <linux/irqreturn.h> #include <linux/dmaengine.h> +#include "sp-dev.h" + #define MAX_CCP_NAME_LEN 16 #define MAX_DMAPOOL_NAME_LEN 32 @@ -192,6 +194,7 @@ #define CCP_AES_CTX_SB_COUNT 1 #define CCP_XTS_AES_KEY_SB_COUNT 1 +#define CCP5_XTS_AES_KEY_SB_COUNT 2 #define CCP_XTS_AES_CTX_SB_COUNT 1 #define CCP_DES3_KEY_SB_COUNT 1 @@ -200,6 +203,7 @@ #define CCP_SHA_SB_COUNT 1 #define CCP_RSA_MAX_WIDTH 4096 +#define CCP5_RSA_MAX_WIDTH 16384 #define CCP_PASSTHRU_BLOCKSIZE 256 #define CCP_PASSTHRU_MASKSIZE 32 @@ -344,12 +348,11 @@ struct ccp_device { char rngname[MAX_CCP_NAME_LEN]; struct device *dev; + struct sp_device *sp; /* Bus specific device information */ void *dev_specific; - int (*get_irq)(struct ccp_device *ccp); - void (*free_irq)(struct ccp_device *ccp); unsigned int qim; unsigned int irq; bool use_tasklet; @@ -362,7 +365,6 @@ struct ccp_device { * them. 
*/ struct mutex req_mutex ____cacheline_aligned; - void __iomem *io_map; void __iomem *io_regs; /* Master lists that all cmds are queued on. Because there can be @@ -497,6 +499,7 @@ struct ccp_aes_op { }; struct ccp_xts_aes_op { + enum ccp_aes_type type; enum ccp_aes_action action; enum ccp_xts_aes_unit_size unit_size; }; @@ -626,18 +629,12 @@ struct ccp5_desc { struct dword7 dw7; }; -int ccp_pci_init(void); -void ccp_pci_exit(void); - -int ccp_platform_init(void); -void ccp_platform_exit(void); - void ccp_add_device(struct ccp_device *ccp); void ccp_del_device(struct ccp_device *ccp); extern void ccp_log_error(struct ccp_device *, int); -struct ccp_device *ccp_alloc_struct(struct device *dev); +struct ccp_device *ccp_alloc_struct(struct sp_device *sp); bool ccp_queues_suspended(struct ccp_device *ccp); int ccp_cmd_queue_thread(void *data); int ccp_trng_read(struct hwrng *rng, void *data, size_t max, bool wait); @@ -669,16 +666,7 @@ struct ccp_actions { irqreturn_t (*irqhandler)(int, void *); }; -/* Structure to hold CCP version-specific values */ -struct ccp_vdata { - const unsigned int version; - const unsigned int dma_chan_attr; - void (*setup)(struct ccp_device *); - const struct ccp_actions *perform; - const unsigned int bar; - const unsigned int offset; -}; - +extern const struct ccp_vdata ccpv3_platform; extern const struct ccp_vdata ccpv3; extern const struct ccp_vdata ccpv5a; extern const struct ccp_vdata ccpv5b; diff --git a/drivers/crypto/ccp/ccp-dmaengine.c b/drivers/crypto/ccp/ccp-dmaengine.c index e00be01fbf5a..901343dd513e 100644 --- a/drivers/crypto/ccp/ccp-dmaengine.c +++ b/drivers/crypto/ccp/ccp-dmaengine.c @@ -1,7 +1,7 @@ /* * AMD Cryptographic Coprocessor (CCP) driver * - * Copyright (C) 2016 Advanced Micro Devices, Inc. + * Copyright (C) 2016,2017 Advanced Micro Devices, Inc. 
* * Author: Gary R Hook <gary.hook@amd.com> * diff --git a/drivers/crypto/ccp/ccp-ops.c b/drivers/crypto/ccp/ccp-ops.c index c0dfdacbdff5..406b95329b3d 100644 --- a/drivers/crypto/ccp/ccp-ops.c +++ b/drivers/crypto/ccp/ccp-ops.c @@ -1,7 +1,7 @@ /* * AMD Cryptographic Coprocessor (CCP) driver * - * Copyright (C) 2013,2016 Advanced Micro Devices, Inc. + * Copyright (C) 2013,2017 Advanced Micro Devices, Inc. * * Author: Tom Lendacky <thomas.lendacky@amd.com> * Author: Gary R Hook <gary.hook@amd.com> @@ -168,7 +168,7 @@ static int ccp_init_dm_workarea(struct ccp_dm_workarea *wa, wa->dma.address = dma_map_single(wa->dev, wa->address, len, dir); - if (!wa->dma.address) + if (dma_mapping_error(wa->dev, wa->dma.address)) return -ENOMEM; wa->dma.length = len; @@ -1038,6 +1038,8 @@ static int ccp_run_xts_aes_cmd(struct ccp_cmd_queue *cmd_q, struct ccp_op op; unsigned int unit_size, dm_offset; bool in_place = false; + unsigned int sb_count; + enum ccp_aes_type aestype; int ret; switch (xts->unit_size) { @@ -1061,7 +1063,11 @@ static int ccp_run_xts_aes_cmd(struct ccp_cmd_queue *cmd_q, return -EINVAL; } - if (xts->key_len != AES_KEYSIZE_128) + if (xts->key_len == AES_KEYSIZE_128) + aestype = CCP_AES_TYPE_128; + else if (xts->key_len == AES_KEYSIZE_256) + aestype = CCP_AES_TYPE_256; + else return -EINVAL; if (!xts->final && (xts->src_len & (AES_BLOCK_SIZE - 1))) @@ -1083,23 +1089,44 @@ static int ccp_run_xts_aes_cmd(struct ccp_cmd_queue *cmd_q, op.sb_key = cmd_q->sb_key; op.sb_ctx = cmd_q->sb_ctx; op.init = 1; + op.u.xts.type = aestype; op.u.xts.action = xts->action; op.u.xts.unit_size = xts->unit_size; - /* All supported key sizes fit in a single (32-byte) SB entry - * and must be in little endian format. Use the 256-bit byte - * swap passthru option to convert from big endian to little - * endian. + /* A version 3 device only supports 128-bit keys, which fits into a + * single SB entry. A version 5 device uses a 512-bit vector, so two + * SB entries. 
*/ + if (cmd_q->ccp->vdata->version == CCP_VERSION(3, 0)) + sb_count = CCP_XTS_AES_KEY_SB_COUNT; + else + sb_count = CCP5_XTS_AES_KEY_SB_COUNT; ret = ccp_init_dm_workarea(&key, cmd_q, - CCP_XTS_AES_KEY_SB_COUNT * CCP_SB_BYTES, + sb_count * CCP_SB_BYTES, DMA_TO_DEVICE); if (ret) return ret; - dm_offset = CCP_SB_BYTES - AES_KEYSIZE_128; - ccp_set_dm_area(&key, dm_offset, xts->key, 0, xts->key_len); - ccp_set_dm_area(&key, 0, xts->key, dm_offset, xts->key_len); + if (cmd_q->ccp->vdata->version == CCP_VERSION(3, 0)) { + /* All supported key sizes must be in little endian format. + * Use the 256-bit byte swap passthru option to convert from + * big endian to little endian. + */ + dm_offset = CCP_SB_BYTES - AES_KEYSIZE_128; + ccp_set_dm_area(&key, dm_offset, xts->key, 0, xts->key_len); + ccp_set_dm_area(&key, 0, xts->key, xts->key_len, xts->key_len); + } else { + /* Version 5 CCPs use a 512-bit space for the key: each portion + * occupies 256 bits, or one entire slot, and is zero-padded. + */ + unsigned int pad; + + dm_offset = CCP_SB_BYTES; + pad = dm_offset - xts->key_len; + ccp_set_dm_area(&key, pad, xts->key, 0, xts->key_len); + ccp_set_dm_area(&key, dm_offset + pad, xts->key, xts->key_len, + xts->key_len); + } ret = ccp_copy_to_sb(cmd_q, &key, op.jobid, op.sb_key, CCP_PASSTHRU_BYTESWAP_256BIT); if (ret) { @@ -1731,42 +1758,53 @@ e_ctx: static int ccp_run_rsa_cmd(struct ccp_cmd_queue *cmd_q, struct ccp_cmd *cmd) { struct ccp_rsa_engine *rsa = &cmd->u.rsa; - struct ccp_dm_workarea exp, src; - struct ccp_data dst; + struct ccp_dm_workarea exp, src, dst; struct ccp_op op; unsigned int sb_count, i_len, o_len; int ret; - if (rsa->key_size > CCP_RSA_MAX_WIDTH) + /* Check against the maximum allowable size, in bits */ + if (rsa->key_size > cmd_q->ccp->vdata->rsamax) return -EINVAL; if (!rsa->exp || !rsa->mod || !rsa->src || !rsa->dst) return -EINVAL; + memset(&op, 0, sizeof(op)); + op.cmd_q = cmd_q; + op.jobid = CCP_NEW_JOBID(cmd_q->ccp); + /* The RSA modulus must precede 
the message being acted upon, so * it must be copied to a DMA area where the message and the * modulus can be concatenated. Therefore the input buffer * length required is twice the output buffer length (which - * must be a multiple of 256-bits). + * must be a multiple of 256-bits). Compute o_len, i_len in bytes. + * Buffer sizes must be a multiple of 32 bytes; rounding up may be + * required. */ - o_len = ((rsa->key_size + 255) / 256) * 32; + o_len = 32 * ((rsa->key_size + 255) / 256); i_len = o_len * 2; - sb_count = o_len / CCP_SB_BYTES; - - memset(&op, 0, sizeof(op)); - op.cmd_q = cmd_q; - op.jobid = ccp_gen_jobid(cmd_q->ccp); - op.sb_key = cmd_q->ccp->vdata->perform->sballoc(cmd_q, sb_count); - - if (!op.sb_key) - return -EIO; + sb_count = 0; + if (cmd_q->ccp->vdata->version < CCP_VERSION(5, 0)) { + /* sb_count is the number of storage block slots required + * for the modulus. + */ + sb_count = o_len / CCP_SB_BYTES; + op.sb_key = cmd_q->ccp->vdata->perform->sballoc(cmd_q, + sb_count); + if (!op.sb_key) + return -EIO; + } else { + /* A version 5 device allows a modulus size that will not fit + * in the LSB, so the command will transfer it from memory. + * Set the sb key to the default, even though it's not used. + */ + op.sb_key = cmd_q->sb_key; + } - /* The RSA exponent may span multiple (32-byte) SB entries and must - * be in little endian format. Reverse copy each 32-byte chunk - * of the exponent (En chunk to E0 chunk, E(n-1) chunk to E1 chunk) - * and each byte within that chunk and do not perform any byte swap - * operations on the passthru operation. + /* The RSA exponent must be in little endian format. Reverse its + * byte order. 
*/ ret = ccp_init_dm_workarea(&exp, cmd_q, o_len, DMA_TO_DEVICE); if (ret) @@ -1775,11 +1813,22 @@ static int ccp_run_rsa_cmd(struct ccp_cmd_queue *cmd_q, struct ccp_cmd *cmd) ret = ccp_reverse_set_dm_area(&exp, 0, rsa->exp, 0, rsa->exp_len); if (ret) goto e_exp; - ret = ccp_copy_to_sb(cmd_q, &exp, op.jobid, op.sb_key, - CCP_PASSTHRU_BYTESWAP_NOOP); - if (ret) { - cmd->engine_error = cmd_q->cmd_error; - goto e_exp; + + if (cmd_q->ccp->vdata->version < CCP_VERSION(5, 0)) { + /* Copy the exponent to the local storage block, using + * as many 32-byte blocks as were allocated above. It's + * already little endian, so no further change is required. + */ + ret = ccp_copy_to_sb(cmd_q, &exp, op.jobid, op.sb_key, + CCP_PASSTHRU_BYTESWAP_NOOP); + if (ret) { + cmd->engine_error = cmd_q->cmd_error; + goto e_exp; + } + } else { + /* The exponent can be retrieved from memory via DMA. */ + op.exp.u.dma.address = exp.dma.address; + op.exp.u.dma.offset = 0; } /* Concatenate the modulus and the message. Both the modulus and @@ -1798,8 +1847,7 @@ static int ccp_run_rsa_cmd(struct ccp_cmd_queue *cmd_q, struct ccp_cmd *cmd) goto e_src; /* Prepare the output area for the operation */ - ret = ccp_init_data(&dst, cmd_q, rsa->dst, rsa->mod_len, - o_len, DMA_FROM_DEVICE); + ret = ccp_init_dm_workarea(&dst, cmd_q, o_len, DMA_FROM_DEVICE); if (ret) goto e_src; @@ -1807,7 +1855,7 @@ static int ccp_run_rsa_cmd(struct ccp_cmd_queue *cmd_q, struct ccp_cmd *cmd) op.src.u.dma.address = src.dma.address; op.src.u.dma.offset = 0; op.src.u.dma.length = i_len; - op.dst.u.dma.address = dst.dm_wa.dma.address; + op.dst.u.dma.address = dst.dma.address; op.dst.u.dma.offset = 0; op.dst.u.dma.length = o_len; @@ -1820,10 +1868,10 @@ static int ccp_run_rsa_cmd(struct ccp_cmd_queue *cmd_q, struct ccp_cmd *cmd) goto e_dst; } - ccp_reverse_get_dm_area(&dst.dm_wa, 0, rsa->dst, 0, rsa->mod_len); + ccp_reverse_get_dm_area(&dst, 0, rsa->dst, 0, rsa->mod_len); e_dst: - ccp_free_data(&dst, cmd_q); + ccp_dm_free(&dst); 
e_src: ccp_dm_free(&src); @@ -1832,7 +1880,8 @@ e_exp: ccp_dm_free(&exp); e_sb: - cmd_q->ccp->vdata->perform->sbfree(cmd_q, op.sb_key, sb_count); + if (sb_count) + cmd_q->ccp->vdata->perform->sbfree(cmd_q, op.sb_key, sb_count); return ret; } @@ -1992,7 +2041,7 @@ static int ccp_run_passthru_nomap_cmd(struct ccp_cmd_queue *cmd_q, memset(&op, 0, sizeof(op)); op.cmd_q = cmd_q; - op.jobid = ccp_gen_jobid(cmd_q->ccp); + op.jobid = CCP_NEW_JOBID(cmd_q->ccp); if (pt->bit_mod != CCP_PASSTHRU_BITWISE_NOOP) { /* Load the mask */ diff --git a/drivers/crypto/ccp/ccp-pci.c b/drivers/crypto/ccp/ccp-pci.c deleted file mode 100644 index e880d4cf4ada..000000000000 --- a/drivers/crypto/ccp/ccp-pci.c +++ /dev/null @@ -1,356 +0,0 @@ -/* - * AMD Cryptographic Coprocessor (CCP) driver - * - * Copyright (C) 2013,2016 Advanced Micro Devices, Inc. - * - * Author: Tom Lendacky <thomas.lendacky@amd.com> - * Author: Gary R Hook <gary.hook@amd.com> - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 as - * published by the Free Software Foundation. 
- */ - -#include <linux/module.h> -#include <linux/kernel.h> -#include <linux/device.h> -#include <linux/pci.h> -#include <linux/pci_ids.h> -#include <linux/dma-mapping.h> -#include <linux/kthread.h> -#include <linux/sched.h> -#include <linux/interrupt.h> -#include <linux/spinlock.h> -#include <linux/delay.h> -#include <linux/ccp.h> - -#include "ccp-dev.h" - -#define MSIX_VECTORS 2 - -struct ccp_msix { - u32 vector; - char name[16]; -}; - -struct ccp_pci { - int msix_count; - struct ccp_msix msix[MSIX_VECTORS]; -}; - -static int ccp_get_msix_irqs(struct ccp_device *ccp) -{ - struct ccp_pci *ccp_pci = ccp->dev_specific; - struct device *dev = ccp->dev; - struct pci_dev *pdev = to_pci_dev(dev); - struct msix_entry msix_entry[MSIX_VECTORS]; - unsigned int name_len = sizeof(ccp_pci->msix[0].name) - 1; - int v, ret; - - for (v = 0; v < ARRAY_SIZE(msix_entry); v++) - msix_entry[v].entry = v; - - ret = pci_enable_msix_range(pdev, msix_entry, 1, v); - if (ret < 0) - return ret; - - ccp_pci->msix_count = ret; - for (v = 0; v < ccp_pci->msix_count; v++) { - /* Set the interrupt names and request the irqs */ - snprintf(ccp_pci->msix[v].name, name_len, "%s-%u", - ccp->name, v); - ccp_pci->msix[v].vector = msix_entry[v].vector; - ret = request_irq(ccp_pci->msix[v].vector, - ccp->vdata->perform->irqhandler, - 0, ccp_pci->msix[v].name, dev); - if (ret) { - dev_notice(dev, "unable to allocate MSI-X IRQ (%d)\n", - ret); - goto e_irq; - } - } - ccp->use_tasklet = true; - - return 0; - -e_irq: - while (v--) - free_irq(ccp_pci->msix[v].vector, dev); - - pci_disable_msix(pdev); - - ccp_pci->msix_count = 0; - - return ret; -} - -static int ccp_get_msi_irq(struct ccp_device *ccp) -{ - struct device *dev = ccp->dev; - struct pci_dev *pdev = to_pci_dev(dev); - int ret; - - ret = pci_enable_msi(pdev); - if (ret) - return ret; - - ccp->irq = pdev->irq; - ret = request_irq(ccp->irq, ccp->vdata->perform->irqhandler, 0, - ccp->name, dev); - if (ret) { - dev_notice(dev, "unable to allocate MSI 
IRQ (%d)\n", ret); - goto e_msi; - } - ccp->use_tasklet = true; - - return 0; - -e_msi: - pci_disable_msi(pdev); - - return ret; -} - -static int ccp_get_irqs(struct ccp_device *ccp) -{ - struct device *dev = ccp->dev; - int ret; - - ret = ccp_get_msix_irqs(ccp); - if (!ret) - return 0; - - /* Couldn't get MSI-X vectors, try MSI */ - dev_notice(dev, "could not enable MSI-X (%d), trying MSI\n", ret); - ret = ccp_get_msi_irq(ccp); - if (!ret) - return 0; - - /* Couldn't get MSI interrupt */ - dev_notice(dev, "could not enable MSI (%d)\n", ret); - - return ret; -} - -static void ccp_free_irqs(struct ccp_device *ccp) -{ - struct ccp_pci *ccp_pci = ccp->dev_specific; - struct device *dev = ccp->dev; - struct pci_dev *pdev = to_pci_dev(dev); - - if (ccp_pci->msix_count) { - while (ccp_pci->msix_count--) - free_irq(ccp_pci->msix[ccp_pci->msix_count].vector, - dev); - pci_disable_msix(pdev); - } else if (ccp->irq) { - free_irq(ccp->irq, dev); - pci_disable_msi(pdev); - } - ccp->irq = 0; -} - -static int ccp_find_mmio_area(struct ccp_device *ccp) -{ - struct device *dev = ccp->dev; - struct pci_dev *pdev = to_pci_dev(dev); - resource_size_t io_len; - unsigned long io_flags; - - io_flags = pci_resource_flags(pdev, ccp->vdata->bar); - io_len = pci_resource_len(pdev, ccp->vdata->bar); - if ((io_flags & IORESOURCE_MEM) && - (io_len >= (ccp->vdata->offset + 0x800))) - return ccp->vdata->bar; - - return -EIO; -} - -static int ccp_pci_probe(struct pci_dev *pdev, const struct pci_device_id *id) -{ - struct ccp_device *ccp; - struct ccp_pci *ccp_pci; - struct device *dev = &pdev->dev; - unsigned int bar; - int ret; - - ret = -ENOMEM; - ccp = ccp_alloc_struct(dev); - if (!ccp) - goto e_err; - - ccp_pci = devm_kzalloc(dev, sizeof(*ccp_pci), GFP_KERNEL); - if (!ccp_pci) - goto e_err; - - ccp->dev_specific = ccp_pci; - ccp->vdata = (struct ccp_vdata *)id->driver_data; - if (!ccp->vdata || !ccp->vdata->version) { - ret = -ENODEV; - dev_err(dev, "missing driver data\n"); - goto e_err; - } 
- ccp->get_irq = ccp_get_irqs; - ccp->free_irq = ccp_free_irqs; - - ret = pci_request_regions(pdev, "ccp"); - if (ret) { - dev_err(dev, "pci_request_regions failed (%d)\n", ret); - goto e_err; - } - - ret = pci_enable_device(pdev); - if (ret) { - dev_err(dev, "pci_enable_device failed (%d)\n", ret); - goto e_regions; - } - - pci_set_master(pdev); - - ret = ccp_find_mmio_area(ccp); - if (ret < 0) - goto e_device; - bar = ret; - - ret = -EIO; - ccp->io_map = pci_iomap(pdev, bar, 0); - if (!ccp->io_map) { - dev_err(dev, "pci_iomap failed\n"); - goto e_device; - } - ccp->io_regs = ccp->io_map + ccp->vdata->offset; - - ret = dma_set_mask_and_coherent(dev, DMA_BIT_MASK(48)); - if (ret) { - ret = dma_set_mask_and_coherent(dev, DMA_BIT_MASK(32)); - if (ret) { - dev_err(dev, "dma_set_mask_and_coherent failed (%d)\n", - ret); - goto e_iomap; - } - } - - dev_set_drvdata(dev, ccp); - - if (ccp->vdata->setup) - ccp->vdata->setup(ccp); - - ret = ccp->vdata->perform->init(ccp); - if (ret) - goto e_iomap; - - dev_notice(dev, "enabled\n"); - - return 0; - -e_iomap: - pci_iounmap(pdev, ccp->io_map); - -e_device: - pci_disable_device(pdev); - -e_regions: - pci_release_regions(pdev); - -e_err: - dev_notice(dev, "initialization failed\n"); - return ret; -} - -static void ccp_pci_remove(struct pci_dev *pdev) -{ - struct device *dev = &pdev->dev; - struct ccp_device *ccp = dev_get_drvdata(dev); - - if (!ccp) - return; - - ccp->vdata->perform->destroy(ccp); - - pci_iounmap(pdev, ccp->io_map); - - pci_disable_device(pdev); - - pci_release_regions(pdev); - - dev_notice(dev, "disabled\n"); -} - -#ifdef CONFIG_PM -static int ccp_pci_suspend(struct pci_dev *pdev, pm_message_t state) -{ - struct device *dev = &pdev->dev; - struct ccp_device *ccp = dev_get_drvdata(dev); - unsigned long flags; - unsigned int i; - - spin_lock_irqsave(&ccp->cmd_lock, flags); - - ccp->suspending = 1; - - /* Wake all the queue kthreads to prepare for suspend */ - for (i = 0; i < ccp->cmd_q_count; i++) - 
wake_up_process(ccp->cmd_q[i].kthread); - - spin_unlock_irqrestore(&ccp->cmd_lock, flags); - - /* Wait for all queue kthreads to say they're done */ - while (!ccp_queues_suspended(ccp)) - wait_event_interruptible(ccp->suspend_queue, - ccp_queues_suspended(ccp)); - - return 0; -} - -static int ccp_pci_resume(struct pci_dev *pdev) -{ - struct device *dev = &pdev->dev; - struct ccp_device *ccp = dev_get_drvdata(dev); - unsigned long flags; - unsigned int i; - - spin_lock_irqsave(&ccp->cmd_lock, flags); - - ccp->suspending = 0; - - /* Wake up all the kthreads */ - for (i = 0; i < ccp->cmd_q_count; i++) { - ccp->cmd_q[i].suspended = 0; - wake_up_process(ccp->cmd_q[i].kthread); - } - - spin_unlock_irqrestore(&ccp->cmd_lock, flags); - - return 0; -} -#endif - -static const struct pci_device_id ccp_pci_table[] = { - { PCI_VDEVICE(AMD, 0x1537), (kernel_ulong_t)&ccpv3 }, - { PCI_VDEVICE(AMD, 0x1456), (kernel_ulong_t)&ccpv5a }, - { PCI_VDEVICE(AMD, 0x1468), (kernel_ulong_t)&ccpv5b }, - /* Last entry must be zero */ - { 0, } -}; -MODULE_DEVICE_TABLE(pci, ccp_pci_table); - -static struct pci_driver ccp_pci_driver = { - .name = "ccp", - .id_table = ccp_pci_table, - .probe = ccp_pci_probe, - .remove = ccp_pci_remove, -#ifdef CONFIG_PM - .suspend = ccp_pci_suspend, - .resume = ccp_pci_resume, -#endif -}; - -int ccp_pci_init(void) -{ - return pci_register_driver(&ccp_pci_driver); -} - -void ccp_pci_exit(void) -{ - pci_unregister_driver(&ccp_pci_driver); -} diff --git a/drivers/crypto/ccp/ccp-platform.c b/drivers/crypto/ccp/ccp-platform.c deleted file mode 100644 index e26969e601ad..000000000000 --- a/drivers/crypto/ccp/ccp-platform.c +++ /dev/null @@ -1,293 +0,0 @@ -/* - * AMD Cryptographic Coprocessor (CCP) driver - * - * Copyright (C) 2014,2016 Advanced Micro Devices, Inc. 
- * - * Author: Tom Lendacky <thomas.lendacky@amd.com> - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 as - * published by the Free Software Foundation. - */ - -#include <linux/module.h> -#include <linux/kernel.h> -#include <linux/device.h> -#include <linux/platform_device.h> -#include <linux/ioport.h> -#include <linux/dma-mapping.h> -#include <linux/kthread.h> -#include <linux/sched.h> -#include <linux/interrupt.h> -#include <linux/spinlock.h> -#include <linux/delay.h> -#include <linux/ccp.h> -#include <linux/of.h> -#include <linux/of_address.h> -#include <linux/acpi.h> - -#include "ccp-dev.h" - -struct ccp_platform { - int coherent; -}; - -static const struct acpi_device_id ccp_acpi_match[]; -static const struct of_device_id ccp_of_match[]; - -static struct ccp_vdata *ccp_get_of_version(struct platform_device *pdev) -{ -#ifdef CONFIG_OF - const struct of_device_id *match; - - match = of_match_node(ccp_of_match, pdev->dev.of_node); - if (match && match->data) - return (struct ccp_vdata *)match->data; -#endif - return NULL; -} - -static struct ccp_vdata *ccp_get_acpi_version(struct platform_device *pdev) -{ -#ifdef CONFIG_ACPI - const struct acpi_device_id *match; - - match = acpi_match_device(ccp_acpi_match, &pdev->dev); - if (match && match->driver_data) - return (struct ccp_vdata *)match->driver_data; -#endif - return NULL; -} - -static int ccp_get_irq(struct ccp_device *ccp) -{ - struct device *dev = ccp->dev; - struct platform_device *pdev = to_platform_device(dev); - int ret; - - ret = platform_get_irq(pdev, 0); - if (ret < 0) - return ret; - - ccp->irq = ret; - ret = request_irq(ccp->irq, ccp->vdata->perform->irqhandler, 0, - ccp->name, dev); - if (ret) { - dev_notice(dev, "unable to allocate IRQ (%d)\n", ret); - return ret; - } - - return 0; -} - -static int ccp_get_irqs(struct ccp_device *ccp) -{ - struct device *dev = ccp->dev; - int ret; - - ret = 
ccp_get_irq(ccp); - if (!ret) - return 0; - - /* Couldn't get an interrupt */ - dev_notice(dev, "could not enable interrupts (%d)\n", ret); - - return ret; -} - -static void ccp_free_irqs(struct ccp_device *ccp) -{ - struct device *dev = ccp->dev; - - free_irq(ccp->irq, dev); -} - -static struct resource *ccp_find_mmio_area(struct ccp_device *ccp) -{ - struct device *dev = ccp->dev; - struct platform_device *pdev = to_platform_device(dev); - struct resource *ior; - - ior = platform_get_resource(pdev, IORESOURCE_MEM, 0); - if (ior && (resource_size(ior) >= 0x800)) - return ior; - - return NULL; -} - -static int ccp_platform_probe(struct platform_device *pdev) -{ - struct ccp_device *ccp; - struct ccp_platform *ccp_platform; - struct device *dev = &pdev->dev; - enum dev_dma_attr attr; - struct resource *ior; - int ret; - - ret = -ENOMEM; - ccp = ccp_alloc_struct(dev); - if (!ccp) - goto e_err; - - ccp_platform = devm_kzalloc(dev, sizeof(*ccp_platform), GFP_KERNEL); - if (!ccp_platform) - goto e_err; - - ccp->dev_specific = ccp_platform; - ccp->vdata = pdev->dev.of_node ? 
ccp_get_of_version(pdev) - : ccp_get_acpi_version(pdev); - if (!ccp->vdata || !ccp->vdata->version) { - ret = -ENODEV; - dev_err(dev, "missing driver data\n"); - goto e_err; - } - ccp->get_irq = ccp_get_irqs; - ccp->free_irq = ccp_free_irqs; - - ior = ccp_find_mmio_area(ccp); - ccp->io_map = devm_ioremap_resource(dev, ior); - if (IS_ERR(ccp->io_map)) { - ret = PTR_ERR(ccp->io_map); - goto e_err; - } - ccp->io_regs = ccp->io_map; - - attr = device_get_dma_attr(dev); - if (attr == DEV_DMA_NOT_SUPPORTED) { - dev_err(dev, "DMA is not supported"); - goto e_err; - } - - ccp_platform->coherent = (attr == DEV_DMA_COHERENT); - if (ccp_platform->coherent) - ccp->axcache = CACHE_WB_NO_ALLOC; - else - ccp->axcache = CACHE_NONE; - - ret = dma_set_mask_and_coherent(dev, DMA_BIT_MASK(48)); - if (ret) { - dev_err(dev, "dma_set_mask_and_coherent failed (%d)\n", ret); - goto e_err; - } - - dev_set_drvdata(dev, ccp); - - ret = ccp->vdata->perform->init(ccp); - if (ret) - goto e_err; - - dev_notice(dev, "enabled\n"); - - return 0; - -e_err: - dev_notice(dev, "initialization failed\n"); - return ret; -} - -static int ccp_platform_remove(struct platform_device *pdev) -{ - struct device *dev = &pdev->dev; - struct ccp_device *ccp = dev_get_drvdata(dev); - - ccp->vdata->perform->destroy(ccp); - - dev_notice(dev, "disabled\n"); - - return 0; -} - -#ifdef CONFIG_PM -static int ccp_platform_suspend(struct platform_device *pdev, - pm_message_t state) -{ - struct device *dev = &pdev->dev; - struct ccp_device *ccp = dev_get_drvdata(dev); - unsigned long flags; - unsigned int i; - - spin_lock_irqsave(&ccp->cmd_lock, flags); - - ccp->suspending = 1; - - /* Wake all the queue kthreads to prepare for suspend */ - for (i = 0; i < ccp->cmd_q_count; i++) - wake_up_process(ccp->cmd_q[i].kthread); - - spin_unlock_irqrestore(&ccp->cmd_lock, flags); - - /* Wait for all queue kthreads to say they're done */ - while (!ccp_queues_suspended(ccp)) - wait_event_interruptible(ccp->suspend_queue, - 
ccp_queues_suspended(ccp)); - - return 0; -} - -static int ccp_platform_resume(struct platform_device *pdev) -{ - struct device *dev = &pdev->dev; - struct ccp_device *ccp = dev_get_drvdata(dev); - unsigned long flags; - unsigned int i; - - spin_lock_irqsave(&ccp->cmd_lock, flags); - - ccp->suspending = 0; - - /* Wake up all the kthreads */ - for (i = 0; i < ccp->cmd_q_count; i++) { - ccp->cmd_q[i].suspended = 0; - wake_up_process(ccp->cmd_q[i].kthread); - } - - spin_unlock_irqrestore(&ccp->cmd_lock, flags); - - return 0; -} -#endif - -#ifdef CONFIG_ACPI -static const struct acpi_device_id ccp_acpi_match[] = { - { "AMDI0C00", (kernel_ulong_t)&ccpv3 }, - { }, -}; -MODULE_DEVICE_TABLE(acpi, ccp_acpi_match); -#endif - -#ifdef CONFIG_OF -static const struct of_device_id ccp_of_match[] = { - { .compatible = "amd,ccp-seattle-v1a", - .data = (const void *)&ccpv3 }, - { }, -}; -MODULE_DEVICE_TABLE(of, ccp_of_match); -#endif - -static struct platform_driver ccp_platform_driver = { - .driver = { - .name = "ccp", -#ifdef CONFIG_ACPI - .acpi_match_table = ccp_acpi_match, -#endif -#ifdef CONFIG_OF - .of_match_table = ccp_of_match, -#endif - }, - .probe = ccp_platform_probe, - .remove = ccp_platform_remove, -#ifdef CONFIG_PM - .suspend = ccp_platform_suspend, - .resume = ccp_platform_resume, -#endif -}; - -int ccp_platform_init(void) -{ - return platform_driver_register(&ccp_platform_driver); -} - -void ccp_platform_exit(void) -{ - platform_driver_unregister(&ccp_platform_driver); -} diff --git a/drivers/crypto/ccp/sp-dev.c b/drivers/crypto/ccp/sp-dev.c new file mode 100644 index 000000000000..bef387c8abfd --- /dev/null +++ b/drivers/crypto/ccp/sp-dev.c @@ -0,0 +1,277 @@ +/* + * AMD Secure Processor driver + * + * Copyright (C) 2017 Advanced Micro Devices, Inc. 
+ * + * Author: Tom Lendacky <thomas.lendacky@amd.com> + * Author: Gary R Hook <gary.hook@amd.com> + * Author: Brijesh Singh <brijesh.singh@amd.com> + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ + +#include <linux/module.h> +#include <linux/kernel.h> +#include <linux/kthread.h> +#include <linux/sched.h> +#include <linux/interrupt.h> +#include <linux/spinlock.h> +#include <linux/spinlock_types.h> +#include <linux/types.h> +#include <linux/ccp.h> + +#include "ccp-dev.h" +#include "sp-dev.h" + +MODULE_AUTHOR("Tom Lendacky <thomas.lendacky@amd.com>"); +MODULE_AUTHOR("Gary R Hook <gary.hook@amd.com>"); +MODULE_LICENSE("GPL"); +MODULE_VERSION("1.1.0"); +MODULE_DESCRIPTION("AMD Secure Processor driver"); + +/* List of SPs, SP count, read-write access lock, and access functions + * + * Lock structure: get sp_unit_lock for reading whenever we need to + * examine the SP list. 
+ */ +static DEFINE_RWLOCK(sp_unit_lock); +static LIST_HEAD(sp_units); + +/* Ever-increasing value to produce unique unit numbers */ +static atomic_t sp_ordinal; + +static void sp_add_device(struct sp_device *sp) +{ + unsigned long flags; + + write_lock_irqsave(&sp_unit_lock, flags); + + list_add_tail(&sp->entry, &sp_units); + + write_unlock_irqrestore(&sp_unit_lock, flags); +} + +static void sp_del_device(struct sp_device *sp) +{ + unsigned long flags; + + write_lock_irqsave(&sp_unit_lock, flags); + + list_del(&sp->entry); + + write_unlock_irqrestore(&sp_unit_lock, flags); +} + +static irqreturn_t sp_irq_handler(int irq, void *data) +{ + struct sp_device *sp = data; + + if (sp->ccp_irq_handler) + sp->ccp_irq_handler(irq, sp->ccp_irq_data); + + if (sp->psp_irq_handler) + sp->psp_irq_handler(irq, sp->psp_irq_data); + + return IRQ_HANDLED; +} + +int sp_request_ccp_irq(struct sp_device *sp, irq_handler_t handler, + const char *name, void *data) +{ + int ret; + + if ((sp->psp_irq == sp->ccp_irq) && sp->dev_vdata->psp_vdata) { + /* Need a common routine to manage all interrupts */ + sp->ccp_irq_data = data; + sp->ccp_irq_handler = handler; + + if (!sp->irq_registered) { + ret = request_irq(sp->ccp_irq, sp_irq_handler, 0, + sp->name, sp); + if (ret) + return ret; + + sp->irq_registered = true; + } + } else { + /* Each sub-device can manage it's own interrupt */ + ret = request_irq(sp->ccp_irq, handler, 0, name, data); + if (ret) + return ret; + } + + return 0; +} + +int sp_request_psp_irq(struct sp_device *sp, irq_handler_t handler, + const char *name, void *data) +{ + int ret; + + if ((sp->psp_irq == sp->ccp_irq) && sp->dev_vdata->ccp_vdata) { + /* Need a common routine to manage all interrupts */ + sp->psp_irq_data = data; + sp->psp_irq_handler = handler; + + if (!sp->irq_registered) { + ret = request_irq(sp->psp_irq, sp_irq_handler, 0, + sp->name, sp); + if (ret) + return ret; + + sp->irq_registered = true; + } + } else { + /* Each sub-device can manage it's own 
interrupt */ + ret = request_irq(sp->psp_irq, handler, 0, name, data); + if (ret) + return ret; + } + + return 0; +} + +void sp_free_ccp_irq(struct sp_device *sp, void *data) +{ + if ((sp->psp_irq == sp->ccp_irq) && sp->dev_vdata->psp_vdata) { + /* Using common routine to manage all interrupts */ + if (!sp->psp_irq_handler) { + /* Nothing else using it, so free it */ + free_irq(sp->ccp_irq, sp); + + sp->irq_registered = false; + } + + sp->ccp_irq_handler = NULL; + sp->ccp_irq_data = NULL; + } else { + /* Each sub-device can manage it's own interrupt */ + free_irq(sp->ccp_irq, data); + } +} + +void sp_free_psp_irq(struct sp_device *sp, void *data) +{ + if ((sp->psp_irq == sp->ccp_irq) && sp->dev_vdata->ccp_vdata) { + /* Using common routine to manage all interrupts */ + if (!sp->ccp_irq_handler) { + /* Nothing else using it, so free it */ + free_irq(sp->psp_irq, sp); + + sp->irq_registered = false; + } + + sp->psp_irq_handler = NULL; + sp->psp_irq_data = NULL; + } else { + /* Each sub-device can manage it's own interrupt */ + free_irq(sp->psp_irq, data); + } +} + +/** + * sp_alloc_struct - allocate and initialize the sp_device struct + * + * @dev: device struct of the SP + */ +struct sp_device *sp_alloc_struct(struct device *dev) +{ + struct sp_device *sp; + + sp = devm_kzalloc(dev, sizeof(*sp), GFP_KERNEL); + if (!sp) + return NULL; + + sp->dev = dev; + sp->ord = atomic_inc_return(&sp_ordinal); + snprintf(sp->name, SP_MAX_NAME_LEN, "sp-%u", sp->ord); + + return sp; +} + +int sp_init(struct sp_device *sp) +{ + sp_add_device(sp); + + if (sp->dev_vdata->ccp_vdata) + ccp_dev_init(sp); + + return 0; +} + +void sp_destroy(struct sp_device *sp) +{ + if (sp->dev_vdata->ccp_vdata) + ccp_dev_destroy(sp); + + sp_del_device(sp); +} + +#ifdef CONFIG_PM +int sp_suspend(struct sp_device *sp, pm_message_t state) +{ + int ret; + + if (sp->dev_vdata->ccp_vdata) { + ret = ccp_dev_suspend(sp, state); + if (ret) + return ret; + } + + return 0; +} + +int sp_resume(struct sp_device *sp) 
+{ + int ret; + + if (sp->dev_vdata->ccp_vdata) { + ret = ccp_dev_resume(sp); + if (ret) + return ret; + } + + return 0; +} +#endif + +static int __init sp_mod_init(void) +{ +#ifdef CONFIG_X86 + int ret; + + ret = sp_pci_init(); + if (ret) + return ret; + + return 0; +#endif + +#ifdef CONFIG_ARM64 + int ret; + + ret = sp_platform_init(); + if (ret) + return ret; + + return 0; +#endif + + return -ENODEV; +} + +static void __exit sp_mod_exit(void) +{ +#ifdef CONFIG_X86 + sp_pci_exit(); +#endif + +#ifdef CONFIG_ARM64 + sp_platform_exit(); +#endif +} + +module_init(sp_mod_init); +module_exit(sp_mod_exit); diff --git a/drivers/crypto/ccp/sp-dev.h b/drivers/crypto/ccp/sp-dev.h new file mode 100644 index 000000000000..5ab486ade1ad --- /dev/null +++ b/drivers/crypto/ccp/sp-dev.h @@ -0,0 +1,133 @@ +/* + * AMD Secure Processor driver + * + * Copyright (C) 2017 Advanced Micro Devices, Inc. + * + * Author: Tom Lendacky <thomas.lendacky@amd.com> + * Author: Gary R Hook <gary.hook@amd.com> + * Author: Brijesh Singh <brijesh.singh@amd.com> + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. 
+ */ + +#ifndef __SP_DEV_H__ +#define __SP_DEV_H__ + +#include <linux/device.h> +#include <linux/pci.h> +#include <linux/spinlock.h> +#include <linux/mutex.h> +#include <linux/list.h> +#include <linux/wait.h> +#include <linux/dmapool.h> +#include <linux/hw_random.h> +#include <linux/bitops.h> +#include <linux/interrupt.h> +#include <linux/irqreturn.h> + +#define SP_MAX_NAME_LEN 32 + +#define CACHE_NONE 0x00 +#define CACHE_WB_NO_ALLOC 0xb7 + +/* Structure to hold CCP device data */ +struct ccp_device; +struct ccp_vdata { + const unsigned int version; + const unsigned int dma_chan_attr; + void (*setup)(struct ccp_device *); + const struct ccp_actions *perform; + const unsigned int offset; + const unsigned int rsamax; +}; +/* Structure to hold SP device data */ +struct sp_dev_vdata { + const unsigned int bar; + + const struct ccp_vdata *ccp_vdata; + void *psp_vdata; +}; + +struct sp_device { + struct list_head entry; + + struct device *dev; + + struct sp_dev_vdata *dev_vdata; + unsigned int ord; + char name[SP_MAX_NAME_LEN]; + + /* Bus specific device information */ + void *dev_specific; + + /* I/O area used for device communication. 
*/ + void __iomem *io_map; + + /* DMA caching attribute support */ + unsigned int axcache; + + bool irq_registered; + bool use_tasklet; + + unsigned int ccp_irq; + irq_handler_t ccp_irq_handler; + void *ccp_irq_data; + + unsigned int psp_irq; + irq_handler_t psp_irq_handler; + void *psp_irq_data; + + void *ccp_data; + void *psp_data; +}; + +int sp_pci_init(void); +void sp_pci_exit(void); + +int sp_platform_init(void); +void sp_platform_exit(void); + +struct sp_device *sp_alloc_struct(struct device *dev); + +int sp_init(struct sp_device *sp); +void sp_destroy(struct sp_device *sp); +struct sp_device *sp_get_master(void); + +int sp_suspend(struct sp_device *sp, pm_message_t state); +int sp_resume(struct sp_device *sp); +int sp_request_ccp_irq(struct sp_device *sp, irq_handler_t handler, + const char *name, void *data); +void sp_free_ccp_irq(struct sp_device *sp, void *data); +int sp_request_psp_irq(struct sp_device *sp, irq_handler_t handler, + const char *name, void *data); +void sp_free_psp_irq(struct sp_device *sp, void *data); + +#ifdef CONFIG_CRYPTO_DEV_SP_CCP + +int ccp_dev_init(struct sp_device *sp); +void ccp_dev_destroy(struct sp_device *sp); + +int ccp_dev_suspend(struct sp_device *sp, pm_message_t state); +int ccp_dev_resume(struct sp_device *sp); + +#else /* !CONFIG_CRYPTO_DEV_SP_CCP */ + +static inline int ccp_dev_init(struct sp_device *sp) +{ + return 0; +} +static inline void ccp_dev_destroy(struct sp_device *sp) { } + +static inline int ccp_dev_suspend(struct sp_device *sp, pm_message_t state) +{ + return 0; +} +static inline int ccp_dev_resume(struct sp_device *sp) +{ + return 0; +} +#endif /* CONFIG_CRYPTO_DEV_SP_CCP */ + +#endif diff --git a/drivers/crypto/ccp/sp-pci.c b/drivers/crypto/ccp/sp-pci.c new file mode 100644 index 000000000000..9859aa683a28 --- /dev/null +++ b/drivers/crypto/ccp/sp-pci.c @@ -0,0 +1,276 @@ +/* + * AMD Secure Processor device driver + * + * Copyright (C) 2013,2016 Advanced Micro Devices, Inc. 
+ * + * Author: Tom Lendacky <thomas.lendacky@amd.com> + * Author: Gary R Hook <gary.hook@amd.com> + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ + +#include <linux/module.h> +#include <linux/kernel.h> +#include <linux/device.h> +#include <linux/pci.h> +#include <linux/pci_ids.h> +#include <linux/dma-mapping.h> +#include <linux/kthread.h> +#include <linux/sched.h> +#include <linux/interrupt.h> +#include <linux/spinlock.h> +#include <linux/delay.h> +#include <linux/ccp.h> + +#include "ccp-dev.h" + +#define MSIX_VECTORS 2 + +struct sp_pci { + int msix_count; + struct msix_entry msix_entry[MSIX_VECTORS]; +}; + +static int sp_get_msix_irqs(struct sp_device *sp) +{ + struct sp_pci *sp_pci = sp->dev_specific; + struct device *dev = sp->dev; + struct pci_dev *pdev = to_pci_dev(dev); + int v, ret; + + for (v = 0; v < ARRAY_SIZE(sp_pci->msix_entry); v++) + sp_pci->msix_entry[v].entry = v; + + ret = pci_enable_msix_range(pdev, sp_pci->msix_entry, 1, v); + if (ret < 0) + return ret; + + sp_pci->msix_count = ret; + sp->use_tasklet = true; + + sp->psp_irq = sp_pci->msix_entry[0].vector; + sp->ccp_irq = (sp_pci->msix_count > 1) ? 
sp_pci->msix_entry[1].vector + : sp_pci->msix_entry[0].vector; + return 0; +} + +static int sp_get_msi_irq(struct sp_device *sp) +{ + struct device *dev = sp->dev; + struct pci_dev *pdev = to_pci_dev(dev); + int ret; + + ret = pci_enable_msi(pdev); + if (ret) + return ret; + + sp->ccp_irq = pdev->irq; + sp->psp_irq = pdev->irq; + + return 0; +} + +static int sp_get_irqs(struct sp_device *sp) +{ + struct device *dev = sp->dev; + int ret; + + ret = sp_get_msix_irqs(sp); + if (!ret) + return 0; + + /* Couldn't get MSI-X vectors, try MSI */ + dev_notice(dev, "could not enable MSI-X (%d), trying MSI\n", ret); + ret = sp_get_msi_irq(sp); + if (!ret) + return 0; + + /* Couldn't get MSI interrupt */ + dev_notice(dev, "could not enable MSI (%d)\n", ret); + + return ret; +} + +static void sp_free_irqs(struct sp_device *sp) +{ + struct sp_pci *sp_pci = sp->dev_specific; + struct device *dev = sp->dev; + struct pci_dev *pdev = to_pci_dev(dev); + + if (sp_pci->msix_count) + pci_disable_msix(pdev); + else if (sp->psp_irq) + pci_disable_msi(pdev); + + sp->ccp_irq = 0; + sp->psp_irq = 0; +} + +static int sp_pci_probe(struct pci_dev *pdev, const struct pci_device_id *id) +{ + struct sp_device *sp; + struct sp_pci *sp_pci; + struct device *dev = &pdev->dev; + void __iomem * const *iomap_table; + int bar_mask; + int ret; + + ret = -ENOMEM; + sp = sp_alloc_struct(dev); + if (!sp) + goto e_err; + + sp_pci = devm_kzalloc(dev, sizeof(*sp_pci), GFP_KERNEL); + if (!sp_pci) + goto e_err; + + sp->dev_specific = sp_pci; + sp->dev_vdata = (struct sp_dev_vdata *)id->driver_data; + if (!sp->dev_vdata) { + ret = -ENODEV; + dev_err(dev, "missing driver data\n"); + goto e_err; + } + + ret = pcim_enable_device(pdev); + if (ret) { + dev_err(dev, "pcim_enable_device failed (%d)\n", ret); + goto e_err; + } + + bar_mask = pci_select_bars(pdev, IORESOURCE_MEM); + ret = pcim_iomap_regions(pdev, bar_mask, "ccp"); + if (ret) { + dev_err(dev, "pcim_iomap_regions failed (%d)\n", ret); + goto e_err; + } + + 
iomap_table = pcim_iomap_table(pdev); + if (!iomap_table) { + dev_err(dev, "pcim_iomap_table failed\n"); + ret = -ENOMEM; + goto e_err; + } + + sp->io_map = iomap_table[sp->dev_vdata->bar]; + if (!sp->io_map) { + dev_err(dev, "ioremap failed\n"); + ret = -ENOMEM; + goto e_err; + } + + ret = sp_get_irqs(sp); + if (ret) + goto e_err; + + pci_set_master(pdev); + + ret = dma_set_mask_and_coherent(dev, DMA_BIT_MASK(48)); + if (ret) { + ret = dma_set_mask_and_coherent(dev, DMA_BIT_MASK(32)); + if (ret) { + dev_err(dev, "dma_set_mask_and_coherent failed (%d)\n", + ret); + goto e_err; + } + } + + dev_set_drvdata(dev, sp); + + ret = sp_init(sp); + if (ret) + goto e_err; + + dev_notice(dev, "enabled\n"); + + return 0; + +e_err: + dev_notice(dev, "initialization failed\n"); + return ret; +} + +static void sp_pci_remove(struct pci_dev *pdev) +{ + struct device *dev = &pdev->dev; + struct sp_device *sp = dev_get_drvdata(dev); + + if (!sp) + return; + + sp_destroy(sp); + + sp_free_irqs(sp); + + dev_notice(dev, "disabled\n"); +} + +#ifdef CONFIG_PM +static int sp_pci_suspend(struct pci_dev *pdev, pm_message_t state) +{ + struct device *dev = &pdev->dev; + struct sp_device *sp = dev_get_drvdata(dev); + + return sp_suspend(sp, state); +} + +static int sp_pci_resume(struct pci_dev *pdev) +{ + struct device *dev = &pdev->dev; + struct sp_device *sp = dev_get_drvdata(dev); + + return sp_resume(sp); +} +#endif + +static const struct sp_dev_vdata dev_vdata[] = { + { + .bar = 2, +#ifdef CONFIG_CRYPTO_DEV_SP_CCP + .ccp_vdata = &ccpv3, +#endif + }, + { + .bar = 2, +#ifdef CONFIG_CRYPTO_DEV_SP_CCP + .ccp_vdata = &ccpv5a, +#endif + }, + { + .bar = 2, +#ifdef CONFIG_CRYPTO_DEV_SP_CCP + .ccp_vdata = &ccpv5b, +#endif + }, +}; +static const struct pci_device_id sp_pci_table[] = { + { PCI_VDEVICE(AMD, 0x1537), (kernel_ulong_t)&dev_vdata[0] }, + { PCI_VDEVICE(AMD, 0x1456), (kernel_ulong_t)&dev_vdata[1] }, + { PCI_VDEVICE(AMD, 0x1468), (kernel_ulong_t)&dev_vdata[2] }, + /* Last entry must be zero 
*/ + { 0, } +}; +MODULE_DEVICE_TABLE(pci, sp_pci_table); + +static struct pci_driver sp_pci_driver = { + .name = "ccp", + .id_table = sp_pci_table, + .probe = sp_pci_probe, + .remove = sp_pci_remove, +#ifdef CONFIG_PM + .suspend = sp_pci_suspend, + .resume = sp_pci_resume, +#endif +}; + +int sp_pci_init(void) +{ + return pci_register_driver(&sp_pci_driver); +} + +void sp_pci_exit(void) +{ + pci_unregister_driver(&sp_pci_driver); +} diff --git a/drivers/crypto/ccp/sp-platform.c b/drivers/crypto/ccp/sp-platform.c new file mode 100644 index 000000000000..71734f254fd1 --- /dev/null +++ b/drivers/crypto/ccp/sp-platform.c @@ -0,0 +1,256 @@ +/* + * AMD Secure Processor device driver + * + * Copyright (C) 2014,2016 Advanced Micro Devices, Inc. + * + * Author: Tom Lendacky <thomas.lendacky@amd.com> + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ + +#include <linux/module.h> +#include <linux/kernel.h> +#include <linux/device.h> +#include <linux/platform_device.h> +#include <linux/ioport.h> +#include <linux/dma-mapping.h> +#include <linux/kthread.h> +#include <linux/sched.h> +#include <linux/interrupt.h> +#include <linux/spinlock.h> +#include <linux/delay.h> +#include <linux/ccp.h> +#include <linux/of.h> +#include <linux/of_address.h> +#include <linux/acpi.h> + +#include "ccp-dev.h" + +struct sp_platform { + int coherent; + unsigned int irq_count; +}; + +static const struct acpi_device_id sp_acpi_match[]; +static const struct of_device_id sp_of_match[]; + +static struct sp_dev_vdata *sp_get_of_version(struct platform_device *pdev) +{ +#ifdef CONFIG_OF + const struct of_device_id *match; + + match = of_match_node(sp_of_match, pdev->dev.of_node); + if (match && match->data) + return (struct sp_dev_vdata *)match->data; +#endif + return NULL; +} + +static struct sp_dev_vdata *sp_get_acpi_version(struct platform_device *pdev) +{ 
+#ifdef CONFIG_ACPI + const struct acpi_device_id *match; + + match = acpi_match_device(sp_acpi_match, &pdev->dev); + if (match && match->driver_data) + return (struct sp_dev_vdata *)match->driver_data; +#endif + return NULL; +} + +static int sp_get_irqs(struct sp_device *sp) +{ + struct sp_platform *sp_platform = sp->dev_specific; + struct device *dev = sp->dev; + struct platform_device *pdev = to_platform_device(dev); + unsigned int i, count; + int ret; + + for (i = 0, count = 0; i < pdev->num_resources; i++) { + struct resource *res = &pdev->resource[i]; + + if (resource_type(res) == IORESOURCE_IRQ) + count++; + } + + sp_platform->irq_count = count; + + ret = platform_get_irq(pdev, 0); + if (ret < 0) { + dev_notice(dev, "unable to get IRQ (%d)\n", ret); + return ret; + } + + sp->psp_irq = ret; + if (count == 1) { + sp->ccp_irq = ret; + } else { + ret = platform_get_irq(pdev, 1); + if (ret < 0) { + dev_notice(dev, "unable to get IRQ (%d)\n", ret); + return ret; + } + + sp->ccp_irq = ret; + } + + return 0; +} + +static int sp_platform_probe(struct platform_device *pdev) +{ + struct sp_device *sp; + struct sp_platform *sp_platform; + struct device *dev = &pdev->dev; + enum dev_dma_attr attr; + struct resource *ior; + int ret; + + ret = -ENOMEM; + sp = sp_alloc_struct(dev); + if (!sp) + goto e_err; + + sp_platform = devm_kzalloc(dev, sizeof(*sp_platform), GFP_KERNEL); + if (!sp_platform) + goto e_err; + + sp->dev_specific = sp_platform; + sp->dev_vdata = pdev->dev.of_node ? 
sp_get_of_version(pdev) + : sp_get_acpi_version(pdev); + if (!sp->dev_vdata) { + ret = -ENODEV; + dev_err(dev, "missing driver data\n"); + goto e_err; + } + + ior = platform_get_resource(pdev, IORESOURCE_MEM, 0); + sp->io_map = devm_ioremap_resource(dev, ior); + if (IS_ERR(sp->io_map)) { + ret = PTR_ERR(sp->io_map); + goto e_err; + } + + attr = device_get_dma_attr(dev); + if (attr == DEV_DMA_NOT_SUPPORTED) { + dev_err(dev, "DMA is not supported"); + goto e_err; + } + + sp_platform->coherent = (attr == DEV_DMA_COHERENT); + if (sp_platform->coherent) + sp->axcache = CACHE_WB_NO_ALLOC; + else + sp->axcache = CACHE_NONE; + + ret = dma_set_mask_and_coherent(dev, DMA_BIT_MASK(48)); + if (ret) { + dev_err(dev, "dma_set_mask_and_coherent failed (%d)\n", ret); + goto e_err; + } + + ret = sp_get_irqs(sp); + if (ret) + goto e_err; + + dev_set_drvdata(dev, sp); + + ret = sp_init(sp); + if (ret) + goto e_err; + + dev_notice(dev, "enabled\n"); + + return 0; + +e_err: + dev_notice(dev, "initialization failed\n"); + return ret; +} + +static int sp_platform_remove(struct platform_device *pdev) +{ + struct device *dev = &pdev->dev; + struct sp_device *sp = dev_get_drvdata(dev); + + sp_destroy(sp); + + dev_notice(dev, "disabled\n"); + + return 0; +} + +#ifdef CONFIG_PM +static int sp_platform_suspend(struct platform_device *pdev, + pm_message_t state) +{ + struct device *dev = &pdev->dev; + struct sp_device *sp = dev_get_drvdata(dev); + + return sp_suspend(sp, state); +} + +static int sp_platform_resume(struct platform_device *pdev) +{ + struct device *dev = &pdev->dev; + struct sp_device *sp = dev_get_drvdata(dev); + + return sp_resume(sp); +} +#endif + +static const struct sp_dev_vdata dev_vdata[] = { + { + .bar = 0, +#ifdef CONFIG_CRYPTO_DEV_SP_CCP + .ccp_vdata = &ccpv3_platform, +#endif + }, +}; + +#ifdef CONFIG_ACPI +static const struct acpi_device_id sp_acpi_match[] = { + { "AMDI0C00", (kernel_ulong_t)&dev_vdata[0] }, + { }, +}; +MODULE_DEVICE_TABLE(acpi, sp_acpi_match); 
+#endif + +#ifdef CONFIG_OF +static const struct of_device_id sp_of_match[] = { + { .compatible = "amd,ccp-seattle-v1a", + .data = (const void *)&dev_vdata[0] }, + { }, +}; +MODULE_DEVICE_TABLE(of, sp_of_match); +#endif + +static struct platform_driver sp_platform_driver = { + .driver = { + .name = "ccp", +#ifdef CONFIG_ACPI + .acpi_match_table = sp_acpi_match, +#endif +#ifdef CONFIG_OF + .of_match_table = sp_of_match, +#endif + }, + .probe = sp_platform_probe, + .remove = sp_platform_remove, +#ifdef CONFIG_PM + .suspend = sp_platform_suspend, + .resume = sp_platform_resume, +#endif +}; + +int sp_platform_init(void) +{ + return platform_driver_register(&sp_platform_driver); +} + +void sp_platform_exit(void) +{ + platform_driver_unregister(&sp_platform_driver); +} diff --git a/drivers/crypto/geode-aes.c b/drivers/crypto/geode-aes.c index fe538e5287a5..eb2a0a73cbed 100644 --- a/drivers/crypto/geode-aes.c +++ b/drivers/crypto/geode-aes.c @@ -1,10 +1,10 @@ /* Copyright (C) 2004-2006, Advanced Micro Devices, Inc. - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2 of the License, or - * (at your option) any later version. - */ + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. 
+ */ #include <linux/module.h> #include <linux/kernel.h> @@ -30,6 +30,7 @@ static inline void _writefield(u32 offset, void *value) { int i; + for (i = 0; i < 4; i++) iowrite32(((u32 *) value)[i], _iobase + offset + (i * 4)); } @@ -39,6 +40,7 @@ static inline void _readfield(u32 offset, void *value) { int i; + for (i = 0; i < 4; i++) ((u32 *) value)[i] = ioread32(_iobase + offset + (i * 4)); } @@ -515,6 +517,7 @@ static void geode_aes_remove(struct pci_dev *dev) static int geode_aes_probe(struct pci_dev *dev, const struct pci_device_id *id) { int ret; + ret = pci_enable_device(dev); if (ret) return ret; @@ -570,7 +573,7 @@ static int geode_aes_probe(struct pci_dev *dev, const struct pci_device_id *id) } static struct pci_device_id geode_aes_tbl[] = { - { PCI_VDEVICE(AMD, PCI_DEVICE_ID_AMD_LX_AES), } , + { PCI_VDEVICE(AMD, PCI_DEVICE_ID_AMD_LX_AES), }, { 0, } }; diff --git a/drivers/crypto/img-hash.c b/drivers/crypto/img-hash.c index 0c6a917a9ab8..b87000a0a01c 100644 --- a/drivers/crypto/img-hash.c +++ b/drivers/crypto/img-hash.c @@ -1054,7 +1054,7 @@ res_err: static int img_hash_remove(struct platform_device *pdev) { - static struct img_hash_dev *hdev; + struct img_hash_dev *hdev; hdev = platform_get_drvdata(pdev); spin_lock(&img_hash.lock); diff --git a/drivers/crypto/inside-secure/safexcel.c b/drivers/crypto/inside-secure/safexcel.c index 1fabd4aee81b..89ba9e85c0f3 100644 --- a/drivers/crypto/inside-secure/safexcel.c +++ b/drivers/crypto/inside-secure/safexcel.c @@ -839,9 +839,10 @@ static int safexcel_probe(struct platform_device *pdev) snprintf(irq_name, 6, "ring%d", i); irq = safexcel_request_ring_irq(pdev, irq_name, safexcel_irq_ring, ring_irq); - - if (irq < 0) + if (irq < 0) { + ret = irq; goto err_clk; + } priv->ring[i].work_data.priv = priv; priv->ring[i].work_data.ring = i; diff --git a/drivers/crypto/mediatek/mtk-platform.c b/drivers/crypto/mediatek/mtk-platform.c index 000b6500a22d..b182e941b0cd 100644 --- a/drivers/crypto/mediatek/mtk-platform.c +++ 
b/drivers/crypto/mediatek/mtk-platform.c @@ -500,7 +500,7 @@ static int mtk_crypto_probe(struct platform_device *pdev) cryp->irq[i] = platform_get_irq(pdev, i); if (cryp->irq[i] < 0) { dev_err(cryp->dev, "no IRQ:%d resource info\n", i); - return -ENXIO; + return cryp->irq[i]; } } diff --git a/drivers/crypto/mxc-scc.c b/drivers/crypto/mxc-scc.c index ee4be1b0d30b..e01c46387df8 100644 --- a/drivers/crypto/mxc-scc.c +++ b/drivers/crypto/mxc-scc.c @@ -708,8 +708,8 @@ static int mxc_scc_probe(struct platform_device *pdev) for (i = 0; i < 2; i++) { irq = platform_get_irq(pdev, i); if (irq < 0) { - dev_err(dev, "failed to get irq resource\n"); - ret = -EINVAL; + dev_err(dev, "failed to get irq resource: %d\n", irq); + ret = irq; goto err_out; } diff --git a/drivers/crypto/mxs-dcp.c b/drivers/crypto/mxs-dcp.c index 625ee50fd78b..764be3e6933c 100644 --- a/drivers/crypto/mxs-dcp.c +++ b/drivers/crypto/mxs-dcp.c @@ -908,12 +908,16 @@ static int mxs_dcp_probe(struct platform_device *pdev) iores = platform_get_resource(pdev, IORESOURCE_MEM, 0); dcp_vmi_irq = platform_get_irq(pdev, 0); - if (dcp_vmi_irq < 0) + if (dcp_vmi_irq < 0) { + dev_err(dev, "Failed to get IRQ: (%d)!\n", dcp_vmi_irq); return dcp_vmi_irq; + } dcp_irq = platform_get_irq(pdev, 1); - if (dcp_irq < 0) + if (dcp_irq < 0) { + dev_err(dev, "Failed to get IRQ: (%d)!\n", dcp_irq); return dcp_irq; + } sdcp = devm_kzalloc(dev, sizeof(*sdcp), GFP_KERNEL); if (!sdcp) diff --git a/drivers/crypto/n2_core.c b/drivers/crypto/n2_core.c index 269451375b63..a9fd8b9e86cd 100644 --- a/drivers/crypto/n2_core.c +++ b/drivers/crypto/n2_core.c @@ -1730,8 +1730,8 @@ static int spu_mdesc_walk_arcs(struct mdesc_handle *mdesc, continue; id = mdesc_get_property(mdesc, tgt, "id", NULL); if (table[*id] != NULL) { - dev_err(&dev->dev, "%s: SPU cpu slot already set.\n", - dev->dev.of_node->full_name); + dev_err(&dev->dev, "%pOF: SPU cpu slot already set.\n", + dev->dev.of_node); return -EINVAL; } cpumask_set_cpu(*id, &p->sharing); @@ -1751,8 
+1751,8 @@ static int handle_exec_unit(struct spu_mdesc_info *ip, struct list_head *list, p = kzalloc(sizeof(struct spu_queue), GFP_KERNEL); if (!p) { - dev_err(&dev->dev, "%s: Could not allocate SPU queue.\n", - dev->dev.of_node->full_name); + dev_err(&dev->dev, "%pOF: Could not allocate SPU queue.\n", + dev->dev.of_node); return -ENOMEM; } @@ -1981,41 +1981,39 @@ static void n2_spu_driver_version(void) static int n2_crypto_probe(struct platform_device *dev) { struct mdesc_handle *mdesc; - const char *full_name; struct n2_crypto *np; int err; n2_spu_driver_version(); - full_name = dev->dev.of_node->full_name; - pr_info("Found N2CP at %s\n", full_name); + pr_info("Found N2CP at %pOF\n", dev->dev.of_node); np = alloc_n2cp(); if (!np) { - dev_err(&dev->dev, "%s: Unable to allocate n2cp.\n", - full_name); + dev_err(&dev->dev, "%pOF: Unable to allocate n2cp.\n", + dev->dev.of_node); return -ENOMEM; } err = grab_global_resources(); if (err) { - dev_err(&dev->dev, "%s: Unable to grab " - "global resources.\n", full_name); + dev_err(&dev->dev, "%pOF: Unable to grab global resources.\n", + dev->dev.of_node); goto out_free_n2cp; } mdesc = mdesc_grab(); if (!mdesc) { - dev_err(&dev->dev, "%s: Unable to grab MDESC.\n", - full_name); + dev_err(&dev->dev, "%pOF: Unable to grab MDESC.\n", + dev->dev.of_node); err = -ENODEV; goto out_free_global; } err = grab_mdesc_irq_props(mdesc, dev, &np->cwq_info, "n2cp"); if (err) { - dev_err(&dev->dev, "%s: Unable to grab IRQ props.\n", - full_name); + dev_err(&dev->dev, "%pOF: Unable to grab IRQ props.\n", + dev->dev.of_node); mdesc_release(mdesc); goto out_free_global; } @@ -2026,15 +2024,15 @@ static int n2_crypto_probe(struct platform_device *dev) mdesc_release(mdesc); if (err) { - dev_err(&dev->dev, "%s: CWQ MDESC scan failed.\n", - full_name); + dev_err(&dev->dev, "%pOF: CWQ MDESC scan failed.\n", + dev->dev.of_node); goto out_free_global; } err = n2_register_algs(); if (err) { - dev_err(&dev->dev, "%s: Unable to register 
algorithms.\n", - full_name); + dev_err(&dev->dev, "%pOF: Unable to register algorithms.\n", + dev->dev.of_node); goto out_free_spu_list; } @@ -2092,42 +2090,40 @@ static void free_ncp(struct n2_mau *mp) static int n2_mau_probe(struct platform_device *dev) { struct mdesc_handle *mdesc; - const char *full_name; struct n2_mau *mp; int err; n2_spu_driver_version(); - full_name = dev->dev.of_node->full_name; - pr_info("Found NCP at %s\n", full_name); + pr_info("Found NCP at %pOF\n", dev->dev.of_node); mp = alloc_ncp(); if (!mp) { - dev_err(&dev->dev, "%s: Unable to allocate ncp.\n", - full_name); + dev_err(&dev->dev, "%pOF: Unable to allocate ncp.\n", + dev->dev.of_node); return -ENOMEM; } err = grab_global_resources(); if (err) { - dev_err(&dev->dev, "%s: Unable to grab " - "global resources.\n", full_name); + dev_err(&dev->dev, "%pOF: Unable to grab global resources.\n", + dev->dev.of_node); goto out_free_ncp; } mdesc = mdesc_grab(); if (!mdesc) { - dev_err(&dev->dev, "%s: Unable to grab MDESC.\n", - full_name); + dev_err(&dev->dev, "%pOF: Unable to grab MDESC.\n", + dev->dev.of_node); err = -ENODEV; goto out_free_global; } err = grab_mdesc_irq_props(mdesc, dev, &mp->mau_info, "ncp"); if (err) { - dev_err(&dev->dev, "%s: Unable to grab IRQ props.\n", - full_name); + dev_err(&dev->dev, "%pOF: Unable to grab IRQ props.\n", + dev->dev.of_node); mdesc_release(mdesc); goto out_free_global; } @@ -2138,8 +2134,8 @@ static int n2_mau_probe(struct platform_device *dev) mdesc_release(mdesc); if (err) { - dev_err(&dev->dev, "%s: MAU MDESC scan failed.\n", - full_name); + dev_err(&dev->dev, "%pOF: MAU MDESC scan failed.\n", + dev->dev.of_node); goto out_free_global; } diff --git a/drivers/crypto/omap-aes.c b/drivers/crypto/omap-aes.c index 5120a17731d0..c376a3ee7c2c 100644 --- a/drivers/crypto/omap-aes.c +++ b/drivers/crypto/omap-aes.c @@ -1095,6 +1095,7 @@ static int omap_aes_probe(struct platform_device *pdev) irq = platform_get_irq(pdev, 0); if (irq < 0) { dev_err(dev, "can't 
get IRQ resource\n"); + err = irq; goto err_irq; } diff --git a/drivers/crypto/omap-des.c b/drivers/crypto/omap-des.c index 0bcab00e0ff5..d37c9506c36c 100644 --- a/drivers/crypto/omap-des.c +++ b/drivers/crypto/omap-des.c @@ -1023,7 +1023,8 @@ static int omap_des_probe(struct platform_device *pdev) irq = platform_get_irq(pdev, 0); if (irq < 0) { - dev_err(dev, "can't get IRQ resource\n"); + dev_err(dev, "can't get IRQ resource: %d\n", irq); + err = irq; goto err_irq; } diff --git a/drivers/crypto/omap-sham.c b/drivers/crypto/omap-sham.c index 9ad9d399daf1..c40ac30ec002 100644 --- a/drivers/crypto/omap-sham.c +++ b/drivers/crypto/omap-sham.c @@ -2133,7 +2133,7 @@ data_err: static int omap_sham_remove(struct platform_device *pdev) { - static struct omap_sham_dev *dd; + struct omap_sham_dev *dd; int i, j; dd = platform_get_drvdata(pdev); diff --git a/drivers/crypto/qat/qat_common/adf_aer.c b/drivers/crypto/qat/qat_common/adf_aer.c index d3e25c37dc33..da8a2d3b5e9a 100644 --- a/drivers/crypto/qat/qat_common/adf_aer.c +++ b/drivers/crypto/qat/qat_common/adf_aer.c @@ -208,7 +208,7 @@ static pci_ers_result_t adf_slot_reset(struct pci_dev *pdev) static void adf_resume(struct pci_dev *pdev) { dev_info(&pdev->dev, "Acceleration driver reset completed\n"); - dev_info(&pdev->dev, "Device is up and runnig\n"); + dev_info(&pdev->dev, "Device is up and running\n"); } static const struct pci_error_handlers adf_err_handler = { diff --git a/drivers/crypto/rockchip/rk3288_crypto.c b/drivers/crypto/rockchip/rk3288_crypto.c index d0f80c6241f9..c9d622abd90c 100644 --- a/drivers/crypto/rockchip/rk3288_crypto.c +++ b/drivers/crypto/rockchip/rk3288_crypto.c @@ -169,50 +169,82 @@ static irqreturn_t rk_crypto_irq_handle(int irq, void *dev_id) { struct rk_crypto_info *dev = platform_get_drvdata(dev_id); u32 interrupt_status; - int err = 0; spin_lock(&dev->lock); interrupt_status = CRYPTO_READ(dev, RK_CRYPTO_INTSTS); CRYPTO_WRITE(dev, RK_CRYPTO_INTSTS, interrupt_status); + if (interrupt_status 
& 0x0a) { dev_warn(dev->dev, "DMA Error\n"); - err = -EFAULT; - } else if (interrupt_status & 0x05) { - err = dev->update(dev); + dev->err = -EFAULT; } - if (err) - dev->complete(dev, err); + tasklet_schedule(&dev->done_task); + spin_unlock(&dev->lock); return IRQ_HANDLED; } -static void rk_crypto_tasklet_cb(unsigned long data) +static int rk_crypto_enqueue(struct rk_crypto_info *dev, + struct crypto_async_request *async_req) +{ + unsigned long flags; + int ret; + + spin_lock_irqsave(&dev->lock, flags); + ret = crypto_enqueue_request(&dev->queue, async_req); + if (dev->busy) { + spin_unlock_irqrestore(&dev->lock, flags); + return ret; + } + dev->busy = true; + spin_unlock_irqrestore(&dev->lock, flags); + tasklet_schedule(&dev->queue_task); + + return ret; +} + +static void rk_crypto_queue_task_cb(unsigned long data) { struct rk_crypto_info *dev = (struct rk_crypto_info *)data; struct crypto_async_request *async_req, *backlog; unsigned long flags; int err = 0; + dev->err = 0; spin_lock_irqsave(&dev->lock, flags); backlog = crypto_get_backlog(&dev->queue); async_req = crypto_dequeue_request(&dev->queue); - spin_unlock_irqrestore(&dev->lock, flags); + if (!async_req) { - dev_err(dev->dev, "async_req is NULL !!\n"); + dev->busy = false; + spin_unlock_irqrestore(&dev->lock, flags); return; } + spin_unlock_irqrestore(&dev->lock, flags); + if (backlog) { backlog->complete(backlog, -EINPROGRESS); backlog = NULL; } - if (crypto_tfm_alg_type(async_req->tfm) == CRYPTO_ALG_TYPE_ABLKCIPHER) - dev->ablk_req = ablkcipher_request_cast(async_req); - else - dev->ahash_req = ahash_request_cast(async_req); + dev->async_req = async_req; err = dev->start(dev); if (err) - dev->complete(dev, err); + dev->complete(dev->async_req, err); +} + +static void rk_crypto_done_task_cb(unsigned long data) +{ + struct rk_crypto_info *dev = (struct rk_crypto_info *)data; + + if (dev->err) { + dev->complete(dev->async_req, dev->err); + return; + } + + dev->err = dev->update(dev); + if (dev->err) + 
dev->complete(dev->async_req, dev->err); } static struct rk_crypto_tmp *rk_cipher_algs[] = { @@ -361,14 +393,18 @@ static int rk_crypto_probe(struct platform_device *pdev) crypto_info->dev = &pdev->dev; platform_set_drvdata(pdev, crypto_info); - tasklet_init(&crypto_info->crypto_tasklet, - rk_crypto_tasklet_cb, (unsigned long)crypto_info); + tasklet_init(&crypto_info->queue_task, + rk_crypto_queue_task_cb, (unsigned long)crypto_info); + tasklet_init(&crypto_info->done_task, + rk_crypto_done_task_cb, (unsigned long)crypto_info); crypto_init_queue(&crypto_info->queue, 50); crypto_info->enable_clk = rk_crypto_enable_clk; crypto_info->disable_clk = rk_crypto_disable_clk; crypto_info->load_data = rk_load_data; crypto_info->unload_data = rk_unload_data; + crypto_info->enqueue = rk_crypto_enqueue; + crypto_info->busy = false; err = rk_crypto_register(crypto_info); if (err) { @@ -380,7 +416,8 @@ static int rk_crypto_probe(struct platform_device *pdev) return 0; err_register_alg: - tasklet_kill(&crypto_info->crypto_tasklet); + tasklet_kill(&crypto_info->queue_task); + tasklet_kill(&crypto_info->done_task); err_crypto: return err; } @@ -390,7 +427,8 @@ static int rk_crypto_remove(struct platform_device *pdev) struct rk_crypto_info *crypto_tmp = platform_get_drvdata(pdev); rk_crypto_unregister(); - tasklet_kill(&crypto_tmp->crypto_tasklet); + tasklet_kill(&crypto_tmp->done_task); + tasklet_kill(&crypto_tmp->queue_task); return 0; } diff --git a/drivers/crypto/rockchip/rk3288_crypto.h b/drivers/crypto/rockchip/rk3288_crypto.h index d7b71fea320b..ab6a1b4c40f0 100644 --- a/drivers/crypto/rockchip/rk3288_crypto.h +++ b/drivers/crypto/rockchip/rk3288_crypto.h @@ -190,9 +190,10 @@ struct rk_crypto_info { void __iomem *reg; int irq; struct crypto_queue queue; - struct tasklet_struct crypto_tasklet; - struct ablkcipher_request *ablk_req; - struct ahash_request *ahash_req; + struct tasklet_struct queue_task; + struct tasklet_struct done_task; + struct crypto_async_request *async_req; 
+ int err; /* device lock */ spinlock_t lock; @@ -208,18 +209,20 @@ struct rk_crypto_info { size_t nents; unsigned int total; unsigned int count; - u32 mode; dma_addr_t addr_in; dma_addr_t addr_out; + bool busy; int (*start)(struct rk_crypto_info *dev); int (*update)(struct rk_crypto_info *dev); - void (*complete)(struct rk_crypto_info *dev, int err); + void (*complete)(struct crypto_async_request *base, int err); int (*enable_clk)(struct rk_crypto_info *dev); void (*disable_clk)(struct rk_crypto_info *dev); int (*load_data)(struct rk_crypto_info *dev, struct scatterlist *sg_src, struct scatterlist *sg_dst); void (*unload_data)(struct rk_crypto_info *dev); + int (*enqueue)(struct rk_crypto_info *dev, + struct crypto_async_request *async_req); }; /* the private variable of hash */ @@ -232,12 +235,14 @@ struct rk_ahash_ctx { /* the privete variable of hash for fallback */ struct rk_ahash_rctx { struct ahash_request fallback_req; + u32 mode; }; /* the private variable of cipher */ struct rk_cipher_ctx { struct rk_crypto_info *dev; unsigned int keylen; + u32 mode; }; enum alg_type { diff --git a/drivers/crypto/rockchip/rk3288_crypto_ablkcipher.c b/drivers/crypto/rockchip/rk3288_crypto_ablkcipher.c index b5a3afe222e4..639c15c5364b 100644 --- a/drivers/crypto/rockchip/rk3288_crypto_ablkcipher.c +++ b/drivers/crypto/rockchip/rk3288_crypto_ablkcipher.c @@ -15,35 +15,19 @@ #define RK_CRYPTO_DEC BIT(0) -static void rk_crypto_complete(struct rk_crypto_info *dev, int err) +static void rk_crypto_complete(struct crypto_async_request *base, int err) { - if (dev->ablk_req->base.complete) - dev->ablk_req->base.complete(&dev->ablk_req->base, err); + if (base->complete) + base->complete(base, err); } static int rk_handle_req(struct rk_crypto_info *dev, struct ablkcipher_request *req) { - unsigned long flags; - int err; - if (!IS_ALIGNED(req->nbytes, dev->align_size)) return -EINVAL; - - dev->left_bytes = req->nbytes; - dev->total = req->nbytes; - dev->sg_src = req->src; - dev->first 
= req->src; - dev->nents = sg_nents(req->src); - dev->sg_dst = req->dst; - dev->aligned = 1; - dev->ablk_req = req; - - spin_lock_irqsave(&dev->lock, flags); - err = ablkcipher_enqueue_request(&dev->queue, req); - spin_unlock_irqrestore(&dev->lock, flags); - tasklet_schedule(&dev->crypto_tasklet); - return err; + else + return dev->enqueue(dev, &req->base); } static int rk_aes_setkey(struct crypto_ablkcipher *cipher, @@ -93,7 +77,7 @@ static int rk_aes_ecb_encrypt(struct ablkcipher_request *req) struct rk_cipher_ctx *ctx = crypto_ablkcipher_ctx(tfm); struct rk_crypto_info *dev = ctx->dev; - dev->mode = RK_CRYPTO_AES_ECB_MODE; + ctx->mode = RK_CRYPTO_AES_ECB_MODE; return rk_handle_req(dev, req); } @@ -103,7 +87,7 @@ static int rk_aes_ecb_decrypt(struct ablkcipher_request *req) struct rk_cipher_ctx *ctx = crypto_ablkcipher_ctx(tfm); struct rk_crypto_info *dev = ctx->dev; - dev->mode = RK_CRYPTO_AES_ECB_MODE | RK_CRYPTO_DEC; + ctx->mode = RK_CRYPTO_AES_ECB_MODE | RK_CRYPTO_DEC; return rk_handle_req(dev, req); } @@ -113,7 +97,7 @@ static int rk_aes_cbc_encrypt(struct ablkcipher_request *req) struct rk_cipher_ctx *ctx = crypto_ablkcipher_ctx(tfm); struct rk_crypto_info *dev = ctx->dev; - dev->mode = RK_CRYPTO_AES_CBC_MODE; + ctx->mode = RK_CRYPTO_AES_CBC_MODE; return rk_handle_req(dev, req); } @@ -123,7 +107,7 @@ static int rk_aes_cbc_decrypt(struct ablkcipher_request *req) struct rk_cipher_ctx *ctx = crypto_ablkcipher_ctx(tfm); struct rk_crypto_info *dev = ctx->dev; - dev->mode = RK_CRYPTO_AES_CBC_MODE | RK_CRYPTO_DEC; + ctx->mode = RK_CRYPTO_AES_CBC_MODE | RK_CRYPTO_DEC; return rk_handle_req(dev, req); } @@ -133,7 +117,7 @@ static int rk_des_ecb_encrypt(struct ablkcipher_request *req) struct rk_cipher_ctx *ctx = crypto_ablkcipher_ctx(tfm); struct rk_crypto_info *dev = ctx->dev; - dev->mode = 0; + ctx->mode = 0; return rk_handle_req(dev, req); } @@ -143,7 +127,7 @@ static int rk_des_ecb_decrypt(struct ablkcipher_request *req) struct rk_cipher_ctx *ctx = 
crypto_ablkcipher_ctx(tfm); struct rk_crypto_info *dev = ctx->dev; - dev->mode = RK_CRYPTO_DEC; + ctx->mode = RK_CRYPTO_DEC; return rk_handle_req(dev, req); } @@ -153,7 +137,7 @@ static int rk_des_cbc_encrypt(struct ablkcipher_request *req) struct rk_cipher_ctx *ctx = crypto_ablkcipher_ctx(tfm); struct rk_crypto_info *dev = ctx->dev; - dev->mode = RK_CRYPTO_TDES_CHAINMODE_CBC; + ctx->mode = RK_CRYPTO_TDES_CHAINMODE_CBC; return rk_handle_req(dev, req); } @@ -163,7 +147,7 @@ static int rk_des_cbc_decrypt(struct ablkcipher_request *req) struct rk_cipher_ctx *ctx = crypto_ablkcipher_ctx(tfm); struct rk_crypto_info *dev = ctx->dev; - dev->mode = RK_CRYPTO_TDES_CHAINMODE_CBC | RK_CRYPTO_DEC; + ctx->mode = RK_CRYPTO_TDES_CHAINMODE_CBC | RK_CRYPTO_DEC; return rk_handle_req(dev, req); } @@ -173,7 +157,7 @@ static int rk_des3_ede_ecb_encrypt(struct ablkcipher_request *req) struct rk_cipher_ctx *ctx = crypto_ablkcipher_ctx(tfm); struct rk_crypto_info *dev = ctx->dev; - dev->mode = RK_CRYPTO_TDES_SELECT; + ctx->mode = RK_CRYPTO_TDES_SELECT; return rk_handle_req(dev, req); } @@ -183,7 +167,7 @@ static int rk_des3_ede_ecb_decrypt(struct ablkcipher_request *req) struct rk_cipher_ctx *ctx = crypto_ablkcipher_ctx(tfm); struct rk_crypto_info *dev = ctx->dev; - dev->mode = RK_CRYPTO_TDES_SELECT | RK_CRYPTO_DEC; + ctx->mode = RK_CRYPTO_TDES_SELECT | RK_CRYPTO_DEC; return rk_handle_req(dev, req); } @@ -193,7 +177,7 @@ static int rk_des3_ede_cbc_encrypt(struct ablkcipher_request *req) struct rk_cipher_ctx *ctx = crypto_ablkcipher_ctx(tfm); struct rk_crypto_info *dev = ctx->dev; - dev->mode = RK_CRYPTO_TDES_SELECT | RK_CRYPTO_TDES_CHAINMODE_CBC; + ctx->mode = RK_CRYPTO_TDES_SELECT | RK_CRYPTO_TDES_CHAINMODE_CBC; return rk_handle_req(dev, req); } @@ -203,15 +187,16 @@ static int rk_des3_ede_cbc_decrypt(struct ablkcipher_request *req) struct rk_cipher_ctx *ctx = crypto_ablkcipher_ctx(tfm); struct rk_crypto_info *dev = ctx->dev; - dev->mode = RK_CRYPTO_TDES_SELECT | 
RK_CRYPTO_TDES_CHAINMODE_CBC | + ctx->mode = RK_CRYPTO_TDES_SELECT | RK_CRYPTO_TDES_CHAINMODE_CBC | RK_CRYPTO_DEC; return rk_handle_req(dev, req); } static void rk_ablk_hw_init(struct rk_crypto_info *dev) { - struct crypto_ablkcipher *cipher = - crypto_ablkcipher_reqtfm(dev->ablk_req); + struct ablkcipher_request *req = + ablkcipher_request_cast(dev->async_req); + struct crypto_ablkcipher *cipher = crypto_ablkcipher_reqtfm(req); struct crypto_tfm *tfm = crypto_ablkcipher_tfm(cipher); struct rk_cipher_ctx *ctx = crypto_ablkcipher_ctx(cipher); u32 ivsize, block, conf_reg = 0; @@ -220,25 +205,23 @@ static void rk_ablk_hw_init(struct rk_crypto_info *dev) ivsize = crypto_ablkcipher_ivsize(cipher); if (block == DES_BLOCK_SIZE) { - dev->mode |= RK_CRYPTO_TDES_FIFO_MODE | + ctx->mode |= RK_CRYPTO_TDES_FIFO_MODE | RK_CRYPTO_TDES_BYTESWAP_KEY | RK_CRYPTO_TDES_BYTESWAP_IV; - CRYPTO_WRITE(dev, RK_CRYPTO_TDES_CTRL, dev->mode); - memcpy_toio(dev->reg + RK_CRYPTO_TDES_IV_0, - dev->ablk_req->info, ivsize); + CRYPTO_WRITE(dev, RK_CRYPTO_TDES_CTRL, ctx->mode); + memcpy_toio(dev->reg + RK_CRYPTO_TDES_IV_0, req->info, ivsize); conf_reg = RK_CRYPTO_DESSEL; } else { - dev->mode |= RK_CRYPTO_AES_FIFO_MODE | + ctx->mode |= RK_CRYPTO_AES_FIFO_MODE | RK_CRYPTO_AES_KEY_CHANGE | RK_CRYPTO_AES_BYTESWAP_KEY | RK_CRYPTO_AES_BYTESWAP_IV; if (ctx->keylen == AES_KEYSIZE_192) - dev->mode |= RK_CRYPTO_AES_192BIT_key; + ctx->mode |= RK_CRYPTO_AES_192BIT_key; else if (ctx->keylen == AES_KEYSIZE_256) - dev->mode |= RK_CRYPTO_AES_256BIT_key; - CRYPTO_WRITE(dev, RK_CRYPTO_AES_CTRL, dev->mode); - memcpy_toio(dev->reg + RK_CRYPTO_AES_IV_0, - dev->ablk_req->info, ivsize); + ctx->mode |= RK_CRYPTO_AES_256BIT_key; + CRYPTO_WRITE(dev, RK_CRYPTO_AES_CTRL, ctx->mode); + memcpy_toio(dev->reg + RK_CRYPTO_AES_IV_0, req->info, ivsize); } conf_reg |= RK_CRYPTO_BYTESWAP_BTFIFO | RK_CRYPTO_BYTESWAP_BRFIFO; @@ -268,8 +251,18 @@ static int rk_set_data_start(struct rk_crypto_info *dev) static int rk_ablk_start(struct 
rk_crypto_info *dev) { + struct ablkcipher_request *req = + ablkcipher_request_cast(dev->async_req); unsigned long flags; - int err; + int err = 0; + + dev->left_bytes = req->nbytes; + dev->total = req->nbytes; + dev->sg_src = req->src; + dev->first = req->src; + dev->nents = sg_nents(req->src); + dev->sg_dst = req->dst; + dev->aligned = 1; spin_lock_irqsave(&dev->lock, flags); rk_ablk_hw_init(dev); @@ -280,15 +273,16 @@ static int rk_ablk_start(struct rk_crypto_info *dev) static void rk_iv_copyback(struct rk_crypto_info *dev) { - struct crypto_ablkcipher *tfm = crypto_ablkcipher_reqtfm(dev->ablk_req); + struct ablkcipher_request *req = + ablkcipher_request_cast(dev->async_req); + struct crypto_ablkcipher *tfm = crypto_ablkcipher_reqtfm(req); u32 ivsize = crypto_ablkcipher_ivsize(tfm); if (ivsize == DES_BLOCK_SIZE) - memcpy_fromio(dev->ablk_req->info, - dev->reg + RK_CRYPTO_TDES_IV_0, ivsize); + memcpy_fromio(req->info, dev->reg + RK_CRYPTO_TDES_IV_0, + ivsize); else if (ivsize == AES_BLOCK_SIZE) - memcpy_fromio(dev->ablk_req->info, - dev->reg + RK_CRYPTO_AES_IV_0, ivsize); + memcpy_fromio(req->info, dev->reg + RK_CRYPTO_AES_IV_0, ivsize); } /* return: @@ -298,10 +292,12 @@ static void rk_iv_copyback(struct rk_crypto_info *dev) static int rk_ablk_rx(struct rk_crypto_info *dev) { int err = 0; + struct ablkcipher_request *req = + ablkcipher_request_cast(dev->async_req); dev->unload_data(dev); if (!dev->aligned) { - if (!sg_pcopy_from_buffer(dev->ablk_req->dst, dev->nents, + if (!sg_pcopy_from_buffer(req->dst, dev->nents, dev->addr_vir, dev->count, dev->total - dev->left_bytes - dev->count)) { @@ -324,7 +320,8 @@ static int rk_ablk_rx(struct rk_crypto_info *dev) } else { rk_iv_copyback(dev); /* here show the calculation is over without any err */ - dev->complete(dev, 0); + dev->complete(dev->async_req, 0); + tasklet_schedule(&dev->queue_task); } out_rx: return err; diff --git a/drivers/crypto/rockchip/rk3288_crypto_ahash.c 
b/drivers/crypto/rockchip/rk3288_crypto_ahash.c index 718588219f75..821a506b9e17 100644 --- a/drivers/crypto/rockchip/rk3288_crypto_ahash.c +++ b/drivers/crypto/rockchip/rk3288_crypto_ahash.c @@ -40,14 +40,16 @@ static int zero_message_process(struct ahash_request *req) return 0; } -static void rk_ahash_crypto_complete(struct rk_crypto_info *dev, int err) +static void rk_ahash_crypto_complete(struct crypto_async_request *base, int err) { - if (dev->ahash_req->base.complete) - dev->ahash_req->base.complete(&dev->ahash_req->base, err); + if (base->complete) + base->complete(base, err); } static void rk_ahash_reg_init(struct rk_crypto_info *dev) { + struct ahash_request *req = ahash_request_cast(dev->async_req); + struct rk_ahash_rctx *rctx = ahash_request_ctx(req); int reg_status = 0; reg_status = CRYPTO_READ(dev, RK_CRYPTO_CTRL) | @@ -67,7 +69,7 @@ static void rk_ahash_reg_init(struct rk_crypto_info *dev) CRYPTO_WRITE(dev, RK_CRYPTO_INTSTS, RK_CRYPTO_HRDMA_ERR_INT | RK_CRYPTO_HRDMA_DONE_INT); - CRYPTO_WRITE(dev, RK_CRYPTO_HASH_CTRL, dev->mode | + CRYPTO_WRITE(dev, RK_CRYPTO_HASH_CTRL, rctx->mode | RK_CRYPTO_HASH_SWAP_DO); CRYPTO_WRITE(dev, RK_CRYPTO_CONF, RK_CRYPTO_BYTESWAP_HRFIFO | @@ -164,64 +166,13 @@ static int rk_ahash_export(struct ahash_request *req, void *out) static int rk_ahash_digest(struct ahash_request *req) { - struct crypto_ahash *tfm = crypto_ahash_reqtfm(req); struct rk_ahash_ctx *tctx = crypto_tfm_ctx(req->base.tfm); - struct rk_crypto_info *dev = NULL; - unsigned long flags; - int ret; + struct rk_crypto_info *dev = tctx->dev; if (!req->nbytes) return zero_message_process(req); - - dev = tctx->dev; - dev->total = req->nbytes; - dev->left_bytes = req->nbytes; - dev->aligned = 0; - dev->mode = 0; - dev->align_size = 4; - dev->sg_dst = NULL; - dev->sg_src = req->src; - dev->first = req->src; - dev->nents = sg_nents(req->src); - - switch (crypto_ahash_digestsize(tfm)) { - case SHA1_DIGEST_SIZE: - dev->mode = RK_CRYPTO_HASH_SHA1; - break; - case 
SHA256_DIGEST_SIZE: - dev->mode = RK_CRYPTO_HASH_SHA256; - break; - case MD5_DIGEST_SIZE: - dev->mode = RK_CRYPTO_HASH_MD5; - break; - default: - return -EINVAL; - } - - rk_ahash_reg_init(dev); - - spin_lock_irqsave(&dev->lock, flags); - ret = crypto_enqueue_request(&dev->queue, &req->base); - spin_unlock_irqrestore(&dev->lock, flags); - - tasklet_schedule(&dev->crypto_tasklet); - - /* - * it will take some time to process date after last dma transmission. - * - * waiting time is relative with the last date len, - * so cannot set a fixed time here. - * 10-50 makes system not call here frequently wasting - * efficiency, and make it response quickly when dma - * complete. - */ - while (!CRYPTO_READ(dev, RK_CRYPTO_HASH_STS)) - usleep_range(10, 50); - - memcpy_fromio(req->result, dev->reg + RK_CRYPTO_HASH_DOUT_0, - crypto_ahash_digestsize(tfm)); - - return 0; + else + return dev->enqueue(dev, &req->base); } static void crypto_ahash_dma_start(struct rk_crypto_info *dev) @@ -244,12 +195,45 @@ static int rk_ahash_set_data_start(struct rk_crypto_info *dev) static int rk_ahash_start(struct rk_crypto_info *dev) { + struct ahash_request *req = ahash_request_cast(dev->async_req); + struct crypto_ahash *tfm; + struct rk_ahash_rctx *rctx; + + dev->total = req->nbytes; + dev->left_bytes = req->nbytes; + dev->aligned = 0; + dev->align_size = 4; + dev->sg_dst = NULL; + dev->sg_src = req->src; + dev->first = req->src; + dev->nents = sg_nents(req->src); + rctx = ahash_request_ctx(req); + rctx->mode = 0; + + tfm = crypto_ahash_reqtfm(req); + switch (crypto_ahash_digestsize(tfm)) { + case SHA1_DIGEST_SIZE: + rctx->mode = RK_CRYPTO_HASH_SHA1; + break; + case SHA256_DIGEST_SIZE: + rctx->mode = RK_CRYPTO_HASH_SHA256; + break; + case MD5_DIGEST_SIZE: + rctx->mode = RK_CRYPTO_HASH_MD5; + break; + default: + return -EINVAL; + } + + rk_ahash_reg_init(dev); return rk_ahash_set_data_start(dev); } static int rk_ahash_crypto_rx(struct rk_crypto_info *dev) { int err = 0; + struct ahash_request 
*req = ahash_request_cast(dev->async_req); + struct crypto_ahash *tfm; dev->unload_data(dev); if (dev->left_bytes) { @@ -264,7 +248,24 @@ static int rk_ahash_crypto_rx(struct rk_crypto_info *dev) } err = rk_ahash_set_data_start(dev); } else { - dev->complete(dev, 0); + /* + * it will take some time to process date after last dma + * transmission. + * + * waiting time is relative with the last date len, + * so cannot set a fixed time here. + * 10us makes system not call here frequently wasting + * efficiency, and make it response quickly when dma + * complete. + */ + while (!CRYPTO_READ(dev, RK_CRYPTO_HASH_STS)) + udelay(10); + + tfm = crypto_ahash_reqtfm(req); + memcpy_fromio(req->result, dev->reg + RK_CRYPTO_HASH_DOUT_0, + crypto_ahash_digestsize(tfm)); + dev->complete(dev->async_req, 0); + tasklet_schedule(&dev->queue_task); } out_rx: diff --git a/drivers/crypto/sahara.c b/drivers/crypto/sahara.c index 1d9ecd368b5b..08e7bdcaa6e3 100644 --- a/drivers/crypto/sahara.c +++ b/drivers/crypto/sahara.c @@ -202,7 +202,6 @@ struct sahara_dev { struct completion dma_completion; struct sahara_ctx *ctx; - spinlock_t lock; struct crypto_queue queue; unsigned long flags; @@ -543,10 +542,10 @@ static int sahara_hw_descriptor_create(struct sahara_dev *dev) unmap_out: dma_unmap_sg(dev->device, dev->out_sg, dev->nb_out_sg, - DMA_TO_DEVICE); + DMA_FROM_DEVICE); unmap_in: dma_unmap_sg(dev->device, dev->in_sg, dev->nb_in_sg, - DMA_FROM_DEVICE); + DMA_TO_DEVICE); return -EINVAL; } @@ -594,9 +593,9 @@ static int sahara_aes_process(struct ablkcipher_request *req) } dma_unmap_sg(dev->device, dev->out_sg, dev->nb_out_sg, - DMA_TO_DEVICE); - dma_unmap_sg(dev->device, dev->in_sg, dev->nb_in_sg, DMA_FROM_DEVICE); + dma_unmap_sg(dev->device, dev->in_sg, dev->nb_in_sg, + DMA_TO_DEVICE); return 0; } @@ -1376,13 +1375,13 @@ static void sahara_unregister_algs(struct sahara_dev *dev) crypto_unregister_ahash(&sha_v4_algs[i]); } -static struct platform_device_id sahara_platform_ids[] = { +static 
const struct platform_device_id sahara_platform_ids[] = { { .name = "sahara-imx27" }, { /* sentinel */ } }; MODULE_DEVICE_TABLE(platform, sahara_platform_ids); -static struct of_device_id sahara_dt_ids[] = { +static const struct of_device_id sahara_dt_ids[] = { { .compatible = "fsl,imx53-sahara" }, { .compatible = "fsl,imx27-sahara" }, { /* sentinel */ } @@ -1487,7 +1486,6 @@ static int sahara_probe(struct platform_device *pdev) crypto_init_queue(&dev->queue, SAHARA_QUEUE_LENGTH); - spin_lock_init(&dev->lock); mutex_init(&dev->queue_mutex); dev_ptr = dev; diff --git a/drivers/crypto/stm32/Kconfig b/drivers/crypto/stm32/Kconfig index 09b4ec87c212..602332e02729 100644 --- a/drivers/crypto/stm32/Kconfig +++ b/drivers/crypto/stm32/Kconfig @@ -1,7 +1,20 @@ -config CRYPTO_DEV_STM32 - tristate "Support for STM32 crypto accelerators" +config CRC_DEV_STM32 + tristate "Support for STM32 crc accelerators" depends on ARCH_STM32 select CRYPTO_HASH help This enables support for the CRC32 hw accelerator which can be found - on STMicroelectronis STM32 SOC. + on STMicroelectronics STM32 SOC. + +config HASH_DEV_STM32 + tristate "Support for STM32 hash accelerators" + depends on ARCH_STM32 + depends on HAS_DMA + select CRYPTO_HASH + select CRYPTO_MD5 + select CRYPTO_SHA1 + select CRYPTO_SHA256 + select CRYPTO_ENGINE + help + This enables support for the HASH hw accelerator which can be found + on STMicroelectronics STM32 SOC. diff --git a/drivers/crypto/stm32/Makefile b/drivers/crypto/stm32/Makefile index 73b4c6e47f5f..73cd56cad0cc 100644 --- a/drivers/crypto/stm32/Makefile +++ b/drivers/crypto/stm32/Makefile @@ -1,2 +1,2 @@ -obj-$(CONFIG_CRYPTO_DEV_STM32) += stm32_cryp.o -stm32_cryp-objs := stm32_crc32.o +obj-$(CONFIG_CRC_DEV_STM32) += stm32_crc32.o +obj-$(CONFIG_HASH_DEV_STM32) += stm32-hash.o
\ No newline at end of file diff --git a/drivers/crypto/stm32/stm32-hash.c b/drivers/crypto/stm32/stm32-hash.c new file mode 100644 index 000000000000..b585ce54a802 --- /dev/null +++ b/drivers/crypto/stm32/stm32-hash.c @@ -0,0 +1,1575 @@ +/* + * This file is part of STM32 Crypto driver for Linux. + * + * Copyright (C) 2017, STMicroelectronics - All Rights Reserved + * Author(s): Lionel DEBIEVE <lionel.debieve@st.com> for STMicroelectronics. + * + * License terms: GPL V2.0. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 as published by + * the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more + * details. + * + * You should have received a copy of the GNU General Public License along with + * this program. If not, see <http://www.gnu.org/licenses/>. 
+ * + */ + +#include <linux/clk.h> +#include <linux/crypto.h> +#include <linux/delay.h> +#include <linux/dmaengine.h> +#include <linux/interrupt.h> +#include <linux/io.h> +#include <linux/iopoll.h> +#include <linux/kernel.h> +#include <linux/module.h> +#include <linux/of_device.h> +#include <linux/platform_device.h> +#include <linux/reset.h> + +#include <crypto/engine.h> +#include <crypto/hash.h> +#include <crypto/md5.h> +#include <crypto/scatterwalk.h> +#include <crypto/sha.h> +#include <crypto/internal/hash.h> + +#define HASH_CR 0x00 +#define HASH_DIN 0x04 +#define HASH_STR 0x08 +#define HASH_IMR 0x20 +#define HASH_SR 0x24 +#define HASH_CSR(x) (0x0F8 + ((x) * 0x04)) +#define HASH_HREG(x) (0x310 + ((x) * 0x04)) +#define HASH_HWCFGR 0x3F0 +#define HASH_VER 0x3F4 +#define HASH_ID 0x3F8 + +/* Control Register */ +#define HASH_CR_INIT BIT(2) +#define HASH_CR_DMAE BIT(3) +#define HASH_CR_DATATYPE_POS 4 +#define HASH_CR_MODE BIT(6) +#define HASH_CR_MDMAT BIT(13) +#define HASH_CR_DMAA BIT(14) +#define HASH_CR_LKEY BIT(16) + +#define HASH_CR_ALGO_SHA1 0x0 +#define HASH_CR_ALGO_MD5 0x80 +#define HASH_CR_ALGO_SHA224 0x40000 +#define HASH_CR_ALGO_SHA256 0x40080 + +/* Interrupt */ +#define HASH_DINIE BIT(0) +#define HASH_DCIE BIT(1) + +/* Interrupt Mask */ +#define HASH_MASK_CALC_COMPLETION BIT(0) +#define HASH_MASK_DATA_INPUT BIT(1) + +/* Context swap register */ +#define HASH_CSR_REGISTER_NUMBER 53 + +/* Status Flags */ +#define HASH_SR_DATA_INPUT_READY BIT(0) +#define HASH_SR_OUTPUT_READY BIT(1) +#define HASH_SR_DMA_ACTIVE BIT(2) +#define HASH_SR_BUSY BIT(3) + +/* STR Register */ +#define HASH_STR_NBLW_MASK GENMASK(4, 0) +#define HASH_STR_DCAL BIT(8) + +#define HASH_FLAGS_INIT BIT(0) +#define HASH_FLAGS_OUTPUT_READY BIT(1) +#define HASH_FLAGS_CPU BIT(2) +#define HASH_FLAGS_DMA_READY BIT(3) +#define HASH_FLAGS_DMA_ACTIVE BIT(4) +#define HASH_FLAGS_HMAC_INIT BIT(5) +#define HASH_FLAGS_HMAC_FINAL BIT(6) +#define HASH_FLAGS_HMAC_KEY BIT(7) + +#define HASH_FLAGS_FINAL BIT(15) 
+#define HASH_FLAGS_FINUP BIT(16) +#define HASH_FLAGS_ALGO_MASK GENMASK(21, 18) +#define HASH_FLAGS_MD5 BIT(18) +#define HASH_FLAGS_SHA1 BIT(19) +#define HASH_FLAGS_SHA224 BIT(20) +#define HASH_FLAGS_SHA256 BIT(21) +#define HASH_FLAGS_ERRORS BIT(22) +#define HASH_FLAGS_HMAC BIT(23) + +#define HASH_OP_UPDATE 1 +#define HASH_OP_FINAL 2 + +enum stm32_hash_data_format { + HASH_DATA_32_BITS = 0x0, + HASH_DATA_16_BITS = 0x1, + HASH_DATA_8_BITS = 0x2, + HASH_DATA_1_BIT = 0x3 +}; + +#define HASH_BUFLEN 256 +#define HASH_LONG_KEY 64 +#define HASH_MAX_KEY_SIZE (SHA256_BLOCK_SIZE * 8) +#define HASH_QUEUE_LENGTH 16 +#define HASH_DMA_THRESHOLD 50 + +struct stm32_hash_ctx { + struct stm32_hash_dev *hdev; + unsigned long flags; + + u8 key[HASH_MAX_KEY_SIZE]; + int keylen; +}; + +struct stm32_hash_request_ctx { + struct stm32_hash_dev *hdev; + unsigned long flags; + unsigned long op; + + u8 digest[SHA256_DIGEST_SIZE] __aligned(sizeof(u32)); + size_t digcnt; + size_t bufcnt; + size_t buflen; + + /* DMA */ + struct scatterlist *sg; + unsigned int offset; + unsigned int total; + struct scatterlist sg_key; + + dma_addr_t dma_addr; + size_t dma_ct; + int nents; + + u8 data_type; + + u8 buffer[HASH_BUFLEN] __aligned(sizeof(u32)); + + /* Export Context */ + u32 *hw_context; +}; + +struct stm32_hash_algs_info { + struct ahash_alg *algs_list; + size_t size; +}; + +struct stm32_hash_pdata { + struct stm32_hash_algs_info *algs_info; + size_t algs_info_size; +}; + +struct stm32_hash_dev { + struct list_head list; + struct device *dev; + struct clk *clk; + struct reset_control *rst; + void __iomem *io_base; + phys_addr_t phys_base; + u32 dma_mode; + u32 dma_maxburst; + + spinlock_t lock; /* lock to protect queue */ + + struct ahash_request *req; + struct crypto_engine *engine; + + int err; + unsigned long flags; + + struct dma_chan *dma_lch; + struct completion dma_completion; + + const struct stm32_hash_pdata *pdata; +}; + +struct stm32_hash_drv { + struct list_head dev_list; + spinlock_t 
lock; /* List protection access */ +}; + +static struct stm32_hash_drv stm32_hash = { + .dev_list = LIST_HEAD_INIT(stm32_hash.dev_list), + .lock = __SPIN_LOCK_UNLOCKED(stm32_hash.lock), +}; + +static void stm32_hash_dma_callback(void *param); + +static inline u32 stm32_hash_read(struct stm32_hash_dev *hdev, u32 offset) +{ + return readl_relaxed(hdev->io_base + offset); +} + +static inline void stm32_hash_write(struct stm32_hash_dev *hdev, + u32 offset, u32 value) +{ + writel_relaxed(value, hdev->io_base + offset); +} + +static inline int stm32_hash_wait_busy(struct stm32_hash_dev *hdev) +{ + u32 status; + + return readl_relaxed_poll_timeout(hdev->io_base + HASH_SR, status, + !(status & HASH_SR_BUSY), 10, 10000); +} + +static void stm32_hash_set_nblw(struct stm32_hash_dev *hdev, int length) +{ + u32 reg; + + reg = stm32_hash_read(hdev, HASH_STR); + reg &= ~(HASH_STR_NBLW_MASK); + reg |= (8U * ((length) % 4U)); + stm32_hash_write(hdev, HASH_STR, reg); +} + +static int stm32_hash_write_key(struct stm32_hash_dev *hdev) +{ + struct crypto_ahash *tfm = crypto_ahash_reqtfm(hdev->req); + struct stm32_hash_ctx *ctx = crypto_ahash_ctx(tfm); + u32 reg; + int keylen = ctx->keylen; + void *key = ctx->key; + + if (keylen) { + stm32_hash_set_nblw(hdev, keylen); + + while (keylen > 0) { + stm32_hash_write(hdev, HASH_DIN, *(u32 *)key); + keylen -= 4; + key += 4; + } + + reg = stm32_hash_read(hdev, HASH_STR); + reg |= HASH_STR_DCAL; + stm32_hash_write(hdev, HASH_STR, reg); + + return -EINPROGRESS; + } + + return 0; +} + +static void stm32_hash_write_ctrl(struct stm32_hash_dev *hdev) +{ + struct stm32_hash_request_ctx *rctx = ahash_request_ctx(hdev->req); + struct crypto_ahash *tfm = crypto_ahash_reqtfm(hdev->req); + struct stm32_hash_ctx *ctx = crypto_ahash_ctx(tfm); + + u32 reg = HASH_CR_INIT; + + if (!(hdev->flags & HASH_FLAGS_INIT)) { + switch (rctx->flags & HASH_FLAGS_ALGO_MASK) { + case HASH_FLAGS_MD5: + reg |= HASH_CR_ALGO_MD5; + break; + case HASH_FLAGS_SHA1: + reg |= 
HASH_CR_ALGO_SHA1;
+			break;
+		case HASH_FLAGS_SHA224:
+			reg |= HASH_CR_ALGO_SHA224;
+			break;
+		case HASH_FLAGS_SHA256:
+			reg |= HASH_CR_ALGO_SHA256;
+			break;
+		default:
+			reg |= HASH_CR_ALGO_MD5;
+		}
+
+		reg |= (rctx->data_type << HASH_CR_DATATYPE_POS);
+
+		if (rctx->flags & HASH_FLAGS_HMAC) {
+			hdev->flags |= HASH_FLAGS_HMAC;
+			reg |= HASH_CR_MODE;
+			if (ctx->keylen > HASH_LONG_KEY)
+				reg |= HASH_CR_LKEY;
+		}
+
+		stm32_hash_write(hdev, HASH_IMR, HASH_DCIE);
+
+		stm32_hash_write(hdev, HASH_CR, reg);
+
+		hdev->flags |= HASH_FLAGS_INIT;
+
+		dev_dbg(hdev->dev, "Write Control %x\n", reg);
+	}
+}
+
+/*
+ * Copy as much pending scatterlist data as fits into rctx->buffer,
+ * advancing rctx->sg / rctx->offset and decrementing rctx->total.
+ * Stops when the buffer is full, when no bytes remain, or when the
+ * scatterlist is exhausted (rctx->total is then forced to 0).
+ */
+static void stm32_hash_append_sg(struct stm32_hash_request_ctx *rctx)
+{
+	size_t count;
+
+	while ((rctx->bufcnt < rctx->buflen) && rctx->total) {
+		count = min(rctx->sg->length - rctx->offset, rctx->total);
+		count = min(count, rctx->buflen - rctx->bufcnt);
+
+		if (count <= 0) {
+			/* skip zero-length entries, give up on anything else */
+			if ((rctx->sg->length == 0) && !sg_is_last(rctx->sg)) {
+				rctx->sg = sg_next(rctx->sg);
+				continue;
+			} else {
+				break;
+			}
+		}
+
+		scatterwalk_map_and_copy(rctx->buffer + rctx->bufcnt, rctx->sg,
+					 rctx->offset, count, 0);
+
+		rctx->bufcnt += count;
+		rctx->offset += count;
+		rctx->total -= count;
+
+		if (rctx->offset == rctx->sg->length) {
+			rctx->sg = sg_next(rctx->sg);
+			if (rctx->sg)
+				rctx->offset = 0;
+			else
+				rctx->total = 0;
+		}
+	}
+}
+
+/*
+ * Push @length bytes from @buf into HASH_DIN with CPU writes.
+ *
+ * When @final is non-zero the valid-bit count of the last word is
+ * programmed and the digest calculation is started (HASH_STR_DCAL).
+ *
+ * Returns 0 when more data may follow, -EINPROGRESS once the final
+ * calculation has been started, or -ETIMEDOUT if the peripheral stays
+ * busy.
+ */
+static int stm32_hash_xmit_cpu(struct stm32_hash_dev *hdev,
+			       const u8 *buf, size_t length, int final)
+{
+	unsigned int count, len32;
+	const u32 *buffer = (const u32 *)buf;
+	u32 reg;
+
+	if (final)
+		hdev->flags |= HASH_FLAGS_FINAL;
+
+	len32 = DIV_ROUND_UP(length, sizeof(u32));
+
+	/* length is a size_t: print it with %zu, not %d */
+	dev_dbg(hdev->dev, "%s: length: %zu, final: %x len32 %i\n",
+		__func__, length, final, len32);
+
+	hdev->flags |= HASH_FLAGS_CPU;
+
+	stm32_hash_write_ctrl(hdev);
+
+	if (stm32_hash_wait_busy(hdev))
+		return -ETIMEDOUT;
+
+	/*
+	 * The HMAC key is written once, before the first data chunk: test
+	 * that HASH_FLAGS_HMAC_KEY is still clear. Masking with
+	 * ~HASH_FLAGS_HMAC_KEY was always true here (HASH_FLAGS_CPU has
+	 * just been set), so the key was re-sent in front of every chunk.
+	 */
+	if ((hdev->flags & HASH_FLAGS_HMAC) &&
+	    !(hdev->flags & HASH_FLAGS_HMAC_KEY)) {
+		hdev->flags |= HASH_FLAGS_HMAC_KEY;
+		stm32_hash_write_key(hdev);
+		if (stm32_hash_wait_busy(hdev))
+			return -ETIMEDOUT;
+	}
+
+	for (count = 0; count < len32; count++)
+		stm32_hash_write(hdev, HASH_DIN, buffer[count]);
+
+	if (final) {
+		stm32_hash_set_nblw(hdev, length);
+		reg = stm32_hash_read(hdev, HASH_STR);
+		reg |= HASH_STR_DCAL;
+		stm32_hash_write(hdev, HASH_STR, reg);
+		if (hdev->flags & HASH_FLAGS_HMAC) {
+			if (stm32_hash_wait_busy(hdev))
+				return -ETIMEDOUT;
+			/* the key is written again after the final data */
+			stm32_hash_write_key(hdev);
+		}
+		return -EINPROGRESS;
+	}
+
+	return 0;
+}
+
+/*
+ * Drain the request's scatterlist through rctx->buffer using CPU writes.
+ * Full buffers are sent as intermediate chunks; when HASH_FLAGS_FINUP is
+ * set the remainder is sent as the final chunk.
+ *
+ * Returns the status of the last stm32_hash_xmit_cpu() call, or the
+ * first error reported by an intermediate chunk.
+ */
+static int stm32_hash_update_cpu(struct stm32_hash_dev *hdev)
+{
+	struct stm32_hash_request_ctx *rctx = ahash_request_ctx(hdev->req);
+	int bufcnt, err = 0, final;
+
+	dev_dbg(hdev->dev, "%s flags %lx\n", __func__, rctx->flags);
+
+	final = (rctx->flags & HASH_FLAGS_FINUP);
+
+	while ((rctx->total >= rctx->buflen) ||
+	       (rctx->bufcnt + rctx->total >= rctx->buflen)) {
+		stm32_hash_append_sg(rctx);
+		bufcnt = rctx->bufcnt;
+		rctx->bufcnt = 0;
+		err = stm32_hash_xmit_cpu(hdev, rctx->buffer, bufcnt, 0);
+		/* do not keep feeding a peripheral that timed out */
+		if (err)
+			return err;
+	}
+
+	stm32_hash_append_sg(rctx);
+
+	if (final) {
+		bufcnt = rctx->bufcnt;
+		rctx->bufcnt = 0;
+		err = stm32_hash_xmit_cpu(hdev, rctx->buffer, bufcnt,
+					  (rctx->flags & HASH_FLAGS_FINUP));
+	}
+
+	return err;
+}
+
+static int stm32_hash_xmit_dma(struct stm32_hash_dev *hdev,
+			       struct scatterlist *sg, int length, int mdma)
+{
+	struct dma_async_tx_descriptor *in_desc;
+	dma_cookie_t cookie;
+	u32 reg;
+	int err;
+
+	in_desc = dmaengine_prep_slave_sg(hdev->dma_lch, sg, 1,
+					  DMA_MEM_TO_DEV, DMA_PREP_INTERRUPT |
+					  DMA_CTRL_ACK);
+	if (!in_desc) {
+		dev_err(hdev->dev, "dmaengine_prep_slave error\n");
+		return -ENOMEM;
+	}
+
+	reinit_completion(&hdev->dma_completion);
+	in_desc->callback = stm32_hash_dma_callback;
+	in_desc->callback_param = hdev;
+
+	hdev->flags |= HASH_FLAGS_FINAL;
+	hdev->flags |= HASH_FLAGS_DMA_ACTIVE;
+
+	reg = stm32_hash_read(hdev, HASH_CR);
+
+	if (mdma)
+		reg |= HASH_CR_MDMAT;
+	else
+		reg &= ~HASH_CR_MDMAT;
+
+	reg |= HASH_CR_DMAE;
+
+	stm32_hash_write(hdev, HASH_CR, 
reg); + + stm32_hash_set_nblw(hdev, length); + + cookie = dmaengine_submit(in_desc); + err = dma_submit_error(cookie); + if (err) + return -ENOMEM; + + dma_async_issue_pending(hdev->dma_lch); + + if (!wait_for_completion_interruptible_timeout(&hdev->dma_completion, + msecs_to_jiffies(100))) + err = -ETIMEDOUT; + + if (dma_async_is_tx_complete(hdev->dma_lch, cookie, + NULL, NULL) != DMA_COMPLETE) + err = -ETIMEDOUT; + + if (err) { + dev_err(hdev->dev, "DMA Error %i\n", err); + dmaengine_terminate_all(hdev->dma_lch); + return err; + } + + return -EINPROGRESS; +} + +static void stm32_hash_dma_callback(void *param) +{ + struct stm32_hash_dev *hdev = param; + + complete(&hdev->dma_completion); + + hdev->flags |= HASH_FLAGS_DMA_READY; +} + +static int stm32_hash_hmac_dma_send(struct stm32_hash_dev *hdev) +{ + struct stm32_hash_request_ctx *rctx = ahash_request_ctx(hdev->req); + struct crypto_ahash *tfm = crypto_ahash_reqtfm(hdev->req); + struct stm32_hash_ctx *ctx = crypto_ahash_ctx(tfm); + int err; + + if (ctx->keylen < HASH_DMA_THRESHOLD || (hdev->dma_mode == 1)) { + err = stm32_hash_write_key(hdev); + if (stm32_hash_wait_busy(hdev)) + return -ETIMEDOUT; + } else { + if (!(hdev->flags & HASH_FLAGS_HMAC_KEY)) + sg_init_one(&rctx->sg_key, ctx->key, + ALIGN(ctx->keylen, sizeof(u32))); + + rctx->dma_ct = dma_map_sg(hdev->dev, &rctx->sg_key, 1, + DMA_TO_DEVICE); + if (rctx->dma_ct == 0) { + dev_err(hdev->dev, "dma_map_sg error\n"); + return -ENOMEM; + } + + err = stm32_hash_xmit_dma(hdev, &rctx->sg_key, ctx->keylen, 0); + + dma_unmap_sg(hdev->dev, &rctx->sg_key, 1, DMA_TO_DEVICE); + } + + return err; +} + +static int stm32_hash_dma_init(struct stm32_hash_dev *hdev) +{ + struct dma_slave_config dma_conf; + int err; + + memset(&dma_conf, 0, sizeof(dma_conf)); + + dma_conf.direction = DMA_MEM_TO_DEV; + dma_conf.dst_addr = hdev->phys_base + HASH_DIN; + dma_conf.dst_addr_width = DMA_SLAVE_BUSWIDTH_4_BYTES; + dma_conf.src_maxburst = hdev->dma_maxburst; + dma_conf.dst_maxburst = 
hdev->dma_maxburst;
+	dma_conf.device_fc = false;
+
+	hdev->dma_lch = dma_request_slave_channel(hdev->dev, "in");
+	if (!hdev->dma_lch) {
+		dev_err(hdev->dev, "Couldn't acquire a slave DMA channel.\n");
+		return -EBUSY;
+	}
+
+	err = dmaengine_slave_config(hdev->dma_lch, &dma_conf);
+	if (err) {
+		dma_release_channel(hdev->dma_lch);
+		hdev->dma_lch = NULL;
+		dev_err(hdev->dev, "Couldn't configure DMA slave.\n");
+		return err;
+	}
+
+	init_completion(&hdev->dma_completion);
+
+	return 0;
+}
+
+/*
+ * Send the whole request (hdev->req->src, hdev->req->nbytes) to the
+ * peripheral by DMA, one scatterlist entry at a time. For HMAC the key
+ * is sent before and after the data through stm32_hash_hmac_dma_send().
+ *
+ * When hdev->dma_mode == 1 the tail of the last entry is copied into
+ * rctx->buffer and written with CPU accesses after the DMA transfers,
+ * then the digest calculation is started by hand.
+ *
+ * Returns -EINPROGRESS when the transfer was started, or a negative
+ * error code (-EINVAL, -ENOMEM, -ETIMEDOUT, ...).
+ */
+static int stm32_hash_dma_send(struct stm32_hash_dev *hdev)
+{
+	struct stm32_hash_request_ctx *rctx = ahash_request_ctx(hdev->req);
+	struct scatterlist sg[1], *tsg;
+	/* ncp starts at 0 so the dma_mode == 1 tail never reads garbage */
+	int err = 0, len = 0, reg, ncp = 0;
+	unsigned int i;
+	const u32 *buffer = (const u32 *)rctx->buffer;
+
+	rctx->sg = hdev->req->src;
+	rctx->total = hdev->req->nbytes;
+
+	rctx->nents = sg_nents(rctx->sg);
+
+	if (rctx->nents < 0)
+		return -EINVAL;
+
+	stm32_hash_write_ctrl(hdev);
+
+	if (hdev->flags & HASH_FLAGS_HMAC) {
+		err = stm32_hash_hmac_dma_send(hdev);
+		if (err != -EINPROGRESS)
+			return err;
+	}
+
+	for_each_sg(rctx->sg, tsg, rctx->nents, i) {
+		/*
+		 * Work on a local copy of the entry (its length may be
+		 * adjusted below) and only then read the length: reading
+		 * sg->length before the copy used uninitialized stack data
+		 * on the first pass and the previous entry afterwards.
+		 */
+		sg[0] = *tsg;
+		len = sg->length;
+
+		if (sg_is_last(sg)) {
+			if (hdev->dma_mode == 1) {
+				len = (ALIGN(sg->length, 16) - 16);
+
+				ncp = sg_pcopy_to_buffer(
+					rctx->sg, rctx->nents,
+					rctx->buffer, sg->length - len,
+					rctx->total - sg->length + len);
+
+				sg->length = len;
+			} else {
+				if (!(IS_ALIGNED(sg->length, sizeof(u32)))) {
+					len = sg->length;
+					sg->length = ALIGN(sg->length,
+							   sizeof(u32));
+				}
+			}
+		}
+
+		rctx->dma_ct = dma_map_sg(hdev->dev, sg, 1,
+					  DMA_TO_DEVICE);
+		if (rctx->dma_ct == 0) {
+			dev_err(hdev->dev, "dma_map_sg error\n");
+			return -ENOMEM;
+		}
+
+		err = stm32_hash_xmit_dma(hdev, sg, len,
+					  !sg_is_last(sg));
+
+		dma_unmap_sg(hdev->dev, sg, 1, DMA_TO_DEVICE);
+
+		if (err == -ENOMEM)
+			return err;
+	}
+
+	if (hdev->dma_mode == 1) {
+		if (stm32_hash_wait_busy(hdev))
+			return -ETIMEDOUT;
+		reg = stm32_hash_read(hdev, HASH_CR);
+		reg &= ~HASH_CR_DMAE;
+		reg |= HASH_CR_DMAA;
+		stm32_hash_write(hdev, HASH_CR, reg);
+
+		/* write the ncp bytes that were kept back from the DMA */
+		for (i = 0; i < DIV_ROUND_UP(ncp, sizeof(u32)); i++)
+			stm32_hash_write(hdev, HASH_DIN, buffer[i]);
+
+		stm32_hash_set_nblw(hdev, ncp);
+		reg = stm32_hash_read(hdev, HASH_STR);
+		reg |= HASH_STR_DCAL;
+		stm32_hash_write(hdev, HASH_STR, reg);
+		err = -EINPROGRESS;
+	}
+
+	if (hdev->flags & HASH_FLAGS_HMAC) {
+		if (stm32_hash_wait_busy(hdev))
+			return -ETIMEDOUT;
+		err = stm32_hash_hmac_dma_send(hdev);
+	}
+
+	return err;
+}
+
+/*
+ * Return the device bound to @ctx, binding the first entry of the global
+ * device list on first use. Returns NULL when the list is empty.
+ */
+static struct stm32_hash_dev *stm32_hash_find_dev(struct stm32_hash_ctx *ctx)
+{
+	struct stm32_hash_dev *hdev = NULL, *tmp;
+
+	spin_lock_bh(&stm32_hash.lock);
+	if (!ctx->hdev) {
+		list_for_each_entry(tmp, &stm32_hash.dev_list, list) {
+			hdev = tmp;
+			break;
+		}
+		ctx->hdev = hdev;
+	} else {
+		hdev = ctx->hdev;
+	}
+
+	spin_unlock_bh(&stm32_hash.lock);
+
+	return hdev;
+}
+
+/*
+ * Tell whether @req can be sent by DMA: more than HASH_DMA_THRESHOLD
+ * bytes, a 4-byte aligned first offset, and for multi-entry lists no
+ * dma_mode 1 and only 32-bit sized entries (the last one excepted).
+ */
+static bool stm32_hash_dma_aligned_data(struct ahash_request *req)
+{
+	struct scatterlist *sg;
+	struct stm32_hash_ctx *ctx = crypto_ahash_ctx(crypto_ahash_reqtfm(req));
+	struct stm32_hash_dev *hdev = stm32_hash_find_dev(ctx);
+	int i;
+
+	if (req->nbytes <= HASH_DMA_THRESHOLD)
+		return false;
+
+	if (sg_nents(req->src) > 1) {
+		if (hdev->dma_mode == 1)
+			return false;
+		for_each_sg(req->src, sg, sg_nents(req->src), i) {
+			if ((!IS_ALIGNED(sg->length, sizeof(u32))) &&
+			    (!sg_is_last(sg)))
+				return false;
+		}
+	}
+
+	if (req->src->offset % 4)
+		return false;
+
+	return true;
+}
+
+static int stm32_hash_init(struct ahash_request *req)
+{
+	struct crypto_ahash *tfm = crypto_ahash_reqtfm(req);
+	struct stm32_hash_ctx *ctx = crypto_ahash_ctx(tfm);
+	struct stm32_hash_request_ctx *rctx = ahash_request_ctx(req);
+	struct stm32_hash_dev *hdev = stm32_hash_find_dev(ctx);
+
+	rctx->hdev = hdev;
+
+	rctx->flags = HASH_FLAGS_CPU;
+
+	rctx->digcnt = crypto_ahash_digestsize(tfm);
+	switch (rctx->digcnt) {
+	case MD5_DIGEST_SIZE:
+		rctx->flags |= HASH_FLAGS_MD5;
+		break;
+	case SHA1_DIGEST_SIZE:
+		
rctx->flags |= HASH_FLAGS_SHA1; + break; + case SHA224_DIGEST_SIZE: + rctx->flags |= HASH_FLAGS_SHA224; + break; + case SHA256_DIGEST_SIZE: + rctx->flags |= HASH_FLAGS_SHA256; + break; + default: + return -EINVAL; + } + + rctx->bufcnt = 0; + rctx->buflen = HASH_BUFLEN; + rctx->total = 0; + rctx->offset = 0; + rctx->data_type = HASH_DATA_8_BITS; + + memset(rctx->buffer, 0, HASH_BUFLEN); + + if (ctx->flags & HASH_FLAGS_HMAC) + rctx->flags |= HASH_FLAGS_HMAC; + + dev_dbg(hdev->dev, "%s Flags %lx\n", __func__, rctx->flags); + + return 0; +} + +static int stm32_hash_update_req(struct stm32_hash_dev *hdev) +{ + return stm32_hash_update_cpu(hdev); +} + +static int stm32_hash_final_req(struct stm32_hash_dev *hdev) +{ + struct ahash_request *req = hdev->req; + struct stm32_hash_request_ctx *rctx = ahash_request_ctx(req); + int err; + + if (!(rctx->flags & HASH_FLAGS_CPU)) + err = stm32_hash_dma_send(hdev); + else + err = stm32_hash_xmit_cpu(hdev, rctx->buffer, rctx->bufcnt, 1); + + rctx->bufcnt = 0; + + return err; +} + +static void stm32_hash_copy_hash(struct ahash_request *req) +{ + struct stm32_hash_request_ctx *rctx = ahash_request_ctx(req); + u32 *hash = (u32 *)rctx->digest; + unsigned int i, hashsize; + + switch (rctx->flags & HASH_FLAGS_ALGO_MASK) { + case HASH_FLAGS_MD5: + hashsize = MD5_DIGEST_SIZE; + break; + case HASH_FLAGS_SHA1: + hashsize = SHA1_DIGEST_SIZE; + break; + case HASH_FLAGS_SHA224: + hashsize = SHA224_DIGEST_SIZE; + break; + case HASH_FLAGS_SHA256: + hashsize = SHA256_DIGEST_SIZE; + break; + default: + return; + } + + for (i = 0; i < hashsize / sizeof(u32); i++) + hash[i] = be32_to_cpu(stm32_hash_read(rctx->hdev, + HASH_HREG(i))); +} + +static int stm32_hash_finish(struct ahash_request *req) +{ + struct stm32_hash_request_ctx *rctx = ahash_request_ctx(req); + + if (!req->result) + return -EINVAL; + + memcpy(req->result, rctx->digest, rctx->digcnt); + + return 0; +} + +static void stm32_hash_finish_req(struct ahash_request *req, int err) +{ + struct 
stm32_hash_request_ctx *rctx = ahash_request_ctx(req); + struct stm32_hash_dev *hdev = rctx->hdev; + + if (!err && (HASH_FLAGS_FINAL & hdev->flags)) { + stm32_hash_copy_hash(req); + err = stm32_hash_finish(req); + hdev->flags &= ~(HASH_FLAGS_FINAL | HASH_FLAGS_CPU | + HASH_FLAGS_INIT | HASH_FLAGS_DMA_READY | + HASH_FLAGS_OUTPUT_READY | HASH_FLAGS_HMAC | + HASH_FLAGS_HMAC_INIT | HASH_FLAGS_HMAC_FINAL | + HASH_FLAGS_HMAC_KEY); + } else { + rctx->flags |= HASH_FLAGS_ERRORS; + } + + crypto_finalize_hash_request(hdev->engine, req, err); +} + +static int stm32_hash_hw_init(struct stm32_hash_dev *hdev, + struct stm32_hash_request_ctx *rctx) +{ + if (!(HASH_FLAGS_INIT & hdev->flags)) { + stm32_hash_write(hdev, HASH_CR, HASH_CR_INIT); + stm32_hash_write(hdev, HASH_STR, 0); + stm32_hash_write(hdev, HASH_DIN, 0); + stm32_hash_write(hdev, HASH_IMR, 0); + hdev->err = 0; + } + + return 0; +} + +static int stm32_hash_handle_queue(struct stm32_hash_dev *hdev, + struct ahash_request *req) +{ + return crypto_transfer_hash_request_to_engine(hdev->engine, req); +} + +static int stm32_hash_prepare_req(struct crypto_engine *engine, + struct ahash_request *req) +{ + struct stm32_hash_ctx *ctx = crypto_ahash_ctx(crypto_ahash_reqtfm(req)); + struct stm32_hash_dev *hdev = stm32_hash_find_dev(ctx); + struct stm32_hash_request_ctx *rctx; + + if (!hdev) + return -ENODEV; + + hdev->req = req; + + rctx = ahash_request_ctx(req); + + dev_dbg(hdev->dev, "processing new req, op: %lu, nbytes %d\n", + rctx->op, req->nbytes); + + return stm32_hash_hw_init(hdev, rctx); +} + +static int stm32_hash_one_request(struct crypto_engine *engine, + struct ahash_request *req) +{ + struct stm32_hash_ctx *ctx = crypto_ahash_ctx(crypto_ahash_reqtfm(req)); + struct stm32_hash_dev *hdev = stm32_hash_find_dev(ctx); + struct stm32_hash_request_ctx *rctx; + int err = 0; + + if (!hdev) + return -ENODEV; + + hdev->req = req; + + rctx = ahash_request_ctx(req); + + if (rctx->op == HASH_OP_UPDATE) + err = 
stm32_hash_update_req(hdev); + else if (rctx->op == HASH_OP_FINAL) + err = stm32_hash_final_req(hdev); + + if (err != -EINPROGRESS) + /* done task will not finish it, so do it here */ + stm32_hash_finish_req(req, err); + + return 0; +} + +static int stm32_hash_enqueue(struct ahash_request *req, unsigned int op) +{ + struct stm32_hash_request_ctx *rctx = ahash_request_ctx(req); + struct stm32_hash_ctx *ctx = crypto_tfm_ctx(req->base.tfm); + struct stm32_hash_dev *hdev = ctx->hdev; + + rctx->op = op; + + return stm32_hash_handle_queue(hdev, req); +} + +static int stm32_hash_update(struct ahash_request *req) +{ + struct stm32_hash_request_ctx *rctx = ahash_request_ctx(req); + int ret; + + if (!req->nbytes || !(rctx->flags & HASH_FLAGS_CPU)) + return 0; + + rctx->total = req->nbytes; + rctx->sg = req->src; + rctx->offset = 0; + + if ((rctx->bufcnt + rctx->total < rctx->buflen)) { + stm32_hash_append_sg(rctx); + return 0; + } + + ret = stm32_hash_enqueue(req, HASH_OP_UPDATE); + + if (rctx->flags & HASH_FLAGS_FINUP) + return ret; + + return 0; +} + +static int stm32_hash_final(struct ahash_request *req) +{ + struct stm32_hash_request_ctx *rctx = ahash_request_ctx(req); + + rctx->flags |= HASH_FLAGS_FINUP; + + return stm32_hash_enqueue(req, HASH_OP_FINAL); +} + +static int stm32_hash_finup(struct ahash_request *req) +{ + struct stm32_hash_request_ctx *rctx = ahash_request_ctx(req); + struct stm32_hash_ctx *ctx = crypto_ahash_ctx(crypto_ahash_reqtfm(req)); + struct stm32_hash_dev *hdev = stm32_hash_find_dev(ctx); + int err1, err2; + + rctx->flags |= HASH_FLAGS_FINUP; + + if (hdev->dma_lch && stm32_hash_dma_aligned_data(req)) + rctx->flags &= ~HASH_FLAGS_CPU; + + err1 = stm32_hash_update(req); + + if (err1 == -EINPROGRESS || err1 == -EBUSY) + return err1; + + /* + * final() has to be always called to cleanup resources + * even if update() failed, except EINPROGRESS + */ + err2 = stm32_hash_final(req); + + return err1 ?: err2; +} + +static int stm32_hash_digest(struct 
ahash_request *req)
+{
+	return stm32_hash_init(req) ?: stm32_hash_finup(req);
+}
+
+/*
+ * Save the peripheral state of @req into @out.
+ *
+ * HASH_IMR, HASH_STR, HASH_CR and the HASH_CSR_REGISTER_NUMBER context
+ * swap registers are read into a freshly allocated rctx->hw_context, then
+ * the whole request context is copied to @out.
+ *
+ * Returns 0 on success or -ENOMEM when the register snapshot cannot be
+ * allocated.
+ *
+ * NOTE(review): @out receives the hw_context pointer, not the register
+ * values, and stm32_hash_import() frees that pointer; importing the same
+ * exported state twice would therefore touch freed memory.
+ */
+static int stm32_hash_export(struct ahash_request *req, void *out)
+{
+	struct stm32_hash_request_ctx *rctx = ahash_request_ctx(req);
+	struct stm32_hash_ctx *ctx = crypto_ahash_ctx(crypto_ahash_reqtfm(req));
+	struct stm32_hash_dev *hdev = stm32_hash_find_dev(ctx);
+	u32 *preg;
+	unsigned int i;
+
+	/* wait until the peripheral accepts input again */
+	while (!(stm32_hash_read(hdev, HASH_SR) & HASH_SR_DATA_INPUT_READY))
+		cpu_relax();
+
+	rctx->hw_context = kmalloc(sizeof(u32) * (3 + HASH_CSR_REGISTER_NUMBER),
+				   GFP_KERNEL);
+	/* the snapshot is written through this pointer right below */
+	if (!rctx->hw_context)
+		return -ENOMEM;
+
+	preg = rctx->hw_context;
+
+	*preg++ = stm32_hash_read(hdev, HASH_IMR);
+	*preg++ = stm32_hash_read(hdev, HASH_STR);
+	*preg++ = stm32_hash_read(hdev, HASH_CR);
+	for (i = 0; i < HASH_CSR_REGISTER_NUMBER; i++)
+		*preg++ = stm32_hash_read(hdev, HASH_CSR(i));
+
+	memcpy(out, rctx, sizeof(*rctx));
+
+	return 0;
+}
+
+/*
+ * Restore a state saved by stm32_hash_export(): copy @in over the request
+ * context, write the saved IMR/STR/CR values and the context swap
+ * registers back to the peripheral, then free the register snapshot.
+ */
+static int stm32_hash_import(struct ahash_request *req, const void *in)
+{
+	struct stm32_hash_request_ctx *rctx = ahash_request_ctx(req);
+	struct stm32_hash_ctx *ctx = crypto_ahash_ctx(crypto_ahash_reqtfm(req));
+	struct stm32_hash_dev *hdev = stm32_hash_find_dev(ctx);
+	const u32 *preg = in;
+	u32 reg;
+	unsigned int i;
+
+	memcpy(rctx, in, sizeof(*rctx));
+
+	preg = rctx->hw_context;
+
+	stm32_hash_write(hdev, HASH_IMR, *preg++);
+	stm32_hash_write(hdev, HASH_STR, *preg++);
+	/* CR is written twice: as saved, then with HASH_CR_INIT set */
+	stm32_hash_write(hdev, HASH_CR, *preg);
+	reg = *preg++ | HASH_CR_INIT;
+	stm32_hash_write(hdev, HASH_CR, reg);
+
+	for (i = 0; i < HASH_CSR_REGISTER_NUMBER; i++)
+		stm32_hash_write(hdev, HASH_CSR(i), *preg++);
+
+	kfree(rctx->hw_context);
+
+	return 0;
+}
+
+/*
+ * Store the HMAC key in the transform context. Keys longer than
+ * HASH_MAX_KEY_SIZE are rejected with -ENOMEM.
+ */
+static int stm32_hash_setkey(struct crypto_ahash *tfm,
+			     const u8 *key, unsigned int keylen)
+{
+	struct stm32_hash_ctx *ctx = crypto_ahash_ctx(tfm);
+
+	if (keylen <= HASH_MAX_KEY_SIZE) {
+		memcpy(ctx->key, key, keylen);
+		ctx->keylen = keylen;
+	} else {
+		return -ENOMEM;
+	}
+
+	return 0;
+}
+
+static int stm32_hash_cra_init_algs(struct crypto_tfm *tfm,
+				    const char 
*algs_hmac_name)
+{
+	struct stm32_hash_ctx *ctx = crypto_tfm_ctx(tfm);
+
+	crypto_ahash_set_reqsize(__crypto_ahash_cast(tfm),
+				 sizeof(struct stm32_hash_request_ctx));
+
+	ctx->keylen = 0;
+
+	/* a non-NULL name marks the transform as an HMAC variant */
+	if (algs_hmac_name)
+		ctx->flags |= HASH_FLAGS_HMAC;
+
+	return 0;
+}
+
+/* cra_init for the plain (non-HMAC) hashes */
+static int stm32_hash_cra_init(struct crypto_tfm *tfm)
+{
+	return stm32_hash_cra_init_algs(tfm, NULL);
+}
+
+/* cra_init for hmac(md5) */
+static int stm32_hash_cra_md5_init(struct crypto_tfm *tfm)
+{
+	return stm32_hash_cra_init_algs(tfm, "md5");
+}
+
+/* cra_init for hmac(sha1) */
+static int stm32_hash_cra_sha1_init(struct crypto_tfm *tfm)
+{
+	return stm32_hash_cra_init_algs(tfm, "sha1");
+}
+
+/* cra_init for hmac(sha224) */
+static int stm32_hash_cra_sha224_init(struct crypto_tfm *tfm)
+{
+	return stm32_hash_cra_init_algs(tfm, "sha224");
+}
+
+/* cra_init for hmac(sha256) */
+static int stm32_hash_cra_sha256_init(struct crypto_tfm *tfm)
+{
+	return stm32_hash_cra_init_algs(tfm, "sha256");
+}
+
+/*
+ * Threaded half of the interrupt. The current request is finished when
+ * the CPU path has seen HASH_FLAGS_OUTPUT_READY, or when the DMA path has
+ * both HASH_FLAGS_DMA_READY and HASH_FLAGS_DMA_ACTIVE set; the consumed
+ * flag is cleared first. Always returns IRQ_HANDLED.
+ */
+static irqreturn_t stm32_hash_irq_thread(int irq, void *dev_id)
+{
+	struct stm32_hash_dev *hdev = dev_id;
+
+	if (HASH_FLAGS_CPU & hdev->flags) {
+		if (HASH_FLAGS_OUTPUT_READY & hdev->flags) {
+			hdev->flags &= ~HASH_FLAGS_OUTPUT_READY;
+			goto finish;
+		}
+	} else if (HASH_FLAGS_DMA_READY & hdev->flags) {
+		if (HASH_FLAGS_DMA_ACTIVE & hdev->flags) {
+			hdev->flags &= ~HASH_FLAGS_DMA_ACTIVE;
+			goto finish;
+		}
+	}
+
+	return IRQ_HANDLED;
+
+finish:
+	/*
+	 * Finish current request. No error is detected on this path, so
+	 * report success explicitly instead of passing a local variable
+	 * that was never assigned.
+	 */
+	stm32_hash_finish_req(hdev->req, 0);
+
+	return IRQ_HANDLED;
+}
+
+/*
+ * Hard interrupt half: when HASH_SR reports OUTPUT_READY, clear that bit
+ * in the status register, record it in hdev->flags and wake the thread.
+ * Returns IRQ_NONE for any other interrupt cause.
+ */
+static irqreturn_t stm32_hash_irq_handler(int irq, void *dev_id)
+{
+	struct stm32_hash_dev *hdev = dev_id;
+	u32 reg;
+
+	reg = stm32_hash_read(hdev, HASH_SR);
+	if (reg & HASH_SR_OUTPUT_READY) {
+		reg &= ~HASH_SR_OUTPUT_READY;
+		stm32_hash_write(hdev, HASH_SR, reg);
+		hdev->flags |= HASH_FLAGS_OUTPUT_READY;
+		return IRQ_WAKE_THREAD;
+	}
+
+	return IRQ_NONE;
+}
+
+static struct ahash_alg algs_md5_sha1[] = {
+	{
+		.init = stm32_hash_init,
+		.update = stm32_hash_update,
+		.final = stm32_hash_final,
+		.finup = stm32_hash_finup,
+		.digest = stm32_hash_digest,
+		.export = 
stm32_hash_export, + .import = stm32_hash_import, + .halg = { + .digestsize = MD5_DIGEST_SIZE, + .statesize = sizeof(struct stm32_hash_request_ctx), + .base = { + .cra_name = "md5", + .cra_driver_name = "stm32-md5", + .cra_priority = 200, + .cra_flags = CRYPTO_ALG_TYPE_AHASH | + CRYPTO_ALG_ASYNC | + CRYPTO_ALG_KERN_DRIVER_ONLY, + .cra_blocksize = MD5_HMAC_BLOCK_SIZE, + .cra_ctxsize = sizeof(struct stm32_hash_ctx), + .cra_alignmask = 3, + .cra_init = stm32_hash_cra_init, + .cra_module = THIS_MODULE, + } + } + }, + { + .init = stm32_hash_init, + .update = stm32_hash_update, + .final = stm32_hash_final, + .finup = stm32_hash_finup, + .digest = stm32_hash_digest, + .export = stm32_hash_export, + .import = stm32_hash_import, + .setkey = stm32_hash_setkey, + .halg = { + .digestsize = MD5_DIGEST_SIZE, + .statesize = sizeof(struct stm32_hash_request_ctx), + .base = { + .cra_name = "hmac(md5)", + .cra_driver_name = "stm32-hmac-md5", + .cra_priority = 200, + .cra_flags = CRYPTO_ALG_TYPE_AHASH | + CRYPTO_ALG_ASYNC | + CRYPTO_ALG_KERN_DRIVER_ONLY, + .cra_blocksize = MD5_HMAC_BLOCK_SIZE, + .cra_ctxsize = sizeof(struct stm32_hash_ctx), + .cra_alignmask = 3, + .cra_init = stm32_hash_cra_md5_init, + .cra_module = THIS_MODULE, + } + } + }, + { + .init = stm32_hash_init, + .update = stm32_hash_update, + .final = stm32_hash_final, + .finup = stm32_hash_finup, + .digest = stm32_hash_digest, + .export = stm32_hash_export, + .import = stm32_hash_import, + .halg = { + .digestsize = SHA1_DIGEST_SIZE, + .statesize = sizeof(struct stm32_hash_request_ctx), + .base = { + .cra_name = "sha1", + .cra_driver_name = "stm32-sha1", + .cra_priority = 200, + .cra_flags = CRYPTO_ALG_TYPE_AHASH | + CRYPTO_ALG_ASYNC | + CRYPTO_ALG_KERN_DRIVER_ONLY, + .cra_blocksize = SHA1_BLOCK_SIZE, + .cra_ctxsize = sizeof(struct stm32_hash_ctx), + .cra_alignmask = 3, + .cra_init = stm32_hash_cra_init, + .cra_module = THIS_MODULE, + } + } + }, + { + .init = stm32_hash_init, + .update = stm32_hash_update, + .final = 
stm32_hash_final, + .finup = stm32_hash_finup, + .digest = stm32_hash_digest, + .export = stm32_hash_export, + .import = stm32_hash_import, + .setkey = stm32_hash_setkey, + .halg = { + .digestsize = SHA1_DIGEST_SIZE, + .statesize = sizeof(struct stm32_hash_request_ctx), + .base = { + .cra_name = "hmac(sha1)", + .cra_driver_name = "stm32-hmac-sha1", + .cra_priority = 200, + .cra_flags = CRYPTO_ALG_TYPE_AHASH | + CRYPTO_ALG_ASYNC | + CRYPTO_ALG_KERN_DRIVER_ONLY, + .cra_blocksize = SHA1_BLOCK_SIZE, + .cra_ctxsize = sizeof(struct stm32_hash_ctx), + .cra_alignmask = 3, + .cra_init = stm32_hash_cra_sha1_init, + .cra_module = THIS_MODULE, + } + } + }, +}; + +static struct ahash_alg algs_sha224_sha256[] = { + { + .init = stm32_hash_init, + .update = stm32_hash_update, + .final = stm32_hash_final, + .finup = stm32_hash_finup, + .digest = stm32_hash_digest, + .export = stm32_hash_export, + .import = stm32_hash_import, + .halg = { + .digestsize = SHA224_DIGEST_SIZE, + .statesize = sizeof(struct stm32_hash_request_ctx), + .base = { + .cra_name = "sha224", + .cra_driver_name = "stm32-sha224", + .cra_priority = 200, + .cra_flags = CRYPTO_ALG_TYPE_AHASH | + CRYPTO_ALG_ASYNC | + CRYPTO_ALG_KERN_DRIVER_ONLY, + .cra_blocksize = SHA224_BLOCK_SIZE, + .cra_ctxsize = sizeof(struct stm32_hash_ctx), + .cra_alignmask = 3, + .cra_init = stm32_hash_cra_init, + .cra_module = THIS_MODULE, + } + } + }, + { + .init = stm32_hash_init, + .update = stm32_hash_update, + .final = stm32_hash_final, + .finup = stm32_hash_finup, + .digest = stm32_hash_digest, + .setkey = stm32_hash_setkey, + .export = stm32_hash_export, + .import = stm32_hash_import, + .halg = { + .digestsize = SHA224_DIGEST_SIZE, + .statesize = sizeof(struct stm32_hash_request_ctx), + .base = { + .cra_name = "hmac(sha224)", + .cra_driver_name = "stm32-hmac-sha224", + .cra_priority = 200, + .cra_flags = CRYPTO_ALG_TYPE_AHASH | + CRYPTO_ALG_ASYNC | + CRYPTO_ALG_KERN_DRIVER_ONLY, + .cra_blocksize = SHA224_BLOCK_SIZE, + .cra_ctxsize = 
sizeof(struct stm32_hash_ctx), + .cra_alignmask = 3, + .cra_init = stm32_hash_cra_sha224_init, + .cra_module = THIS_MODULE, + } + } + }, + { + .init = stm32_hash_init, + .update = stm32_hash_update, + .final = stm32_hash_final, + .finup = stm32_hash_finup, + .digest = stm32_hash_digest, + .export = stm32_hash_export, + .import = stm32_hash_import, + .halg = { + .digestsize = SHA256_DIGEST_SIZE, + .statesize = sizeof(struct stm32_hash_request_ctx), + .base = { + .cra_name = "sha256", + .cra_driver_name = "stm32-sha256", + .cra_priority = 200, + .cra_flags = CRYPTO_ALG_TYPE_AHASH | + CRYPTO_ALG_ASYNC | + CRYPTO_ALG_KERN_DRIVER_ONLY, + .cra_blocksize = SHA256_BLOCK_SIZE, + .cra_ctxsize = sizeof(struct stm32_hash_ctx), + .cra_alignmask = 3, + .cra_init = stm32_hash_cra_init, + .cra_module = THIS_MODULE, + } + } + }, + { + .init = stm32_hash_init, + .update = stm32_hash_update, + .final = stm32_hash_final, + .finup = stm32_hash_finup, + .digest = stm32_hash_digest, + .export = stm32_hash_export, + .import = stm32_hash_import, + .setkey = stm32_hash_setkey, + .halg = { + .digestsize = SHA256_DIGEST_SIZE, + .statesize = sizeof(struct stm32_hash_request_ctx), + .base = { + .cra_name = "hmac(sha256)", + .cra_driver_name = "stm32-hmac-sha256", + .cra_priority = 200, + .cra_flags = CRYPTO_ALG_TYPE_AHASH | + CRYPTO_ALG_ASYNC | + CRYPTO_ALG_KERN_DRIVER_ONLY, + .cra_blocksize = SHA256_BLOCK_SIZE, + .cra_ctxsize = sizeof(struct stm32_hash_ctx), + .cra_alignmask = 3, + .cra_init = stm32_hash_cra_sha256_init, + .cra_module = THIS_MODULE, + } + } + }, +}; + +static int stm32_hash_register_algs(struct stm32_hash_dev *hdev) +{ + unsigned int i, j; + int err; + + for (i = 0; i < hdev->pdata->algs_info_size; i++) { + for (j = 0; j < hdev->pdata->algs_info[i].size; j++) { + err = crypto_register_ahash( + &hdev->pdata->algs_info[i].algs_list[j]); + if (err) + goto err_algs; + } + } + + return 0; +err_algs: + dev_err(hdev->dev, "Algo %d : %d failed\n", i, j); + for (; i--; ) { + for (; 
j--;) + crypto_unregister_ahash( + &hdev->pdata->algs_info[i].algs_list[j]); + } + + return err; +} + +static int stm32_hash_unregister_algs(struct stm32_hash_dev *hdev) +{ + unsigned int i, j; + + for (i = 0; i < hdev->pdata->algs_info_size; i++) { + for (j = 0; j < hdev->pdata->algs_info[i].size; j++) + crypto_unregister_ahash( + &hdev->pdata->algs_info[i].algs_list[j]); + } + + return 0; +} + +static struct stm32_hash_algs_info stm32_hash_algs_info_stm32f4[] = { + { + .algs_list = algs_md5_sha1, + .size = ARRAY_SIZE(algs_md5_sha1), + }, +}; + +static const struct stm32_hash_pdata stm32_hash_pdata_stm32f4 = { + .algs_info = stm32_hash_algs_info_stm32f4, + .algs_info_size = ARRAY_SIZE(stm32_hash_algs_info_stm32f4), +}; + +static struct stm32_hash_algs_info stm32_hash_algs_info_stm32f7[] = { + { + .algs_list = algs_md5_sha1, + .size = ARRAY_SIZE(algs_md5_sha1), + }, + { + .algs_list = algs_sha224_sha256, + .size = ARRAY_SIZE(algs_sha224_sha256), + }, +}; + +static const struct stm32_hash_pdata stm32_hash_pdata_stm32f7 = { + .algs_info = stm32_hash_algs_info_stm32f7, + .algs_info_size = ARRAY_SIZE(stm32_hash_algs_info_stm32f7), +}; + +static const struct of_device_id stm32_hash_of_match[] = { + { + .compatible = "st,stm32f456-hash", + .data = &stm32_hash_pdata_stm32f4, + }, + { + .compatible = "st,stm32f756-hash", + .data = &stm32_hash_pdata_stm32f7, + }, + {}, +}; + +MODULE_DEVICE_TABLE(of, stm32_hash_of_match); + +static int stm32_hash_get_of_match(struct stm32_hash_dev *hdev, + struct device *dev) +{ + const struct of_device_id *match; + int err; + + match = of_match_device(stm32_hash_of_match, dev); + if (!match) { + dev_err(dev, "no compatible OF match\n"); + return -EINVAL; + } + + err = of_property_read_u32(dev->of_node, "dma-maxburst", + &hdev->dma_maxburst); + + hdev->pdata = match->data; + + return err; +} + +static int stm32_hash_probe(struct platform_device *pdev) +{ + struct stm32_hash_dev *hdev; + struct device *dev = &pdev->dev; + struct resource 
*res; + int ret, irq; + + hdev = devm_kzalloc(dev, sizeof(*hdev), GFP_KERNEL); + if (!hdev) + return -ENOMEM; + + res = platform_get_resource(pdev, IORESOURCE_MEM, 0); + hdev->io_base = devm_ioremap_resource(dev, res); + if (IS_ERR(hdev->io_base)) + return PTR_ERR(hdev->io_base); + + hdev->phys_base = res->start; + + ret = stm32_hash_get_of_match(hdev, dev); + if (ret) + return ret; + + irq = platform_get_irq(pdev, 0); + if (irq < 0) { + dev_err(dev, "Cannot get IRQ resource\n"); + return irq; + } + + ret = devm_request_threaded_irq(dev, irq, stm32_hash_irq_handler, + stm32_hash_irq_thread, IRQF_ONESHOT, + dev_name(dev), hdev); + if (ret) { + dev_err(dev, "Cannot grab IRQ\n"); + return ret; + } + + hdev->clk = devm_clk_get(&pdev->dev, NULL); + if (IS_ERR(hdev->clk)) { + dev_err(dev, "failed to get clock for hash (%lu)\n", + PTR_ERR(hdev->clk)); + return PTR_ERR(hdev->clk); + } + + ret = clk_prepare_enable(hdev->clk); + if (ret) { + dev_err(dev, "failed to enable hash clock (%d)\n", ret); + return ret; + } + + hdev->rst = devm_reset_control_get(&pdev->dev, NULL); + if (!IS_ERR(hdev->rst)) { + reset_control_assert(hdev->rst); + udelay(2); + reset_control_deassert(hdev->rst); + } + + hdev->dev = dev; + + platform_set_drvdata(pdev, hdev); + + ret = stm32_hash_dma_init(hdev); + if (ret) + dev_dbg(dev, "DMA mode not available\n"); + + spin_lock(&stm32_hash.lock); + list_add_tail(&hdev->list, &stm32_hash.dev_list); + spin_unlock(&stm32_hash.lock); + + /* Initialize crypto engine */ + hdev->engine = crypto_engine_alloc_init(dev, 1); + if (!hdev->engine) { + ret = -ENOMEM; + goto err_engine; + } + + hdev->engine->prepare_hash_request = stm32_hash_prepare_req; + hdev->engine->hash_one_request = stm32_hash_one_request; + + ret = crypto_engine_start(hdev->engine); + if (ret) + goto err_engine_start; + + hdev->dma_mode = stm32_hash_read(hdev, HASH_HWCFGR); + + /* Register algos */ + ret = stm32_hash_register_algs(hdev); + if (ret) + goto err_algs; + + dev_info(dev, "Init HASH 
done HW ver %x DMA mode %u\n", + stm32_hash_read(hdev, HASH_VER), hdev->dma_mode); + + return 0; + +err_algs: +err_engine_start: + crypto_engine_exit(hdev->engine); +err_engine: + spin_lock(&stm32_hash.lock); + list_del(&hdev->list); + spin_unlock(&stm32_hash.lock); + + if (hdev->dma_lch) + dma_release_channel(hdev->dma_lch); + + clk_disable_unprepare(hdev->clk); + + return ret; +} + +static int stm32_hash_remove(struct platform_device *pdev) +{ + static struct stm32_hash_dev *hdev; + + hdev = platform_get_drvdata(pdev); + if (!hdev) + return -ENODEV; + + stm32_hash_unregister_algs(hdev); + + crypto_engine_exit(hdev->engine); + + spin_lock(&stm32_hash.lock); + list_del(&hdev->list); + spin_unlock(&stm32_hash.lock); + + if (hdev->dma_lch) + dma_release_channel(hdev->dma_lch); + + clk_disable_unprepare(hdev->clk); + + return 0; +} + +static struct platform_driver stm32_hash_driver = { + .probe = stm32_hash_probe, + .remove = stm32_hash_remove, + .driver = { + .name = "stm32-hash", + .of_match_table = stm32_hash_of_match, + } +}; + +module_platform_driver(stm32_hash_driver); + +MODULE_DESCRIPTION("STM32 SHA1/224/256 & MD5 (HMAC) hw accelerator driver"); +MODULE_AUTHOR("Lionel Debieve <lionel.debieve@st.com>"); +MODULE_LICENSE("GPL v2"); diff --git a/drivers/crypto/stm32/stm32_crc32.c b/drivers/crypto/stm32/stm32_crc32.c index ec83b1e6bfe8..090582baecfe 100644 --- a/drivers/crypto/stm32/stm32_crc32.c +++ b/drivers/crypto/stm32/stm32_crc32.c @@ -107,12 +107,12 @@ static int stm32_crc_init(struct shash_desc *desc) spin_unlock_bh(&crc_list.lock); /* Reset, set key, poly and configure in bit reverse mode */ - writel(bitrev32(mctx->key), ctx->crc->regs + CRC_INIT); - writel(bitrev32(mctx->poly), ctx->crc->regs + CRC_POL); - writel(CRC_CR_RESET | CRC_CR_REVERSE, ctx->crc->regs + CRC_CR); + writel_relaxed(bitrev32(mctx->key), ctx->crc->regs + CRC_INIT); + writel_relaxed(bitrev32(mctx->poly), ctx->crc->regs + CRC_POL); + writel_relaxed(CRC_CR_RESET | CRC_CR_REVERSE, 
ctx->crc->regs + CRC_CR); /* Store partial result */ - ctx->partial = readl(ctx->crc->regs + CRC_DR); + ctx->partial = readl_relaxed(ctx->crc->regs + CRC_DR); ctx->crc->nb_pending_bytes = 0; return 0; @@ -135,7 +135,8 @@ static int stm32_crc_update(struct shash_desc *desc, const u8 *d8, if (crc->nb_pending_bytes == sizeof(u32)) { /* Process completed pending data */ - writel(*(u32 *)crc->pending_data, crc->regs + CRC_DR); + writel_relaxed(*(u32 *)crc->pending_data, + crc->regs + CRC_DR); crc->nb_pending_bytes = 0; } } @@ -143,10 +144,10 @@ static int stm32_crc_update(struct shash_desc *desc, const u8 *d8, d32 = (u32 *)d8; for (i = 0; i < length >> 2; i++) /* Process 32 bits data */ - writel(*(d32++), crc->regs + CRC_DR); + writel_relaxed(*(d32++), crc->regs + CRC_DR); /* Store partial result */ - ctx->partial = readl(crc->regs + CRC_DR); + ctx->partial = readl_relaxed(crc->regs + CRC_DR); /* Check for pending data (non 32 bits) */ length &= 3; @@ -295,7 +296,7 @@ static int stm32_crc_remove(struct platform_device *pdev) list_del(&crc->list); spin_unlock(&crc_list.lock); - crypto_unregister_shash(algs); + crypto_unregister_shashes(algs, ARRAY_SIZE(algs)); clk_disable_unprepare(crc->clk); diff --git a/drivers/crypto/sunxi-ss/Makefile b/drivers/crypto/sunxi-ss/Makefile index 8f4c7a273141..ccb893219079 100644 --- a/drivers/crypto/sunxi-ss/Makefile +++ b/drivers/crypto/sunxi-ss/Makefile @@ -1,2 +1,3 @@ obj-$(CONFIG_CRYPTO_DEV_SUN4I_SS) += sun4i-ss.o sun4i-ss-y += sun4i-ss-core.o sun4i-ss-hash.o sun4i-ss-cipher.o +sun4i-ss-$(CONFIG_CRYPTO_DEV_SUN4I_SS_PRNG) += sun4i-ss-prng.o diff --git a/drivers/crypto/sunxi-ss/sun4i-ss-core.c b/drivers/crypto/sunxi-ss/sun4i-ss-core.c index 02ad8256e900..1547cbe13dc2 100644 --- a/drivers/crypto/sunxi-ss/sun4i-ss-core.c +++ b/drivers/crypto/sunxi-ss/sun4i-ss-core.c @@ -213,6 +213,23 @@ static struct sun4i_ss_alg_template ss_algs[] = { } } }, +#ifdef CONFIG_CRYPTO_DEV_SUN4I_SS_PRNG +{ + .type = CRYPTO_ALG_TYPE_RNG, + .alg.rng = { + .base 
= { + .cra_name = "stdrng", + .cra_driver_name = "sun4i_ss_rng", + .cra_priority = 300, + .cra_ctxsize = 0, + .cra_module = THIS_MODULE, + }, + .generate = sun4i_ss_prng_generate, + .seed = sun4i_ss_prng_seed, + .seedsize = SS_SEED_LEN / BITS_PER_BYTE, + } +}, +#endif }; static int sun4i_ss_probe(struct platform_device *pdev) @@ -355,6 +372,13 @@ static int sun4i_ss_probe(struct platform_device *pdev) goto error_alg; } break; + case CRYPTO_ALG_TYPE_RNG: + err = crypto_register_rng(&ss_algs[i].alg.rng); + if (err) { + dev_err(ss->dev, "Fail to register %s\n", + ss_algs[i].alg.rng.base.cra_name); + } + break; } } platform_set_drvdata(pdev, ss); @@ -369,6 +393,9 @@ error_alg: case CRYPTO_ALG_TYPE_AHASH: crypto_unregister_ahash(&ss_algs[i].alg.hash); break; + case CRYPTO_ALG_TYPE_RNG: + crypto_unregister_rng(&ss_algs[i].alg.rng); + break; } } if (ss->reset) @@ -393,6 +420,9 @@ static int sun4i_ss_remove(struct platform_device *pdev) case CRYPTO_ALG_TYPE_AHASH: crypto_unregister_ahash(&ss_algs[i].alg.hash); break; + case CRYPTO_ALG_TYPE_RNG: + crypto_unregister_rng(&ss_algs[i].alg.rng); + break; } } diff --git a/drivers/crypto/sunxi-ss/sun4i-ss-prng.c b/drivers/crypto/sunxi-ss/sun4i-ss-prng.c new file mode 100644 index 000000000000..0d01d1624252 --- /dev/null +++ b/drivers/crypto/sunxi-ss/sun4i-ss-prng.c @@ -0,0 +1,56 @@ +#include "sun4i-ss.h" + +int sun4i_ss_prng_seed(struct crypto_rng *tfm, const u8 *seed, + unsigned int slen) +{ + struct sun4i_ss_alg_template *algt; + struct rng_alg *alg = crypto_rng_alg(tfm); + + algt = container_of(alg, struct sun4i_ss_alg_template, alg.rng); + memcpy(algt->ss->seed, seed, slen); + + return 0; +} + +int sun4i_ss_prng_generate(struct crypto_rng *tfm, const u8 *src, + unsigned int slen, u8 *dst, unsigned int dlen) +{ + struct sun4i_ss_alg_template *algt; + struct rng_alg *alg = crypto_rng_alg(tfm); + int i; + u32 v; + u32 *data = (u32 *)dst; + const u32 mode = SS_OP_PRNG | SS_PRNG_CONTINUE | SS_ENABLED; + size_t len; + struct 
sun4i_ss_ctx *ss; + unsigned int todo = (dlen / 4) * 4; + + algt = container_of(alg, struct sun4i_ss_alg_template, alg.rng); + ss = algt->ss; + + spin_lock(&ss->slock); + + writel(mode, ss->base + SS_CTL); + + while (todo > 0) { + /* write the seed */ + for (i = 0; i < SS_SEED_LEN / BITS_PER_LONG; i++) + writel(ss->seed[i], ss->base + SS_KEY0 + i * 4); + + /* Read the random data */ + len = min_t(size_t, SS_DATA_LEN / BITS_PER_BYTE, todo); + readsl(ss->base + SS_TXFIFO, data, len / 4); + data += len / 4; + todo -= len; + + /* Update the seed */ + for (i = 0; i < SS_SEED_LEN / BITS_PER_LONG; i++) { + v = readl(ss->base + SS_KEY0 + i * 4); + ss->seed[i] = v; + } + } + + writel(0, ss->base + SS_CTL); + spin_unlock(&ss->slock); + return dlen; +} diff --git a/drivers/crypto/sunxi-ss/sun4i-ss.h b/drivers/crypto/sunxi-ss/sun4i-ss.h index a0e1efc1cb2a..f3ac90692ac6 100644 --- a/drivers/crypto/sunxi-ss/sun4i-ss.h +++ b/drivers/crypto/sunxi-ss/sun4i-ss.h @@ -32,6 +32,7 @@ #include <crypto/aes.h> #include <crypto/des.h> #include <crypto/internal/rng.h> +#include <crypto/rng.h> #define SS_CTL 0x00 #define SS_KEY0 0x04 @@ -127,6 +128,9 @@ #define SS_RXFIFO_EMP_INT_ENABLE (1 << 2) #define SS_TXFIFO_AVA_INT_ENABLE (1 << 0) +#define SS_SEED_LEN 192 +#define SS_DATA_LEN 160 + struct sun4i_ss_ctx { void __iomem *base; int irq; @@ -136,6 +140,9 @@ struct sun4i_ss_ctx { struct device *dev; struct resource *res; spinlock_t slock; /* control the use of the device */ +#ifdef CONFIG_CRYPTO_DEV_SUN4I_SS_PRNG + u32 seed[SS_SEED_LEN / BITS_PER_LONG]; +#endif }; struct sun4i_ss_alg_template { @@ -144,6 +151,7 @@ struct sun4i_ss_alg_template { union { struct skcipher_alg crypto; struct ahash_alg hash; + struct rng_alg rng; } alg; struct sun4i_ss_ctx *ss; }; @@ -201,3 +209,6 @@ int sun4i_ss_des_setkey(struct crypto_skcipher *tfm, const u8 *key, unsigned int keylen); int sun4i_ss_des3_setkey(struct crypto_skcipher *tfm, const u8 *key, unsigned int keylen); +int sun4i_ss_prng_generate(struct 
crypto_rng *tfm, const u8 *src, + unsigned int slen, u8 *dst, unsigned int dlen); +int sun4i_ss_prng_seed(struct crypto_rng *tfm, const u8 *seed, unsigned int slen); diff --git a/drivers/crypto/virtio/virtio_crypto_algs.c b/drivers/crypto/virtio/virtio_crypto_algs.c index 49defda4e03d..5035b0dc1e40 100644 --- a/drivers/crypto/virtio/virtio_crypto_algs.c +++ b/drivers/crypto/virtio/virtio_crypto_algs.c @@ -27,12 +27,68 @@ #include <uapi/linux/virtio_crypto.h> #include "virtio_crypto_common.h" + +struct virtio_crypto_ablkcipher_ctx { + struct virtio_crypto *vcrypto; + struct crypto_tfm *tfm; + + struct virtio_crypto_sym_session_info enc_sess_info; + struct virtio_crypto_sym_session_info dec_sess_info; +}; + +struct virtio_crypto_sym_request { + struct virtio_crypto_request base; + + /* Cipher or aead */ + uint32_t type; + struct virtio_crypto_ablkcipher_ctx *ablkcipher_ctx; + struct ablkcipher_request *ablkcipher_req; + uint8_t *iv; + /* Encryption? */ + bool encrypt; +}; + /* * The algs_lock protects the below global virtio_crypto_active_devs * and crypto algorithms registion. 
*/ static DEFINE_MUTEX(algs_lock); static unsigned int virtio_crypto_active_devs; +static void virtio_crypto_ablkcipher_finalize_req( + struct virtio_crypto_sym_request *vc_sym_req, + struct ablkcipher_request *req, + int err); + +static void virtio_crypto_dataq_sym_callback + (struct virtio_crypto_request *vc_req, int len) +{ + struct virtio_crypto_sym_request *vc_sym_req = + container_of(vc_req, struct virtio_crypto_sym_request, base); + struct ablkcipher_request *ablk_req; + int error; + + /* Finish the encrypt or decrypt process */ + if (vc_sym_req->type == VIRTIO_CRYPTO_SYM_OP_CIPHER) { + switch (vc_req->status) { + case VIRTIO_CRYPTO_OK: + error = 0; + break; + case VIRTIO_CRYPTO_INVSESS: + case VIRTIO_CRYPTO_ERR: + error = -EINVAL; + break; + case VIRTIO_CRYPTO_BADMSG: + error = -EBADMSG; + break; + default: + error = -EIO; + break; + } + ablk_req = vc_sym_req->ablkcipher_req; + virtio_crypto_ablkcipher_finalize_req(vc_sym_req, + ablk_req, error); + } +} static u64 virtio_crypto_alg_sg_nents_length(struct scatterlist *sg) { @@ -286,13 +342,14 @@ static int virtio_crypto_ablkcipher_setkey(struct crypto_ablkcipher *tfm, } static int -__virtio_crypto_ablkcipher_do_req(struct virtio_crypto_request *vc_req, +__virtio_crypto_ablkcipher_do_req(struct virtio_crypto_sym_request *vc_sym_req, struct ablkcipher_request *req, struct data_queue *data_vq) { struct crypto_ablkcipher *tfm = crypto_ablkcipher_reqtfm(req); + struct virtio_crypto_ablkcipher_ctx *ctx = vc_sym_req->ablkcipher_ctx; + struct virtio_crypto_request *vc_req = &vc_sym_req->base; unsigned int ivsize = crypto_ablkcipher_ivsize(tfm); - struct virtio_crypto_ablkcipher_ctx *ctx = vc_req->ablkcipher_ctx; struct virtio_crypto *vcrypto = ctx->vcrypto; struct virtio_crypto_op_data_req *req_data; int src_nents, dst_nents; @@ -326,9 +383,9 @@ __virtio_crypto_ablkcipher_do_req(struct virtio_crypto_request *vc_req, } vc_req->req_data = req_data; - vc_req->type = VIRTIO_CRYPTO_SYM_OP_CIPHER; + vc_sym_req->type = 
VIRTIO_CRYPTO_SYM_OP_CIPHER; /* Head of operation */ - if (vc_req->encrypt) { + if (vc_sym_req->encrypt) { req_data->header.session_id = cpu_to_le64(ctx->enc_sess_info.session_id); req_data->header.opcode = @@ -383,7 +440,7 @@ __virtio_crypto_ablkcipher_do_req(struct virtio_crypto_request *vc_req, memcpy(iv, req->info, ivsize); sg_init_one(&iv_sg, iv, ivsize); sgs[num_out++] = &iv_sg; - vc_req->iv = iv; + vc_sym_req->iv = iv; /* Source data */ for (i = 0; i < src_nents; i++) @@ -421,15 +478,18 @@ static int virtio_crypto_ablkcipher_encrypt(struct ablkcipher_request *req) { struct crypto_ablkcipher *atfm = crypto_ablkcipher_reqtfm(req); struct virtio_crypto_ablkcipher_ctx *ctx = crypto_ablkcipher_ctx(atfm); - struct virtio_crypto_request *vc_req = ablkcipher_request_ctx(req); + struct virtio_crypto_sym_request *vc_sym_req = + ablkcipher_request_ctx(req); + struct virtio_crypto_request *vc_req = &vc_sym_req->base; struct virtio_crypto *vcrypto = ctx->vcrypto; /* Use the first data virtqueue as default */ struct data_queue *data_vq = &vcrypto->data_vq[0]; - vc_req->ablkcipher_ctx = ctx; - vc_req->ablkcipher_req = req; - vc_req->encrypt = true; vc_req->dataq = data_vq; + vc_req->alg_cb = virtio_crypto_dataq_sym_callback; + vc_sym_req->ablkcipher_ctx = ctx; + vc_sym_req->ablkcipher_req = req; + vc_sym_req->encrypt = true; return crypto_transfer_cipher_request_to_engine(data_vq->engine, req); } @@ -438,16 +498,18 @@ static int virtio_crypto_ablkcipher_decrypt(struct ablkcipher_request *req) { struct crypto_ablkcipher *atfm = crypto_ablkcipher_reqtfm(req); struct virtio_crypto_ablkcipher_ctx *ctx = crypto_ablkcipher_ctx(atfm); - struct virtio_crypto_request *vc_req = ablkcipher_request_ctx(req); + struct virtio_crypto_sym_request *vc_sym_req = + ablkcipher_request_ctx(req); + struct virtio_crypto_request *vc_req = &vc_sym_req->base; struct virtio_crypto *vcrypto = ctx->vcrypto; /* Use the first data virtqueue as default */ struct data_queue *data_vq = 
&vcrypto->data_vq[0]; - vc_req->ablkcipher_ctx = ctx; - vc_req->ablkcipher_req = req; - - vc_req->encrypt = false; vc_req->dataq = data_vq; + vc_req->alg_cb = virtio_crypto_dataq_sym_callback; + vc_sym_req->ablkcipher_ctx = ctx; + vc_sym_req->ablkcipher_req = req; + vc_sym_req->encrypt = false; return crypto_transfer_cipher_request_to_engine(data_vq->engine, req); } @@ -456,7 +518,7 @@ static int virtio_crypto_ablkcipher_init(struct crypto_tfm *tfm) { struct virtio_crypto_ablkcipher_ctx *ctx = crypto_tfm_ctx(tfm); - tfm->crt_ablkcipher.reqsize = sizeof(struct virtio_crypto_request); + tfm->crt_ablkcipher.reqsize = sizeof(struct virtio_crypto_sym_request); ctx->tfm = tfm; return 0; @@ -479,11 +541,13 @@ int virtio_crypto_ablkcipher_crypt_req( struct crypto_engine *engine, struct ablkcipher_request *req) { - struct virtio_crypto_request *vc_req = ablkcipher_request_ctx(req); + struct virtio_crypto_sym_request *vc_sym_req = + ablkcipher_request_ctx(req); + struct virtio_crypto_request *vc_req = &vc_sym_req->base; struct data_queue *data_vq = vc_req->dataq; int ret; - ret = __virtio_crypto_ablkcipher_do_req(vc_req, req, data_vq); + ret = __virtio_crypto_ablkcipher_do_req(vc_sym_req, req, data_vq); if (ret < 0) return ret; @@ -492,14 +556,15 @@ int virtio_crypto_ablkcipher_crypt_req( return 0; } -void virtio_crypto_ablkcipher_finalize_req( - struct virtio_crypto_request *vc_req, +static void virtio_crypto_ablkcipher_finalize_req( + struct virtio_crypto_sym_request *vc_sym_req, struct ablkcipher_request *req, int err) { - crypto_finalize_cipher_request(vc_req->dataq->engine, req, err); - - virtcrypto_clear_request(vc_req); + crypto_finalize_cipher_request(vc_sym_req->base.dataq->engine, + req, err); + kzfree(vc_sym_req->iv); + virtcrypto_clear_request(&vc_sym_req->base); } static struct crypto_alg virtio_crypto_algs[] = { { diff --git a/drivers/crypto/virtio/virtio_crypto_common.h b/drivers/crypto/virtio/virtio_crypto_common.h index da6d8c0ea407..e976539a05d9 100644 --- 
a/drivers/crypto/virtio/virtio_crypto_common.h +++ b/drivers/crypto/virtio/virtio_crypto_common.h @@ -83,26 +83,16 @@ struct virtio_crypto_sym_session_info { __u64 session_id; }; -struct virtio_crypto_ablkcipher_ctx { - struct virtio_crypto *vcrypto; - struct crypto_tfm *tfm; - - struct virtio_crypto_sym_session_info enc_sess_info; - struct virtio_crypto_sym_session_info dec_sess_info; -}; +struct virtio_crypto_request; +typedef void (*virtio_crypto_data_callback) + (struct virtio_crypto_request *vc_req, int len); struct virtio_crypto_request { - /* Cipher or aead */ - uint32_t type; uint8_t status; - struct virtio_crypto_ablkcipher_ctx *ablkcipher_ctx; - struct ablkcipher_request *ablkcipher_req; struct virtio_crypto_op_data_req *req_data; struct scatterlist **sgs; - uint8_t *iv; - /* Encryption? */ - bool encrypt; struct data_queue *dataq; + virtio_crypto_data_callback alg_cb; }; int virtcrypto_devmgr_add_dev(struct virtio_crypto *vcrypto_dev); @@ -119,10 +109,6 @@ void virtcrypto_dev_stop(struct virtio_crypto *vcrypto); int virtio_crypto_ablkcipher_crypt_req( struct crypto_engine *engine, struct ablkcipher_request *req); -void virtio_crypto_ablkcipher_finalize_req( - struct virtio_crypto_request *vc_req, - struct ablkcipher_request *req, - int err); void virtcrypto_clear_request(struct virtio_crypto_request *vc_req); diff --git a/drivers/crypto/virtio/virtio_crypto_core.c b/drivers/crypto/virtio/virtio_crypto_core.c index a111cd72797b..ff1410a32c2b 100644 --- a/drivers/crypto/virtio/virtio_crypto_core.c +++ b/drivers/crypto/virtio/virtio_crypto_core.c @@ -29,7 +29,6 @@ void virtcrypto_clear_request(struct virtio_crypto_request *vc_req) { if (vc_req) { - kzfree(vc_req->iv); kzfree(vc_req->req_data); kfree(vc_req->sgs); } @@ -41,40 +40,18 @@ static void virtcrypto_dataq_callback(struct virtqueue *vq) struct virtio_crypto_request *vc_req; unsigned long flags; unsigned int len; - struct ablkcipher_request *ablk_req; - int error; unsigned int qid = vq->index; 
spin_lock_irqsave(&vcrypto->data_vq[qid].lock, flags); do { virtqueue_disable_cb(vq); while ((vc_req = virtqueue_get_buf(vq, &len)) != NULL) { - if (vc_req->type == VIRTIO_CRYPTO_SYM_OP_CIPHER) { - switch (vc_req->status) { - case VIRTIO_CRYPTO_OK: - error = 0; - break; - case VIRTIO_CRYPTO_INVSESS: - case VIRTIO_CRYPTO_ERR: - error = -EINVAL; - break; - case VIRTIO_CRYPTO_BADMSG: - error = -EBADMSG; - break; - default: - error = -EIO; - break; - } - ablk_req = vc_req->ablkcipher_req; - - spin_unlock_irqrestore( - &vcrypto->data_vq[qid].lock, flags); - /* Finish the encrypt or decrypt process */ - virtio_crypto_ablkcipher_finalize_req(vc_req, - ablk_req, error); - spin_lock_irqsave( - &vcrypto->data_vq[qid].lock, flags); - } + spin_unlock_irqrestore( + &vcrypto->data_vq[qid].lock, flags); + if (vc_req->alg_cb) + vc_req->alg_cb(vc_req, len); + spin_lock_irqsave( + &vcrypto->data_vq[qid].lock, flags); } } while (!virtqueue_enable_cb(vq)); spin_unlock_irqrestore(&vcrypto->data_vq[qid].lock, flags); @@ -270,7 +247,7 @@ static int virtcrypto_update_status(struct virtio_crypto *vcrypto) return -EPERM; } - dev_info(&vcrypto->vdev->dev, "Accelerator is ready\n"); + dev_info(&vcrypto->vdev->dev, "Accelerator device is ready\n"); } else { virtcrypto_dev_stop(vcrypto); dev_info(&vcrypto->vdev->dev, "Accelerator is not ready\n"); diff --git a/drivers/crypto/vmx/aes_ctr.c b/drivers/crypto/vmx/aes_ctr.c index 9c26d9e8dbea..17d84217dd76 100644 --- a/drivers/crypto/vmx/aes_ctr.c +++ b/drivers/crypto/vmx/aes_ctr.c @@ -104,8 +104,7 @@ static void p8_aes_ctr_final(struct p8_aes_ctr_ctx *ctx, pagefault_enable(); preempt_enable(); - crypto_xor(keystream, src, nbytes); - memcpy(dst, keystream, nbytes); + crypto_xor_cpy(dst, keystream, src, nbytes); crypto_inc(ctrblk, AES_BLOCK_SIZE); } diff --git a/drivers/md/dm-crypt.c b/drivers/md/dm-crypt.c index cdf6b1e12460..fa17e5452796 100644 --- a/drivers/md/dm-crypt.c +++ b/drivers/md/dm-crypt.c @@ -758,9 +758,8 @@ static int 
crypt_iv_tcw_whitening(struct crypt_config *cc, int i, r; /* xor whitening with sector number */ - memcpy(buf, tcw->whitening, TCW_WHITENING_SIZE); - crypto_xor(buf, (u8 *)&sector, 8); - crypto_xor(&buf[8], (u8 *)&sector, 8); + crypto_xor_cpy(buf, tcw->whitening, (u8 *)&sector, 8); + crypto_xor_cpy(&buf[8], tcw->whitening + 8, (u8 *)&sector, 8); /* calculate crc32 for every 32bit part and xor it */ desc->tfm = tcw->crc32_tfm; @@ -805,10 +804,10 @@ static int crypt_iv_tcw_gen(struct crypt_config *cc, u8 *iv, } /* Calculate IV */ - memcpy(iv, tcw->iv_seed, cc->iv_size); - crypto_xor(iv, (u8 *)&sector, 8); + crypto_xor_cpy(iv, tcw->iv_seed, (u8 *)&sector, 8); if (cc->iv_size > 8) - crypto_xor(&iv[8], (u8 *)&sector, cc->iv_size - 8); + crypto_xor_cpy(&iv[8], tcw->iv_seed + 8, (u8 *)&sector, + cc->iv_size - 8); return r; } diff --git a/include/crypto/algapi.h b/include/crypto/algapi.h index 436c4c2683c7..e3cebf640c00 100644 --- a/include/crypto/algapi.h +++ b/include/crypto/algapi.h @@ -192,7 +192,7 @@ static inline unsigned int crypto_queue_len(struct crypto_queue *queue) } void crypto_inc(u8 *a, unsigned int size); -void __crypto_xor(u8 *dst, const u8 *src, unsigned int size); +void __crypto_xor(u8 *dst, const u8 *src1, const u8 *src2, unsigned int size); static inline void crypto_xor(u8 *dst, const u8 *src, unsigned int size) { @@ -207,7 +207,26 @@ static inline void crypto_xor(u8 *dst, const u8 *src, unsigned int size) size -= sizeof(unsigned long); } } else { - __crypto_xor(dst, src, size); + __crypto_xor(dst, dst, src, size); + } +} + +static inline void crypto_xor_cpy(u8 *dst, const u8 *src1, const u8 *src2, + unsigned int size) +{ + if (IS_ENABLED(CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS) && + __builtin_constant_p(size) && + (size % sizeof(unsigned long)) == 0) { + unsigned long *d = (unsigned long *)dst; + unsigned long *s1 = (unsigned long *)src1; + unsigned long *s2 = (unsigned long *)src2; + + while (size > 0) { + *d++ = *s1++ ^ *s2++; + size -= sizeof(unsigned long); + } + } else { + 
__crypto_xor(dst, src1, src2, size); } } diff --git a/include/crypto/if_alg.h b/include/crypto/if_alg.h index e2b9c6fe2714..75ec9c662268 100644 --- a/include/crypto/if_alg.h +++ b/include/crypto/if_alg.h @@ -20,6 +20,9 @@ #include <linux/types.h> #include <net/sock.h> +#include <crypto/aead.h> +#include <crypto/skcipher.h> + #define ALG_MAX_PAGES 16 struct crypto_async_request; @@ -68,6 +71,99 @@ struct af_alg_sgl { unsigned int npages; }; +/* TX SGL entry */ +struct af_alg_tsgl { + struct list_head list; + unsigned int cur; /* Last processed SG entry */ + struct scatterlist sg[0]; /* Array of SGs forming the SGL */ +}; + +#define MAX_SGL_ENTS ((4096 - sizeof(struct af_alg_tsgl)) / \ + sizeof(struct scatterlist) - 1) + +/* RX SGL entry */ +struct af_alg_rsgl { + struct af_alg_sgl sgl; + struct list_head list; + size_t sg_num_bytes; /* Bytes of data in that SGL */ +}; + +/** + * struct af_alg_async_req - definition of crypto request + * @iocb: IOCB for AIO operations + * @sk: Socket the request is associated with + * @first_rsgl: First RX SG + * @last_rsgl: Pointer to last RX SG + * @rsgl_list: Track RX SGs + * @tsgl: Private, per request TX SGL of buffers to process + * @tsgl_entries: Number of entries in priv. TX SGL + * @outlen: Number of output bytes generated by crypto op + * @areqlen: Length of this data structure + * @cra_u: Cipher request + */ +struct af_alg_async_req { + struct kiocb *iocb; + struct sock *sk; + + struct af_alg_rsgl first_rsgl; + struct af_alg_rsgl *last_rsgl; + struct list_head rsgl_list; + + struct scatterlist *tsgl; + unsigned int tsgl_entries; + + unsigned int outlen; + unsigned int areqlen; + + union { + struct aead_request aead_req; + struct skcipher_request skcipher_req; + } cra_u; + + /* req ctx trails this struct */ +}; + +/** + * struct af_alg_ctx - definition of the crypto context + * + * The crypto context tracks the input data during the lifetime of an AF_ALG + * socket. 
+ * + * @tsgl_list: Link to TX SGL + * @iv: IV for cipher operation + * @aead_assoclen: Length of AAD for AEAD cipher operations + * @completion: Work queue for synchronous operation + * @used: TX bytes sent to kernel. This variable is used to + * ensure that user space cannot cause the kernel + * to allocate too much memory in sendmsg operation. + * @rcvused: Total RX bytes to be filled by kernel. This variable + * is used to ensure user space cannot cause the kernel + * to allocate too much memory in a recvmsg operation. + * @more: More data to be expected from user space? + * @merge: Shall new data from user space be merged into existing + * SG? + * @enc: Cryptographic operation to be performed when + * recvmsg is invoked. + * @len: Length of memory allocated for this data structure. + */ +struct af_alg_ctx { + struct list_head tsgl_list; + + void *iv; + size_t aead_assoclen; + + struct af_alg_completion completion; + + size_t used; + size_t rcvused; + + bool more; + bool merge; + bool enc; + + unsigned int len; +}; + int af_alg_register_type(const struct af_alg_type *type); int af_alg_unregister_type(const struct af_alg_type *type); @@ -94,4 +190,78 @@ static inline void af_alg_init_completion(struct af_alg_completion *completion) init_completion(&completion->completion); } +/** + * Size of available buffer for sending data from user space to kernel. + * + * @sk socket of connection to user space + * @return number of bytes still available + */ +static inline int af_alg_sndbuf(struct sock *sk) +{ + struct alg_sock *ask = alg_sk(sk); + struct af_alg_ctx *ctx = ask->private; + + return max_t(int, max_t(int, sk->sk_sndbuf & PAGE_MASK, PAGE_SIZE) - + ctx->used, 0); +} + +/** + * Can the send buffer still be written to? 
+ * + * @sk socket of connection to user space + * @return true => writable, false => not writable + */ +static inline bool af_alg_writable(struct sock *sk) +{ + return PAGE_SIZE <= af_alg_sndbuf(sk); +} + +/** + * Size of available buffer used by kernel for the RX user space operation. + * + * @sk socket of connection to user space + * @return number of bytes still available + */ +static inline int af_alg_rcvbuf(struct sock *sk) +{ + struct alg_sock *ask = alg_sk(sk); + struct af_alg_ctx *ctx = ask->private; + + return max_t(int, max_t(int, sk->sk_rcvbuf & PAGE_MASK, PAGE_SIZE) - + ctx->rcvused, 0); +} + +/** + * Can the RX buffer still be written to? + * + * @sk socket of connection to user space + * @return true => writable, false => not writable + */ +static inline bool af_alg_readable(struct sock *sk) +{ + return PAGE_SIZE <= af_alg_rcvbuf(sk); +} + +int af_alg_alloc_tsgl(struct sock *sk); +unsigned int af_alg_count_tsgl(struct sock *sk, size_t bytes, size_t offset); +void af_alg_pull_tsgl(struct sock *sk, size_t used, struct scatterlist *dst, + size_t dst_offset); +void af_alg_free_areq_sgls(struct af_alg_async_req *areq); +int af_alg_wait_for_wmem(struct sock *sk, unsigned int flags); +void af_alg_wmem_wakeup(struct sock *sk); +int af_alg_wait_for_data(struct sock *sk, unsigned flags); +void af_alg_data_wakeup(struct sock *sk); +int af_alg_sendmsg(struct socket *sock, struct msghdr *msg, size_t size, + unsigned int ivsize); +ssize_t af_alg_sendpage(struct socket *sock, struct page *page, + int offset, size_t size, int flags); +void af_alg_async_cb(struct crypto_async_request *_req, int err); +unsigned int af_alg_poll(struct file *file, struct socket *sock, + poll_table *wait); +struct af_alg_async_req *af_alg_alloc_areq(struct sock *sk, + unsigned int areqlen); +int af_alg_get_rsgl(struct sock *sk, struct msghdr *msg, int flags, + struct af_alg_async_req *areq, size_t maxsize, + size_t *outlen); + #endif /* _CRYPTO_IF_ALG_H */ diff --git 
a/include/crypto/internal/akcipher.h b/include/crypto/internal/akcipher.h index 479a0078f0f7..805686ba2be4 100644 --- a/include/crypto/internal/akcipher.h +++ b/include/crypto/internal/akcipher.h @@ -38,6 +38,12 @@ static inline void *akcipher_request_ctx(struct akcipher_request *req) return req->__ctx; } +static inline void akcipher_set_reqsize(struct crypto_akcipher *akcipher, + unsigned int reqsize) +{ + crypto_akcipher_alg(akcipher)->reqsize = reqsize; +} + static inline void *akcipher_tfm_ctx(struct crypto_akcipher *tfm) { return tfm->base.__crt_ctx; diff --git a/include/crypto/internal/hash.h b/include/crypto/internal/hash.h index f6d9af3efa45..f0b44c16e88f 100644 --- a/include/crypto/internal/hash.h +++ b/include/crypto/internal/hash.h @@ -76,6 +76,8 @@ static inline int crypto_ahash_walk_last(struct crypto_hash_walk *walk) int crypto_register_ahash(struct ahash_alg *alg); int crypto_unregister_ahash(struct ahash_alg *alg); +int crypto_register_ahashes(struct ahash_alg *algs, int count); +void crypto_unregister_ahashes(struct ahash_alg *algs, int count); int ahash_register_instance(struct crypto_template *tmpl, struct ahash_instance *inst); void ahash_free_instance(struct crypto_instance *inst); diff --git a/include/crypto/kpp.h b/include/crypto/kpp.h index 2133d17b7156..1bde0a6514fa 100644 --- a/include/crypto/kpp.h +++ b/include/crypto/kpp.h @@ -145,6 +145,16 @@ static inline struct crypto_kpp *crypto_kpp_reqtfm(struct kpp_request *req) return __crypto_kpp_tfm(req->base.tfm); } +static inline u32 crypto_kpp_get_flags(struct crypto_kpp *tfm) +{ + return crypto_tfm_get_flags(crypto_kpp_tfm(tfm)); +} + +static inline void crypto_kpp_set_flags(struct crypto_kpp *tfm, u32 flags) +{ + crypto_tfm_set_flags(crypto_kpp_tfm(tfm), flags); +} + /** * crypto_free_kpp() - free KPP tfm handle * diff --git a/include/linux/ccp.h b/include/linux/ccp.h index 3285c944194a..7e9c991c95e0 100644 --- a/include/linux/ccp.h +++ b/include/linux/ccp.h @@ -1,7 +1,7 @@ /* * AMD 
Cryptographic Coprocessor (CCP) driver * - * Copyright (C) 2013,2016 Advanced Micro Devices, Inc. + * Copyright (C) 2013,2017 Advanced Micro Devices, Inc. * * Author: Tom Lendacky <thomas.lendacky@amd.com> * Author: Gary R Hook <gary.hook@amd.com> @@ -20,12 +20,10 @@ #include <crypto/aes.h> #include <crypto/sha.h> - struct ccp_device; struct ccp_cmd; -#if defined(CONFIG_CRYPTO_DEV_CCP_DD) || \ - defined(CONFIG_CRYPTO_DEV_CCP_DD_MODULE) +#if defined(CONFIG_CRYPTO_DEV_SP_CCP) /** * ccp_present - check if a CCP device is present @@ -71,7 +69,7 @@ unsigned int ccp_version(void); */ int ccp_enqueue_cmd(struct ccp_cmd *cmd); -#else /* CONFIG_CRYPTO_DEV_CCP_DD is not enabled */ +#else /* CONFIG_CRYPTO_DEV_SP_CCP is not enabled */ static inline int ccp_present(void) { @@ -88,7 +86,7 @@ static inline int ccp_enqueue_cmd(struct ccp_cmd *cmd) return -ENODEV; } -#endif /* CONFIG_CRYPTO_DEV_CCP_DD */ +#endif /* CONFIG_CRYPTO_DEV_SP_CCP */ /***** AES engine *****/ @@ -231,6 +229,7 @@ enum ccp_xts_aes_unit_size { * AES operation the new IV overwrites the old IV. 
*/ struct ccp_xts_aes_engine { + enum ccp_aes_type type; enum ccp_aes_action action; enum ccp_xts_aes_unit_size unit_size; diff --git a/lib/mpi/longlong.h b/lib/mpi/longlong.h index 93336502af08..57fd45ab7af1 100644 --- a/lib/mpi/longlong.h +++ b/lib/mpi/longlong.h @@ -176,8 +176,8 @@ extern UDItype __udiv_qrnnd(UDItype *, UDItype, UDItype, UDItype); #define add_ssaaaa(sh, sl, ah, al, bh, bl) \ __asm__ ("adds %1, %4, %5\n" \ "adc %0, %2, %3" \ - : "=r" ((USItype)(sh)), \ - "=&r" ((USItype)(sl)) \ + : "=r" (sh), \ + "=&r" (sl) \ : "%r" ((USItype)(ah)), \ "rI" ((USItype)(bh)), \ "%r" ((USItype)(al)), \ @@ -185,15 +185,15 @@ extern UDItype __udiv_qrnnd(UDItype *, UDItype, UDItype, UDItype); #define sub_ddmmss(sh, sl, ah, al, bh, bl) \ __asm__ ("subs %1, %4, %5\n" \ "sbc %0, %2, %3" \ - : "=r" ((USItype)(sh)), \ - "=&r" ((USItype)(sl)) \ + : "=r" (sh), \ + "=&r" (sl) \ : "r" ((USItype)(ah)), \ "rI" ((USItype)(bh)), \ "r" ((USItype)(al)), \ "rI" ((USItype)(bl))) #if defined __ARM_ARCH_2__ || defined __ARM_ARCH_3__ #define umul_ppmm(xh, xl, a, b) \ - __asm__ ("%@ Inlined umul_ppmm\n" \ + __asm__ ("@ Inlined umul_ppmm\n" \ "mov %|r0, %2, lsr #16 @ AAAA\n" \ "mov %|r2, %3, lsr #16 @ BBBB\n" \ "bic %|r1, %2, %|r0, lsl #16 @ aaaa\n" \ @@ -206,19 +206,19 @@ extern UDItype __udiv_qrnnd(UDItype *, UDItype, UDItype, UDItype); "addcs %|r2, %|r2, #65536\n" \ "adds %1, %|r1, %|r0, lsl #16\n" \ "adc %0, %|r2, %|r0, lsr #16" \ - : "=&r" ((USItype)(xh)), \ - "=r" ((USItype)(xl)) \ + : "=&r" (xh), \ + "=r" (xl) \ : "r" ((USItype)(a)), \ "r" ((USItype)(b)) \ : "r0", "r1", "r2") #else #define umul_ppmm(xh, xl, a, b) \ - __asm__ ("%@ Inlined umul_ppmm\n" \ - "umull %r1, %r0, %r2, %r3" \ - : "=&r" ((USItype)(xh)), \ - "=&r" ((USItype)(xl)) \ + __asm__ ("@ Inlined umul_ppmm\n" \ + "umull %1, %0, %2, %3" \ + : "=&r" (xh), \ + "=&r" (xl) \ : "r" ((USItype)(a)), \ - "r" ((USItype)(b)) \ + "r" ((USItype)(b)) \ : "r0", "r1") #endif #define UMUL_TIME 20 |