diff options
author | Linus Torvalds <torvalds@linux-foundation.org> | 2012-10-02 12:57:42 -0700 |
---|---|---|
committer | Linus Torvalds <torvalds@linux-foundation.org> | 2012-10-02 12:57:42 -0700 |
commit | a20acf99f75e49271381d65db097c9763060a1e8 (patch) | |
tree | 3cf661125e86b7625171b96b885bf5395f62e684 /arch/sparc/crypto | |
parent | 437589a74b6a590d175f86cf9f7b2efcee7765e7 (diff) | |
parent | 42a4172b6ebb4a419085c6caee7c135e51cae5ea (diff) | |
download | linux-a20acf99f75e49271381d65db097c9763060a1e8.tar.bz2 |
Merge git://git.kernel.org/pub/scm/linux/kernel/git/davem/sparc-next
Pull sparc updates from David Miller:
"Largely this is simply adding support for the Niagara 4 cpu.
Major areas are perf events (chip now supports 4 counters and can
monitor any event on each counter), crypto (opcodes are availble for
sha1, sha256, sha512, md5, crc32c, AES, DES, CAMELLIA, and Kasumi
although the last is unsupported since we lack a generic crypto layer
Kasumi implementation), and an optimized memcpy.
Finally some cleanups by Peter Senna Tschudin."
* git://git.kernel.org/pub/scm/linux/kernel/git/davem/sparc-next: (47 commits)
sparc64: Fix trailing whitespace in NG4 memcpy.
sparc64: Fix comment type in NG4 copy from user.
sparc64: Add SPARC-T4 optimized memcpy.
drivers/sbus/char: removes unnecessary semicolon
arch/sparc/kernel/pci_sun4v.c: removes unnecessary semicolon
sparc64: Fix function argument comment in camellia_sparc64_key_expand asm.
sparc64: Fix IV handling bug in des_sparc64_cbc_decrypt
sparc64: Add auto-loading mechanism to crypto-opcode drivers.
sparc64: Add missing pr_fmt define to crypto opcode drivers.
sparc64: Adjust crypto priorities.
sparc64: Use cpu_pgsz_mask for linear kernel mapping config.
sparc64: Probe cpu page size support more portably.
sparc64: Support 2GB and 16GB page sizes for kernel linear mappings.
sparc64: Fix bugs in unrolled 256-bit loops.
sparc64: Avoid code duplication in crypto assembler.
sparc64: Unroll CTR crypt loops in AES driver.
sparc64: Unroll ECB decryption loops in AES driver.
sparc64: Unroll ECB encryption loops in AES driver.
sparc64: Add ctr mode support to AES driver.
sparc64: Move AES driver over to a methods based implementation.
...
Diffstat (limited to 'arch/sparc/crypto')
-rw-r--r-- | arch/sparc/crypto/Makefile | 25 | ||||
-rw-r--r-- | arch/sparc/crypto/aes_asm.S | 1535 | ||||
-rw-r--r-- | arch/sparc/crypto/aes_glue.c | 477 | ||||
-rw-r--r-- | arch/sparc/crypto/camellia_asm.S | 563 | ||||
-rw-r--r-- | arch/sparc/crypto/camellia_glue.c | 322 | ||||
-rw-r--r-- | arch/sparc/crypto/crc32c_asm.S | 20 | ||||
-rw-r--r-- | arch/sparc/crypto/crc32c_glue.c | 179 | ||||
-rw-r--r-- | arch/sparc/crypto/crop_devid.c | 14 | ||||
-rw-r--r-- | arch/sparc/crypto/des_asm.S | 418 | ||||
-rw-r--r-- | arch/sparc/crypto/des_glue.c | 529 | ||||
-rw-r--r-- | arch/sparc/crypto/md5_asm.S | 70 | ||||
-rw-r--r-- | arch/sparc/crypto/md5_glue.c | 188 | ||||
-rw-r--r-- | arch/sparc/crypto/opcodes.h | 99 | ||||
-rw-r--r-- | arch/sparc/crypto/sha1_asm.S | 72 | ||||
-rw-r--r-- | arch/sparc/crypto/sha1_glue.c | 183 | ||||
-rw-r--r-- | arch/sparc/crypto/sha256_asm.S | 78 | ||||
-rw-r--r-- | arch/sparc/crypto/sha256_glue.c | 241 | ||||
-rw-r--r-- | arch/sparc/crypto/sha512_asm.S | 102 | ||||
-rw-r--r-- | arch/sparc/crypto/sha512_glue.c | 226 |
19 files changed, 5341 insertions, 0 deletions
diff --git a/arch/sparc/crypto/Makefile b/arch/sparc/crypto/Makefile new file mode 100644 index 000000000000..6ae1ad5e502b --- /dev/null +++ b/arch/sparc/crypto/Makefile @@ -0,0 +1,25 @@ +# +# Arch-specific CryptoAPI modules. +# + +obj-$(CONFIG_CRYPTO_SHA1_SPARC64) += sha1-sparc64.o +obj-$(CONFIG_CRYPTO_SHA256_SPARC64) += sha256-sparc64.o +obj-$(CONFIG_CRYPTO_SHA512_SPARC64) += sha512-sparc64.o +obj-$(CONFIG_CRYPTO_MD5_SPARC64) += md5-sparc64.o + +obj-$(CONFIG_CRYPTO_AES_SPARC64) += aes-sparc64.o +obj-$(CONFIG_CRYPTO_DES_SPARC64) += des-sparc64.o +obj-$(CONFIG_CRYPTO_DES_SPARC64) += camellia-sparc64.o + +obj-$(CONFIG_CRYPTO_CRC32C_SPARC64) += crc32c-sparc64.o + +sha1-sparc64-y := sha1_asm.o sha1_glue.o crop_devid.o +sha256-sparc64-y := sha256_asm.o sha256_glue.o crop_devid.o +sha512-sparc64-y := sha512_asm.o sha512_glue.o crop_devid.o +md5-sparc64-y := md5_asm.o md5_glue.o crop_devid.o + +aes-sparc64-y := aes_asm.o aes_glue.o crop_devid.o +des-sparc64-y := des_asm.o des_glue.o crop_devid.o +camellia-sparc64-y := camellia_asm.o camellia_glue.o crop_devid.o + +crc32c-sparc64-y := crc32c_asm.o crc32c_glue.o crop_devid.o diff --git a/arch/sparc/crypto/aes_asm.S b/arch/sparc/crypto/aes_asm.S new file mode 100644 index 000000000000..23f6cbb910d3 --- /dev/null +++ b/arch/sparc/crypto/aes_asm.S @@ -0,0 +1,1535 @@ +#include <linux/linkage.h> +#include <asm/visasm.h> + +#include "opcodes.h" + +#define ENCRYPT_TWO_ROUNDS(KEY_BASE, I0, I1, T0, T1) \ + AES_EROUND01(KEY_BASE + 0, I0, I1, T0) \ + AES_EROUND23(KEY_BASE + 2, I0, I1, T1) \ + AES_EROUND01(KEY_BASE + 4, T0, T1, I0) \ + AES_EROUND23(KEY_BASE + 6, T0, T1, I1) + +#define ENCRYPT_TWO_ROUNDS_2(KEY_BASE, I0, I1, I2, I3, T0, T1, T2, T3) \ + AES_EROUND01(KEY_BASE + 0, I0, I1, T0) \ + AES_EROUND23(KEY_BASE + 2, I0, I1, T1) \ + AES_EROUND01(KEY_BASE + 0, I2, I3, T2) \ + AES_EROUND23(KEY_BASE + 2, I2, I3, T3) \ + AES_EROUND01(KEY_BASE + 4, T0, T1, I0) \ + AES_EROUND23(KEY_BASE + 6, T0, T1, I1) \ + AES_EROUND01(KEY_BASE + 4, T2, T3, I2) \ + AES_EROUND23(KEY_BASE + 6, T2, T3, I3) + +#define ENCRYPT_TWO_ROUNDS_LAST(KEY_BASE, I0, I1, T0, T1) \ + AES_EROUND01(KEY_BASE + 0, I0, I1, T0) \ + AES_EROUND23(KEY_BASE + 2, I0, I1, T1) \ + AES_EROUND01_L(KEY_BASE + 4, T0, T1, I0) \ + AES_EROUND23_L(KEY_BASE + 6, T0, T1, I1) + +#define ENCRYPT_TWO_ROUNDS_LAST_2(KEY_BASE, I0, I1, I2, I3, T0, T1, T2, T3) \ + AES_EROUND01(KEY_BASE + 0, I0, I1, T0) \ + AES_EROUND23(KEY_BASE + 2, I0, I1, T1) \ + AES_EROUND01(KEY_BASE + 0, I2, I3, T2) \ + AES_EROUND23(KEY_BASE + 2, I2, I3, T3) \ + AES_EROUND01_L(KEY_BASE + 4, T0, T1, I0) \ + AES_EROUND23_L(KEY_BASE + 6, T0, T1, I1) \ + AES_EROUND01_L(KEY_BASE + 4, T2, T3, I2) \ + AES_EROUND23_L(KEY_BASE + 6, T2, T3, I3) + + /* 10 rounds */ +#define ENCRYPT_128(KEY_BASE, I0, I1, T0, T1) \ + ENCRYPT_TWO_ROUNDS(KEY_BASE + 0, I0, I1, T0, T1) \ + ENCRYPT_TWO_ROUNDS(KEY_BASE + 8, I0, I1, T0, T1) \ + ENCRYPT_TWO_ROUNDS(KEY_BASE + 16, I0, I1, T0, T1) \ + ENCRYPT_TWO_ROUNDS(KEY_BASE + 24, I0, I1, T0, T1) \ + ENCRYPT_TWO_ROUNDS_LAST(KEY_BASE + 32, I0, I1, T0, T1) + +#define ENCRYPT_128_2(KEY_BASE, I0, I1, I2, I3, T0, T1, T2, T3) \ + ENCRYPT_TWO_ROUNDS_2(KEY_BASE + 0, I0, I1, I2, I3, T0, T1, T2, T3) \ + ENCRYPT_TWO_ROUNDS_2(KEY_BASE + 8, I0, I1, I2, I3, T0, T1, T2, T3) \ + ENCRYPT_TWO_ROUNDS_2(KEY_BASE + 16, I0, I1, I2, I3, T0, T1, T2, T3) \ + ENCRYPT_TWO_ROUNDS_2(KEY_BASE + 24, I0, I1, I2, I3, T0, T1, T2, T3) \ + ENCRYPT_TWO_ROUNDS_LAST_2(KEY_BASE + 32, I0, I1, I2, I3, T0, T1, T2, T3) + + /* 12 rounds */ +#define ENCRYPT_192(KEY_BASE, I0, I1, T0, T1) \ + ENCRYPT_TWO_ROUNDS(KEY_BASE + 0, I0, I1, T0, T1) \ + ENCRYPT_TWO_ROUNDS(KEY_BASE + 8, I0, I1, T0, T1) \ + ENCRYPT_TWO_ROUNDS(KEY_BASE + 16, I0, I1, T0, T1) \ + ENCRYPT_TWO_ROUNDS(KEY_BASE + 24, I0, I1, T0, T1) \ + ENCRYPT_TWO_ROUNDS(KEY_BASE + 32, I0, I1, T0, T1) \ + ENCRYPT_TWO_ROUNDS_LAST(KEY_BASE + 40, I0, I1, T0, T1) + +#define ENCRYPT_192_2(KEY_BASE, I0, I1, I2, I3, T0, T1, T2, T3) \ + ENCRYPT_TWO_ROUNDS_2(KEY_BASE + 0, I0, I1, I2, I3, T0, T1, T2, T3) \ + ENCRYPT_TWO_ROUNDS_2(KEY_BASE + 8, I0, I1, I2, I3, T0, T1, T2, T3) \ + ENCRYPT_TWO_ROUNDS_2(KEY_BASE + 16, I0, I1, I2, I3, T0, T1, T2, T3) \ + ENCRYPT_TWO_ROUNDS_2(KEY_BASE + 24, I0, I1, I2, I3, T0, T1, T2, T3) \ + ENCRYPT_TWO_ROUNDS_2(KEY_BASE + 32, I0, I1, I2, I3, T0, T1, T2, T3) \ + ENCRYPT_TWO_ROUNDS_LAST_2(KEY_BASE + 40, I0, I1, I2, I3, T0, T1, T2, T3) + + /* 14 rounds */ +#define ENCRYPT_256(KEY_BASE, I0, I1, T0, T1) \ + ENCRYPT_TWO_ROUNDS(KEY_BASE + 0, I0, I1, T0, T1) \ + ENCRYPT_TWO_ROUNDS(KEY_BASE + 8, I0, I1, T0, T1) \ + ENCRYPT_TWO_ROUNDS(KEY_BASE + 16, I0, I1, T0, T1) \ + ENCRYPT_TWO_ROUNDS(KEY_BASE + 24, I0, I1, T0, T1) \ + ENCRYPT_TWO_ROUNDS(KEY_BASE + 32, I0, I1, T0, T1) \ + ENCRYPT_TWO_ROUNDS(KEY_BASE + 40, I0, I1, T0, T1) \ + ENCRYPT_TWO_ROUNDS_LAST(KEY_BASE + 48, I0, I1, T0, T1) + +#define ENCRYPT_256_TWO_ROUNDS_2(KEY_BASE, I0, I1, I2, I3, TMP_BASE) \ + ENCRYPT_TWO_ROUNDS_2(KEY_BASE, I0, I1, I2, I3, \ + TMP_BASE + 0, TMP_BASE + 2, TMP_BASE + 4, TMP_BASE + 6) + +#define ENCRYPT_256_2(KEY_BASE, I0, I1, I2, I3) \ + ENCRYPT_256_TWO_ROUNDS_2(KEY_BASE + 0, I0, I1, I2, I3, KEY_BASE + 48) \ + ldd [%o0 + 0xd0], %f56; \ + ldd [%o0 + 0xd8], %f58; \ + ENCRYPT_256_TWO_ROUNDS_2(KEY_BASE + 8, I0, I1, I2, I3, KEY_BASE + 0) \ + ldd [%o0 + 0xe0], %f60; \ + ldd [%o0 + 0xe8], %f62; \ + ENCRYPT_256_TWO_ROUNDS_2(KEY_BASE + 16, I0, I1, I2, I3, KEY_BASE + 0) \ + ENCRYPT_256_TWO_ROUNDS_2(KEY_BASE + 24, I0, I1, I2, I3, KEY_BASE + 0) \ + ENCRYPT_256_TWO_ROUNDS_2(KEY_BASE + 32, I0, I1, I2, I3, KEY_BASE + 0) \ + ENCRYPT_256_TWO_ROUNDS_2(KEY_BASE + 40, I0, I1, I2, I3, KEY_BASE + 0) \ + AES_EROUND01(KEY_BASE + 48, I0, I1, KEY_BASE + 0) \ + AES_EROUND23(KEY_BASE + 50, I0, I1, KEY_BASE + 2) \ + AES_EROUND01(KEY_BASE + 48, I2, I3, KEY_BASE + 4) \ + AES_EROUND23(KEY_BASE + 50, I2, I3, KEY_BASE + 6) \ + AES_EROUND01_L(KEY_BASE + 52, KEY_BASE + 0, KEY_BASE + 2, I0) \ + AES_EROUND23_L(KEY_BASE + 54, KEY_BASE + 0, KEY_BASE + 2, I1) \ + ldd [%o0 + 0x10], %f8; \ + ldd [%o0 + 0x18], %f10; \ + AES_EROUND01_L(KEY_BASE + 52, KEY_BASE + 4, KEY_BASE + 6, I2) \ + AES_EROUND23_L(KEY_BASE + 54, KEY_BASE + 4, KEY_BASE + 6, I3) \ + ldd [%o0 + 0x20], %f12; \ + ldd [%o0 + 0x28], %f14; + +#define DECRYPT_TWO_ROUNDS(KEY_BASE, I0, I1, T0, T1) \ + AES_DROUND23(KEY_BASE + 0, I0, I1, T1) \ + AES_DROUND01(KEY_BASE + 2, I0, I1, T0) \ + AES_DROUND23(KEY_BASE + 4, T0, T1, I1) \ + AES_DROUND01(KEY_BASE + 6, T0, T1, I0) + +#define DECRYPT_TWO_ROUNDS_2(KEY_BASE, I0, I1, I2, I3, T0, T1, T2, T3) \ + AES_DROUND23(KEY_BASE + 0, I0, I1, T1) \ + AES_DROUND01(KEY_BASE + 2, I0, I1, T0) \ + AES_DROUND23(KEY_BASE + 0, I2, I3, T3) \ + AES_DROUND01(KEY_BASE + 2, I2, I3, T2) \ + AES_DROUND23(KEY_BASE + 4, T0, T1, I1) \ + AES_DROUND01(KEY_BASE + 6, T0, T1, I0) \ + AES_DROUND23(KEY_BASE + 4, T2, T3, I3) \ + AES_DROUND01(KEY_BASE + 6, T2, T3, I2) + +#define DECRYPT_TWO_ROUNDS_LAST(KEY_BASE, I0, I1, T0, T1) \ + AES_DROUND23(KEY_BASE + 0, I0, I1, T1) \ + AES_DROUND01(KEY_BASE + 2, I0, I1, T0) \ + AES_DROUND23_L(KEY_BASE + 4, T0, T1, I1) \ + AES_DROUND01_L(KEY_BASE + 6, T0, T1, I0) + +#define DECRYPT_TWO_ROUNDS_LAST_2(KEY_BASE, I0, I1, I2, I3, T0, T1, T2, T3) \ + AES_DROUND23(KEY_BASE + 0, I0, I1, T1) \ + AES_DROUND01(KEY_BASE + 2, I0, I1, T0) \ + AES_DROUND23(KEY_BASE + 0, I2, I3, T3) \ + AES_DROUND01(KEY_BASE + 2, I2, I3, T2) \ + AES_DROUND23_L(KEY_BASE + 4, T0, T1, I1) \ + AES_DROUND01_L(KEY_BASE + 6, T0, T1, I0) \ + AES_DROUND23_L(KEY_BASE + 4, T2, T3, I3) \ + AES_DROUND01_L(KEY_BASE + 6, T2, T3, I2) + + /* 10 rounds */ +#define DECRYPT_128(KEY_BASE, I0, I1, T0, T1) \ + DECRYPT_TWO_ROUNDS(KEY_BASE + 0, I0, I1, T0, T1) \ + DECRYPT_TWO_ROUNDS(KEY_BASE + 8, I0, I1, T0, T1) \ + DECRYPT_TWO_ROUNDS(KEY_BASE + 16, I0, I1, T0, T1) \ + DECRYPT_TWO_ROUNDS(KEY_BASE + 24, I0, I1, T0, T1) \ + DECRYPT_TWO_ROUNDS_LAST(KEY_BASE + 32, I0, I1, T0, T1) + +#define DECRYPT_128_2(KEY_BASE, I0, I1, I2, I3, T0, T1, T2, T3) \ + DECRYPT_TWO_ROUNDS_2(KEY_BASE + 0, I0, I1, I2, I3, T0, T1, T2, T3) \ + DECRYPT_TWO_ROUNDS_2(KEY_BASE + 8, I0, I1, I2, I3, T0, T1, T2, T3) \ + DECRYPT_TWO_ROUNDS_2(KEY_BASE + 16, I0, I1, I2, I3, T0, T1, T2, T3) \ + DECRYPT_TWO_ROUNDS_2(KEY_BASE + 24, I0, I1, I2, I3, T0, T1, T2, T3) \ + DECRYPT_TWO_ROUNDS_LAST_2(KEY_BASE + 32, I0, I1, I2, I3, T0, T1, T2, T3) + + /* 12 rounds */ +#define DECRYPT_192(KEY_BASE, I0, I1, T0, T1) \ + DECRYPT_TWO_ROUNDS(KEY_BASE + 0, I0, I1, T0, T1) \ + DECRYPT_TWO_ROUNDS(KEY_BASE + 8, I0, I1, T0, T1) \ + DECRYPT_TWO_ROUNDS(KEY_BASE + 16, I0, I1, T0, T1) \ + DECRYPT_TWO_ROUNDS(KEY_BASE + 24, I0, I1, T0, T1) \ + DECRYPT_TWO_ROUNDS(KEY_BASE + 32, I0, I1, T0, T1) \ + DECRYPT_TWO_ROUNDS_LAST(KEY_BASE + 40, I0, I1, T0, T1) + +#define DECRYPT_192_2(KEY_BASE, I0, I1, I2, I3, T0, T1, T2, T3) \ + DECRYPT_TWO_ROUNDS_2(KEY_BASE + 0, I0, I1, I2, I3, T0, T1, T2, T3) \ + DECRYPT_TWO_ROUNDS_2(KEY_BASE + 8, I0, I1, I2, I3, T0, T1, T2, T3) \ + DECRYPT_TWO_ROUNDS_2(KEY_BASE + 16, I0, I1, I2, I3, T0, T1, T2, T3) \ + DECRYPT_TWO_ROUNDS_2(KEY_BASE + 24, I0, I1, I2, I3, T0, T1, T2, T3) \ + DECRYPT_TWO_ROUNDS_2(KEY_BASE + 32, I0, I1, I2, I3, T0, T1, T2, T3) \ + DECRYPT_TWO_ROUNDS_LAST_2(KEY_BASE + 40, I0, I1, I2, I3, T0, T1, T2, T3) + + /* 14 rounds */ +#define DECRYPT_256(KEY_BASE, I0, I1, T0, T1) \ + DECRYPT_TWO_ROUNDS(KEY_BASE + 0, I0, I1, T0, T1) \ + DECRYPT_TWO_ROUNDS(KEY_BASE + 8, I0, I1, T0, T1) \ + DECRYPT_TWO_ROUNDS(KEY_BASE + 16, I0, I1, T0, T1) \ + DECRYPT_TWO_ROUNDS(KEY_BASE + 24, I0, I1, T0, T1) \ + DECRYPT_TWO_ROUNDS(KEY_BASE + 32, I0, I1, T0, T1) \ + DECRYPT_TWO_ROUNDS(KEY_BASE + 40, I0, I1, T0, T1) \ + DECRYPT_TWO_ROUNDS_LAST(KEY_BASE + 48, I0, I1, T0, T1) + +#define DECRYPT_256_TWO_ROUNDS_2(KEY_BASE, I0, I1, I2, I3, TMP_BASE) \ + DECRYPT_TWO_ROUNDS_2(KEY_BASE, I0, I1, I2, I3, \ + TMP_BASE + 0, TMP_BASE + 2, TMP_BASE + 4, TMP_BASE + 6) + +#define DECRYPT_256_2(KEY_BASE, I0, I1, I2, I3) \ + DECRYPT_256_TWO_ROUNDS_2(KEY_BASE + 0, I0, I1, I2, I3, KEY_BASE + 48) \ + ldd [%o0 + 0x18], %f56; \ + ldd [%o0 + 0x10], %f58; \ + DECRYPT_256_TWO_ROUNDS_2(KEY_BASE + 8, I0, I1, I2, I3, KEY_BASE + 0) \ + ldd [%o0 + 0x08], %f60; \ + ldd [%o0 + 0x00], %f62; \ + DECRYPT_256_TWO_ROUNDS_2(KEY_BASE + 16, I0, I1, I2, I3, KEY_BASE + 0) \ + DECRYPT_256_TWO_ROUNDS_2(KEY_BASE + 24, I0, I1, I2, I3, KEY_BASE + 0) \ + DECRYPT_256_TWO_ROUNDS_2(KEY_BASE + 32, I0, I1, I2, I3, KEY_BASE + 0) \ + DECRYPT_256_TWO_ROUNDS_2(KEY_BASE + 40, I0, I1, I2, I3, KEY_BASE + 0) \ + AES_DROUND23(KEY_BASE + 48, I0, I1, KEY_BASE + 2) \ + AES_DROUND01(KEY_BASE + 50, I0, I1, KEY_BASE + 0) \ + AES_DROUND23(KEY_BASE + 48, I2, I3, KEY_BASE + 6) \ + AES_DROUND01(KEY_BASE + 50, I2, I3, KEY_BASE + 4) \ + AES_DROUND23_L(KEY_BASE + 52, KEY_BASE + 0, KEY_BASE + 2, I1) \ + AES_DROUND01_L(KEY_BASE + 54, KEY_BASE + 0, KEY_BASE + 2, I0) \ + ldd [%o0 + 0xd8], %f8; \ + ldd [%o0 + 0xd0], %f10; \ + AES_DROUND23_L(KEY_BASE + 52, KEY_BASE + 4, KEY_BASE + 6, I3) \ + AES_DROUND01_L(KEY_BASE + 54, KEY_BASE + 4, KEY_BASE + 6, I2) \ + ldd [%o0 + 0xc8], %f12; \ + ldd [%o0 + 0xc0], %f14; + + .align 32 +ENTRY(aes_sparc64_key_expand) + /* %o0=input_key, %o1=output_key, %o2=key_len */ + VISEntry + ld [%o0 + 0x00], %f0 + ld [%o0 + 0x04], %f1 + ld [%o0 + 0x08], %f2 + ld [%o0 + 0x0c], %f3 + + std %f0, [%o1 + 0x00] + std %f2, [%o1 + 0x08] + add %o1, 0x10, %o1 + + cmp %o2, 24 + bl 2f + nop + + be 1f + nop + + /* 256-bit key expansion */ + ld [%o0 + 0x10], %f4 + ld [%o0 + 0x14], %f5 + ld [%o0 + 0x18], %f6 + ld [%o0 + 0x1c], %f7 + + std %f4, [%o1 + 0x00] + std %f6, [%o1 + 0x08] + add %o1, 0x10, %o1 + + AES_KEXPAND1(0, 6, 0x0, 8) + AES_KEXPAND2(2, 8, 10) + AES_KEXPAND0(4, 10, 12) + AES_KEXPAND2(6, 12, 14) + AES_KEXPAND1(8, 14, 0x1, 16) + AES_KEXPAND2(10, 16, 18) + AES_KEXPAND0(12, 18, 20) + AES_KEXPAND2(14, 20, 22) + AES_KEXPAND1(16, 22, 0x2, 24) + AES_KEXPAND2(18, 24, 26) + AES_KEXPAND0(20, 26, 28) + AES_KEXPAND2(22, 28, 30) + AES_KEXPAND1(24, 30, 0x3, 32) + AES_KEXPAND2(26, 32, 34) + AES_KEXPAND0(28, 34, 36) + AES_KEXPAND2(30, 36, 38) + AES_KEXPAND1(32, 38, 0x4, 40) + AES_KEXPAND2(34, 40, 42) + AES_KEXPAND0(36, 42, 44) + AES_KEXPAND2(38, 44, 46) + AES_KEXPAND1(40, 46, 0x5, 48) + AES_KEXPAND2(42, 48, 50) + AES_KEXPAND0(44, 50, 52) + AES_KEXPAND2(46, 52, 54) + AES_KEXPAND1(48, 54, 0x6, 56) + AES_KEXPAND2(50, 56, 58) + + std %f8, [%o1 + 0x00] + std %f10, [%o1 + 0x08] + std %f12, [%o1 + 0x10] + std %f14, [%o1 + 0x18] + std %f16, [%o1 + 0x20] + std %f18, [%o1 + 0x28] + std %f20, [%o1 + 0x30] + std %f22, [%o1 + 0x38] + std %f24, [%o1 + 0x40] + std %f26, [%o1 + 0x48] + std %f28, [%o1 + 0x50] + std %f30, [%o1 + 0x58] + std %f32, [%o1 + 0x60] + std %f34, [%o1 + 0x68] + std %f36, [%o1 + 0x70] + std %f38, [%o1 + 0x78] + std %f40, [%o1 + 0x80] + std %f42, [%o1 + 0x88] + std %f44, [%o1 + 0x90] + std %f46, [%o1 + 0x98] + std %f48, [%o1 + 0xa0] + std %f50, [%o1 + 0xa8] + std %f52, [%o1 + 0xb0] + std %f54, [%o1 + 0xb8] + std %f56, [%o1 + 0xc0] + ba,pt %xcc, 80f + std %f58, [%o1 + 0xc8] + +1: + /* 192-bit key expansion */ + ld [%o0 + 0x10], %f4 + ld [%o0 + 0x14], %f5 + + std %f4, [%o1 + 0x00] + add %o1, 0x08, %o1 + + AES_KEXPAND1(0, 4, 0x0, 6) + AES_KEXPAND2(2, 6, 8) + AES_KEXPAND2(4, 8, 10) + AES_KEXPAND1(6, 10, 0x1, 12) + AES_KEXPAND2(8, 12, 14) + AES_KEXPAND2(10, 14, 16) + AES_KEXPAND1(12, 16, 0x2, 18) + AES_KEXPAND2(14, 18, 20) + AES_KEXPAND2(16, 20, 22) + AES_KEXPAND1(18, 22, 0x3, 24) + AES_KEXPAND2(20, 24, 26) + AES_KEXPAND2(22, 26, 28) + AES_KEXPAND1(24, 28, 0x4, 30) + AES_KEXPAND2(26, 30, 32) + AES_KEXPAND2(28, 32, 34) + AES_KEXPAND1(30, 34, 0x5, 36) + AES_KEXPAND2(32, 36, 38) + AES_KEXPAND2(34, 38, 40) + AES_KEXPAND1(36, 40, 0x6, 42) + AES_KEXPAND2(38, 42, 44) + AES_KEXPAND2(40, 44, 46) + AES_KEXPAND1(42, 46, 0x7, 48) + AES_KEXPAND2(44, 48, 50) + + std %f6, [%o1 + 0x00] + std %f8, [%o1 + 0x08] + std %f10, [%o1 + 0x10] + std %f12, [%o1 + 0x18] + std %f14, [%o1 + 0x20] + std %f16, [%o1 + 0x28] + std %f18, [%o1 + 0x30] + std %f20, [%o1 + 0x38] + std %f22, [%o1 + 0x40] + std %f24, [%o1 + 0x48] + std %f26, [%o1 + 0x50] + std %f28, [%o1 + 0x58] + std %f30, [%o1 + 0x60] + std %f32, [%o1 + 0x68] + std %f34, [%o1 + 0x70] + std %f36, [%o1 + 0x78] + std %f38, [%o1 + 0x80] + std %f40, [%o1 + 0x88] + std %f42, [%o1 + 0x90] + std %f44, [%o1 + 0x98] + std %f46, [%o1 + 0xa0] + std %f48, [%o1 + 0xa8] + ba,pt %xcc, 80f + std %f50, [%o1 + 0xb0] + +2: + /* 128-bit key expansion */ + AES_KEXPAND1(0, 2, 0x0, 4) + AES_KEXPAND2(2, 4, 6) + AES_KEXPAND1(4, 6, 0x1, 8) + AES_KEXPAND2(6, 8, 10) + AES_KEXPAND1(8, 10, 0x2, 12) + AES_KEXPAND2(10, 12, 14) + AES_KEXPAND1(12, 14, 0x3, 16) + AES_KEXPAND2(14, 16, 18) + AES_KEXPAND1(16, 18, 0x4, 20) + AES_KEXPAND2(18, 20, 22) + AES_KEXPAND1(20, 22, 0x5, 24) + AES_KEXPAND2(22, 24, 26) + AES_KEXPAND1(24, 26, 0x6, 28) + AES_KEXPAND2(26, 28, 30) + AES_KEXPAND1(28, 30, 0x7, 32) + AES_KEXPAND2(30, 32, 34) + AES_KEXPAND1(32, 34, 0x8, 36) + AES_KEXPAND2(34, 36, 38) + AES_KEXPAND1(36, 38, 0x9, 40) + AES_KEXPAND2(38, 40, 42) + + std %f4, [%o1 + 0x00] + std %f6, [%o1 + 0x08] + std %f8, [%o1 + 0x10] + std %f10, [%o1 + 0x18] + std %f12, [%o1 + 0x20] + std %f14, [%o1 + 0x28] + std %f16, [%o1 + 0x30] + std %f18, [%o1 + 0x38] + std %f20, [%o1 + 0x40] + std %f22, [%o1 + 0x48] + std %f24, [%o1 + 0x50] + std %f26, [%o1 + 0x58] + std %f28, [%o1 + 0x60] + std %f30, [%o1 + 0x68] + std %f32, [%o1 + 0x70] + std %f34, [%o1 + 0x78] + std %f36, [%o1 + 0x80] + std %f38, [%o1 + 0x88] + std %f40, [%o1 + 0x90] + std %f42, [%o1 + 0x98] +80: + retl + VISExit +ENDPROC(aes_sparc64_key_expand) + + .align 32 +ENTRY(aes_sparc64_encrypt_128) + /* %o0=key, %o1=input, %o2=output */ + VISEntry + ld [%o1 + 0x00], %f4 + ld [%o1 + 0x04], %f5 + ld [%o1 + 0x08], %f6 + ld [%o1 + 0x0c], %f7 + ldd [%o0 + 0x00], %f8 + ldd [%o0 + 0x08], %f10 + ldd [%o0 + 0x10], %f12 + ldd [%o0 + 0x18], %f14 + ldd [%o0 + 0x20], %f16 + ldd [%o0 + 0x28], %f18 + ldd [%o0 + 0x30], %f20 + ldd [%o0 + 0x38], %f22 + ldd [%o0 + 0x40], %f24 + ldd [%o0 + 0x48], %f26 + ldd [%o0 + 0x50], %f28 + ldd [%o0 + 0x58], %f30 + ldd [%o0 + 0x60], %f32 + ldd [%o0 + 0x68], %f34 + ldd [%o0 + 0x70], %f36 + ldd [%o0 + 0x78], %f38 + ldd [%o0 + 0x80], %f40 + ldd [%o0 + 0x88], %f42 + ldd [%o0 + 0x90], %f44 + ldd [%o0 + 0x98], %f46 + ldd [%o0 + 0xa0], %f48 + ldd [%o0 + 0xa8], %f50 + fxor %f8, %f4, %f4 + fxor %f10, %f6, %f6 + ENCRYPT_128(12, 4, 6, 0, 2) + st %f4, [%o2 + 0x00] + st %f5, [%o2 + 0x04] + st %f6, [%o2 + 0x08] + st %f7, [%o2 + 0x0c] + retl + VISExit +ENDPROC(aes_sparc64_encrypt_128) + + .align 32 +ENTRY(aes_sparc64_encrypt_192) + /* %o0=key, %o1=input, %o2=output */ + VISEntry + ld [%o1 + 0x00], %f4 + ld [%o1 + 0x04], %f5 + ld [%o1 + 0x08], %f6 + ld [%o1 + 0x0c], %f7 + + ldd [%o0 + 0x00], %f8 + ldd [%o0 + 0x08], %f10 + + fxor %f8, %f4, %f4 + fxor %f10, %f6, %f6 + + ldd [%o0 + 0x10], %f8 + ldd [%o0 + 0x18], %f10 + ldd [%o0 + 0x20], %f12 + ldd [%o0 + 0x28], %f14 + add %o0, 0x20, %o0 + + ENCRYPT_TWO_ROUNDS(8, 4, 6, 0, 2) + + ldd [%o0 + 0x10], %f12 + ldd [%o0 + 0x18], %f14 + ldd [%o0 + 0x20], %f16 + ldd [%o0 + 0x28], %f18 + ldd [%o0 + 0x30], %f20 + ldd [%o0 + 0x38], %f22 + ldd [%o0 + 0x40], %f24 + ldd [%o0 + 0x48], %f26 + ldd [%o0 + 0x50], %f28 + ldd [%o0 + 0x58], %f30 + ldd [%o0 + 0x60], %f32 + ldd [%o0 + 0x68], %f34 + ldd [%o0 + 0x70], %f36 + ldd [%o0 + 0x78], %f38 + ldd [%o0 + 0x80], %f40 + ldd [%o0 + 0x88], %f42 + ldd [%o0 + 0x90], %f44 + ldd [%o0 + 0x98], %f46 + ldd [%o0 + 0xa0], %f48 + ldd [%o0 + 0xa8], %f50 + + + ENCRYPT_128(12, 4, 6, 0, 2) + + st %f4, [%o2 + 0x00] + st %f5, [%o2 + 0x04] + st %f6, [%o2 + 0x08] + st %f7, [%o2 + 0x0c] + + retl + VISExit +ENDPROC(aes_sparc64_encrypt_192) + + .align 32 +ENTRY(aes_sparc64_encrypt_256) + /* %o0=key, %o1=input, %o2=output */ + VISEntry + ld [%o1 + 0x00], %f4 + ld [%o1 + 0x04], %f5 + ld [%o1 + 0x08], %f6 + ld [%o1 + 0x0c], %f7 + + ldd [%o0 + 0x00], %f8 + ldd [%o0 + 0x08], %f10 + + fxor %f8, %f4, %f4 + fxor %f10, %f6, %f6 + + ldd [%o0 + 0x10], %f8 + + ldd [%o0 + 0x18], %f10 + ldd [%o0 + 0x20], %f12 + ldd [%o0 + 0x28], %f14 + add %o0, 0x20, %o0 + + ENCRYPT_TWO_ROUNDS(8, 4, 6, 0, 2) + + ldd [%o0 + 0x10], %f8 + + ldd [%o0 + 0x18], %f10 + ldd [%o0 + 0x20], %f12 + ldd [%o0 + 0x28], %f14 + add %o0, 0x20, %o0 + + ENCRYPT_TWO_ROUNDS(8, 4, 6, 0, 2) + + ldd [%o0 + 0x10], %f12 + ldd [%o0 + 0x18], %f14 + ldd [%o0 + 0x20], %f16 + ldd [%o0 + 0x28], %f18 + ldd [%o0 + 0x30], %f20 + ldd [%o0 + 0x38], %f22 + ldd [%o0 + 0x40], %f24 + ldd [%o0 + 0x48], %f26 + ldd [%o0 + 0x50], %f28 + ldd [%o0 + 0x58], %f30 + ldd [%o0 + 0x60], %f32 + ldd [%o0 + 0x68], %f34 + ldd [%o0 + 0x70], %f36 + ldd [%o0 + 0x78], %f38 + ldd [%o0 + 0x80], %f40 + ldd [%o0 + 0x88], %f42 + ldd [%o0 + 0x90], %f44 + ldd [%o0 + 0x98], %f46 + ldd [%o0 + 0xa0], %f48 + ldd [%o0 + 0xa8], %f50 + + ENCRYPT_128(12, 4, 6, 0, 2) + + st %f4, [%o2 + 0x00] + st %f5, [%o2 + 0x04] + st %f6, [%o2 + 0x08] + st %f7, [%o2 + 0x0c] + + retl + VISExit +ENDPROC(aes_sparc64_encrypt_256) + + .align 32 +ENTRY(aes_sparc64_decrypt_128) + /* %o0=key, %o1=input, %o2=output */ + VISEntry + ld [%o1 + 0x00], %f4 + ld [%o1 + 0x04], %f5 + ld [%o1 + 0x08], %f6 + ld [%o1 + 0x0c], %f7 + ldd [%o0 + 0xa0], %f8 + ldd [%o0 + 0xa8], %f10 + ldd [%o0 + 0x98], %f12 + ldd [%o0 + 0x90], %f14 + ldd [%o0 + 0x88], %f16 + ldd [%o0 + 0x80], %f18 + ldd [%o0 + 0x78], %f20 + ldd [%o0 + 0x70], %f22 + ldd [%o0 + 0x68], %f24 + ldd [%o0 + 0x60], %f26 + ldd [%o0 + 0x58], %f28 + ldd [%o0 + 0x50], %f30 + ldd [%o0 + 0x48], %f32 + ldd [%o0 + 0x40], %f34 + ldd [%o0 + 0x38], %f36 + ldd [%o0 + 0x30], %f38 + ldd [%o0 + 0x28], %f40 + ldd [%o0 + 0x20], %f42 + ldd [%o0 + 0x18], %f44 + ldd [%o0 + 0x10], %f46 + ldd [%o0 + 0x08], %f48 + ldd [%o0 + 0x00], %f50 + fxor %f8, %f4, %f4 + fxor %f10, %f6, %f6 + DECRYPT_128(12, 4, 6, 0, 2) + st %f4, [%o2 + 0x00] + st %f5, [%o2 + 0x04] + st %f6, [%o2 + 0x08] + st %f7, [%o2 + 0x0c] + retl + VISExit +ENDPROC(aes_sparc64_decrypt_128) + + .align 32 +ENTRY(aes_sparc64_decrypt_192) + /* %o0=key, %o1=input, %o2=output */ + VISEntry + ld [%o1 + 0x00], %f4 + ld [%o1 + 0x04], %f5 + ld [%o1 + 0x08], %f6 + ld [%o1 + 0x0c], %f7 + ldd [%o0 + 0xc0], %f8 + ldd [%o0 + 0xc8], %f10 + ldd [%o0 + 0xb8], %f12 + ldd [%o0 + 0xb0], %f14 + ldd [%o0 + 0xa8], %f16 + ldd [%o0 + 0xa0], %f18 + fxor %f8, %f4, %f4 + fxor %f10, %f6, %f6 + ldd [%o0 + 0x98], %f20 + ldd [%o0 + 0x90], %f22 + ldd [%o0 + 0x88], %f24 + ldd [%o0 + 0x80], %f26 + DECRYPT_TWO_ROUNDS(12, 4, 6, 0, 2) + ldd [%o0 + 0x78], %f28 + ldd [%o0 + 0x70], %f30 + ldd [%o0 + 0x68], %f32 + ldd [%o0 + 0x60], %f34 + ldd [%o0 + 0x58], %f36 + ldd [%o0 + 0x50], %f38 + ldd [%o0 + 0x48], %f40 + ldd [%o0 + 0x40], %f42 + ldd [%o0 + 0x38], %f44 + ldd [%o0 + 0x30], %f46 + ldd [%o0 + 0x28], %f48 + ldd [%o0 + 0x20], %f50 + ldd [%o0 + 0x18], %f52 + ldd [%o0 + 0x10], %f54 + ldd [%o0 + 0x08], %f56 + ldd [%o0 + 0x00], %f58 + DECRYPT_128(20, 4, 6, 0, 2) + st %f4, [%o2 + 0x00] + st %f5, [%o2 + 0x04] + st %f6, [%o2 + 0x08] + st %f7, [%o2 + 0x0c] + retl + VISExit +ENDPROC(aes_sparc64_decrypt_192) + + .align 32 +ENTRY(aes_sparc64_decrypt_256) + /* %o0=key, %o1=input, %o2=output */ + VISEntry + ld [%o1 + 0x00], %f4 + ld [%o1 + 0x04], %f5 + ld [%o1 + 0x08], %f6 + ld [%o1 + 0x0c], %f7 + ldd [%o0 + 0xe0], %f8 + ldd [%o0 + 0xe8], %f10 + ldd [%o0 + 0xd8], %f12 + ldd [%o0 + 0xd0], %f14 + ldd [%o0 + 0xc8], %f16 + fxor %f8, %f4, %f4 + ldd [%o0 + 0xc0], %f18 + fxor %f10, %f6, %f6 + ldd [%o0 + 0xb8], %f20 + AES_DROUND23(12, 4, 6, 2) + ldd [%o0 + 0xb0], %f22 + AES_DROUND01(14, 4, 6, 0) + ldd [%o0 + 0xa8], %f24 + AES_DROUND23(16, 0, 2, 6) + ldd [%o0 + 0xa0], %f26 + AES_DROUND01(18, 0, 2, 4) + ldd [%o0 + 0x98], %f12 + AES_DROUND23(20, 4, 6, 2) + ldd [%o0 + 0x90], %f14 + AES_DROUND01(22, 4, 6, 0) + ldd [%o0 + 0x88], %f16 + AES_DROUND23(24, 0, 2, 6) + ldd [%o0 + 0x80], %f18 + AES_DROUND01(26, 0, 2, 4) + ldd [%o0 + 0x78], %f20 + AES_DROUND23(12, 4, 6, 2) + ldd [%o0 + 0x70], %f22 + AES_DROUND01(14, 4, 6, 0) + ldd [%o0 + 0x68], %f24 + AES_DROUND23(16, 0, 2, 6) + ldd [%o0 + 0x60], %f26 + AES_DROUND01(18, 0, 2, 4) + ldd [%o0 + 0x58], %f28 + AES_DROUND23(20, 4, 6, 2) + ldd [%o0 + 0x50], %f30 + AES_DROUND01(22, 4, 6, 0) + ldd [%o0 + 0x48], %f32 + AES_DROUND23(24, 0, 2, 6) + ldd [%o0 + 0x40], %f34 + AES_DROUND01(26, 0, 2, 4) + ldd [%o0 + 0x38], %f36 + AES_DROUND23(28, 4, 6, 2) + ldd [%o0 + 0x30], %f38 + AES_DROUND01(30, 4, 6, 0) + ldd [%o0 + 0x28], %f40 + AES_DROUND23(32, 0, 2, 6) + ldd [%o0 + 0x20], %f42 + AES_DROUND01(34, 0, 2, 4) + ldd [%o0 + 0x18], %f44 + AES_DROUND23(36, 4, 6, 2) + ldd [%o0 + 0x10], %f46 + AES_DROUND01(38, 4, 6, 0) + ldd [%o0 + 0x08], %f48 + AES_DROUND23(40, 0, 2, 6) + ldd [%o0 + 0x00], %f50 + AES_DROUND01(42, 0, 2, 4) + AES_DROUND23(44, 4, 6, 2) + AES_DROUND01(46, 4, 6, 0) + AES_DROUND23_L(48, 0, 2, 6) + AES_DROUND01_L(50, 0, 2, 4) + st %f4, [%o2 + 0x00] + st %f5, [%o2 + 0x04] + st %f6, [%o2 + 0x08] + st %f7, [%o2 + 0x0c] + retl + VISExit +ENDPROC(aes_sparc64_decrypt_256) + + .align 32 +ENTRY(aes_sparc64_load_encrypt_keys_128) + /* %o0=key */ + VISEntry + ldd [%o0 + 0x10], %f8 + ldd [%o0 + 0x18], %f10 + ldd [%o0 + 0x20], %f12 + ldd [%o0 + 0x28], %f14 + ldd [%o0 + 0x30], %f16 + ldd [%o0 + 0x38], %f18 + ldd [%o0 + 0x40], %f20 + ldd [%o0 + 0x48], %f22 + ldd [%o0 + 0x50], %f24 + ldd [%o0 + 0x58], %f26 + ldd [%o0 + 0x60], %f28 + ldd [%o0 + 0x68], %f30 + ldd [%o0 + 0x70], %f32 + ldd [%o0 + 0x78], %f34 + ldd [%o0 + 0x80], %f36 + ldd [%o0 + 0x88], %f38 + ldd [%o0 + 0x90], %f40 + ldd [%o0 + 0x98], %f42 + ldd [%o0 + 0xa0], %f44 + retl + ldd [%o0 + 0xa8], %f46 +ENDPROC(aes_sparc64_load_encrypt_keys_128) + + .align 32 +ENTRY(aes_sparc64_load_encrypt_keys_192) + /* %o0=key */ + VISEntry + ldd [%o0 + 0x10], %f8 + ldd [%o0 + 0x18], %f10 + ldd [%o0 + 0x20], %f12 + ldd [%o0 + 0x28], %f14 + ldd [%o0 + 0x30], %f16 + ldd [%o0 + 0x38], %f18 + ldd [%o0 + 0x40], %f20 + ldd [%o0 + 0x48], %f22 + ldd [%o0 + 0x50], %f24 + ldd [%o0 + 0x58], %f26 + ldd [%o0 + 0x60], %f28 + ldd [%o0 + 0x68], %f30 + ldd [%o0 + 0x70], %f32 + ldd [%o0 + 0x78], %f34 + ldd [%o0 + 0x80], %f36 + ldd [%o0 + 0x88], %f38 + ldd [%o0 + 0x90], %f40 + ldd [%o0 + 0x98], %f42 + ldd [%o0 + 0xa0], %f44 + ldd [%o0 + 0xa8], %f46 + ldd [%o0 + 0xb0], %f48 + ldd [%o0 + 0xb8], %f50 + ldd [%o0 + 0xc0], %f52 + retl + ldd [%o0 + 0xc8], %f54 +ENDPROC(aes_sparc64_load_encrypt_keys_192) + + .align 32 +ENTRY(aes_sparc64_load_encrypt_keys_256) + /* %o0=key */ + VISEntry + ldd [%o0 + 0x10], %f8 + ldd [%o0 + 0x18], %f10 + ldd [%o0 + 0x20], %f12 + ldd [%o0 + 0x28], %f14 + ldd [%o0 + 0x30], %f16 + ldd [%o0 + 0x38], %f18 + ldd [%o0 + 0x40], %f20 + ldd [%o0 + 0x48], %f22 + ldd [%o0 + 0x50], %f24 + ldd [%o0 + 0x58], %f26 + ldd [%o0 + 0x60], %f28 + ldd [%o0 + 0x68], %f30 + ldd [%o0 + 0x70], %f32 + ldd [%o0 + 0x78], %f34 + ldd [%o0 + 0x80], %f36 + ldd [%o0 + 0x88], %f38 + ldd [%o0 + 0x90], %f40 + ldd [%o0 + 0x98], %f42 + ldd [%o0 + 0xa0], %f44 + ldd [%o0 + 0xa8], %f46 + ldd [%o0 + 0xb0], %f48 + ldd [%o0 + 0xb8], %f50 + ldd [%o0 + 0xc0], %f52 + ldd [%o0 + 0xc8], %f54 + ldd [%o0 + 0xd0], %f56 + ldd [%o0 + 0xd8], %f58 + ldd [%o0 + 0xe0], %f60 + retl + ldd [%o0 + 0xe8], %f62 +ENDPROC(aes_sparc64_load_encrypt_keys_256) + + .align 32 +ENTRY(aes_sparc64_load_decrypt_keys_128) + /* %o0=key */ + VISEntry + ldd [%o0 + 0x98], %f8 + ldd [%o0 + 0x90], %f10 + ldd [%o0 + 0x88], %f12 + ldd [%o0 + 0x80], %f14 + ldd [%o0 + 0x78], %f16 + ldd [%o0 + 0x70], %f18 + ldd [%o0 + 0x68], %f20 + ldd [%o0 + 0x60], %f22 + ldd [%o0 + 0x58], %f24 + ldd [%o0 + 0x50], %f26 + ldd [%o0 + 0x48], %f28 + ldd [%o0 + 0x40], %f30 + ldd [%o0 + 0x38], %f32 + ldd [%o0 + 0x30], %f34 + ldd [%o0 + 0x28], %f36 + ldd [%o0 + 0x20], %f38 + ldd [%o0 + 0x18], %f40 + ldd [%o0 + 0x10], %f42 + ldd [%o0 + 0x08], %f44 + retl + ldd [%o0 + 0x00], %f46 +ENDPROC(aes_sparc64_load_decrypt_keys_128) + + .align 32 +ENTRY(aes_sparc64_load_decrypt_keys_192) + /* %o0=key */ + VISEntry + ldd [%o0 + 0xb8], %f8 + ldd [%o0 + 0xb0], %f10 + ldd [%o0 + 0xa8], %f12 + ldd [%o0 + 0xa0], %f14 + ldd [%o0 + 0x98], %f16 + ldd [%o0 + 0x90], %f18 + ldd [%o0 + 0x88], %f20 + ldd [%o0 + 0x80], %f22 + ldd [%o0 + 0x78], %f24 + ldd [%o0 + 0x70], %f26 + ldd [%o0 + 0x68], %f28 + ldd [%o0 + 0x60], %f30 + ldd [%o0 + 0x58], %f32 + ldd [%o0 + 0x50], %f34 + ldd [%o0 + 0x48], %f36 + ldd [%o0 + 0x40], %f38 + ldd [%o0 + 0x38], %f40 + ldd [%o0 + 0x30], %f42 + ldd [%o0 + 0x28], %f44 + ldd [%o0 + 0x20], %f46 + ldd [%o0 + 0x18], %f48 + ldd [%o0 + 0x10], %f50 + ldd [%o0 + 0x08], %f52 + retl + ldd [%o0 + 0x00], %f54 +ENDPROC(aes_sparc64_load_decrypt_keys_192) + + .align 32 +ENTRY(aes_sparc64_load_decrypt_keys_256) + /* %o0=key */ + VISEntry + ldd [%o0 + 0xd8], %f8 + ldd [%o0 + 0xd0], %f10 + ldd [%o0 + 0xc8], %f12 + ldd [%o0 + 0xc0], %f14 + ldd [%o0 + 0xb8], %f16 + ldd [%o0 + 0xb0], %f18 + ldd [%o0 + 0xa8], %f20 + ldd [%o0 + 0xa0], %f22 + ldd [%o0 + 0x98], %f24 + ldd [%o0 + 0x90], %f26 + ldd [%o0 + 0x88], %f28 + ldd [%o0 + 0x80], %f30 + ldd [%o0 + 0x78], %f32 + ldd [%o0 + 0x70], %f34 + ldd [%o0 + 0x68], %f36 + ldd [%o0 + 0x60], %f38 + ldd [%o0 + 0x58], %f40 + ldd [%o0 + 0x50], %f42 + ldd [%o0 + 0x48], %f44 + ldd [%o0 + 0x40], %f46 + ldd [%o0 + 0x38], %f48 + ldd [%o0 + 0x30], %f50 + ldd [%o0 + 0x28], %f52 + ldd [%o0 + 0x20], %f54 + ldd [%o0 + 0x18], %f56 + ldd [%o0 + 0x10], %f58 + ldd [%o0 + 0x08], %f60 + retl + ldd [%o0 + 0x00], %f62 +ENDPROC(aes_sparc64_load_decrypt_keys_256) + + .align 32 +ENTRY(aes_sparc64_ecb_encrypt_128) + /* %o0=key, %o1=input, %o2=output, %o3=len */ + ldx [%o0 + 0x00], %g1 + subcc %o3, 0x10, %o3 + be 10f + ldx [%o0 + 0x08], %g2 +1: ldx [%o1 + 0x00], %g3 + ldx [%o1 + 0x08], %g7 + ldx [%o1 + 0x10], %o4 + ldx [%o1 + 0x18], %o5 + xor %g1, %g3, %g3 + xor %g2, %g7, %g7 + MOVXTOD_G3_F4 + MOVXTOD_G7_F6 + xor %g1, %o4, %g3 + xor %g2, %o5, %g7 + MOVXTOD_G3_F60 + MOVXTOD_G7_F62 + ENCRYPT_128_2(8, 4, 6, 60, 62, 0, 2, 56, 58) + std %f4, [%o2 + 0x00] + std %f6, [%o2 + 0x08] + std %f60, [%o2 + 0x10] + std %f62, [%o2 + 0x18] + sub %o3, 0x20, %o3 + add %o1, 0x20, %o1 + brgz %o3, 1b + add %o2, 0x20, %o2 + brlz,pt %o3, 11f + nop +10: ldx [%o1 + 0x00], %g3 + ldx [%o1 + 0x08], %g7 + xor %g1, %g3, %g3 + xor %g2, %g7, %g7 + MOVXTOD_G3_F4 + MOVXTOD_G7_F6 + ENCRYPT_128(8, 4, 6, 0, 2) + std %f4, [%o2 + 0x00] + std %f6, [%o2 + 0x08] +11: retl + nop +ENDPROC(aes_sparc64_ecb_encrypt_128) + + .align 32 +ENTRY(aes_sparc64_ecb_encrypt_192) + /* %o0=key, %o1=input, %o2=output, %o3=len */ + ldx [%o0 + 0x00], %g1 + subcc %o3, 0x10, %o3 + be 10f + ldx [%o0 + 0x08], %g2 +1: ldx [%o1 + 0x00], %g3 + ldx [%o1 + 0x08], %g7 + ldx [%o1 + 0x10], %o4 + ldx [%o1 + 0x18], %o5 + xor %g1, %g3, %g3 + xor %g2, %g7, %g7 + MOVXTOD_G3_F4 + MOVXTOD_G7_F6 + xor %g1, %o4, %g3 + xor %g2, %o5, %g7 + MOVXTOD_G3_F60 + MOVXTOD_G7_F62 + ENCRYPT_192_2(8, 4, 6, 60, 62, 0, 2, 56, 58) + std %f4, [%o2 + 0x00] + std %f6, [%o2 + 0x08] + std %f60, [%o2 + 0x10] + std %f62, [%o2 + 0x18] + sub %o3, 0x20, %o3 + add %o1, 0x20, %o1 + brgz %o3, 1b + add %o2, 0x20, %o2 + brlz,pt %o3, 11f + nop +10: ldx [%o1 + 0x00], %g3 + ldx [%o1 + 0x08], %g7 + xor %g1, %g3, %g3 + xor %g2, %g7, %g7 + MOVXTOD_G3_F4 + MOVXTOD_G7_F6 + ENCRYPT_192(8, 4, 6, 0, 2) + std %f4, [%o2 + 0x00] + std %f6, [%o2 + 0x08] +11: retl + nop +ENDPROC(aes_sparc64_ecb_encrypt_192) + + .align 32 +ENTRY(aes_sparc64_ecb_encrypt_256) + /* %o0=key, %o1=input, %o2=output, %o3=len */ + ldx [%o0 + 0x00], %g1 + subcc %o3, 0x10, %o3 + be 10f + ldx [%o0 + 0x08], %g2 +1: ldx [%o1 + 0x00], %g3 + ldx [%o1 + 0x08], %g7 + ldx [%o1 + 0x10], %o4 + ldx [%o1 + 0x18], %o5 + xor %g1, %g3, %g3 + xor %g2, %g7, %g7 + MOVXTOD_G3_F4 + MOVXTOD_G7_F6 + xor %g1, %o4, %g3 + xor %g2, %o5, %g7 + MOVXTOD_G3_F0 + MOVXTOD_G7_F2 + ENCRYPT_256_2(8, 4, 6, 0, 2) + std %f4, [%o2 + 0x00] + std %f6, [%o2 + 0x08] + std %f0, [%o2 + 0x10] + std %f2, [%o2 + 0x18] + sub %o3, 0x20, %o3 + add %o1, 0x20, %o1 + brgz %o3, 1b + add %o2, 0x20, %o2 + brlz,pt %o3, 11f + nop +10: ldx [%o1 + 0x00], %g3 + ldx [%o1 + 0x08], %g7 + xor %g1, %g3, %g3 + xor %g2, %g7, %g7 + MOVXTOD_G3_F4 + MOVXTOD_G7_F6 + ENCRYPT_256(8, 4, 6, 0, 2) + std %f4, [%o2 + 0x00] + std %f6, [%o2 + 0x08] +11: retl + nop +ENDPROC(aes_sparc64_ecb_encrypt_256) + + .align 32 +ENTRY(aes_sparc64_ecb_decrypt_128) + /* %o0=&key[key_len], %o1=input, %o2=output, %o3=len */ + ldx [%o0 - 0x10], %g1 + subcc %o3, 0x10, %o3 + be 10f + ldx [%o0 - 0x08], %g2 +1: ldx [%o1 + 0x00], %g3 + ldx [%o1 + 0x08], %g7 + ldx [%o1 + 0x10], %o4 + ldx [%o1 + 0x18], %o5 + xor %g1, %g3, %g3 + xor %g2, %g7, %g7 + MOVXTOD_G3_F4 + MOVXTOD_G7_F6 + xor %g1, %o4, %g3 + xor %g2, %o5, %g7 + MOVXTOD_G3_F60 + MOVXTOD_G7_F62 + DECRYPT_128_2(8, 4, 6, 60, 62, 0, 2, 56, 58) + std %f4, [%o2 + 0x00] + std %f6, [%o2 + 0x08] + std %f60, [%o2 + 0x10] + std %f62, [%o2 + 0x18] + sub %o3, 0x20, %o3 + add %o1, 0x20, %o1 + brgz,pt %o3, 1b + add %o2, 0x20, %o2 + brlz,pt %o3, 11f + nop +10: ldx [%o1 + 0x00], %g3 + ldx [%o1 + 0x08], %g7 + xor %g1, %g3, %g3 + xor %g2, %g7, %g7 + MOVXTOD_G3_F4 + MOVXTOD_G7_F6 + DECRYPT_128(8, 4, 6, 0, 2) + std %f4, [%o2 + 0x00] + std %f6, [%o2 + 0x08] +11: retl + nop +ENDPROC(aes_sparc64_ecb_decrypt_128) + + .align 32 +ENTRY(aes_sparc64_ecb_decrypt_192) + /* %o0=&key[key_len], %o1=input, %o2=output, %o3=len */ + ldx [%o0 - 0x10], %g1 + subcc %o3, 0x10, %o3 + be 10f + ldx [%o0 - 0x08], %g2 +1: ldx [%o1 + 0x00], %g3 + ldx [%o1 + 0x08], %g7 + ldx [%o1 + 0x10], %o4 + ldx [%o1 + 0x18], %o5 + xor %g1, %g3, %g3 + xor %g2, %g7, %g7 + MOVXTOD_G3_F4 + MOVXTOD_G7_F6 + xor %g1, %o4, %g3 + xor %g2, %o5, %g7 + MOVXTOD_G3_F60 + MOVXTOD_G7_F62 + DECRYPT_192_2(8, 4, 6, 60, 62, 0, 2, 56, 58) + std %f4, [%o2 + 0x00] + std %f6, [%o2 + 0x08] + std %f60, [%o2 + 0x10] + std %f62, [%o2 + 0x18] + sub %o3, 0x20, %o3 + add %o1, 0x20, %o1 + brgz,pt %o3, 1b + add %o2, 0x20, %o2 + brlz,pt %o3, 11f + nop +10: ldx [%o1 + 0x00], %g3 + ldx [%o1 + 0x08], %g7 + xor %g1, %g3, %g3 + xor %g2, %g7, %g7 + MOVXTOD_G3_F4 + MOVXTOD_G7_F6 + DECRYPT_192(8, 4, 6, 0, 2) + std %f4, [%o2 + 0x00] + std %f6, [%o2 + 0x08] +11: retl + nop +ENDPROC(aes_sparc64_ecb_decrypt_192) + + .align 32 +ENTRY(aes_sparc64_ecb_decrypt_256) + /* %o0=&key[key_len], %o1=input, %o2=output, %o3=len */ + ldx [%o0 - 0x10], %g1 + subcc %o3, 0x10, %o3 + be 10f + ldx [%o0 - 0x08], %g2 + sub %o0, 0xf0, %o0 +1: ldx [%o1 + 0x00], %g3 + ldx [%o1 + 0x08], %g7 + ldx [%o1 + 0x10], %o4 + ldx [%o1 + 0x18], %o5 + xor %g1, %g3, %g3 + xor %g2, %g7, %g7 + MOVXTOD_G3_F4 + MOVXTOD_G7_F6 + xor %g1, %o4, %g3 + xor %g2, %o5, %g7 + MOVXTOD_G3_F0 + MOVXTOD_G7_F2 + DECRYPT_256_2(8, 4, 6, 0, 2) + std %f4, [%o2 + 0x00] + std %f6, [%o2 + 0x08] + std %f0, [%o2 + 0x10] + std %f2, [%o2 + 0x18] + sub %o3, 0x20, %o3 + add %o1, 0x20, %o1 + brgz,pt %o3, 1b + add %o2, 0x20, %o2 + brlz,pt %o3, 11f + nop +10: ldx [%o1 + 0x00], %g3 + ldx [%o1 + 0x08], %g7 + xor %g1, %g3, %g3 + xor %g2, %g7, %g7 + MOVXTOD_G3_F4 + MOVXTOD_G7_F6 + DECRYPT_256(8, 4, 6, 0, 2) + std %f4, [%o2 + 0x00] + std %f6, [%o2 + 0x08] +11: retl + nop +ENDPROC(aes_sparc64_ecb_decrypt_256) + + .align 32 +ENTRY(aes_sparc64_cbc_encrypt_128) + /* %o0=key, %o1=input, %o2=output, %o3=len, %o4=IV */ + ldd [%o4 + 0x00], %f4 + ldd [%o4 + 0x08], %f6 + ldx [%o0 + 0x00], %g1 + ldx [%o0 + 0x08], %g2 +1: ldx [%o1 + 0x00], %g3 + ldx [%o1 + 0x08], %g7 + add %o1, 0x10, %o1 + xor %g1, %g3, %g3 + xor %g2, %g7, %g7 + MOVXTOD_G3_F0 + MOVXTOD_G7_F2 + fxor %f4, %f0, %f4 + fxor %f6, %f2, %f6 + ENCRYPT_128(8, 4, 6, 0, 2) + std %f4, [%o2 + 0x00] + std %f6, [%o2 + 0x08] + subcc %o3, 0x10, %o3 + bne,pt %xcc, 1b + add %o2, 0x10, %o2 + std %f4, [%o4 + 0x00] + std %f6, [%o4 + 0x08] + retl + nop +ENDPROC(aes_sparc64_cbc_encrypt_128) + + .align 32 +ENTRY(aes_sparc64_cbc_encrypt_192) + /* %o0=key, %o1=input, %o2=output, %o3=len, %o4=IV */ + ldd [%o4 + 0x00], %f4 + ldd [%o4 + 0x08], %f6 + ldx [%o0 + 0x00], %g1 + ldx [%o0 + 0x08], %g2 +1: ldx [%o1 + 0x00], %g3 + ldx [%o1 + 0x08], %g7 + add %o1, 0x10, %o1 + xor %g1, %g3, %g3 + xor %g2, %g7, %g7 + MOVXTOD_G3_F0 + MOVXTOD_G7_F2 + fxor %f4, %f0, %f4 + fxor %f6, %f2, %f6 + ENCRYPT_192(8, 4, 6, 0, 2) + std %f4, [%o2 + 0x00] + std %f6, [%o2 + 0x08] + subcc %o3, 0x10, %o3 + bne,pt %xcc, 1b + add %o2, 0x10, %o2 + std %f4, [%o4 + 0x00] + std %f6, [%o4 + 0x08] + retl + nop +ENDPROC(aes_sparc64_cbc_encrypt_192) + + .align 32 +ENTRY(aes_sparc64_cbc_encrypt_256) + /* %o0=key, %o1=input, %o2=output, %o3=len, %o4=IV */ + ldd [%o4 + 0x00], %f4 + ldd [%o4 + 0x08], %f6 + ldx [%o0 + 0x00], %g1 + ldx [%o0 + 0x08], %g2 +1: ldx [%o1 + 0x00], %g3 + ldx [%o1 + 0x08], %g7 + add %o1, 0x10, %o1 + xor %g1, %g3, %g3 + xor %g2, %g7, %g7 + MOVXTOD_G3_F0 + MOVXTOD_G7_F2 + fxor %f4, %f0, %f4 + fxor %f6, %f2, %f6 + ENCRYPT_256(8, 4, 6, 0, 2) + std %f4, [%o2 + 0x00] + std %f6, [%o2 + 0x08] + subcc %o3, 0x10, %o3 + bne,pt %xcc, 1b + add %o2, 0x10, %o2 + std %f4, [%o4 + 0x00] + std %f6, [%o4 + 0x08] + retl + nop +ENDPROC(aes_sparc64_cbc_encrypt_256) + + .align 32 +ENTRY(aes_sparc64_cbc_decrypt_128) + /* %o0=&key[key_len], %o1=input, %o2=output, %o3=len, %o4=iv */ + ldx [%o0 - 0x10], %g1 + ldx [%o0 - 0x08], %g2 + ldx [%o4 + 0x00], %o0 + ldx [%o4 + 0x08], %o5 +1: ldx [%o1 + 0x00], %g3 + ldx [%o1 + 0x08], %g7 + add %o1, 0x10, %o1 + xor %g1, %g3, %g3 + xor %g2, %g7, %g7 + MOVXTOD_G3_F4 + MOVXTOD_G7_F6 + DECRYPT_128(8, 4, 6, 0, 2) + MOVXTOD_O0_F0 + MOVXTOD_O5_F2 + xor %g1, %g3, %o0 + xor %g2, %g7, %o5 + fxor %f4, %f0, %f4 + fxor %f6, %f2, %f6 + std %f4, [%o2 + 0x00] + std %f6, [%o2 + 0x08] + subcc %o3, 0x10, %o3 + bne,pt %xcc, 1b + add %o2, 0x10, %o2 + stx %o0, [%o4 + 0x00] + stx %o5, [%o4 + 0x08] + retl + nop +ENDPROC(aes_sparc64_cbc_decrypt_128) + + .align 32 +ENTRY(aes_sparc64_cbc_decrypt_192) + /* %o0=&key[key_len], %o1=input, %o2=output, %o3=len, %o4=iv */ + ldx [%o0 - 0x10], %g1 + ldx [%o0 - 0x08], %g2 + ldx [%o4 + 0x00], %o0 + ldx [%o4 + 0x08], %o5 +1: ldx [%o1 + 0x00], %g3 + ldx [%o1 + 0x08], %g7 + add %o1, 0x10, %o1 + xor %g1, %g3, %g3 + xor %g2, %g7, %g7 + MOVXTOD_G3_F4 + MOVXTOD_G7_F6 + DECRYPT_192(8, 4, 6, 0, 2) + MOVXTOD_O0_F0 + MOVXTOD_O5_F2 + xor %g1, %g3, %o0 + xor %g2, %g7, %o5 + fxor %f4, %f0, %f4 + fxor %f6, %f2, %f6 + std %f4, [%o2 + 0x00] + std %f6, [%o2 + 0x08] + subcc %o3, 0x10, %o3 + bne,pt %xcc, 1b + add %o2, 0x10, %o2 + stx %o0, [%o4 + 0x00] + stx %o5, [%o4 + 0x08] + retl + nop +ENDPROC(aes_sparc64_cbc_decrypt_192) + + .align 32 +ENTRY(aes_sparc64_cbc_decrypt_256) + /* %o0=&key[key_len], %o1=input, %o2=output, %o3=len, %o4=iv */ + ldx [%o0 - 0x10], %g1 + ldx [%o0 - 0x08], %g2 + ldx [%o4 + 0x00], %o0 + ldx [%o4 + 0x08], %o5 +1: ldx [%o1 + 0x00], %g3 + ldx [%o1 + 0x08], %g7 + add %o1, 0x10, %o1 + xor %g1, %g3, %g3 + xor %g2, %g7, %g7 + MOVXTOD_G3_F4 + MOVXTOD_G7_F6 + DECRYPT_256(8, 4, 6, 0, 2) + MOVXTOD_O0_F0 + MOVXTOD_O5_F2 + xor %g1, %g3, %o0 + xor %g2, %g7, %o5 + fxor %f4, %f0, %f4 + fxor %f6, %f2, %f6 + std %f4, [%o2 + 0x00] + std %f6, [%o2 + 0x08] + subcc %o3, 0x10, %o3 + bne,pt %xcc, 1b + add %o2, 0x10, %o2 + stx %o0, [%o4 + 0x00] + stx %o5, [%o4 + 0x08] + retl + nop +ENDPROC(aes_sparc64_cbc_decrypt_256) + + .align 32 +ENTRY(aes_sparc64_ctr_crypt_128) + /* %o0=key, %o1=input, %o2=output, %o3=len, %o4=IV */ + ldx [%o4 + 0x00], %g3 + ldx [%o4 + 0x08], %g7 + subcc %o3, 0x10, %o3 + ldx [%o0 + 0x00], %g1 + be 10f + ldx [%o0 + 0x08], %g2 +1: xor %g1, %g3, %o5 + MOVXTOD_O5_F0 + xor %g2, %g7, %o5 + MOVXTOD_O5_F2 + add %g7, 1, %g7 + add %g3, 1, %o5 + movrz %g7, %o5, %g3 + xor %g1, %g3, %o5 + MOVXTOD_O5_F4 + xor %g2, %g7, %o5 + MOVXTOD_O5_F6 + add %g7, 1, %g7 + add %g3, 1, %o5 + movrz %g7, %o5, %g3 + ENCRYPT_128_2(8, 0, 2, 4, 6, 56, 58, 60, 62) + ldd [%o1 + 0x00], %f56 + ldd [%o1 + 0x08], %f58 + ldd [%o1 + 0x10], %f60 + ldd [%o1 + 0x18], %f62 + fxor %f56, %f0, %f56 + fxor %f58, %f2, %f58 + fxor %f60, %f4, %f60 + fxor %f62, %f6, %f62 + std %f56, [%o2 + 0x00] + std %f58, [%o2 + 0x08] + std %f60, [%o2 + 0x10] + std %f62, [%o2 + 0x18] + subcc %o3, 0x20, %o3 + add %o1, 0x20, %o1 + brgz %o3, 1b + add %o2, 0x20, %o2 + brlz,pt %o3, 11f + nop +10: xor %g1, %g3, %o5 + MOVXTOD_O5_F0 + xor %g2, %g7, %o5 + MOVXTOD_O5_F2 + add %g7, 1, %g7 + add %g3, 1, %o5 + movrz %g7, %o5, %g3 + ENCRYPT_128(8, 0, 2, 4, 6) + ldd [%o1 + 0x00], %f4 + ldd [%o1 + 0x08], %f6 + fxor %f4, %f0, %f4 + fxor %f6, %f2, %f6 + std %f4, [%o2 + 0x00] + std %f6, [%o2 + 0x08] +11: stx %g3, [%o4 + 0x00] + retl + stx %g7, [%o4 + 0x08] +ENDPROC(aes_sparc64_ctr_crypt_128) + + .align 32 +ENTRY(aes_sparc64_ctr_crypt_192) + /* %o0=key, %o1=input, %o2=output, %o3=len, %o4=IV */ + ldx [%o4 + 0x00], %g3 + ldx [%o4 + 0x08], %g7 + subcc %o3, 0x10, %o3 + ldx [%o0 + 0x00], %g1 + be 10f + ldx [%o0 + 0x08], %g2 +1: xor %g1, %g3, %o5 + MOVXTOD_O5_F0 + xor %g2, %g7, %o5 + MOVXTOD_O5_F2 + add %g7, 1, %g7 + add %g3, 1, %o5 + movrz %g7, %o5, %g3 + xor %g1, %g3, %o5 + MOVXTOD_O5_F4 + xor %g2, %g7, %o5 + MOVXTOD_O5_F6 + add %g7, 1, %g7 + add %g3, 1, %o5 + movrz %g7, %o5, %g3 + ENCRYPT_192_2(8, 0, 2, 4, 6, 56, 58, 60, 62) + ldd [%o1 + 0x00], %f56 + ldd [%o1 + 0x08], %f58 + ldd [%o1 + 0x10], %f60 + ldd [%o1 + 0x18], %f62 + fxor %f56, %f0, %f56 + fxor %f58, %f2, %f58 + fxor %f60, %f4, %f60 + fxor %f62, %f6, %f62 + std %f56, [%o2 + 0x00] + std %f58, [%o2 + 0x08] + std %f60, [%o2 + 0x10] + std %f62, [%o2 + 0x18] + subcc %o3, 0x20, %o3 + add %o1, 0x20, %o1 + brgz %o3, 1b + add %o2, 0x20, %o2 + brlz,pt %o3, 11f + nop +10: xor %g1, %g3, %o5 + MOVXTOD_O5_F0 + xor %g2, %g7, %o5 + MOVXTOD_O5_F2 + add %g7, 1, %g7 + add %g3, 1, %o5 + movrz %g7, %o5, %g3 + ENCRYPT_192(8, 0, 2, 4, 6) + ldd [%o1 + 0x00], %f4 + ldd [%o1 + 0x08], %f6 + fxor %f4, %f0, %f4 + fxor %f6, %f2, %f6 + std %f4, [%o2 + 0x00] + std %f6, [%o2 + 0x08] +11: stx %g3, [%o4 + 0x00] + retl + stx %g7, [%o4 + 0x08] +ENDPROC(aes_sparc64_ctr_crypt_192) + + .align 32 +ENTRY(aes_sparc64_ctr_crypt_256) + /* %o0=key, %o1=input, %o2=output, %o3=len, %o4=IV */ + ldx [%o4 + 0x00], %g3 + ldx [%o4 + 0x08], %g7 + subcc %o3, 0x10, %o3 + ldx [%o0 + 0x00], %g1 + be 10f + ldx [%o0 + 0x08], %g2 +1: xor %g1, %g3, %o5 + MOVXTOD_O5_F0 + xor %g2, %g7, %o5 + MOVXTOD_O5_F2 + add %g7, 1, %g7 + add %g3, 1, %o5 + movrz %g7, %o5, %g3 + xor %g1, %g3, %o5 + MOVXTOD_O5_F4 + xor %g2, %g7, %o5 + MOVXTOD_O5_F6 + add %g7, 1, %g7 + add %g3, 1, %o5 + movrz %g7, %o5, %g3 + ENCRYPT_256_2(8, 0, 2, 4, 6) + ldd [%o1 + 0x00], %f56 + ldd [%o1 + 0x08], %f58 + ldd [%o1 + 0x10], %f60 + ldd [%o1 + 0x18], %f62 + fxor %f56, %f0, %f56 + fxor %f58, %f2, %f58 + fxor %f60, %f4, %f60 + fxor %f62, %f6, %f62 + std %f56, [%o2 + 0x00] + std %f58, [%o2 + 0x08] + std %f60, [%o2 + 0x10] + std %f62, [%o2 + 0x18] + subcc %o3, 0x20, %o3 + add %o1, 0x20, %o1 + brgz %o3, 1b + add %o2, 0x20, %o2 + brlz,pt %o3, 11f + nop + ldd [%o0 + 0xd0], %f56 + ldd [%o0 + 0xd8], %f58 + ldd [%o0 + 0xe0], %f60 + ldd [%o0 + 0xe8], %f62 +10: xor %g1, %g3, %o5 + MOVXTOD_O5_F0 + xor %g2, %g7, %o5 + MOVXTOD_O5_F2 + add %g7, 1, %g7 + add %g3, 1, %o5 + movrz %g7, %o5, %g3 + ENCRYPT_256(8, 0, 2, 4, 6) + ldd [%o1 + 0x00], %f4 + ldd [%o1 + 0x08], %f6 + fxor %f4, %f0, %f4 + fxor %f6, %f2, %f6 + std %f4, [%o2 + 0x00] + std %f6, [%o2 + 0x08] +11: stx %g3, [%o4 + 0x00] + retl + stx %g7, [%o4 + 0x08] +ENDPROC(aes_sparc64_ctr_crypt_256) diff --git a/arch/sparc/crypto/aes_glue.c b/arch/sparc/crypto/aes_glue.c new file mode 100644 index 000000000000..8f1c9980f637 --- /dev/null +++ b/arch/sparc/crypto/aes_glue.c @@ -0,0 +1,477 @@ +/* Glue code for AES encryption optimized for sparc64 crypto opcodes. + * + * This is based largely upon arch/x86/crypto/aesni-intel_glue.c + * + * Copyright (C) 2008, Intel Corp. + * Author: Huang Ying <ying.huang@intel.com> + * + * Added RFC4106 AES-GCM support for 128-bit keys under the AEAD + * interface for 64-bit kernels. + * Authors: Adrian Hoban <adrian.hoban@intel.com> + * Gabriele Paoloni <gabriele.paoloni@intel.com> + * Tadeusz Struk (tadeusz.struk@intel.com) + * Aidan O'Mahony (aidan.o.mahony@intel.com) + * Copyright (c) 2010, Intel Corporation. + */ + +#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt + +#include <linux/crypto.h> +#include <linux/init.h> +#include <linux/module.h> +#include <linux/mm.h> +#include <linux/types.h> +#include <crypto/algapi.h> +#include <crypto/aes.h> + +#include <asm/fpumacro.h> +#include <asm/pstate.h> +#include <asm/elf.h> + +#include "opcodes.h" + +struct aes_ops { + void (*encrypt)(const u64 *key, const u32 *input, u32 *output); + void (*decrypt)(const u64 *key, const u32 *input, u32 *output); + void (*load_encrypt_keys)(const u64 *key); + void (*load_decrypt_keys)(const u64 *key); + void (*ecb_encrypt)(const u64 *key, const u64 *input, u64 *output, + unsigned int len); + void (*ecb_decrypt)(const u64 *key, const u64 *input, u64 *output, + unsigned int len); + void (*cbc_encrypt)(const u64 *key, const u64 *input, u64 *output, + unsigned int len, u64 *iv); + void (*cbc_decrypt)(const u64 *key, const u64 *input, u64 *output, + unsigned int len, u64 *iv); + void (*ctr_crypt)(const u64 *key, const u64 *input, u64 *output, + unsigned int len, u64 *iv); +}; + +struct crypto_sparc64_aes_ctx { + struct aes_ops *ops; + u64 key[AES_MAX_KEYLENGTH / sizeof(u64)]; + u32 key_length; + u32 expanded_key_length; +}; + +extern void aes_sparc64_encrypt_128(const u64 *key, const u32 *input, + u32 *output); +extern void aes_sparc64_encrypt_192(const u64 *key, const u32 *input, + u32 *output); +extern void aes_sparc64_encrypt_256(const u64 *key, const u32 *input, + u32 *output); + +extern void aes_sparc64_decrypt_128(const u64 *key, const u32 *input, + u32 *output); +extern void aes_sparc64_decrypt_192(const u64 *key, const u32 *input, + u32 *output); +extern void aes_sparc64_decrypt_256(const u64 *key, const u32 *input, + u32 *output); + +extern void aes_sparc64_load_encrypt_keys_128(const u64 *key); +extern void aes_sparc64_load_encrypt_keys_192(const u64 *key); +extern void aes_sparc64_load_encrypt_keys_256(const u64 *key); + +extern void aes_sparc64_load_decrypt_keys_128(const u64 *key); +extern void aes_sparc64_load_decrypt_keys_192(const u64 *key); +extern void aes_sparc64_load_decrypt_keys_256(const u64 *key); + +extern void aes_sparc64_ecb_encrypt_128(const u64 *key, const u64 *input, + u64 *output, unsigned int len); +extern void aes_sparc64_ecb_encrypt_192(const u64 *key, const u64 *input, + u64 *output, unsigned int len); +extern void aes_sparc64_ecb_encrypt_256(const u64 *key, const u64 *input, + u64 *output, unsigned int len); + +extern void aes_sparc64_ecb_decrypt_128(const u64 *key, const u64 *input, + u64 *output, unsigned int len); +extern void aes_sparc64_ecb_decrypt_192(const u64 *key, const u64 *input, + u64 *output, unsigned int len); +extern void aes_sparc64_ecb_decrypt_256(const u64 *key, const u64 *input, + u64 *output, unsigned int len); + +extern void aes_sparc64_cbc_encrypt_128(const u64 *key, const u64 *input, + u64 *output, unsigned int len, + u64 *iv); + +extern void aes_sparc64_cbc_encrypt_192(const u64 *key, const u64 *input, + u64 *output, unsigned int len, + u64 *iv); + +extern void aes_sparc64_cbc_encrypt_256(const u64 *key, const u64 *input, + u64 *output, unsigned int len, + u64 *iv); + +extern void aes_sparc64_cbc_decrypt_128(const u64 *key, const u64 *input, + u64 *output, unsigned int len, + u64 *iv); + +extern void aes_sparc64_cbc_decrypt_192(const u64 *key, const u64 *input, + u64 *output, unsigned int len, + u64 *iv); + +extern void aes_sparc64_cbc_decrypt_256(const u64 *key, const u64 *input, + u64 *output, unsigned int len, + u64 *iv); + +extern void aes_sparc64_ctr_crypt_128(const u64 *key, const u64 *input, + u64 *output, unsigned int len, + u64 *iv); +extern void aes_sparc64_ctr_crypt_192(const u64 *key, const u64 *input, + u64 *output, unsigned int len, + u64 *iv); +extern void aes_sparc64_ctr_crypt_256(const u64 *key, const u64 *input, + u64 *output, unsigned int len, + u64 *iv); + +struct aes_ops aes128_ops = { + .encrypt = aes_sparc64_encrypt_128, + .decrypt = aes_sparc64_decrypt_128, + .load_encrypt_keys = aes_sparc64_load_encrypt_keys_128, + .load_decrypt_keys = aes_sparc64_load_decrypt_keys_128, + .ecb_encrypt = aes_sparc64_ecb_encrypt_128, + .ecb_decrypt = aes_sparc64_ecb_decrypt_128, + .cbc_encrypt = aes_sparc64_cbc_encrypt_128, + .cbc_decrypt = aes_sparc64_cbc_decrypt_128, + .ctr_crypt = aes_sparc64_ctr_crypt_128, +}; + +struct aes_ops aes192_ops = { + .encrypt = aes_sparc64_encrypt_192, + .decrypt = aes_sparc64_decrypt_192, + .load_encrypt_keys = aes_sparc64_load_encrypt_keys_192, + .load_decrypt_keys = aes_sparc64_load_decrypt_keys_192, + .ecb_encrypt = aes_sparc64_ecb_encrypt_192, + .ecb_decrypt = aes_sparc64_ecb_decrypt_192, + .cbc_encrypt = aes_sparc64_cbc_encrypt_192, + .cbc_decrypt = aes_sparc64_cbc_decrypt_192, + .ctr_crypt = aes_sparc64_ctr_crypt_192, +}; + +struct aes_ops aes256_ops = { + .encrypt = aes_sparc64_encrypt_256, + .decrypt = aes_sparc64_decrypt_256, + .load_encrypt_keys = aes_sparc64_load_encrypt_keys_256, + .load_decrypt_keys = aes_sparc64_load_decrypt_keys_256, + .ecb_encrypt = aes_sparc64_ecb_encrypt_256, + .ecb_decrypt = aes_sparc64_ecb_decrypt_256, + .cbc_encrypt = aes_sparc64_cbc_encrypt_256, + .cbc_decrypt = aes_sparc64_cbc_decrypt_256, + .ctr_crypt = aes_sparc64_ctr_crypt_256, +}; + +extern void aes_sparc64_key_expand(const u32 *in_key, u64 *output_key, + unsigned int key_len); + +static int aes_set_key(struct crypto_tfm *tfm, const u8 *in_key, + unsigned int key_len) +{ + struct crypto_sparc64_aes_ctx *ctx = crypto_tfm_ctx(tfm); + u32 *flags = &tfm->crt_flags; + + switch (key_len) { + case AES_KEYSIZE_128: + ctx->expanded_key_length = 0xb0; + ctx->ops = &aes128_ops; + break; + + case AES_KEYSIZE_192: + ctx->expanded_key_length = 0xd0; + ctx->ops = &aes192_ops; + break; + + case AES_KEYSIZE_256: + ctx->expanded_key_length = 0xf0; + ctx->ops = &aes256_ops; + break; + + default: + *flags |= CRYPTO_TFM_RES_BAD_KEY_LEN; + return -EINVAL; + } + + aes_sparc64_key_expand((const u32 *)in_key, &ctx->key[0], key_len); + ctx->key_length = key_len; + + return 0; +} + +static void aes_encrypt(struct crypto_tfm *tfm, u8 *dst, const u8 *src) +{ + struct crypto_sparc64_aes_ctx *ctx = crypto_tfm_ctx(tfm); + + ctx->ops->encrypt(&ctx->key[0], (const u32 *) src, (u32 *) dst); +} + +static void aes_decrypt(struct crypto_tfm *tfm, u8 *dst, const u8 *src) +{ + struct crypto_sparc64_aes_ctx *ctx = crypto_tfm_ctx(tfm); + + ctx->ops->decrypt(&ctx->key[0], (const u32 *) src, (u32 *) dst); +} + +#define AES_BLOCK_MASK (~(AES_BLOCK_SIZE-1)) + +static int ecb_encrypt(struct blkcipher_desc *desc, + struct scatterlist *dst, struct scatterlist *src, + unsigned int nbytes) +{ + struct crypto_sparc64_aes_ctx *ctx = crypto_blkcipher_ctx(desc->tfm); + struct blkcipher_walk walk; + int err; + + blkcipher_walk_init(&walk, dst, src, nbytes); + err = blkcipher_walk_virt(desc, &walk); + + ctx->ops->load_encrypt_keys(&ctx->key[0]); + while ((nbytes = walk.nbytes)) { + unsigned int block_len = nbytes & AES_BLOCK_MASK; + + if (likely(block_len)) { + ctx->ops->ecb_encrypt(&ctx->key[0], + (const u64 *)walk.src.virt.addr, + (u64 *) walk.dst.virt.addr, + block_len); + } + nbytes &= AES_BLOCK_SIZE - 1; + err = blkcipher_walk_done(desc, &walk, nbytes); + } + fprs_write(0); + return err; +} + +static int ecb_decrypt(struct blkcipher_desc *desc, + struct scatterlist *dst, struct scatterlist *src, + unsigned int nbytes) +{ + struct crypto_sparc64_aes_ctx *ctx = crypto_blkcipher_ctx(desc->tfm); + struct blkcipher_walk walk; + u64 *key_end; + int err; + + blkcipher_walk_init(&walk, dst, src, nbytes); + err = blkcipher_walk_virt(desc, &walk); + + ctx->ops->load_decrypt_keys(&ctx->key[0]); + key_end = &ctx->key[ctx->expanded_key_length / sizeof(u64)]; + while ((nbytes = walk.nbytes)) { + unsigned int block_len = nbytes & AES_BLOCK_MASK; + + if (likely(block_len)) { + ctx->ops->ecb_decrypt(key_end, + (const u64 *) walk.src.virt.addr, + (u64 *) walk.dst.virt.addr, block_len); + } + nbytes &= AES_BLOCK_SIZE - 1; + err = blkcipher_walk_done(desc, &walk, nbytes); + } + fprs_write(0); + + return err; +} + +static int cbc_encrypt(struct blkcipher_desc *desc, + struct scatterlist *dst, struct scatterlist *src, + unsigned int nbytes) +{ + struct crypto_sparc64_aes_ctx *ctx = crypto_blkcipher_ctx(desc->tfm); + struct blkcipher_walk walk; + int err; + + blkcipher_walk_init(&walk, dst, src, nbytes); + err = blkcipher_walk_virt(desc, &walk); + + ctx->ops->load_encrypt_keys(&ctx->key[0]); + while ((nbytes = walk.nbytes)) { + unsigned int block_len = nbytes & AES_BLOCK_MASK; + + if (likely(block_len)) { + ctx->ops->cbc_encrypt(&ctx->key[0], + (const u64 *)walk.src.virt.addr, + (u64 *) walk.dst.virt.addr, + block_len, (u64 *) walk.iv); + } + nbytes &= AES_BLOCK_SIZE - 1; + err = blkcipher_walk_done(desc, &walk, nbytes); + } + fprs_write(0); + return err; +} + +static int cbc_decrypt(struct blkcipher_desc *desc, + struct scatterlist *dst, struct scatterlist *src, + unsigned int nbytes) +{ + struct crypto_sparc64_aes_ctx *ctx = crypto_blkcipher_ctx(desc->tfm); + struct blkcipher_walk walk; + u64 *key_end; + int err; + + blkcipher_walk_init(&walk, dst, src, nbytes); + err = blkcipher_walk_virt(desc, &walk); + + ctx->ops->load_decrypt_keys(&ctx->key[0]); + key_end = &ctx->key[ctx->expanded_key_length / sizeof(u64)]; + while ((nbytes = walk.nbytes)) { + unsigned int block_len = nbytes & AES_BLOCK_MASK; + + if (likely(block_len)) { + ctx->ops->cbc_decrypt(key_end, + (const u64 *) walk.src.virt.addr, + (u64 *) walk.dst.virt.addr, + block_len, (u64 *) walk.iv); + } + nbytes &= AES_BLOCK_SIZE - 1; + err = blkcipher_walk_done(desc, &walk, nbytes); + } + fprs_write(0); + + return err; +} + +static int ctr_crypt(struct blkcipher_desc *desc, + struct scatterlist *dst, struct scatterlist *src, + unsigned int nbytes) +{ + struct crypto_sparc64_aes_ctx *ctx = crypto_blkcipher_ctx(desc->tfm); + struct blkcipher_walk walk; + int err; + + blkcipher_walk_init(&walk, dst, src, nbytes); + err = blkcipher_walk_virt(desc, &walk); + + ctx->ops->load_encrypt_keys(&ctx->key[0]); + while ((nbytes = walk.nbytes)) { + unsigned int block_len = nbytes & AES_BLOCK_MASK; + + if (likely(block_len)) { + ctx->ops->ctr_crypt(&ctx->key[0], + (const u64 *)walk.src.virt.addr, + (u64 *) walk.dst.virt.addr, + block_len, (u64 *) walk.iv); + } + nbytes &= AES_BLOCK_SIZE - 1; + err = blkcipher_walk_done(desc, &walk, nbytes); + } + fprs_write(0); + return err; +} + +static struct crypto_alg algs[] = { { + .cra_name = "aes", + .cra_driver_name = "aes-sparc64", + .cra_priority = SPARC_CR_OPCODE_PRIORITY, + .cra_flags = CRYPTO_ALG_TYPE_CIPHER, + .cra_blocksize = AES_BLOCK_SIZE, + .cra_ctxsize = sizeof(struct crypto_sparc64_aes_ctx), + .cra_alignmask = 3, + .cra_module = THIS_MODULE, + .cra_u = { + .cipher = { + .cia_min_keysize = AES_MIN_KEY_SIZE, + .cia_max_keysize = AES_MAX_KEY_SIZE, + .cia_setkey = aes_set_key, + .cia_encrypt = aes_encrypt, + .cia_decrypt = aes_decrypt + } + } +}, { + .cra_name = "ecb(aes)", + .cra_driver_name = "ecb-aes-sparc64", + .cra_priority = SPARC_CR_OPCODE_PRIORITY, + .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER, + .cra_blocksize = AES_BLOCK_SIZE, + .cra_ctxsize = sizeof(struct crypto_sparc64_aes_ctx), + .cra_alignmask = 7, + .cra_type = &crypto_blkcipher_type, + .cra_module = THIS_MODULE, + .cra_u = { + .blkcipher = { + .min_keysize = AES_MIN_KEY_SIZE, + .max_keysize = AES_MAX_KEY_SIZE, + .setkey = aes_set_key, + .encrypt = ecb_encrypt, + .decrypt = ecb_decrypt, + }, + }, +}, { + .cra_name = "cbc(aes)", + .cra_driver_name = "cbc-aes-sparc64", + .cra_priority = SPARC_CR_OPCODE_PRIORITY, + .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER, + .cra_blocksize = AES_BLOCK_SIZE, + .cra_ctxsize = sizeof(struct crypto_sparc64_aes_ctx), + .cra_alignmask = 7, + .cra_type = &crypto_blkcipher_type, + .cra_module = THIS_MODULE, + .cra_u = { + .blkcipher = { + .min_keysize = AES_MIN_KEY_SIZE, + .max_keysize = AES_MAX_KEY_SIZE, + .setkey = aes_set_key, + .encrypt = cbc_encrypt, + .decrypt = cbc_decrypt, + }, + }, +}, { + .cra_name = "ctr(aes)", + .cra_driver_name = "ctr-aes-sparc64", + .cra_priority = SPARC_CR_OPCODE_PRIORITY, + .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER, + .cra_blocksize = AES_BLOCK_SIZE, + .cra_ctxsize = sizeof(struct crypto_sparc64_aes_ctx), + .cra_alignmask = 7, + .cra_type = &crypto_blkcipher_type, + .cra_module = THIS_MODULE, + .cra_u = { + .blkcipher = { + .min_keysize = AES_MIN_KEY_SIZE, + .max_keysize = AES_MAX_KEY_SIZE, + .setkey = aes_set_key, + .encrypt = ctr_crypt, + .decrypt = ctr_crypt, + }, + }, +} }; + +static bool __init sparc64_has_aes_opcode(void) +{ + unsigned long cfr; + + if (!(sparc64_elf_hwcap & HWCAP_SPARC_CRYPTO)) + return false; + + __asm__ __volatile__("rd %%asr26, %0" : "=r" (cfr)); + if (!(cfr & CFR_AES)) + return false; + + return true; +} + +static int __init aes_sparc64_mod_init(void) +{ + int i; + + for (i = 0; i < ARRAY_SIZE(algs); i++) + INIT_LIST_HEAD(&algs[i].cra_list); + + if (sparc64_has_aes_opcode()) { + pr_info("Using sparc64 aes opcodes optimized AES implementation\n"); + return crypto_register_algs(algs, ARRAY_SIZE(algs)); + } + pr_info("sparc64 aes opcodes not available.\n"); + return -ENODEV; +} + +static void __exit aes_sparc64_mod_fini(void) +{ + crypto_unregister_algs(algs, ARRAY_SIZE(algs)); +} + +module_init(aes_sparc64_mod_init); +module_exit(aes_sparc64_mod_fini); + +MODULE_LICENSE("GPL"); +MODULE_DESCRIPTION("AES Secure Hash Algorithm, sparc64 aes opcode accelerated"); + +MODULE_ALIAS("aes"); diff --git a/arch/sparc/crypto/camellia_asm.S b/arch/sparc/crypto/camellia_asm.S new file mode 100644 index 000000000000..cc39553a4e43 --- /dev/null +++ b/arch/sparc/crypto/camellia_asm.S @@ -0,0 +1,563 @@ +#include <linux/linkage.h> +#include <asm/visasm.h> + +#include "opcodes.h" + +#define CAMELLIA_6ROUNDS(KEY_BASE, I0, I1) \ + CAMELLIA_F(KEY_BASE + 0, I1, I0, I1) \ + CAMELLIA_F(KEY_BASE + 2, I0, I1, I0) \ + CAMELLIA_F(KEY_BASE + 4, I1, I0, I1) \ + CAMELLIA_F(KEY_BASE + 6, I0, I1, I0) \ + CAMELLIA_F(KEY_BASE + 8, I1, I0, I1) \ + CAMELLIA_F(KEY_BASE + 10, I0, I1, I0) + +#define CAMELLIA_6ROUNDS_FL_FLI(KEY_BASE, I0, I1) \ + CAMELLIA_6ROUNDS(KEY_BASE, I0, I1) \ + CAMELLIA_FL(KEY_BASE + 12, I0, I0) \ + CAMELLIA_FLI(KEY_BASE + 14, I1, I1) + + .data + + .align 8 +SIGMA: .xword 0xA09E667F3BCC908B + .xword 0xB67AE8584CAA73B2 + .xword 0xC6EF372FE94F82BE + .xword 0x54FF53A5F1D36F1C + .xword 0x10E527FADE682D1D + .xword 0xB05688C2B3E6C1FD + + .text + + .align 32 +ENTRY(camellia_sparc64_key_expand) + /* %o0=in_key, %o1=encrypt_key, %o2=key_len, %o3=decrypt_key */ + VISEntry + ld [%o0 + 0x00], %f0 ! i0, k[0] + ld [%o0 + 0x04], %f1 ! i1, k[1] + ld [%o0 + 0x08], %f2 ! i2, k[2] + ld [%o0 + 0x0c], %f3 ! i3, k[3] + std %f0, [%o1 + 0x00] ! k[0, 1] + fsrc2 %f0, %f28 + std %f2, [%o1 + 0x08] ! k[2, 3] + cmp %o2, 16 + be 10f + fsrc2 %f2, %f30 + + ld [%o0 + 0x10], %f0 + ld [%o0 + 0x14], %f1 + std %f0, [%o1 + 0x20] ! k[8, 9] + cmp %o2, 24 + fone %f10 + be,a 1f + fxor %f10, %f0, %f2 + ld [%o0 + 0x18], %f2 + ld [%o0 + 0x1c], %f3 +1: + std %f2, [%o1 + 0x28] ! k[10, 11] + fxor %f28, %f0, %f0 + fxor %f30, %f2, %f2 + +10: + sethi %hi(SIGMA), %g3 + or %g3, %lo(SIGMA), %g3 + ldd [%g3 + 0x00], %f16 + ldd [%g3 + 0x08], %f18 + ldd [%g3 + 0x10], %f20 + ldd [%g3 + 0x18], %f22 + ldd [%g3 + 0x20], %f24 + ldd [%g3 + 0x28], %f26 + CAMELLIA_F(16, 2, 0, 2) + CAMELLIA_F(18, 0, 2, 0) + fxor %f28, %f0, %f0 + fxor %f30, %f2, %f2 + CAMELLIA_F(20, 2, 0, 2) + CAMELLIA_F(22, 0, 2, 0) + +#define ROTL128(S01, S23, TMP1, TMP2, N) \ + srlx S01, (64 - N), TMP1; \ + sllx S01, N, S01; \ + srlx S23, (64 - N), TMP2; \ + sllx S23, N, S23; \ + or S01, TMP2, S01; \ + or S23, TMP1, S23 + + cmp %o2, 16 + bne 1f + nop + /* 128-bit key */ + std %f0, [%o1 + 0x10] ! k[ 4, 5] + std %f2, [%o1 + 0x18] ! k[ 6, 7] + MOVDTOX_F0_O4 + MOVDTOX_F2_O5 + ROTL128(%o4, %o5, %g2, %g3, 15) + stx %o4, [%o1 + 0x30] ! k[12, 13] + stx %o5, [%o1 + 0x38] ! k[14, 15] + ROTL128(%o4, %o5, %g2, %g3, 15) + stx %o4, [%o1 + 0x40] ! k[16, 17] + stx %o5, [%o1 + 0x48] ! k[18, 19] + ROTL128(%o4, %o5, %g2, %g3, 15) + stx %o4, [%o1 + 0x60] ! k[24, 25] + ROTL128(%o4, %o5, %g2, %g3, 15) + stx %o4, [%o1 + 0x70] ! k[28, 29] + stx %o5, [%o1 + 0x78] ! k[30, 31] + ROTL128(%o4, %o5, %g2, %g3, 34) + stx %o4, [%o1 + 0xa0] ! k[40, 41] + stx %o5, [%o1 + 0xa8] ! k[42, 43] + ROTL128(%o4, %o5, %g2, %g3, 17) + stx %o4, [%o1 + 0xc0] ! k[48, 49] + stx %o5, [%o1 + 0xc8] ! k[50, 51] + + ldx [%o1 + 0x00], %o4 ! k[ 0, 1] + ldx [%o1 + 0x08], %o5 ! k[ 2, 3] + ROTL128(%o4, %o5, %g2, %g3, 15) + stx %o4, [%o1 + 0x20] ! k[ 8, 9] + stx %o5, [%o1 + 0x28] ! k[10, 11] + ROTL128(%o4, %o5, %g2, %g3, 30) + stx %o4, [%o1 + 0x50] ! k[20, 21] + stx %o5, [%o1 + 0x58] ! k[22, 23] + ROTL128(%o4, %o5, %g2, %g3, 15) + stx %o5, [%o1 + 0x68] ! k[26, 27] + ROTL128(%o4, %o5, %g2, %g3, 17) + stx %o4, [%o1 + 0x80] ! k[32, 33] + stx %o5, [%o1 + 0x88] ! k[34, 35] + ROTL128(%o4, %o5, %g2, %g3, 17) + stx %o4, [%o1 + 0x90] ! k[36, 37] + stx %o5, [%o1 + 0x98] ! k[38, 39] + ROTL128(%o4, %o5, %g2, %g3, 17) + stx %o4, [%o1 + 0xb0] ! k[44, 45] + stx %o5, [%o1 + 0xb8] ! k[46, 47] + + ba,pt %xcc, 2f + mov (3 * 16 * 4), %o0 + +1: + /* 192-bit or 256-bit key */ + std %f0, [%o1 + 0x30] ! k[12, 13] + std %f2, [%o1 + 0x38] ! k[14, 15] + ldd [%o1 + 0x20], %f4 ! k[ 8, 9] + ldd [%o1 + 0x28], %f6 ! k[10, 11] + fxor %f0, %f4, %f0 + fxor %f2, %f6, %f2 + CAMELLIA_F(24, 2, 0, 2) + CAMELLIA_F(26, 0, 2, 0) + std %f0, [%o1 + 0x10] ! k[ 4, 5] + std %f2, [%o1 + 0x18] ! k[ 6, 7] + MOVDTOX_F0_O4 + MOVDTOX_F2_O5 + ROTL128(%o4, %o5, %g2, %g3, 30) + stx %o4, [%o1 + 0x50] ! k[20, 21] + stx %o5, [%o1 + 0x58] ! k[22, 23] + ROTL128(%o4, %o5, %g2, %g3, 30) + stx %o4, [%o1 + 0xa0] ! k[40, 41] + stx %o5, [%o1 + 0xa8] ! k[42, 43] + ROTL128(%o4, %o5, %g2, %g3, 51) + stx %o4, [%o1 + 0x100] ! k[64, 65] + stx %o5, [%o1 + 0x108] ! k[66, 67] + ldx [%o1 + 0x20], %o4 ! k[ 8, 9] + ldx [%o1 + 0x28], %o5 ! k[10, 11] + ROTL128(%o4, %o5, %g2, %g3, 15) + stx %o4, [%o1 + 0x20] ! k[ 8, 9] + stx %o5, [%o1 + 0x28] ! k[10, 11] + ROTL128(%o4, %o5, %g2, %g3, 15) + stx %o4, [%o1 + 0x40] ! k[16, 17] + stx %o5, [%o1 + 0x48] ! k[18, 19] + ROTL128(%o4, %o5, %g2, %g3, 30) + stx %o4, [%o1 + 0x90] ! k[36, 37] + stx %o5, [%o1 + 0x98] ! k[38, 39] + ROTL128(%o4, %o5, %g2, %g3, 34) + stx %o4, [%o1 + 0xd0] ! k[52, 53] + stx %o5, [%o1 + 0xd8] ! k[54, 55] + ldx [%o1 + 0x30], %o4 ! k[12, 13] + ldx [%o1 + 0x38], %o5 ! k[14, 15] + ROTL128(%o4, %o5, %g2, %g3, 15) + stx %o4, [%o1 + 0x30] ! k[12, 13] + stx %o5, [%o1 + 0x38] ! k[14, 15] + ROTL128(%o4, %o5, %g2, %g3, 30) + stx %o4, [%o1 + 0x70] ! k[28, 29] + stx %o5, [%o1 + 0x78] ! k[30, 31] + srlx %o4, 32, %g2 + srlx %o5, 32, %g3 + stw %o4, [%o1 + 0xc0] ! k[48] + stw %g3, [%o1 + 0xc4] ! k[49] + stw %o5, [%o1 + 0xc8] ! k[50] + stw %g2, [%o1 + 0xcc] ! k[51] + ROTL128(%o4, %o5, %g2, %g3, 49) + stx %o4, [%o1 + 0xe0] ! k[56, 57] + stx %o5, [%o1 + 0xe8] ! k[58, 59] + ldx [%o1 + 0x00], %o4 ! k[ 0, 1] + ldx [%o1 + 0x08], %o5 ! k[ 2, 3] + ROTL128(%o4, %o5, %g2, %g3, 45) + stx %o4, [%o1 + 0x60] ! k[24, 25] + stx %o5, [%o1 + 0x68] ! k[26, 27] + ROTL128(%o4, %o5, %g2, %g3, 15) + stx %o4, [%o1 + 0x80] ! k[32, 33] + stx %o5, [%o1 + 0x88] ! k[34, 35] + ROTL128(%o4, %o5, %g2, %g3, 17) + stx %o4, [%o1 + 0xb0] ! k[44, 45] + stx %o5, [%o1 + 0xb8] ! k[46, 47] + ROTL128(%o4, %o5, %g2, %g3, 34) + stx %o4, [%o1 + 0xf0] ! k[60, 61] + stx %o5, [%o1 + 0xf8] ! k[62, 63] + mov (4 * 16 * 4), %o0 +2: + add %o1, %o0, %o1 + ldd [%o1 + 0x00], %f0 + ldd [%o1 + 0x08], %f2 + std %f0, [%o3 + 0x00] + std %f2, [%o3 + 0x08] + add %o3, 0x10, %o3 +1: + sub %o1, (16 * 4), %o1 + ldd [%o1 + 0x38], %f0 + ldd [%o1 + 0x30], %f2 + ldd [%o1 + 0x28], %f4 + ldd [%o1 + 0x20], %f6 + ldd [%o1 + 0x18], %f8 + ldd [%o1 + 0x10], %f10 + std %f0, [%o3 + 0x00] + std %f2, [%o3 + 0x08] + std %f4, [%o3 + 0x10] + std %f6, [%o3 + 0x18] + std %f8, [%o3 + 0x20] + std %f10, [%o3 + 0x28] + + ldd [%o1 + 0x08], %f0 + ldd [%o1 + 0x00], %f2 + std %f0, [%o3 + 0x30] + std %f2, [%o3 + 0x38] + subcc %o0, (16 * 4), %o0 + bne,pt %icc, 1b + add %o3, (16 * 4), %o3 + + std %f2, [%o3 - 0x10] + std %f0, [%o3 - 0x08] + + retl + VISExit +ENDPROC(camellia_sparc64_key_expand) + + .align 32 +ENTRY(camellia_sparc64_crypt) + /* %o0=key, %o1=input, %o2=output, %o3=key_len */ + VISEntry + + ld [%o1 + 0x00], %f0 + ld [%o1 + 0x04], %f1 + ld [%o1 + 0x08], %f2 + ld [%o1 + 0x0c], %f3 + + ldd [%o0 + 0x00], %f4 + ldd [%o0 + 0x08], %f6 + + cmp %o3, 16 + fxor %f4, %f0, %f0 + be 1f + fxor %f6, %f2, %f2 + + ldd [%o0 + 0x10], %f8 + ldd [%o0 + 0x18], %f10 + ldd [%o0 + 0x20], %f12 + ldd [%o0 + 0x28], %f14 + ldd [%o0 + 0x30], %f16 + ldd [%o0 + 0x38], %f18 + ldd [%o0 + 0x40], %f20 + ldd [%o0 + 0x48], %f22 + add %o0, 0x40, %o0 + + CAMELLIA_6ROUNDS_FL_FLI( 8, 0, 2) + +1: + ldd [%o0 + 0x10], %f8 + ldd [%o0 + 0x18], %f10 + ldd [%o0 + 0x20], %f12 + ldd [%o0 + 0x28], %f14 + ldd [%o0 + 0x30], %f16 + ldd [%o0 + 0x38], %f18 + ldd [%o0 + 0x40], %f20 + ldd [%o0 + 0x48], %f22 + ldd [%o0 + 0x50], %f24 + ldd [%o0 + 0x58], %f26 + ldd [%o0 + 0x60], %f28 + ldd [%o0 + 0x68], %f30 + ldd [%o0 + 0x70], %f32 + ldd [%o0 + 0x78], %f34 + ldd [%o0 + 0x80], %f36 + ldd [%o0 + 0x88], %f38 + ldd [%o0 + 0x90], %f40 + ldd [%o0 + 0x98], %f42 + ldd [%o0 + 0xa0], %f44 + ldd [%o0 + 0xa8], %f46 + ldd [%o0 + 0xb0], %f48 + ldd [%o0 + 0xb8], %f50 + ldd [%o0 + 0xc0], %f52 + ldd [%o0 + 0xc8], %f54 + + CAMELLIA_6ROUNDS_FL_FLI( 8, 0, 2) + CAMELLIA_6ROUNDS_FL_FLI(24, 0, 2) + CAMELLIA_6ROUNDS(40, 0, 2) + fxor %f52, %f2, %f2 + fxor %f54, %f0, %f0 + + st %f2, [%o2 + 0x00] + st %f3, [%o2 + 0x04] + st %f0, [%o2 + 0x08] + st %f1, [%o2 + 0x0c] + + retl + VISExit +ENDPROC(camellia_sparc64_crypt) + + .align 32 +ENTRY(camellia_sparc64_load_keys) + /* %o0=key, %o1=key_len */ + VISEntry + ldd [%o0 + 0x00], %f4 + ldd [%o0 + 0x08], %f6 + ldd [%o0 + 0x10], %f8 + ldd [%o0 + 0x18], %f10 + ldd [%o0 + 0x20], %f12 + ldd [%o0 + 0x28], %f14 + ldd [%o0 + 0x30], %f16 + ldd [%o0 + 0x38], %f18 + ldd [%o0 + 0x40], %f20 + ldd [%o0 + 0x48], %f22 + ldd [%o0 + 0x50], %f24 + ldd [%o0 + 0x58], %f26 + ldd [%o0 + 0x60], %f28 + ldd [%o0 + 0x68], %f30 + ldd [%o0 + 0x70], %f32 + ldd [%o0 + 0x78], %f34 + ldd [%o0 + 0x80], %f36 + ldd [%o0 + 0x88], %f38 + ldd [%o0 + 0x90], %f40 + ldd [%o0 + 0x98], %f42 + ldd [%o0 + 0xa0], %f44 + ldd [%o0 + 0xa8], %f46 + ldd [%o0 + 0xb0], %f48 + ldd [%o0 + 0xb8], %f50 + ldd [%o0 + 0xc0], %f52 + retl + ldd [%o0 + 0xc8], %f54 +ENDPROC(camellia_sparc64_load_keys) + + .align 32 +ENTRY(camellia_sparc64_ecb_crypt_3_grand_rounds) + /* %o0=input, %o1=output, %o2=len, %o3=key */ +1: ldd [%o0 + 0x00], %f0 + ldd [%o0 + 0x08], %f2 + add %o0, 0x10, %o0 + fxor %f4, %f0, %f0 + fxor %f6, %f2, %f2 + CAMELLIA_6ROUNDS_FL_FLI( 8, 0, 2) + CAMELLIA_6ROUNDS_FL_FLI(24, 0, 2) + CAMELLIA_6ROUNDS(40, 0, 2) + fxor %f52, %f2, %f2 + fxor %f54, %f0, %f0 + std %f2, [%o1 + 0x00] + std %f0, [%o1 + 0x08] + subcc %o2, 0x10, %o2 + bne,pt %icc, 1b + add %o1, 0x10, %o1 + retl + nop +ENDPROC(camellia_sparc64_ecb_crypt_3_grand_rounds) + + .align 32 +ENTRY(camellia_sparc64_ecb_crypt_4_grand_rounds) + /* %o0=input, %o1=output, %o2=len, %o3=key */ +1: ldd [%o0 + 0x00], %f0 + ldd [%o0 + 0x08], %f2 + add %o0, 0x10, %o0 + fxor %f4, %f0, %f0 + fxor %f6, %f2, %f2 + CAMELLIA_6ROUNDS_FL_FLI( 8, 0, 2) + ldd [%o3 + 0xd0], %f8 + ldd [%o3 + 0xd8], %f10 + ldd [%o3 + 0xe0], %f12 + ldd [%o3 + 0xe8], %f14 + ldd [%o3 + 0xf0], %f16 + ldd [%o3 + 0xf8], %f18 + ldd [%o3 + 0x100], %f20 + ldd [%o3 + 0x108], %f22 + CAMELLIA_6ROUNDS_FL_FLI(24, 0, 2) + CAMELLIA_6ROUNDS_FL_FLI(40, 0, 2) + CAMELLIA_F(8, 2, 0, 2) + CAMELLIA_F(10, 0, 2, 0) + ldd [%o3 + 0x10], %f8 + ldd [%o3 + 0x18], %f10 + CAMELLIA_F(12, 2, 0, 2) + CAMELLIA_F(14, 0, 2, 0) + ldd [%o3 + 0x20], %f12 + ldd [%o3 + 0x28], %f14 + CAMELLIA_F(16, 2, 0, 2) + CAMELLIA_F(18, 0, 2, 0) + ldd [%o3 + 0x30], %f16 + ldd [%o3 + 0x38], %f18 + fxor %f20, %f2, %f2 + fxor %f22, %f0, %f0 + ldd [%o3 + 0x40], %f20 + ldd [%o3 + 0x48], %f22 + std %f2, [%o1 + 0x00] + std %f0, [%o1 + 0x08] + subcc %o2, 0x10, %o2 + bne,pt %icc, 1b + add %o1, 0x10, %o1 + retl + nop +ENDPROC(camellia_sparc64_ecb_crypt_4_grand_rounds) + + .align 32 +ENTRY(camellia_sparc64_cbc_encrypt_3_grand_rounds) + /* %o0=input, %o1=output, %o2=len, %o3=key, %o4=IV */ + ldd [%o4 + 0x00], %f60 + ldd [%o4 + 0x08], %f62 +1: ldd [%o0 + 0x00], %f0 + ldd [%o0 + 0x08], %f2 + add %o0, 0x10, %o0 + fxor %f60, %f0, %f0 + fxor %f62, %f2, %f2 + fxor %f4, %f0, %f0 + fxor %f6, %f2, %f2 + CAMELLIA_6ROUNDS_FL_FLI( 8, 0, 2) + CAMELLIA_6ROUNDS_FL_FLI(24, 0, 2) + CAMELLIA_6ROUNDS(40, 0, 2) + fxor %f52, %f2, %f60 + fxor %f54, %f0, %f62 + std %f60, [%o1 + 0x00] + std %f62, [%o1 + 0x08] + subcc %o2, 0x10, %o2 + bne,pt %icc, 1b + add %o1, 0x10, %o1 + std %f60, [%o4 + 0x00] + retl + std %f62, [%o4 + 0x08] +ENDPROC(camellia_sparc64_cbc_encrypt_3_grand_rounds) + + .align 32 +ENTRY(camellia_sparc64_cbc_encrypt_4_grand_rounds) + /* %o0=input, %o1=output, %o2=len, %o3=key, %o4=IV */ + ldd [%o4 + 0x00], %f60 + ldd [%o4 + 0x08], %f62 +1: ldd [%o0 + 0x00], %f0 + ldd [%o0 + 0x08], %f2 + add %o0, 0x10, %o0 + fxor %f60, %f0, %f0 + fxor %f62, %f2, %f2 + fxor %f4, %f0, %f0 + fxor %f6, %f2, %f2 + CAMELLIA_6ROUNDS_FL_FLI( 8, 0, 2) + ldd [%o3 + 0xd0], %f8 + ldd [%o3 + 0xd8], %f10 + ldd [%o3 + 0xe0], %f12 + ldd [%o3 + 0xe8], %f14 + ldd [%o3 + 0xf0], %f16 + ldd [%o3 + 0xf8], %f18 + ldd [%o3 + 0x100], %f20 + ldd [%o3 + 0x108], %f22 + CAMELLIA_6ROUNDS_FL_FLI(24, 0, 2) + CAMELLIA_6ROUNDS_FL_FLI(40, 0, 2) + CAMELLIA_F(8, 2, 0, 2) + CAMELLIA_F(10, 0, 2, 0) + ldd [%o3 + 0x10], %f8 + ldd [%o3 + 0x18], %f10 + CAMELLIA_F(12, 2, 0, 2) + CAMELLIA_F(14, 0, 2, 0) + ldd [%o3 + 0x20], %f12 + ldd [%o3 + 0x28], %f14 + CAMELLIA_F(16, 2, 0, 2) + CAMELLIA_F(18, 0, 2, 0) + ldd [%o3 + 0x30], %f16 + ldd [%o3 + 0x38], %f18 + fxor %f20, %f2, %f60 + fxor %f22, %f0, %f62 + ldd [%o3 + 0x40], %f20 + ldd [%o3 + 0x48], %f22 + std %f60, [%o1 + 0x00] + std %f62, [%o1 + 0x08] + subcc %o2, 0x10, %o2 + bne,pt %icc, 1b + add %o1, 0x10, %o1 + std %f60, [%o4 + 0x00] + retl + std %f62, [%o4 + 0x08] +ENDPROC(camellia_sparc64_cbc_encrypt_4_grand_rounds) + + .align 32 +ENTRY(camellia_sparc64_cbc_decrypt_3_grand_rounds) + /* %o0=input, %o1=output, %o2=len, %o3=key, %o4=IV */ + ldd [%o4 + 0x00], %f60 + ldd [%o4 + 0x08], %f62 +1: ldd [%o0 + 0x00], %f56 + ldd [%o0 + 0x08], %f58 + add %o0, 0x10, %o0 + fxor %f4, %f56, %f0 + fxor %f6, %f58, %f2 + CAMELLIA_6ROUNDS_FL_FLI( 8, 0, 2) + CAMELLIA_6ROUNDS_FL_FLI(24, 0, 2) + CAMELLIA_6ROUNDS(40, 0, 2) + fxor %f52, %f2, %f2 + fxor %f54, %f0, %f0 + fxor %f60, %f2, %f2 + fxor %f62, %f0, %f0 + fsrc2 %f56, %f60 + fsrc2 %f58, %f62 + std %f2, [%o1 + 0x00] + std %f0, [%o1 + 0x08] + subcc %o2, 0x10, %o2 + bne,pt %icc, 1b + add %o1, 0x10, %o1 + std %f60, [%o4 + 0x00] + retl + std %f62, [%o4 + 0x08] +ENDPROC(camellia_sparc64_cbc_decrypt_3_grand_rounds) + + .align 32 +ENTRY(camellia_sparc64_cbc_decrypt_4_grand_rounds) + /* %o0=input, %o1=output, %o2=len, %o3=key, %o4=IV */ + ldd [%o4 + 0x00], %f60 + ldd [%o4 + 0x08], %f62 +1: ldd [%o0 + 0x00], %f56 + ldd [%o0 + 0x08], %f58 + add %o0, 0x10, %o0 + fxor %f4, %f56, %f0 + fxor %f6, %f58, %f2 + CAMELLIA_6ROUNDS_FL_FLI( 8, 0, 2) + ldd [%o3 + 0xd0], %f8 + ldd [%o3 + 0xd8], %f10 + ldd [%o3 + 0xe0], %f12 + ldd [%o3 + 0xe8], %f14 + ldd [%o3 + 0xf0], %f16 + ldd [%o3 + 0xf8], %f18 + ldd [%o3 + 0x100], %f20 + ldd [%o3 + 0x108], %f22 + CAMELLIA_6ROUNDS_FL_FLI(24, 0, 2) + CAMELLIA_6ROUNDS_FL_FLI(40, 0, 2) + CAMELLIA_F(8, 2, 0, 2) + CAMELLIA_F(10, 0, 2, 0) + ldd [%o3 + 0x10], %f8 + ldd [%o3 + 0x18], %f10 + CAMELLIA_F(12, 2, 0, 2) + CAMELLIA_F(14, 0, 2, 0) + ldd [%o3 + 0x20], %f12 + ldd [%o3 + 0x28], %f14 + CAMELLIA_F(16, 2, 0, 2) + CAMELLIA_F(18, 0, 2, 0) + ldd [%o3 + 0x30], %f16 + ldd [%o3 + 0x38], %f18 + fxor %f20, %f2, %f2 + fxor %f22, %f0, %f0 + ldd [%o3 + 0x40], %f20 + ldd [%o3 + 0x48], %f22 + fxor %f60, %f2, %f2 + fxor %f62, %f0, %f0 + fsrc2 %f56, %f60 + fsrc2 %f58, %f62 + std %f2, [%o1 + 0x00] + std %f0, [%o1 + 0x08] + subcc %o2, 0x10, %o2 + bne,pt %icc, 1b + add %o1, 0x10, %o1 + std %f60, [%o4 + 0x00] + retl + std %f62, [%o4 + 0x08] +ENDPROC(camellia_sparc64_cbc_decrypt_4_grand_rounds) diff --git a/arch/sparc/crypto/camellia_glue.c b/arch/sparc/crypto/camellia_glue.c new file mode 100644 index 000000000000..42905c084299 --- /dev/null +++ b/arch/sparc/crypto/camellia_glue.c @@ -0,0 +1,322 @@ +/* Glue code for CAMELLIA encryption optimized for sparc64 crypto opcodes. + * + * Copyright (C) 2012 David S. Miller <davem@davemloft.net> + */ + +#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt + +#include <linux/crypto.h> +#include <linux/init.h> +#include <linux/module.h> +#include <linux/mm.h> +#include <linux/types.h> +#include <crypto/algapi.h> + +#include <asm/fpumacro.h> +#include <asm/pstate.h> +#include <asm/elf.h> + +#include "opcodes.h" + +#define CAMELLIA_MIN_KEY_SIZE 16 +#define CAMELLIA_MAX_KEY_SIZE 32 +#define CAMELLIA_BLOCK_SIZE 16 +#define CAMELLIA_TABLE_BYTE_LEN 272 + +struct camellia_sparc64_ctx { + u64 encrypt_key[CAMELLIA_TABLE_BYTE_LEN / sizeof(u64)]; + u64 decrypt_key[CAMELLIA_TABLE_BYTE_LEN / sizeof(u64)]; + int key_len; +}; + +extern void camellia_sparc64_key_expand(const u32 *in_key, u64 *encrypt_key, + unsigned int key_len, u64 *decrypt_key); + +static int camellia_set_key(struct crypto_tfm *tfm, const u8 *_in_key, + unsigned int key_len) +{ + struct camellia_sparc64_ctx *ctx = crypto_tfm_ctx(tfm); + const u32 *in_key = (const u32 *) _in_key; + u32 *flags = &tfm->crt_flags; + + if (key_len != 16 && key_len != 24 && key_len != 32) { + *flags |= CRYPTO_TFM_RES_BAD_KEY_LEN; + return -EINVAL; + } + + ctx->key_len = key_len; + + camellia_sparc64_key_expand(in_key, &ctx->encrypt_key[0], + key_len, &ctx->decrypt_key[0]); + return 0; +} + +extern void camellia_sparc64_crypt(const u64 *key, const u32 *input, + u32 *output, unsigned int key_len); + +static void camellia_encrypt(struct crypto_tfm *tfm, u8 *dst, const u8 *src) +{ + struct camellia_sparc64_ctx *ctx = crypto_tfm_ctx(tfm); + + camellia_sparc64_crypt(&ctx->encrypt_key[0], + (const u32 *) src, + (u32 *) dst, ctx->key_len); +} + +static void camellia_decrypt(struct crypto_tfm *tfm, u8 *dst, const u8 *src) +{ + struct camellia_sparc64_ctx *ctx = crypto_tfm_ctx(tfm); + + camellia_sparc64_crypt(&ctx->decrypt_key[0], + (const u32 *) src, + (u32 *) dst, ctx->key_len); +} + +extern void camellia_sparc64_load_keys(const u64 *key, unsigned int key_len); + +typedef void ecb_crypt_op(const u64 *input, u64 *output, unsigned int len, + const u64 *key); + +extern ecb_crypt_op camellia_sparc64_ecb_crypt_3_grand_rounds; +extern ecb_crypt_op camellia_sparc64_ecb_crypt_4_grand_rounds; + +#define CAMELLIA_BLOCK_MASK (~(CAMELLIA_BLOCK_SIZE - 1)) + +static int __ecb_crypt(struct blkcipher_desc *desc, + struct scatterlist *dst, struct scatterlist *src, + unsigned int nbytes, bool encrypt) +{ + struct camellia_sparc64_ctx *ctx = crypto_blkcipher_ctx(desc->tfm); + struct blkcipher_walk walk; + ecb_crypt_op *op; + const u64 *key; + int err; + + op = camellia_sparc64_ecb_crypt_3_grand_rounds; + if (ctx->key_len != 16) + op = camellia_sparc64_ecb_crypt_4_grand_rounds; + + blkcipher_walk_init(&walk, dst, src, nbytes); + err = blkcipher_walk_virt(desc, &walk); + + if (encrypt) + key = &ctx->encrypt_key[0]; + else + key = &ctx->decrypt_key[0]; + camellia_sparc64_load_keys(key, ctx->key_len); + while ((nbytes = walk.nbytes)) { + unsigned int block_len = nbytes & CAMELLIA_BLOCK_MASK; + + if (likely(block_len)) { + const u64 *src64; + u64 *dst64; + + src64 = (const u64 *)walk.src.virt.addr; + dst64 = (u64 *) walk.dst.virt.addr; + op(src64, dst64, block_len, key); + } + nbytes &= CAMELLIA_BLOCK_SIZE - 1; + err = blkcipher_walk_done(desc, &walk, nbytes); + } + fprs_write(0); + return err; +} + +static int ecb_encrypt(struct blkcipher_desc *desc, + struct scatterlist *dst, struct scatterlist *src, + unsigned int nbytes) +{ + return __ecb_crypt(desc, dst, src, nbytes, true); +} + +static int ecb_decrypt(struct blkcipher_desc *desc, + struct scatterlist *dst, struct scatterlist *src, + unsigned int nbytes) +{ + return __ecb_crypt(desc, dst, src, nbytes, false); +} + +typedef void cbc_crypt_op(const u64 *input, u64 *output, unsigned int len, + const u64 *key, u64 *iv); + +extern cbc_crypt_op camellia_sparc64_cbc_encrypt_3_grand_rounds; +extern cbc_crypt_op camellia_sparc64_cbc_encrypt_4_grand_rounds; +extern cbc_crypt_op camellia_sparc64_cbc_decrypt_3_grand_rounds; +extern cbc_crypt_op camellia_sparc64_cbc_decrypt_4_grand_rounds; + +static int cbc_encrypt(struct blkcipher_desc *desc, + struct scatterlist *dst, struct scatterlist *src, + unsigned int nbytes) +{ + struct camellia_sparc64_ctx *ctx = crypto_blkcipher_ctx(desc->tfm); + struct blkcipher_walk walk; + cbc_crypt_op *op; + const u64 *key; + int err; + + op = camellia_sparc64_cbc_encrypt_3_grand_rounds; + if (ctx->key_len != 16) + op = camellia_sparc64_cbc_encrypt_4_grand_rounds; + + blkcipher_walk_init(&walk, dst, src, nbytes); + err = blkcipher_walk_virt(desc, &walk); + + key = &ctx->encrypt_key[0]; + camellia_sparc64_load_keys(key, ctx->key_len); + while ((nbytes = walk.nbytes)) { + unsigned int block_len = nbytes & CAMELLIA_BLOCK_MASK; + + if (likely(block_len)) { + const u64 *src64; + u64 *dst64; + + src64 = (const u64 *)walk.src.virt.addr; + dst64 = (u64 *) walk.dst.virt.addr; + op(src64, dst64, block_len, key, + (u64 *) walk.iv); + } + nbytes &= CAMELLIA_BLOCK_SIZE - 1; + err = blkcipher_walk_done(desc, &walk, nbytes); + } + fprs_write(0); + return err; +} + +static int cbc_decrypt(struct blkcipher_desc *desc, + struct scatterlist *dst, struct scatterlist *src, + unsigned int nbytes) +{ + struct camellia_sparc64_ctx *ctx = crypto_blkcipher_ctx(desc->tfm); + struct blkcipher_walk walk; + cbc_crypt_op *op; + const u64 *key; + int err; + + op = camellia_sparc64_cbc_decrypt_3_grand_rounds; + if (ctx->key_len != 16) + op = camellia_sparc64_cbc_decrypt_4_grand_rounds; + + blkcipher_walk_init(&walk, dst, src, nbytes); + err = blkcipher_walk_virt(desc, &walk); + + key = &ctx->decrypt_key[0]; + camellia_sparc64_load_keys(key, ctx->key_len); + while ((nbytes = walk.nbytes)) { + unsigned int block_len = nbytes & CAMELLIA_BLOCK_MASK; + + if (likely(block_len)) { + const u64 *src64; + u64 *dst64; + + src64 = (const u64 *)walk.src.virt.addr; + dst64 = (u64 *) walk.dst.virt.addr; + op(src64, dst64, block_len, key, + (u64 *) walk.iv); + } + nbytes &= CAMELLIA_BLOCK_SIZE - 1; + err = blkcipher_walk_done(desc, &walk, nbytes); + } + fprs_write(0); + return err; +} + +static struct crypto_alg algs[] = { { + .cra_name = "camellia", + .cra_driver_name = "camellia-sparc64", + .cra_priority = SPARC_CR_OPCODE_PRIORITY, + .cra_flags = CRYPTO_ALG_TYPE_CIPHER, + .cra_blocksize = CAMELLIA_BLOCK_SIZE, + .cra_ctxsize = sizeof(struct camellia_sparc64_ctx), + .cra_alignmask = 3, + .cra_module = THIS_MODULE, + .cra_u = { + .cipher = { + .cia_min_keysize = CAMELLIA_MIN_KEY_SIZE, + .cia_max_keysize = CAMELLIA_MAX_KEY_SIZE, + .cia_setkey = camellia_set_key, + .cia_encrypt = camellia_encrypt, + .cia_decrypt = camellia_decrypt + } + } +}, { + .cra_name = "ecb(camellia)", + .cra_driver_name = "ecb-camellia-sparc64", + .cra_priority = SPARC_CR_OPCODE_PRIORITY, + .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER, + .cra_blocksize = CAMELLIA_BLOCK_SIZE, + .cra_ctxsize = sizeof(struct camellia_sparc64_ctx), + .cra_alignmask = 7, + .cra_type = &crypto_blkcipher_type, + .cra_module = THIS_MODULE, + .cra_u = { + .blkcipher = { + .min_keysize = CAMELLIA_MIN_KEY_SIZE, + .max_keysize = CAMELLIA_MAX_KEY_SIZE, + .setkey = camellia_set_key, + .encrypt = ecb_encrypt, + .decrypt = ecb_decrypt, + }, + }, +}, { + .cra_name = "cbc(camellia)", + .cra_driver_name = "cbc-camellia-sparc64", + .cra_priority = SPARC_CR_OPCODE_PRIORITY, + .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER, + .cra_blocksize = CAMELLIA_BLOCK_SIZE, + .cra_ctxsize = sizeof(struct camellia_sparc64_ctx), + .cra_alignmask = 7, + .cra_type = &crypto_blkcipher_type, + .cra_module = THIS_MODULE, + .cra_u = { + .blkcipher = { + .min_keysize = CAMELLIA_MIN_KEY_SIZE, + .max_keysize = CAMELLIA_MAX_KEY_SIZE, + .setkey = camellia_set_key, + .encrypt = cbc_encrypt, + .decrypt = cbc_decrypt, + }, + }, +} +}; + +static bool __init sparc64_has_camellia_opcode(void) +{ + unsigned long cfr; + + if (!(sparc64_elf_hwcap & HWCAP_SPARC_CRYPTO)) + return false; + + __asm__ __volatile__("rd %%asr26, %0" : "=r" (cfr)); + if (!(cfr & CFR_CAMELLIA)) + return false; + + return true; +} + +static int __init camellia_sparc64_mod_init(void) +{ + int i; + + for (i = 0; i < ARRAY_SIZE(algs); i++) + INIT_LIST_HEAD(&algs[i].cra_list); + + if (sparc64_has_camellia_opcode()) { + pr_info("Using sparc64 camellia opcodes optimized CAMELLIA implementation\n"); + return crypto_register_algs(algs, ARRAY_SIZE(algs)); + } + pr_info("sparc64 camellia opcodes not available.\n"); + return -ENODEV; +} + +static void __exit camellia_sparc64_mod_fini(void) +{ + crypto_unregister_algs(algs, ARRAY_SIZE(algs)); +} + +module_init(camellia_sparc64_mod_init); +module_exit(camellia_sparc64_mod_fini); + +MODULE_LICENSE("GPL"); +MODULE_DESCRIPTION("Camellia Cipher Algorithm, sparc64 camellia opcode accelerated"); + +MODULE_ALIAS("aes"); diff --git a/arch/sparc/crypto/crc32c_asm.S b/arch/sparc/crypto/crc32c_asm.S new file mode 100644 index 000000000000..2b1976e765b5 --- /dev/null +++ b/arch/sparc/crypto/crc32c_asm.S @@ -0,0 +1,20 @@ +#include <linux/linkage.h> +#include <asm/visasm.h> +#include <asm/asi.h> + +#include "opcodes.h" + +ENTRY(crc32c_sparc64) + /* %o0=crc32p, %o1=data_ptr, %o2=len */ + VISEntryHalf + lda [%o0] ASI_PL, %f1 +1: ldd [%o1], %f2 + CRC32C(0,2,0) + subcc %o2, 8, %o2 + bne,pt %icc, 1b + add %o1, 0x8, %o1 + sta %f1, [%o0] ASI_PL + VISExitHalf +2: retl + nop +ENDPROC(crc32c_sparc64) diff --git a/arch/sparc/crypto/crc32c_glue.c b/arch/sparc/crypto/crc32c_glue.c new file mode 100644 index 000000000000..0bd89cea8d8e --- /dev/null +++ b/arch/sparc/crypto/crc32c_glue.c @@ -0,0 +1,179 @@ +/* Glue code for CRC32C optimized for sparc64 crypto opcodes. + * + * This is based largely upon arch/x86/crypto/crc32c-intel.c + * + * Copyright (C) 2008 Intel Corporation + * Authors: Austin Zhang <austin_zhang@linux.intel.com> + * Kent Liu <kent.liu@intel.com> + */ + +#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt + +#include <linux/init.h> +#include <linux/module.h> +#include <linux/string.h> +#include <linux/kernel.h> +#include <linux/crc32.h> + +#include <crypto/internal/hash.h> + +#include <asm/pstate.h> +#include <asm/elf.h> + +#include "opcodes.h" + +/* + * Setting the seed allows arbitrary accumulators and flexible XOR policy + * If your algorithm starts with ~0, then XOR with ~0 before you set + * the seed. + */ +static int crc32c_sparc64_setkey(struct crypto_shash *hash, const u8 *key, + unsigned int keylen) +{ + u32 *mctx = crypto_shash_ctx(hash); + + if (keylen != sizeof(u32)) { + crypto_shash_set_flags(hash, CRYPTO_TFM_RES_BAD_KEY_LEN); + return -EINVAL; + } + *(__le32 *)mctx = le32_to_cpup((__le32 *)key); + return 0; +} + +static int crc32c_sparc64_init(struct shash_desc *desc) +{ + u32 *mctx = crypto_shash_ctx(desc->tfm); + u32 *crcp = shash_desc_ctx(desc); + + *crcp = *mctx; + + return 0; +} + +extern void crc32c_sparc64(u32 *crcp, const u64 *data, unsigned int len); + +static void crc32c_compute(u32 *crcp, const u64 *data, unsigned int len) +{ + unsigned int asm_len; + + asm_len = len & ~7U; + if (asm_len) { + crc32c_sparc64(crcp, data, asm_len); + data += asm_len / 8; + len -= asm_len; + } + if (len) + *crcp = __crc32c_le(*crcp, (const unsigned char *) data, len); +} + +static int crc32c_sparc64_update(struct shash_desc *desc, const u8 *data, + unsigned int len) +{ + u32 *crcp = shash_desc_ctx(desc); + + crc32c_compute(crcp, (const u64 *) data, len); + + return 0; +} + +static int __crc32c_sparc64_finup(u32 *crcp, const u8 *data, unsigned int len, + u8 *out) +{ + u32 tmp = *crcp; + + crc32c_compute(&tmp, (const u64 *) data, len); + + *(__le32 *) out = ~cpu_to_le32(tmp); + return 0; +} + +static int crc32c_sparc64_finup(struct shash_desc *desc, const u8 *data, + unsigned int len, u8 *out) +{ + return __crc32c_sparc64_finup(shash_desc_ctx(desc), data, len, out); +} + +static int crc32c_sparc64_final(struct shash_desc *desc, u8 *out) +{ + u32 *crcp = shash_desc_ctx(desc); + + *(__le32 *) out = ~cpu_to_le32p(crcp); + return 0; +} + +static int crc32c_sparc64_digest(struct shash_desc *desc, const u8 *data, + unsigned int len, u8 *out) +{ + return __crc32c_sparc64_finup(crypto_shash_ctx(desc->tfm), data, len, + out); +} + +static int crc32c_sparc64_cra_init(struct crypto_tfm *tfm) +{ + u32 *key = crypto_tfm_ctx(tfm); + + *key = ~0; + + return 0; +} + +#define CHKSUM_BLOCK_SIZE 1 +#define CHKSUM_DIGEST_SIZE 4 + +static struct shash_alg alg = { + .setkey = crc32c_sparc64_setkey, + .init = crc32c_sparc64_init, + .update = crc32c_sparc64_update, + .final = crc32c_sparc64_final, + .finup = crc32c_sparc64_finup, + .digest = crc32c_sparc64_digest, + .descsize = sizeof(u32), + .digestsize = CHKSUM_DIGEST_SIZE, + .base = { + .cra_name = "crc32c", + .cra_driver_name = "crc32c-sparc64", + .cra_priority = SPARC_CR_OPCODE_PRIORITY, + .cra_blocksize = CHKSUM_BLOCK_SIZE, + .cra_ctxsize = sizeof(u32), + .cra_alignmask = 7, + .cra_module = THIS_MODULE, + .cra_init = crc32c_sparc64_cra_init, + } +}; + +static bool __init sparc64_has_crc32c_opcode(void) +{ + unsigned long cfr; + + if (!(sparc64_elf_hwcap & HWCAP_SPARC_CRYPTO)) + return false; + + __asm__ __volatile__("rd %%asr26, %0" : "=r" (cfr)); + if (!(cfr & CFR_CRC32C)) + return false; + + return true; +} + +static int __init crc32c_sparc64_mod_init(void) +{ + if (sparc64_has_crc32c_opcode()) { + pr_info("Using sparc64 crc32c opcode optimized CRC32C implementation\n"); + return crypto_register_shash(&alg); + } + pr_info("sparc64 crc32c opcode not available.\n"); + return -ENODEV; +} + +static void __exit crc32c_sparc64_mod_fini(void) +{ + crypto_unregister_shash(&alg); +} + +module_init(crc32c_sparc64_mod_init); +module_exit(crc32c_sparc64_mod_fini); + +MODULE_LICENSE("GPL"); +MODULE_DESCRIPTION("CRC32c (Castagnoli), sparc64 crc32c opcode accelerated"); + +MODULE_ALIAS("crc32c"); diff --git a/arch/sparc/crypto/crop_devid.c b/arch/sparc/crypto/crop_devid.c new file mode 100644 index 000000000000..5f5724a0ae22 --- /dev/null +++ b/arch/sparc/crypto/crop_devid.c @@ -0,0 +1,14 @@ +#include <linux/module.h> +#include <linux/of_device.h> + +/* This is a dummy device table linked into all of the crypto + * opcode drivers. It serves to trigger the module autoloading + * mechanisms in userspace which scan the OF device tree and + * load any modules which have device table entries that + * match OF device nodes. + */ +static const struct of_device_id crypto_opcode_match[] = { + { .name = "cpu", .compatible = "sun4v", }, + {}, +}; +MODULE_DEVICE_TABLE(of, crypto_opcode_match); diff --git a/arch/sparc/crypto/des_asm.S b/arch/sparc/crypto/des_asm.S new file mode 100644 index 000000000000..30b6e90b28b2 --- /dev/null +++ b/arch/sparc/crypto/des_asm.S @@ -0,0 +1,418 @@ +#include <linux/linkage.h> +#include <asm/visasm.h> + +#include "opcodes.h" + + .align 32 +ENTRY(des_sparc64_key_expand) + /* %o0=input_key, %o1=output_key */ + VISEntryHalf + ld [%o0 + 0x00], %f0 + ld [%o0 + 0x04], %f1 + DES_KEXPAND(0, 0, 0) + DES_KEXPAND(0, 1, 2) + DES_KEXPAND(2, 3, 6) + DES_KEXPAND(2, 2, 4) + DES_KEXPAND(6, 3, 10) + DES_KEXPAND(6, 2, 8) + DES_KEXPAND(10, 3, 14) + DES_KEXPAND(10, 2, 12) + DES_KEXPAND(14, 1, 16) + DES_KEXPAND(16, 3, 20) + DES_KEXPAND(16, 2, 18) + DES_KEXPAND(20, 3, 24) + DES_KEXPAND(20, 2, 22) + DES_KEXPAND(24, 3, 28) + DES_KEXPAND(24, 2, 26) + DES_KEXPAND(28, 1, 30) + std %f0, [%o1 + 0x00] + std %f2, [%o1 + 0x08] + std %f4, [%o1 + 0x10] + std %f6, [%o1 + 0x18] + std %f8, [%o1 + 0x20] + std %f10, [%o1 + 0x28] + std %f12, [%o1 + 0x30] + std %f14, [%o1 + 0x38] + std %f16, [%o1 + 0x40] + std %f18, [%o1 + 0x48] + std %f20, [%o1 + 0x50] + std %f22, [%o1 + 0x58] + std %f24, [%o1 + 0x60] + std %f26, [%o1 + 0x68] + std %f28, [%o1 + 0x70] + std %f30, [%o1 + 0x78] + retl + VISExitHalf +ENDPROC(des_sparc64_key_expand) + + .align 32 +ENTRY(des_sparc64_crypt) + /* %o0=key, %o1=input, %o2=output */ + VISEntry + ldd [%o1 + 0x00], %f32 + ldd [%o0 + 0x00], %f0 + ldd [%o0 + 0x08], %f2 + ldd [%o0 + 0x10], %f4 + ldd [%o0 + 0x18], %f6 + ldd [%o0 + 0x20], %f8 + ldd [%o0 + 0x28], %f10 + ldd [%o0 + 0x30], %f12 + ldd [%o0 + 0x38], %f14 + ldd [%o0 + 0x40], %f16 + ldd [%o0 + 0x48], %f18 + ldd [%o0 + 0x50], %f20 + ldd [%o0 + 0x58], %f22 + ldd [%o0 + 0x60], %f24 + ldd [%o0 + 0x68], %f26 + ldd [%o0 + 0x70], %f28 + ldd [%o0 + 0x78], %f30 + DES_IP(32, 32) + DES_ROUND(0, 2, 32, 32) + DES_ROUND(4, 6, 32, 32) + DES_ROUND(8, 10, 32, 32) + DES_ROUND(12, 14, 32, 32) + DES_ROUND(16, 18, 32, 32) + DES_ROUND(20, 22, 32, 32) + DES_ROUND(24, 26, 32, 32) + DES_ROUND(28, 30, 32, 32) + DES_IIP(32, 32) + std %f32, [%o2 + 0x00] + retl + VISExit +ENDPROC(des_sparc64_crypt) + + .align 32 +ENTRY(des_sparc64_load_keys) + /* %o0=key */ + VISEntry + ldd [%o0 + 0x00], %f0 + ldd [%o0 + 0x08], %f2 + ldd [%o0 + 0x10], %f4 + ldd [%o0 + 0x18], %f6 + ldd [%o0 + 0x20], %f8 + ldd [%o0 + 0x28], %f10 + ldd [%o0 + 0x30], %f12 + ldd [%o0 + 0x38], %f14 + ldd [%o0 + 0x40], %f16 + ldd [%o0 + 0x48], %f18 + ldd [%o0 + 0x50], %f20 + ldd [%o0 + 0x58], %f22 + ldd [%o0 + 0x60], %f24 + ldd [%o0 + 0x68], %f26 + ldd [%o0 + 0x70], %f28 + retl + ldd [%o0 + 0x78], %f30 +ENDPROC(des_sparc64_load_keys) + + .align 32 +ENTRY(des_sparc64_ecb_crypt) + /* %o0=input, %o1=output, %o2=len */ +1: ldd [%o0 + 0x00], %f32 + add %o0, 0x08, %o0 + DES_IP(32, 32) + DES_ROUND(0, 2, 32, 32) + DES_ROUND(4, 6, 32, 32) + DES_ROUND(8, 10, 32, 32) + DES_ROUND(12, 14, 32, 32) + DES_ROUND(16, 18, 32, 32) + DES_ROUND(20, 22, 32, 32) + DES_ROUND(24, 26, 32, 32) + DES_ROUND(28, 30, 32, 32) + DES_IIP(32, 32) + std %f32, [%o1 + 0x00] + subcc %o2, 0x08, %o2 + bne,pt %icc, 1b + add %o1, 0x08, %o1 + retl + nop +ENDPROC(des_sparc64_ecb_crypt) + + .align 32 +ENTRY(des_sparc64_cbc_encrypt) + /* %o0=input, %o1=output, %o2=len, %o3=IV */ + ldd [%o3 + 0x00], %f32 +1: ldd [%o0 + 0x00], %f34 + fxor %f32, %f34, %f32 + DES_IP(32, 32) + DES_ROUND(0, 2, 32, 32) + DES_ROUND(4, 6, 32, 32) + DES_ROUND(8, 10, 32, 32) + DES_ROUND(12, 14, 32, 32) + DES_ROUND(16, 18, 32, 32) + DES_ROUND(20, 22, 32, 32) + DES_ROUND(24, 26, 32, 32) + DES_ROUND(28, 30, 32, 32) + DES_IIP(32, 32) + std %f32, [%o1 + 0x00] + add %o0, 0x08, %o0 + subcc %o2, 0x08, %o2 + bne,pt %icc, 1b + add %o1, 0x08, %o1 + retl + std %f32, [%o3 + 0x00] +ENDPROC(des_sparc64_cbc_encrypt) + + .align 32 +ENTRY(des_sparc64_cbc_decrypt) + /* %o0=input, %o1=output, %o2=len, %o3=IV */ + ldd [%o3 + 0x00], %f34 +1: ldd [%o0 + 0x00], %f36 + DES_IP(36, 32) + DES_ROUND(0, 2, 32, 32) + DES_ROUND(4, 6, 32, 32) + DES_ROUND(8, 10, 32, 32) + DES_ROUND(12, 14, 32, 32) + DES_ROUND(16, 18, 32, 32) + DES_ROUND(20, 22, 32, 32) + DES_ROUND(24, 26, 32, 32) + DES_ROUND(28, 30, 32, 32) + DES_IIP(32, 32) + fxor %f32, %f34, %f32 + fsrc2 %f36, %f34 + std %f32, [%o1 + 0x00] + add %o0, 0x08, %o0 + subcc %o2, 0x08, %o2 + bne,pt %icc, 1b + add %o1, 0x08, %o1 + retl + std %f36, [%o3 + 0x00] +ENDPROC(des_sparc64_cbc_decrypt) + + .align 32 +ENTRY(des3_ede_sparc64_crypt) + /* %o0=key, %o1=input, %o2=output */ + VISEntry + ldd [%o1 + 0x00], %f32 + ldd [%o0 + 0x00], %f0 + ldd [%o0 + 0x08], %f2 + ldd [%o0 + 0x10], %f4 + ldd [%o0 + 0x18], %f6 + ldd [%o0 + 0x20], %f8 + ldd [%o0 + 0x28], %f10 + ldd [%o0 + 0x30], %f12 + ldd [%o0 + 0x38], %f14 + ldd [%o0 + 0x40], %f16 + ldd [%o0 + 0x48], %f18 + ldd [%o0 + 0x50], %f20 + ldd [%o0 + 0x58], %f22 + ldd [%o0 + 0x60], %f24 + ldd [%o0 + 0x68], %f26 + ldd [%o0 + 0x70], %f28 + ldd [%o0 + 0x78], %f30 + DES_IP(32, 32) + DES_ROUND(0, 2, 32, 32) + ldd [%o0 + 0x80], %f0 + ldd [%o0 + 0x88], %f2 + DES_ROUND(4, 6, 32, 32) + ldd [%o0 + 0x90], %f4 + ldd [%o0 + 0x98], %f6 + DES_ROUND(8, 10, 32, 32) + ldd [%o0 + 0xa0], %f8 + ldd [%o0 + 0xa8], %f10 + DES_ROUND(12, 14, 32, 32) + ldd [%o0 + 0xb0], %f12 + ldd [%o0 + 0xb8], %f14 + DES_ROUND(16, 18, 32, 32) + ldd [%o0 + 0xc0], %f16 + ldd [%o0 + 0xc8], %f18 + DES_ROUND(20, 22, 32, 32) + ldd [%o0 + 0xd0], %f20 + ldd [%o0 + 0xd8], %f22 + DES_ROUND(24, 26, 32, 32) + ldd [%o0 + 0xe0], %f24 + ldd [%o0 + 0xe8], %f26 + DES_ROUND(28, 30, 32, 32) + ldd [%o0 + 0xf0], %f28 + ldd [%o0 + 0xf8], %f30 + DES_IIP(32, 32) + DES_IP(32, 32) + DES_ROUND(0, 2, 32, 32) + ldd [%o0 + 0x100], %f0 + ldd [%o0 + 0x108], %f2 + DES_ROUND(4, 6, 32, 32) + ldd [%o0 + 0x110], %f4 + ldd [%o0 + 0x118], %f6 + DES_ROUND(8, 10, 32, 32) + ldd [%o0 + 0x120], %f8 + ldd [%o0 + 0x128], %f10 + DES_ROUND(12, 14, 32, 32) + ldd [%o0 + 0x130], %f12 + ldd [%o0 + 0x138], %f14 + DES_ROUND(16, 18, 32, 32) + ldd [%o0 + 0x140], %f16 + ldd [%o0 + 0x148], %f18 + DES_ROUND(20, 22, 32, 32) + ldd [%o0 + 0x150], %f20 + ldd [%o0 + 0x158], %f22 + DES_ROUND(24, 26, 32, 32) + ldd [%o0 + 0x160], %f24 + ldd [%o0 + 0x168], %f26 + DES_ROUND(28, 30, 32, 32) + ldd [%o0 + 0x170], %f28 + ldd [%o0 + 0x178], %f30 + DES_IIP(32, 32) + DES_IP(32, 32) + DES_ROUND(0, 2, 32, 32) + DES_ROUND(4, 6, 32, 32) + DES_ROUND(8, 10, 32, 32) + DES_ROUND(12, 14, 32, 32) + DES_ROUND(16, 18, 32, 32) + DES_ROUND(20, 22, 32, 32) + DES_ROUND(24, 26, 32, 32) + DES_ROUND(28, 30, 32, 32) + DES_IIP(32, 32) + + std %f32, [%o2 + 0x00] + retl + VISExit +ENDPROC(des3_ede_sparc64_crypt) + + .align 32 +ENTRY(des3_ede_sparc64_load_keys) + /* %o0=key */ + VISEntry + ldd [%o0 + 0x00], %f0 + ldd [%o0 + 0x08], %f2 + ldd [%o0 + 0x10], %f4 + ldd [%o0 + 0x18], %f6 + ldd [%o0 + 0x20], %f8 + ldd [%o0 + 0x28], %f10 + ldd [%o0 + 0x30], %f12 + ldd [%o0 + 0x38], %f14 + ldd [%o0 + 0x40], %f16 + ldd [%o0 + 0x48], %f18 + ldd [%o0 + 0x50], %f20 + ldd [%o0 + 0x58], %f22 + ldd [%o0 + 0x60], %f24 + ldd [%o0 + 0x68], %f26 + ldd [%o0 + 0x70], %f28 + ldd [%o0 + 0x78], %f30 + ldd [%o0 + 0x80], %f32 + ldd [%o0 + 0x88], %f34 + ldd [%o0 + 0x90], %f36 + ldd [%o0 + 0x98], %f38 + ldd [%o0 + 0xa0], %f40 + ldd [%o0 + 0xa8], %f42 + ldd [%o0 + 0xb0], %f44 + ldd [%o0 + 0xb8], %f46 + ldd [%o0 + 0xc0], %f48 + ldd [%o0 + 0xc8], %f50 + ldd [%o0 + 0xd0], %f52 + ldd [%o0 + 0xd8], %f54 + ldd [%o0 + 0xe0], %f56 + retl + ldd [%o0 + 0xe8], %f58 +ENDPROC(des3_ede_sparc64_load_keys) + +#define DES3_LOOP_BODY(X) \ + DES_IP(X, X) \ + DES_ROUND(0, 2, X, X) \ + DES_ROUND(4, 6, X, X) \ + DES_ROUND(8, 10, X, X) \ + DES_ROUND(12, 14, X, X) \ + DES_ROUND(16, 18, X, X) \ + ldd [%o0 + 0xf0], %f16; \ + ldd [%o0 + 0xf8], %f18; \ + DES_ROUND(20, 22, X, X) \ + ldd [%o0 + 0x100], %f20; \ + ldd [%o0 + 0x108], %f22; \ + DES_ROUND(24, 26, X, X) \ + ldd [%o0 + 0x110], %f24; \ + ldd [%o0 + 0x118], %f26; \ + DES_ROUND(28, 30, X, X) \ + ldd [%o0 + 0x120], %f28; \ + ldd [%o0 + 0x128], %f30; \ + DES_IIP(X, X) \ + DES_IP(X, X) \ + DES_ROUND(32, 34, X, X) \ + ldd [%o0 + 0x130], %f0; \ + ldd [%o0 + 0x138], %f2; \ + DES_ROUND(36, 38, X, X) \ + ldd [%o0 + 0x140], %f4; \ + ldd [%o0 + 0x148], %f6; \ + DES_ROUND(40, 42, X, X) \ + ldd [%o0 + 0x150], %f8; \ + ldd [%o0 + 0x158], %f10; \ + DES_ROUND(44, 46, X, X) \ + ldd [%o0 + 0x160], %f12; \ + ldd [%o0 + 0x168], %f14; \ + DES_ROUND(48, 50, X, X) \ + DES_ROUND(52, 54, X, X) \ + DES_ROUND(56, 58, X, X) \ + DES_ROUND(16, 18, X, X) \ + ldd [%o0 + 0x170], %f16; \ + ldd [%o0 + 0x178], %f18; \ + DES_IIP(X, X) \ + DES_IP(X, X) \ + DES_ROUND(20, 22, X, X) \ + ldd [%o0 + 0x50], %f20; \ + ldd [%o0 + 0x58], %f22; \ + DES_ROUND(24, 26, X, X) \ + ldd [%o0 + 0x60], %f24; \ + ldd [%o0 + 0x68], %f26; \ + DES_ROUND(28, 30, X, X) \ + ldd [%o0 + 0x70], %f28; \ + ldd [%o0 + 0x78], %f30; \ + DES_ROUND(0, 2, X, X) \ + ldd [%o0 + 0x00], %f0; \ + ldd [%o0 + 0x08], %f2; \ + DES_ROUND(4, 6, X, X) \ + ldd [%o0 + 0x10], %f4; \ + ldd [%o0 + 0x18], %f6; \ + DES_ROUND(8, 10, X, X) \ + ldd [%o0 + 0x20], %f8; \ + ldd [%o0 + 0x28], %f10; \ + DES_ROUND(12, 14, X, X) \ + ldd [%o0 + 0x30], %f12; \ + ldd [%o0 + 0x38], %f14; \ + DES_ROUND(16, 18, X, X) \ + ldd [%o0 + 0x40], %f16; \ + ldd [%o0 + 0x48], %f18; \ + DES_IIP(X, X) + + .align 32 +ENTRY(des3_ede_sparc64_ecb_crypt) + /* %o0=key, %o1=input, %o2=output, %o3=len */ +1: ldd [%o1 + 0x00], %f60 + DES3_LOOP_BODY(60) + std %f60, [%o2 + 0x00] + subcc %o3, 0x08, %o3 + bne,pt %icc, 1b + add %o2, 0x08, %o2 + retl + nop +ENDPROC(des3_ede_sparc64_ecb_crypt) + + .align 32 +ENTRY(des3_ede_sparc64_cbc_encrypt) + /* %o0=key, %o1=input, %o2=output, %o3=len, %o4=IV */ + ldd [%o4 + 0x00], %f60 +1: ldd [%o1 + 0x00], %f62 + fxor %f60, %f62, %f60 + DES3_LOOP_BODY(60) + std %f60, [%o2 + 0x00] + add %o1, 0x08, %o1 + subcc %o3, 0x08, %o3 + bne,pt %icc, 1b + add %o2, 0x08, %o2 + retl + std %f60, [%o4 + 0x00] +ENDPROC(des3_ede_sparc64_cbc_encrypt) + + .align 32 +ENTRY(des3_ede_sparc64_cbc_decrypt) + /* %o0=key, %o1=input, %o2=output, %o3=len, %o4=IV */ + ldd [%o4 + 0x00], %f62 +1: ldx [%o1 + 0x00], %g1 + MOVXTOD_G1_F60 + DES3_LOOP_BODY(60) + fxor %f62, %f60, %f60 + MOVXTOD_G1_F62 + std %f60, [%o2 + 0x00] + add %o1, 0x08, %o1 + subcc %o3, 0x08, %o3 + bne,pt %icc, 1b + add %o2, 0x08, %o2 + retl + stx %g1, [%o4 + 0x00] +ENDPROC(des3_ede_sparc64_cbc_decrypt) diff --git a/arch/sparc/crypto/des_glue.c b/arch/sparc/crypto/des_glue.c new file mode 100644 index 000000000000..c4940c2d3073 --- /dev/null +++ b/arch/sparc/crypto/des_glue.c @@ -0,0 +1,529 @@ +/* Glue code for DES encryption optimized for sparc64 crypto opcodes. + * + * Copyright (C) 2012 David S. Miller <davem@davemloft.net> + */ + +#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt + +#include <linux/crypto.h> +#include <linux/init.h> +#include <linux/module.h> +#include <linux/mm.h> +#include <linux/types.h> +#include <crypto/algapi.h> +#include <crypto/des.h> + +#include <asm/fpumacro.h> +#include <asm/pstate.h> +#include <asm/elf.h> + +#include "opcodes.h" + +struct des_sparc64_ctx { + u64 encrypt_expkey[DES_EXPKEY_WORDS / 2]; + u64 decrypt_expkey[DES_EXPKEY_WORDS / 2]; +}; + +struct des3_ede_sparc64_ctx { + u64 encrypt_expkey[DES3_EDE_EXPKEY_WORDS / 2]; + u64 decrypt_expkey[DES3_EDE_EXPKEY_WORDS / 2]; +}; + +static void encrypt_to_decrypt(u64 *d, const u64 *e) +{ + const u64 *s = e + (DES_EXPKEY_WORDS / 2) - 1; + int i; + + for (i = 0; i < DES_EXPKEY_WORDS / 2; i++) + *d++ = *s--; +} + +extern void des_sparc64_key_expand(const u32 *input_key, u64 *key); + +static int des_set_key(struct crypto_tfm *tfm, const u8 *key, + unsigned int keylen) +{ + struct des_sparc64_ctx *dctx = crypto_tfm_ctx(tfm); + u32 *flags = &tfm->crt_flags; + u32 tmp[DES_EXPKEY_WORDS]; + int ret; + + /* Even though we have special instructions for key expansion, + * we call des_ekey() so that we don't have to write our own + * weak key detection code. + */ + ret = des_ekey(tmp, key); + if (unlikely(ret == 0) && (*flags & CRYPTO_TFM_REQ_WEAK_KEY)) { + *flags |= CRYPTO_TFM_RES_WEAK_KEY; + return -EINVAL; + } + + des_sparc64_key_expand((const u32 *) key, &dctx->encrypt_expkey[0]); + encrypt_to_decrypt(&dctx->decrypt_expkey[0], &dctx->encrypt_expkey[0]); + + return 0; +} + +extern void des_sparc64_crypt(const u64 *key, const u64 *input, + u64 *output); + +static void des_encrypt(struct crypto_tfm *tfm, u8 *dst, const u8 *src) +{ + struct des_sparc64_ctx *ctx = crypto_tfm_ctx(tfm); + const u64 *K = ctx->encrypt_expkey; + + des_sparc64_crypt(K, (const u64 *) src, (u64 *) dst); +} + +static void des_decrypt(struct crypto_tfm *tfm, u8 *dst, const u8 *src) +{ + struct des_sparc64_ctx *ctx = crypto_tfm_ctx(tfm); + const u64 *K = ctx->decrypt_expkey; + + des_sparc64_crypt(K, (const u64 *) src, (u64 *) dst); +} + +extern void des_sparc64_load_keys(const u64 *key); + +extern void des_sparc64_ecb_crypt(const u64 *input, u64 *output, + unsigned int len); + +#define DES_BLOCK_MASK (~(DES_BLOCK_SIZE - 1)) + +static int __ecb_crypt(struct blkcipher_desc *desc, + struct scatterlist *dst, struct scatterlist *src, + unsigned int nbytes, bool encrypt) +{ + struct des_sparc64_ctx *ctx = crypto_blkcipher_ctx(desc->tfm); + struct blkcipher_walk walk; + int err; + + blkcipher_walk_init(&walk, dst, src, nbytes); + err = blkcipher_walk_virt(desc, &walk); + + if (encrypt) + des_sparc64_load_keys(&ctx->encrypt_expkey[0]); + else + des_sparc64_load_keys(&ctx->decrypt_expkey[0]); + while ((nbytes = walk.nbytes)) { + unsigned int block_len = nbytes & DES_BLOCK_MASK; + + if (likely(block_len)) { + des_sparc64_ecb_crypt((const u64 *)walk.src.virt.addr, + (u64 *) walk.dst.virt.addr, + block_len); + } + nbytes &= DES_BLOCK_SIZE - 1; + err = blkcipher_walk_done(desc, &walk, nbytes); + } + fprs_write(0); + return err; +} + +static int ecb_encrypt(struct blkcipher_desc *desc, + struct scatterlist *dst, struct scatterlist *src, + unsigned int nbytes) +{ + return __ecb_crypt(desc, dst, src, nbytes, true); +} + +static int ecb_decrypt(struct blkcipher_desc *desc, + struct scatterlist *dst, struct scatterlist *src, + unsigned int nbytes) +{ + return __ecb_crypt(desc, dst, src, nbytes, false); +} + +extern void des_sparc64_cbc_encrypt(const u64 *input, u64 *output, + unsigned int len, u64 *iv); + +static int cbc_encrypt(struct blkcipher_desc *desc, + struct scatterlist *dst, struct scatterlist *src, + unsigned int nbytes) +{ + struct des_sparc64_ctx *ctx = crypto_blkcipher_ctx(desc->tfm); + struct blkcipher_walk walk; + int err; + + blkcipher_walk_init(&walk, dst, src, nbytes); + err = blkcipher_walk_virt(desc, &walk); + + des_sparc64_load_keys(&ctx->encrypt_expkey[0]); + while ((nbytes = walk.nbytes)) { + unsigned int block_len = nbytes & DES_BLOCK_MASK; + + if (likely(block_len)) { + des_sparc64_cbc_encrypt((const u64 *)walk.src.virt.addr, + (u64 *) walk.dst.virt.addr, + block_len, (u64 *) walk.iv); + } + nbytes &= DES_BLOCK_SIZE - 1; + err = blkcipher_walk_done(desc, &walk, nbytes); + } + fprs_write(0); + return err; +} + +extern void des_sparc64_cbc_decrypt(const u64 *input, u64 *output, + unsigned int len, u64 *iv); + +static int cbc_decrypt(struct blkcipher_desc *desc, + struct scatterlist *dst, struct scatterlist *src, + unsigned int nbytes) +{ + struct des_sparc64_ctx *ctx = crypto_blkcipher_ctx(desc->tfm); + struct blkcipher_walk walk; + int err; + + blkcipher_walk_init(&walk, dst, src, nbytes); + err = blkcipher_walk_virt(desc, &walk); + + des_sparc64_load_keys(&ctx->decrypt_expkey[0]); + while ((nbytes = walk.nbytes)) { + unsigned int block_len = nbytes & DES_BLOCK_MASK; + + if (likely(block_len)) { + des_sparc64_cbc_decrypt((const u64 *)walk.src.virt.addr, + (u64 *) walk.dst.virt.addr, + block_len, (u64 *) walk.iv); + } + nbytes &= DES_BLOCK_SIZE - 1; + err = blkcipher_walk_done(desc, &walk, nbytes); + } + fprs_write(0); + return err; +} + +static int des3_ede_set_key(struct crypto_tfm *tfm, const u8 *key, + unsigned int keylen) +{ + struct des3_ede_sparc64_ctx *dctx = crypto_tfm_ctx(tfm); + const u32 *K = (const u32 *)key; + u32 *flags = &tfm->crt_flags; + u64 k1[DES_EXPKEY_WORDS / 2]; + u64 k2[DES_EXPKEY_WORDS / 2]; + u64 k3[DES_EXPKEY_WORDS / 2]; + + if (unlikely(!((K[0] ^ K[2]) | (K[1] ^ K[3])) || + !((K[2] ^ K[4]) | (K[3] ^ K[5]))) && + (*flags & CRYPTO_TFM_REQ_WEAK_KEY)) { + *flags |= CRYPTO_TFM_RES_WEAK_KEY; + return -EINVAL; + } + + des_sparc64_key_expand((const u32 *)key, k1); + key += DES_KEY_SIZE; + des_sparc64_key_expand((const u32 *)key, k2); + key += DES_KEY_SIZE; + des_sparc64_key_expand((const u32 *)key, k3); + + memcpy(&dctx->encrypt_expkey[0], &k1[0], sizeof(k1)); + encrypt_to_decrypt(&dctx->encrypt_expkey[DES_EXPKEY_WORDS / 2], &k2[0]); + memcpy(&dctx->encrypt_expkey[(DES_EXPKEY_WORDS / 2) * 2], + &k3[0], sizeof(k3)); + + encrypt_to_decrypt(&dctx->decrypt_expkey[0], &k3[0]); + memcpy(&dctx->decrypt_expkey[DES_EXPKEY_WORDS / 2], + &k2[0], sizeof(k2)); + encrypt_to_decrypt(&dctx->decrypt_expkey[(DES_EXPKEY_WORDS / 2) * 2], + &k1[0]); + + return 0; +} + +extern void des3_ede_sparc64_crypt(const u64 *key, const u64 *input, + u64 *output); + +static void des3_ede_encrypt(struct crypto_tfm *tfm, u8 *dst, const u8 *src) +{ + struct des3_ede_sparc64_ctx *ctx = crypto_tfm_ctx(tfm); + const u64 *K = ctx->encrypt_expkey; + + des3_ede_sparc64_crypt(K, (const u64 *) src, (u64 *) dst); +} + +static void des3_ede_decrypt(struct crypto_tfm *tfm, u8 *dst, const u8 *src) +{ + struct des3_ede_sparc64_ctx *ctx = crypto_tfm_ctx(tfm); + const u64 *K = ctx->decrypt_expkey; + + des3_ede_sparc64_crypt(K, (const u64 *) src, (u64 *) dst); +} + +extern void des3_ede_sparc64_load_keys(const u64 *key); + +extern void des3_ede_sparc64_ecb_crypt(const u64 *expkey, const u64 *input, + u64 *output, unsigned int len); + +static int __ecb3_crypt(struct blkcipher_desc *desc, + struct scatterlist *dst, struct scatterlist *src, + unsigned int nbytes, bool encrypt) +{ + struct des3_ede_sparc64_ctx *ctx = crypto_blkcipher_ctx(desc->tfm); + struct blkcipher_walk walk; + const u64 *K; + int err; + + blkcipher_walk_init(&walk, dst, src, nbytes); + err = blkcipher_walk_virt(desc, &walk); + + if (encrypt) + K = &ctx->encrypt_expkey[0]; + else + K = &ctx->decrypt_expkey[0]; + des3_ede_sparc64_load_keys(K); + while ((nbytes = walk.nbytes)) { + unsigned int block_len = nbytes & DES_BLOCK_MASK; + + if (likely(block_len)) { + const u64 *src64 = (const u64 *)walk.src.virt.addr; + des3_ede_sparc64_ecb_crypt(K, src64, + (u64 *) walk.dst.virt.addr, + block_len); + } + nbytes &= DES_BLOCK_SIZE - 1; + err = blkcipher_walk_done(desc, &walk, nbytes); + } + fprs_write(0); + return err; +} + +static int ecb3_encrypt(struct blkcipher_desc *desc, + struct scatterlist *dst, struct scatterlist *src, + unsigned int nbytes) +{ + return __ecb3_crypt(desc, dst, src, nbytes, true); +} + +static int ecb3_decrypt(struct blkcipher_desc *desc, + struct scatterlist *dst, struct scatterlist *src, + unsigned int nbytes) +{ + return __ecb3_crypt(desc, dst, src, nbytes, false); +} + +extern void des3_ede_sparc64_cbc_encrypt(const u64 *expkey, const u64 *input, + u64 *output, unsigned int len, + u64 *iv); + +static int cbc3_encrypt(struct blkcipher_desc *desc, + struct scatterlist *dst, struct scatterlist *src, + unsigned int nbytes) +{ + struct des3_ede_sparc64_ctx *ctx = crypto_blkcipher_ctx(desc->tfm); + struct blkcipher_walk walk; + const u64 *K; + int err; + + blkcipher_walk_init(&walk, dst, src, nbytes); + err = blkcipher_walk_virt(desc, &walk); + + K = &ctx->encrypt_expkey[0]; + des3_ede_sparc64_load_keys(K); + while ((nbytes = walk.nbytes)) { + unsigned int block_len = nbytes & DES_BLOCK_MASK; + + if (likely(block_len)) { + const u64 *src64 = (const u64 *)walk.src.virt.addr; + des3_ede_sparc64_cbc_encrypt(K, src64, + (u64 *) walk.dst.virt.addr, + block_len, + (u64 *) walk.iv); + } + nbytes &= DES_BLOCK_SIZE - 1; + err = blkcipher_walk_done(desc, &walk, nbytes); + } + fprs_write(0); + return err; +} + +extern void des3_ede_sparc64_cbc_decrypt(const u64 *expkey, const u64 *input, + u64 *output, unsigned int len, + u64 *iv); + +static int cbc3_decrypt(struct blkcipher_desc *desc, + struct scatterlist *dst, struct scatterlist *src, + unsigned int nbytes) +{ + struct des3_ede_sparc64_ctx *ctx = crypto_blkcipher_ctx(desc->tfm); + struct blkcipher_walk walk; + const u64 *K; + int err; + + blkcipher_walk_init(&walk, dst, src, nbytes); + err = blkcipher_walk_virt(desc, &walk); + + K = &ctx->decrypt_expkey[0]; + des3_ede_sparc64_load_keys(K); + while ((nbytes = walk.nbytes)) { + unsigned int block_len = nbytes & DES_BLOCK_MASK; + + if (likely(block_len)) { + const u64 *src64 = (const u64 *)walk.src.virt.addr; + des3_ede_sparc64_cbc_decrypt(K, src64, + (u64 *) walk.dst.virt.addr, + block_len, + (u64 *) walk.iv); + } + nbytes &= DES_BLOCK_SIZE - 1; + err = blkcipher_walk_done(desc, &walk, nbytes); + } + fprs_write(0); + return err; +} + +static struct crypto_alg algs[] = { { + .cra_name = "des", + .cra_driver_name = "des-sparc64", + .cra_priority = SPARC_CR_OPCODE_PRIORITY, + .cra_flags = CRYPTO_ALG_TYPE_CIPHER, + .cra_blocksize = DES_BLOCK_SIZE, + .cra_ctxsize = sizeof(struct des_sparc64_ctx), + .cra_alignmask = 7, + .cra_module = THIS_MODULE, + .cra_u = { + .cipher = { + .cia_min_keysize = DES_KEY_SIZE, + .cia_max_keysize = DES_KEY_SIZE, + .cia_setkey = des_set_key, + .cia_encrypt = des_encrypt, + .cia_decrypt = des_decrypt + } + } +}, { + .cra_name = "ecb(des)", + .cra_driver_name = "ecb-des-sparc64", + .cra_priority = SPARC_CR_OPCODE_PRIORITY, + .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER, + .cra_blocksize = DES_BLOCK_SIZE, + .cra_ctxsize = sizeof(struct des_sparc64_ctx), + .cra_alignmask = 7, + .cra_type = &crypto_blkcipher_type, + .cra_module = THIS_MODULE, + .cra_u = { + .blkcipher = { + .min_keysize = DES_KEY_SIZE, + .max_keysize = DES_KEY_SIZE, + .setkey = des_set_key, + .encrypt = ecb_encrypt, + .decrypt = ecb_decrypt, + }, + }, +}, { + .cra_name = "cbc(des)", + .cra_driver_name = "cbc-des-sparc64", + .cra_priority = SPARC_CR_OPCODE_PRIORITY, + .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER, + .cra_blocksize = DES_BLOCK_SIZE, + .cra_ctxsize = sizeof(struct des_sparc64_ctx), + .cra_alignmask = 7, + .cra_type = &crypto_blkcipher_type, + .cra_module = THIS_MODULE, + .cra_u = { + .blkcipher = { + .min_keysize = DES_KEY_SIZE, + .max_keysize = DES_KEY_SIZE, + .setkey = des_set_key, + .encrypt = cbc_encrypt, + .decrypt = cbc_decrypt, + }, + }, +}, { + .cra_name = "des3_ede", + .cra_driver_name = "des3_ede-sparc64", + .cra_priority = SPARC_CR_OPCODE_PRIORITY, + .cra_flags = CRYPTO_ALG_TYPE_CIPHER, + .cra_blocksize = DES3_EDE_BLOCK_SIZE, + .cra_ctxsize = sizeof(struct des3_ede_sparc64_ctx), + .cra_alignmask = 7, + .cra_module = THIS_MODULE, + .cra_u = { + .cipher = { + .cia_min_keysize = DES3_EDE_KEY_SIZE, + .cia_max_keysize = DES3_EDE_KEY_SIZE, + .cia_setkey = des3_ede_set_key, + .cia_encrypt = des3_ede_encrypt, + .cia_decrypt = des3_ede_decrypt + } + } +}, { + .cra_name = "ecb(des3_ede)", + .cra_driver_name = "ecb-des3_ede-sparc64", + .cra_priority = SPARC_CR_OPCODE_PRIORITY, + .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER, + .cra_blocksize = DES3_EDE_BLOCK_SIZE, + .cra_ctxsize = sizeof(struct des3_ede_sparc64_ctx), + .cra_alignmask = 7, + .cra_type = &crypto_blkcipher_type, + .cra_module = THIS_MODULE, + .cra_u = { + .blkcipher = { + .min_keysize = DES3_EDE_KEY_SIZE, + .max_keysize = DES3_EDE_KEY_SIZE, + .setkey = des3_ede_set_key, + .encrypt = ecb3_encrypt, + .decrypt = ecb3_decrypt, + }, + }, +}, { + .cra_name = "cbc(des3_ede)", + .cra_driver_name = "cbc-des3_ede-sparc64", + .cra_priority = SPARC_CR_OPCODE_PRIORITY, + .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER, + .cra_blocksize = DES3_EDE_BLOCK_SIZE, + .cra_ctxsize = sizeof(struct des3_ede_sparc64_ctx), + .cra_alignmask = 7, + .cra_type = &crypto_blkcipher_type, + .cra_module = THIS_MODULE, + .cra_u = { + .blkcipher = { + .min_keysize = DES3_EDE_KEY_SIZE, + .max_keysize = DES3_EDE_KEY_SIZE, + .setkey = des3_ede_set_key, + .encrypt = cbc3_encrypt, + .decrypt = cbc3_decrypt, + }, + }, +} }; + +static bool __init sparc64_has_des_opcode(void) +{ + unsigned long cfr; + + if (!(sparc64_elf_hwcap & HWCAP_SPARC_CRYPTO)) + return false; + + __asm__ __volatile__("rd %%asr26, %0" : "=r" (cfr)); + if (!(cfr & CFR_DES)) + return false; + + return true; +} + +static int __init des_sparc64_mod_init(void) +{ + int i; + + for (i = 0; i < ARRAY_SIZE(algs); i++) + INIT_LIST_HEAD(&algs[i].cra_list); + + if (sparc64_has_des_opcode()) { + pr_info("Using sparc64 des opcodes optimized DES implementation\n"); + return crypto_register_algs(algs, ARRAY_SIZE(algs)); + } + pr_info("sparc64 des opcodes not available.\n"); + return -ENODEV; +} + +static void __exit des_sparc64_mod_fini(void) +{ + crypto_unregister_algs(algs, ARRAY_SIZE(algs)); +} + +module_init(des_sparc64_mod_init); +module_exit(des_sparc64_mod_fini); + +MODULE_LICENSE("GPL"); +MODULE_DESCRIPTION("DES & Triple DES EDE Cipher Algorithms, sparc64 des opcode accelerated"); + +MODULE_ALIAS("des"); diff --git a/arch/sparc/crypto/md5_asm.S b/arch/sparc/crypto/md5_asm.S new file mode 100644 index 000000000000..3150404e602e --- /dev/null +++ b/arch/sparc/crypto/md5_asm.S @@ -0,0 +1,70 @@ +#include <linux/linkage.h> +#include <asm/visasm.h> + +#include "opcodes.h" + +ENTRY(md5_sparc64_transform) + /* %o0 = digest, %o1 = data, %o2 = rounds */ + VISEntryHalf + ld [%o0 + 0x00], %f0 + ld [%o0 + 0x04], %f1 + andcc %o1, 0x7, %g0 + ld [%o0 + 0x08], %f2 + bne,pn %xcc, 10f + ld [%o0 + 0x0c], %f3 + +1: + ldd [%o1 + 0x00], %f8 + ldd [%o1 + 0x08], %f10 + ldd [%o1 + 0x10], %f12 + ldd [%o1 + 0x18], %f14 + ldd [%o1 + 0x20], %f16 + ldd [%o1 + 0x28], %f18 + ldd [%o1 + 0x30], %f20 + ldd [%o1 + 0x38], %f22 + + MD5 + + subcc %o2, 1, %o2 + bne,pt %xcc, 1b + add %o1, 0x40, %o1 + +5: + st %f0, [%o0 + 0x00] + st %f1, [%o0 + 0x04] + st %f2, [%o0 + 0x08] + st %f3, [%o0 + 0x0c] + retl + VISExitHalf +10: + alignaddr %o1, %g0, %o1 + + ldd [%o1 + 0x00], %f10 +1: + ldd [%o1 + 0x08], %f12 + ldd [%o1 + 0x10], %f14 + ldd [%o1 + 0x18], %f16 + ldd [%o1 + 0x20], %f18 + ldd [%o1 + 0x28], %f20 + ldd [%o1 + 0x30], %f22 + ldd [%o1 + 0x38], %f24 + ldd [%o1 + 0x40], %f26 + + faligndata %f10, %f12, %f8 + faligndata %f12, %f14, %f10 + faligndata %f14, %f16, %f12 + faligndata %f16, %f18, %f14 + faligndata %f18, %f20, %f16 + faligndata %f20, %f22, %f18 + faligndata %f22, %f24, %f20 + faligndata %f24, %f26, %f22 + + MD5 + + subcc %o2, 1, %o2 + fsrc2 %f26, %f10 + bne,pt %xcc, 1b + add %o1, 0x40, %o1 + + ba,a,pt %xcc, 5b +ENDPROC(md5_sparc64_transform) diff --git a/arch/sparc/crypto/md5_glue.c b/arch/sparc/crypto/md5_glue.c new file mode 100644 index 000000000000..603d723038ce --- /dev/null +++ b/arch/sparc/crypto/md5_glue.c @@ -0,0 +1,188 @@ +/* Glue code for MD5 hashing optimized for sparc64 crypto opcodes. + * + * This is based largely upon arch/x86/crypto/sha1_ssse3_glue.c + * and crypto/md5.c which are: + * + * Copyright (c) Alan Smithee. + * Copyright (c) Andrew McDonald <andrew@mcdonald.org.uk> + * Copyright (c) Jean-Francois Dive <jef@linuxbe.org> + * Copyright (c) Mathias Krause <minipli@googlemail.com> + * Copyright (c) Cryptoapi developers. + * Copyright (c) 2002 James Morris <jmorris@intercode.com.au> + */ + +#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt + +#include <crypto/internal/hash.h> +#include <linux/init.h> +#include <linux/module.h> +#include <linux/mm.h> +#include <linux/cryptohash.h> +#include <linux/types.h> +#include <crypto/md5.h> + +#include <asm/pstate.h> +#include <asm/elf.h> + +#include "opcodes.h" + +asmlinkage void md5_sparc64_transform(u32 *digest, const char *data, + unsigned int rounds); + +static int md5_sparc64_init(struct shash_desc *desc) +{ + struct md5_state *mctx = shash_desc_ctx(desc); + + mctx->hash[0] = cpu_to_le32(0x67452301); + mctx->hash[1] = cpu_to_le32(0xefcdab89); + mctx->hash[2] = cpu_to_le32(0x98badcfe); + mctx->hash[3] = cpu_to_le32(0x10325476); + mctx->byte_count = 0; + + return 0; +} + +static void __md5_sparc64_update(struct md5_state *sctx, const u8 *data, + unsigned int len, unsigned int partial) +{ + unsigned int done = 0; + + sctx->byte_count += len; + if (partial) { + done = MD5_HMAC_BLOCK_SIZE - partial; + memcpy((u8 *)sctx->block + partial, data, done); + md5_sparc64_transform(sctx->hash, (u8 *)sctx->block, 1); + } + if (len - done >= MD5_HMAC_BLOCK_SIZE) { + const unsigned int rounds = (len - done) / MD5_HMAC_BLOCK_SIZE; + + md5_sparc64_transform(sctx->hash, data + done, rounds); + done += rounds * MD5_HMAC_BLOCK_SIZE; + } + + memcpy(sctx->block, data + done, len - done); +} + +static int md5_sparc64_update(struct shash_desc *desc, const u8 *data, + unsigned int len) +{ + struct md5_state *sctx = shash_desc_ctx(desc); + unsigned int partial = sctx->byte_count % MD5_HMAC_BLOCK_SIZE; + + /* Handle the fast case right here */ + if (partial + len < MD5_HMAC_BLOCK_SIZE) { + sctx->byte_count += len; + memcpy((u8 *)sctx->block + partial, data, len); + } else + __md5_sparc64_update(sctx, data, len, partial); + + return 0; +} + +/* Add padding and return the message digest. */ +static int md5_sparc64_final(struct shash_desc *desc, u8 *out) +{ + struct md5_state *sctx = shash_desc_ctx(desc); + unsigned int i, index, padlen; + u32 *dst = (u32 *)out; + __le64 bits; + static const u8 padding[MD5_HMAC_BLOCK_SIZE] = { 0x80, }; + + bits = cpu_to_le64(sctx->byte_count << 3); + + /* Pad out to 56 mod 64 and append length */ + index = sctx->byte_count % MD5_HMAC_BLOCK_SIZE; + padlen = (index < 56) ? (56 - index) : ((MD5_HMAC_BLOCK_SIZE+56) - index); + + /* We need to fill a whole block for __md5_sparc64_update() */ + if (padlen <= 56) { + sctx->byte_count += padlen; + memcpy((u8 *)sctx->block + index, padding, padlen); + } else { + __md5_sparc64_update(sctx, padding, padlen, index); + } + __md5_sparc64_update(sctx, (const u8 *)&bits, sizeof(bits), 56); + + /* Store state in digest */ + for (i = 0; i < MD5_HASH_WORDS; i++) + dst[i] = sctx->hash[i]; + + /* Wipe context */ + memset(sctx, 0, sizeof(*sctx)); + + return 0; +} + +static int md5_sparc64_export(struct shash_desc *desc, void *out) +{ + struct md5_state *sctx = shash_desc_ctx(desc); + + memcpy(out, sctx, sizeof(*sctx)); + + return 0; +} + +static int md5_sparc64_import(struct shash_desc *desc, const void *in) +{ + struct md5_state *sctx = shash_desc_ctx(desc); + + memcpy(sctx, in, sizeof(*sctx)); + + return 0; +} + +static struct shash_alg alg = { + .digestsize = MD5_DIGEST_SIZE, + .init = md5_sparc64_init, + .update = md5_sparc64_update, + .final = md5_sparc64_final, + .export = md5_sparc64_export, + .import = md5_sparc64_import, + .descsize = sizeof(struct md5_state), + .statesize = sizeof(struct md5_state), + .base = { + .cra_name = "md5", + .cra_driver_name= "md5-sparc64", + .cra_priority = SPARC_CR_OPCODE_PRIORITY, + .cra_flags = CRYPTO_ALG_TYPE_SHASH, + .cra_blocksize = MD5_HMAC_BLOCK_SIZE, + .cra_module = THIS_MODULE, + } +}; + +static bool __init sparc64_has_md5_opcode(void) +{ + unsigned long cfr; + + if (!(sparc64_elf_hwcap & HWCAP_SPARC_CRYPTO)) + return false; + + __asm__ __volatile__("rd %%asr26, %0" : "=r" (cfr)); + if (!(cfr & CFR_MD5)) + return false; + + return true; +} + +static int __init md5_sparc64_mod_init(void) +{ + if (sparc64_has_md5_opcode()) { + pr_info("Using sparc64 md5 opcode optimized MD5 implementation\n"); + return crypto_register_shash(&alg); + } + pr_info("sparc64 md5 opcode not available.\n"); + return -ENODEV; +} + +static void __exit md5_sparc64_mod_fini(void) +{ + crypto_unregister_shash(&alg); +} + +module_init(md5_sparc64_mod_init); +module_exit(md5_sparc64_mod_fini); + +MODULE_LICENSE("GPL"); +MODULE_DESCRIPTION("MD5 Secure Hash Algorithm, sparc64 md5 opcode accelerated"); + +MODULE_ALIAS("md5"); diff --git a/arch/sparc/crypto/opcodes.h b/arch/sparc/crypto/opcodes.h new file mode 100644 index 000000000000..19cbaea6976f --- /dev/null +++ b/arch/sparc/crypto/opcodes.h @@ -0,0 +1,99 @@ +#ifndef _OPCODES_H +#define _OPCODES_H + +#define SPARC_CR_OPCODE_PRIORITY 300 + +#define F3F(x,y,z) (((x)<<30)|((y)<<19)|((z)<<5)) + +#define FPD_ENCODE(x) (((x) >> 5) | ((x) & ~(0x20))) + +#define RS1(x) (FPD_ENCODE(x) << 14) +#define RS2(x) (FPD_ENCODE(x) << 0) +#define RS3(x) (FPD_ENCODE(x) << 9) +#define RD(x) (FPD_ENCODE(x) << 25) +#define IMM5_0(x) ((x) << 0) +#define IMM5_9(x) ((x) << 9) + +#define CRC32C(a,b,c) \ + .word (F3F(2,0x36,0x147)|RS1(a)|RS2(b)|RD(c)); + +#define MD5 \ + .word 0x81b02800; +#define SHA1 \ + .word 0x81b02820; +#define SHA256 \ + .word 0x81b02840; +#define SHA512 \ + .word 0x81b02860; + +#define AES_EROUND01(a,b,c,d) \ + .word (F3F(2, 0x19, 0)|RS1(a)|RS2(b)|RS3(c)|RD(d)); +#define AES_EROUND23(a,b,c,d) \ + .word (F3F(2, 0x19, 1)|RS1(a)|RS2(b)|RS3(c)|RD(d)); +#define AES_DROUND01(a,b,c,d) \ + .word (F3F(2, 0x19, 2)|RS1(a)|RS2(b)|RS3(c)|RD(d)); +#define AES_DROUND23(a,b,c,d) \ + .word (F3F(2, 0x19, 3)|RS1(a)|RS2(b)|RS3(c)|RD(d)); +#define AES_EROUND01_L(a,b,c,d) \ + .word (F3F(2, 0x19, 4)|RS1(a)|RS2(b)|RS3(c)|RD(d)); +#define AES_EROUND23_L(a,b,c,d) \ + .word (F3F(2, 0x19, 5)|RS1(a)|RS2(b)|RS3(c)|RD(d)); +#define AES_DROUND01_L(a,b,c,d) \ + .word (F3F(2, 0x19, 6)|RS1(a)|RS2(b)|RS3(c)|RD(d)); +#define AES_DROUND23_L(a,b,c,d) \ + .word (F3F(2, 0x19, 7)|RS1(a)|RS2(b)|RS3(c)|RD(d)); +#define AES_KEXPAND1(a,b,c,d) \ + .word (F3F(2, 0x19, 8)|RS1(a)|RS2(b)|IMM5_9(c)|RD(d)); +#define AES_KEXPAND0(a,b,c) \ + .word (F3F(2, 0x36, 0x130)|RS1(a)|RS2(b)|RD(c)); +#define AES_KEXPAND2(a,b,c) \ + .word (F3F(2, 0x36, 0x131)|RS1(a)|RS2(b)|RD(c)); + +#define DES_IP(a,b) \ + .word (F3F(2, 0x36, 0x134)|RS1(a)|RD(b)); +#define DES_IIP(a,b) \ + .word (F3F(2, 0x36, 0x135)|RS1(a)|RD(b)); +#define DES_KEXPAND(a,b,c) \ + .word (F3F(2, 0x36, 0x136)|RS1(a)|IMM5_0(b)|RD(c)); +#define DES_ROUND(a,b,c,d) \ + .word (F3F(2, 0x19, 0x009)|RS1(a)|RS2(b)|RS3(c)|RD(d)); + +#define CAMELLIA_F(a,b,c,d) \ + .word (F3F(2, 0x19, 0x00c)|RS1(a)|RS2(b)|RS3(c)|RD(d)); +#define CAMELLIA_FL(a,b,c) \ + .word (F3F(2, 0x36, 0x13c)|RS1(a)|RS2(b)|RD(c)); +#define CAMELLIA_FLI(a,b,c) \ + .word (F3F(2, 0x36, 0x13d)|RS1(a)|RS2(b)|RD(c)); + +#define MOVDTOX_F0_O4 \ + .word 0x99b02200 +#define MOVDTOX_F2_O5 \ + .word 0x9bb02202 +#define MOVXTOD_G1_F60 \ + .word 0xbbb02301 +#define MOVXTOD_G1_F62 \ + .word 0xbfb02301 +#define MOVXTOD_G3_F4 \ + .word 0x89b02303; +#define MOVXTOD_G7_F6 \ + .word 0x8db02307; +#define MOVXTOD_G3_F0 \ + .word 0x81b02303; +#define MOVXTOD_G7_F2 \ + .word 0x85b02307; +#define MOVXTOD_O0_F0 \ + .word 0x81b02308; +#define MOVXTOD_O5_F0 \ + .word 0x81b0230d; +#define MOVXTOD_O5_F2 \ + .word 0x85b0230d; +#define MOVXTOD_O5_F4 \ + .word 0x89b0230d; +#define MOVXTOD_O5_F6 \ + .word 0x8db0230d; +#define MOVXTOD_G3_F60 \ + .word 0xbbb02303; +#define MOVXTOD_G7_F62 \ + .word 0xbfb02307; + +#endif /* _OPCODES_H */ diff --git a/arch/sparc/crypto/sha1_asm.S b/arch/sparc/crypto/sha1_asm.S new file mode 100644 index 000000000000..219d10c5ae0e --- /dev/null +++ b/arch/sparc/crypto/sha1_asm.S @@ -0,0 +1,72 @@ +#include <linux/linkage.h> +#include <asm/visasm.h> + +#include "opcodes.h" + +ENTRY(sha1_sparc64_transform) + /* %o0 = digest, %o1 = data, %o2 = rounds */ + VISEntryHalf + ld [%o0 + 0x00], %f0 + ld [%o0 + 0x04], %f1 + ld [%o0 + 0x08], %f2 + andcc %o1, 0x7, %g0 + ld [%o0 + 0x0c], %f3 + bne,pn %xcc, 10f + ld [%o0 + 0x10], %f4 + +1: + ldd [%o1 + 0x00], %f8 + ldd [%o1 + 0x08], %f10 + ldd [%o1 + 0x10], %f12 + ldd [%o1 + 0x18], %f14 + ldd [%o1 + 0x20], %f16 + ldd [%o1 + 0x28], %f18 + ldd [%o1 + 0x30], %f20 + ldd [%o1 + 0x38], %f22 + + SHA1 + + subcc %o2, 1, %o2 + bne,pt %xcc, 1b + add %o1, 0x40, %o1 + +5: + st %f0, [%o0 + 0x00] + st %f1, [%o0 + 0x04] + st %f2, [%o0 + 0x08] + st %f3, [%o0 + 0x0c] + st %f4, [%o0 + 0x10] + retl + VISExitHalf +10: + alignaddr %o1, %g0, %o1 + + ldd [%o1 + 0x00], %f10 +1: + ldd [%o1 + 0x08], %f12 + ldd [%o1 + 0x10], %f14 + ldd [%o1 + 0x18], %f16 + ldd [%o1 + 0x20], %f18 + ldd [%o1 + 0x28], %f20 + ldd [%o1 + 0x30], %f22 + ldd [%o1 + 0x38], %f24 + ldd [%o1 + 0x40], %f26 + + faligndata %f10, %f12, %f8 + faligndata %f12, %f14, %f10 + faligndata %f14, %f16, %f12 + faligndata %f16, %f18, %f14 + faligndata %f18, %f20, %f16 + faligndata %f20, %f22, %f18 + faligndata %f22, %f24, %f20 + faligndata %f24, %f26, %f22 + + SHA1 + + subcc %o2, 1, %o2 + fsrc2 %f26, %f10 + bne,pt %xcc, 1b + add %o1, 0x40, %o1 + + ba,a,pt %xcc, 5b +ENDPROC(sha1_sparc64_transform) diff --git a/arch/sparc/crypto/sha1_glue.c b/arch/sparc/crypto/sha1_glue.c new file mode 100644 index 000000000000..2bbb20bee9f1 --- /dev/null +++ b/arch/sparc/crypto/sha1_glue.c @@ -0,0 +1,183 @@ +/* Glue code for SHA1 hashing optimized for sparc64 crypto opcodes. + * + * This is based largely upon arch/x86/crypto/sha1_ssse3_glue.c + * + * Copyright (c) Alan Smithee. + * Copyright (c) Andrew McDonald <andrew@mcdonald.org.uk> + * Copyright (c) Jean-Francois Dive <jef@linuxbe.org> + * Copyright (c) Mathias Krause <minipli@googlemail.com> + */ + +#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt + +#include <crypto/internal/hash.h> +#include <linux/init.h> +#include <linux/module.h> +#include <linux/mm.h> +#include <linux/cryptohash.h> +#include <linux/types.h> +#include <crypto/sha.h> + +#include <asm/pstate.h> +#include <asm/elf.h> + +#include "opcodes.h" + +asmlinkage void sha1_sparc64_transform(u32 *digest, const char *data, + unsigned int rounds); + +static int sha1_sparc64_init(struct shash_desc *desc) +{ + struct sha1_state *sctx = shash_desc_ctx(desc); + + *sctx = (struct sha1_state){ + .state = { SHA1_H0, SHA1_H1, SHA1_H2, SHA1_H3, SHA1_H4 }, + }; + + return 0; +} + +static void __sha1_sparc64_update(struct sha1_state *sctx, const u8 *data, + unsigned int len, unsigned int partial) +{ + unsigned int done = 0; + + sctx->count += len; + if (partial) { + done = SHA1_BLOCK_SIZE - partial; + memcpy(sctx->buffer + partial, data, done); + sha1_sparc64_transform(sctx->state, sctx->buffer, 1); + } + if (len - done >= SHA1_BLOCK_SIZE) { + const unsigned int rounds = (len - done) / SHA1_BLOCK_SIZE; + + sha1_sparc64_transform(sctx->state, data + done, rounds); + done += rounds * SHA1_BLOCK_SIZE; + } + + memcpy(sctx->buffer, data + done, len - done); +} + +static int sha1_sparc64_update(struct shash_desc *desc, const u8 *data, + unsigned int len) +{ + struct sha1_state *sctx = shash_desc_ctx(desc); + unsigned int partial = sctx->count % SHA1_BLOCK_SIZE; + + /* Handle the fast case right here */ + if (partial + len < SHA1_BLOCK_SIZE) { + sctx->count += len; + memcpy(sctx->buffer + partial, data, len); + } else + __sha1_sparc64_update(sctx, data, len, partial); + + return 0; +} + +/* Add padding and return the message digest. */ +static int sha1_sparc64_final(struct shash_desc *desc, u8 *out) +{ + struct sha1_state *sctx = shash_desc_ctx(desc); + unsigned int i, index, padlen; + __be32 *dst = (__be32 *)out; + __be64 bits; + static const u8 padding[SHA1_BLOCK_SIZE] = { 0x80, }; + + bits = cpu_to_be64(sctx->count << 3); + + /* Pad out to 56 mod 64 and append length */ + index = sctx->count % SHA1_BLOCK_SIZE; + padlen = (index < 56) ? (56 - index) : ((SHA1_BLOCK_SIZE+56) - index); + + /* We need to fill a whole block for __sha1_sparc64_update() */ + if (padlen <= 56) { + sctx->count += padlen; + memcpy(sctx->buffer + index, padding, padlen); + } else { + __sha1_sparc64_update(sctx, padding, padlen, index); + } + __sha1_sparc64_update(sctx, (const u8 *)&bits, sizeof(bits), 56); + + /* Store state in digest */ + for (i = 0; i < 5; i++) + dst[i] = cpu_to_be32(sctx->state[i]); + + /* Wipe context */ + memset(sctx, 0, sizeof(*sctx)); + + return 0; +} + +static int sha1_sparc64_export(struct shash_desc *desc, void *out) +{ + struct sha1_state *sctx = shash_desc_ctx(desc); + + memcpy(out, sctx, sizeof(*sctx)); + + return 0; +} + +static int sha1_sparc64_import(struct shash_desc *desc, const void *in) +{ + struct sha1_state *sctx = shash_desc_ctx(desc); + + memcpy(sctx, in, sizeof(*sctx)); + + return 0; +} + +static struct shash_alg alg = { + .digestsize = SHA1_DIGEST_SIZE, + .init = sha1_sparc64_init, + .update = sha1_sparc64_update, + .final = sha1_sparc64_final, + .export = sha1_sparc64_export, + .import = sha1_sparc64_import, + .descsize = sizeof(struct sha1_state), + .statesize = sizeof(struct sha1_state), + .base = { + .cra_name = "sha1", + .cra_driver_name= "sha1-sparc64", + .cra_priority = SPARC_CR_OPCODE_PRIORITY, + .cra_flags = CRYPTO_ALG_TYPE_SHASH, + .cra_blocksize = SHA1_BLOCK_SIZE, + .cra_module = THIS_MODULE, + } +}; + +static bool __init sparc64_has_sha1_opcode(void) +{ + unsigned long cfr; + + if (!(sparc64_elf_hwcap & HWCAP_SPARC_CRYPTO)) + return false; + + __asm__ __volatile__("rd %%asr26, %0" : "=r" (cfr)); + if (!(cfr & CFR_SHA1)) + return false; + + return true; +} + +static int __init sha1_sparc64_mod_init(void) +{ + if (sparc64_has_sha1_opcode()) { + pr_info("Using sparc64 sha1 opcode optimized SHA-1 implementation\n"); + return crypto_register_shash(&alg); + } + pr_info("sparc64 sha1 opcode not available.\n"); + return -ENODEV; +} + +static void __exit sha1_sparc64_mod_fini(void) +{ + crypto_unregister_shash(&alg); +} + +module_init(sha1_sparc64_mod_init); +module_exit(sha1_sparc64_mod_fini); + +MODULE_LICENSE("GPL"); +MODULE_DESCRIPTION("SHA1 Secure Hash Algorithm, sparc64 sha1 opcode accelerated"); + +MODULE_ALIAS("sha1"); diff --git a/arch/sparc/crypto/sha256_asm.S b/arch/sparc/crypto/sha256_asm.S new file mode 100644 index 000000000000..b5f3d5826eb4 --- /dev/null +++ b/arch/sparc/crypto/sha256_asm.S @@ -0,0 +1,78 @@ +#include <linux/linkage.h> +#include <asm/visasm.h> + +#include "opcodes.h" + +ENTRY(sha256_sparc64_transform) + /* %o0 = digest, %o1 = data, %o2 = rounds */ + VISEntryHalf + ld [%o0 + 0x00], %f0 + ld [%o0 + 0x04], %f1 + ld [%o0 + 0x08], %f2 + ld [%o0 + 0x0c], %f3 + ld [%o0 + 0x10], %f4 + ld [%o0 + 0x14], %f5 + andcc %o1, 0x7, %g0 + ld [%o0 + 0x18], %f6 + bne,pn %xcc, 10f + ld [%o0 + 0x1c], %f7 + +1: + ldd [%o1 + 0x00], %f8 + ldd [%o1 + 0x08], %f10 + ldd [%o1 + 0x10], %f12 + ldd [%o1 + 0x18], %f14 + ldd [%o1 + 0x20], %f16 + ldd [%o1 + 0x28], %f18 + ldd [%o1 + 0x30], %f20 + ldd [%o1 + 0x38], %f22 + + SHA256 + + subcc %o2, 1, %o2 + bne,pt %xcc, 1b + add %o1, 0x40, %o1 + +5: + st %f0, [%o0 + 0x00] + st %f1, [%o0 + 0x04] + st %f2, [%o0 + 0x08] + st %f3, [%o0 + 0x0c] + st %f4, [%o0 + 0x10] + st %f5, [%o0 + 0x14] + st %f6, [%o0 + 0x18] + st %f7, [%o0 + 0x1c] + retl + VISExitHalf +10: + alignaddr %o1, %g0, %o1 + + ldd [%o1 + 0x00], %f10 +1: + ldd [%o1 + 0x08], %f12 + ldd [%o1 + 0x10], %f14 + ldd [%o1 + 0x18], %f16 + ldd [%o1 + 0x20], %f18 + ldd [%o1 + 0x28], %f20 + ldd [%o1 + 0x30], %f22 + ldd [%o1 + 0x38], %f24 + ldd [%o1 + 0x40], %f26 + + faligndata %f10, %f12, %f8 + faligndata %f12, %f14, %f10 + faligndata %f14, %f16, %f12 + faligndata %f16, %f18, %f14 + faligndata %f18, %f20, %f16 + faligndata %f20, %f22, %f18 + faligndata %f22, %f24, %f20 + faligndata %f24, %f26, %f22 + + SHA256 + + subcc %o2, 1, %o2 + fsrc2 %f26, %f10 + bne,pt %xcc, 1b + add %o1, 0x40, %o1 + + ba,a,pt %xcc, 5b +ENDPROC(sha256_sparc64_transform) diff --git a/arch/sparc/crypto/sha256_glue.c b/arch/sparc/crypto/sha256_glue.c new file mode 100644 index 000000000000..591e656bd891 --- /dev/null +++ b/arch/sparc/crypto/sha256_glue.c @@ -0,0 +1,241 @@ +/* Glue code for SHA256 hashing optimized for sparc64 crypto opcodes. + * + * This is based largely upon crypto/sha256_generic.c + * + * Copyright (c) Jean-Luc Cooke <jlcooke@certainkey.com> + * Copyright (c) Andrew McDonald <andrew@mcdonald.org.uk> + * Copyright (c) 2002 James Morris <jmorris@intercode.com.au> + * SHA224 Support Copyright 2007 Intel Corporation <jonathan.lynch@intel.com> + */ + +#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt + +#include <crypto/internal/hash.h> +#include <linux/init.h> +#include <linux/module.h> +#include <linux/mm.h> +#include <linux/cryptohash.h> +#include <linux/types.h> +#include <crypto/sha.h> + +#include <asm/pstate.h> +#include <asm/elf.h> + +#include "opcodes.h" + +asmlinkage void sha256_sparc64_transform(u32 *digest, const char *data, + unsigned int rounds); + +static int sha224_sparc64_init(struct shash_desc *desc) +{ + struct sha256_state *sctx = shash_desc_ctx(desc); + sctx->state[0] = SHA224_H0; + sctx->state[1] = SHA224_H1; + sctx->state[2] = SHA224_H2; + sctx->state[3] = SHA224_H3; + sctx->state[4] = SHA224_H4; + sctx->state[5] = SHA224_H5; + sctx->state[6] = SHA224_H6; + sctx->state[7] = SHA224_H7; + sctx->count = 0; + + return 0; +} + +static int sha256_sparc64_init(struct shash_desc *desc) +{ + struct sha256_state *sctx = shash_desc_ctx(desc); + sctx->state[0] = SHA256_H0; + sctx->state[1] = SHA256_H1; + sctx->state[2] = SHA256_H2; + sctx->state[3] = SHA256_H3; + sctx->state[4] = SHA256_H4; + sctx->state[5] = SHA256_H5; + sctx->state[6] = SHA256_H6; + sctx->state[7] = SHA256_H7; + sctx->count = 0; + + return 0; +} + +static void __sha256_sparc64_update(struct sha256_state *sctx, const u8 *data, + unsigned int len, unsigned int partial) +{ + unsigned int done = 0; + + sctx->count += len; + if (partial) { + done = SHA256_BLOCK_SIZE - partial; + memcpy(sctx->buf + partial, data, done); + sha256_sparc64_transform(sctx->state, sctx->buf, 1); + } + if (len - done >= SHA256_BLOCK_SIZE) { + const unsigned int rounds = (len - done) / SHA256_BLOCK_SIZE; + + sha256_sparc64_transform(sctx->state, data + done, rounds); + done += rounds * SHA256_BLOCK_SIZE; + } + + memcpy(sctx->buf, data + done, len - done); +} + +static int sha256_sparc64_update(struct shash_desc *desc, const u8 *data, + unsigned int len) +{ + struct sha256_state *sctx = shash_desc_ctx(desc); + unsigned int partial = sctx->count % SHA256_BLOCK_SIZE; + + /* Handle the fast case right here */ + if (partial + len < SHA256_BLOCK_SIZE) { + sctx->count += len; + memcpy(sctx->buf + partial, data, len); + } else + __sha256_sparc64_update(sctx, data, len, partial); + + return 0; +} + +static int sha256_sparc64_final(struct shash_desc *desc, u8 *out) +{ + struct sha256_state *sctx = shash_desc_ctx(desc); + unsigned int i, index, padlen; + __be32 *dst = (__be32 *)out; + __be64 bits; + static const u8 padding[SHA256_BLOCK_SIZE] = { 0x80, }; + + bits = cpu_to_be64(sctx->count << 3); + + /* Pad out to 56 mod 64 and append length */ + index = sctx->count % SHA256_BLOCK_SIZE; + padlen = (index < 56) ? (56 - index) : ((SHA256_BLOCK_SIZE+56) - index); + + /* We need to fill a whole block for __sha256_sparc64_update() */ + if (padlen <= 56) { + sctx->count += padlen; + memcpy(sctx->buf + index, padding, padlen); + } else { + __sha256_sparc64_update(sctx, padding, padlen, index); + } + __sha256_sparc64_update(sctx, (const u8 *)&bits, sizeof(bits), 56); + + /* Store state in digest */ + for (i = 0; i < 8; i++) + dst[i] = cpu_to_be32(sctx->state[i]); + + /* Wipe context */ + memset(sctx, 0, sizeof(*sctx)); + + return 0; +} + +static int sha224_sparc64_final(struct shash_desc *desc, u8 *hash) +{ + u8 D[SHA256_DIGEST_SIZE]; + + sha256_sparc64_final(desc, D); + + memcpy(hash, D, SHA224_DIGEST_SIZE); + memset(D, 0, SHA256_DIGEST_SIZE); + + return 0; +} + +static int sha256_sparc64_export(struct shash_desc *desc, void *out) +{ + struct sha256_state *sctx = shash_desc_ctx(desc); + + memcpy(out, sctx, sizeof(*sctx)); + return 0; +} + +static int sha256_sparc64_import(struct shash_desc *desc, const void *in) +{ + struct sha256_state *sctx = shash_desc_ctx(desc); + + memcpy(sctx, in, sizeof(*sctx)); + return 0; +} + +static struct shash_alg sha256 = { + .digestsize = SHA256_DIGEST_SIZE, + .init = sha256_sparc64_init, + .update = sha256_sparc64_update, + .final = sha256_sparc64_final, + .export = sha256_sparc64_export, + .import = sha256_sparc64_import, + .descsize = sizeof(struct sha256_state), + .statesize = sizeof(struct sha256_state), + .base = { + .cra_name = "sha256", + .cra_driver_name= "sha256-sparc64", + .cra_priority = SPARC_CR_OPCODE_PRIORITY, + .cra_flags = CRYPTO_ALG_TYPE_SHASH, + .cra_blocksize = SHA256_BLOCK_SIZE, + .cra_module = THIS_MODULE, + } +}; + +static struct shash_alg sha224 = { + .digestsize = SHA224_DIGEST_SIZE, + .init = sha224_sparc64_init, + .update = sha256_sparc64_update, + .final = sha224_sparc64_final, + .descsize = sizeof(struct sha256_state), + .base = { + .cra_name = "sha224", + .cra_driver_name= "sha224-sparc64", + .cra_priority = SPARC_CR_OPCODE_PRIORITY, + .cra_flags = CRYPTO_ALG_TYPE_SHASH, + .cra_blocksize = SHA224_BLOCK_SIZE, + .cra_module = THIS_MODULE, + } +}; + +static bool __init sparc64_has_sha256_opcode(void) +{ + unsigned long cfr; + + if (!(sparc64_elf_hwcap & HWCAP_SPARC_CRYPTO)) + return false; + + __asm__ __volatile__("rd %%asr26, %0" : "=r" (cfr)); + if (!(cfr & CFR_SHA256)) + return false; + + return true; +} + +static int __init sha256_sparc64_mod_init(void) +{ + if (sparc64_has_sha256_opcode()) { + int ret = crypto_register_shash(&sha224); + if (ret < 0) + return ret; + + ret = crypto_register_shash(&sha256); + if (ret < 0) { + crypto_unregister_shash(&sha224); + return ret; + } + + pr_info("Using sparc64 sha256 opcode optimized SHA-256/SHA-224 implementation\n"); + return 0; + } + pr_info("sparc64 sha256 opcode not available.\n"); + return -ENODEV; +} + +static void __exit sha256_sparc64_mod_fini(void) +{ + crypto_unregister_shash(&sha224); + crypto_unregister_shash(&sha256); +} + +module_init(sha256_sparc64_mod_init); +module_exit(sha256_sparc64_mod_fini); + +MODULE_LICENSE("GPL"); +MODULE_DESCRIPTION("SHA-224 and SHA-256 Secure Hash Algorithm, sparc64 sha256 opcode accelerated"); + +MODULE_ALIAS("sha224"); +MODULE_ALIAS("sha256"); diff --git a/arch/sparc/crypto/sha512_asm.S b/arch/sparc/crypto/sha512_asm.S new file mode 100644 index 000000000000..54bfba713c0e --- /dev/null +++ b/arch/sparc/crypto/sha512_asm.S @@ -0,0 +1,102 @@ +#include <linux/linkage.h> +#include <asm/visasm.h> + +#include "opcodes.h" + +ENTRY(sha512_sparc64_transform) + /* %o0 = digest, %o1 = data, %o2 = rounds */ + VISEntry + ldd [%o0 + 0x00], %f0 + ldd [%o0 + 0x08], %f2 + ldd [%o0 + 0x10], %f4 + ldd [%o0 + 0x18], %f6 + ldd [%o0 + 0x20], %f8 + ldd [%o0 + 0x28], %f10 + andcc %o1, 0x7, %g0 + ldd [%o0 + 0x30], %f12 + bne,pn %xcc, 10f + ldd [%o0 + 0x38], %f14 + +1: + ldd [%o1 + 0x00], %f16 + ldd [%o1 + 0x08], %f18 + ldd [%o1 + 0x10], %f20 + ldd [%o1 + 0x18], %f22 + ldd [%o1 + 0x20], %f24 + ldd [%o1 + 0x28], %f26 + ldd [%o1 + 0x30], %f28 + ldd [%o1 + 0x38], %f30 + ldd [%o1 + 0x40], %f32 + ldd [%o1 + 0x48], %f34 + ldd [%o1 + 0x50], %f36 + ldd [%o1 + 0x58], %f38 + ldd [%o1 + 0x60], %f40 + ldd [%o1 + 0x68], %f42 + ldd [%o1 + 0x70], %f44 + ldd [%o1 + 0x78], %f46 + + SHA512 + + subcc %o2, 1, %o2 + bne,pt %xcc, 1b + add %o1, 0x80, %o1 + +5: + std %f0, [%o0 + 0x00] + std %f2, [%o0 + 0x08] + std %f4, [%o0 + 0x10] + std %f6, [%o0 + 0x18] + std %f8, [%o0 + 0x20] + std %f10, [%o0 + 0x28] + std %f12, [%o0 + 0x30] + std %f14, [%o0 + 0x38] + retl + VISExit +10: + alignaddr %o1, %g0, %o1 + + ldd [%o1 + 0x00], %f18 +1: + ldd [%o1 + 0x08], %f20 + ldd [%o1 + 0x10], %f22 + ldd [%o1 + 0x18], %f24 + ldd [%o1 + 0x20], %f26 + ldd [%o1 + 0x28], %f28 + ldd [%o1 + 0x30], %f30 + ldd [%o1 + 0x38], %f32 + ldd [%o1 + 0x40], %f34 + ldd [%o1 + 0x48], %f36 + ldd [%o1 + 0x50], %f38 + ldd [%o1 + 0x58], %f40 + ldd [%o1 + 0x60], %f42 + ldd [%o1 + 0x68], %f44 + ldd [%o1 + 0x70], %f46 + ldd [%o1 + 0x78], %f48 + ldd [%o1 + 0x80], %f50 + + faligndata %f18, %f20, %f16 + faligndata %f20, %f22, %f18 + faligndata %f22, %f24, %f20 + faligndata %f24, %f26, %f22 + faligndata %f26, %f28, %f24 + faligndata %f28, %f30, %f26 + faligndata %f30, %f32, %f28 + faligndata %f32, %f34, %f30 + faligndata %f34, %f36, %f32 + faligndata %f36, %f38, %f34 + faligndata %f38, %f40, %f36 + faligndata %f40, %f42, %f38 + faligndata %f42, %f44, %f40 + faligndata %f44, %f46, %f42 + faligndata %f46, %f48, %f44 + faligndata %f48, %f50, %f46 + + SHA512 + + subcc %o2, 1, %o2 + fsrc2 %f50, %f18 + bne,pt %xcc, 1b + add %o1, 0x80, %o1 + + ba,a,pt %xcc, 5b +ENDPROC(sha512_sparc64_transform) diff --git a/arch/sparc/crypto/sha512_glue.c b/arch/sparc/crypto/sha512_glue.c new file mode 100644 index 000000000000..486f0a2b7001 --- /dev/null +++ b/arch/sparc/crypto/sha512_glue.c @@ -0,0 +1,226 @@ +/* Glue code for SHA512 hashing optimized for sparc64 crypto opcodes. + * + * This is based largely upon crypto/sha512_generic.c + * + * Copyright (c) Jean-Luc Cooke <jlcooke@certainkey.com> + * Copyright (c) Andrew McDonald <andrew@mcdonald.org.uk> + * Copyright (c) 2003 Kyle McMartin <kyle@debian.org> + */ + +#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt + +#include <crypto/internal/hash.h> +#include <linux/init.h> +#include <linux/module.h> +#include <linux/mm.h> +#include <linux/cryptohash.h> +#include <linux/types.h> +#include <crypto/sha.h> + +#include <asm/pstate.h> +#include <asm/elf.h> + +#include "opcodes.h" + +asmlinkage void sha512_sparc64_transform(u64 *digest, const char *data, + unsigned int rounds); + +static int sha512_sparc64_init(struct shash_desc *desc) +{ + struct sha512_state *sctx = shash_desc_ctx(desc); + sctx->state[0] = SHA512_H0; + sctx->state[1] = SHA512_H1; + sctx->state[2] = SHA512_H2; + sctx->state[3] = SHA512_H3; + sctx->state[4] = SHA512_H4; + sctx->state[5] = SHA512_H5; + sctx->state[6] = SHA512_H6; + sctx->state[7] = SHA512_H7; + sctx->count[0] = sctx->count[1] = 0; + + return 0; +} + +static int sha384_sparc64_init(struct shash_desc *desc) +{ + struct sha512_state *sctx = shash_desc_ctx(desc); + sctx->state[0] = SHA384_H0; + sctx->state[1] = SHA384_H1; + sctx->state[2] = SHA384_H2; + sctx->state[3] = SHA384_H3; + sctx->state[4] = SHA384_H4; + sctx->state[5] = SHA384_H5; + sctx->state[6] = SHA384_H6; + sctx->state[7] = SHA384_H7; + sctx->count[0] = sctx->count[1] = 0; + + return 0; +} + +static void __sha512_sparc64_update(struct sha512_state *sctx, const u8 *data, + unsigned int len, unsigned int partial) +{ + unsigned int done = 0; + + if ((sctx->count[0] += len) < len) + sctx->count[1]++; + if (partial) { + done = SHA512_BLOCK_SIZE - partial; + memcpy(sctx->buf + partial, data, done); + sha512_sparc64_transform(sctx->state, sctx->buf, 1); + } + if (len - done >= SHA512_BLOCK_SIZE) { + const unsigned int rounds = (len - done) / SHA512_BLOCK_SIZE; + + sha512_sparc64_transform(sctx->state, data + done, rounds); + done += rounds * SHA512_BLOCK_SIZE; + } + + memcpy(sctx->buf, data + done, len - done); +} + +static int sha512_sparc64_update(struct shash_desc *desc, const u8 *data, + unsigned int len) +{ + struct sha512_state *sctx = shash_desc_ctx(desc); + unsigned int partial = sctx->count[0] % SHA512_BLOCK_SIZE; + + /* Handle the fast case right here */ + if (partial + len < SHA512_BLOCK_SIZE) { + if ((sctx->count[0] += len) < len) + sctx->count[1]++; + memcpy(sctx->buf + partial, data, len); + } else + __sha512_sparc64_update(sctx, data, len, partial); + + return 0; +} + +static int sha512_sparc64_final(struct shash_desc *desc, u8 *out) +{ + struct sha512_state *sctx = shash_desc_ctx(desc); + unsigned int i, index, padlen; + __be64 *dst = (__be64 *)out; + __be64 bits[2]; + static const u8 padding[SHA512_BLOCK_SIZE] = { 0x80, }; + + /* Save number of bits */ + bits[1] = cpu_to_be64(sctx->count[0] << 3); + bits[0] = cpu_to_be64(sctx->count[1] << 3 | sctx->count[0] >> 61); + + /* Pad out to 112 mod 128 and append length */ + index = sctx->count[0] % SHA512_BLOCK_SIZE; + padlen = (index < 112) ? (112 - index) : ((SHA512_BLOCK_SIZE+112) - index); + + /* We need to fill a whole block for __sha512_sparc64_update() */ + if (padlen <= 112) { + if ((sctx->count[0] += padlen) < padlen) + sctx->count[1]++; + memcpy(sctx->buf + index, padding, padlen); + } else { + __sha512_sparc64_update(sctx, padding, padlen, index); + } + __sha512_sparc64_update(sctx, (const u8 *)&bits, sizeof(bits), 112); + + /* Store state in digest */ + for (i = 0; i < 8; i++) + dst[i] = cpu_to_be64(sctx->state[i]); + + /* Wipe context */ + memset(sctx, 0, sizeof(*sctx)); + + return 0; +} + +static int sha384_sparc64_final(struct shash_desc *desc, u8 *hash) +{ + u8 D[64]; + + sha512_sparc64_final(desc, D); + + memcpy(hash, D, 48); + memset(D, 0, 64); + + return 0; +} + +static struct shash_alg sha512 = { + .digestsize = SHA512_DIGEST_SIZE, + .init = sha512_sparc64_init, + .update = sha512_sparc64_update, + .final = sha512_sparc64_final, + .descsize = sizeof(struct sha512_state), + .base = { + .cra_name = "sha512", + .cra_driver_name= "sha512-sparc64", + .cra_priority = SPARC_CR_OPCODE_PRIORITY, + .cra_flags = CRYPTO_ALG_TYPE_SHASH, + .cra_blocksize = SHA512_BLOCK_SIZE, + .cra_module = THIS_MODULE, + } +}; + +static struct shash_alg sha384 = { + .digestsize = SHA384_DIGEST_SIZE, + .init = sha384_sparc64_init, + .update = sha512_sparc64_update, + .final = sha384_sparc64_final, + .descsize = sizeof(struct sha512_state), + .base = { + .cra_name = "sha384", + .cra_driver_name= "sha384-sparc64", + .cra_priority = SPARC_CR_OPCODE_PRIORITY, + .cra_flags = CRYPTO_ALG_TYPE_SHASH, + .cra_blocksize = SHA384_BLOCK_SIZE, + .cra_module = THIS_MODULE, + } +}; + +static bool __init sparc64_has_sha512_opcode(void) +{ + unsigned long cfr; + + if (!(sparc64_elf_hwcap & HWCAP_SPARC_CRYPTO)) + return false; + + __asm__ __volatile__("rd %%asr26, %0" : "=r" (cfr)); + if (!(cfr & CFR_SHA512)) + return false; + + return true; +} + +static int __init sha512_sparc64_mod_init(void) +{ + if (sparc64_has_sha512_opcode()) { + int ret = crypto_register_shash(&sha384); + if (ret < 0) + return ret; + + ret = crypto_register_shash(&sha512); + if (ret < 0) { + crypto_unregister_shash(&sha384); + return ret; + } + + pr_info("Using sparc64 sha512 opcode optimized SHA-512/SHA-384 implementation\n"); + return 0; + } + pr_info("sparc64 sha512 opcode not available.\n"); + return -ENODEV; +} + +static void __exit sha512_sparc64_mod_fini(void) +{ + crypto_unregister_shash(&sha384); + crypto_unregister_shash(&sha512); +} + +module_init(sha512_sparc64_mod_init); +module_exit(sha512_sparc64_mod_fini); + +MODULE_LICENSE("GPL"); +MODULE_DESCRIPTION("SHA-384 and SHA-512 Secure Hash Algorithm, sparc64 sha512 opcode accelerated"); + +MODULE_ALIAS("sha384"); +MODULE_ALIAS("sha512"); |