From 4ff28d4ca93b182b8e181b1e1b1d03fd09fdaeb4 Mon Sep 17 00:00:00 2001
From: "David S. Miller"
Date: Sun, 19 Aug 2012 15:41:53 -0700
Subject: sparc64: Add SHA1 driver making use of the 'sha1' instruction.

Signed-off-by: David S. Miller
Acked-by: Herbert Xu
---
 arch/sparc/crypto/Makefile    |   7 ++
 arch/sparc/crypto/sha1_asm.S  |  72 +++++++++++++++++
 arch/sparc/crypto/sha1_glue.c | 181 ++++++++++++++++++++++++++++++++++++++++++
 3 files changed, 260 insertions(+)
 create mode 100644 arch/sparc/crypto/Makefile
 create mode 100644 arch/sparc/crypto/sha1_asm.S
 create mode 100644 arch/sparc/crypto/sha1_glue.c
(limited to 'arch/sparc/crypto')

diff --git a/arch/sparc/crypto/Makefile b/arch/sparc/crypto/Makefile
new file mode 100644
index 000000000000..9760472fe32b
--- /dev/null
+++ b/arch/sparc/crypto/Makefile
@@ -0,0 +1,7 @@
+#
+# Arch-specific CryptoAPI modules.
+#
+
+obj-$(CONFIG_CRYPTO_SHA1_SPARC64) += sha1-sparc64.o
+
+sha1-sparc64-y := sha1_asm.o sha1_glue.o
diff --git a/arch/sparc/crypto/sha1_asm.S b/arch/sparc/crypto/sha1_asm.S
new file mode 100644
index 000000000000..d2147eb054c1
--- /dev/null
+++ b/arch/sparc/crypto/sha1_asm.S
@@ -0,0 +1,72 @@
+#include <linux/linkage.h>
+#include <asm/visasm.h>
+
+ENTRY(sha1_sparc64_transform)
+	/* %o0 = digest, %o1 = data, %o2 = rounds */
+	VISEntryHalf
+	ld	[%o0 + 0x00], %f0
+	ld	[%o0 + 0x04], %f1
+	ld	[%o0 + 0x08], %f2
+	andcc	%o1, 0x7, %g0
+	ld	[%o0 + 0x0c], %f3
+	bne,pn	%xcc, 10f
+	 ld	[%o0 + 0x10], %f4
+
+1:
+	ldd	[%o1 + 0x00], %f8
+	ldd	[%o1 + 0x08], %f10
+	ldd	[%o1 + 0x10], %f12
+	ldd	[%o1 + 0x18], %f14
+	ldd	[%o1 + 0x20], %f16
+	ldd	[%o1 + 0x28], %f18
+	ldd	[%o1 + 0x30], %f20
+	ldd	[%o1 + 0x38], %f22
+
+	/* sha1 */
+	.word	0x81b02820
+
+	subcc	%o2, 1, %o2
+	bne,pt	%xcc, 1b
+	 add	%o1, 0x40, %o1
+
+5:
+	st	%f0, [%o0 + 0x00]
+	st	%f1, [%o0 + 0x04]
+	st	%f2, [%o0 + 0x08]
+	st	%f3, [%o0 + 0x0c]
+	st	%f4, [%o0 + 0x10]
+	retl
+	 VISExitHalf
+10:
+	alignaddr %o1, %g0, %o1
+
+	ldd	[%o1 + 0x00], %f10
+1:
+	ldd	[%o1 + 0x08], %f12
+	ldd	[%o1 + 0x10], %f14
+	ldd	[%o1 + 0x18], %f16
+	ldd	[%o1 + 0x20], %f18
+	ldd	[%o1 + 0x28], %f20
+	ldd	[%o1 + 0x30], %f22
+	ldd	[%o1 + 0x38], %f24
+	ldd	[%o1 + 0x40], %f26
+
+	faligndata %f10, %f12, %f8
+	faligndata %f12, %f14, %f10
+	faligndata %f14, %f16, %f12
+	faligndata %f16, %f18, %f14
+	faligndata %f18, %f20, %f16
+	faligndata %f20, %f22, %f18
+	faligndata %f22, %f24, %f20
+	faligndata %f24, %f26, %f22
+
+	/* sha1 */
+	.word	0x81b02820
+
+	subcc	%o2, 1, %o2
+	fsrc1	%f26, %f10
+	bne,pt	%xcc, 1b
+	 add	%o1, 0x40, %o1
+
+	ba,a,pt	%xcc, 5b
ENDPROC(sha1_sparc64_transform)
diff --git a/arch/sparc/crypto/sha1_glue.c b/arch/sparc/crypto/sha1_glue.c
new file mode 100644
index 000000000000..6bd1abc5489d
--- /dev/null
+++ b/arch/sparc/crypto/sha1_glue.c
@@ -0,0 +1,181 @@
+/* Glue code for SHA1 hashing optimized for sparc64 crypto opcodes.
+ *
+ * This is based largely upon arch/x86/crypto/sha1_ssse3_glue.c
+ *
+ * Copyright (c) Alan Smithee.
+ * Copyright (c) Andrew McDonald
+ * Copyright (c) Jean-Francois Dive
+ * Copyright (c) Mathias Krause
+ */
+
+#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
+
+#include <crypto/internal/hash.h>
+#include <linux/init.h>
+#include <linux/module.h>
+#include <linux/mm.h>
+#include <linux/cryptohash.h>
+#include <linux/types.h>
+#include <crypto/sha.h>
+
+#include <asm/pstate.h>
+#include <asm/elf.h>
+
+asmlinkage void sha1_sparc64_transform(u32 *digest, const char *data,
+				       unsigned int rounds);
+
+static int sha1_sparc64_init(struct shash_desc *desc)
+{
+	struct sha1_state *sctx = shash_desc_ctx(desc);
+
+	*sctx = (struct sha1_state){
+		.state = { SHA1_H0, SHA1_H1, SHA1_H2, SHA1_H3, SHA1_H4 },
+	};
+
+	return 0;
+}
+
+static void __sha1_sparc64_update(struct sha1_state *sctx, const u8 *data,
+				  unsigned int len, unsigned int partial)
+{
+	unsigned int done = 0;
+
+	sctx->count += len;
+	if (partial) {
+		done = SHA1_BLOCK_SIZE - partial;
+		memcpy(sctx->buffer + partial, data, done);
+		sha1_sparc64_transform(sctx->state, sctx->buffer, 1);
+	}
+	if (len - done >= SHA1_BLOCK_SIZE) {
+		const unsigned int rounds = (len - done) / SHA1_BLOCK_SIZE;
+
+		sha1_sparc64_transform(sctx->state, data + done, rounds);
+		done += rounds * SHA1_BLOCK_SIZE;
+	}
+
+	memcpy(sctx->buffer, data + done, len - done);
+}
+
+static int sha1_sparc64_update(struct shash_desc *desc, const u8 *data,
+			       unsigned int len)
+{
+	struct sha1_state *sctx = shash_desc_ctx(desc);
+	unsigned int partial = sctx->count % SHA1_BLOCK_SIZE;
+
+	/* Handle the fast case right here */
+	if (partial + len < SHA1_BLOCK_SIZE) {
+		sctx->count += len;
+		memcpy(sctx->buffer + partial, data, len);
+	} else
+		__sha1_sparc64_update(sctx, data, len, partial);
+
+	return 0;
+}
+
+/* Add padding and return the message digest. */
+static int sha1_sparc64_final(struct shash_desc *desc, u8 *out)
+{
+	struct sha1_state *sctx = shash_desc_ctx(desc);
+	unsigned int i, index, padlen;
+	__be32 *dst = (__be32 *)out;
+	__be64 bits;
+	static const u8 padding[SHA1_BLOCK_SIZE] = { 0x80, };
+
+	bits = cpu_to_be64(sctx->count << 3);
+
+	/* Pad out to 56 mod 64 and append length */
+	index = sctx->count % SHA1_BLOCK_SIZE;
+	padlen = (index < 56) ?
(56 - index) : ((SHA1_BLOCK_SIZE+56) - index); + + /* We need to fill a whole block for __sha1_sparc64_update() */ + if (padlen <= 56) { + sctx->count += padlen; + memcpy(sctx->buffer + index, padding, padlen); + } else { + __sha1_sparc64_update(sctx, padding, padlen, index); + } + __sha1_sparc64_update(sctx, (const u8 *)&bits, sizeof(bits), 56); + + /* Store state in digest */ + for (i = 0; i < 5; i++) + dst[i] = cpu_to_be32(sctx->state[i]); + + /* Wipe context */ + memset(sctx, 0, sizeof(*sctx)); + + return 0; +} + +static int sha1_sparc64_export(struct shash_desc *desc, void *out) +{ + struct sha1_state *sctx = shash_desc_ctx(desc); + + memcpy(out, sctx, sizeof(*sctx)); + + return 0; +} + +static int sha1_sparc64_import(struct shash_desc *desc, const void *in) +{ + struct sha1_state *sctx = shash_desc_ctx(desc); + + memcpy(sctx, in, sizeof(*sctx)); + + return 0; +} + +static struct shash_alg alg = { + .digestsize = SHA1_DIGEST_SIZE, + .init = sha1_sparc64_init, + .update = sha1_sparc64_update, + .final = sha1_sparc64_final, + .export = sha1_sparc64_export, + .import = sha1_sparc64_import, + .descsize = sizeof(struct sha1_state), + .statesize = sizeof(struct sha1_state), + .base = { + .cra_name = "sha1", + .cra_driver_name= "sha1-sparc64", + .cra_priority = 150, + .cra_flags = CRYPTO_ALG_TYPE_SHASH, + .cra_blocksize = SHA1_BLOCK_SIZE, + .cra_module = THIS_MODULE, + } +}; + +static bool __init sparc64_has_sha1_opcode(void) +{ + unsigned long cfr; + + if (!(sparc64_elf_hwcap & HWCAP_SPARC_CRYPTO)) + return false; + + __asm__ __volatile__("rd %%asr26, %0" : "=r" (cfr)); + if (!(cfr & CFR_SHA1)) + return false; + + return true; +} + +static int __init sha1_sparc64_mod_init(void) +{ + if (sparc64_has_sha1_opcode()) { + pr_info("Using sparc64 sha1 opcode optimized SHA-1 implementation\n"); + return crypto_register_shash(&alg); + } + pr_info("sparc64 sha1 opcode not available.\n"); + return -ENODEV; +} + +static void __exit sha1_sparc64_mod_fini(void) +{ + crypto_unregister_shash(&alg); +} + +module_init(sha1_sparc64_mod_init); +module_exit(sha1_sparc64_mod_fini); + +MODULE_LICENSE("GPL"); +MODULE_DESCRIPTION("SHA1 Secure Hash Algorithm, sparc64 sha1 opcode accelerated"); + +MODULE_ALIAS("sha1"); -- cgit v1.2.3 From 86c93b24ef49d64062045c4e3cc2ed4609f6842a Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Sun, 19 Aug 2012 17:11:37 -0700 Subject: sparc64: Add SHA224/SHA256 driver making use of the 'sha256' instruction. Signed-off-by: David S. 
Miller Acked-by: Herbert Xu --- arch/sparc/crypto/Makefile | 2 + arch/sparc/crypto/sha256_asm.S | 78 +++++++++++++ arch/sparc/crypto/sha256_glue.c | 237 ++++++++++++++++++++++++++++++++++++++++ crypto/Kconfig | 9 ++ 4 files changed, 326 insertions(+) create mode 100644 arch/sparc/crypto/sha256_asm.S create mode 100644 arch/sparc/crypto/sha256_glue.c (limited to 'arch/sparc/crypto') diff --git a/arch/sparc/crypto/Makefile b/arch/sparc/crypto/Makefile index 9760472fe32b..578f845a7e01 100644 --- a/arch/sparc/crypto/Makefile +++ b/arch/sparc/crypto/Makefile @@ -3,5 +3,7 @@ # obj-$(CONFIG_CRYPTO_SHA1_SPARC64) += sha1-sparc64.o +obj-$(CONFIG_CRYPTO_SHA256_SPARC64) += sha256-sparc64.o sha1-sparc64-y := sha1_asm.o sha1_glue.o +sha256-sparc64-y := sha256_asm.o sha256_glue.o diff --git a/arch/sparc/crypto/sha256_asm.S b/arch/sparc/crypto/sha256_asm.S new file mode 100644 index 000000000000..771ce265f64a --- /dev/null +++ b/arch/sparc/crypto/sha256_asm.S @@ -0,0 +1,78 @@ +#include +#include + +ENTRY(sha256_sparc64_transform) + /* %o0 = digest, %o1 = data, %o2 = rounds */ + VISEntryHalf + ld [%o0 + 0x00], %f0 + ld [%o0 + 0x04], %f1 + ld [%o0 + 0x08], %f2 + ld [%o0 + 0x0c], %f3 + ld [%o0 + 0x10], %f4 + ld [%o0 + 0x14], %f5 + andcc %o1, 0x7, %g0 + ld [%o0 + 0x18], %f6 + bne,pn %xcc, 10f + ld [%o0 + 0x1c], %f7 + +1: + ldd [%o1 + 0x00], %f8 + ldd [%o1 + 0x08], %f10 + ldd [%o1 + 0x10], %f12 + ldd [%o1 + 0x18], %f14 + ldd [%o1 + 0x20], %f16 + ldd [%o1 + 0x28], %f18 + ldd [%o1 + 0x30], %f20 + ldd [%o1 + 0x38], %f22 + + /* sha256 */ + .word 0x81b02840 + + subcc %o2, 1, %o2 + bne,pt %xcc, 1b + add %o1, 0x40, %o1 + +5: + st %f0, [%o0 + 0x00] + st %f1, [%o0 + 0x04] + st %f2, [%o0 + 0x08] + st %f3, [%o0 + 0x0c] + st %f4, [%o0 + 0x10] + st %f5, [%o0 + 0x14] + st %f6, [%o0 + 0x18] + st %f7, [%o0 + 0x1c] + retl + VISExitHalf +10: + alignaddr %o1, %g0, %o1 + + ldd [%o1 + 0x00], %f10 +1: + ldd [%o1 + 0x08], %f12 + ldd [%o1 + 0x10], %f14 + ldd [%o1 + 0x18], %f16 + ldd [%o1 + 0x20], %f18 + ldd [%o1 + 0x28], %f20 + ldd [%o1 + 0x30], %f22 + ldd [%o1 + 0x38], %f24 + ldd [%o1 + 0x40], %f26 + + faligndata %f10, %f12, %f8 + faligndata %f12, %f14, %f10 + faligndata %f14, %f16, %f12 + faligndata %f16, %f18, %f14 + faligndata %f18, %f20, %f16 + faligndata %f20, %f22, %f18 + faligndata %f22, %f24, %f20 + faligndata %f24, %f26, %f22 + + /* sha256 */ + .word 0x81b02840 + + subcc %o2, 1, %o2 + fsrc1 %f26, %f10 + bne,pt %xcc, 1b + add %o1, 0x40, %o1 + + ba,a,pt %xcc, 5b +ENDPROC(sha256_sparc64_transform) diff --git a/arch/sparc/crypto/sha256_glue.c b/arch/sparc/crypto/sha256_glue.c new file mode 100644 index 000000000000..75e1adeeb024 --- /dev/null +++ b/arch/sparc/crypto/sha256_glue.c @@ -0,0 +1,237 @@ +/* Glue code for SHA256 hashing optimized for sparc64 crypto opcodes. 
+ * + * This is based largely upon crypto/sha256_generic.c + * + * Copyright (c) Jean-Luc Cooke + * Copyright (c) Andrew McDonald + * Copyright (c) 2002 James Morris + * SHA224 Support Copyright 2007 Intel Corporation + */ + +#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt + +#include +#include +#include +#include +#include +#include +#include + +#include +#include + +asmlinkage void sha256_sparc64_transform(u32 *digest, const char *data, + unsigned int rounds); + +static int sha224_sparc64_init(struct shash_desc *desc) +{ + struct sha256_state *sctx = shash_desc_ctx(desc); + sctx->state[0] = SHA224_H0; + sctx->state[1] = SHA224_H1; + sctx->state[2] = SHA224_H2; + sctx->state[3] = SHA224_H3; + sctx->state[4] = SHA224_H4; + sctx->state[5] = SHA224_H5; + sctx->state[6] = SHA224_H6; + sctx->state[7] = SHA224_H7; + sctx->count = 0; + + return 0; +} + +static int sha256_sparc64_init(struct shash_desc *desc) +{ + struct sha256_state *sctx = shash_desc_ctx(desc); + sctx->state[0] = SHA256_H0; + sctx->state[1] = SHA256_H1; + sctx->state[2] = SHA256_H2; + sctx->state[3] = SHA256_H3; + sctx->state[4] = SHA256_H4; + sctx->state[5] = SHA256_H5; + sctx->state[6] = SHA256_H6; + sctx->state[7] = SHA256_H7; + sctx->count = 0; + + return 0; +} + +static void __sha256_sparc64_update(struct sha256_state *sctx, const u8 *data, + unsigned int len, unsigned int partial) +{ + unsigned int done = 0; + + sctx->count += len; + if (partial) { + done = SHA256_BLOCK_SIZE - partial; + memcpy(sctx->buf + partial, data, done); + sha256_sparc64_transform(sctx->state, sctx->buf, 1); + } + if (len - done >= SHA256_BLOCK_SIZE) { + const unsigned int rounds = (len - done) / SHA256_BLOCK_SIZE; + + sha256_sparc64_transform(sctx->state, data + done, rounds); + done += rounds * SHA256_BLOCK_SIZE; + } + + memcpy(sctx->buf, data + done, len - done); +} + +static int sha256_sparc64_update(struct shash_desc *desc, const u8 *data, + unsigned int len) +{ + struct sha256_state *sctx = shash_desc_ctx(desc); + unsigned int partial = sctx->count % SHA256_BLOCK_SIZE; + + /* Handle the fast case right here */ + if (partial + len < SHA256_BLOCK_SIZE) { + sctx->count += len; + memcpy(sctx->buf + partial, data, len); + } else + __sha256_sparc64_update(sctx, data, len, partial); + + return 0; +} + +static int sha256_sparc64_final(struct shash_desc *desc, u8 *out) +{ + struct sha256_state *sctx = shash_desc_ctx(desc); + unsigned int i, index, padlen; + __be32 *dst = (__be32 *)out; + __be64 bits; + static const u8 padding[SHA256_BLOCK_SIZE] = { 0x80, }; + + bits = cpu_to_be64(sctx->count << 3); + + /* Pad out to 56 mod 64 and append length */ + index = sctx->count % SHA256_BLOCK_SIZE; + padlen = (index < 56) ? 
(56 - index) : ((SHA256_BLOCK_SIZE+56) - index); + + /* We need to fill a whole block for __sha256_sparc64_update() */ + if (padlen <= 56) { + sctx->count += padlen; + memcpy(sctx->buf + index, padding, padlen); + } else { + __sha256_sparc64_update(sctx, padding, padlen, index); + } + __sha256_sparc64_update(sctx, (const u8 *)&bits, sizeof(bits), 56); + + /* Store state in digest */ + for (i = 0; i < 8; i++) + dst[i] = cpu_to_be32(sctx->state[i]); + + /* Wipe context */ + memset(sctx, 0, sizeof(*sctx)); + + return 0; +} + +static int sha224_sparc64_final(struct shash_desc *desc, u8 *hash) +{ + u8 D[SHA256_DIGEST_SIZE]; + + sha256_sparc64_final(desc, D); + + memcpy(hash, D, SHA224_DIGEST_SIZE); + memset(D, 0, SHA256_DIGEST_SIZE); + + return 0; +} + +static int sha256_sparc64_export(struct shash_desc *desc, void *out) +{ + struct sha256_state *sctx = shash_desc_ctx(desc); + + memcpy(out, sctx, sizeof(*sctx)); + return 0; +} + +static int sha256_sparc64_import(struct shash_desc *desc, const void *in) +{ + struct sha256_state *sctx = shash_desc_ctx(desc); + + memcpy(sctx, in, sizeof(*sctx)); + return 0; +} + +static struct shash_alg sha256 = { + .digestsize = SHA256_DIGEST_SIZE, + .init = sha256_sparc64_init, + .update = sha256_sparc64_update, + .final = sha256_sparc64_final, + .export = sha256_sparc64_export, + .import = sha256_sparc64_import, + .descsize = sizeof(struct sha256_state), + .statesize = sizeof(struct sha256_state), + .base = { + .cra_name = "sha256", + .cra_driver_name= "sha256-sparc64", + .cra_flags = CRYPTO_ALG_TYPE_SHASH, + .cra_blocksize = SHA256_BLOCK_SIZE, + .cra_module = THIS_MODULE, + } +}; + +static struct shash_alg sha224 = { + .digestsize = SHA224_DIGEST_SIZE, + .init = sha224_sparc64_init, + .update = sha256_sparc64_update, + .final = sha224_sparc64_final, + .descsize = sizeof(struct sha256_state), + .base = { + .cra_name = "sha224", + .cra_driver_name= "sha224-sparc64", + .cra_flags = CRYPTO_ALG_TYPE_SHASH, + .cra_blocksize = SHA224_BLOCK_SIZE, + .cra_module = THIS_MODULE, + } +}; + +static bool __init sparc64_has_sha256_opcode(void) +{ + unsigned long cfr; + + if (!(sparc64_elf_hwcap & HWCAP_SPARC_CRYPTO)) + return false; + + __asm__ __volatile__("rd %%asr26, %0" : "=r" (cfr)); + if (!(cfr & CFR_SHA256)) + return false; + + return true; +} + +static int __init sha256_sparc64_mod_init(void) +{ + if (sparc64_has_sha256_opcode()) { + int ret = crypto_register_shash(&sha224); + if (ret < 0) + return ret; + + ret = crypto_register_shash(&sha256); + if (ret < 0) { + crypto_unregister_shash(&sha224); + return ret; + } + + pr_info("Using sparc64 sha256 opcode optimized SHA-256/SHA-224 implementation\n"); + return 0; + } + pr_info("sparc64 sha256 opcode not available.\n"); + return -ENODEV; +} + +static void __exit sha256_sparc64_mod_fini(void) +{ + crypto_unregister_shash(&sha224); + crypto_unregister_shash(&sha256); +} + +module_init(sha256_sparc64_mod_init); +module_exit(sha256_sparc64_mod_fini); + +MODULE_LICENSE("GPL"); +MODULE_DESCRIPTION("SHA-224 and SHA-256 Secure Hash Algorithm, sparc64 sha256 opcode accelerated"); + +MODULE_ALIAS("sha224"); +MODULE_ALIAS("sha256"); diff --git a/crypto/Kconfig b/crypto/Kconfig index 167c856f906d..4782d840d838 100644 --- a/crypto/Kconfig +++ b/crypto/Kconfig @@ -454,6 +454,15 @@ config CRYPTO_SHA256 This code also includes SHA-224, a 224 bit hash with 112 bits of security against collision attacks. 
+config CRYPTO_SHA256_SPARC64 + tristate "SHA224 and SHA256 digest algorithm (SPARC64)" + depends on SPARC64 + select CRYPTO_SHA256 + select CRYPTO_HASH + help + SHA-256 secure hash standard (DFIPS 180-2) implemented + using sparc64 crypto instructions, when available. + config CRYPTO_SHA512 tristate "SHA384 and SHA512 digest algorithms" select CRYPTO_HASH -- cgit v1.2.3 From 775e0c69987a76d08900edbca857425f952c60fb Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Sun, 19 Aug 2012 17:37:56 -0700 Subject: sparc64: Add SHA384/SHA512 driver making use of the 'sha512' instruction. Signed-off-by: David S. Miller Acked-by: Herbert Xu --- arch/sparc/crypto/Makefile | 2 + arch/sparc/crypto/sha512_asm.S | 102 ++++++++++++++++++ arch/sparc/crypto/sha512_glue.c | 222 ++++++++++++++++++++++++++++++++++++++++ crypto/Kconfig | 9 ++ 4 files changed, 335 insertions(+) create mode 100644 arch/sparc/crypto/sha512_asm.S create mode 100644 arch/sparc/crypto/sha512_glue.c (limited to 'arch/sparc/crypto') diff --git a/arch/sparc/crypto/Makefile b/arch/sparc/crypto/Makefile index 578f845a7e01..cfae0e874d26 100644 --- a/arch/sparc/crypto/Makefile +++ b/arch/sparc/crypto/Makefile @@ -4,6 +4,8 @@ obj-$(CONFIG_CRYPTO_SHA1_SPARC64) += sha1-sparc64.o obj-$(CONFIG_CRYPTO_SHA256_SPARC64) += sha256-sparc64.o +obj-$(CONFIG_CRYPTO_SHA512_SPARC64) += sha512-sparc64.o sha1-sparc64-y := sha1_asm.o sha1_glue.o sha256-sparc64-y := sha256_asm.o sha256_glue.o +sha512-sparc64-y := sha512_asm.o sha512_glue.o diff --git a/arch/sparc/crypto/sha512_asm.S b/arch/sparc/crypto/sha512_asm.S new file mode 100644 index 000000000000..04244da50774 --- /dev/null +++ b/arch/sparc/crypto/sha512_asm.S @@ -0,0 +1,102 @@ +#include +#include + +ENTRY(sha512_sparc64_transform) + /* %o0 = digest, %o1 = data, %o2 = rounds */ + VISEntry + ldd [%o0 + 0x00], %f0 + ldd [%o0 + 0x08], %f2 + ldd [%o0 + 0x10], %f4 + ldd [%o0 + 0x18], %f6 + ldd [%o0 + 0x20], %f8 + ldd [%o0 + 0x28], %f10 + andcc %o1, 0x7, %g0 + ldd [%o0 + 0x30], %f12 + bne,pn %xcc, 10f + ldd [%o0 + 0x38], %f14 + +1: + ldd [%o1 + 0x00], %f16 + ldd [%o1 + 0x08], %f18 + ldd [%o1 + 0x10], %f20 + ldd [%o1 + 0x18], %f22 + ldd [%o1 + 0x20], %f24 + ldd [%o1 + 0x28], %f26 + ldd [%o1 + 0x30], %f28 + ldd [%o1 + 0x38], %f30 + ldd [%o1 + 0x40], %f32 + ldd [%o1 + 0x48], %f34 + ldd [%o1 + 0x50], %f36 + ldd [%o1 + 0x58], %f38 + ldd [%o1 + 0x60], %f40 + ldd [%o1 + 0x68], %f42 + ldd [%o1 + 0x70], %f44 + ldd [%o1 + 0x78], %f46 + + /* sha512 */ + .word 0x81b02860 + + subcc %o2, 1, %o2 + bne,pt %xcc, 1b + add %o1, 0x80, %o1 + +5: + std %f0, [%o0 + 0x00] + std %f2, [%o0 + 0x08] + std %f4, [%o0 + 0x10] + std %f6, [%o0 + 0x18] + std %f8, [%o0 + 0x20] + std %f10, [%o0 + 0x28] + std %f12, [%o0 + 0x30] + std %f14, [%o0 + 0x38] + retl + VISExit +10: + alignaddr %o1, %g0, %o1 + + ldd [%o1 + 0x00], %f18 +1: + ldd [%o1 + 0x08], %f20 + ldd [%o1 + 0x10], %f22 + ldd [%o1 + 0x18], %f24 + ldd [%o1 + 0x20], %f26 + ldd [%o1 + 0x28], %f28 + ldd [%o1 + 0x30], %f30 + ldd [%o1 + 0x38], %f32 + ldd [%o1 + 0x40], %f34 + ldd [%o1 + 0x48], %f36 + ldd [%o1 + 0x50], %f38 + ldd [%o1 + 0x58], %f40 + ldd [%o1 + 0x60], %f42 + ldd [%o1 + 0x68], %f44 + ldd [%o1 + 0x70], %f46 + ldd [%o1 + 0x78], %f48 + ldd [%o1 + 0x80], %f50 + + faligndata %f18, %f20, %f16 + faligndata %f20, %f22, %f18 + faligndata %f22, %f24, %f20 + faligndata %f24, %f26, %f22 + faligndata %f26, %f28, %f24 + faligndata %f28, %f30, %f26 + faligndata %f30, %f32, %f28 + faligndata %f32, %f34, %f30 + faligndata %f34, %f36, %f32 + faligndata %f36, %f38, %f34 + faligndata %f38, 
%f40, %f36 + faligndata %f40, %f42, %f38 + faligndata %f42, %f44, %f40 + faligndata %f44, %f46, %f42 + faligndata %f46, %f48, %f44 + faligndata %f48, %f50, %f46 + + /* sha512 */ + .word 0x81b02860 + + subcc %o2, 1, %o2 + fsrc1 %f50, %f18 + bne,pt %xcc, 1b + add %o1, 0x80, %o1 + + ba,a,pt %xcc, 5b +ENDPROC(sha512_sparc64_transform) diff --git a/arch/sparc/crypto/sha512_glue.c b/arch/sparc/crypto/sha512_glue.c new file mode 100644 index 000000000000..4d960be401c4 --- /dev/null +++ b/arch/sparc/crypto/sha512_glue.c @@ -0,0 +1,222 @@ +/* Glue code for SHA512 hashing optimized for sparc64 crypto opcodes. + * + * This is based largely upon crypto/sha512_generic.c + * + * Copyright (c) Jean-Luc Cooke + * Copyright (c) Andrew McDonald + * Copyright (c) 2003 Kyle McMartin + */ + +#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt + +#include +#include +#include +#include +#include +#include +#include + +#include +#include + +asmlinkage void sha512_sparc64_transform(u64 *digest, const char *data, + unsigned int rounds); + +static int sha512_sparc64_init(struct shash_desc *desc) +{ + struct sha512_state *sctx = shash_desc_ctx(desc); + sctx->state[0] = SHA512_H0; + sctx->state[1] = SHA512_H1; + sctx->state[2] = SHA512_H2; + sctx->state[3] = SHA512_H3; + sctx->state[4] = SHA512_H4; + sctx->state[5] = SHA512_H5; + sctx->state[6] = SHA512_H6; + sctx->state[7] = SHA512_H7; + sctx->count[0] = sctx->count[1] = 0; + + return 0; +} + +static int sha384_sparc64_init(struct shash_desc *desc) +{ + struct sha512_state *sctx = shash_desc_ctx(desc); + sctx->state[0] = SHA384_H0; + sctx->state[1] = SHA384_H1; + sctx->state[2] = SHA384_H2; + sctx->state[3] = SHA384_H3; + sctx->state[4] = SHA384_H4; + sctx->state[5] = SHA384_H5; + sctx->state[6] = SHA384_H6; + sctx->state[7] = SHA384_H7; + sctx->count[0] = sctx->count[1] = 0; + + return 0; +} + +static void __sha512_sparc64_update(struct sha512_state *sctx, const u8 *data, + unsigned int len, unsigned int partial) +{ + unsigned int done = 0; + + if ((sctx->count[0] += len) < len) + sctx->count[1]++; + if (partial) { + done = SHA512_BLOCK_SIZE - partial; + memcpy(sctx->buf + partial, data, done); + sha512_sparc64_transform(sctx->state, sctx->buf, 1); + } + if (len - done >= SHA512_BLOCK_SIZE) { + const unsigned int rounds = (len - done) / SHA512_BLOCK_SIZE; + + sha512_sparc64_transform(sctx->state, data + done, rounds); + done += rounds * SHA512_BLOCK_SIZE; + } + + memcpy(sctx->buf, data + done, len - done); +} + +static int sha512_sparc64_update(struct shash_desc *desc, const u8 *data, + unsigned int len) +{ + struct sha512_state *sctx = shash_desc_ctx(desc); + unsigned int partial = sctx->count[0] % SHA512_BLOCK_SIZE; + + /* Handle the fast case right here */ + if (partial + len < SHA512_BLOCK_SIZE) { + if ((sctx->count[0] += len) < len) + sctx->count[1]++; + memcpy(sctx->buf + partial, data, len); + } else + __sha512_sparc64_update(sctx, data, len, partial); + + return 0; +} + +static int sha512_sparc64_final(struct shash_desc *desc, u8 *out) +{ + struct sha512_state *sctx = shash_desc_ctx(desc); + unsigned int i, index, padlen; + __be64 *dst = (__be64 *)out; + __be64 bits[2]; + static const u8 padding[SHA512_BLOCK_SIZE] = { 0x80, }; + + /* Save number of bits */ + bits[1] = cpu_to_be64(sctx->count[0] << 3); + bits[0] = cpu_to_be64(sctx->count[1] << 3 | sctx->count[0] >> 61); + + /* Pad out to 112 mod 128 and append length */ + index = sctx->count[0] % SHA512_BLOCK_SIZE; + padlen = (index < 112) ? 
(112 - index) : ((SHA512_BLOCK_SIZE+112) - index); + + /* We need to fill a whole block for __sha512_sparc64_update() */ + if (padlen <= 112) { + if ((sctx->count[0] += padlen) < padlen) + sctx->count[1]++; + memcpy(sctx->buf + index, padding, padlen); + } else { + __sha512_sparc64_update(sctx, padding, padlen, index); + } + __sha512_sparc64_update(sctx, (const u8 *)&bits, sizeof(bits), 112); + + /* Store state in digest */ + for (i = 0; i < 8; i++) + dst[i] = cpu_to_be64(sctx->state[i]); + + /* Wipe context */ + memset(sctx, 0, sizeof(*sctx)); + + return 0; +} + +static int sha384_sparc64_final(struct shash_desc *desc, u8 *hash) +{ + u8 D[64]; + + sha512_sparc64_final(desc, D); + + memcpy(hash, D, 48); + memset(D, 0, 64); + + return 0; +} + +static struct shash_alg sha512 = { + .digestsize = SHA512_DIGEST_SIZE, + .init = sha512_sparc64_init, + .update = sha512_sparc64_update, + .final = sha512_sparc64_final, + .descsize = sizeof(struct sha512_state), + .base = { + .cra_name = "sha512", + .cra_driver_name= "sha512-sparc64", + .cra_flags = CRYPTO_ALG_TYPE_SHASH, + .cra_blocksize = SHA512_BLOCK_SIZE, + .cra_module = THIS_MODULE, + } +}; + +static struct shash_alg sha384 = { + .digestsize = SHA384_DIGEST_SIZE, + .init = sha384_sparc64_init, + .update = sha512_sparc64_update, + .final = sha384_sparc64_final, + .descsize = sizeof(struct sha512_state), + .base = { + .cra_name = "sha384", + .cra_driver_name= "sha384-sparc64", + .cra_flags = CRYPTO_ALG_TYPE_SHASH, + .cra_blocksize = SHA384_BLOCK_SIZE, + .cra_module = THIS_MODULE, + } +}; + +static bool __init sparc64_has_sha512_opcode(void) +{ + unsigned long cfr; + + if (!(sparc64_elf_hwcap & HWCAP_SPARC_CRYPTO)) + return false; + + __asm__ __volatile__("rd %%asr26, %0" : "=r" (cfr)); + if (!(cfr & CFR_SHA512)) + return false; + + return true; +} + +static int __init sha512_sparc64_mod_init(void) +{ + if (sparc64_has_sha512_opcode()) { + int ret = crypto_register_shash(&sha384); + if (ret < 0) + return ret; + + ret = crypto_register_shash(&sha512); + if (ret < 0) { + crypto_unregister_shash(&sha384); + return ret; + } + + pr_info("Using sparc64 sha512 opcode optimized SHA-512/SHA-384 implementation\n"); + return 0; + } + pr_info("sparc64 sha512 opcode not available.\n"); + return -ENODEV; +} + +static void __exit sha512_sparc64_mod_fini(void) +{ + crypto_unregister_shash(&sha384); + crypto_unregister_shash(&sha512); +} + +module_init(sha512_sparc64_mod_init); +module_exit(sha512_sparc64_mod_fini); + +MODULE_LICENSE("GPL"); +MODULE_DESCRIPTION("SHA-384 and SHA-512 Secure Hash Algorithm, sparc64 sha512 opcode accelerated"); + +MODULE_ALIAS("sha384"); +MODULE_ALIAS("sha512"); diff --git a/crypto/Kconfig b/crypto/Kconfig index 4782d840d838..e7ed12abc875 100644 --- a/crypto/Kconfig +++ b/crypto/Kconfig @@ -475,6 +475,15 @@ config CRYPTO_SHA512 This code also includes SHA-384, a 384 bit hash with 192 bits of security against collision attacks. +config CRYPTO_SHA512_SPARC64 + tristate "SHA384 and SHA512 digest algorithm (SPARC64)" + depends on SPARC64 + select CRYPTO_SHA512 + select CRYPTO_HASH + help + SHA-512 secure hash standard (DFIPS 180-2) implemented + using sparc64 crypto instructions, when available. + config CRYPTO_TGR192 tristate "Tiger digest algorithms" select CRYPTO_HASH -- cgit v1.2.3 From fa4dfedcc23a589f953750de54eebf986336fa70 Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Sun, 19 Aug 2012 21:51:26 -0700 Subject: sparc64: Add MD5 driver making use of the 'md5' instruction. Signed-off-by: David S. 
Miller Acked-by: Herbert Xu --- arch/sparc/crypto/Makefile | 2 + arch/sparc/crypto/md5_asm.S | 70 ++++++++++++++++ arch/sparc/crypto/md5_glue.c | 186 +++++++++++++++++++++++++++++++++++++++++++ crypto/Kconfig | 9 +++ 4 files changed, 267 insertions(+) create mode 100644 arch/sparc/crypto/md5_asm.S create mode 100644 arch/sparc/crypto/md5_glue.c (limited to 'arch/sparc/crypto') diff --git a/arch/sparc/crypto/Makefile b/arch/sparc/crypto/Makefile index cfae0e874d26..535669828d47 100644 --- a/arch/sparc/crypto/Makefile +++ b/arch/sparc/crypto/Makefile @@ -5,7 +5,9 @@ obj-$(CONFIG_CRYPTO_SHA1_SPARC64) += sha1-sparc64.o obj-$(CONFIG_CRYPTO_SHA256_SPARC64) += sha256-sparc64.o obj-$(CONFIG_CRYPTO_SHA512_SPARC64) += sha512-sparc64.o +obj-$(CONFIG_CRYPTO_MD5_SPARC64) += md5-sparc64.o sha1-sparc64-y := sha1_asm.o sha1_glue.o sha256-sparc64-y := sha256_asm.o sha256_glue.o sha512-sparc64-y := sha512_asm.o sha512_glue.o +md5-sparc64-y := md5_asm.o md5_glue.o diff --git a/arch/sparc/crypto/md5_asm.S b/arch/sparc/crypto/md5_asm.S new file mode 100644 index 000000000000..220b73baa551 --- /dev/null +++ b/arch/sparc/crypto/md5_asm.S @@ -0,0 +1,70 @@ +#include +#include + +ENTRY(md5_sparc64_transform) + /* %o0 = digest, %o1 = data, %o2 = rounds */ + VISEntryHalf + ld [%o0 + 0x00], %f0 + ld [%o0 + 0x04], %f1 + andcc %o1, 0x7, %g0 + ld [%o0 + 0x08], %f2 + bne,pn %xcc, 10f + ld [%o0 + 0x0c], %f3 + +1: + ldd [%o1 + 0x00], %f8 + ldd [%o1 + 0x08], %f10 + ldd [%o1 + 0x10], %f12 + ldd [%o1 + 0x18], %f14 + ldd [%o1 + 0x20], %f16 + ldd [%o1 + 0x28], %f18 + ldd [%o1 + 0x30], %f20 + ldd [%o1 + 0x38], %f22 + + /* md5 */ + .word 0x81b02800 + + subcc %o2, 1, %o2 + bne,pt %xcc, 1b + add %o1, 0x40, %o1 + +5: + st %f0, [%o0 + 0x00] + st %f1, [%o0 + 0x04] + st %f2, [%o0 + 0x08] + st %f3, [%o0 + 0x0c] + retl + VISExitHalf +10: + alignaddr %o1, %g0, %o1 + + ldd [%o1 + 0x00], %f10 +1: + ldd [%o1 + 0x08], %f12 + ldd [%o1 + 0x10], %f14 + ldd [%o1 + 0x18], %f16 + ldd [%o1 + 0x20], %f18 + ldd [%o1 + 0x28], %f20 + ldd [%o1 + 0x30], %f22 + ldd [%o1 + 0x38], %f24 + ldd [%o1 + 0x40], %f26 + + faligndata %f10, %f12, %f8 + faligndata %f12, %f14, %f10 + faligndata %f14, %f16, %f12 + faligndata %f16, %f18, %f14 + faligndata %f18, %f20, %f16 + faligndata %f20, %f22, %f18 + faligndata %f22, %f24, %f20 + faligndata %f24, %f26, %f22 + + /* md5 */ + .word 0x81b02800 + + subcc %o2, 1, %o2 + fsrc1 %f26, %f10 + bne,pt %xcc, 1b + add %o1, 0x40, %o1 + + ba,a,pt %xcc, 5b +ENDPROC(md5_sparc64_transform) diff --git a/arch/sparc/crypto/md5_glue.c b/arch/sparc/crypto/md5_glue.c new file mode 100644 index 000000000000..bdfcfefbd4fd --- /dev/null +++ b/arch/sparc/crypto/md5_glue.c @@ -0,0 +1,186 @@ +/* Glue code for MD5 hashing optimized for sparc64 crypto opcodes. + * + * This is based largely upon arch/x86/crypto/sha1_ssse3_glue.c + * and crypto/md5.c which are: + * + * Copyright (c) Alan Smithee. + * Copyright (c) Andrew McDonald + * Copyright (c) Jean-Francois Dive + * Copyright (c) Mathias Krause + * Copyright (c) Cryptoapi developers. 
+ * Copyright (c) 2002 James Morris + */ + +#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt + +#include +#include +#include +#include +#include +#include +#include + +#include +#include + +asmlinkage void md5_sparc64_transform(u32 *digest, const char *data, + unsigned int rounds); + +static int md5_sparc64_init(struct shash_desc *desc) +{ + struct md5_state *mctx = shash_desc_ctx(desc); + + mctx->hash[0] = cpu_to_le32(0x67452301); + mctx->hash[1] = cpu_to_le32(0xefcdab89); + mctx->hash[2] = cpu_to_le32(0x98badcfe); + mctx->hash[3] = cpu_to_le32(0x10325476); + mctx->byte_count = 0; + + return 0; +} + +static void __md5_sparc64_update(struct md5_state *sctx, const u8 *data, + unsigned int len, unsigned int partial) +{ + unsigned int done = 0; + + sctx->byte_count += len; + if (partial) { + done = MD5_HMAC_BLOCK_SIZE - partial; + memcpy((u8 *)sctx->block + partial, data, done); + md5_sparc64_transform(sctx->hash, (u8 *)sctx->block, 1); + } + if (len - done >= MD5_HMAC_BLOCK_SIZE) { + const unsigned int rounds = (len - done) / MD5_HMAC_BLOCK_SIZE; + + md5_sparc64_transform(sctx->hash, data + done, rounds); + done += rounds * MD5_HMAC_BLOCK_SIZE; + } + + memcpy(sctx->block, data + done, len - done); +} + +static int md5_sparc64_update(struct shash_desc *desc, const u8 *data, + unsigned int len) +{ + struct md5_state *sctx = shash_desc_ctx(desc); + unsigned int partial = sctx->byte_count % MD5_HMAC_BLOCK_SIZE; + + /* Handle the fast case right here */ + if (partial + len < MD5_HMAC_BLOCK_SIZE) { + sctx->byte_count += len; + memcpy((u8 *)sctx->block + partial, data, len); + } else + __md5_sparc64_update(sctx, data, len, partial); + + return 0; +} + +/* Add padding and return the message digest. */ +static int md5_sparc64_final(struct shash_desc *desc, u8 *out) +{ + struct md5_state *sctx = shash_desc_ctx(desc); + unsigned int i, index, padlen; + u32 *dst = (u32 *)out; + __le64 bits; + static const u8 padding[MD5_HMAC_BLOCK_SIZE] = { 0x80, }; + + bits = cpu_to_le64(sctx->byte_count << 3); + + /* Pad out to 56 mod 64 and append length */ + index = sctx->byte_count % MD5_HMAC_BLOCK_SIZE; + padlen = (index < 56) ? 
(56 - index) : ((MD5_HMAC_BLOCK_SIZE+56) - index); + + /* We need to fill a whole block for __md5_sparc64_update() */ + if (padlen <= 56) { + sctx->byte_count += padlen; + memcpy((u8 *)sctx->block + index, padding, padlen); + } else { + __md5_sparc64_update(sctx, padding, padlen, index); + } + __md5_sparc64_update(sctx, (const u8 *)&bits, sizeof(bits), 56); + + /* Store state in digest */ + for (i = 0; i < MD5_HASH_WORDS; i++) + dst[i] = sctx->hash[i]; + + /* Wipe context */ + memset(sctx, 0, sizeof(*sctx)); + + return 0; +} + +static int md5_sparc64_export(struct shash_desc *desc, void *out) +{ + struct md5_state *sctx = shash_desc_ctx(desc); + + memcpy(out, sctx, sizeof(*sctx)); + + return 0; +} + +static int md5_sparc64_import(struct shash_desc *desc, const void *in) +{ + struct md5_state *sctx = shash_desc_ctx(desc); + + memcpy(sctx, in, sizeof(*sctx)); + + return 0; +} + +static struct shash_alg alg = { + .digestsize = MD5_DIGEST_SIZE, + .init = md5_sparc64_init, + .update = md5_sparc64_update, + .final = md5_sparc64_final, + .export = md5_sparc64_export, + .import = md5_sparc64_import, + .descsize = sizeof(struct md5_state), + .statesize = sizeof(struct md5_state), + .base = { + .cra_name = "md5", + .cra_driver_name= "md5-sparc64", + .cra_priority = 150, + .cra_flags = CRYPTO_ALG_TYPE_SHASH, + .cra_blocksize = MD5_HMAC_BLOCK_SIZE, + .cra_module = THIS_MODULE, + } +}; + +static bool __init sparc64_has_md5_opcode(void) +{ + unsigned long cfr; + + if (!(sparc64_elf_hwcap & HWCAP_SPARC_CRYPTO)) + return false; + + __asm__ __volatile__("rd %%asr26, %0" : "=r" (cfr)); + if (!(cfr & CFR_MD5)) + return false; + + return true; +} + +static int __init md5_sparc64_mod_init(void) +{ + if (sparc64_has_md5_opcode()) { + pr_info("Using sparc64 md5 opcode optimized MD5 implementation\n"); + return crypto_register_shash(&alg); + } + pr_info("sparc64 md5 opcode not available.\n"); + return -ENODEV; +} + +static void __exit md5_sparc64_mod_fini(void) +{ + crypto_unregister_shash(&alg); +} + +module_init(md5_sparc64_mod_init); +module_exit(md5_sparc64_mod_fini); + +MODULE_LICENSE("GPL"); +MODULE_DESCRIPTION("MD5 Secure Hash Algorithm, sparc64 md5 opcode accelerated"); + +MODULE_ALIAS("md5"); diff --git a/crypto/Kconfig b/crypto/Kconfig index e7ed12abc875..4cb1ab04168f 100644 --- a/crypto/Kconfig +++ b/crypto/Kconfig @@ -354,6 +354,15 @@ config CRYPTO_MD5 help MD5 message digest algorithm (RFC1321). +config CRYPTO_MD5_SPARC64 + tristate "MD5 digest algorithm (SPARC64)" + depends on SPARC64 + select CRYPTO_MD5 + select CRYPTO_HASH + help + MD5 message digest algorithm (RFC1321) implemented + using sparc64 crypto instructions, when available. + config CRYPTO_MICHAEL_MIC tristate "Michael MIC keyed digest algorithm" select CRYPTO_HASH -- cgit v1.2.3 From 9bf4852d3d195f771503d5be547ac940b0b3472a Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Tue, 21 Aug 2012 03:58:13 -0700 Subject: sparc64: Add AES driver making use of the new aes opcodes. Signed-off-by: David S. 
Miller Acked-by: Herbert Xu --- arch/sparc/crypto/Makefile | 4 + arch/sparc/crypto/aes_asm.S | 836 +++++++++++++++++++++++++++++++++++++++++++ arch/sparc/crypto/aes_glue.c | 323 +++++++++++++++++ crypto/Kconfig | 28 ++ 4 files changed, 1191 insertions(+) create mode 100644 arch/sparc/crypto/aes_asm.S create mode 100644 arch/sparc/crypto/aes_glue.c (limited to 'arch/sparc/crypto') diff --git a/arch/sparc/crypto/Makefile b/arch/sparc/crypto/Makefile index 535669828d47..5034324fdd46 100644 --- a/arch/sparc/crypto/Makefile +++ b/arch/sparc/crypto/Makefile @@ -7,7 +7,11 @@ obj-$(CONFIG_CRYPTO_SHA256_SPARC64) += sha256-sparc64.o obj-$(CONFIG_CRYPTO_SHA512_SPARC64) += sha512-sparc64.o obj-$(CONFIG_CRYPTO_MD5_SPARC64) += md5-sparc64.o +obj-$(CONFIG_CRYPTO_AES_SPARC64) += aes-sparc64.o + sha1-sparc64-y := sha1_asm.o sha1_glue.o sha256-sparc64-y := sha256_asm.o sha256_glue.o sha512-sparc64-y := sha512_asm.o sha512_glue.o md5-sparc64-y := md5_asm.o md5_glue.o + +aes-sparc64-y := aes_asm.o aes_glue.o diff --git a/arch/sparc/crypto/aes_asm.S b/arch/sparc/crypto/aes_asm.S new file mode 100644 index 000000000000..f656dc7a173e --- /dev/null +++ b/arch/sparc/crypto/aes_asm.S @@ -0,0 +1,836 @@ +#include +#include + +#define F3F(x,y,z) (((x)<<30)|((y)<<19)|((z)<<5)) + +#define FPD_ENCODE(x) (((x) >> 5) | ((x) & ~(0x20))) + +#define RS1(x) (FPD_ENCODE(x) << 14) +#define RS2(x) (FPD_ENCODE(x) << 0) +#define RS3(x) (FPD_ENCODE(x) << 9) +#define RD(x) (FPD_ENCODE(x) << 25) +#define IMM5(x) ((x) << 9) + +#define AES_EROUND01(a,b,c,d) \ + .word (F3F(2, 0x19, 0)|RS1(a)|RS2(b)|RS3(c)|RD(d)); +#define AES_EROUND23(a,b,c,d) \ + .word (F3F(2, 0x19, 1)|RS1(a)|RS2(b)|RS3(c)|RD(d)); +#define AES_DROUND01(a,b,c,d) \ + .word (F3F(2, 0x19, 2)|RS1(a)|RS2(b)|RS3(c)|RD(d)); +#define AES_DROUND23(a,b,c,d) \ + .word (F3F(2, 0x19, 3)|RS1(a)|RS2(b)|RS3(c)|RD(d)); +#define AES_EROUND01_L(a,b,c,d) \ + .word (F3F(2, 0x19, 4)|RS1(a)|RS2(b)|RS3(c)|RD(d)); +#define AES_EROUND23_L(a,b,c,d) \ + .word (F3F(2, 0x19, 5)|RS1(a)|RS2(b)|RS3(c)|RD(d)); +#define AES_DROUND01_L(a,b,c,d) \ + .word (F3F(2, 0x19, 6)|RS1(a)|RS2(b)|RS3(c)|RD(d)); +#define AES_DROUND23_L(a,b,c,d) \ + .word (F3F(2, 0x19, 7)|RS1(a)|RS2(b)|RS3(c)|RD(d)); +#define AES_KEXPAND1(a,b,c,d) \ + .word (F3F(2, 0x19, 8)|RS1(a)|RS2(b)|IMM5(c)|RD(d)); +#define AES_KEXPAND0(a,b,c) \ + .word (F3F(2, 0x36, 0x130)|RS1(a)|RS2(b)|RD(c)); +#define AES_KEXPAND2(a,b,c) \ + .word (F3F(2, 0x36, 0x131)|RS1(a)|RS2(b)|RD(c)); + +#define MOVXTOD_G3_F4 \ + .word 0x89b02303; +#define MOVXTOD_G7_F6 \ + .word 0x8db02307; +#define MOVXTOD_G3_F0 \ + .word 0x81b02303; +#define MOVXTOD_G7_F2 \ + .word 0x85b02307; +#define MOVXTOD_O0_F0 \ + .word 0x81b02308; +#define MOVXTOD_O1_F2 \ + .word 0x85b02309; + +#define ENCRYPT_TWO_ROUNDS(KEY_BASE, I0, I1, T0, T1) \ + AES_EROUND01(KEY_BASE + 0, I0, I1, T0) \ + AES_EROUND23(KEY_BASE + 2, I0, I1, T1) \ + AES_EROUND01(KEY_BASE + 4, T0, T1, I0) \ + AES_EROUND23(KEY_BASE + 6, T0, T1, I1) + +#define ENCRYPT_TWO_ROUNDS_LAST(KEY_BASE, I0, I1, T0, T1) \ + AES_EROUND01(KEY_BASE + 0, I0, I1, T0) \ + AES_EROUND23(KEY_BASE + 2, I0, I1, T1) \ + AES_EROUND01_L(KEY_BASE + 4, T0, T1, I0) \ + AES_EROUND23_L(KEY_BASE + 6, T0, T1, I1) + + /* 10 rounds */ +#define ENCRYPT_128(KEY_BASE, I0, I1, T0, T1) \ + ENCRYPT_TWO_ROUNDS(KEY_BASE + 0, I0, I1, T0, T1) \ + ENCRYPT_TWO_ROUNDS(KEY_BASE + 8, I0, I1, T0, T1) \ + ENCRYPT_TWO_ROUNDS(KEY_BASE + 16, I0, I1, T0, T1) \ + ENCRYPT_TWO_ROUNDS(KEY_BASE + 24, I0, I1, T0, T1) \ + ENCRYPT_TWO_ROUNDS_LAST(KEY_BASE + 32, I0, I1, T0, T1) + + /* 12 rounds 
*/ +#define ENCRYPT_192(KEY_BASE, I0, I1, T0, T1) \ + ENCRYPT_TWO_ROUNDS(KEY_BASE + 0, I0, I1, T0, T1) \ + ENCRYPT_TWO_ROUNDS(KEY_BASE + 8, I0, I1, T0, T1) \ + ENCRYPT_TWO_ROUNDS(KEY_BASE + 16, I0, I1, T0, T1) \ + ENCRYPT_TWO_ROUNDS(KEY_BASE + 24, I0, I1, T0, T1) \ + ENCRYPT_TWO_ROUNDS(KEY_BASE + 32, I0, I1, T0, T1) \ + ENCRYPT_TWO_ROUNDS_LAST(KEY_BASE + 40, I0, I1, T0, T1) + + /* 14 rounds */ +#define ENCRYPT_256(KEY_BASE, I0, I1, T0, T1) \ + ENCRYPT_TWO_ROUNDS(KEY_BASE + 0, I0, I1, T0, T1) \ + ENCRYPT_TWO_ROUNDS(KEY_BASE + 8, I0, I1, T0, T1) \ + ENCRYPT_TWO_ROUNDS(KEY_BASE + 16, I0, I1, T0, T1) \ + ENCRYPT_TWO_ROUNDS(KEY_BASE + 24, I0, I1, T0, T1) \ + ENCRYPT_TWO_ROUNDS(KEY_BASE + 32, I0, I1, T0, T1) \ + ENCRYPT_TWO_ROUNDS(KEY_BASE + 40, I0, I1, T0, T1) \ + ENCRYPT_TWO_ROUNDS_LAST(KEY_BASE + 48, I0, I1, T0, T1) + +#define DECRYPT_TWO_ROUNDS(KEY_TOP, I0, I1, T0, T1) \ + AES_DROUND23(KEY_TOP - 2, I0, I1, T1) \ + AES_DROUND01(KEY_TOP - 4, I0, I1, T0) \ + AES_DROUND23(KEY_TOP - 6, T0, T1, I1) \ + AES_DROUND01(KEY_TOP - 8, T0, T1, I0) + +#define DECRYPT_TWO_ROUNDS_LAST(KEY_TOP, I0, I1, T0, T1) \ + AES_DROUND23(KEY_TOP - 2, I0, I1, T1) \ + AES_DROUND01(KEY_TOP - 4, I0, I1, T0) \ + AES_DROUND23_L(KEY_TOP - 6, T0, T1, I1) \ + AES_DROUND01_L(KEY_TOP - 8, T0, T1, I0) + + /* 10 rounds */ +#define DECRYPT_128(KEY_TOP, I0, I1, T0, T1) \ + DECRYPT_TWO_ROUNDS(KEY_TOP - 0, I0, I1, T0, T1) \ + DECRYPT_TWO_ROUNDS(KEY_TOP - 8, I0, I1, T0, T1) \ + DECRYPT_TWO_ROUNDS(KEY_TOP - 16, I0, I1, T0, T1) \ + DECRYPT_TWO_ROUNDS(KEY_TOP - 24, I0, I1, T0, T1) \ + DECRYPT_TWO_ROUNDS_LAST(KEY_TOP - 32, I0, I1, T0, T1) + + /* 12 rounds */ +#define DECRYPT_192(KEY_TOP, I0, I1, T0, T1) \ + DECRYPT_TWO_ROUNDS(KEY_TOP - 0, I0, I1, T0, T1) \ + DECRYPT_TWO_ROUNDS(KEY_TOP - 8, I0, I1, T0, T1) \ + DECRYPT_TWO_ROUNDS(KEY_TOP - 16, I0, I1, T0, T1) \ + DECRYPT_TWO_ROUNDS(KEY_TOP - 24, I0, I1, T0, T1) \ + DECRYPT_TWO_ROUNDS(KEY_TOP - 32, I0, I1, T0, T1) \ + DECRYPT_TWO_ROUNDS_LAST(KEY_TOP - 40, I0, I1, T0, T1) + + /* 14 rounds */ +#define DECRYPT_256(KEY_TOP, I0, I1, T0, T1) \ + DECRYPT_TWO_ROUNDS(KEY_TOP - 0, I0, I1, T0, T1) \ + DECRYPT_TWO_ROUNDS(KEY_TOP - 8, I0, I1, T0, T1) \ + DECRYPT_TWO_ROUNDS(KEY_TOP - 16, I0, I1, T0, T1) \ + DECRYPT_TWO_ROUNDS(KEY_TOP - 24, I0, I1, T0, T1) \ + DECRYPT_TWO_ROUNDS(KEY_TOP - 32, I0, I1, T0, T1) \ + DECRYPT_TWO_ROUNDS(KEY_TOP - 40, I0, I1, T0, T1) \ + DECRYPT_TWO_ROUNDS_LAST(KEY_TOP - 48, I0, I1, T0, T1) + +ENTRY(aes_sparc64_key_expand) + /* %o0=input_key, %o1=output_key, %o2=key_len */ + VISEntry + ld [%o0 + 0x00], %f0 + ld [%o0 + 0x04], %f1 + ld [%o0 + 0x08], %f2 + ld [%o0 + 0x0c], %f3 + + std %f0, [%o1 + 0x00] + std %f2, [%o1 + 0x08] + add %o1, 0x10, %o1 + + cmp %o2, 24 + bl 2f + nop + + be 1f + nop + + /* 256-bit key expansion */ + ld [%o0 + 0x10], %f4 + ld [%o0 + 0x14], %f5 + ld [%o0 + 0x18], %f6 + ld [%o0 + 0x1c], %f7 + + std %f4, [%o1 + 0x00] + std %f6, [%o1 + 0x08] + add %o1, 0x10, %o1 + + AES_KEXPAND1(0, 6, 0x0, 8) + AES_KEXPAND2(2, 8, 10) + AES_KEXPAND0(4, 10, 12) + AES_KEXPAND2(6, 12, 14) + AES_KEXPAND1(8, 14, 0x1, 16) + AES_KEXPAND2(10, 16, 18) + AES_KEXPAND0(12, 18, 20) + AES_KEXPAND2(14, 20, 22) + AES_KEXPAND1(16, 22, 0x2, 24) + AES_KEXPAND2(18, 24, 26) + AES_KEXPAND0(20, 26, 28) + AES_KEXPAND2(22, 28, 30) + AES_KEXPAND1(24, 30, 0x3, 32) + AES_KEXPAND2(26, 32, 34) + AES_KEXPAND0(28, 34, 36) + AES_KEXPAND2(30, 36, 38) + AES_KEXPAND1(32, 38, 0x4, 40) + AES_KEXPAND2(34, 40, 42) + AES_KEXPAND0(36, 42, 44) + AES_KEXPAND2(38, 44, 46) + AES_KEXPAND1(40, 46, 0x5, 48) + AES_KEXPAND2(42, 48, 
50) + AES_KEXPAND0(44, 50, 52) + AES_KEXPAND2(46, 52, 54) + AES_KEXPAND1(48, 54, 0x6, 56) + AES_KEXPAND2(50, 56, 58) + + std %f8, [%o1 + 0x00] + std %f10, [%o1 + 0x08] + std %f12, [%o1 + 0x10] + std %f14, [%o1 + 0x18] + std %f16, [%o1 + 0x20] + std %f18, [%o1 + 0x28] + std %f20, [%o1 + 0x30] + std %f22, [%o1 + 0x38] + std %f24, [%o1 + 0x40] + std %f26, [%o1 + 0x48] + std %f28, [%o1 + 0x50] + std %f30, [%o1 + 0x58] + std %f32, [%o1 + 0x60] + std %f34, [%o1 + 0x68] + std %f36, [%o1 + 0x70] + std %f38, [%o1 + 0x78] + std %f40, [%o1 + 0x80] + std %f42, [%o1 + 0x88] + std %f44, [%o1 + 0x90] + std %f46, [%o1 + 0x98] + std %f48, [%o1 + 0xa0] + std %f50, [%o1 + 0xa8] + std %f52, [%o1 + 0xb0] + std %f54, [%o1 + 0xb8] + std %f56, [%o1 + 0xc0] + ba,pt %xcc, 80f + std %f58, [%o1 + 0xc8] + +1: + /* 192-bit key expansion */ + ld [%o0 + 0x10], %f4 + ld [%o0 + 0x14], %f5 + + std %f4, [%o1 + 0x00] + add %o1, 0x08, %o1 + + AES_KEXPAND1(0, 4, 0x0, 6) + AES_KEXPAND2(2, 6, 8) + AES_KEXPAND2(4, 8, 10) + AES_KEXPAND1(6, 10, 0x1, 12) + AES_KEXPAND2(8, 12, 14) + AES_KEXPAND2(10, 14, 16) + AES_KEXPAND1(12, 16, 0x2, 18) + AES_KEXPAND2(14, 18, 20) + AES_KEXPAND2(16, 20, 22) + AES_KEXPAND1(18, 22, 0x3, 24) + AES_KEXPAND2(20, 24, 26) + AES_KEXPAND2(22, 26, 28) + AES_KEXPAND1(24, 28, 0x4, 30) + AES_KEXPAND2(26, 30, 32) + AES_KEXPAND2(28, 32, 34) + AES_KEXPAND1(30, 34, 0x5, 36) + AES_KEXPAND2(32, 36, 38) + AES_KEXPAND2(34, 38, 40) + AES_KEXPAND1(36, 40, 0x6, 42) + AES_KEXPAND2(38, 42, 44) + AES_KEXPAND2(40, 44, 46) + AES_KEXPAND1(42, 46, 0x7, 48) + AES_KEXPAND2(44, 48, 50) + + std %f6, [%o1 + 0x00] + std %f8, [%o1 + 0x08] + std %f10, [%o1 + 0x10] + std %f12, [%o1 + 0x18] + std %f14, [%o1 + 0x20] + std %f16, [%o1 + 0x28] + std %f18, [%o1 + 0x30] + std %f20, [%o1 + 0x38] + std %f22, [%o1 + 0x40] + std %f24, [%o1 + 0x48] + std %f26, [%o1 + 0x50] + std %f28, [%o1 + 0x58] + std %f30, [%o1 + 0x60] + std %f32, [%o1 + 0x68] + std %f34, [%o1 + 0x70] + std %f36, [%o1 + 0x78] + std %f38, [%o1 + 0x80] + std %f40, [%o1 + 0x88] + std %f42, [%o1 + 0x90] + std %f44, [%o1 + 0x98] + std %f46, [%o1 + 0xa0] + std %f48, [%o1 + 0xa8] + ba,pt %xcc, 80f + std %f50, [%o1 + 0xb0] + +2: + /* 128-bit key expansion */ + AES_KEXPAND1(0, 2, 0x0, 4) + AES_KEXPAND2(2, 4, 6) + AES_KEXPAND1(4, 6, 0x1, 8) + AES_KEXPAND2(6, 8, 10) + AES_KEXPAND1(8, 10, 0x2, 12) + AES_KEXPAND2(10, 12, 14) + AES_KEXPAND1(12, 14, 0x3, 16) + AES_KEXPAND2(14, 16, 18) + AES_KEXPAND1(16, 18, 0x4, 20) + AES_KEXPAND2(18, 20, 22) + AES_KEXPAND1(20, 22, 0x5, 24) + AES_KEXPAND2(22, 24, 26) + AES_KEXPAND1(24, 26, 0x6, 28) + AES_KEXPAND2(26, 28, 30) + AES_KEXPAND1(28, 30, 0x7, 32) + AES_KEXPAND2(30, 32, 34) + AES_KEXPAND1(32, 34, 0x8, 36) + AES_KEXPAND2(34, 36, 38) + AES_KEXPAND1(36, 38, 0x9, 40) + AES_KEXPAND2(38, 40, 42) + + std %f4, [%o1 + 0x00] + std %f6, [%o1 + 0x08] + std %f8, [%o1 + 0x10] + std %f10, [%o1 + 0x18] + std %f12, [%o1 + 0x20] + std %f14, [%o1 + 0x28] + std %f16, [%o1 + 0x30] + std %f18, [%o1 + 0x38] + std %f20, [%o1 + 0x40] + std %f22, [%o1 + 0x48] + std %f24, [%o1 + 0x50] + std %f26, [%o1 + 0x58] + std %f28, [%o1 + 0x60] + std %f30, [%o1 + 0x68] + std %f32, [%o1 + 0x70] + std %f34, [%o1 + 0x78] + std %f36, [%o1 + 0x80] + std %f38, [%o1 + 0x88] + std %f40, [%o1 + 0x90] + std %f42, [%o1 + 0x98] +80: + retl + VISExit +ENDPROC(aes_sparc64_key_expand) + +ENTRY(aes_sparc64_encrypt) + /* %o0=key, %o1=input, %o2=output, %o3=key_len */ + VISEntry + ld [%o1 + 0x00], %f4 + ld [%o1 + 0x04], %f5 + ld [%o1 + 0x08], %f6 + ld [%o1 + 0x0c], %f7 + + ldd [%o0 + 0x00], %f8 + ldd [%o0 + 
0x08], %f10 + cmp %o3, 24 + fxor %f8, %f4, %f4 + bl 2f + fxor %f10, %f6, %f6 + + be 1f + ldd [%o0 + 0x10], %f8 + + ldd [%o0 + 0x18], %f10 + ldd [%o0 + 0x20], %f12 + ldd [%o0 + 0x28], %f14 + add %o0, 0x20, %o0 + + ENCRYPT_TWO_ROUNDS(8, 4, 6, 0, 2) + + ldd [%o0 + 0x10], %f8 + +1: + ldd [%o0 + 0x18], %f10 + ldd [%o0 + 0x20], %f12 + ldd [%o0 + 0x28], %f14 + add %o0, 0x20, %o0 + + ENCRYPT_TWO_ROUNDS(8, 4, 6, 0, 2) + +2: + ldd [%o0 + 0x10], %f12 + ldd [%o0 + 0x18], %f14 + ldd [%o0 + 0x20], %f16 + ldd [%o0 + 0x28], %f18 + ldd [%o0 + 0x30], %f20 + ldd [%o0 + 0x38], %f22 + ldd [%o0 + 0x40], %f24 + ldd [%o0 + 0x48], %f26 + ldd [%o0 + 0x50], %f28 + ldd [%o0 + 0x58], %f30 + ldd [%o0 + 0x60], %f32 + ldd [%o0 + 0x68], %f34 + ldd [%o0 + 0x70], %f36 + ldd [%o0 + 0x78], %f38 + ldd [%o0 + 0x80], %f40 + ldd [%o0 + 0x88], %f42 + ldd [%o0 + 0x90], %f44 + ldd [%o0 + 0x98], %f46 + ldd [%o0 + 0xa0], %f48 + ldd [%o0 + 0xa8], %f50 + + + ENCRYPT_128(12, 4, 6, 0, 2) + + st %f4, [%o2 + 0x00] + st %f5, [%o2 + 0x04] + st %f6, [%o2 + 0x08] + st %f7, [%o2 + 0x0c] + + retl + VISExit +ENDPROC(aes_sparc64_encrypt) + +ENTRY(aes_sparc64_decrypt) + /* %o0=key, %o1=input, %o2=output, %o3=key_len, %o4=exp_key_len */ + VISEntry + ld [%o1 + 0x00], %f4 + add %o0, %o4, %o0 + ld [%o1 + 0x04], %f5 + ld [%o1 + 0x08], %f6 + ld [%o1 + 0x0c], %f7 + + ldd [%o0 - 0x08], %f8 + ldd [%o0 - 0x10], %f10 + + cmp %o3, 24 + fxor %f10, %f4, %f4 + bl 2f + fxor %f8, %f6, %f6 + + be 1f + ldd [%o0 - 0x30], %f8 + + ldd [%o0 - 0x28], %f10 + ldd [%o0 - 0x20], %f12 + ldd [%o0 - 0x18], %f14 + sub %o0, 0x20, %o0 + + DECRYPT_TWO_ROUNDS(16, 4, 6, 0, 2) + + ldd [%o0 - 0x30], %f8 +1: + ldd [%o0 - 0x28], %f10 + ldd [%o0 - 0x20], %f12 + ldd [%o0 - 0x18], %f14 + sub %o0, 0x20, %o0 + + DECRYPT_TWO_ROUNDS(16, 4, 6, 0, 2) +2: + ldd [%o0 - 0xb0], %f12 + ldd [%o0 - 0xa8], %f14 + ldd [%o0 - 0xa0], %f16 + ldd [%o0 - 0x98], %f18 + ldd [%o0 - 0x90], %f20 + ldd [%o0 - 0x88], %f22 + ldd [%o0 - 0x80], %f24 + ldd [%o0 - 0x78], %f26 + ldd [%o0 - 0x70], %f28 + ldd [%o0 - 0x68], %f30 + ldd [%o0 - 0x60], %f32 + ldd [%o0 - 0x58], %f34 + ldd [%o0 - 0x50], %f36 + ldd [%o0 - 0x48], %f38 + ldd [%o0 - 0x40], %f40 + ldd [%o0 - 0x38], %f42 + ldd [%o0 - 0x30], %f44 + ldd [%o0 - 0x28], %f46 + ldd [%o0 - 0x20], %f48 + ldd [%o0 - 0x18], %f50 + + DECRYPT_128(52, 4, 6, 0, 2) + + st %f4, [%o2 + 0x00] + st %f5, [%o2 + 0x04] + st %f6, [%o2 + 0x08] + st %f7, [%o2 + 0x0c] + + retl + VISExit +ENDPROC(aes_sparc64_decrypt) + +ENTRY(aes_sparc64_load_decrypt_keys) + /* %o0=key */ + ba,pt %xcc, aes_sparc64_load_encrypt_keys + sub %o0, 0x10, %o0 +ENDPROC(aes_sparc64_load_decrypt_keys) + +ENTRY(aes_sparc64_load_encrypt_keys) + /* %o0=key */ + VISEntry + ldd [%o0 + 0x10], %f8 + ldd [%o0 + 0x18], %f10 + ldd [%o0 + 0x20], %f12 + ldd [%o0 + 0x28], %f14 + ldd [%o0 + 0x30], %f16 + ldd [%o0 + 0x38], %f18 + ldd [%o0 + 0x40], %f20 + ldd [%o0 + 0x48], %f22 + ldd [%o0 + 0x50], %f24 + ldd [%o0 + 0x58], %f26 + ldd [%o0 + 0x60], %f28 + ldd [%o0 + 0x68], %f30 + ldd [%o0 + 0x70], %f32 + ldd [%o0 + 0x78], %f34 + ldd [%o0 + 0x80], %f36 + ldd [%o0 + 0x88], %f38 + ldd [%o0 + 0x90], %f40 + ldd [%o0 + 0x98], %f42 + ldd [%o0 + 0xa0], %f44 + ldd [%o0 + 0xa8], %f46 + ldd [%o0 + 0xb0], %f48 + ldd [%o0 + 0xb8], %f50 + ldd [%o0 + 0xc0], %f52 + ldd [%o0 + 0xc8], %f54 + ldd [%o0 + 0xd0], %f56 + ldd [%o0 + 0xd8], %f58 + ldd [%o0 + 0xe0], %f60 + retl + ldd [%o0 + 0xe8], %f62 +ENDPROC(aes_sparc64_load_encrypt_keys) + +ENTRY(aes_sparc64_ecb_encrypt) + /* %o0=key, %o1=input, %o2=output, %o3=key_len, %o4=len */ + ldx [%o0 + 0x00], %g1 + ldx [%o0 
+ 0x08], %g2 + cmp %o3, 24 + bl 2f + nop + be 1f + nop + +0: + /* 256-bit key */ + ldx [%o1 + 0x00], %g3 + ldx [%o1 + 0x08], %g7 + add %o1, 0x10, %o1 + xor %g1, %g3, %g3 + xor %g2, %g7, %g7 + MOVXTOD_G3_F4 + MOVXTOD_G7_F6 + + ENCRYPT_256(8, 4, 6, 0, 2) + + std %f4, [%o2 + 0x00] + std %f6, [%o2 + 0x08] + subcc %o4, 0x10, %o4 + bne,pt %xcc, 0b + add %o2, 0x10, %o2 + + retl + nop + +1: + /* 192-bit key */ + ldx [%o1 + 0x00], %g3 + ldx [%o1 + 0x08], %g7 + add %o1, 0x10, %o1 + xor %g1, %g3, %g3 + xor %g2, %g7, %g7 + MOVXTOD_G3_F4 + MOVXTOD_G7_F6 + + ENCRYPT_192(8, 4, 6, 0, 2) + + std %f4, [%o2 + 0x00] + std %f6, [%o2 + 0x08] + subcc %o4, 0x10, %o4 + bne,pt %xcc, 1b + add %o2, 0x10, %o2 + + retl + nop + +2: + /* 128-bit key */ + ldx [%o1 + 0x00], %g3 + ldx [%o1 + 0x08], %g7 + add %o1, 0x10, %o1 + xor %g1, %g3, %g3 + xor %g2, %g7, %g7 + MOVXTOD_G3_F4 + MOVXTOD_G7_F6 + + ENCRYPT_128(8, 4, 6, 0, 2) + + std %f4, [%o2 + 0x00] + std %f6, [%o2 + 0x08] + subcc %o4, 0x10, %o4 + bne,pt %xcc, 2b + add %o2, 0x10, %o2 + + retl + nop +ENDPROC(aes_sparc64_ecb_encrypt) + +ENTRY(aes_sparc64_ecb_decrypt) + /* %o0=&key[key_len], %o1=input, %o2=output, %o3=key_len, %o4=len, %o5=iv */ + ldx [%o0 - 0x10], %g1 + ldx [%o0 - 0x08], %g2 + cmp %o3, 24 + bl 2f + nop + be 1f + nop + +0: + /* 256-bit key */ + ldx [%o1 + 0x00], %g3 + ldx [%o1 + 0x08], %g7 + add %o1, 0x10, %o1 + xor %g1, %g3, %g3 + xor %g2, %g7, %g7 + MOVXTOD_G3_F4 + MOVXTOD_G7_F6 + + DECRYPT_256(64, 4, 6, 0, 2) + + std %f4, [%o2 + 0x00] + std %f6, [%o2 + 0x08] + subcc %o4, 0x10, %o4 + bne,pt %xcc, 0b + add %o2, 0x10, %o2 + + retl + nop + +1: + /* 192-bit key */ + ldx [%o1 + 0x00], %g3 + ldx [%o1 + 0x08], %g7 + add %o1, 0x10, %o1 + xor %g1, %g3, %g3 + xor %g2, %g7, %g7 + MOVXTOD_G3_F4 + MOVXTOD_G7_F6 + + DECRYPT_192(56, 4, 6, 0, 2) + + std %f4, [%o2 + 0x00] + std %f6, [%o2 + 0x08] + subcc %o4, 0x10, %o4 + bne,pt %xcc, 1b + add %o2, 0x10, %o2 + + retl + nop + +2: + /* 128-bit key */ + ldx [%o1 + 0x00], %g3 + ldx [%o1 + 0x08], %g7 + add %o1, 0x10, %o1 + xor %g1, %g3, %g3 + xor %g2, %g7, %g7 + MOVXTOD_G3_F4 + MOVXTOD_G7_F6 + + DECRYPT_128(48, 4, 6, 0, 2) + + std %f4, [%o2 + 0x00] + std %f6, [%o2 + 0x08] + subcc %o4, 0x10, %o4 + bne,pt %xcc, 2b + add %o2, 0x10, %o2 + + retl + nop +ENDPROC(aes_sparc64_ecb_decrypt) + +ENTRY(aes_sparc64_cbc_encrypt) + /* %o0=key, %o1=input, %o2=output, %o3=key_len, %o4=len */ + ldd [%o5 + 0x00], %f4 + ldd [%o5 + 0x08], %f6 + ldx [%o0 + 0x00], %g1 + ldx [%o0 + 0x08], %g2 + cmp %o3, 24 + bl 2f + nop + be 1f + nop + +0: + /* 256-bit key */ + ldx [%o1 + 0x00], %g3 + ldx [%o1 + 0x08], %g7 + add %o1, 0x10, %o1 + xor %g1, %g3, %g3 + xor %g2, %g7, %g7 + MOVXTOD_G3_F0 + MOVXTOD_G7_F2 + fxor %f4, %f0, %f4 + fxor %f6, %f2, %f6 + + ENCRYPT_256(8, 4, 6, 0, 2) + + std %f4, [%o2 + 0x00] + std %f6, [%o2 + 0x08] + subcc %o4, 0x10, %o4 + bne,pt %xcc, 0b + add %o2, 0x10, %o2 + + std %f4, [%o5 + 0x00] + std %f6, [%o5 + 0x08] + + retl + nop + +1: + /* 192-bit key */ + ldx [%o1 + 0x00], %g3 + ldx [%o1 + 0x08], %g7 + add %o1, 0x10, %o1 + xor %g1, %g3, %g3 + xor %g2, %g7, %g7 + MOVXTOD_G3_F0 + MOVXTOD_G7_F2 + fxor %f4, %f0, %f4 + fxor %f6, %f2, %f6 + + ENCRYPT_192(8, 4, 6, 0, 2) + + std %f4, [%o2 + 0x00] + std %f6, [%o2 + 0x08] + subcc %o4, 0x10, %o4 + bne,pt %xcc, 1b + add %o2, 0x10, %o2 + + std %f4, [%o5 + 0x00] + std %f6, [%o5 + 0x08] + + retl + nop + +2: + /* 128-bit key */ + ldx [%o1 + 0x00], %g3 + ldx [%o1 + 0x08], %g7 + add %o1, 0x10, %o1 + xor %g1, %g3, %g3 + xor %g2, %g7, %g7 + MOVXTOD_G3_F0 + MOVXTOD_G7_F2 + fxor %f4, %f0, %f4 + fxor %f6, %f2, %f6 
+ + ENCRYPT_128(8, 4, 6, 0, 2) + + std %f4, [%o2 + 0x00] + std %f6, [%o2 + 0x08] + subcc %o4, 0x10, %o4 + bne,pt %xcc, 2b + add %o2, 0x10, %o2 + + std %f4, [%o5 + 0x00] + std %f6, [%o5 + 0x08] + + retl + nop +ENDPROC(aes_sparc64_cbc_encrypt) + +ENTRY(aes_sparc64_cbc_decrypt) + /* %o0=&key[key_len], %o1=key_len, %o2=input, %o3=output, %o4=len, %o5=iv */ + ldx [%o0 - 0x10], %g1 + ldx [%o0 - 0x08], %g2 + cmp %o1, 24 + ldx [%o5 + 0x00], %o0 + bl 2f + ldx [%o5 + 0x08], %o1 + be 1f + nop + +0: + /* 256-bit key */ + ldx [%o2 + 0x00], %g3 + ldx [%o2 + 0x08], %g7 + add %o2, 0x10, %o2 + xor %g1, %g3, %g3 + xor %g2, %g7, %g7 + MOVXTOD_G3_F4 + MOVXTOD_G7_F6 + + DECRYPT_256(64, 4, 6, 0, 2) + + MOVXTOD_O0_F0 + MOVXTOD_O1_F2 + xor %g1, %g3, %o0 + xor %g2, %g7, %o1 + fxor %f4, %f0, %f4 + fxor %f6, %f2, %f6 + + std %f4, [%o3 + 0x00] + std %f6, [%o3 + 0x08] + subcc %o4, 0x10, %o4 + bne,pt %xcc, 0b + add %o3, 0x10, %o3 + + stx %o0, [%o5 + 0x00] + stx %o1, [%o5 + 0x08] + + retl + nop + +1: + /* 192-bit key */ + ldx [%o2 + 0x00], %g3 + ldx [%o2 + 0x08], %g7 + add %o2, 0x10, %o2 + xor %g1, %g3, %g3 + xor %g2, %g7, %g7 + MOVXTOD_G3_F4 + MOVXTOD_G7_F6 + + DECRYPT_192(56, 4, 6, 0, 2) + + MOVXTOD_O0_F0 + MOVXTOD_O1_F2 + xor %g1, %g3, %o0 + xor %g2, %g7, %o1 + fxor %f4, %f0, %f4 + fxor %f6, %f2, %f6 + + std %f4, [%o3 + 0x00] + std %f6, [%o3 + 0x08] + subcc %o4, 0x10, %o4 + bne,pt %xcc, 1b + add %o3, 0x10, %o3 + + stx %o0, [%o5 + 0x00] + stx %o1, [%o5 + 0x08] + + retl + nop + +2: + /* 128-bit key */ + ldx [%o2 + 0x00], %g3 + ldx [%o2 + 0x08], %g7 + add %o2, 0x10, %o2 + xor %g1, %g3, %g3 + xor %g2, %g7, %g7 + MOVXTOD_G3_F4 + MOVXTOD_G7_F6 + + DECRYPT_128(48, 4, 6, 0, 2) + + MOVXTOD_O0_F0 + MOVXTOD_O1_F2 + xor %g1, %g3, %o0 + xor %g2, %g7, %o1 + fxor %f4, %f0, %f4 + fxor %f6, %f2, %f6 + + std %f4, [%o3 + 0x00] + std %f6, [%o3 + 0x08] + subcc %o4, 0x10, %o4 + bne,pt %xcc, 2b + add %o3, 0x10, %o3 + + stx %o0, [%o5 + 0x00] + stx %o1, [%o5 + 0x08] + + retl + nop +ENDPROC(aes_sparc64_cbc_decrypt) diff --git a/arch/sparc/crypto/aes_glue.c b/arch/sparc/crypto/aes_glue.c new file mode 100644 index 000000000000..a87c5fa76e20 --- /dev/null +++ b/arch/sparc/crypto/aes_glue.c @@ -0,0 +1,323 @@ +/* Glue code for AES encryption optimized for sparc64 crypto opcodes. + * + * This is based largely upon arch/x86/crypto/aesni-intel_glue.c + * + * Copyright (C) 2008, Intel Corp. + * Author: Huang Ying + * + * Added RFC4106 AES-GCM support for 128-bit keys under the AEAD + * interface for 64-bit kernels. + * Authors: Adrian Hoban + * Gabriele Paoloni + * Tadeusz Struk (tadeusz.struk@intel.com) + * Aidan O'Mahony (aidan.o.mahony@intel.com) + * Copyright (c) 2010, Intel Corporation. 
+ */ + +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include + +struct crypto_sparc64_aes_ctx { + u64 key[AES_MAX_KEYLENGTH / sizeof(u64)]; + u32 key_length; + u32 expanded_key_length; +}; + +extern void aes_sparc64_key_expand(const u32 *in_key, u64 *output_key, + unsigned int key_len); + +static int aes_set_key(struct crypto_tfm *tfm, const u8 *in_key, + unsigned int key_len) +{ + struct crypto_sparc64_aes_ctx *ctx = crypto_tfm_ctx(tfm); + u32 *flags = &tfm->crt_flags; + + switch (key_len) { + case AES_KEYSIZE_128: + ctx->expanded_key_length = 0xb0; + break; + + case AES_KEYSIZE_192: + ctx->expanded_key_length = 0xd0; + break; + + case AES_KEYSIZE_256: + ctx->expanded_key_length = 0xf0; + break; + + default: + *flags |= CRYPTO_TFM_RES_BAD_KEY_LEN; + return -EINVAL; + } + + aes_sparc64_key_expand((const u32 *)in_key, &ctx->key[0], key_len); + ctx->key_length = key_len; + + return 0; +} + +extern void aes_sparc64_encrypt(const u64 *key, const u32 *input, + u32 *output, unsigned int key_len); + +static void aes_encrypt(struct crypto_tfm *tfm, u8 *dst, const u8 *src) +{ + struct crypto_sparc64_aes_ctx *ctx = crypto_tfm_ctx(tfm); + + aes_sparc64_encrypt(&ctx->key[0], (const u32 *) src, + (u32 *) dst, ctx->key_length); +} + +extern void aes_sparc64_decrypt(const u64 *key, const u32 *input, + u32 *output, unsigned int key_len, + unsigned int expanded_key_len); + +static void aes_decrypt(struct crypto_tfm *tfm, u8 *dst, const u8 *src) +{ + struct crypto_sparc64_aes_ctx *ctx = crypto_tfm_ctx(tfm); + + aes_sparc64_decrypt(&ctx->key[0], (const u32 *) src, + (u32 *) dst, ctx->key_length, + ctx->expanded_key_length); +} + +extern void aes_sparc64_load_encrypt_keys(u64 *key); +extern void aes_sparc64_load_decrypt_keys(u64 *key); + +#define AES_BLOCK_MASK (~(AES_BLOCK_SIZE-1)) + +extern void aes_sparc64_ecb_encrypt(u64 *key, const u32 *input, u32 *output, + unsigned int key_len, unsigned int len); + +static int ecb_encrypt(struct blkcipher_desc *desc, + struct scatterlist *dst, struct scatterlist *src, + unsigned int nbytes) +{ + struct crypto_sparc64_aes_ctx *ctx = crypto_blkcipher_ctx(desc->tfm); + struct blkcipher_walk walk; + int err; + + blkcipher_walk_init(&walk, dst, src, nbytes); + err = blkcipher_walk_virt(desc, &walk); + + aes_sparc64_load_encrypt_keys(&ctx->key[0]); + while ((nbytes = walk.nbytes)) { + unsigned int block_len = nbytes & AES_BLOCK_MASK; + + if (likely(block_len)) { + aes_sparc64_ecb_encrypt(&ctx->key[0], + (const u32 *)walk.src.virt.addr, + (u32 *) walk.dst.virt.addr, + ctx->key_length, block_len); + } + nbytes &= AES_BLOCK_SIZE - 1; + err = blkcipher_walk_done(desc, &walk, nbytes); + } + fprs_write(0); + return err; +} + +extern void aes_sparc64_ecb_decrypt(u64 *ekey, const u32 *input, u32 *output, + unsigned int key_len, unsigned int len); + +static int ecb_decrypt(struct blkcipher_desc *desc, + struct scatterlist *dst, struct scatterlist *src, + unsigned int nbytes) +{ + struct crypto_sparc64_aes_ctx *ctx = crypto_blkcipher_ctx(desc->tfm); + struct blkcipher_walk walk; + u64 *key_end; + int err; + + blkcipher_walk_init(&walk, dst, src, nbytes); + err = blkcipher_walk_virt(desc, &walk); + + aes_sparc64_load_decrypt_keys(&ctx->key[0]); + key_end = &ctx->key[ctx->expanded_key_length / sizeof(u64)]; + while ((nbytes = walk.nbytes)) { + unsigned int block_len = nbytes & AES_BLOCK_MASK; + + aes_sparc64_ecb_decrypt(key_end, (const u32 *) walk.src.virt.addr, + (u32 *) walk.dst.virt.addr, ctx->key_length, + block_len); + nbytes &= 
AES_BLOCK_SIZE - 1; + err = blkcipher_walk_done(desc, &walk, nbytes); + } + fprs_write(0); + + return err; +} + +extern void aes_sparc64_cbc_encrypt(u64 *key, const u32 *input, u32 *output, + unsigned int key_len, unsigned int len, + u64 *iv); + +static int cbc_encrypt(struct blkcipher_desc *desc, + struct scatterlist *dst, struct scatterlist *src, + unsigned int nbytes) +{ + struct crypto_sparc64_aes_ctx *ctx = crypto_blkcipher_ctx(desc->tfm); + struct blkcipher_walk walk; + int err; + + blkcipher_walk_init(&walk, dst, src, nbytes); + err = blkcipher_walk_virt(desc, &walk); + + aes_sparc64_load_encrypt_keys(&ctx->key[0]); + while ((nbytes = walk.nbytes)) { + unsigned int block_len = nbytes & AES_BLOCK_MASK; + + if (likely(block_len)) { + aes_sparc64_cbc_encrypt(&ctx->key[0], + (const u32 *)walk.src.virt.addr, + (u32 *) walk.dst.virt.addr, + ctx->key_length, block_len, + (u64 *) walk.iv); + } + nbytes &= AES_BLOCK_SIZE - 1; + err = blkcipher_walk_done(desc, &walk, nbytes); + } + fprs_write(0); + return err; +} + +extern void aes_sparc64_cbc_decrypt(u64 *ekey, unsigned int key_len, + const u32 *input, u32 *output, + unsigned int len, u64 *iv); + +static int cbc_decrypt(struct blkcipher_desc *desc, + struct scatterlist *dst, struct scatterlist *src, + unsigned int nbytes) +{ + struct crypto_sparc64_aes_ctx *ctx = crypto_blkcipher_ctx(desc->tfm); + struct blkcipher_walk walk; + u64 *key_end; + int err; + + blkcipher_walk_init(&walk, dst, src, nbytes); + err = blkcipher_walk_virt(desc, &walk); + + aes_sparc64_load_decrypt_keys(&ctx->key[0]); + key_end = &ctx->key[ctx->expanded_key_length / sizeof(u64)]; + while ((nbytes = walk.nbytes)) { + unsigned int block_len = nbytes & AES_BLOCK_MASK; + + aes_sparc64_cbc_decrypt(key_end, ctx->key_length, + (const u32 *) walk.src.virt.addr, + (u32 *) walk.dst.virt.addr, + block_len, (u64 *) walk.iv); + nbytes &= AES_BLOCK_SIZE - 1; + err = blkcipher_walk_done(desc, &walk, nbytes); + } + fprs_write(0); + + return err; +} + +static struct crypto_alg algs[] = { { + .cra_name = "aes", + .cra_driver_name = "aes-sparc64", + .cra_priority = 150, + .cra_flags = CRYPTO_ALG_TYPE_CIPHER, + .cra_blocksize = AES_BLOCK_SIZE, + .cra_ctxsize = sizeof(struct crypto_sparc64_aes_ctx), + .cra_alignmask = 3, + .cra_module = THIS_MODULE, + .cra_u = { + .cipher = { + .cia_min_keysize = AES_MIN_KEY_SIZE, + .cia_max_keysize = AES_MAX_KEY_SIZE, + .cia_setkey = aes_set_key, + .cia_encrypt = aes_encrypt, + .cia_decrypt = aes_decrypt + } + } +}, { + .cra_name = "ecb(aes)", + .cra_driver_name = "ecb-aes-sparc64", + .cra_priority = 150, + .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER, + .cra_blocksize = AES_BLOCK_SIZE, + .cra_ctxsize = sizeof(struct crypto_sparc64_aes_ctx), + .cra_alignmask = 7, + .cra_type = &crypto_blkcipher_type, + .cra_module = THIS_MODULE, + .cra_u = { + .blkcipher = { + .min_keysize = AES_MIN_KEY_SIZE, + .max_keysize = AES_MAX_KEY_SIZE, + .setkey = aes_set_key, + .encrypt = ecb_encrypt, + .decrypt = ecb_decrypt, + }, + }, +}, { + .cra_name = "cbc(aes)", + .cra_driver_name = "cbc-aes-sparc64", + .cra_priority = 150, + .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER, + .cra_blocksize = AES_BLOCK_SIZE, + .cra_ctxsize = sizeof(struct crypto_sparc64_aes_ctx), + .cra_alignmask = 7, + .cra_type = &crypto_blkcipher_type, + .cra_module = THIS_MODULE, + .cra_u = { + .blkcipher = { + .min_keysize = AES_MIN_KEY_SIZE, + .max_keysize = AES_MAX_KEY_SIZE, + .setkey = aes_set_key, + .encrypt = cbc_encrypt, + .decrypt = cbc_decrypt, + }, + }, +} }; + +static bool __init sparc64_has_aes_opcode(void) 
+{ + unsigned long cfr; + + if (!(sparc64_elf_hwcap & HWCAP_SPARC_CRYPTO)) + return false; + + __asm__ __volatile__("rd %%asr26, %0" : "=r" (cfr)); + if (!(cfr & CFR_AES)) + return false; + + return true; +} + +static int __init aes_sparc64_mod_init(void) +{ + int i; + + for (i = 0; i < ARRAY_SIZE(algs); i++) + INIT_LIST_HEAD(&algs[i].cra_list); + + if (sparc64_has_aes_opcode()) { + pr_info("Using sparc64 aes opcodes optimized AES implementation\n"); + return crypto_register_algs(algs, ARRAY_SIZE(algs)); + } + pr_info("sparc64 aes opcodes not available.\n"); + return -ENODEV; +} + +static void __exit aes_sparc64_mod_fini(void) +{ + crypto_unregister_algs(algs, ARRAY_SIZE(algs)); +} + +module_init(aes_sparc64_mod_init); +module_exit(aes_sparc64_mod_fini); + +MODULE_LICENSE("GPL"); +MODULE_DESCRIPTION("Rijndael (AES) Cipher Algorithm, sparc64 aes opcode accelerated"); + +MODULE_ALIAS("aes");
diff --git a/crypto/Kconfig b/crypto/Kconfig index 4cb1ab04168f..49f867b2025d 100644 --- a/crypto/Kconfig +++ b/crypto/Kconfig @@ -624,6 +624,34 @@ config CRYPTO_AES_NI_INTEL ECB, CBC, LRW, PCBC, XTS. The 64 bit version has additional acceleration for CTR. +config CRYPTO_AES_SPARC64 + tristate "AES cipher algorithms (SPARC64)" + depends on SPARC64 + select CRYPTO_CRYPTD + select CRYPTO_ALGAPI + help + Use SPARC64 crypto opcodes for the AES algorithm. + + AES cipher algorithms (FIPS-197). AES uses the Rijndael + algorithm. + + Rijndael appears to be consistently a very good performer in + both hardware and software across a wide range of computing + environments regardless of its use in feedback or non-feedback + modes. Its key setup time is excellent, and its key agility is + good. Rijndael's very low memory requirements make it very well + suited for restricted-space environments, in which it also + demonstrates excellent performance. Rijndael's operations are + among the easiest to defend against power and timing attacks. + + AES specifies three key sizes: 128, 192 and 256 bits. + + See for more information. + + In addition to AES cipher algorithm support, acceleration + for some popular block cipher modes is supported too, including + ECB and CBC. + config CRYPTO_ANUBIS tristate "Anubis cipher algorithm" select CRYPTO_ALGAPI
-- cgit v1.2.3 From 442a7c40b1dac78588abfe8ed4c97e4bb8b36e73 Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Wed, 22 Aug 2012 20:47:36 -0700 Subject: sparc64: Add CRC32C driver making use of the new crc32c opcode. Signed-off-by: David S.
Miller --- arch/sparc/crypto/Makefile | 4 + arch/sparc/crypto/crc32c_asm.S | 29 +++++++ arch/sparc/crypto/crc32c_glue.c | 177 ++++++++++++++++++++++++++++++++++++++++ crypto/Kconfig | 9 ++ 4 files changed, 219 insertions(+) create mode 100644 arch/sparc/crypto/crc32c_asm.S create mode 100644 arch/sparc/crypto/crc32c_glue.c (limited to 'arch/sparc/crypto') diff --git a/arch/sparc/crypto/Makefile b/arch/sparc/crypto/Makefile index 5034324fdd46..c6ca94181f45 100644 --- a/arch/sparc/crypto/Makefile +++ b/arch/sparc/crypto/Makefile @@ -9,9 +9,13 @@ obj-$(CONFIG_CRYPTO_MD5_SPARC64) += md5-sparc64.o obj-$(CONFIG_CRYPTO_AES_SPARC64) += aes-sparc64.o +obj-$(CONFIG_CRYPTO_CRC32C_SPARC64) += crc32c-sparc64.o + sha1-sparc64-y := sha1_asm.o sha1_glue.o sha256-sparc64-y := sha256_asm.o sha256_glue.o sha512-sparc64-y := sha512_asm.o sha512_glue.o md5-sparc64-y := md5_asm.o md5_glue.o aes-sparc64-y := aes_asm.o aes_glue.o + +crc32c-sparc64-y := crc32c_asm.o crc32c_glue.o diff --git a/arch/sparc/crypto/crc32c_asm.S b/arch/sparc/crypto/crc32c_asm.S new file mode 100644 index 000000000000..cb479ec72433 --- /dev/null +++ b/arch/sparc/crypto/crc32c_asm.S @@ -0,0 +1,29 @@ +#include +#include +#include + +#define F3F(x,y,z) (((x)<<30)|((y)<<19)|((z)<<5)) + +#define FPD_ENCODE(x) (((x) >> 5) | ((x) & ~(0x20))) + +#define RS1(x) (FPD_ENCODE(x) << 14) +#define RS2(x) (FPD_ENCODE(x) << 0) +#define RD(x) (FPD_ENCODE(x) << 25) + +#define CRC32C(a,b,c) \ + .word (F3F(2,0x36,0x147)|RS1(a)|RS2(b)|RD(c)); + +ENTRY(crc32c_sparc64) + /* %o0=crc32p, %o1=data_ptr, %o2=len */ + VISEntryHalf + lda [%o0] ASI_PL, %f1 +1: ldd [%o1], %f2 + CRC32C(0,2,0) + subcc %o2, 8, %o2 + bne,pt %icc, 1b + add %o1, 0x8, %o1 + sta %f1, [%o0] ASI_PL + VISExitHalf +2: retl + nop +ENDPROC(crc32c_sparc64) diff --git a/arch/sparc/crypto/crc32c_glue.c b/arch/sparc/crypto/crc32c_glue.c new file mode 100644 index 000000000000..ec31cdb20a14 --- /dev/null +++ b/arch/sparc/crypto/crc32c_glue.c @@ -0,0 +1,177 @@ +/* Glue code for CRC32C optimized for sparc64 crypto opcodes. + * + * This is based largely upon arch/x86/crypto/crc32c-intel.c + * + * Copyright (C) 2008 Intel Corporation + * Authors: Austin Zhang + * Kent Liu + */ + +#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt + +#include +#include +#include +#include +#include + +#include + +#include +#include + +/* + * Setting the seed allows arbitrary accumulators and flexible XOR policy + * If your algorithm starts with ~0, then XOR with ~0 before you set + * the seed. 
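+ *
+ * For example: a finished crc32c value c comes from an initial
+ * accumulator of ~0 plus a final inversion, so to continue
+ * accumulating past c, pass ~c (that is, c ^ ~0) to setkey as the
+ * 4-byte little-endian seed.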
+ */ +static int crc32c_sparc64_setkey(struct crypto_shash *hash, const u8 *key, + unsigned int keylen) +{ + u32 *mctx = crypto_shash_ctx(hash); + + if (keylen != sizeof(u32)) { + crypto_shash_set_flags(hash, CRYPTO_TFM_RES_BAD_KEY_LEN); + return -EINVAL; + } + *(__le32 *)mctx = le32_to_cpup((__le32 *)key); + return 0; +} + +static int crc32c_sparc64_init(struct shash_desc *desc) +{ + u32 *mctx = crypto_shash_ctx(desc->tfm); + u32 *crcp = shash_desc_ctx(desc); + + *crcp = *mctx; + + return 0; +} + +extern void crc32c_sparc64(u32 *crcp, const u64 *data, unsigned int len); + +static void crc32c_compute(u32 *crcp, const u64 *data, unsigned int len) +{ + unsigned int asm_len; + + asm_len = len & ~7U; + if (asm_len) { + crc32c_sparc64(crcp, data, asm_len); + data += asm_len / 8; + len -= asm_len; + } + if (len) + *crcp = __crc32c_le(*crcp, (const unsigned char *) data, len); +} + +static int crc32c_sparc64_update(struct shash_desc *desc, const u8 *data, + unsigned int len) +{ + u32 *crcp = shash_desc_ctx(desc); + + crc32c_compute(crcp, (const u64 *) data, len); + + return 0; +} + +static int __crc32c_sparc64_finup(u32 *crcp, const u8 *data, unsigned int len, + u8 *out) +{ + u32 tmp = *crcp; + + crc32c_compute(&tmp, (const u64 *) data, len); + + *(__le32 *) out = ~cpu_to_le32(tmp); + return 0; +} + +static int crc32c_sparc64_finup(struct shash_desc *desc, const u8 *data, + unsigned int len, u8 *out) +{ + return __crc32c_sparc64_finup(shash_desc_ctx(desc), data, len, out); +} + +static int crc32c_sparc64_final(struct shash_desc *desc, u8 *out) +{ + u32 *crcp = shash_desc_ctx(desc); + + *(__le32 *) out = ~cpu_to_le32p(crcp); + return 0; +} + +static int crc32c_sparc64_digest(struct shash_desc *desc, const u8 *data, + unsigned int len, u8 *out) +{ + return __crc32c_sparc64_finup(crypto_shash_ctx(desc->tfm), data, len, + out); +} + +static int crc32c_sparc64_cra_init(struct crypto_tfm *tfm) +{ + u32 *key = crypto_tfm_ctx(tfm); + + *key = ~0; + + return 0; +} + +#define CHKSUM_BLOCK_SIZE 1 +#define CHKSUM_DIGEST_SIZE 4 + +static struct shash_alg alg = { + .setkey = crc32c_sparc64_setkey, + .init = crc32c_sparc64_init, + .update = crc32c_sparc64_update, + .final = crc32c_sparc64_final, + .finup = crc32c_sparc64_finup, + .digest = crc32c_sparc64_digest, + .descsize = sizeof(u32), + .digestsize = CHKSUM_DIGEST_SIZE, + .base = { + .cra_name = "crc32c", + .cra_driver_name = "crc32c-sparc64", + .cra_priority = 150, + .cra_blocksize = CHKSUM_BLOCK_SIZE, + .cra_ctxsize = sizeof(u32), + .cra_alignmask = 7, + .cra_module = THIS_MODULE, + .cra_init = crc32c_sparc64_cra_init, + } +}; + +static bool __init sparc64_has_crc32c_opcode(void) +{ + unsigned long cfr; + + if (!(sparc64_elf_hwcap & HWCAP_SPARC_CRYPTO)) + return false; + + __asm__ __volatile__("rd %%asr26, %0" : "=r" (cfr)); + if (!(cfr & CFR_CRC32C)) + return false; + + return true; +} + +static int __init crc32c_sparc64_mod_init(void) +{ + if (sparc64_has_crc32c_opcode()) { + pr_info("Using sparc64 crc32c opcode optimized CRC32C implementation\n"); + return crypto_register_shash(&alg); + } + pr_info("sparc64 crc32c opcode not available.\n"); + return -ENODEV; +} + +static void __exit crc32c_sparc64_mod_fini(void) +{ + crypto_unregister_shash(&alg); +} + +module_init(crc32c_sparc64_mod_init); +module_exit(crc32c_sparc64_mod_fini); + +MODULE_LICENSE("GPL"); +MODULE_DESCRIPTION("CRC32c (Castagnoli), sparc64 crc32c opcode accelerated"); + +MODULE_ALIAS("crc32c"); diff --git a/crypto/Kconfig b/crypto/Kconfig index 49f867b2025d..83993ea84cce 100644 --- 
a/crypto/Kconfig +++ b/crypto/Kconfig @@ -336,6 +336,15 @@ config CRYPTO_CRC32C_INTEL gain performance compared with software implementation. Module will be crc32c-intel. +config CRYPTO_CRC32C_SPARC64 + tristate "CRC32c CRC algorithm (SPARC64)" + depends on SPARC64 + select CRYPTO_HASH + select CRC32 + help + CRC32c CRC algorithm implemented using sparc64 crypto instructions, + when available. + config CRYPTO_GHASH tristate "GHASH digest algorithm" select CRYPTO_GF128MUL -- cgit v1.2.3 From c5aac2df6577636ef526d87ec6d92796a190b27f Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Sat, 25 Aug 2012 22:37:23 -0700 Subject: sparc64: Add DES driver making use of the new des opcodes. Signed-off-by: David S. Miller --- arch/sparc/crypto/Makefile | 2 + arch/sparc/crypto/des_asm.S | 439 ++++++++++++++++++++++++++++++++++++ arch/sparc/crypto/des_glue.c | 525 +++++++++++++++++++++++++++++++++++++++++++ crypto/Kconfig | 8 + 4 files changed, 974 insertions(+) create mode 100644 arch/sparc/crypto/des_asm.S create mode 100644 arch/sparc/crypto/des_glue.c (limited to 'arch/sparc/crypto') diff --git a/arch/sparc/crypto/Makefile b/arch/sparc/crypto/Makefile index c6ca94181f45..dd999c6c8609 100644 --- a/arch/sparc/crypto/Makefile +++ b/arch/sparc/crypto/Makefile @@ -8,6 +8,7 @@ obj-$(CONFIG_CRYPTO_SHA512_SPARC64) += sha512-sparc64.o obj-$(CONFIG_CRYPTO_MD5_SPARC64) += md5-sparc64.o obj-$(CONFIG_CRYPTO_AES_SPARC64) += aes-sparc64.o +obj-$(CONFIG_CRYPTO_DES_SPARC64) += des-sparc64.o obj-$(CONFIG_CRYPTO_CRC32C_SPARC64) += crc32c-sparc64.o @@ -17,5 +18,6 @@ sha512-sparc64-y := sha512_asm.o sha512_glue.o md5-sparc64-y := md5_asm.o md5_glue.o aes-sparc64-y := aes_asm.o aes_glue.o +des-sparc64-y := des_asm.o des_glue.o crc32c-sparc64-y := crc32c_asm.o crc32c_glue.o diff --git a/arch/sparc/crypto/des_asm.S b/arch/sparc/crypto/des_asm.S new file mode 100644 index 000000000000..589481e53d07 --- /dev/null +++ b/arch/sparc/crypto/des_asm.S @@ -0,0 +1,439 @@ +#include +#include + +#define F3F(x,y,z) (((x)<<30)|((y)<<19)|((z)<<5)) + +#define FPD_ENCODE(x) (((x) >> 5) | ((x) & ~(0x20))) + +#define RS1(x) (FPD_ENCODE(x) << 14) +#define RS2(x) (FPD_ENCODE(x) << 0) +#define RS3(x) (FPD_ENCODE(x) << 9) +#define RD(x) (FPD_ENCODE(x) << 25) +#define IMM5(x) ((x) << 0) + +#define DES_IP(a,b) \ + .word (F3F(2, 0x36, 0x134)|RS1(a)|RD(b)); +#define DES_IIP(a,b) \ + .word (F3F(2, 0x36, 0x135)|RS1(a)|RD(b)); +#define DES_KEXPAND(a,b,c) \ + .word (F3F(2, 0x36, 0x136)|RS1(a)|IMM5(b)|RD(c)); +#define DES_ROUND(a,b,c,d) \ + .word (F3F(2, 0x19, 0x009)|RS1(a)|RS2(b)|RS3(c)|RD(d)); + +#define MOVXTOD_G1_F60 \ + .word 0xbbb02301 +#define MOVXTOD_G1_F62 \ + .word 0xbfb02301 + + .align 32 +ENTRY(des_sparc64_key_expand) + /* %o0=input_key, %o1=output_key */ + VISEntryHalf + ld [%o0 + 0x00], %f0 + ld [%o0 + 0x04], %f1 + DES_KEXPAND(0, 0, 0) + DES_KEXPAND(0, 1, 2) + DES_KEXPAND(2, 3, 6) + DES_KEXPAND(2, 2, 4) + DES_KEXPAND(6, 3, 10) + DES_KEXPAND(6, 2, 8) + DES_KEXPAND(10, 3, 14) + DES_KEXPAND(10, 2, 12) + DES_KEXPAND(14, 1, 16) + DES_KEXPAND(16, 3, 20) + DES_KEXPAND(16, 2, 18) + DES_KEXPAND(20, 3, 24) + DES_KEXPAND(20, 2, 22) + DES_KEXPAND(24, 3, 28) + DES_KEXPAND(24, 2, 26) + DES_KEXPAND(28, 1, 30) + std %f0, [%o1 + 0x00] + std %f2, [%o1 + 0x08] + std %f4, [%o1 + 0x10] + std %f6, [%o1 + 0x18] + std %f8, [%o1 + 0x20] + std %f10, [%o1 + 0x28] + std %f12, [%o1 + 0x30] + std %f14, [%o1 + 0x38] + std %f16, [%o1 + 0x40] + std %f18, [%o1 + 0x48] + std %f20, [%o1 + 0x50] + std %f22, [%o1 + 0x58] + std %f24, [%o1 + 0x60] + std %f26, [%o1 + 0x68] + 
std %f28, [%o1 + 0x70] + std %f30, [%o1 + 0x78] + retl + VISExitHalf +ENDPROC(des_sparc64_key_expand) + + .align 32 +ENTRY(des_sparc64_crypt) + /* %o0=key, %o1=input, %o2=output */ + VISEntry + ldd [%o1 + 0x00], %f32 + ldd [%o0 + 0x00], %f0 + ldd [%o0 + 0x08], %f2 + ldd [%o0 + 0x10], %f4 + ldd [%o0 + 0x18], %f6 + ldd [%o0 + 0x20], %f8 + ldd [%o0 + 0x28], %f10 + ldd [%o0 + 0x30], %f12 + ldd [%o0 + 0x38], %f14 + ldd [%o0 + 0x40], %f16 + ldd [%o0 + 0x48], %f18 + ldd [%o0 + 0x50], %f20 + ldd [%o0 + 0x58], %f22 + ldd [%o0 + 0x60], %f24 + ldd [%o0 + 0x68], %f26 + ldd [%o0 + 0x70], %f28 + ldd [%o0 + 0x78], %f30 + DES_IP(32, 32) + DES_ROUND(0, 2, 32, 32) + DES_ROUND(4, 6, 32, 32) + DES_ROUND(8, 10, 32, 32) + DES_ROUND(12, 14, 32, 32) + DES_ROUND(16, 18, 32, 32) + DES_ROUND(20, 22, 32, 32) + DES_ROUND(24, 26, 32, 32) + DES_ROUND(28, 30, 32, 32) + DES_IIP(32, 32) + std %f32, [%o2 + 0x00] + retl + VISExit +ENDPROC(des_sparc64_crypt) + + .align 32 +ENTRY(des_sparc64_load_keys) + /* %o0=key */ + VISEntry + ldd [%o0 + 0x00], %f0 + ldd [%o0 + 0x08], %f2 + ldd [%o0 + 0x10], %f4 + ldd [%o0 + 0x18], %f6 + ldd [%o0 + 0x20], %f8 + ldd [%o0 + 0x28], %f10 + ldd [%o0 + 0x30], %f12 + ldd [%o0 + 0x38], %f14 + ldd [%o0 + 0x40], %f16 + ldd [%o0 + 0x48], %f18 + ldd [%o0 + 0x50], %f20 + ldd [%o0 + 0x58], %f22 + ldd [%o0 + 0x60], %f24 + ldd [%o0 + 0x68], %f26 + ldd [%o0 + 0x70], %f28 + retl + ldd [%o0 + 0x78], %f30 +ENDPROC(des_sparc64_load_keys) + + .align 32 +ENTRY(des_sparc64_ecb_crypt) + /* %o0=input, %o1=output, %o2=len */ +1: ldd [%o0 + 0x00], %f32 + add %o0, 0x08, %o0 + DES_IP(32, 32) + DES_ROUND(0, 2, 32, 32) + DES_ROUND(4, 6, 32, 32) + DES_ROUND(8, 10, 32, 32) + DES_ROUND(12, 14, 32, 32) + DES_ROUND(16, 18, 32, 32) + DES_ROUND(20, 22, 32, 32) + DES_ROUND(24, 26, 32, 32) + DES_ROUND(28, 30, 32, 32) + DES_IIP(32, 32) + std %f32, [%o1 + 0x00] + subcc %o2, 0x08, %o2 + bne,pt %icc, 1b + add %o1, 0x08, %o1 + retl + nop +ENDPROC(des_sparc64_ecb_crypt) + + .align 32 +ENTRY(des_sparc64_cbc_encrypt) + /* %o0=input, %o1=output, %o2=len, %o3=IV */ + ldd [%o3 + 0x00], %f32 +1: ldd [%o0 + 0x00], %f34 + fxor %f32, %f34, %f32 + DES_IP(32, 32) + DES_ROUND(0, 2, 32, 32) + DES_ROUND(4, 6, 32, 32) + DES_ROUND(8, 10, 32, 32) + DES_ROUND(12, 14, 32, 32) + DES_ROUND(16, 18, 32, 32) + DES_ROUND(20, 22, 32, 32) + DES_ROUND(24, 26, 32, 32) + DES_ROUND(28, 30, 32, 32) + DES_IIP(32, 32) + std %f32, [%o1 + 0x00] + add %o0, 0x08, %o0 + subcc %o2, 0x08, %o2 + bne,pt %icc, 1b + add %o1, 0x08, %o1 + retl + std %f32, [%o3 + 0x00] +ENDPROC(des_sparc64_cbc_encrypt) + + .align 32 +ENTRY(des_sparc64_cbc_decrypt) + /* %o0=input, %o1=output, %o2=len, %o3=IV */ + ldd [%o3 + 0x00], %f34 +1: ldd [%o0 + 0x00], %f36 + DES_IP(36, 32) + DES_ROUND(0, 2, 32, 32) + DES_ROUND(4, 6, 32, 32) + DES_ROUND(8, 10, 32, 32) + DES_ROUND(12, 14, 32, 32) + DES_ROUND(16, 18, 32, 32) + DES_ROUND(20, 22, 32, 32) + DES_ROUND(24, 26, 32, 32) + DES_ROUND(28, 30, 32, 32) + DES_IIP(32, 32) + fxor %f32, %f34, %f32 + std %f32, [%o1 + 0x00] + add %o0, 0x08, %o0 + subcc %o2, 0x08, %o2 + bne,pt %icc, 1b + add %o1, 0x08, %o1 + retl + std %f36, [%o3 + 0x00] +ENDPROC(des_sparc64_cbc_decrypt) + + .align 32 +ENTRY(des3_ede_sparc64_crypt) + /* %o0=key, %o1=input, %o2=output */ + VISEntry + ldd [%o1 + 0x00], %f32 + ldd [%o0 + 0x00], %f0 + ldd [%o0 + 0x08], %f2 + ldd [%o0 + 0x10], %f4 + ldd [%o0 + 0x18], %f6 + ldd [%o0 + 0x20], %f8 + ldd [%o0 + 0x28], %f10 + ldd [%o0 + 0x30], %f12 + ldd [%o0 + 0x38], %f14 + ldd [%o0 + 0x40], %f16 + ldd [%o0 + 0x48], %f18 + ldd [%o0 + 0x50], %f20 + ldd [%o0 + 
0x58], %f22 + ldd [%o0 + 0x60], %f24 + ldd [%o0 + 0x68], %f26 + ldd [%o0 + 0x70], %f28 + ldd [%o0 + 0x78], %f30 + DES_IP(32, 32) + DES_ROUND(0, 2, 32, 32) + ldd [%o0 + 0x80], %f0 + ldd [%o0 + 0x88], %f2 + DES_ROUND(4, 6, 32, 32) + ldd [%o0 + 0x90], %f4 + ldd [%o0 + 0x98], %f6 + DES_ROUND(8, 10, 32, 32) + ldd [%o0 + 0xa0], %f8 + ldd [%o0 + 0xa8], %f10 + DES_ROUND(12, 14, 32, 32) + ldd [%o0 + 0xb0], %f12 + ldd [%o0 + 0xb8], %f14 + DES_ROUND(16, 18, 32, 32) + ldd [%o0 + 0xc0], %f16 + ldd [%o0 + 0xc8], %f18 + DES_ROUND(20, 22, 32, 32) + ldd [%o0 + 0xd0], %f20 + ldd [%o0 + 0xd8], %f22 + DES_ROUND(24, 26, 32, 32) + ldd [%o0 + 0xe0], %f24 + ldd [%o0 + 0xe8], %f26 + DES_ROUND(28, 30, 32, 32) + ldd [%o0 + 0xf0], %f28 + ldd [%o0 + 0xf8], %f30 + DES_IIP(32, 32) + DES_IP(32, 32) + DES_ROUND(0, 2, 32, 32) + ldd [%o0 + 0x100], %f0 + ldd [%o0 + 0x108], %f2 + DES_ROUND(4, 6, 32, 32) + ldd [%o0 + 0x110], %f4 + ldd [%o0 + 0x118], %f6 + DES_ROUND(8, 10, 32, 32) + ldd [%o0 + 0x120], %f8 + ldd [%o0 + 0x128], %f10 + DES_ROUND(12, 14, 32, 32) + ldd [%o0 + 0x130], %f12 + ldd [%o0 + 0x138], %f14 + DES_ROUND(16, 18, 32, 32) + ldd [%o0 + 0x140], %f16 + ldd [%o0 + 0x148], %f18 + DES_ROUND(20, 22, 32, 32) + ldd [%o0 + 0x150], %f20 + ldd [%o0 + 0x158], %f22 + DES_ROUND(24, 26, 32, 32) + ldd [%o0 + 0x160], %f24 + ldd [%o0 + 0x168], %f26 + DES_ROUND(28, 30, 32, 32) + ldd [%o0 + 0x170], %f28 + ldd [%o0 + 0x178], %f30 + DES_IIP(32, 32) + DES_IP(32, 32) + DES_ROUND(0, 2, 32, 32) + DES_ROUND(4, 6, 32, 32) + DES_ROUND(8, 10, 32, 32) + DES_ROUND(12, 14, 32, 32) + DES_ROUND(16, 18, 32, 32) + DES_ROUND(20, 22, 32, 32) + DES_ROUND(24, 26, 32, 32) + DES_ROUND(28, 30, 32, 32) + DES_IIP(32, 32) + + std %f32, [%o2 + 0x00] + retl + VISExit +ENDPROC(des3_ede_sparc64_crypt) + + .align 32 +ENTRY(des3_ede_sparc64_load_keys) + /* %o0=key */ + VISEntry + ldd [%o0 + 0x00], %f0 + ldd [%o0 + 0x08], %f2 + ldd [%o0 + 0x10], %f4 + ldd [%o0 + 0x18], %f6 + ldd [%o0 + 0x20], %f8 + ldd [%o0 + 0x28], %f10 + ldd [%o0 + 0x30], %f12 + ldd [%o0 + 0x38], %f14 + ldd [%o0 + 0x40], %f16 + ldd [%o0 + 0x48], %f18 + ldd [%o0 + 0x50], %f20 + ldd [%o0 + 0x58], %f22 + ldd [%o0 + 0x60], %f24 + ldd [%o0 + 0x68], %f26 + ldd [%o0 + 0x70], %f28 + ldd [%o0 + 0x78], %f30 + ldd [%o0 + 0x80], %f32 + ldd [%o0 + 0x88], %f34 + ldd [%o0 + 0x90], %f36 + ldd [%o0 + 0x98], %f38 + ldd [%o0 + 0xa0], %f40 + ldd [%o0 + 0xa8], %f42 + ldd [%o0 + 0xb0], %f44 + ldd [%o0 + 0xb8], %f46 + ldd [%o0 + 0xc0], %f48 + ldd [%o0 + 0xc8], %f50 + ldd [%o0 + 0xd0], %f52 + ldd [%o0 + 0xd8], %f54 + ldd [%o0 + 0xe0], %f56 + retl + ldd [%o0 + 0xe8], %f58 +ENDPROC(des3_ede_sparc64_load_keys) + +#define DES3_LOOP_BODY(X) \ + DES_IP(X, X) \ + DES_ROUND(0, 2, X, X) \ + DES_ROUND(4, 6, X, X) \ + DES_ROUND(8, 10, X, X) \ + DES_ROUND(12, 14, X, X) \ + DES_ROUND(16, 18, X, X) \ + ldd [%o0 + 0xf0], %f16; \ + ldd [%o0 + 0xf8], %f18; \ + DES_ROUND(20, 22, X, X) \ + ldd [%o0 + 0x100], %f20; \ + ldd [%o0 + 0x108], %f22; \ + DES_ROUND(24, 26, X, X) \ + ldd [%o0 + 0x110], %f24; \ + ldd [%o0 + 0x118], %f26; \ + DES_ROUND(28, 30, X, X) \ + ldd [%o0 + 0x120], %f28; \ + ldd [%o0 + 0x128], %f30; \ + DES_IIP(X, X) \ + DES_IP(X, X) \ + DES_ROUND(32, 34, X, X) \ + ldd [%o0 + 0x130], %f0; \ + ldd [%o0 + 0x138], %f2; \ + DES_ROUND(36, 38, X, X) \ + ldd [%o0 + 0x140], %f4; \ + ldd [%o0 + 0x148], %f6; \ + DES_ROUND(40, 42, X, X) \ + ldd [%o0 + 0x150], %f8; \ + ldd [%o0 + 0x158], %f10; \ + DES_ROUND(44, 46, X, X) \ + ldd [%o0 + 0x160], %f12; \ + ldd [%o0 + 0x168], %f14; \ + DES_ROUND(48, 50, X, X) \ + DES_ROUND(52, 54, X, X) \ + 
DES_ROUND(56, 58, X, X) \ + DES_ROUND(16, 18, X, X) \ + ldd [%o0 + 0x170], %f16; \ + ldd [%o0 + 0x178], %f18; \ + DES_IIP(X, X) \ + DES_IP(X, X) \ + DES_ROUND(20, 22, X, X) \ + ldd [%o0 + 0x50], %f20; \ + ldd [%o0 + 0x58], %f22; \ + DES_ROUND(24, 26, X, X) \ + ldd [%o0 + 0x60], %f24; \ + ldd [%o0 + 0x68], %f26; \ + DES_ROUND(28, 30, X, X) \ + ldd [%o0 + 0x70], %f28; \ + ldd [%o0 + 0x78], %f30; \ + DES_ROUND(0, 2, X, X) \ + ldd [%o0 + 0x00], %f0; \ + ldd [%o0 + 0x08], %f2; \ + DES_ROUND(4, 6, X, X) \ + ldd [%o0 + 0x10], %f4; \ + ldd [%o0 + 0x18], %f6; \ + DES_ROUND(8, 10, X, X) \ + ldd [%o0 + 0x20], %f8; \ + ldd [%o0 + 0x28], %f10; \ + DES_ROUND(12, 14, X, X) \ + ldd [%o0 + 0x30], %f12; \ + ldd [%o0 + 0x38], %f14; \ + DES_ROUND(16, 18, X, X) \ + ldd [%o0 + 0x40], %f16; \ + ldd [%o0 + 0x48], %f18; \ + DES_IIP(X, X) + + .align 32 +ENTRY(des3_ede_sparc64_ecb_crypt) + /* %o0=key, %o1=input, %o2=output, %o3=len */ +1: ldd [%o1 + 0x00], %f60 + DES3_LOOP_BODY(60) + std %f60, [%o2 + 0x00] + subcc %o3, 0x08, %o3 + bne,pt %icc, 1b + add %o2, 0x08, %o2 + retl + nop +ENDPROC(des3_ede_sparc64_ecb_crypt) + + .align 32 +ENTRY(des3_ede_sparc64_cbc_encrypt) + /* %o0=key, %o1=input, %o2=output, %o3=len, %o4=IV */ + ldd [%o4 + 0x00], %f60 +1: ldd [%o1 + 0x00], %f62 + fxor %f60, %f62, %f60 + DES3_LOOP_BODY(60) + std %f60, [%o2 + 0x00] + add %o1, 0x08, %o1 + subcc %o3, 0x08, %o3 + bne,pt %icc, 1b + add %o2, 0x08, %o2 + retl + std %f60, [%o4 + 0x00] +ENDPROC(des3_ede_sparc64_cbc_encrypt) + + .align 32 +ENTRY(des3_ede_sparc64_cbc_decrypt) + /* %o0=key, %o1=input, %o2=output, %o3=len, %o4=IV */ + ldd [%o4 + 0x00], %f62 +1: ldx [%o1 + 0x00], %g1 + MOVXTOD_G1_F60 + DES3_LOOP_BODY(60) + fxor %f62, %f60, %f60 + MOVXTOD_G1_F62 + std %f60, [%o2 + 0x00] + add %o1, 0x08, %o1 + subcc %o3, 0x08, %o3 + bne,pt %icc, 1b + add %o2, 0x08, %o2 + retl + stx %g1, [%o4 + 0x00] +ENDPROC(des3_ede_sparc64_cbc_decrypt) diff --git a/arch/sparc/crypto/des_glue.c b/arch/sparc/crypto/des_glue.c new file mode 100644 index 000000000000..5ec0309e48c0 --- /dev/null +++ b/arch/sparc/crypto/des_glue.c @@ -0,0 +1,525 @@ +/* Glue code for DES encryption optimized for sparc64 crypto opcodes. + * + * Copyright (C) 2012 David S. Miller + */ + +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include + +struct des_sparc64_ctx { + u64 encrypt_expkey[DES_EXPKEY_WORDS / 2]; + u64 decrypt_expkey[DES_EXPKEY_WORDS / 2]; +}; + +struct des3_ede_sparc64_ctx { + u64 encrypt_expkey[DES3_EDE_EXPKEY_WORDS / 2]; + u64 decrypt_expkey[DES3_EDE_EXPKEY_WORDS / 2]; +}; + +static void encrypt_to_decrypt(u64 *d, const u64 *e) +{ + const u64 *s = e + (DES_EXPKEY_WORDS / 2) - 1; + int i; + + for (i = 0; i < DES_EXPKEY_WORDS / 2; i++) + *d++ = *s--; +} + +extern void des_sparc64_key_expand(const u32 *input_key, u64 *key); + +static int des_set_key(struct crypto_tfm *tfm, const u8 *key, + unsigned int keylen) +{ + struct des_sparc64_ctx *dctx = crypto_tfm_ctx(tfm); + u32 *flags = &tfm->crt_flags; + u32 tmp[DES_EXPKEY_WORDS]; + int ret; + + /* Even though we have special instructions for key expansion, + * we call des_ekey() so that we don't have to write our own + * weak key detection code. 
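+ *
+ * des_ekey() returns zero when the key is one of the known weak
+ * keys; the check below turns that into -EINVAL only when the user
+ * asked for weak keys to be rejected via CRYPTO_TFM_REQ_WEAK_KEY.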
+ */ + ret = des_ekey(tmp, key); + if (unlikely(ret == 0) && (*flags & CRYPTO_TFM_REQ_WEAK_KEY)) { + *flags |= CRYPTO_TFM_RES_WEAK_KEY; + return -EINVAL; + } + + des_sparc64_key_expand((const u32 *) key, &dctx->encrypt_expkey[0]); + encrypt_to_decrypt(&dctx->decrypt_expkey[0], &dctx->encrypt_expkey[0]); + + return 0; +} + +extern void des_sparc64_crypt(const u64 *key, const u64 *input, + u64 *output); + +static void des_encrypt(struct crypto_tfm *tfm, u8 *dst, const u8 *src) +{ + struct des_sparc64_ctx *ctx = crypto_tfm_ctx(tfm); + const u64 *K = ctx->encrypt_expkey; + + des_sparc64_crypt(K, (const u64 *) src, (u64 *) dst); +} + +static void des_decrypt(struct crypto_tfm *tfm, u8 *dst, const u8 *src) +{ + struct des_sparc64_ctx *ctx = crypto_tfm_ctx(tfm); + const u64 *K = ctx->decrypt_expkey; + + des_sparc64_crypt(K, (const u64 *) src, (u64 *) dst); +} + +extern void des_sparc64_load_keys(const u64 *key); + +extern void des_sparc64_ecb_crypt(const u64 *input, u64 *output, + unsigned int len); + +#define DES_BLOCK_MASK (~(DES_BLOCK_SIZE - 1)) + +static int __ecb_crypt(struct blkcipher_desc *desc, + struct scatterlist *dst, struct scatterlist *src, + unsigned int nbytes, bool encrypt) +{ + struct des_sparc64_ctx *ctx = crypto_blkcipher_ctx(desc->tfm); + struct blkcipher_walk walk; + int err; + + blkcipher_walk_init(&walk, dst, src, nbytes); + err = blkcipher_walk_virt(desc, &walk); + + if (encrypt) + des_sparc64_load_keys(&ctx->encrypt_expkey[0]); + else + des_sparc64_load_keys(&ctx->decrypt_expkey[0]); + while ((nbytes = walk.nbytes)) { + unsigned int block_len = nbytes & DES_BLOCK_MASK; + + if (likely(block_len)) { + des_sparc64_ecb_crypt((const u64 *)walk.src.virt.addr, + (u64 *) walk.dst.virt.addr, + block_len); + } + nbytes &= DES_BLOCK_SIZE - 1; + err = blkcipher_walk_done(desc, &walk, nbytes); + } + fprs_write(0); + return err; +} + +static int ecb_encrypt(struct blkcipher_desc *desc, + struct scatterlist *dst, struct scatterlist *src, + unsigned int nbytes) +{ + return __ecb_crypt(desc, dst, src, nbytes, true); +} + +static int ecb_decrypt(struct blkcipher_desc *desc, + struct scatterlist *dst, struct scatterlist *src, + unsigned int nbytes) +{ + return __ecb_crypt(desc, dst, src, nbytes, false); +} + +extern void des_sparc64_cbc_encrypt(const u64 *input, u64 *output, + unsigned int len, u64 *iv); + +static int cbc_encrypt(struct blkcipher_desc *desc, + struct scatterlist *dst, struct scatterlist *src, + unsigned int nbytes) +{ + struct des_sparc64_ctx *ctx = crypto_blkcipher_ctx(desc->tfm); + struct blkcipher_walk walk; + int err; + + blkcipher_walk_init(&walk, dst, src, nbytes); + err = blkcipher_walk_virt(desc, &walk); + + des_sparc64_load_keys(&ctx->encrypt_expkey[0]); + while ((nbytes = walk.nbytes)) { + unsigned int block_len = nbytes & DES_BLOCK_MASK; + + if (likely(block_len)) { + des_sparc64_cbc_encrypt((const u64 *)walk.src.virt.addr, + (u64 *) walk.dst.virt.addr, + block_len, (u64 *) walk.iv); + } + nbytes &= DES_BLOCK_SIZE - 1; + err = blkcipher_walk_done(desc, &walk, nbytes); + } + fprs_write(0); + return err; +} + +extern void des_sparc64_cbc_decrypt(const u64 *input, u64 *output, + unsigned int len, u64 *iv); + +static int cbc_decrypt(struct blkcipher_desc *desc, + struct scatterlist *dst, struct scatterlist *src, + unsigned int nbytes) +{ + struct des_sparc64_ctx *ctx = crypto_blkcipher_ctx(desc->tfm); + struct blkcipher_walk walk; + int err; + + blkcipher_walk_init(&walk, dst, src, nbytes); + err = blkcipher_walk_virt(desc, &walk); + + 
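/* Pull the whole decryption key schedule into the FPU registers once,
+ * up front; the asm routine called in the loop below then works on
+ * the in-register schedule, and fprs_write(0) afterwards releases the
+ * FPU state that des_sparc64_load_keys() left enabled. */
+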
des_sparc64_load_keys(&ctx->decrypt_expkey[0]); + while ((nbytes = walk.nbytes)) { + unsigned int block_len = nbytes & DES_BLOCK_MASK; + + if (likely(block_len)) { + des_sparc64_cbc_decrypt((const u64 *)walk.src.virt.addr, + (u64 *) walk.dst.virt.addr, + block_len, (u64 *) walk.iv); + } + nbytes &= DES_BLOCK_SIZE - 1; + err = blkcipher_walk_done(desc, &walk, nbytes); + } + fprs_write(0); + return err; +} + +static int des3_ede_set_key(struct crypto_tfm *tfm, const u8 *key, + unsigned int keylen) +{ + struct des3_ede_sparc64_ctx *dctx = crypto_tfm_ctx(tfm); + const u32 *K = (const u32 *)key; + u32 *flags = &tfm->crt_flags; + u64 k1[DES_EXPKEY_WORDS / 2]; + u64 k2[DES_EXPKEY_WORDS / 2]; + u64 k3[DES_EXPKEY_WORDS / 2]; + + if (unlikely(!((K[0] ^ K[2]) | (K[1] ^ K[3])) || + !((K[2] ^ K[4]) | (K[3] ^ K[5]))) && + (*flags & CRYPTO_TFM_REQ_WEAK_KEY)) { + *flags |= CRYPTO_TFM_RES_WEAK_KEY; + return -EINVAL; + } + + des_sparc64_key_expand((const u32 *)key, k1); + key += DES_KEY_SIZE; + des_sparc64_key_expand((const u32 *)key, k2); + key += DES_KEY_SIZE; + des_sparc64_key_expand((const u32 *)key, k3); + + memcpy(&dctx->encrypt_expkey[0], &k1[0], sizeof(k1)); + encrypt_to_decrypt(&dctx->encrypt_expkey[DES_EXPKEY_WORDS / 2], &k2[0]); + memcpy(&dctx->encrypt_expkey[(DES_EXPKEY_WORDS / 2) * 2], + &k3[0], sizeof(k3)); + + encrypt_to_decrypt(&dctx->decrypt_expkey[0], &k3[0]); + memcpy(&dctx->decrypt_expkey[DES_EXPKEY_WORDS / 2], + &k2[0], sizeof(k2)); + encrypt_to_decrypt(&dctx->decrypt_expkey[(DES_EXPKEY_WORDS / 2) * 2], + &k1[0]); + + return 0; +} + +extern void des3_ede_sparc64_crypt(const u64 *key, const u64 *input, + u64 *output); + +static void des3_ede_encrypt(struct crypto_tfm *tfm, u8 *dst, const u8 *src) +{ + struct des3_ede_sparc64_ctx *ctx = crypto_tfm_ctx(tfm); + const u64 *K = ctx->encrypt_expkey; + + des3_ede_sparc64_crypt(K, (const u64 *) src, (u64 *) dst); +} + +static void des3_ede_decrypt(struct crypto_tfm *tfm, u8 *dst, const u8 *src) +{ + struct des3_ede_sparc64_ctx *ctx = crypto_tfm_ctx(tfm); + const u64 *K = ctx->decrypt_expkey; + + des3_ede_sparc64_crypt(K, (const u64 *) src, (u64 *) dst); +} + +extern void des3_ede_sparc64_load_keys(const u64 *key); + +extern void des3_ede_sparc64_ecb_crypt(const u64 *expkey, const u64 *input, + u64 *output, unsigned int len); + +static int __ecb3_crypt(struct blkcipher_desc *desc, + struct scatterlist *dst, struct scatterlist *src, + unsigned int nbytes, bool encrypt) +{ + struct des3_ede_sparc64_ctx *ctx = crypto_blkcipher_ctx(desc->tfm); + struct blkcipher_walk walk; + const u64 *K; + int err; + + blkcipher_walk_init(&walk, dst, src, nbytes); + err = blkcipher_walk_virt(desc, &walk); + + if (encrypt) + K = &ctx->encrypt_expkey[0]; + else + K = &ctx->decrypt_expkey[0]; + des3_ede_sparc64_load_keys(K); + while ((nbytes = walk.nbytes)) { + unsigned int block_len = nbytes & DES_BLOCK_MASK; + + if (likely(block_len)) { + const u64 *src64 = (const u64 *)walk.src.virt.addr; + des3_ede_sparc64_ecb_crypt(K, src64, + (u64 *) walk.dst.virt.addr, + block_len); + } + nbytes &= DES_BLOCK_SIZE - 1; + err = blkcipher_walk_done(desc, &walk, nbytes); + } + fprs_write(0); + return err; +} + +static int ecb3_encrypt(struct blkcipher_desc *desc, + struct scatterlist *dst, struct scatterlist *src, + unsigned int nbytes) +{ + return __ecb3_crypt(desc, dst, src, nbytes, true); +} + +static int ecb3_decrypt(struct blkcipher_desc *desc, + struct scatterlist *dst, struct scatterlist *src, + unsigned int nbytes) +{ + return __ecb3_crypt(desc, dst, src, nbytes, false); +} 
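+
+/* An illustrative usage sketch, not part of the original patch: once
+ * registered, these routines are reached through the kernel crypto
+ * API, where the .cra_priority of 150 outranks the generic C
+ * des3_ede. Assuming caller-provided key, iv, src_sg, dst_sg and
+ * nbytes:
+ *
+ *	struct crypto_blkcipher *tfm;
+ *	struct blkcipher_desc desc;
+ *
+ *	tfm = crypto_alloc_blkcipher("cbc(des3_ede)", 0, 0);
+ *	crypto_blkcipher_setkey(tfm, key, DES3_EDE_KEY_SIZE);
+ *	crypto_blkcipher_set_iv(tfm, iv, DES3_EDE_BLOCK_SIZE);
+ *	desc.tfm = tfm;
+ *	desc.flags = 0;
+ *	crypto_blkcipher_encrypt(&desc, dst_sg, src_sg, nbytes);
+ *	crypto_free_blkcipher(tfm);
+ */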
+ +extern void des3_ede_sparc64_cbc_encrypt(const u64 *expkey, const u64 *input, + u64 *output, unsigned int len, + u64 *iv); + +static int cbc3_encrypt(struct blkcipher_desc *desc, + struct scatterlist *dst, struct scatterlist *src, + unsigned int nbytes) +{ + struct des3_ede_sparc64_ctx *ctx = crypto_blkcipher_ctx(desc->tfm); + struct blkcipher_walk walk; + const u64 *K; + int err; + + blkcipher_walk_init(&walk, dst, src, nbytes); + err = blkcipher_walk_virt(desc, &walk); + + K = &ctx->encrypt_expkey[0]; + des3_ede_sparc64_load_keys(K); + while ((nbytes = walk.nbytes)) { + unsigned int block_len = nbytes & DES_BLOCK_MASK; + + if (likely(block_len)) { + const u64 *src64 = (const u64 *)walk.src.virt.addr; + des3_ede_sparc64_cbc_encrypt(K, src64, + (u64 *) walk.dst.virt.addr, + block_len, + (u64 *) walk.iv); + } + nbytes &= DES_BLOCK_SIZE - 1; + err = blkcipher_walk_done(desc, &walk, nbytes); + } + fprs_write(0); + return err; +} + +extern void des3_ede_sparc64_cbc_decrypt(const u64 *expkey, const u64 *input, + u64 *output, unsigned int len, + u64 *iv); + +static int cbc3_decrypt(struct blkcipher_desc *desc, + struct scatterlist *dst, struct scatterlist *src, + unsigned int nbytes) +{ + struct des3_ede_sparc64_ctx *ctx = crypto_blkcipher_ctx(desc->tfm); + struct blkcipher_walk walk; + const u64 *K; + int err; + + blkcipher_walk_init(&walk, dst, src, nbytes); + err = blkcipher_walk_virt(desc, &walk); + + K = &ctx->decrypt_expkey[0]; + des3_ede_sparc64_load_keys(K); + while ((nbytes = walk.nbytes)) { + unsigned int block_len = nbytes & DES_BLOCK_MASK; + + if (likely(block_len)) { + const u64 *src64 = (const u64 *)walk.src.virt.addr; + des3_ede_sparc64_cbc_decrypt(K, src64, + (u64 *) walk.dst.virt.addr, + block_len, + (u64 *) walk.iv); + } + nbytes &= DES_BLOCK_SIZE - 1; + err = blkcipher_walk_done(desc, &walk, nbytes); + } + fprs_write(0); + return err; +} + +static struct crypto_alg algs[] = { { + .cra_name = "des", + .cra_driver_name = "des-sparc64", + .cra_priority = 150, + .cra_flags = CRYPTO_ALG_TYPE_CIPHER, + .cra_blocksize = DES_BLOCK_SIZE, + .cra_ctxsize = sizeof(struct des_sparc64_ctx), + .cra_alignmask = 7, + .cra_module = THIS_MODULE, + .cra_u = { + .cipher = { + .cia_min_keysize = DES_KEY_SIZE, + .cia_max_keysize = DES_KEY_SIZE, + .cia_setkey = des_set_key, + .cia_encrypt = des_encrypt, + .cia_decrypt = des_decrypt + } + } +}, { + .cra_name = "ecb(des)", + .cra_driver_name = "ecb-des-sparc64", + .cra_priority = 150, + .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER, + .cra_blocksize = DES_BLOCK_SIZE, + .cra_ctxsize = sizeof(struct des_sparc64_ctx), + .cra_alignmask = 7, + .cra_type = &crypto_blkcipher_type, + .cra_module = THIS_MODULE, + .cra_u = { + .blkcipher = { + .min_keysize = DES_KEY_SIZE, + .max_keysize = DES_KEY_SIZE, + .setkey = des_set_key, + .encrypt = ecb_encrypt, + .decrypt = ecb_decrypt, + }, + }, +}, { + .cra_name = "cbc(des)", + .cra_driver_name = "cbc-des-sparc64", + .cra_priority = 150, + .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER, + .cra_blocksize = DES_BLOCK_SIZE, + .cra_ctxsize = sizeof(struct des_sparc64_ctx), + .cra_alignmask = 7, + .cra_type = &crypto_blkcipher_type, + .cra_module = THIS_MODULE, + .cra_u = { + .blkcipher = { + .min_keysize = DES_KEY_SIZE, + .max_keysize = DES_KEY_SIZE, + .setkey = des_set_key, + .encrypt = cbc_encrypt, + .decrypt = cbc_decrypt, + }, + }, +}, { + .cra_name = "des3_ede", + .cra_driver_name = "des3_ede-sparc64", + .cra_priority = 150, + .cra_flags = CRYPTO_ALG_TYPE_CIPHER, + .cra_blocksize = DES3_EDE_BLOCK_SIZE, + .cra_ctxsize = 
sizeof(struct des3_ede_sparc64_ctx), + .cra_alignmask = 7, + .cra_module = THIS_MODULE, + .cra_u = { + .cipher = { + .cia_min_keysize = DES3_EDE_KEY_SIZE, + .cia_max_keysize = DES3_EDE_KEY_SIZE, + .cia_setkey = des3_ede_set_key, + .cia_encrypt = des3_ede_encrypt, + .cia_decrypt = des3_ede_decrypt + } + } +}, { + .cra_name = "ecb(des3_ede)", + .cra_driver_name = "ecb-des3_ede-sparc64", + .cra_priority = 150, + .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER, + .cra_blocksize = DES3_EDE_BLOCK_SIZE, + .cra_ctxsize = sizeof(struct des3_ede_sparc64_ctx), + .cra_alignmask = 7, + .cra_type = &crypto_blkcipher_type, + .cra_module = THIS_MODULE, + .cra_u = { + .blkcipher = { + .min_keysize = DES3_EDE_KEY_SIZE, + .max_keysize = DES3_EDE_KEY_SIZE, + .setkey = des3_ede_set_key, + .encrypt = ecb3_encrypt, + .decrypt = ecb3_decrypt, + }, + }, +}, { + .cra_name = "cbc(des3_ede)", + .cra_driver_name = "cbc-des3_ede-sparc64", + .cra_priority = 150, + .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER, + .cra_blocksize = DES3_EDE_BLOCK_SIZE, + .cra_ctxsize = sizeof(struct des3_ede_sparc64_ctx), + .cra_alignmask = 7, + .cra_type = &crypto_blkcipher_type, + .cra_module = THIS_MODULE, + .cra_u = { + .blkcipher = { + .min_keysize = DES3_EDE_KEY_SIZE, + .max_keysize = DES3_EDE_KEY_SIZE, + .setkey = des3_ede_set_key, + .encrypt = cbc3_encrypt, + .decrypt = cbc3_decrypt, + }, + }, +} }; + +static bool __init sparc64_has_des_opcode(void) +{ + unsigned long cfr; + + if (!(sparc64_elf_hwcap & HWCAP_SPARC_CRYPTO)) + return false; + + __asm__ __volatile__("rd %%asr26, %0" : "=r" (cfr)); + if (!(cfr & CFR_DES)) + return false; + + return true; +} + +static int __init des_sparc64_mod_init(void) +{ + int i; + + for (i = 0; i < ARRAY_SIZE(algs); i++) + INIT_LIST_HEAD(&algs[i].cra_list); + + if (sparc64_has_des_opcode()) { + pr_info("Using sparc64 des opcodes optimized DES implementation\n"); + return crypto_register_algs(algs, ARRAY_SIZE(algs)); + } + pr_info("sparc64 des opcodes not available.\n"); + return -ENODEV; +} + +static void __exit des_sparc64_mod_fini(void) +{ + crypto_unregister_algs(algs, ARRAY_SIZE(algs)); +} + +module_init(des_sparc64_mod_init); +module_exit(des_sparc64_mod_fini); + +MODULE_LICENSE("GPL"); +MODULE_DESCRIPTION("DES & Triple DES EDE Cipher Algorithms, sparc64 des opcode accelerated"); + +MODULE_ALIAS("des");
diff --git a/crypto/Kconfig b/crypto/Kconfig index 83993ea84cce..469fc18bb037 100644 --- a/crypto/Kconfig +++ b/crypto/Kconfig @@ -778,6 +778,14 @@ config CRYPTO_DES help DES cipher algorithm (FIPS 46-2), and Triple DES EDE (FIPS 46-3). +config CRYPTO_DES_SPARC64 + tristate "DES and Triple DES EDE cipher algorithms (SPARC64)" + depends on SPARC64 + select CRYPTO_ALGAPI + select CRYPTO_DES + help + DES cipher algorithm (FIPS 46-2), and Triple DES EDE (FIPS 46-3), + optimized using SPARC64 crypto opcodes. + config CRYPTO_FCRYPT tristate "FCrypt cipher algorithm" select CRYPTO_ALGAPI
-- cgit v1.2.3 From 81658ad0d92306ceb271994b90cd49ffde10eeda Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Tue, 28 Aug 2012 12:05:54 -0700 Subject: sparc64: Add CAMELLIA driver making use of the new camellia opcodes. Signed-off-by: David S.
Miller --- arch/sparc/crypto/Makefile | 2 + arch/sparc/crypto/camellia_asm.S | 583 ++++++++++++++++++++++++++++++++++++++ arch/sparc/crypto/camellia_glue.c | 318 +++++++++++++++++++++ crypto/Kconfig | 16 ++ 4 files changed, 919 insertions(+) create mode 100644 arch/sparc/crypto/camellia_asm.S create mode 100644 arch/sparc/crypto/camellia_glue.c (limited to 'arch/sparc/crypto')
diff --git a/arch/sparc/crypto/Makefile b/arch/sparc/crypto/Makefile index dd999c6c8609..5d469d81761f 100644 --- a/arch/sparc/crypto/Makefile +++ b/arch/sparc/crypto/Makefile @@ -9,6 +9,7 @@ obj-$(CONFIG_CRYPTO_MD5_SPARC64) += md5-sparc64.o obj-$(CONFIG_CRYPTO_AES_SPARC64) += aes-sparc64.o obj-$(CONFIG_CRYPTO_DES_SPARC64) += des-sparc64.o +obj-$(CONFIG_CRYPTO_CAMELLIA_SPARC64) += camellia-sparc64.o obj-$(CONFIG_CRYPTO_CRC32C_SPARC64) += crc32c-sparc64.o @@ -19,5 +20,6 @@ md5-sparc64-y := md5_asm.o md5_glue.o aes-sparc64-y := aes_asm.o aes_glue.o des-sparc64-y := des_asm.o des_glue.o +camellia-sparc64-y := camellia_asm.o camellia_glue.o crc32c-sparc64-y := crc32c_asm.o crc32c_glue.o
diff --git a/arch/sparc/crypto/camellia_asm.S b/arch/sparc/crypto/camellia_asm.S new file mode 100644 index 000000000000..b0ddb5bcfe5f --- /dev/null +++ b/arch/sparc/crypto/camellia_asm.S @@ -0,0 +1,583 @@ +#include +#include + +#define F3F(x,y,z) (((x)<<30)|((y)<<19)|((z)<<5)) + +#define FPD_ENCODE(x) (((x) >> 5) | ((x) & ~(0x20))) + +#define RS1(x) (FPD_ENCODE(x) << 14) +#define RS2(x) (FPD_ENCODE(x) << 0) +#define RS3(x) (FPD_ENCODE(x) << 9) +#define RD(x) (FPD_ENCODE(x) << 25) +#define IMM5(x) ((x) << 0) + +#define CAMELLIA_F(a,b,c,d) \ + .word (F3F(2, 0x19, 0x00c)|RS1(a)|RS2(b)|RS3(c)|RD(d)); +#define CAMELLIA_FL(a,b,c) \ + .word (F3F(2, 0x36, 0x13c)|RS1(a)|RS2(b)|RD(c)); +#define CAMELLIA_FLI(a,b,c) \ + .word (F3F(2, 0x36, 0x13d)|RS1(a)|RS2(b)|RD(c)); + +#define MOVDTOX_F0_O4 \ + .word 0x99b02200 +#define MOVDTOX_F2_O5 \ + .word 0x9bb02202 + +#define CAMELLIA_6ROUNDS(KEY_BASE, I0, I1) \ + CAMELLIA_F(KEY_BASE + 0, I1, I0, I1) \ + CAMELLIA_F(KEY_BASE + 2, I0, I1, I0) \ + CAMELLIA_F(KEY_BASE + 4, I1, I0, I1) \ + CAMELLIA_F(KEY_BASE + 6, I0, I1, I0) \ + CAMELLIA_F(KEY_BASE + 8, I1, I0, I1) \ + CAMELLIA_F(KEY_BASE + 10, I0, I1, I0) + +#define CAMELLIA_6ROUNDS_FL_FLI(KEY_BASE, I0, I1) \ + CAMELLIA_6ROUNDS(KEY_BASE, I0, I1) \ + CAMELLIA_FL(KEY_BASE + 12, I0, I0) \ + CAMELLIA_FLI(KEY_BASE + 14, I1, I1) + + .data + + .align 8 +SIGMA: .xword 0xA09E667F3BCC908B + .xword 0xB67AE8584CAA73B2 + .xword 0xC6EF372FE94F82BE + .xword 0x54FF53A5F1D36F1C + .xword 0x10E527FADE682D1D + .xword 0xB05688C2B3E6C1FD + + .text + + .align 32 +ENTRY(camellia_sparc64_key_expand) + /* %o0=in_key, %o1=out_key, %o2=key_len, %o3=decrypt_key */ + VISEntry + ld [%o0 + 0x00], %f0 ! i0, k[0] + ld [%o0 + 0x04], %f1 ! i1, k[1] + ld [%o0 + 0x08], %f2 ! i2, k[2] + ld [%o0 + 0x0c], %f3 ! i3, k[3] + std %f0, [%o1 + 0x00] ! k[0, 1] + fsrc2 %f0, %f28 + std %f2, [%o1 + 0x08] ! k[2, 3] + cmp %o2, 16 + be 10f + fsrc2 %f2, %f30 + + ld [%o0 + 0x10], %f0 + ld [%o0 + 0x14], %f1 + std %f0, [%o1 + 0x20] ! k[8, 9] + cmp %o2, 24 + fone %f10 + be,a 1f + fxor %f10, %f0, %f2 + ld [%o0 + 0x18], %f2 + ld [%o0 + 0x1c], %f3 +1: + std %f2, [%o1 + 0x28] !
k[10, 11] + fxor %f28, %f0, %f0 + fxor %f30, %f2, %f2 + +10: + sethi %hi(SIGMA), %g3 + or %g3, %lo(SIGMA), %g3 + ldd [%g3 + 0x00], %f16 + ldd [%g3 + 0x08], %f18 + ldd [%g3 + 0x10], %f20 + ldd [%g3 + 0x18], %f22 + ldd [%g3 + 0x20], %f24 + ldd [%g3 + 0x28], %f26 + CAMELLIA_F(16, 2, 0, 2) + CAMELLIA_F(18, 0, 2, 0) + fxor %f28, %f0, %f0 + fxor %f30, %f2, %f2 + CAMELLIA_F(20, 2, 0, 2) + CAMELLIA_F(22, 0, 2, 0) + +#define ROTL128(S01, S23, TMP1, TMP2, N) \ + srlx S01, (64 - N), TMP1; \ + sllx S01, N, S01; \ + srlx S23, (64 - N), TMP2; \ + sllx S23, N, S23; \ + or S01, TMP2, S01; \ + or S23, TMP1, S23 + + cmp %o2, 16 + bne 1f + nop + /* 128-bit key */ + std %f0, [%o1 + 0x10] ! k[ 4, 5] + std %f2, [%o1 + 0x18] ! k[ 6, 7] + MOVDTOX_F0_O4 + MOVDTOX_F2_O5 + ROTL128(%o4, %o5, %g2, %g3, 15) + stx %o4, [%o1 + 0x30] ! k[12, 13] + stx %o5, [%o1 + 0x38] ! k[14, 15] + ROTL128(%o4, %o5, %g2, %g3, 15) + stx %o4, [%o1 + 0x40] ! k[16, 17] + stx %o5, [%o1 + 0x48] ! k[18, 19] + ROTL128(%o4, %o5, %g2, %g3, 15) + stx %o4, [%o1 + 0x60] ! k[24, 25] + ROTL128(%o4, %o5, %g2, %g3, 15) + stx %o4, [%o1 + 0x70] ! k[28, 29] + stx %o5, [%o1 + 0x78] ! k[30, 31] + ROTL128(%o4, %o5, %g2, %g3, 34) + stx %o4, [%o1 + 0xa0] ! k[40, 41] + stx %o5, [%o1 + 0xa8] ! k[42, 43] + ROTL128(%o4, %o5, %g2, %g3, 17) + stx %o4, [%o1 + 0xc0] ! k[48, 49] + stx %o5, [%o1 + 0xc8] ! k[50, 51] + + ldx [%o1 + 0x00], %o4 ! k[ 0, 1] + ldx [%o1 + 0x08], %o5 ! k[ 2, 3] + ROTL128(%o4, %o5, %g2, %g3, 15) + stx %o4, [%o1 + 0x20] ! k[ 8, 9] + stx %o5, [%o1 + 0x28] ! k[10, 11] + ROTL128(%o4, %o5, %g2, %g3, 30) + stx %o4, [%o1 + 0x50] ! k[20, 21] + stx %o5, [%o1 + 0x58] ! k[22, 23] + ROTL128(%o4, %o5, %g2, %g3, 15) + stx %o5, [%o1 + 0x68] ! k[26, 27] + ROTL128(%o4, %o5, %g2, %g3, 17) + stx %o4, [%o1 + 0x80] ! k[32, 33] + stx %o5, [%o1 + 0x88] ! k[34, 35] + ROTL128(%o4, %o5, %g2, %g3, 17) + stx %o4, [%o1 + 0x90] ! k[36, 37] + stx %o5, [%o1 + 0x98] ! k[38, 39] + ROTL128(%o4, %o5, %g2, %g3, 17) + stx %o4, [%o1 + 0xb0] ! k[44, 45] + stx %o5, [%o1 + 0xb8] ! k[46, 47] + + ba,pt %xcc, 2f + mov (3 * 16 * 4), %o0 + +1: + /* 192-bit or 256-bit key */ + std %f0, [%o1 + 0x30] ! k[12, 13] + std %f2, [%o1 + 0x38] ! k[14, 15] + ldd [%o1 + 0x20], %f4 ! k[ 8, 9] + ldd [%o1 + 0x28], %f6 ! k[10, 11] + fxor %f0, %f4, %f0 + fxor %f2, %f6, %f2 + CAMELLIA_F(24, 2, 0, 2) + CAMELLIA_F(26, 0, 2, 0) + std %f0, [%o1 + 0x10] ! k[ 4, 5] + std %f2, [%o1 + 0x18] ! k[ 6, 7] + MOVDTOX_F0_O4 + MOVDTOX_F2_O5 + ROTL128(%o4, %o5, %g2, %g3, 30) + stx %o4, [%o1 + 0x50] ! k[20, 21] + stx %o5, [%o1 + 0x58] ! k[22, 23] + ROTL128(%o4, %o5, %g2, %g3, 30) + stx %o4, [%o1 + 0xa0] ! k[40, 41] + stx %o5, [%o1 + 0xa8] ! k[42, 43] + ROTL128(%o4, %o5, %g2, %g3, 51) + stx %o4, [%o1 + 0x100] ! k[64, 65] + stx %o5, [%o1 + 0x108] ! k[66, 67] + ldx [%o1 + 0x20], %o4 ! k[ 8, 9] + ldx [%o1 + 0x28], %o5 ! k[10, 11] + ROTL128(%o4, %o5, %g2, %g3, 15) + stx %o4, [%o1 + 0x20] ! k[ 8, 9] + stx %o5, [%o1 + 0x28] ! k[10, 11] + ROTL128(%o4, %o5, %g2, %g3, 15) + stx %o4, [%o1 + 0x40] ! k[16, 17] + stx %o5, [%o1 + 0x48] ! k[18, 19] + ROTL128(%o4, %o5, %g2, %g3, 30) + stx %o4, [%o1 + 0x90] ! k[36, 37] + stx %o5, [%o1 + 0x98] ! k[38, 39] + ROTL128(%o4, %o5, %g2, %g3, 34) + stx %o4, [%o1 + 0xd0] ! k[52, 53] + stx %o5, [%o1 + 0xd8] ! k[54, 55] + ldx [%o1 + 0x30], %o4 ! k[12, 13] + ldx [%o1 + 0x38], %o5 ! k[14, 15] + ROTL128(%o4, %o5, %g2, %g3, 15) + stx %o4, [%o1 + 0x30] ! k[12, 13] + stx %o5, [%o1 + 0x38] ! k[14, 15] + ROTL128(%o4, %o5, %g2, %g3, 30) + stx %o4, [%o1 + 0x70] ! k[28, 29] + stx %o5, [%o1 + 0x78] ! 
k[30, 31] + srlx %o4, 32, %g2 + srlx %o5, 32, %g3 + stw %o4, [%o1 + 0xc0] ! k[48] + stw %g3, [%o1 + 0xc4] ! k[49] + stw %o5, [%o1 + 0xc8] ! k[50] + stw %g2, [%o1 + 0xcc] ! k[51] + ROTL128(%o4, %o5, %g2, %g3, 49) + stx %o4, [%o1 + 0xe0] ! k[56, 57] + stx %o5, [%o1 + 0xe8] ! k[58, 59] + ldx [%o1 + 0x00], %o4 ! k[ 0, 1] + ldx [%o1 + 0x08], %o5 ! k[ 2, 3] + ROTL128(%o4, %o5, %g2, %g3, 45) + stx %o4, [%o1 + 0x60] ! k[24, 25] + stx %o5, [%o1 + 0x68] ! k[26, 27] + ROTL128(%o4, %o5, %g2, %g3, 15) + stx %o4, [%o1 + 0x80] ! k[32, 33] + stx %o5, [%o1 + 0x88] ! k[34, 35] + ROTL128(%o4, %o5, %g2, %g3, 17) + stx %o4, [%o1 + 0xb0] ! k[44, 45] + stx %o5, [%o1 + 0xb8] ! k[46, 47] + ROTL128(%o4, %o5, %g2, %g3, 34) + stx %o4, [%o1 + 0xf0] ! k[60, 61] + stx %o5, [%o1 + 0xf8] ! k[62, 63] + mov (4 * 16 * 4), %o0 +2: + add %o1, %o0, %o1 + ldd [%o1 + 0x00], %f0 + ldd [%o1 + 0x08], %f2 + std %f0, [%o3 + 0x00] + std %f2, [%o3 + 0x08] + add %o3, 0x10, %o3 +1: + sub %o1, (16 * 4), %o1 + ldd [%o1 + 0x38], %f0 + ldd [%o1 + 0x30], %f2 + ldd [%o1 + 0x28], %f4 + ldd [%o1 + 0x20], %f6 + ldd [%o1 + 0x18], %f8 + ldd [%o1 + 0x10], %f10 + std %f0, [%o3 + 0x00] + std %f2, [%o3 + 0x08] + std %f4, [%o3 + 0x10] + std %f6, [%o3 + 0x18] + std %f8, [%o3 + 0x20] + std %f10, [%o3 + 0x28] + + ldd [%o1 + 0x08], %f0 + ldd [%o1 + 0x00], %f2 + std %f0, [%o3 + 0x30] + std %f2, [%o3 + 0x38] + subcc %o0, (16 * 4), %o0 + bne,pt %icc, 1b + add %o3, (16 * 4), %o3 + + std %f2, [%o3 - 0x10] + std %f0, [%o3 - 0x08] + + retl + VISExit +ENDPROC(camellia_sparc64_key_expand) + + .align 32 +ENTRY(camellia_sparc64_crypt) + /* %o0=key, %o1=input, %o2=output, %o3=key_len */ + VISEntry + + ld [%o1 + 0x00], %f0 + ld [%o1 + 0x04], %f1 + ld [%o1 + 0x08], %f2 + ld [%o1 + 0x0c], %f3 + + ldd [%o0 + 0x00], %f4 + ldd [%o0 + 0x08], %f6 + + cmp %o3, 16 + fxor %f4, %f0, %f0 + be 1f + fxor %f6, %f2, %f2 + + ldd [%o0 + 0x10], %f8 + ldd [%o0 + 0x18], %f10 + ldd [%o0 + 0x20], %f12 + ldd [%o0 + 0x28], %f14 + ldd [%o0 + 0x30], %f16 + ldd [%o0 + 0x38], %f18 + ldd [%o0 + 0x40], %f20 + ldd [%o0 + 0x48], %f22 + add %o0, 0x40, %o0 + + CAMELLIA_6ROUNDS_FL_FLI( 8, 0, 2) + +1: + ldd [%o0 + 0x10], %f8 + ldd [%o0 + 0x18], %f10 + ldd [%o0 + 0x20], %f12 + ldd [%o0 + 0x28], %f14 + ldd [%o0 + 0x30], %f16 + ldd [%o0 + 0x38], %f18 + ldd [%o0 + 0x40], %f20 + ldd [%o0 + 0x48], %f22 + ldd [%o0 + 0x50], %f24 + ldd [%o0 + 0x58], %f26 + ldd [%o0 + 0x60], %f28 + ldd [%o0 + 0x68], %f30 + ldd [%o0 + 0x70], %f32 + ldd [%o0 + 0x78], %f34 + ldd [%o0 + 0x80], %f36 + ldd [%o0 + 0x88], %f38 + ldd [%o0 + 0x90], %f40 + ldd [%o0 + 0x98], %f42 + ldd [%o0 + 0xa0], %f44 + ldd [%o0 + 0xa8], %f46 + ldd [%o0 + 0xb0], %f48 + ldd [%o0 + 0xb8], %f50 + ldd [%o0 + 0xc0], %f52 + ldd [%o0 + 0xc8], %f54 + + CAMELLIA_6ROUNDS_FL_FLI( 8, 0, 2) + CAMELLIA_6ROUNDS_FL_FLI(24, 0, 2) + CAMELLIA_6ROUNDS(40, 0, 2) + fxor %f52, %f2, %f2 + fxor %f54, %f0, %f0 + + st %f2, [%o2 + 0x00] + st %f3, [%o2 + 0x04] + st %f0, [%o2 + 0x08] + st %f1, [%o2 + 0x0c] + + retl + VISExit +ENDPROC(camellia_sparc64_crypt) + + .align 32 +ENTRY(camellia_sparc64_load_keys) + /* %o0=key, %o1=key_len */ + VISEntry + ldd [%o0 + 0x00], %f4 + ldd [%o0 + 0x08], %f6 + ldd [%o0 + 0x10], %f8 + ldd [%o0 + 0x18], %f10 + ldd [%o0 + 0x20], %f12 + ldd [%o0 + 0x28], %f14 + ldd [%o0 + 0x30], %f16 + ldd [%o0 + 0x38], %f18 + ldd [%o0 + 0x40], %f20 + ldd [%o0 + 0x48], %f22 + ldd [%o0 + 0x50], %f24 + ldd [%o0 + 0x58], %f26 + ldd [%o0 + 0x60], %f28 + ldd [%o0 + 0x68], %f30 + ldd [%o0 + 0x70], %f32 + ldd [%o0 + 0x78], %f34 + ldd [%o0 + 0x80], %f36 + ldd [%o0 + 0x88], %f38 + ldd 
[%o0 + 0x90], %f40 + ldd [%o0 + 0x98], %f42 + ldd [%o0 + 0xa0], %f44 + ldd [%o0 + 0xa8], %f46 + ldd [%o0 + 0xb0], %f48 + ldd [%o0 + 0xb8], %f50 + ldd [%o0 + 0xc0], %f52 + retl + ldd [%o0 + 0xc8], %f54 +ENDPROC(camellia_sparc64_load_keys) + + .align 32 +ENTRY(camellia_sparc64_ecb_crypt_3_grand_rounds) + /* %o0=input, %o1=output, %o2=len, %o3=key */ +1: ldd [%o0 + 0x00], %f0 + ldd [%o0 + 0x08], %f2 + add %o0, 0x10, %o0 + fxor %f4, %f0, %f0 + fxor %f6, %f2, %f2 + CAMELLIA_6ROUNDS_FL_FLI( 8, 0, 2) + CAMELLIA_6ROUNDS_FL_FLI(24, 0, 2) + CAMELLIA_6ROUNDS(40, 0, 2) + fxor %f52, %f2, %f2 + fxor %f54, %f0, %f0 + std %f2, [%o1 + 0x00] + std %f0, [%o1 + 0x08] + subcc %o2, 0x10, %o2 + bne,pt %icc, 1b + add %o1, 0x10, %o1 + retl + nop +ENDPROC(camellia_sparc64_ecb_crypt_3_grand_rounds) + + .align 32 +ENTRY(camellia_sparc64_ecb_crypt_4_grand_rounds) + /* %o0=input, %o1=output, %o2=len, %o3=key */ +1: ldd [%o0 + 0x00], %f0 + ldd [%o0 + 0x08], %f2 + add %o0, 0x10, %o0 + fxor %f4, %f0, %f0 + fxor %f6, %f2, %f2 + CAMELLIA_6ROUNDS_FL_FLI( 8, 0, 2) + ldd [%o3 + 0xd0], %f8 + ldd [%o3 + 0xd8], %f10 + ldd [%o3 + 0xe0], %f12 + ldd [%o3 + 0xe8], %f14 + ldd [%o3 + 0xf0], %f16 + ldd [%o3 + 0xf8], %f18 + ldd [%o3 + 0x100], %f20 + ldd [%o3 + 0x108], %f22 + CAMELLIA_6ROUNDS_FL_FLI(24, 0, 2) + CAMELLIA_6ROUNDS_FL_FLI(40, 0, 2) + CAMELLIA_F(8, 2, 0, 2) + CAMELLIA_F(10, 0, 2, 0) + ldd [%o3 + 0x10], %f8 + ldd [%o3 + 0x18], %f10 + CAMELLIA_F(12, 2, 0, 2) + CAMELLIA_F(14, 0, 2, 0) + ldd [%o3 + 0x20], %f12 + ldd [%o3 + 0x28], %f14 + CAMELLIA_F(16, 2, 0, 2) + CAMELLIA_F(18, 0, 2, 0) + ldd [%o3 + 0x30], %f16 + ldd [%o3 + 0x38], %f18 + fxor %f20, %f2, %f2 + fxor %f22, %f0, %f0 + ldd [%o3 + 0x40], %f20 + ldd [%o3 + 0x48], %f22 + std %f2, [%o1 + 0x00] + std %f0, [%o1 + 0x08] + subcc %o2, 0x10, %o2 + bne,pt %icc, 1b + add %o1, 0x10, %o1 + retl + nop +ENDPROC(camellia_sparc64_ecb_crypt_4_grand_rounds) + + .align 32 +ENTRY(camellia_sparc64_cbc_encrypt_3_grand_rounds) + /* %o0=input, %o1=output, %o2=len, %o3=key, %o4=IV */ + ldd [%o4 + 0x00], %f60 + ldd [%o4 + 0x08], %f62 +1: ldd [%o0 + 0x00], %f0 + ldd [%o0 + 0x08], %f2 + add %o0, 0x10, %o0 + fxor %f60, %f0, %f0 + fxor %f62, %f2, %f2 + fxor %f4, %f0, %f0 + fxor %f6, %f2, %f2 + CAMELLIA_6ROUNDS_FL_FLI( 8, 0, 2) + CAMELLIA_6ROUNDS_FL_FLI(24, 0, 2) + CAMELLIA_6ROUNDS(40, 0, 2) + fxor %f52, %f2, %f60 + fxor %f54, %f0, %f62 + std %f60, [%o1 + 0x00] + std %f62, [%o1 + 0x08] + subcc %o2, 0x10, %o2 + bne,pt %icc, 1b + add %o1, 0x10, %o1 + std %f60, [%o4 + 0x00] + retl + std %f62, [%o4 + 0x08] +ENDPROC(camellia_sparc64_cbc_encrypt_3_grand_rounds) + + .align 32 +ENTRY(camellia_sparc64_cbc_encrypt_4_grand_rounds) + /* %o0=input, %o1=output, %o2=len, %o3=key, %o4=IV */ + ldd [%o4 + 0x00], %f60 + ldd [%o4 + 0x08], %f62 +1: ldd [%o0 + 0x00], %f0 + ldd [%o0 + 0x08], %f2 + add %o0, 0x10, %o0 + fxor %f60, %f0, %f0 + fxor %f62, %f2, %f2 + fxor %f4, %f0, %f0 + fxor %f6, %f2, %f2 + CAMELLIA_6ROUNDS_FL_FLI( 8, 0, 2) + ldd [%o3 + 0xd0], %f8 + ldd [%o3 + 0xd8], %f10 + ldd [%o3 + 0xe0], %f12 + ldd [%o3 + 0xe8], %f14 + ldd [%o3 + 0xf0], %f16 + ldd [%o3 + 0xf8], %f18 + ldd [%o3 + 0x100], %f20 + ldd [%o3 + 0x108], %f22 + CAMELLIA_6ROUNDS_FL_FLI(24, 0, 2) + CAMELLIA_6ROUNDS_FL_FLI(40, 0, 2) + CAMELLIA_F(8, 2, 0, 2) + CAMELLIA_F(10, 0, 2, 0) + ldd [%o3 + 0x10], %f8 + ldd [%o3 + 0x18], %f10 + CAMELLIA_F(12, 2, 0, 2) + CAMELLIA_F(14, 0, 2, 0) + ldd [%o3 + 0x20], %f12 + ldd [%o3 + 0x28], %f14 + CAMELLIA_F(16, 2, 0, 2) + CAMELLIA_F(18, 0, 2, 0) + ldd [%o3 + 0x30], %f16 + ldd [%o3 + 0x38], %f18 + fxor %f20, %f2, %f60 
+ fxor %f22, %f0, %f62 + ldd [%o3 + 0x40], %f20 + ldd [%o3 + 0x48], %f22 + std %f60, [%o1 + 0x00] + std %f62, [%o1 + 0x08] + subcc %o2, 0x10, %o2 + bne,pt %icc, 1b + add %o1, 0x10, %o1 + std %f60, [%o4 + 0x00] + retl + std %f62, [%o4 + 0x08] +ENDPROC(camellia_sparc64_cbc_encrypt_4_grand_rounds) + + .align 32 +ENTRY(camellia_sparc64_cbc_decrypt_3_grand_rounds) + /* %o0=input, %o1=output, %o2=len, %o3=key, %o4=IV */ + ldd [%o4 + 0x00], %f60 + ldd [%o4 + 0x08], %f62 +1: ldd [%o0 + 0x00], %f56 + ldd [%o0 + 0x08], %f58 + add %o0, 0x10, %o0 + fxor %f4, %f56, %f0 + fxor %f6, %f58, %f2 + CAMELLIA_6ROUNDS_FL_FLI( 8, 0, 2) + CAMELLIA_6ROUNDS_FL_FLI(24, 0, 2) + CAMELLIA_6ROUNDS(40, 0, 2) + fxor %f52, %f2, %f2 + fxor %f54, %f0, %f0 + fxor %f60, %f2, %f2 + fxor %f62, %f0, %f0 + fsrc2 %f56, %f60 + fsrc2 %f58, %f62 + std %f2, [%o1 + 0x00] + std %f0, [%o1 + 0x08] + subcc %o2, 0x10, %o2 + bne,pt %icc, 1b + add %o1, 0x10, %o1 + std %f60, [%o4 + 0x00] + retl + std %f62, [%o4 + 0x08] +ENDPROC(camellia_sparc64_cbc_decrypt_3_grand_rounds) + + .align 32 +ENTRY(camellia_sparc64_cbc_decrypt_4_grand_rounds) + /* %o0=input, %o1=output, %o2=len, %o3=key, %o4=IV */ + ldd [%o4 + 0x00], %f60 + ldd [%o4 + 0x08], %f62 +1: ldd [%o0 + 0x00], %f56 + ldd [%o0 + 0x08], %f58 + add %o0, 0x10, %o0 + fxor %f4, %f56, %f0 + fxor %f6, %f58, %f2 + CAMELLIA_6ROUNDS_FL_FLI( 8, 0, 2) + ldd [%o3 + 0xd0], %f8 + ldd [%o3 + 0xd8], %f10 + ldd [%o3 + 0xe0], %f12 + ldd [%o3 + 0xe8], %f14 + ldd [%o3 + 0xf0], %f16 + ldd [%o3 + 0xf8], %f18 + ldd [%o3 + 0x100], %f20 + ldd [%o3 + 0x108], %f22 + CAMELLIA_6ROUNDS_FL_FLI(24, 0, 2) + CAMELLIA_6ROUNDS_FL_FLI(40, 0, 2) + CAMELLIA_F(8, 2, 0, 2) + CAMELLIA_F(10, 0, 2, 0) + ldd [%o3 + 0x10], %f8 + ldd [%o3 + 0x18], %f10 + CAMELLIA_F(12, 2, 0, 2) + CAMELLIA_F(14, 0, 2, 0) + ldd [%o3 + 0x20], %f12 + ldd [%o3 + 0x28], %f14 + CAMELLIA_F(16, 2, 0, 2) + CAMELLIA_F(18, 0, 2, 0) + ldd [%o3 + 0x30], %f16 + ldd [%o3 + 0x38], %f18 + fxor %f20, %f2, %f2 + fxor %f22, %f0, %f0 + ldd [%o3 + 0x40], %f20 + ldd [%o3 + 0x48], %f22 + fxor %f60, %f2, %f2 + fxor %f62, %f0, %f0 + fsrc2 %f56, %f60 + fsrc2 %f58, %f62 + std %f2, [%o1 + 0x00] + std %f0, [%o1 + 0x08] + subcc %o2, 0x10, %o2 + bne,pt %icc, 1b + add %o1, 0x10, %o1 + std %f60, [%o4 + 0x00] + retl + std %f62, [%o4 + 0x08] +ENDPROC(camellia_sparc64_cbc_decrypt_4_grand_rounds) diff --git a/arch/sparc/crypto/camellia_glue.c b/arch/sparc/crypto/camellia_glue.c new file mode 100644 index 000000000000..c258cc550a6b --- /dev/null +++ b/arch/sparc/crypto/camellia_glue.c @@ -0,0 +1,318 @@ +/* Glue code for CAMELLIA encryption optimized for sparc64 crypto opcodes. + * + * Copyright (C) 2012 David S. 
Miller + */ + +#include +#include +#include +#include +#include +#include + +#include +#include +#include + +#define CAMELLIA_MIN_KEY_SIZE 16 +#define CAMELLIA_MAX_KEY_SIZE 32 +#define CAMELLIA_BLOCK_SIZE 16 +#define CAMELLIA_TABLE_BYTE_LEN 272 + +struct camellia_sparc64_ctx { + u64 encrypt_key[CAMELLIA_TABLE_BYTE_LEN / sizeof(u64)]; + u64 decrypt_key[CAMELLIA_TABLE_BYTE_LEN / sizeof(u64)]; + int key_len; +}; + +extern void camellia_sparc64_key_expand(const u32 *in_key, u64 *encrypt_key, + unsigned int key_len, u64 *decrypt_key); + +static int camellia_set_key(struct crypto_tfm *tfm, const u8 *_in_key, + unsigned int key_len) +{ + struct camellia_sparc64_ctx *ctx = crypto_tfm_ctx(tfm); + const u32 *in_key = (const u32 *) _in_key; + u32 *flags = &tfm->crt_flags; + + if (key_len != 16 && key_len != 24 && key_len != 32) { + *flags |= CRYPTO_TFM_RES_BAD_KEY_LEN; + return -EINVAL; + } + + ctx->key_len = key_len; + + camellia_sparc64_key_expand(in_key, &ctx->encrypt_key[0], + key_len, &ctx->decrypt_key[0]); + return 0; +} + +extern void camellia_sparc64_crypt(const u64 *key, const u32 *input, + u32 *output, unsigned int key_len); + +static void camellia_encrypt(struct crypto_tfm *tfm, u8 *dst, const u8 *src) +{ + struct camellia_sparc64_ctx *ctx = crypto_tfm_ctx(tfm); + + camellia_sparc64_crypt(&ctx->encrypt_key[0], + (const u32 *) src, + (u32 *) dst, ctx->key_len); +} + +static void camellia_decrypt(struct crypto_tfm *tfm, u8 *dst, const u8 *src) +{ + struct camellia_sparc64_ctx *ctx = crypto_tfm_ctx(tfm); + + camellia_sparc64_crypt(&ctx->decrypt_key[0], + (const u32 *) src, + (u32 *) dst, ctx->key_len); +} + +extern void camellia_sparc64_load_keys(const u64 *key, unsigned int key_len); + +typedef void ecb_crypt_op(const u64 *input, u64 *output, unsigned int len, + const u64 *key); + +extern ecb_crypt_op camellia_sparc64_ecb_crypt_3_grand_rounds; +extern ecb_crypt_op camellia_sparc64_ecb_crypt_4_grand_rounds; + +#define CAMELLIA_BLOCK_MASK (~(CAMELLIA_BLOCK_SIZE - 1)) + +static int __ecb_crypt(struct blkcipher_desc *desc, + struct scatterlist *dst, struct scatterlist *src, + unsigned int nbytes, bool encrypt) +{ + struct camellia_sparc64_ctx *ctx = crypto_blkcipher_ctx(desc->tfm); + struct blkcipher_walk walk; + ecb_crypt_op *op; + const u64 *key; + int err; + + op = camellia_sparc64_ecb_crypt_3_grand_rounds; + if (ctx->key_len != 16) + op = camellia_sparc64_ecb_crypt_4_grand_rounds; + + blkcipher_walk_init(&walk, dst, src, nbytes); + err = blkcipher_walk_virt(desc, &walk); + + if (encrypt) + key = &ctx->encrypt_key[0]; + else + key = &ctx->decrypt_key[0]; + camellia_sparc64_load_keys(key, ctx->key_len); + while ((nbytes = walk.nbytes)) { + unsigned int block_len = nbytes & CAMELLIA_BLOCK_MASK; + + if (likely(block_len)) { + const u64 *src64; + u64 *dst64; + + src64 = (const u64 *)walk.src.virt.addr; + dst64 = (u64 *) walk.dst.virt.addr; + op(src64, dst64, block_len, key); + } + nbytes &= CAMELLIA_BLOCK_SIZE - 1; + err = blkcipher_walk_done(desc, &walk, nbytes); + } + fprs_write(0); + return err; +} + +static int ecb_encrypt(struct blkcipher_desc *desc, + struct scatterlist *dst, struct scatterlist *src, + unsigned int nbytes) +{ + return __ecb_crypt(desc, dst, src, nbytes, true); +} + +static int ecb_decrypt(struct blkcipher_desc *desc, + struct scatterlist *dst, struct scatterlist *src, + unsigned int nbytes) +{ + return __ecb_crypt(desc, dst, src, nbytes, false); +} + +typedef void cbc_crypt_op(const u64 *input, u64 *output, unsigned int len, + const u64 *key, u64 *iv); + +extern 
cbc_crypt_op camellia_sparc64_cbc_encrypt_3_grand_rounds; +extern cbc_crypt_op camellia_sparc64_cbc_encrypt_4_grand_rounds; +extern cbc_crypt_op camellia_sparc64_cbc_decrypt_3_grand_rounds; +extern cbc_crypt_op camellia_sparc64_cbc_decrypt_4_grand_rounds; + +static int cbc_encrypt(struct blkcipher_desc *desc, + struct scatterlist *dst, struct scatterlist *src, + unsigned int nbytes) +{ + struct camellia_sparc64_ctx *ctx = crypto_blkcipher_ctx(desc->tfm); + struct blkcipher_walk walk; + cbc_crypt_op *op; + const u64 *key; + int err; + + op = camellia_sparc64_cbc_encrypt_3_grand_rounds; + if (ctx->key_len != 16) + op = camellia_sparc64_cbc_encrypt_4_grand_rounds; + + blkcipher_walk_init(&walk, dst, src, nbytes); + err = blkcipher_walk_virt(desc, &walk); + + key = &ctx->encrypt_key[0]; + camellia_sparc64_load_keys(key, ctx->key_len); + while ((nbytes = walk.nbytes)) { + unsigned int block_len = nbytes & CAMELLIA_BLOCK_MASK; + + if (likely(block_len)) { + const u64 *src64; + u64 *dst64; + + src64 = (const u64 *)walk.src.virt.addr; + dst64 = (u64 *) walk.dst.virt.addr; + op(src64, dst64, block_len, key, + (u64 *) walk.iv); + } + nbytes &= CAMELLIA_BLOCK_SIZE - 1; + err = blkcipher_walk_done(desc, &walk, nbytes); + } + fprs_write(0); + return err; +} + +static int cbc_decrypt(struct blkcipher_desc *desc, + struct scatterlist *dst, struct scatterlist *src, + unsigned int nbytes) +{ + struct camellia_sparc64_ctx *ctx = crypto_blkcipher_ctx(desc->tfm); + struct blkcipher_walk walk; + cbc_crypt_op *op; + const u64 *key; + int err; + + op = camellia_sparc64_cbc_decrypt_3_grand_rounds; + if (ctx->key_len != 16) + op = camellia_sparc64_cbc_decrypt_4_grand_rounds; + + blkcipher_walk_init(&walk, dst, src, nbytes); + err = blkcipher_walk_virt(desc, &walk); + + key = &ctx->decrypt_key[0]; + camellia_sparc64_load_keys(key, ctx->key_len); + while ((nbytes = walk.nbytes)) { + unsigned int block_len = nbytes & CAMELLIA_BLOCK_MASK; + + if (likely(block_len)) { + const u64 *src64; + u64 *dst64; + + src64 = (const u64 *)walk.src.virt.addr; + dst64 = (u64 *) walk.dst.virt.addr; + op(src64, dst64, block_len, key, + (u64 *) walk.iv); + } + nbytes &= CAMELLIA_BLOCK_SIZE - 1; + err = blkcipher_walk_done(desc, &walk, nbytes); + } + fprs_write(0); + return err; +} + +static struct crypto_alg algs[] = { { + .cra_name = "camellia", + .cra_driver_name = "camellia-sparc64", + .cra_priority = 150, + .cra_flags = CRYPTO_ALG_TYPE_CIPHER, + .cra_blocksize = CAMELLIA_BLOCK_SIZE, + .cra_ctxsize = sizeof(struct camellia_sparc64_ctx), + .cra_alignmask = 3, + .cra_module = THIS_MODULE, + .cra_u = { + .cipher = { + .cia_min_keysize = CAMELLIA_MIN_KEY_SIZE, + .cia_max_keysize = CAMELLIA_MAX_KEY_SIZE, + .cia_setkey = camellia_set_key, + .cia_encrypt = camellia_encrypt, + .cia_decrypt = camellia_decrypt + } + } +}, { + .cra_name = "ecb(camellia)", + .cra_driver_name = "ecb-camellia-sparc64", + .cra_priority = 150, + .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER, + .cra_blocksize = CAMELLIA_BLOCK_SIZE, + .cra_ctxsize = sizeof(struct camellia_sparc64_ctx), + .cra_alignmask = 7, + .cra_type = &crypto_blkcipher_type, + .cra_module = THIS_MODULE, + .cra_u = { + .blkcipher = { + .min_keysize = CAMELLIA_MIN_KEY_SIZE, + .max_keysize = CAMELLIA_MAX_KEY_SIZE, + .setkey = camellia_set_key, + .encrypt = ecb_encrypt, + .decrypt = ecb_decrypt, + }, + }, +}, { + .cra_name = "cbc(camellia)", + .cra_driver_name = "cbc-camellia-sparc64", + .cra_priority = 150, + .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER, + .cra_blocksize = CAMELLIA_BLOCK_SIZE, + .cra_ctxsize = 
sizeof(struct camellia_sparc64_ctx),
+ .cra_alignmask = 7,
+ .cra_type = &crypto_blkcipher_type,
+ .cra_module = THIS_MODULE,
+ .cra_u = {
+ .blkcipher = {
+ .min_keysize = CAMELLIA_MIN_KEY_SIZE,
+ .max_keysize = CAMELLIA_MAX_KEY_SIZE,
+ .setkey = camellia_set_key,
+ .encrypt = cbc_encrypt,
+ .decrypt = cbc_decrypt,
+ },
+ },
+}
+};
+
+static bool __init sparc64_has_camellia_opcode(void)
+{
+ unsigned long cfr;
+
+ if (!(sparc64_elf_hwcap & HWCAP_SPARC_CRYPTO))
+ return false;
+
+ __asm__ __volatile__("rd %%asr26, %0" : "=r" (cfr));
+ if (!(cfr & CFR_CAMELLIA))
+ return false;
+
+ return true;
+}
+
+static int __init camellia_sparc64_mod_init(void)
+{
+ int i;
+
+ for (i = 0; i < ARRAY_SIZE(algs); i++)
+ INIT_LIST_HEAD(&algs[i].cra_list);
+
+ if (sparc64_has_camellia_opcode()) {
+ pr_info("Using sparc64 camellia opcodes optimized CAMELLIA implementation\n");
+ return crypto_register_algs(algs, ARRAY_SIZE(algs));
+ }
+ pr_info("sparc64 camellia opcodes not available.\n");
+ return -ENODEV;
+}
+
+static void __exit camellia_sparc64_mod_fini(void)
+{
+ crypto_unregister_algs(algs, ARRAY_SIZE(algs));
+}
+
+module_init(camellia_sparc64_mod_init);
+module_exit(camellia_sparc64_mod_fini);
+
+MODULE_LICENSE("GPL");
+MODULE_DESCRIPTION("Camellia Cipher Algorithm, sparc64 camellia opcode accelerated");
+
+MODULE_ALIAS("camellia");
diff --git a/crypto/Kconfig b/crypto/Kconfig
index 469fc18bb037..94f232f96d03 100644
--- a/crypto/Kconfig
+++ b/crypto/Kconfig
@@ -758,6 +758,22 @@ config CRYPTO_CAMELLIA_X86_64
 See also:
+config CRYPTO_CAMELLIA_SPARC64
+ tristate "Camellia cipher algorithm (SPARC64)"
+ depends on SPARC64
+ depends on CRYPTO
+ select CRYPTO_ALGAPI
+ help
+ Camellia cipher algorithm module (SPARC64).
+
+ Camellia is a symmetric key block cipher developed jointly
+ at NTT and Mitsubishi Electric Corporation.
+
+ Camellia specifies three key sizes: 128, 192 and 256 bits.
+
+ See also:
+
+
 config CRYPTO_CAST5
 tristate "CAST5 (CAST-128) cipher algorithm"
 select CRYPTO_ALGAPI
-- cgit v1.2.3

From 45dfe237a865368929534ec75fe5f26c151c88d9 Mon Sep 17 00:00:00 2001
From: "David S. Miller"
Date: Tue, 28 Aug 2012 20:55:19 -0700
Subject: sparc64: Use fsrc2 instead of fsrc1 in sparc64 hash crypto drivers.

On SPARC-T4 fsrc2 has 1 cycle of latency, whereas fsrc1 has 11 cycles.

True story.

Signed-off-by: David S.
Miller
---
 arch/sparc/crypto/md5_asm.S | 2 +-
 arch/sparc/crypto/sha1_asm.S | 2 +-
 arch/sparc/crypto/sha256_asm.S | 2 +-
 arch/sparc/crypto/sha512_asm.S | 2 +-
 4 files changed, 4 insertions(+), 4 deletions(-)
(limited to 'arch/sparc/crypto')

diff --git a/arch/sparc/crypto/md5_asm.S b/arch/sparc/crypto/md5_asm.S
index 220b73baa551..ff90903180eb 100644
--- a/arch/sparc/crypto/md5_asm.S
+++ b/arch/sparc/crypto/md5_asm.S
@@ -62,7 +62,7 @@ ENTRY(md5_sparc64_transform)
 .word 0x81b02800
 subcc %o2, 1, %o2
- fsrc1 %f26, %f10
+ fsrc2 %f26, %f10
 bne,pt %xcc, 1b
 add %o1, 0x40, %o1
diff --git a/arch/sparc/crypto/sha1_asm.S b/arch/sparc/crypto/sha1_asm.S
index d2147eb054c1..3cd218ac631b 100644
--- a/arch/sparc/crypto/sha1_asm.S
+++ b/arch/sparc/crypto/sha1_asm.S
@@ -64,7 +64,7 @@ ENTRY(sha1_sparc64_transform)
 .word 0x81b02820
 subcc %o2, 1, %o2
- fsrc1 %f26, %f10
+ fsrc2 %f26, %f10
 bne,pt %xcc, 1b
 add %o1, 0x40, %o1
diff --git a/arch/sparc/crypto/sha256_asm.S b/arch/sparc/crypto/sha256_asm.S
index 771ce265f64a..9da84f5b61f5 100644
--- a/arch/sparc/crypto/sha256_asm.S
+++ b/arch/sparc/crypto/sha256_asm.S
@@ -70,7 +70,7 @@ ENTRY(sha256_sparc64_transform)
 .word 0x81b02840
 subcc %o2, 1, %o2
- fsrc1 %f26, %f10
+ fsrc2 %f26, %f10
 bne,pt %xcc, 1b
 add %o1, 0x40, %o1
diff --git a/arch/sparc/crypto/sha512_asm.S b/arch/sparc/crypto/sha512_asm.S
index 04244da50774..650ba5742f0e 100644
--- a/arch/sparc/crypto/sha512_asm.S
+++ b/arch/sparc/crypto/sha512_asm.S
@@ -94,7 +94,7 @@ ENTRY(sha512_sparc64_transform)
 .word 0x81b02860
 subcc %o2, 1, %o2
- fsrc1 %f50, %f18
+ fsrc2 %f50, %f18
 bne,pt %xcc, 1b
 add %o1, 0x80, %o1
-- cgit v1.2.3

From 0bdcaf7495726688a93a2f7226e9b4beaeabd2ec Mon Sep 17 00:00:00 2001
From: "David S. Miller"
Date: Wed, 29 Aug 2012 12:50:16 -0700
Subject: sparc64: Move AES driver over to a methods-based implementation.

Instead of testing and branching on the key size on every
encrypt/decrypt call, use method ops assigned at key set time.

Reverse the order of float registers used for decryption to make
future changes easier.

Align all assembler routines on a 32-byte boundary.

Signed-off-by: David S.
Miller --- arch/sparc/crypto/aes_asm.S | 879 +++++++++++++++++++++++++++++-------------- arch/sparc/crypto/aes_glue.c | 186 ++++++--- 2 files changed, 728 insertions(+), 337 deletions(-) (limited to 'arch/sparc/crypto') diff --git a/arch/sparc/crypto/aes_asm.S b/arch/sparc/crypto/aes_asm.S index f656dc7a173e..50faae03c592 100644 --- a/arch/sparc/crypto/aes_asm.S +++ b/arch/sparc/crypto/aes_asm.S @@ -44,8 +44,8 @@ .word 0x85b02307; #define MOVXTOD_O0_F0 \ .word 0x81b02308; -#define MOVXTOD_O1_F2 \ - .word 0x85b02309; +#define MOVXTOD_O5_F2 \ + .word 0x85b0230d; #define ENCRYPT_TWO_ROUNDS(KEY_BASE, I0, I1, T0, T1) \ AES_EROUND01(KEY_BASE + 0, I0, I1, T0) \ @@ -86,45 +86,46 @@ ENCRYPT_TWO_ROUNDS(KEY_BASE + 40, I0, I1, T0, T1) \ ENCRYPT_TWO_ROUNDS_LAST(KEY_BASE + 48, I0, I1, T0, T1) -#define DECRYPT_TWO_ROUNDS(KEY_TOP, I0, I1, T0, T1) \ - AES_DROUND23(KEY_TOP - 2, I0, I1, T1) \ - AES_DROUND01(KEY_TOP - 4, I0, I1, T0) \ - AES_DROUND23(KEY_TOP - 6, T0, T1, I1) \ - AES_DROUND01(KEY_TOP - 8, T0, T1, I0) +#define DECRYPT_TWO_ROUNDS(KEY_BASE, I0, I1, T0, T1) \ + AES_DROUND23(KEY_BASE + 0, I0, I1, T1) \ + AES_DROUND01(KEY_BASE + 2, I0, I1, T0) \ + AES_DROUND23(KEY_BASE + 4, T0, T1, I1) \ + AES_DROUND01(KEY_BASE + 6, T0, T1, I0) -#define DECRYPT_TWO_ROUNDS_LAST(KEY_TOP, I0, I1, T0, T1) \ - AES_DROUND23(KEY_TOP - 2, I0, I1, T1) \ - AES_DROUND01(KEY_TOP - 4, I0, I1, T0) \ - AES_DROUND23_L(KEY_TOP - 6, T0, T1, I1) \ - AES_DROUND01_L(KEY_TOP - 8, T0, T1, I0) +#define DECRYPT_TWO_ROUNDS_LAST(KEY_BASE, I0, I1, T0, T1) \ + AES_DROUND23(KEY_BASE + 0, I0, I1, T1) \ + AES_DROUND01(KEY_BASE + 2, I0, I1, T0) \ + AES_DROUND23_L(KEY_BASE + 4, T0, T1, I1) \ + AES_DROUND01_L(KEY_BASE + 6, T0, T1, I0) /* 10 rounds */ -#define DECRYPT_128(KEY_TOP, I0, I1, T0, T1) \ - DECRYPT_TWO_ROUNDS(KEY_TOP - 0, I0, I1, T0, T1) \ - DECRYPT_TWO_ROUNDS(KEY_TOP - 8, I0, I1, T0, T1) \ - DECRYPT_TWO_ROUNDS(KEY_TOP - 16, I0, I1, T0, T1) \ - DECRYPT_TWO_ROUNDS(KEY_TOP - 24, I0, I1, T0, T1) \ - DECRYPT_TWO_ROUNDS_LAST(KEY_TOP - 32, I0, I1, T0, T1) +#define DECRYPT_128(KEY_BASE, I0, I1, T0, T1) \ + DECRYPT_TWO_ROUNDS(KEY_BASE + 0, I0, I1, T0, T1) \ + DECRYPT_TWO_ROUNDS(KEY_BASE + 8, I0, I1, T0, T1) \ + DECRYPT_TWO_ROUNDS(KEY_BASE + 16, I0, I1, T0, T1) \ + DECRYPT_TWO_ROUNDS(KEY_BASE + 24, I0, I1, T0, T1) \ + DECRYPT_TWO_ROUNDS_LAST(KEY_BASE + 32, I0, I1, T0, T1) /* 12 rounds */ -#define DECRYPT_192(KEY_TOP, I0, I1, T0, T1) \ - DECRYPT_TWO_ROUNDS(KEY_TOP - 0, I0, I1, T0, T1) \ - DECRYPT_TWO_ROUNDS(KEY_TOP - 8, I0, I1, T0, T1) \ - DECRYPT_TWO_ROUNDS(KEY_TOP - 16, I0, I1, T0, T1) \ - DECRYPT_TWO_ROUNDS(KEY_TOP - 24, I0, I1, T0, T1) \ - DECRYPT_TWO_ROUNDS(KEY_TOP - 32, I0, I1, T0, T1) \ - DECRYPT_TWO_ROUNDS_LAST(KEY_TOP - 40, I0, I1, T0, T1) +#define DECRYPT_192(KEY_BASE, I0, I1, T0, T1) \ + DECRYPT_TWO_ROUNDS(KEY_BASE + 0, I0, I1, T0, T1) \ + DECRYPT_TWO_ROUNDS(KEY_BASE + 8, I0, I1, T0, T1) \ + DECRYPT_TWO_ROUNDS(KEY_BASE + 16, I0, I1, T0, T1) \ + DECRYPT_TWO_ROUNDS(KEY_BASE + 24, I0, I1, T0, T1) \ + DECRYPT_TWO_ROUNDS(KEY_BASE + 32, I0, I1, T0, T1) \ + DECRYPT_TWO_ROUNDS_LAST(KEY_BASE + 40, I0, I1, T0, T1) /* 14 rounds */ -#define DECRYPT_256(KEY_TOP, I0, I1, T0, T1) \ - DECRYPT_TWO_ROUNDS(KEY_TOP - 0, I0, I1, T0, T1) \ - DECRYPT_TWO_ROUNDS(KEY_TOP - 8, I0, I1, T0, T1) \ - DECRYPT_TWO_ROUNDS(KEY_TOP - 16, I0, I1, T0, T1) \ - DECRYPT_TWO_ROUNDS(KEY_TOP - 24, I0, I1, T0, T1) \ - DECRYPT_TWO_ROUNDS(KEY_TOP - 32, I0, I1, T0, T1) \ - DECRYPT_TWO_ROUNDS(KEY_TOP - 40, I0, I1, T0, T1) \ - DECRYPT_TWO_ROUNDS_LAST(KEY_TOP - 48, I0, I1, T0, T1) - 
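The KEY_TOP-relative indexing removed above only worked because the decrypt
schedule was walked from its end; with the reversed float register loading
this patch introduces, the decrypt macros below can count upward from
KEY_BASE exactly like their encrypt counterparts. A minimal C sketch of
that addressing flip, assuming a hypothetical load_decrypt_keys_128()
helper and an array f[] standing in for %f8..%f46:

	#include <stdint.h>

	/* Load the 20 64-bit schedule words below the final AES-128
	 * round key (offsets 0x00-0x98) back-to-front, so the last-used
	 * round key lands in the lowest registers and round macros can
	 * index ascending from a base. */
	static void load_decrypt_keys_128(const uint64_t *sched,
					  uint64_t *f)
	{
		int i;

		for (i = 0; i < 20; i++)
			f[i] = sched[19 - i];	/* 0x98 -> f[0], ..., 0x00 -> f[19] */
	}
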
+#define DECRYPT_256(KEY_BASE, I0, I1, T0, T1) \ + DECRYPT_TWO_ROUNDS(KEY_BASE + 0, I0, I1, T0, T1) \ + DECRYPT_TWO_ROUNDS(KEY_BASE + 8, I0, I1, T0, T1) \ + DECRYPT_TWO_ROUNDS(KEY_BASE + 16, I0, I1, T0, T1) \ + DECRYPT_TWO_ROUNDS(KEY_BASE + 24, I0, I1, T0, T1) \ + DECRYPT_TWO_ROUNDS(KEY_BASE + 32, I0, I1, T0, T1) \ + DECRYPT_TWO_ROUNDS(KEY_BASE + 40, I0, I1, T0, T1) \ + DECRYPT_TWO_ROUNDS_LAST(KEY_BASE + 48, I0, I1, T0, T1) + + .align 32 ENTRY(aes_sparc64_key_expand) /* %o0=input_key, %o1=output_key, %o2=key_len */ VISEntry @@ -314,34 +315,63 @@ ENTRY(aes_sparc64_key_expand) VISExit ENDPROC(aes_sparc64_key_expand) -ENTRY(aes_sparc64_encrypt) - /* %o0=key, %o1=input, %o2=output, %o3=key_len */ + .align 32 +ENTRY(aes_sparc64_encrypt_128) + /* %o0=key, %o1=input, %o2=output */ VISEntry ld [%o1 + 0x00], %f4 ld [%o1 + 0x04], %f5 ld [%o1 + 0x08], %f6 ld [%o1 + 0x0c], %f7 - ldd [%o0 + 0x00], %f8 ldd [%o0 + 0x08], %f10 - cmp %o3, 24 + ldd [%o0 + 0x10], %f12 + ldd [%o0 + 0x18], %f14 + ldd [%o0 + 0x20], %f16 + ldd [%o0 + 0x28], %f18 + ldd [%o0 + 0x30], %f20 + ldd [%o0 + 0x38], %f22 + ldd [%o0 + 0x40], %f24 + ldd [%o0 + 0x48], %f26 + ldd [%o0 + 0x50], %f28 + ldd [%o0 + 0x58], %f30 + ldd [%o0 + 0x60], %f32 + ldd [%o0 + 0x68], %f34 + ldd [%o0 + 0x70], %f36 + ldd [%o0 + 0x78], %f38 + ldd [%o0 + 0x80], %f40 + ldd [%o0 + 0x88], %f42 + ldd [%o0 + 0x90], %f44 + ldd [%o0 + 0x98], %f46 + ldd [%o0 + 0xa0], %f48 + ldd [%o0 + 0xa8], %f50 fxor %f8, %f4, %f4 - bl 2f - fxor %f10, %f6, %f6 + fxor %f10, %f6, %f6 + ENCRYPT_128(12, 4, 6, 0, 2) + st %f4, [%o2 + 0x00] + st %f5, [%o2 + 0x04] + st %f6, [%o2 + 0x08] + st %f7, [%o2 + 0x0c] + retl + VISExit +ENDPROC(aes_sparc64_encrypt_128) - be 1f - ldd [%o0 + 0x10], %f8 + .align 32 +ENTRY(aes_sparc64_encrypt_192) + /* %o0=key, %o1=input, %o2=output */ + VISEntry + ld [%o1 + 0x00], %f4 + ld [%o1 + 0x04], %f5 + ld [%o1 + 0x08], %f6 + ld [%o1 + 0x0c], %f7 - ldd [%o0 + 0x18], %f10 - ldd [%o0 + 0x20], %f12 - ldd [%o0 + 0x28], %f14 - add %o0, 0x20, %o0 + ldd [%o0 + 0x00], %f8 + ldd [%o0 + 0x08], %f10 - ENCRYPT_TWO_ROUNDS(8, 4, 6, 0, 2) + fxor %f8, %f4, %f4 + fxor %f10, %f6, %f6 ldd [%o0 + 0x10], %f8 - -1: ldd [%o0 + 0x18], %f10 ldd [%o0 + 0x20], %f12 ldd [%o0 + 0x28], %f14 @@ -349,7 +379,6 @@ ENTRY(aes_sparc64_encrypt) ENCRYPT_TWO_ROUNDS(8, 4, 6, 0, 2) -2: ldd [%o0 + 0x10], %f12 ldd [%o0 + 0x18], %f14 ldd [%o0 + 0x20], %f16 @@ -381,66 +410,63 @@ ENTRY(aes_sparc64_encrypt) retl VISExit -ENDPROC(aes_sparc64_encrypt) +ENDPROC(aes_sparc64_encrypt_192) -ENTRY(aes_sparc64_decrypt) - /* %o0=key, %o1=input, %o2=output, %o3=key_len, %o4=exp_key_len */ + .align 32 +ENTRY(aes_sparc64_encrypt_256) + /* %o0=key, %o1=input, %o2=output */ VISEntry ld [%o1 + 0x00], %f4 - add %o0, %o4, %o0 ld [%o1 + 0x04], %f5 ld [%o1 + 0x08], %f6 ld [%o1 + 0x0c], %f7 - - ldd [%o0 - 0x08], %f8 - ldd [%o0 - 0x10], %f10 - cmp %o3, 24 - fxor %f10, %f4, %f4 - bl 2f - fxor %f8, %f6, %f6 + ldd [%o0 + 0x00], %f8 + ldd [%o0 + 0x08], %f10 - be 1f - ldd [%o0 - 0x30], %f8 + fxor %f8, %f4, %f4 + fxor %f10, %f6, %f6 - ldd [%o0 - 0x28], %f10 - ldd [%o0 - 0x20], %f12 - ldd [%o0 - 0x18], %f14 - sub %o0, 0x20, %o0 + ldd [%o0 + 0x10], %f8 - DECRYPT_TWO_ROUNDS(16, 4, 6, 0, 2) + ldd [%o0 + 0x18], %f10 + ldd [%o0 + 0x20], %f12 + ldd [%o0 + 0x28], %f14 + add %o0, 0x20, %o0 - ldd [%o0 - 0x30], %f8 -1: - ldd [%o0 - 0x28], %f10 - ldd [%o0 - 0x20], %f12 - ldd [%o0 - 0x18], %f14 - sub %o0, 0x20, %o0 + ENCRYPT_TWO_ROUNDS(8, 4, 6, 0, 2) - DECRYPT_TWO_ROUNDS(16, 4, 6, 0, 2) -2: - ldd [%o0 - 0xb0], %f12 - ldd [%o0 - 0xa8], %f14 - ldd [%o0 - 
0xa0], %f16 - ldd [%o0 - 0x98], %f18 - ldd [%o0 - 0x90], %f20 - ldd [%o0 - 0x88], %f22 - ldd [%o0 - 0x80], %f24 - ldd [%o0 - 0x78], %f26 - ldd [%o0 - 0x70], %f28 - ldd [%o0 - 0x68], %f30 - ldd [%o0 - 0x60], %f32 - ldd [%o0 - 0x58], %f34 - ldd [%o0 - 0x50], %f36 - ldd [%o0 - 0x48], %f38 - ldd [%o0 - 0x40], %f40 - ldd [%o0 - 0x38], %f42 - ldd [%o0 - 0x30], %f44 - ldd [%o0 - 0x28], %f46 - ldd [%o0 - 0x20], %f48 - ldd [%o0 - 0x18], %f50 - - DECRYPT_128(52, 4, 6, 0, 2) + ldd [%o0 + 0x10], %f8 + + ldd [%o0 + 0x18], %f10 + ldd [%o0 + 0x20], %f12 + ldd [%o0 + 0x28], %f14 + add %o0, 0x20, %o0 + + ENCRYPT_TWO_ROUNDS(8, 4, 6, 0, 2) + + ldd [%o0 + 0x10], %f12 + ldd [%o0 + 0x18], %f14 + ldd [%o0 + 0x20], %f16 + ldd [%o0 + 0x28], %f18 + ldd [%o0 + 0x30], %f20 + ldd [%o0 + 0x38], %f22 + ldd [%o0 + 0x40], %f24 + ldd [%o0 + 0x48], %f26 + ldd [%o0 + 0x50], %f28 + ldd [%o0 + 0x58], %f30 + ldd [%o0 + 0x60], %f32 + ldd [%o0 + 0x68], %f34 + ldd [%o0 + 0x70], %f36 + ldd [%o0 + 0x78], %f38 + ldd [%o0 + 0x80], %f40 + ldd [%o0 + 0x88], %f42 + ldd [%o0 + 0x90], %f44 + ldd [%o0 + 0x98], %f46 + ldd [%o0 + 0xa0], %f48 + ldd [%o0 + 0xa8], %f50 + + ENCRYPT_128(12, 4, 6, 0, 2) st %f4, [%o2 + 0x00] st %f5, [%o2 + 0x04] @@ -449,15 +475,231 @@ ENTRY(aes_sparc64_decrypt) retl VISExit -ENDPROC(aes_sparc64_decrypt) +ENDPROC(aes_sparc64_encrypt_256) -ENTRY(aes_sparc64_load_decrypt_keys) + .align 32 +ENTRY(aes_sparc64_decrypt_128) + /* %o0=key, %o1=input, %o2=output */ + VISEntry + ld [%o1 + 0x00], %f4 + ld [%o1 + 0x04], %f5 + ld [%o1 + 0x08], %f6 + ld [%o1 + 0x0c], %f7 + ldd [%o0 + 0xa0], %f8 + ldd [%o0 + 0xa8], %f10 + ldd [%o0 + 0x98], %f12 + ldd [%o0 + 0x90], %f14 + ldd [%o0 + 0x88], %f16 + ldd [%o0 + 0x80], %f18 + ldd [%o0 + 0x78], %f20 + ldd [%o0 + 0x70], %f22 + ldd [%o0 + 0x68], %f24 + ldd [%o0 + 0x60], %f26 + ldd [%o0 + 0x58], %f28 + ldd [%o0 + 0x50], %f30 + ldd [%o0 + 0x48], %f32 + ldd [%o0 + 0x40], %f34 + ldd [%o0 + 0x38], %f36 + ldd [%o0 + 0x30], %f38 + ldd [%o0 + 0x28], %f40 + ldd [%o0 + 0x20], %f42 + ldd [%o0 + 0x18], %f44 + ldd [%o0 + 0x10], %f46 + ldd [%o0 + 0x08], %f48 + ldd [%o0 + 0x00], %f50 + fxor %f8, %f4, %f4 + fxor %f10, %f6, %f6 + DECRYPT_128(12, 4, 6, 0, 2) + st %f4, [%o2 + 0x00] + st %f5, [%o2 + 0x04] + st %f6, [%o2 + 0x08] + st %f7, [%o2 + 0x0c] + retl + VISExit +ENDPROC(aes_sparc64_decrypt_128) + + .align 32 +ENTRY(aes_sparc64_decrypt_192) + /* %o0=key, %o1=input, %o2=output */ + VISEntry + ld [%o1 + 0x00], %f4 + ld [%o1 + 0x04], %f5 + ld [%o1 + 0x08], %f6 + ld [%o1 + 0x0c], %f7 + ldd [%o0 + 0xc0], %f8 + ldd [%o0 + 0xc8], %f10 + ldd [%o0 + 0xb8], %f12 + ldd [%o0 + 0xb0], %f14 + ldd [%o0 + 0xa8], %f16 + ldd [%o0 + 0xa0], %f18 + fxor %f8, %f4, %f4 + fxor %f10, %f6, %f6 + ldd [%o0 + 0x98], %f20 + ldd [%o0 + 0x90], %f22 + ldd [%o0 + 0x88], %f24 + ldd [%o0 + 0x80], %f26 + DECRYPT_TWO_ROUNDS(12, 4, 6, 0, 2) + ldd [%o0 + 0x78], %f28 + ldd [%o0 + 0x70], %f30 + ldd [%o0 + 0x68], %f32 + ldd [%o0 + 0x60], %f34 + ldd [%o0 + 0x58], %f36 + ldd [%o0 + 0x50], %f38 + ldd [%o0 + 0x48], %f40 + ldd [%o0 + 0x40], %f42 + ldd [%o0 + 0x38], %f44 + ldd [%o0 + 0x30], %f46 + ldd [%o0 + 0x28], %f48 + ldd [%o0 + 0x20], %f50 + ldd [%o0 + 0x18], %f52 + ldd [%o0 + 0x10], %f54 + ldd [%o0 + 0x08], %f56 + ldd [%o0 + 0x00], %f58 + DECRYPT_128(20, 4, 6, 0, 2) + st %f4, [%o2 + 0x00] + st %f5, [%o2 + 0x04] + st %f6, [%o2 + 0x08] + st %f7, [%o2 + 0x0c] + retl + VISExit +ENDPROC(aes_sparc64_decrypt_192) + + .align 32 +ENTRY(aes_sparc64_decrypt_256) + /* %o0=key, %o1=input, %o2=output */ + VISEntry + ld [%o1 + 0x00], %f4 + ld [%o1 + 0x04], %f5 + 
ld [%o1 + 0x08], %f6 + ld [%o1 + 0x0c], %f7 + ldd [%o0 + 0xe0], %f8 + ldd [%o0 + 0xe8], %f10 + ldd [%o0 + 0xd8], %f12 + ldd [%o0 + 0xd0], %f14 + ldd [%o0 + 0xc8], %f16 + fxor %f8, %f4, %f4 + ldd [%o0 + 0xc0], %f18 + fxor %f10, %f6, %f6 + ldd [%o0 + 0xb8], %f20 + AES_DROUND23(12, 4, 6, 2) + ldd [%o0 + 0xb0], %f22 + AES_DROUND01(14, 4, 6, 0) + ldd [%o0 + 0xa8], %f24 + AES_DROUND23(16, 0, 2, 6) + ldd [%o0 + 0xa0], %f26 + AES_DROUND01(18, 0, 2, 4) + ldd [%o0 + 0x98], %f12 + AES_DROUND23(20, 4, 6, 2) + ldd [%o0 + 0x90], %f14 + AES_DROUND01(22, 4, 6, 0) + ldd [%o0 + 0x88], %f16 + AES_DROUND23(24, 0, 2, 6) + ldd [%o0 + 0x80], %f18 + AES_DROUND01(26, 0, 2, 4) + ldd [%o0 + 0x78], %f20 + AES_DROUND23(12, 4, 6, 2) + ldd [%o0 + 0x70], %f22 + AES_DROUND01(14, 4, 6, 0) + ldd [%o0 + 0x68], %f24 + AES_DROUND23(16, 0, 2, 6) + ldd [%o0 + 0x60], %f26 + AES_DROUND01(18, 0, 2, 4) + ldd [%o0 + 0x58], %f28 + AES_DROUND23(20, 4, 6, 2) + ldd [%o0 + 0x50], %f30 + AES_DROUND01(22, 4, 6, 0) + ldd [%o0 + 0x48], %f32 + AES_DROUND23(24, 0, 2, 6) + ldd [%o0 + 0x40], %f34 + AES_DROUND01(26, 0, 2, 4) + ldd [%o0 + 0x38], %f36 + AES_DROUND23(28, 4, 6, 2) + ldd [%o0 + 0x30], %f38 + AES_DROUND01(30, 4, 6, 0) + ldd [%o0 + 0x28], %f40 + AES_DROUND23(32, 0, 2, 6) + ldd [%o0 + 0x20], %f42 + AES_DROUND01(34, 0, 2, 4) + ldd [%o0 + 0x18], %f44 + AES_DROUND23(36, 4, 6, 2) + ldd [%o0 + 0x10], %f46 + AES_DROUND01(38, 4, 6, 0) + ldd [%o0 + 0x08], %f48 + AES_DROUND23(40, 0, 2, 6) + ldd [%o0 + 0x00], %f50 + AES_DROUND01(42, 0, 2, 4) + AES_DROUND23(44, 4, 6, 2) + AES_DROUND01(46, 4, 6, 0) + AES_DROUND23_L(48, 0, 2, 6) + AES_DROUND01_L(50, 0, 2, 4) + st %f4, [%o2 + 0x00] + st %f5, [%o2 + 0x04] + st %f6, [%o2 + 0x08] + st %f7, [%o2 + 0x0c] + retl + VISExit +ENDPROC(aes_sparc64_decrypt_256) + + .align 32 +ENTRY(aes_sparc64_load_encrypt_keys_128) /* %o0=key */ - ba,pt %xcc, aes_sparc64_load_encrypt_keys - sub %o0, 0x10, %o0 -ENDPROC(aes_sparc64_load_decrypt_keys) + VISEntry + ldd [%o0 + 0x10], %f8 + ldd [%o0 + 0x18], %f10 + ldd [%o0 + 0x20], %f12 + ldd [%o0 + 0x28], %f14 + ldd [%o0 + 0x30], %f16 + ldd [%o0 + 0x38], %f18 + ldd [%o0 + 0x40], %f20 + ldd [%o0 + 0x48], %f22 + ldd [%o0 + 0x50], %f24 + ldd [%o0 + 0x58], %f26 + ldd [%o0 + 0x60], %f28 + ldd [%o0 + 0x68], %f30 + ldd [%o0 + 0x70], %f32 + ldd [%o0 + 0x78], %f34 + ldd [%o0 + 0x80], %f36 + ldd [%o0 + 0x88], %f38 + ldd [%o0 + 0x90], %f40 + ldd [%o0 + 0x98], %f42 + ldd [%o0 + 0xa0], %f44 + retl + ldd [%o0 + 0xa8], %f46 +ENDPROC(aes_sparc64_load_encrypt_keys_128) -ENTRY(aes_sparc64_load_encrypt_keys) + .align 32 +ENTRY(aes_sparc64_load_encrypt_keys_192) + /* %o0=key */ + VISEntry + ldd [%o0 + 0x10], %f8 + ldd [%o0 + 0x18], %f10 + ldd [%o0 + 0x20], %f12 + ldd [%o0 + 0x28], %f14 + ldd [%o0 + 0x30], %f16 + ldd [%o0 + 0x38], %f18 + ldd [%o0 + 0x40], %f20 + ldd [%o0 + 0x48], %f22 + ldd [%o0 + 0x50], %f24 + ldd [%o0 + 0x58], %f26 + ldd [%o0 + 0x60], %f28 + ldd [%o0 + 0x68], %f30 + ldd [%o0 + 0x70], %f32 + ldd [%o0 + 0x78], %f34 + ldd [%o0 + 0x80], %f36 + ldd [%o0 + 0x88], %f38 + ldd [%o0 + 0x90], %f40 + ldd [%o0 + 0x98], %f42 + ldd [%o0 + 0xa0], %f44 + ldd [%o0 + 0xa8], %f46 + ldd [%o0 + 0xb0], %f48 + ldd [%o0 + 0xb8], %f50 + ldd [%o0 + 0xc0], %f52 + retl + ldd [%o0 + 0xc8], %f54 +ENDPROC(aes_sparc64_load_encrypt_keys_192) + + .align 32 +ENTRY(aes_sparc64_load_encrypt_keys_256) /* %o0=key */ VISEntry ldd [%o0 + 0x10], %f8 @@ -489,171 +731,241 @@ ENTRY(aes_sparc64_load_encrypt_keys) ldd [%o0 + 0xe0], %f60 retl ldd [%o0 + 0xe8], %f62 -ENDPROC(aes_sparc64_load_encrypt_keys) 
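The single generic loader gives way here to one loader per key size, each
pulling the whole schedule into the FPU up front. A quick register-budget
check (an illustrative sketch, not kernel code; highest_freg() is a
made-up helper) matches the ranges used by the three routines, given that
the round-0 key stays in integer registers and each remaining round key
occupies one even/odd %f pair starting at %f8:

	#include <stdio.h>

	static int highest_freg(int rounds)
	{
		int dwords = 2 * rounds;	/* round keys 1..rounds */

		return 8 + 2 * (dwords - 1);	/* even %f regs from %f8 up */
	}

	int main(void)
	{
		printf("AES-128: %%f8..%%f%d\n", highest_freg(10));	/* %f46 */
		printf("AES-192: %%f8..%%f%d\n", highest_freg(12));	/* %f54 */
		printf("AES-256: %%f8..%%f%d\n", highest_freg(14));	/* %f62 */
		return 0;
	}
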
+ENDPROC(aes_sparc64_load_encrypt_keys_256) + + .align 32 +ENTRY(aes_sparc64_load_decrypt_keys_128) + /* %o0=key */ + VISEntry + ldd [%o0 + 0x98], %f8 + ldd [%o0 + 0x90], %f10 + ldd [%o0 + 0x88], %f12 + ldd [%o0 + 0x80], %f14 + ldd [%o0 + 0x78], %f16 + ldd [%o0 + 0x70], %f18 + ldd [%o0 + 0x68], %f20 + ldd [%o0 + 0x60], %f22 + ldd [%o0 + 0x58], %f24 + ldd [%o0 + 0x50], %f26 + ldd [%o0 + 0x48], %f28 + ldd [%o0 + 0x40], %f30 + ldd [%o0 + 0x38], %f32 + ldd [%o0 + 0x30], %f34 + ldd [%o0 + 0x28], %f36 + ldd [%o0 + 0x20], %f38 + ldd [%o0 + 0x18], %f40 + ldd [%o0 + 0x10], %f42 + ldd [%o0 + 0x08], %f44 + retl + ldd [%o0 + 0x00], %f46 +ENDPROC(aes_sparc64_load_decrypt_keys_128) -ENTRY(aes_sparc64_ecb_encrypt) - /* %o0=key, %o1=input, %o2=output, %o3=key_len, %o4=len */ + .align 32 +ENTRY(aes_sparc64_load_decrypt_keys_192) + /* %o0=key */ + VISEntry + ldd [%o0 + 0xb8], %f8 + ldd [%o0 + 0xb0], %f10 + ldd [%o0 + 0xa8], %f12 + ldd [%o0 + 0xa0], %f14 + ldd [%o0 + 0x98], %f16 + ldd [%o0 + 0x90], %f18 + ldd [%o0 + 0x88], %f20 + ldd [%o0 + 0x80], %f22 + ldd [%o0 + 0x78], %f24 + ldd [%o0 + 0x70], %f26 + ldd [%o0 + 0x68], %f28 + ldd [%o0 + 0x60], %f30 + ldd [%o0 + 0x58], %f32 + ldd [%o0 + 0x50], %f34 + ldd [%o0 + 0x48], %f36 + ldd [%o0 + 0x40], %f38 + ldd [%o0 + 0x38], %f40 + ldd [%o0 + 0x30], %f42 + ldd [%o0 + 0x28], %f44 + ldd [%o0 + 0x20], %f46 + ldd [%o0 + 0x18], %f48 + ldd [%o0 + 0x10], %f50 + ldd [%o0 + 0x08], %f52 + retl + ldd [%o0 + 0x00], %f54 +ENDPROC(aes_sparc64_load_decrypt_keys_192) + + .align 32 +ENTRY(aes_sparc64_load_decrypt_keys_256) + /* %o0=key */ + VISEntry + ldd [%o0 + 0xd8], %f8 + ldd [%o0 + 0xd0], %f10 + ldd [%o0 + 0xc8], %f12 + ldd [%o0 + 0xc0], %f14 + ldd [%o0 + 0xb8], %f16 + ldd [%o0 + 0xb0], %f18 + ldd [%o0 + 0xa8], %f20 + ldd [%o0 + 0xa0], %f22 + ldd [%o0 + 0x98], %f24 + ldd [%o0 + 0x90], %f26 + ldd [%o0 + 0x88], %f28 + ldd [%o0 + 0x80], %f30 + ldd [%o0 + 0x78], %f32 + ldd [%o0 + 0x70], %f34 + ldd [%o0 + 0x68], %f36 + ldd [%o0 + 0x60], %f38 + ldd [%o0 + 0x58], %f40 + ldd [%o0 + 0x50], %f42 + ldd [%o0 + 0x48], %f44 + ldd [%o0 + 0x40], %f46 + ldd [%o0 + 0x38], %f48 + ldd [%o0 + 0x30], %f50 + ldd [%o0 + 0x28], %f52 + ldd [%o0 + 0x20], %f54 + ldd [%o0 + 0x18], %f56 + ldd [%o0 + 0x10], %f58 + ldd [%o0 + 0x08], %f60 + retl + ldd [%o0 + 0x00], %f62 +ENDPROC(aes_sparc64_load_decrypt_keys_256) + + .align 32 +ENTRY(aes_sparc64_ecb_encrypt_128) + /* %o0=key, %o1=input, %o2=output, %o3=len */ ldx [%o0 + 0x00], %g1 ldx [%o0 + 0x08], %g2 - cmp %o3, 24 - bl 2f - nop - be 1f - nop - -0: - /* 256-bit key */ - ldx [%o1 + 0x00], %g3 +1: ldx [%o1 + 0x00], %g3 ldx [%o1 + 0x08], %g7 add %o1, 0x10, %o1 xor %g1, %g3, %g3 xor %g2, %g7, %g7 MOVXTOD_G3_F4 MOVXTOD_G7_F6 - - ENCRYPT_256(8, 4, 6, 0, 2) - + ENCRYPT_128(8, 4, 6, 0, 2) std %f4, [%o2 + 0x00] std %f6, [%o2 + 0x08] - subcc %o4, 0x10, %o4 - bne,pt %xcc, 0b + subcc %o3, 0x10, %o3 + bne,pt %xcc, 1b add %o2, 0x10, %o2 - retl nop +ENDPROC(aes_sparc64_ecb_encrypt_128) -1: - /* 192-bit key */ - ldx [%o1 + 0x00], %g3 + .align 32 +ENTRY(aes_sparc64_ecb_encrypt_192) + /* %o0=key, %o1=input, %o2=output, %o3=len */ + ldx [%o0 + 0x00], %g1 + ldx [%o0 + 0x08], %g2 +1: ldx [%o1 + 0x00], %g3 ldx [%o1 + 0x08], %g7 add %o1, 0x10, %o1 xor %g1, %g3, %g3 xor %g2, %g7, %g7 MOVXTOD_G3_F4 MOVXTOD_G7_F6 - ENCRYPT_192(8, 4, 6, 0, 2) - std %f4, [%o2 + 0x00] std %f6, [%o2 + 0x08] - subcc %o4, 0x10, %o4 + subcc %o3, 0x10, %o3 bne,pt %xcc, 1b add %o2, 0x10, %o2 - retl nop +ENDPROC(aes_sparc64_ecb_encrypt_192) -2: - /* 128-bit key */ - ldx [%o1 + 0x00], %g3 + .align 32 
+ENTRY(aes_sparc64_ecb_encrypt_256) + /* %o0=key, %o1=input, %o2=output, %o3=len */ + ldx [%o0 + 0x00], %g1 + ldx [%o0 + 0x08], %g2 +1: ldx [%o1 + 0x00], %g3 ldx [%o1 + 0x08], %g7 add %o1, 0x10, %o1 xor %g1, %g3, %g3 xor %g2, %g7, %g7 MOVXTOD_G3_F4 MOVXTOD_G7_F6 - - ENCRYPT_128(8, 4, 6, 0, 2) - + ENCRYPT_256(8, 4, 6, 0, 2) std %f4, [%o2 + 0x00] std %f6, [%o2 + 0x08] - subcc %o4, 0x10, %o4 - bne,pt %xcc, 2b + subcc %o3, 0x10, %o3 + bne,pt %xcc, 1b add %o2, 0x10, %o2 - retl nop -ENDPROC(aes_sparc64_ecb_encrypt) +ENDPROC(aes_sparc64_ecb_encrypt_256) -ENTRY(aes_sparc64_ecb_decrypt) - /* %o0=&key[key_len], %o1=input, %o2=output, %o3=key_len, %o4=len, %o5=iv */ + .align 32 +ENTRY(aes_sparc64_ecb_decrypt_128) + /* %o0=&key[key_len], %o1=input, %o2=output, %o3=len */ ldx [%o0 - 0x10], %g1 ldx [%o0 - 0x08], %g2 - cmp %o3, 24 - bl 2f - nop - be 1f - nop - -0: - /* 256-bit key */ - ldx [%o1 + 0x00], %g3 +1: ldx [%o1 + 0x00], %g3 ldx [%o1 + 0x08], %g7 add %o1, 0x10, %o1 xor %g1, %g3, %g3 xor %g2, %g7, %g7 MOVXTOD_G3_F4 MOVXTOD_G7_F6 - - DECRYPT_256(64, 4, 6, 0, 2) - + DECRYPT_128(8, 4, 6, 0, 2) std %f4, [%o2 + 0x00] std %f6, [%o2 + 0x08] - subcc %o4, 0x10, %o4 - bne,pt %xcc, 0b + subcc %o3, 0x10, %o3 + bne,pt %xcc, 1b add %o2, 0x10, %o2 - retl nop +ENDPROC(aes_sparc64_ecb_decrypt_128) -1: - /* 192-bit key */ - ldx [%o1 + 0x00], %g3 + .align 32 +ENTRY(aes_sparc64_ecb_decrypt_192) + /* %o0=&key[key_len], %o1=input, %o2=output, %o3=len */ + ldx [%o0 - 0x10], %g1 + ldx [%o0 - 0x08], %g2 +1: ldx [%o1 + 0x00], %g3 ldx [%o1 + 0x08], %g7 add %o1, 0x10, %o1 xor %g1, %g3, %g3 xor %g2, %g7, %g7 MOVXTOD_G3_F4 MOVXTOD_G7_F6 - - DECRYPT_192(56, 4, 6, 0, 2) - + DECRYPT_192(8, 4, 6, 0, 2) std %f4, [%o2 + 0x00] std %f6, [%o2 + 0x08] - subcc %o4, 0x10, %o4 + subcc %o3, 0x10, %o3 bne,pt %xcc, 1b add %o2, 0x10, %o2 - retl nop +ENDPROC(aes_sparc64_ecb_decrypt_192) -2: - /* 128-bit key */ - ldx [%o1 + 0x00], %g3 + .align 32 +ENTRY(aes_sparc64_ecb_decrypt_256) + /* %o0=&key[key_len], %o1=input, %o2=output, %o3=len */ + ldx [%o0 - 0x10], %g1 + ldx [%o0 - 0x08], %g2 +1: ldx [%o1 + 0x00], %g3 ldx [%o1 + 0x08], %g7 add %o1, 0x10, %o1 xor %g1, %g3, %g3 xor %g2, %g7, %g7 MOVXTOD_G3_F4 MOVXTOD_G7_F6 - - DECRYPT_128(48, 4, 6, 0, 2) - + DECRYPT_256(8, 4, 6, 0, 2) std %f4, [%o2 + 0x00] std %f6, [%o2 + 0x08] - subcc %o4, 0x10, %o4 - bne,pt %xcc, 2b + subcc %o3, 0x10, %o3 + bne,pt %xcc, 1b add %o2, 0x10, %o2 - retl nop -ENDPROC(aes_sparc64_ecb_decrypt) +ENDPROC(aes_sparc64_ecb_decrypt_256) -ENTRY(aes_sparc64_cbc_encrypt) - /* %o0=key, %o1=input, %o2=output, %o3=key_len, %o4=len */ - ldd [%o5 + 0x00], %f4 - ldd [%o5 + 0x08], %f6 + .align 32 +ENTRY(aes_sparc64_cbc_encrypt_128) + /* %o0=key, %o1=input, %o2=output, %o3=len, %o4=IV */ + ldd [%o4 + 0x00], %f4 + ldd [%o4 + 0x08], %f6 ldx [%o0 + 0x00], %g1 ldx [%o0 + 0x08], %g2 - cmp %o3, 24 - bl 2f - nop - be 1f - nop - -0: - /* 256-bit key */ - ldx [%o1 + 0x00], %g3 +1: ldx [%o1 + 0x00], %g3 ldx [%o1 + 0x08], %g7 add %o1, 0x10, %o1 xor %g1, %g3, %g3 @@ -662,24 +974,26 @@ ENTRY(aes_sparc64_cbc_encrypt) MOVXTOD_G7_F2 fxor %f4, %f0, %f4 fxor %f6, %f2, %f6 - - ENCRYPT_256(8, 4, 6, 0, 2) - + ENCRYPT_128(8, 4, 6, 0, 2) std %f4, [%o2 + 0x00] std %f6, [%o2 + 0x08] - subcc %o4, 0x10, %o4 - bne,pt %xcc, 0b + subcc %o3, 0x10, %o3 + bne,pt %xcc, 1b add %o2, 0x10, %o2 - - std %f4, [%o5 + 0x00] - std %f6, [%o5 + 0x08] - + std %f4, [%o4 + 0x00] + std %f6, [%o4 + 0x08] retl nop +ENDPROC(aes_sparc64_cbc_encrypt_128) -1: - /* 192-bit key */ - ldx [%o1 + 0x00], %g3 + .align 32 
+ENTRY(aes_sparc64_cbc_encrypt_192) + /* %o0=key, %o1=input, %o2=output, %o3=len, %o4=IV */ + ldd [%o4 + 0x00], %f4 + ldd [%o4 + 0x08], %f6 + ldx [%o0 + 0x00], %g1 + ldx [%o0 + 0x08], %g2 +1: ldx [%o1 + 0x00], %g3 ldx [%o1 + 0x08], %g7 add %o1, 0x10, %o1 xor %g1, %g3, %g3 @@ -688,24 +1002,26 @@ ENTRY(aes_sparc64_cbc_encrypt) MOVXTOD_G7_F2 fxor %f4, %f0, %f4 fxor %f6, %f2, %f6 - ENCRYPT_192(8, 4, 6, 0, 2) - std %f4, [%o2 + 0x00] std %f6, [%o2 + 0x08] - subcc %o4, 0x10, %o4 + subcc %o3, 0x10, %o3 bne,pt %xcc, 1b add %o2, 0x10, %o2 - - std %f4, [%o5 + 0x00] - std %f6, [%o5 + 0x08] - + std %f4, [%o4 + 0x00] + std %f6, [%o4 + 0x08] retl nop +ENDPROC(aes_sparc64_cbc_encrypt_192) -2: - /* 128-bit key */ - ldx [%o1 + 0x00], %g3 + .align 32 +ENTRY(aes_sparc64_cbc_encrypt_256) + /* %o0=key, %o1=input, %o2=output, %o3=len, %o4=IV */ + ldd [%o4 + 0x00], %f4 + ldd [%o4 + 0x08], %f6 + ldx [%o0 + 0x00], %g1 + ldx [%o0 + 0x08], %g2 +1: ldx [%o1 + 0x00], %g3 ldx [%o1 + 0x08], %g7 add %o1, 0x10, %o1 xor %g1, %g3, %g3 @@ -714,123 +1030,110 @@ ENTRY(aes_sparc64_cbc_encrypt) MOVXTOD_G7_F2 fxor %f4, %f0, %f4 fxor %f6, %f2, %f6 - - ENCRYPT_128(8, 4, 6, 0, 2) - + ENCRYPT_256(8, 4, 6, 0, 2) std %f4, [%o2 + 0x00] std %f6, [%o2 + 0x08] - subcc %o4, 0x10, %o4 - bne,pt %xcc, 2b + subcc %o3, 0x10, %o3 + bne,pt %xcc, 1b add %o2, 0x10, %o2 - - std %f4, [%o5 + 0x00] - std %f6, [%o5 + 0x08] - + std %f4, [%o4 + 0x00] + std %f6, [%o4 + 0x08] retl nop -ENDPROC(aes_sparc64_cbc_encrypt) +ENDPROC(aes_sparc64_cbc_encrypt_256) -ENTRY(aes_sparc64_cbc_decrypt) - /* %o0=&key[key_len], %o1=key_len, %o2=input, %o3=output, %o4=len, %o5=iv */ + .align 32 +ENTRY(aes_sparc64_cbc_decrypt_128) + /* %o0=&key[key_len], %o1=input, %o2=output, %o3=len, %o4=iv */ ldx [%o0 - 0x10], %g1 ldx [%o0 - 0x08], %g2 - cmp %o1, 24 - ldx [%o5 + 0x00], %o0 - bl 2f - ldx [%o5 + 0x08], %o1 - be 1f - nop - -0: - /* 256-bit key */ - ldx [%o2 + 0x00], %g3 - ldx [%o2 + 0x08], %g7 - add %o2, 0x10, %o2 + ldx [%o4 + 0x00], %o0 + ldx [%o4 + 0x08], %o5 +1: ldx [%o1 + 0x00], %g3 + ldx [%o1 + 0x08], %g7 + add %o1, 0x10, %o1 xor %g1, %g3, %g3 xor %g2, %g7, %g7 MOVXTOD_G3_F4 MOVXTOD_G7_F6 - - DECRYPT_256(64, 4, 6, 0, 2) - + DECRYPT_128(8, 4, 6, 0, 2) MOVXTOD_O0_F0 - MOVXTOD_O1_F2 + MOVXTOD_O5_F2 xor %g1, %g3, %o0 - xor %g2, %g7, %o1 + xor %g2, %g7, %o5 fxor %f4, %f0, %f4 fxor %f6, %f2, %f6 - - std %f4, [%o3 + 0x00] - std %f6, [%o3 + 0x08] - subcc %o4, 0x10, %o4 - bne,pt %xcc, 0b - add %o3, 0x10, %o3 - - stx %o0, [%o5 + 0x00] - stx %o1, [%o5 + 0x08] - + std %f4, [%o2 + 0x00] + std %f6, [%o2 + 0x08] + subcc %o3, 0x10, %o3 + bne,pt %xcc, 1b + add %o2, 0x10, %o2 + stx %o0, [%o4 + 0x00] + stx %o5, [%o4 + 0x08] retl nop +ENDPROC(aes_sparc64_cbc_decrypt_128) -1: - /* 192-bit key */ - ldx [%o2 + 0x00], %g3 - ldx [%o2 + 0x08], %g7 - add %o2, 0x10, %o2 + .align 32 +ENTRY(aes_sparc64_cbc_decrypt_192) + /* %o0=&key[key_len], %o1=input, %o2=output, %o3=len, %o4=iv */ + ldx [%o0 - 0x10], %g1 + ldx [%o0 - 0x08], %g2 + ldx [%o4 + 0x00], %o0 + ldx [%o4 + 0x08], %o5 +1: ldx [%o1 + 0x00], %g3 + ldx [%o1 + 0x08], %g7 + add %o1, 0x10, %o1 xor %g1, %g3, %g3 xor %g2, %g7, %g7 MOVXTOD_G3_F4 MOVXTOD_G7_F6 - - DECRYPT_192(56, 4, 6, 0, 2) - + DECRYPT_192(8, 4, 6, 0, 2) MOVXTOD_O0_F0 - MOVXTOD_O1_F2 + MOVXTOD_O5_F2 xor %g1, %g3, %o0 - xor %g2, %g7, %o1 + xor %g2, %g7, %o5 fxor %f4, %f0, %f4 fxor %f6, %f2, %f6 - - std %f4, [%o3 + 0x00] - std %f6, [%o3 + 0x08] - subcc %o4, 0x10, %o4 + std %f4, [%o2 + 0x00] + std %f6, [%o2 + 0x08] + subcc %o3, 0x10, %o3 bne,pt %xcc, 1b - add %o3, 0x10, %o3 - - stx %o0, 
[%o5 + 0x00] - stx %o1, [%o5 + 0x08] - + add %o2, 0x10, %o2 + stx %o0, [%o4 + 0x00] + stx %o5, [%o4 + 0x08] retl nop +ENDPROC(aes_sparc64_cbc_decrypt_192) -2: - /* 128-bit key */ - ldx [%o2 + 0x00], %g3 - ldx [%o2 + 0x08], %g7 - add %o2, 0x10, %o2 + .align 32 +ENTRY(aes_sparc64_cbc_decrypt_256) + /* %o0=&key[key_len], %o1=input, %o2=output, %o3=len, %o4=iv */ + ldx [%o0 - 0x10], %g1 + ldx [%o0 - 0x08], %g2 + ldx [%o4 + 0x00], %o0 + ldx [%o4 + 0x08], %o5 +1: ldx [%o1 + 0x00], %g3 + ldx [%o1 + 0x08], %g7 + add %o1, 0x10, %o1 xor %g1, %g3, %g3 xor %g2, %g7, %g7 MOVXTOD_G3_F4 MOVXTOD_G7_F6 - - DECRYPT_128(48, 4, 6, 0, 2) - + DECRYPT_256(8, 4, 6, 0, 2) MOVXTOD_O0_F0 - MOVXTOD_O1_F2 + MOVXTOD_O5_F2 xor %g1, %g3, %o0 - xor %g2, %g7, %o1 + xor %g2, %g7, %o5 fxor %f4, %f0, %f4 fxor %f6, %f2, %f6 - - std %f4, [%o3 + 0x00] - std %f6, [%o3 + 0x08] - subcc %o4, 0x10, %o4 - bne,pt %xcc, 2b - add %o3, 0x10, %o3 - - stx %o0, [%o5 + 0x00] - stx %o1, [%o5 + 0x08] - + std %f4, [%o2 + 0x00] + std %f6, [%o2 + 0x08] + subcc %o3, 0x10, %o3 + bne,pt %xcc, 1b + add %o2, 0x10, %o2 + stx %o0, [%o4 + 0x00] + stx %o5, [%o4 + 0x08] retl nop -ENDPROC(aes_sparc64_cbc_decrypt) +ENDPROC(aes_sparc64_cbc_decrypt_256) diff --git a/arch/sparc/crypto/aes_glue.c b/arch/sparc/crypto/aes_glue.c index a87c5fa76e20..0b1de0b470a2 100644 --- a/arch/sparc/crypto/aes_glue.c +++ b/arch/sparc/crypto/aes_glue.c @@ -26,12 +26,121 @@ #include #include +struct aes_ops { + void (*encrypt)(const u64 *key, const u32 *input, u32 *output); + void (*decrypt)(const u64 *key, const u32 *input, u32 *output); + void (*load_encrypt_keys)(const u64 *key); + void (*load_decrypt_keys)(const u64 *key); + void (*ecb_encrypt)(const u64 *key, const u64 *input, u64 *output, + unsigned int len); + void (*ecb_decrypt)(const u64 *key, const u64 *input, u64 *output, + unsigned int len); + void (*cbc_encrypt)(const u64 *key, const u64 *input, u64 *output, + unsigned int len, u64 *iv); + void (*cbc_decrypt)(const u64 *key, const u64 *input, u64 *output, + unsigned int len, u64 *iv); +}; + struct crypto_sparc64_aes_ctx { + struct aes_ops *ops; u64 key[AES_MAX_KEYLENGTH / sizeof(u64)]; u32 key_length; u32 expanded_key_length; }; +extern void aes_sparc64_encrypt_128(const u64 *key, const u32 *input, + u32 *output); +extern void aes_sparc64_encrypt_192(const u64 *key, const u32 *input, + u32 *output); +extern void aes_sparc64_encrypt_256(const u64 *key, const u32 *input, + u32 *output); + +extern void aes_sparc64_decrypt_128(const u64 *key, const u32 *input, + u32 *output); +extern void aes_sparc64_decrypt_192(const u64 *key, const u32 *input, + u32 *output); +extern void aes_sparc64_decrypt_256(const u64 *key, const u32 *input, + u32 *output); + +extern void aes_sparc64_load_encrypt_keys_128(const u64 *key); +extern void aes_sparc64_load_encrypt_keys_192(const u64 *key); +extern void aes_sparc64_load_encrypt_keys_256(const u64 *key); + +extern void aes_sparc64_load_decrypt_keys_128(const u64 *key); +extern void aes_sparc64_load_decrypt_keys_192(const u64 *key); +extern void aes_sparc64_load_decrypt_keys_256(const u64 *key); + +extern void aes_sparc64_ecb_encrypt_128(const u64 *key, const u64 *input, + u64 *output, unsigned int len); +extern void aes_sparc64_ecb_encrypt_192(const u64 *key, const u64 *input, + u64 *output, unsigned int len); +extern void aes_sparc64_ecb_encrypt_256(const u64 *key, const u64 *input, + u64 *output, unsigned int len); + +extern void aes_sparc64_ecb_decrypt_128(const u64 *key, const u64 *input, + u64 *output, unsigned int len); +extern void 
aes_sparc64_ecb_decrypt_192(const u64 *key, const u64 *input, + u64 *output, unsigned int len); +extern void aes_sparc64_ecb_decrypt_256(const u64 *key, const u64 *input, + u64 *output, unsigned int len); + +extern void aes_sparc64_cbc_encrypt_128(const u64 *key, const u64 *input, + u64 *output, unsigned int len, + u64 *iv); + +extern void aes_sparc64_cbc_encrypt_192(const u64 *key, const u64 *input, + u64 *output, unsigned int len, + u64 *iv); + +extern void aes_sparc64_cbc_encrypt_256(const u64 *key, const u64 *input, + u64 *output, unsigned int len, + u64 *iv); + +extern void aes_sparc64_cbc_decrypt_128(const u64 *key, const u64 *input, + u64 *output, unsigned int len, + u64 *iv); + +extern void aes_sparc64_cbc_decrypt_192(const u64 *key, const u64 *input, + u64 *output, unsigned int len, + u64 *iv); + +extern void aes_sparc64_cbc_decrypt_256(const u64 *key, const u64 *input, + u64 *output, unsigned int len, + u64 *iv); + +struct aes_ops aes128_ops = { + .encrypt = aes_sparc64_encrypt_128, + .decrypt = aes_sparc64_decrypt_128, + .load_encrypt_keys = aes_sparc64_load_encrypt_keys_128, + .load_decrypt_keys = aes_sparc64_load_decrypt_keys_128, + .ecb_encrypt = aes_sparc64_ecb_encrypt_128, + .ecb_decrypt = aes_sparc64_ecb_decrypt_128, + .cbc_encrypt = aes_sparc64_cbc_encrypt_128, + .cbc_decrypt = aes_sparc64_cbc_decrypt_128, +}; + +struct aes_ops aes192_ops = { + .encrypt = aes_sparc64_encrypt_192, + .decrypt = aes_sparc64_decrypt_192, + .load_encrypt_keys = aes_sparc64_load_encrypt_keys_192, + .load_decrypt_keys = aes_sparc64_load_decrypt_keys_192, + .ecb_encrypt = aes_sparc64_ecb_encrypt_192, + .ecb_decrypt = aes_sparc64_ecb_decrypt_192, + .cbc_encrypt = aes_sparc64_cbc_encrypt_192, + .cbc_decrypt = aes_sparc64_cbc_decrypt_192, +}; + +struct aes_ops aes256_ops = { + .encrypt = aes_sparc64_encrypt_256, + .decrypt = aes_sparc64_decrypt_256, + .load_encrypt_keys = aes_sparc64_load_encrypt_keys_256, + .load_decrypt_keys = aes_sparc64_load_decrypt_keys_256, + .ecb_encrypt = aes_sparc64_ecb_encrypt_256, + .ecb_decrypt = aes_sparc64_ecb_decrypt_256, + .cbc_encrypt = aes_sparc64_cbc_encrypt_256, + .cbc_decrypt = aes_sparc64_cbc_decrypt_256, +}; + extern void aes_sparc64_key_expand(const u32 *in_key, u64 *output_key, unsigned int key_len); @@ -44,14 +153,17 @@ static int aes_set_key(struct crypto_tfm *tfm, const u8 *in_key, switch (key_len) { case AES_KEYSIZE_128: ctx->expanded_key_length = 0xb0; + ctx->ops = &aes128_ops; break; case AES_KEYSIZE_192: ctx->expanded_key_length = 0xd0; + ctx->ops = &aes192_ops; break; case AES_KEYSIZE_256: ctx->expanded_key_length = 0xf0; + ctx->ops = &aes256_ops; break; default: @@ -65,38 +177,22 @@ static int aes_set_key(struct crypto_tfm *tfm, const u8 *in_key, return 0; } -extern void aes_sparc64_encrypt(const u64 *key, const u32 *input, - u32 *output, unsigned int key_len); - static void aes_encrypt(struct crypto_tfm *tfm, u8 *dst, const u8 *src) { struct crypto_sparc64_aes_ctx *ctx = crypto_tfm_ctx(tfm); - aes_sparc64_encrypt(&ctx->key[0], (const u32 *) src, - (u32 *) dst, ctx->key_length); + ctx->ops->encrypt(&ctx->key[0], (const u32 *) src, (u32 *) dst); } -extern void aes_sparc64_decrypt(const u64 *key, const u32 *input, - u32 *output, unsigned int key_len, - unsigned int expanded_key_len); - static void aes_decrypt(struct crypto_tfm *tfm, u8 *dst, const u8 *src) { struct crypto_sparc64_aes_ctx *ctx = crypto_tfm_ctx(tfm); - aes_sparc64_decrypt(&ctx->key[0], (const u32 *) src, - (u32 *) dst, ctx->key_length, - ctx->expanded_key_length); + 
ctx->ops->decrypt(&ctx->key[0], (const u32 *) src, (u32 *) dst); } -extern void aes_sparc64_load_encrypt_keys(u64 *key); -extern void aes_sparc64_load_decrypt_keys(u64 *key); - #define AES_BLOCK_MASK (~(AES_BLOCK_SIZE-1)) -extern void aes_sparc64_ecb_encrypt(u64 *key, const u32 *input, u32 *output, - unsigned int key_len, unsigned int len); - static int ecb_encrypt(struct blkcipher_desc *desc, struct scatterlist *dst, struct scatterlist *src, unsigned int nbytes) @@ -108,15 +204,15 @@ static int ecb_encrypt(struct blkcipher_desc *desc, blkcipher_walk_init(&walk, dst, src, nbytes); err = blkcipher_walk_virt(desc, &walk); - aes_sparc64_load_encrypt_keys(&ctx->key[0]); + ctx->ops->load_encrypt_keys(&ctx->key[0]); while ((nbytes = walk.nbytes)) { unsigned int block_len = nbytes & AES_BLOCK_MASK; if (likely(block_len)) { - aes_sparc64_ecb_encrypt(&ctx->key[0], - (const u32 *)walk.src.virt.addr, - (u32 *) walk.dst.virt.addr, - ctx->key_length, block_len); + ctx->ops->ecb_encrypt(&ctx->key[0], + (const u64 *)walk.src.virt.addr, + (u64 *) walk.dst.virt.addr, + block_len); } nbytes &= AES_BLOCK_SIZE - 1; err = blkcipher_walk_done(desc, &walk, nbytes); @@ -125,9 +221,6 @@ static int ecb_encrypt(struct blkcipher_desc *desc, return err; } -extern void aes_sparc64_ecb_decrypt(u64 *ekey, const u32 *input, u32 *output, - unsigned int key_len, unsigned int len); - static int ecb_decrypt(struct blkcipher_desc *desc, struct scatterlist *dst, struct scatterlist *src, unsigned int nbytes) @@ -140,14 +233,16 @@ static int ecb_decrypt(struct blkcipher_desc *desc, blkcipher_walk_init(&walk, dst, src, nbytes); err = blkcipher_walk_virt(desc, &walk); - aes_sparc64_load_decrypt_keys(&ctx->key[0]); + ctx->ops->load_decrypt_keys(&ctx->key[0]); key_end = &ctx->key[ctx->expanded_key_length / sizeof(u64)]; while ((nbytes = walk.nbytes)) { unsigned int block_len = nbytes & AES_BLOCK_MASK; - aes_sparc64_ecb_decrypt(key_end, (const u32 *) walk.src.virt.addr, - (u32 *) walk.dst.virt.addr, ctx->key_length, - block_len); + if (likely(block_len)) { + ctx->ops->ecb_decrypt(key_end, + (const u64 *) walk.src.virt.addr, + (u64 *) walk.dst.virt.addr, block_len); + } nbytes &= AES_BLOCK_SIZE - 1; err = blkcipher_walk_done(desc, &walk, nbytes); } @@ -156,10 +251,6 @@ static int ecb_decrypt(struct blkcipher_desc *desc, return err; } -extern void aes_sparc64_cbc_encrypt(u64 *key, const u32 *input, u32 *output, - unsigned int key_len, unsigned int len, - u64 *iv); - static int cbc_encrypt(struct blkcipher_desc *desc, struct scatterlist *dst, struct scatterlist *src, unsigned int nbytes) @@ -171,16 +262,15 @@ static int cbc_encrypt(struct blkcipher_desc *desc, blkcipher_walk_init(&walk, dst, src, nbytes); err = blkcipher_walk_virt(desc, &walk); - aes_sparc64_load_encrypt_keys(&ctx->key[0]); + ctx->ops->load_encrypt_keys(&ctx->key[0]); while ((nbytes = walk.nbytes)) { unsigned int block_len = nbytes & AES_BLOCK_MASK; if (likely(block_len)) { - aes_sparc64_cbc_encrypt(&ctx->key[0], - (const u32 *)walk.src.virt.addr, - (u32 *) walk.dst.virt.addr, - ctx->key_length, block_len, - (u64 *) walk.iv); + ctx->ops->cbc_encrypt(&ctx->key[0], + (const u64 *)walk.src.virt.addr, + (u64 *) walk.dst.virt.addr, + block_len, (u64 *) walk.iv); } nbytes &= AES_BLOCK_SIZE - 1; err = blkcipher_walk_done(desc, &walk, nbytes); @@ -189,10 +279,6 @@ static int cbc_encrypt(struct blkcipher_desc *desc, return err; } -extern void aes_sparc64_cbc_decrypt(u64 *ekey, unsigned int key_len, - const u32 *input, u32 *output, - unsigned int len, u64 *iv); - static int 
cbc_decrypt(struct blkcipher_desc *desc, struct scatterlist *dst, struct scatterlist *src, unsigned int nbytes) @@ -205,15 +291,17 @@ static int cbc_decrypt(struct blkcipher_desc *desc, blkcipher_walk_init(&walk, dst, src, nbytes); err = blkcipher_walk_virt(desc, &walk); - aes_sparc64_load_decrypt_keys(&ctx->key[0]); + ctx->ops->load_decrypt_keys(&ctx->key[0]); key_end = &ctx->key[ctx->expanded_key_length / sizeof(u64)]; while ((nbytes = walk.nbytes)) { unsigned int block_len = nbytes & AES_BLOCK_MASK; - aes_sparc64_cbc_decrypt(key_end, ctx->key_length, - (const u32 *) walk.src.virt.addr, - (u32 *) walk.dst.virt.addr, - block_len, (u64 *) walk.iv); + if (likely(block_len)) { + ctx->ops->cbc_decrypt(key_end, + (const u64 *) walk.src.virt.addr, + (u64 *) walk.dst.virt.addr, + block_len, (u64 *) walk.iv); + } nbytes &= AES_BLOCK_SIZE - 1; err = blkcipher_walk_done(desc, &walk, nbytes); } -- cgit v1.2.3 From 9fd130ecbe97f3440d14d3d0c6660413e69ac532 Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Wed, 29 Aug 2012 14:49:23 -0700 Subject: sparc64: Add ctr mode support to AES driver. Signed-off-by: David S. Miller --- arch/sparc/crypto/aes_asm.S | 95 ++++++++++++++++++++++++++++++++++++++++++++ arch/sparc/crypto/aes_glue.c | 62 +++++++++++++++++++++++++++++ 2 files changed, 157 insertions(+) (limited to 'arch/sparc/crypto') diff --git a/arch/sparc/crypto/aes_asm.S b/arch/sparc/crypto/aes_asm.S index 50faae03c592..7a975d689919 100644 --- a/arch/sparc/crypto/aes_asm.S +++ b/arch/sparc/crypto/aes_asm.S @@ -44,6 +44,8 @@ .word 0x85b02307; #define MOVXTOD_O0_F0 \ .word 0x81b02308; +#define MOVXTOD_O5_F0 \ + .word 0x81b0230d; #define MOVXTOD_O5_F2 \ .word 0x85b0230d; @@ -1137,3 +1139,96 @@ ENTRY(aes_sparc64_cbc_decrypt_256) retl nop ENDPROC(aes_sparc64_cbc_decrypt_256) + + .align 32 +ENTRY(aes_sparc64_ctr_crypt_128) + /* %o0=key, %o1=input, %o2=output, %o3=len, %o4=IV */ + ldx [%o4 + 0x00], %g3 + ldx [%o4 + 0x08], %g7 + ldx [%o0 + 0x00], %g1 + ldx [%o0 + 0x08], %g2 +1: xor %g1, %g3, %o5 + MOVXTOD_O5_F0 + xor %g2, %g7, %o5 + MOVXTOD_O5_F2 + add %g7, 1, %g7 + add %g3, 1, %o5 + movrz %g7, %o5, %g3 + ENCRYPT_128(8, 0, 2, 4, 6) + ldd [%o1 + 0x00], %f4 + ldd [%o1 + 0x08], %f6 + fxor %f4, %f0, %f4 + fxor %f6, %f2, %f6 + std %f4, [%o2 + 0x00] + std %f6, [%o2 + 0x08] + subcc %o3, 0x10, %o3 + add %o1, 0x10, %o1 + bne,pt %xcc, 1b + add %o2, 0x10, %o2 + stx %g3, [%o4 + 0x00] + stx %g7, [%o4 + 0x08] + retl + nop +ENDPROC(aes_sparc64_ctr_crypt_128) + + .align 32 +ENTRY(aes_sparc64_ctr_crypt_192) + /* %o0=key, %o1=input, %o2=output, %o3=len, %o4=IV */ + ldx [%o4 + 0x00], %g3 + ldx [%o4 + 0x08], %g7 + ldx [%o0 + 0x00], %g1 + ldx [%o0 + 0x08], %g2 +1: xor %g1, %g3, %o5 + MOVXTOD_O5_F0 + xor %g2, %g7, %o5 + MOVXTOD_O5_F2 + add %g7, 1, %g7 + add %g3, 1, %o5 + movrz %g7, %o5, %g3 + ENCRYPT_192(8, 0, 2, 4, 6) + ldd [%o1 + 0x00], %f4 + ldd [%o1 + 0x08], %f6 + fxor %f4, %f0, %f4 + fxor %f6, %f2, %f6 + std %f4, [%o2 + 0x00] + std %f6, [%o2 + 0x08] + subcc %o3, 0x10, %o3 + add %o1, 0x10, %o1 + bne,pt %xcc, 1b + add %o2, 0x10, %o2 + stx %g3, [%o4 + 0x00] + stx %g7, [%o4 + 0x08] + retl + nop +ENDPROC(aes_sparc64_ctr_crypt_192) + + .align 32 +ENTRY(aes_sparc64_ctr_crypt_256) + /* %o0=key, %o1=input, %o2=output, %o3=len, %o4=IV */ + ldx [%o4 + 0x00], %g3 + ldx [%o4 + 0x08], %g7 + ldx [%o0 + 0x00], %g1 + ldx [%o0 + 0x08], %g2 +1: xor %g1, %g3, %o5 + MOVXTOD_O5_F0 + xor %g2, %g7, %o5 + MOVXTOD_O5_F2 + add %g7, 1, %g7 + add %g3, 1, %o5 + movrz %g7, %o5, %g3 + ENCRYPT_256(8, 0, 2, 4, 6) + ldd [%o1 + 0x00], %f4 + ldd [%o1 + 
0x08], %f6 + fxor %f4, %f0, %f4 + fxor %f6, %f2, %f6 + std %f4, [%o2 + 0x00] + std %f6, [%o2 + 0x08] + subcc %o3, 0x10, %o3 + add %o1, 0x10, %o1 + bne,pt %xcc, 1b + add %o2, 0x10, %o2 + stx %g3, [%o4 + 0x00] + stx %g7, [%o4 + 0x08] + retl + nop +ENDPROC(aes_sparc64_ctr_crypt_256) diff --git a/arch/sparc/crypto/aes_glue.c b/arch/sparc/crypto/aes_glue.c index 0b1de0b470a2..f457fc69edeb 100644 --- a/arch/sparc/crypto/aes_glue.c +++ b/arch/sparc/crypto/aes_glue.c @@ -39,6 +39,8 @@ struct aes_ops { unsigned int len, u64 *iv); void (*cbc_decrypt)(const u64 *key, const u64 *input, u64 *output, unsigned int len, u64 *iv); + void (*ctr_crypt)(const u64 *key, const u64 *input, u64 *output, + unsigned int len, u64 *iv); }; struct crypto_sparc64_aes_ctx { @@ -108,6 +110,16 @@ extern void aes_sparc64_cbc_decrypt_256(const u64 *key, const u64 *input, u64 *output, unsigned int len, u64 *iv); +extern void aes_sparc64_ctr_crypt_128(const u64 *key, const u64 *input, + u64 *output, unsigned int len, + u64 *iv); +extern void aes_sparc64_ctr_crypt_192(const u64 *key, const u64 *input, + u64 *output, unsigned int len, + u64 *iv); +extern void aes_sparc64_ctr_crypt_256(const u64 *key, const u64 *input, + u64 *output, unsigned int len, + u64 *iv); + struct aes_ops aes128_ops = { .encrypt = aes_sparc64_encrypt_128, .decrypt = aes_sparc64_decrypt_128, @@ -117,6 +129,7 @@ struct aes_ops aes128_ops = { .ecb_decrypt = aes_sparc64_ecb_decrypt_128, .cbc_encrypt = aes_sparc64_cbc_encrypt_128, .cbc_decrypt = aes_sparc64_cbc_decrypt_128, + .ctr_crypt = aes_sparc64_ctr_crypt_128, }; struct aes_ops aes192_ops = { @@ -128,6 +141,7 @@ struct aes_ops aes192_ops = { .ecb_decrypt = aes_sparc64_ecb_decrypt_192, .cbc_encrypt = aes_sparc64_cbc_encrypt_192, .cbc_decrypt = aes_sparc64_cbc_decrypt_192, + .ctr_crypt = aes_sparc64_ctr_crypt_192, }; struct aes_ops aes256_ops = { @@ -139,6 +153,7 @@ struct aes_ops aes256_ops = { .ecb_decrypt = aes_sparc64_ecb_decrypt_256, .cbc_encrypt = aes_sparc64_cbc_encrypt_256, .cbc_decrypt = aes_sparc64_cbc_decrypt_256, + .ctr_crypt = aes_sparc64_ctr_crypt_256, }; extern void aes_sparc64_key_expand(const u32 *in_key, u64 *output_key, @@ -310,6 +325,34 @@ static int cbc_decrypt(struct blkcipher_desc *desc, return err; } +static int ctr_crypt(struct blkcipher_desc *desc, + struct scatterlist *dst, struct scatterlist *src, + unsigned int nbytes) +{ + struct crypto_sparc64_aes_ctx *ctx = crypto_blkcipher_ctx(desc->tfm); + struct blkcipher_walk walk; + int err; + + blkcipher_walk_init(&walk, dst, src, nbytes); + err = blkcipher_walk_virt(desc, &walk); + + ctx->ops->load_encrypt_keys(&ctx->key[0]); + while ((nbytes = walk.nbytes)) { + unsigned int block_len = nbytes & AES_BLOCK_MASK; + + if (likely(block_len)) { + ctx->ops->ctr_crypt(&ctx->key[0], + (const u64 *)walk.src.virt.addr, + (u64 *) walk.dst.virt.addr, + block_len, (u64 *) walk.iv); + } + nbytes &= AES_BLOCK_SIZE - 1; + err = blkcipher_walk_done(desc, &walk, nbytes); + } + fprs_write(0); + return err; +} + static struct crypto_alg algs[] = { { .cra_name = "aes", .cra_driver_name = "aes-sparc64", @@ -366,6 +409,25 @@ static struct crypto_alg algs[] = { { .decrypt = cbc_decrypt, }, }, +}, { + .cra_name = "ctr(aes)", + .cra_driver_name = "ctr-aes-sparc64", + .cra_priority = 150, + .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER, + .cra_blocksize = AES_BLOCK_SIZE, + .cra_ctxsize = sizeof(struct crypto_sparc64_aes_ctx), + .cra_alignmask = 7, + .cra_type = &crypto_blkcipher_type, + .cra_module = THIS_MODULE, + .cra_u = { + .blkcipher = { + .min_keysize = 
AES_MIN_KEY_SIZE,
+ .max_keysize = AES_MAX_KEY_SIZE,
+ .setkey = aes_set_key,
+ .encrypt = ctr_crypt,
+ .decrypt = ctr_crypt,
+ },
+ },
+}
 };

 static bool __init sparc64_has_aes_opcode(void)
-- cgit v1.2.3

From 03d168ad122d6e622ad00490211704c4f2994976 Mon Sep 17 00:00:00 2001
From: "David S. Miller"
Date: Thu, 30 Aug 2012 07:51:32 -0700
Subject: sparc64: Unroll ECB encryption loops in AES driver.

The AES opcodes have a 3 cycle latency, so by doing 32 bytes at a time
we avoid a pipeline bubble between every round.

For the 256-bit key case, it looks like we're doing more work in order
to reload the KEY registers during the loop to make space for scarce
temporaries. But the loads dual-issue with the AES operations so we get
the KEY reloads essentially for free.

Before:

testing speed of ecb(aes) encryption
test 0 (128 bit key, 16 byte blocks): 1 operation in 264 cycles (16 bytes)
test 1 (128 bit key, 64 byte blocks): 1 operation in 231 cycles (64 bytes)
test 2 (128 bit key, 256 byte blocks): 1 operation in 329 cycles (256 bytes)
test 3 (128 bit key, 1024 byte blocks): 1 operation in 715 cycles (1024 bytes)
test 4 (128 bit key, 8192 byte blocks): 1 operation in 4248 cycles (8192 bytes)
test 5 (192 bit key, 16 byte blocks): 1 operation in 221 cycles (16 bytes)
test 6 (192 bit key, 64 byte blocks): 1 operation in 234 cycles (64 bytes)
test 7 (192 bit key, 256 byte blocks): 1 operation in 359 cycles (256 bytes)
test 8 (192 bit key, 1024 byte blocks): 1 operation in 803 cycles (1024 bytes)
test 9 (192 bit key, 8192 byte blocks): 1 operation in 5366 cycles (8192 bytes)
test 10 (256 bit key, 16 byte blocks): 1 operation in 209 cycles (16 bytes)
test 11 (256 bit key, 64 byte blocks): 1 operation in 255 cycles (64 bytes)
test 12 (256 bit key, 256 byte blocks): 1 operation in 379 cycles (256 bytes)
test 13 (256 bit key, 1024 byte blocks): 1 operation in 938 cycles (1024 bytes)
test 14 (256 bit key, 8192 byte blocks): 1 operation in 6041 cycles (8192 bytes)

After:

testing speed of ecb(aes) encryption
test 0 (128 bit key, 16 byte blocks): 1 operation in 266 cycles (16 bytes)
test 1 (128 bit key, 64 byte blocks): 1 operation in 256 cycles (64 bytes)
test 2 (128 bit key, 256 byte blocks): 1 operation in 305 cycles (256 bytes)
test 3 (128 bit key, 1024 byte blocks): 1 operation in 676 cycles (1024 bytes)
test 4 (128 bit key, 8192 byte blocks): 1 operation in 3981 cycles (8192 bytes)
test 5 (192 bit key, 16 byte blocks): 1 operation in 210 cycles (16 bytes)
test 6 (192 bit key, 64 byte blocks): 1 operation in 233 cycles (64 bytes)
test 7 (192 bit key, 256 byte blocks): 1 operation in 340 cycles (256 bytes)
test 8 (192 bit key, 1024 byte blocks): 1 operation in 766 cycles (1024 bytes)
test 9 (192 bit key, 8192 byte blocks): 1 operation in 5136 cycles (8192 bytes)
test 10 (256 bit key, 16 byte blocks): 1 operation in 206 cycles (16 bytes)
test 11 (256 bit key, 64 byte blocks): 1 operation in 268 cycles (64 bytes)
test 12 (256 bit key, 256 byte blocks): 1 operation in 368 cycles (256 bytes)
test 13 (256 bit key, 1024 byte blocks): 1 operation in 890 cycles (1024 bytes)
test 14 (256 bit key, 8192 byte blocks): 1 operation in 5718 cycles (8192 bytes)

Signed-off-by: David S.
Miller --- arch/sparc/crypto/aes_asm.S | 166 +++++++++++++++++++++++++++++++++++++++----- 1 file changed, 148 insertions(+), 18 deletions(-) (limited to 'arch/sparc/crypto') diff --git a/arch/sparc/crypto/aes_asm.S b/arch/sparc/crypto/aes_asm.S index 7a975d689919..33d59c66f1e2 100644 --- a/arch/sparc/crypto/aes_asm.S +++ b/arch/sparc/crypto/aes_asm.S @@ -48,6 +48,10 @@ .word 0x81b0230d; #define MOVXTOD_O5_F2 \ .word 0x85b0230d; +#define MOVXTOD_G3_F60 \ + .word 0xbbb02303; +#define MOVXTOD_G7_F62 \ + .word 0xbfb02307; #define ENCRYPT_TWO_ROUNDS(KEY_BASE, I0, I1, T0, T1) \ AES_EROUND01(KEY_BASE + 0, I0, I1, T0) \ @@ -55,12 +59,32 @@ AES_EROUND01(KEY_BASE + 4, T0, T1, I0) \ AES_EROUND23(KEY_BASE + 6, T0, T1, I1) +#define ENCRYPT_TWO_ROUNDS_2(KEY_BASE, I0, I1, I2, I3, T0, T1, T2, T3) \ + AES_EROUND01(KEY_BASE + 0, I0, I1, T0) \ + AES_EROUND23(KEY_BASE + 2, I0, I1, T1) \ + AES_EROUND01(KEY_BASE + 0, I2, I3, T2) \ + AES_EROUND23(KEY_BASE + 2, I2, I3, T3) \ + AES_EROUND01(KEY_BASE + 4, T0, T1, I0) \ + AES_EROUND23(KEY_BASE + 6, T0, T1, I1) \ + AES_EROUND01(KEY_BASE + 4, T2, T3, I2) \ + AES_EROUND23(KEY_BASE + 6, T2, T3, I3) + #define ENCRYPT_TWO_ROUNDS_LAST(KEY_BASE, I0, I1, T0, T1) \ AES_EROUND01(KEY_BASE + 0, I0, I1, T0) \ AES_EROUND23(KEY_BASE + 2, I0, I1, T1) \ AES_EROUND01_L(KEY_BASE + 4, T0, T1, I0) \ AES_EROUND23_L(KEY_BASE + 6, T0, T1, I1) +#define ENCRYPT_TWO_ROUNDS_LAST_2(KEY_BASE, I0, I1, I2, I3, T0, T1, T2, T3) \ + AES_EROUND01(KEY_BASE + 0, I0, I1, T0) \ + AES_EROUND23(KEY_BASE + 2, I0, I1, T1) \ + AES_EROUND01(KEY_BASE + 0, I2, I3, T2) \ + AES_EROUND23(KEY_BASE + 2, I2, I3, T3) \ + AES_EROUND01_L(KEY_BASE + 4, T0, T1, I0) \ + AES_EROUND23_L(KEY_BASE + 6, T0, T1, I1) \ + AES_EROUND01_L(KEY_BASE + 4, T2, T3, I2) \ + AES_EROUND23_L(KEY_BASE + 6, T2, T3, I3) + /* 10 rounds */ #define ENCRYPT_128(KEY_BASE, I0, I1, T0, T1) \ ENCRYPT_TWO_ROUNDS(KEY_BASE + 0, I0, I1, T0, T1) \ @@ -69,6 +93,13 @@ ENCRYPT_TWO_ROUNDS(KEY_BASE + 24, I0, I1, T0, T1) \ ENCRYPT_TWO_ROUNDS_LAST(KEY_BASE + 32, I0, I1, T0, T1) +#define ENCRYPT_128_2(KEY_BASE, I0, I1, I2, I3, T0, T1, T2, T3) \ + ENCRYPT_TWO_ROUNDS_2(KEY_BASE + 0, I0, I1, I2, I3, T0, T1, T2, T3) \ + ENCRYPT_TWO_ROUNDS_2(KEY_BASE + 8, I0, I1, I2, I3, T0, T1, T2, T3) \ + ENCRYPT_TWO_ROUNDS_2(KEY_BASE + 16, I0, I1, I2, I3, T0, T1, T2, T3) \ + ENCRYPT_TWO_ROUNDS_2(KEY_BASE + 24, I0, I1, I2, I3, T0, T1, T2, T3) \ + ENCRYPT_TWO_ROUNDS_LAST_2(KEY_BASE + 32, I0, I1, I2, I3, T0, T1, T2, T3) + /* 12 rounds */ #define ENCRYPT_192(KEY_BASE, I0, I1, T0, T1) \ ENCRYPT_TWO_ROUNDS(KEY_BASE + 0, I0, I1, T0, T1) \ @@ -78,6 +109,14 @@ ENCRYPT_TWO_ROUNDS(KEY_BASE + 32, I0, I1, T0, T1) \ ENCRYPT_TWO_ROUNDS_LAST(KEY_BASE + 40, I0, I1, T0, T1) +#define ENCRYPT_192_2(KEY_BASE, I0, I1, I2, I3, T0, T1, T2, T3) \ + ENCRYPT_TWO_ROUNDS_2(KEY_BASE + 0, I0, I1, I2, I3, T0, T1, T2, T3) \ + ENCRYPT_TWO_ROUNDS_2(KEY_BASE + 8, I0, I1, I2, I3, T0, T1, T2, T3) \ + ENCRYPT_TWO_ROUNDS_2(KEY_BASE + 16, I0, I1, I2, I3, T0, T1, T2, T3) \ + ENCRYPT_TWO_ROUNDS_2(KEY_BASE + 24, I0, I1, I2, I3, T0, T1, T2, T3) \ + ENCRYPT_TWO_ROUNDS_2(KEY_BASE + 32, I0, I1, I2, I3, T0, T1, T2, T3) \ + ENCRYPT_TWO_ROUNDS_LAST_2(KEY_BASE + 40, I0, I1, I2, I3, T0, T1, T2, T3) + /* 14 rounds */ #define ENCRYPT_256(KEY_BASE, I0, I1, T0, T1) \ ENCRYPT_TWO_ROUNDS(KEY_BASE + 0, I0, I1, T0, T1) \ @@ -88,6 +127,34 @@ ENCRYPT_TWO_ROUNDS(KEY_BASE + 40, I0, I1, T0, T1) \ ENCRYPT_TWO_ROUNDS_LAST(KEY_BASE + 48, I0, I1, T0, T1) +#define ENCRYPT_256_TWO_ROUNDS_2(KEY_BASE, I0, I1, I2, I3, TMP_BASE) \ + 
ENCRYPT_TWO_ROUNDS_2(KEY_BASE, I0, I1, I2, I3, \ + TMP_BASE + 0, TMP_BASE + 2, TMP_BASE + 4, TMP_BASE + 6) + +#define ENCRYPT_256_2(KEY_BASE, I0, I1, I2, I3) \ + ENCRYPT_256_TWO_ROUNDS_2(KEY_BASE + 0, I0, I1, I2, I3, KEY_BASE + 48) \ + ldd [%o0 + 0xd0], %f56; \ + ldd [%o0 + 0xd8], %f58; \ + ENCRYPT_256_TWO_ROUNDS_2(KEY_BASE + 8, I0, I1, I2, I3, KEY_BASE + 0) \ + ldd [%o0 + 0xe0], %f60; \ + ldd [%o0 + 0xe8], %f62; \ + ENCRYPT_256_TWO_ROUNDS_2(KEY_BASE + 16, I0, I1, I2, I3, KEY_BASE + 0) \ + ENCRYPT_256_TWO_ROUNDS_2(KEY_BASE + 24, I0, I1, I2, I3, KEY_BASE + 0) \ + ENCRYPT_256_TWO_ROUNDS_2(KEY_BASE + 32, I0, I1, I2, I3, KEY_BASE + 0) \ + ENCRYPT_256_TWO_ROUNDS_2(KEY_BASE + 40, I0, I1, I2, I3, KEY_BASE + 0) \ + AES_EROUND01(KEY_BASE + 48, I0, I1, KEY_BASE + 0) \ + AES_EROUND23(KEY_BASE + 50, I0, I1, KEY_BASE + 2) \ + AES_EROUND01(KEY_BASE + 48, I2, I3, KEY_BASE + 4) \ + AES_EROUND23(KEY_BASE + 50, I2, I3, KEY_BASE + 6) \ + AES_EROUND01_L(KEY_BASE + 52, KEY_BASE + 0, KEY_BASE + 2, I0) \ + AES_EROUND23_L(KEY_BASE + 54, KEY_BASE + 0, KEY_BASE + 2, I1) \ + ldd [%o0 + 0x10], %f8; \ + ldd [%o0 + 0x18], %f10; \ + AES_EROUND01_L(KEY_BASE + 52, KEY_BASE + 4, KEY_BASE + 6, I2) \ + AES_EROUND23_L(KEY_BASE + 54, KEY_BASE + 4, KEY_BASE + 6, I3) \ + ldd [%o0 + 0x20], %f12; \ + ldd [%o0 + 0x28], %f14; + #define DECRYPT_TWO_ROUNDS(KEY_BASE, I0, I1, T0, T1) \ AES_DROUND23(KEY_BASE + 0, I0, I1, T1) \ AES_DROUND01(KEY_BASE + 2, I0, I1, T0) \ @@ -832,10 +899,34 @@ ENDPROC(aes_sparc64_load_decrypt_keys_256) ENTRY(aes_sparc64_ecb_encrypt_128) /* %o0=key, %o1=input, %o2=output, %o3=len */ ldx [%o0 + 0x00], %g1 - ldx [%o0 + 0x08], %g2 + subcc %o3, 0x10, %o3 + be 10f + ldx [%o0 + 0x08], %g2 1: ldx [%o1 + 0x00], %g3 ldx [%o1 + 0x08], %g7 - add %o1, 0x10, %o1 + ldx [%o1 + 0x10], %o4 + ldx [%o1 + 0x18], %o5 + xor %g1, %g3, %g3 + xor %g2, %g7, %g7 + MOVXTOD_G3_F4 + MOVXTOD_G7_F6 + xor %g1, %o4, %g3 + xor %g2, %o5, %g7 + MOVXTOD_G3_F60 + MOVXTOD_G7_F62 + ENCRYPT_128_2(8, 4, 6, 60, 62, 0, 2, 56, 58) + std %f4, [%o2 + 0x00] + std %f6, [%o2 + 0x08] + std %f60, [%o2 + 0x10] + std %f62, [%o2 + 0x18] + sub %o3, 0x20, %o3 + add %o1, 0x20, %o1 + brgz %o3, 1b + add %o2, 0x20, %o2 + brlz,pt %o3, 11f + nop +10: ldx [%o1 + 0x00], %g3 + ldx [%o1 + 0x08], %g7 xor %g1, %g3, %g3 xor %g2, %g7, %g7 MOVXTOD_G3_F4 @@ -843,10 +934,7 @@ ENTRY(aes_sparc64_ecb_encrypt_128) ENCRYPT_128(8, 4, 6, 0, 2) std %f4, [%o2 + 0x00] std %f6, [%o2 + 0x08] - subcc %o3, 0x10, %o3 - bne,pt %xcc, 1b - add %o2, 0x10, %o2 - retl +11: retl nop ENDPROC(aes_sparc64_ecb_encrypt_128) @@ -854,10 +942,34 @@ ENDPROC(aes_sparc64_ecb_encrypt_128) ENTRY(aes_sparc64_ecb_encrypt_192) /* %o0=key, %o1=input, %o2=output, %o3=len */ ldx [%o0 + 0x00], %g1 - ldx [%o0 + 0x08], %g2 + subcc %o3, 0x10, %o3 + be 10f + ldx [%o0 + 0x08], %g2 1: ldx [%o1 + 0x00], %g3 ldx [%o1 + 0x08], %g7 - add %o1, 0x10, %o1 + ldx [%o1 + 0x10], %o4 + ldx [%o1 + 0x18], %o5 + xor %g1, %g3, %g3 + xor %g2, %g7, %g7 + MOVXTOD_G3_F4 + MOVXTOD_G7_F6 + xor %g1, %o4, %g3 + xor %g2, %o5, %g7 + MOVXTOD_G3_F60 + MOVXTOD_G7_F62 + ENCRYPT_192_2(8, 4, 6, 60, 62, 0, 2, 56, 58) + std %f4, [%o2 + 0x00] + std %f6, [%o2 + 0x08] + std %f60, [%o2 + 0x10] + std %f62, [%o2 + 0x18] + sub %o3, 0x20, %o3 + add %o1, 0x20, %o1 + brgz %o3, 1b + add %o2, 0x20, %o2 + brlz,pt %o3, 11f + nop +10: ldx [%o1 + 0x00], %g3 + ldx [%o1 + 0x08], %g7 xor %g1, %g3, %g3 xor %g2, %g7, %g7 MOVXTOD_G3_F4 @@ -865,10 +977,7 @@ ENTRY(aes_sparc64_ecb_encrypt_192) ENCRYPT_192(8, 4, 6, 0, 2) std %f4, [%o2 + 0x00] std %f6, [%o2 + 0x08] - subcc %o3, 0x10, %o3 - 
bne,pt %xcc, 1b - add %o2, 0x10, %o2 - retl +11: retl nop ENDPROC(aes_sparc64_ecb_encrypt_192) @@ -876,10 +985,34 @@ ENDPROC(aes_sparc64_ecb_encrypt_192) ENTRY(aes_sparc64_ecb_encrypt_256) /* %o0=key, %o1=input, %o2=output, %o3=len */ ldx [%o0 + 0x00], %g1 - ldx [%o0 + 0x08], %g2 + subcc %o3, 0x10, %o3 + be 10f + ldx [%o0 + 0x08], %g2 1: ldx [%o1 + 0x00], %g3 ldx [%o1 + 0x08], %g7 - add %o1, 0x10, %o1 + ldx [%o1 + 0x10], %o4 + ldx [%o1 + 0x18], %o5 + xor %g1, %g3, %g3 + xor %g2, %g7, %g7 + MOVXTOD_G3_F4 + MOVXTOD_G7_F6 + xor %g1, %o4, %g3 + xor %g2, %o5, %g7 + MOVXTOD_G3_F0 + MOVXTOD_G7_F2 + ENCRYPT_256_2(8, 4, 6, 0, 2) + std %f4, [%o2 + 0x00] + std %f6, [%o2 + 0x08] + std %f0, [%o2 + 0x10] + std %f2, [%o2 + 0x18] + sub %o3, 0x20, %o3 + add %o1, 0x20, %o1 + brgz %o3, 1b + add %o2, 0x20, %o2 + brlz,pt %o3, 11f + nop +10: ldx [%o1 + 0x00], %g3 + ldx [%o1 + 0x08], %g7 xor %g1, %g3, %g3 xor %g2, %g7, %g7 MOVXTOD_G3_F4 @@ -887,10 +1020,7 @@ ENTRY(aes_sparc64_ecb_encrypt_256) ENCRYPT_256(8, 4, 6, 0, 2) std %f4, [%o2 + 0x00] std %f6, [%o2 + 0x08] - subcc %o3, 0x10, %o3 - bne,pt %xcc, 1b - add %o2, 0x10, %o2 - retl +11: retl nop ENDPROC(aes_sparc64_ecb_encrypt_256) -- cgit v1.2.3 From 301013159e4cdce44700418c8fd5eadb270e2d3a Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Thu, 30 Aug 2012 08:11:01 -0700 Subject: sparc64: Unroll ECB decryption loops in AES driver. Before: testing speed of ecb(aes) decryption test 0 (128 bit key, 16 byte blocks): 1 operation in 223 cycles (16 bytes) test 1 (128 bit key, 64 byte blocks): 1 operation in 230 cycles (64 bytes) test 2 (128 bit key, 256 byte blocks): 1 operation in 325 cycles (256 bytes) test 3 (128 bit key, 1024 byte blocks): 1 operation in 719 cycles (1024 bytes) test 4 (128 bit key, 8192 byte blocks): 1 operation in 4266 cycles (8192 bytes) test 5 (192 bit key, 16 byte blocks): 1 operation in 211 cycles (16 bytes) test 6 (192 bit key, 64 byte blocks): 1 operation in 234 cycles (64 bytes) test 7 (192 bit key, 256 byte blocks): 1 operation in 353 cycles (256 bytes) test 8 (192 bit key, 1024 byte blocks): 1 operation in 808 cycles (1024 bytes) test 9 (192 bit key, 8192 byte blocks): 1 operation in 5344 cycles (8192 bytes) test 10 (256 bit key, 16 byte blocks): 1 operation in 214 cycles (16 bytes) test 11 (256 bit key, 64 byte blocks): 1 operation in 243 cycles (64 bytes) test 12 (256 bit key, 256 byte blocks): 1 operation in 393 cycles (256 bytes) test 13 (256 bit key, 1024 byte blocks): 1 operation in 939 cycles (1024 bytes) test 14 (256 bit key, 8192 byte blocks): 1 operation in 6039 cycles (8192 bytes) After: testing speed of ecb(aes) decryption test 0 (128 bit key, 16 byte blocks): 1 operation in 226 cycles (16 bytes) test 1 (128 bit key, 64 byte blocks): 1 operation in 231 cycles (64 bytes) test 2 (128 bit key, 256 byte blocks): 1 operation in 313 cycles (256 bytes) test 3 (128 bit key, 1024 byte blocks): 1 operation in 681 cycles (1024 bytes) test 4 (128 bit key, 8192 byte blocks): 1 operation in 3964 cycles (8192 bytes) test 5 (192 bit key, 16 byte blocks): 1 operation in 205 cycles (16 bytes) test 6 (192 bit key, 64 byte blocks): 1 operation in 240 cycles (64 bytes) test 7 (192 bit key, 256 byte blocks): 1 operation in 341 cycles (256 bytes) test 8 (192 bit key, 1024 byte blocks): 1 operation in 770 cycles (1024 bytes) test 9 (192 bit key, 8192 byte blocks): 1 operation in 5050 cycles (8192 bytes) test 10 (256 bit key, 16 byte blocks): 1 operation in 216 cycles (16 bytes) test 11 (256 bit key, 64 byte blocks): 1 operation in 250 cycles (64 
bytes) test 12 (256 bit key, 256 byte blocks): 1 operation in 371 cycles (256 bytes) test 13 (256 bit key, 1024 byte blocks): 1 operation in 869 cycles (1024 bytes) test 14 (256 bit key, 8192 byte blocks): 1 operation in 5494 cycles (8192 bytes) Signed-off-by: David S. Miller --- arch/sparc/crypto/aes_asm.S | 161 +++++++++++++++++++++++++++++++++++++++----- 1 file changed, 143 insertions(+), 18 deletions(-) (limited to 'arch/sparc/crypto') diff --git a/arch/sparc/crypto/aes_asm.S b/arch/sparc/crypto/aes_asm.S index 33d59c66f1e2..0bd3e04ac42d 100644 --- a/arch/sparc/crypto/aes_asm.S +++ b/arch/sparc/crypto/aes_asm.S @@ -161,12 +161,32 @@ AES_DROUND23(KEY_BASE + 4, T0, T1, I1) \ AES_DROUND01(KEY_BASE + 6, T0, T1, I0) +#define DECRYPT_TWO_ROUNDS_2(KEY_BASE, I0, I1, I2, I3, T0, T1, T2, T3) \ + AES_DROUND23(KEY_BASE + 0, I0, I1, T1) \ + AES_DROUND01(KEY_BASE + 2, I0, I1, T0) \ + AES_DROUND23(KEY_BASE + 0, I2, I3, T3) \ + AES_DROUND01(KEY_BASE + 2, I2, I3, T2) \ + AES_DROUND23(KEY_BASE + 4, T0, T1, I1) \ + AES_DROUND01(KEY_BASE + 6, T0, T1, I0) \ + AES_DROUND23(KEY_BASE + 4, T2, T3, I3) \ + AES_DROUND01(KEY_BASE + 6, T2, T3, I2) + #define DECRYPT_TWO_ROUNDS_LAST(KEY_BASE, I0, I1, T0, T1) \ AES_DROUND23(KEY_BASE + 0, I0, I1, T1) \ AES_DROUND01(KEY_BASE + 2, I0, I1, T0) \ AES_DROUND23_L(KEY_BASE + 4, T0, T1, I1) \ AES_DROUND01_L(KEY_BASE + 6, T0, T1, I0) +#define DECRYPT_TWO_ROUNDS_LAST_2(KEY_BASE, I0, I1, I2, I3, T0, T1, T2, T3) \ + AES_DROUND23(KEY_BASE + 0, I0, I1, T1) \ + AES_DROUND01(KEY_BASE + 2, I0, I1, T0) \ + AES_DROUND23(KEY_BASE + 0, I2, I3, T3) \ + AES_DROUND01(KEY_BASE + 2, I2, I3, T2) \ + AES_DROUND23_L(KEY_BASE + 4, T0, T1, I1) \ + AES_DROUND01_L(KEY_BASE + 6, T0, T1, I0) \ + AES_DROUND23_L(KEY_BASE + 4, T2, T3, I3) \ + AES_DROUND01_L(KEY_BASE + 6, T2, T3, I2) + /* 10 rounds */ #define DECRYPT_128(KEY_BASE, I0, I1, T0, T1) \ DECRYPT_TWO_ROUNDS(KEY_BASE + 0, I0, I1, T0, T1) \ @@ -175,6 +195,13 @@ DECRYPT_TWO_ROUNDS(KEY_BASE + 24, I0, I1, T0, T1) \ DECRYPT_TWO_ROUNDS_LAST(KEY_BASE + 32, I0, I1, T0, T1) +#define DECRYPT_128_2(KEY_BASE, I0, I1, I2, I3, T0, T1, T2, T3) \ + DECRYPT_TWO_ROUNDS_2(KEY_BASE + 0, I0, I1, I2, I3, T0, T1, T2, T3) \ + DECRYPT_TWO_ROUNDS_2(KEY_BASE + 8, I0, I1, I2, I3, T0, T1, T2, T3) \ + DECRYPT_TWO_ROUNDS_2(KEY_BASE + 16, I0, I1, I2, I3, T0, T1, T2, T3) \ + DECRYPT_TWO_ROUNDS_2(KEY_BASE + 24, I0, I1, I2, I3, T0, T1, T2, T3) \ + DECRYPT_TWO_ROUNDS_LAST_2(KEY_BASE + 32, I0, I1, I2, I3, T0, T1, T2, T3) + /* 12 rounds */ #define DECRYPT_192(KEY_BASE, I0, I1, T0, T1) \ DECRYPT_TWO_ROUNDS(KEY_BASE + 0, I0, I1, T0, T1) \ @@ -184,6 +211,14 @@ DECRYPT_TWO_ROUNDS(KEY_BASE + 32, I0, I1, T0, T1) \ DECRYPT_TWO_ROUNDS_LAST(KEY_BASE + 40, I0, I1, T0, T1) +#define DECRYPT_192_2(KEY_BASE, I0, I1, I2, I3, T0, T1, T2, T3) \ + DECRYPT_TWO_ROUNDS_2(KEY_BASE + 0, I0, I1, I2, I3, T0, T1, T2, T3) \ + DECRYPT_TWO_ROUNDS_2(KEY_BASE + 8, I0, I1, I2, I3, T0, T1, T2, T3) \ + DECRYPT_TWO_ROUNDS_2(KEY_BASE + 16, I0, I1, I2, I3, T0, T1, T2, T3) \ + DECRYPT_TWO_ROUNDS_2(KEY_BASE + 24, I0, I1, I2, I3, T0, T1, T2, T3) \ + DECRYPT_TWO_ROUNDS_2(KEY_BASE + 32, I0, I1, I2, I3, T0, T1, T2, T3) \ + DECRYPT_TWO_ROUNDS_LAST_2(KEY_BASE + 40, I0, I1, I2, I3, T0, T1, T2, T3) + /* 14 rounds */ #define DECRYPT_256(KEY_BASE, I0, I1, T0, T1) \ DECRYPT_TWO_ROUNDS(KEY_BASE + 0, I0, I1, T0, T1) \ @@ -194,6 +229,32 @@ DECRYPT_TWO_ROUNDS(KEY_BASE + 40, I0, I1, T0, T1) \ DECRYPT_TWO_ROUNDS_LAST(KEY_BASE + 48, I0, I1, T0, T1) +#define DECRYPT_256_TWO_ROUNDS_2(KEY_BASE, I0, I1, I2, I3, TMP_BASE) \ + 
DECRYPT_TWO_ROUNDS_2(KEY_BASE, I0, I1, I2, I3, \ + TMP_BASE + 0, TMP_BASE + 2, TMP_BASE + 4, TMP_BASE + 6) + +#define DECRYPT_256_2(KEY_BASE, I0, I1, I2, I3) \ + DECRYPT_256_TWO_ROUNDS_2(KEY_BASE + 0, I0, I1, I2, I3, KEY_BASE + 48) \ + ldd [%o0 + 0x18], %f56; \ + ldd [%o0 + 0x10], %f58; \ + DECRYPT_256_TWO_ROUNDS_2(KEY_BASE + 8, I0, I1, I2, I3, KEY_BASE + 0) \ + DECRYPT_256_TWO_ROUNDS_2(KEY_BASE + 16, I0, I1, I2, I3, KEY_BASE + 0) \ + DECRYPT_256_TWO_ROUNDS_2(KEY_BASE + 24, I0, I1, I2, I3, KEY_BASE + 0) \ + DECRYPT_256_TWO_ROUNDS_2(KEY_BASE + 32, I0, I1, I2, I3, KEY_BASE + 0) \ + DECRYPT_256_TWO_ROUNDS_2(KEY_BASE + 40, I0, I1, I2, I3, KEY_BASE + 0) \ + AES_DROUND23(KEY_BASE + 48, I0, I1, KEY_BASE + 2) \ + AES_DROUND01(KEY_BASE + 50, I0, I1, KEY_BASE + 0) \ + AES_DROUND23(KEY_BASE + 48, I2, I3, KEY_BASE + 6) \ + AES_DROUND01(KEY_BASE + 50, I2, I3, KEY_BASE + 4) \ + AES_DROUND23_L(KEY_BASE + 52, KEY_BASE + 0, KEY_BASE + 2, I1) \ + AES_DROUND01_L(KEY_BASE + 54, KEY_BASE + 0, KEY_BASE + 2, I0) \ + ldd [%o0 + 0xd8], %f8; \ + ldd [%o0 + 0xd0], %f10; \ + AES_DROUND23_L(KEY_BASE + 52, KEY_BASE + 4, KEY_BASE + 6, I3) \ + AES_DROUND01_L(KEY_BASE + 54, KEY_BASE + 4, KEY_BASE + 6, I2) + ldd [%o0 + 0xc8], %f12; \ + ldd [%o0 + 0xc0], %f14; + .align 32 ENTRY(aes_sparc64_key_expand) /* %o0=input_key, %o1=output_key, %o2=key_len */ @@ -1028,10 +1089,34 @@ ENDPROC(aes_sparc64_ecb_encrypt_256) ENTRY(aes_sparc64_ecb_decrypt_128) /* %o0=&key[key_len], %o1=input, %o2=output, %o3=len */ ldx [%o0 - 0x10], %g1 - ldx [%o0 - 0x08], %g2 + subcc %o3, 0x10, %o3 + be 10f + ldx [%o0 - 0x08], %g2 1: ldx [%o1 + 0x00], %g3 ldx [%o1 + 0x08], %g7 - add %o1, 0x10, %o1 + ldx [%o1 + 0x10], %o4 + ldx [%o1 + 0x18], %o5 + xor %g1, %g3, %g3 + xor %g2, %g7, %g7 + MOVXTOD_G3_F4 + MOVXTOD_G7_F6 + xor %g1, %o4, %g3 + xor %g2, %o5, %g7 + MOVXTOD_G3_F60 + MOVXTOD_G7_F62 + DECRYPT_128_2(8, 4, 6, 60, 62, 0, 2, 56, 58) + std %f4, [%o2 + 0x00] + std %f6, [%o2 + 0x08] + std %f60, [%o2 + 0x10] + std %f62, [%o2 + 0x18] + sub %o3, 0x20, %o3 + add %o1, 0x20, %o1 + brgz,pt %o3, 1b + add %o2, 0x20, %o2 + brlz,pt %o3, 11f + nop +10: ldx [%o1 + 0x00], %g3 + ldx [%o1 + 0x08], %g7 xor %g1, %g3, %g3 xor %g2, %g7, %g7 MOVXTOD_G3_F4 @@ -1039,10 +1124,7 @@ ENTRY(aes_sparc64_ecb_decrypt_128) DECRYPT_128(8, 4, 6, 0, 2) std %f4, [%o2 + 0x00] std %f6, [%o2 + 0x08] - subcc %o3, 0x10, %o3 - bne,pt %xcc, 1b - add %o2, 0x10, %o2 - retl +11: retl nop ENDPROC(aes_sparc64_ecb_decrypt_128) @@ -1050,10 +1132,34 @@ ENDPROC(aes_sparc64_ecb_decrypt_128) ENTRY(aes_sparc64_ecb_decrypt_192) /* %o0=&key[key_len], %o1=input, %o2=output, %o3=len */ ldx [%o0 - 0x10], %g1 - ldx [%o0 - 0x08], %g2 + subcc %o3, 0x10, %o3 + be 10f + ldx [%o0 - 0x08], %g2 1: ldx [%o1 + 0x00], %g3 ldx [%o1 + 0x08], %g7 - add %o1, 0x10, %o1 + ldx [%o1 + 0x10], %o4 + ldx [%o1 + 0x18], %o5 + xor %g1, %g3, %g3 + xor %g2, %g7, %g7 + MOVXTOD_G3_F4 + MOVXTOD_G7_F6 + xor %g1, %o4, %g3 + xor %g2, %o5, %g7 + MOVXTOD_G3_F60 + MOVXTOD_G7_F62 + DECRYPT_192_2(8, 4, 6, 60, 62, 0, 2, 56, 58) + std %f4, [%o2 + 0x00] + std %f6, [%o2 + 0x08] + std %f60, [%o2 + 0x10] + std %f62, [%o2 + 0x18] + sub %o3, 0x20, %o3 + add %o1, 0x20, %o1 + brgz,pt %o3, 1b + add %o2, 0x20, %o2 + brlz,pt %o3, 11f + nop +10: ldx [%o1 + 0x00], %g3 + ldx [%o1 + 0x08], %g7 xor %g1, %g3, %g3 xor %g2, %g7, %g7 MOVXTOD_G3_F4 @@ -1061,10 +1167,7 @@ ENTRY(aes_sparc64_ecb_decrypt_192) DECRYPT_192(8, 4, 6, 0, 2) std %f4, [%o2 + 0x00] std %f6, [%o2 + 0x08] - subcc %o3, 0x10, %o3 - bne,pt %xcc, 1b - add %o2, 0x10, %o2 - retl +11: retl nop 
ENDPROC(aes_sparc64_ecb_decrypt_192) @@ -1072,10 +1175,35 @@ ENDPROC(aes_sparc64_ecb_decrypt_192) ENTRY(aes_sparc64_ecb_decrypt_256) /* %o0=&key[key_len], %o1=input, %o2=output, %o3=len */ ldx [%o0 - 0x10], %g1 - ldx [%o0 - 0x08], %g2 + subcc %o3, 0x10, %o3 + be 10f + ldx [%o0 - 0x08], %g2 + sub %o0, 0xf0, %o0 1: ldx [%o1 + 0x00], %g3 ldx [%o1 + 0x08], %g7 - add %o1, 0x10, %o1 + ldx [%o1 + 0x10], %o4 + ldx [%o1 + 0x18], %o5 + xor %g1, %g3, %g3 + xor %g2, %g7, %g7 + MOVXTOD_G3_F4 + MOVXTOD_G7_F6 + xor %g1, %o4, %g3 + xor %g2, %o5, %g7 + MOVXTOD_G3_F0 + MOVXTOD_G7_F2 + DECRYPT_256_2(8, 4, 6, 0, 2) + std %f4, [%o2 + 0x00] + std %f6, [%o2 + 0x08] + std %f60, [%o2 + 0x10] + std %f62, [%o2 + 0x18] + sub %o3, 0x20, %o3 + add %o1, 0x20, %o1 + brgz,pt %o3, 1b + add %o2, 0x20, %o2 + brlz,pt %o3, 11f + nop +10: ldx [%o1 + 0x00], %g3 + ldx [%o1 + 0x08], %g7 xor %g1, %g3, %g3 xor %g2, %g7, %g7 MOVXTOD_G3_F4 @@ -1083,10 +1211,7 @@ ENTRY(aes_sparc64_ecb_decrypt_256) DECRYPT_256(8, 4, 6, 0, 2) std %f4, [%o2 + 0x00] std %f6, [%o2 + 0x08] - subcc %o3, 0x10, %o3 - bne,pt %xcc, 1b - add %o2, 0x10, %o2 - retl +11: retl nop ENDPROC(aes_sparc64_ecb_decrypt_256) -- cgit v1.2.3 From 4e71bb49f256e4efc94a9fdaaa430d906cd88e6b Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Thu, 30 Aug 2012 08:40:44 -0700 Subject: sparc64: Unroll CTR crypt loops in AES driver. Before: testing speed of ctr(aes) encryption test 0 (128 bit key, 16 byte blocks): 1 operation in 206 cycles (16 bytes) test 1 (128 bit key, 64 byte blocks): 1 operation in 244 cycles (64 bytes) test 2 (128 bit key, 256 byte blocks): 1 operation in 360 cycles (256 bytes) test 3 (128 bit key, 1024 byte blocks): 1 operation in 814 cycles (1024 bytes) test 4 (128 bit key, 8192 byte blocks): 1 operation in 5021 cycles (8192 bytes) test 5 (192 bit key, 16 byte blocks): 1 operation in 206 cycles (16 bytes) test 6 (192 bit key, 64 byte blocks): 1 operation in 240 cycles (64 bytes) test 7 (192 bit key, 256 byte blocks): 1 operation in 378 cycles (256 bytes) test 8 (192 bit key, 1024 byte blocks): 1 operation in 939 cycles (1024 bytes) test 9 (192 bit key, 8192 byte blocks): 1 operation in 6395 cycles (8192 bytes) test 10 (256 bit key, 16 byte blocks): 1 operation in 209 cycles (16 bytes) test 11 (256 bit key, 64 byte blocks): 1 operation in 249 cycles (64 bytes) test 12 (256 bit key, 256 byte blocks): 1 operation in 414 cycles (256 bytes) test 13 (256 bit key, 1024 byte blocks): 1 operation in 1073 cycles (1024 bytes) test 14 (256 bit key, 8192 byte blocks): 1 operation in 7110 cycles (8192 bytes) testing speed of ctr(aes) decryption test 0 (128 bit key, 16 byte blocks): 1 operation in 225 cycles (16 bytes) test 1 (128 bit key, 64 byte blocks): 1 operation in 233 cycles (64 bytes) test 2 (128 bit key, 256 byte blocks): 1 operation in 344 cycles (256 bytes) test 3 (128 bit key, 1024 byte blocks): 1 operation in 810 cycles (1024 bytes) test 4 (128 bit key, 8192 byte blocks): 1 operation in 5021 cycles (8192 bytes) test 5 (192 bit key, 16 byte blocks): 1 operation in 206 cycles (16 bytes) test 6 (192 bit key, 64 byte blocks): 1 operation in 240 cycles (64 bytes) test 7 (192 bit key, 256 byte blocks): 1 operation in 376 cycles (256 bytes) test 8 (192 bit key, 1024 byte blocks): 1 operation in 938 cycles (1024 bytes) test 9 (192 bit key, 8192 byte blocks): 1 operation in 6380 cycles (8192 bytes) test 10 (256 bit key, 16 byte blocks): 1 operation in 214 cycles (16 bytes) test 11 (256 bit key, 64 byte blocks): 1 operation in 251 cycles (64 bytes) test 12 (256 bit key, 
256 byte blocks): 1 operation in 411 cycles (256 bytes) test 13 (256 bit key, 1024 byte blocks): 1 operation in 1070 cycles (1024 bytes) test 14 (256 bit key, 8192 byte blocks): 1 operation in 7114 cycles (8192 bytes) After: testing speed of ctr(aes) encryption test 0 (128 bit key, 16 byte blocks): 1 operation in 211 cycles (16 bytes) test 1 (128 bit key, 64 byte blocks): 1 operation in 246 cycles (64 bytes) test 2 (128 bit key, 256 byte blocks): 1 operation in 344 cycles (256 bytes) test 3 (128 bit key, 1024 byte blocks): 1 operation in 799 cycles (1024 bytes) test 4 (128 bit key, 8192 byte blocks): 1 operation in 4975 cycles (8192 bytes) test 5 (192 bit key, 16 byte blocks): 1 operation in 210 cycles (16 bytes) test 6 (192 bit key, 64 byte blocks): 1 operation in 236 cycles (64 bytes) test 7 (192 bit key, 256 byte blocks): 1 operation in 365 cycles (256 bytes) test 8 (192 bit key, 1024 byte blocks): 1 operation in 888 cycles (1024 bytes) test 9 (192 bit key, 8192 byte blocks): 1 operation in 6055 cycles (8192 bytes) test 10 (256 bit key, 16 byte blocks): 1 operation in 209 cycles (16 bytes) test 11 (256 bit key, 64 byte blocks): 1 operation in 255 cycles (64 bytes) test 12 (256 bit key, 256 byte blocks): 1 operation in 404 cycles (256 bytes) test 13 (256 bit key, 1024 byte blocks): 1 operation in 1010 cycles (1024 bytes) test 14 (256 bit key, 8192 byte blocks): 1 operation in 6669 cycles (8192 bytes) testing speed of ctr(aes) decryption test 0 (128 bit key, 16 byte blocks): 1 operation in 210 cycles (16 bytes) test 1 (128 bit key, 64 byte blocks): 1 operation in 233 cycles (64 bytes) test 2 (128 bit key, 256 byte blocks): 1 operation in 340 cycles (256 bytes) test 3 (128 bit key, 1024 byte blocks): 1 operation in 818 cycles (1024 bytes) test 4 (128 bit key, 8192 byte blocks): 1 operation in 4956 cycles (8192 bytes) test 5 (192 bit key, 16 byte blocks): 1 operation in 206 cycles (16 bytes) test 6 (192 bit key, 64 byte blocks): 1 operation in 239 cycles (64 bytes) test 7 (192 bit key, 256 byte blocks): 1 operation in 361 cycles (256 bytes) test 8 (192 bit key, 1024 byte blocks): 1 operation in 888 cycles (1024 bytes) test 9 (192 bit key, 8192 byte blocks): 1 operation in 5996 cycles (8192 bytes) test 10 (256 bit key, 16 byte blocks): 1 operation in 214 cycles (16 bytes) test 11 (256 bit key, 64 byte blocks): 1 operation in 248 cycles (64 bytes) test 12 (256 bit key, 256 byte blocks): 1 operation in 395 cycles (256 bytes) test 13 (256 bit key, 1024 byte blocks): 1 operation in 1010 cycles (1024 bytes) test 14 (256 bit key, 8192 byte blocks): 1 operation in 6664 cycles (8192 bytes) Signed-off-by: David S. 
Miller --- arch/sparc/crypto/aes_asm.S | 142 ++++++++++++++++++++++++++++++++++++-------- 1 file changed, 118 insertions(+), 24 deletions(-) (limited to 'arch/sparc/crypto') diff --git a/arch/sparc/crypto/aes_asm.S b/arch/sparc/crypto/aes_asm.S index 0bd3e04ac42d..0fadad0c60ad 100644 --- a/arch/sparc/crypto/aes_asm.S +++ b/arch/sparc/crypto/aes_asm.S @@ -48,6 +48,10 @@ .word 0x81b0230d; #define MOVXTOD_O5_F2 \ .word 0x85b0230d; +#define MOVXTOD_O5_F4 \ + .word 0x89b0230d; +#define MOVXTOD_O5_F6 \ + .word 0x8db0230d; #define MOVXTOD_G3_F60 \ .word 0xbbb02303; #define MOVXTOD_G7_F62 \ @@ -1400,9 +1404,44 @@ ENTRY(aes_sparc64_ctr_crypt_128) /* %o0=key, %o1=input, %o2=output, %o3=len, %o4=IV */ ldx [%o4 + 0x00], %g3 ldx [%o4 + 0x08], %g7 + subcc %o3, 0x10, %o3 ldx [%o0 + 0x00], %g1 - ldx [%o0 + 0x08], %g2 + be 10f + ldx [%o0 + 0x08], %g2 1: xor %g1, %g3, %o5 + MOVXTOD_O5_F0 + xor %g2, %g7, %o5 + MOVXTOD_O5_F2 + add %g7, 1, %g7 + add %g3, 1, %o5 + movrz %g7, %o5, %g3 + xor %g1, %g3, %o5 + MOVXTOD_O5_F4 + xor %g2, %g7, %o5 + MOVXTOD_O5_F6 + add %g7, 1, %g7 + add %g3, 1, %o5 + movrz %g7, %o5, %g3 + ENCRYPT_128_2(8, 0, 2, 4, 6, 56, 58, 60, 62) + ldd [%o1 + 0x00], %f56 + ldd [%o1 + 0x08], %f58 + ldd [%o1 + 0x10], %f60 + ldd [%o1 + 0x18], %f62 + fxor %f56, %f0, %f56 + fxor %f58, %f2, %f58 + fxor %f60, %f4, %f60 + fxor %f62, %f6, %f62 + std %f56, [%o2 + 0x00] + std %f58, [%o2 + 0x08] + std %f60, [%o2 + 0x10] + std %f62, [%o2 + 0x18] + subcc %o3, 0x20, %o3 + add %o1, 0x20, %o1 + brgz %o3, 1b + add %o2, 0x20, %o2 + brlz,pt %o3, 11f + nop +10: xor %g1, %g3, %o5 MOVXTOD_O5_F0 xor %g2, %g7, %o5 MOVXTOD_O5_F2 @@ -1416,14 +1455,9 @@ ENTRY(aes_sparc64_ctr_crypt_128) fxor %f6, %f2, %f6 std %f4, [%o2 + 0x00] std %f6, [%o2 + 0x08] - subcc %o3, 0x10, %o3 - add %o1, 0x10, %o1 - bne,pt %xcc, 1b - add %o2, 0x10, %o2 - stx %g3, [%o4 + 0x00] - stx %g7, [%o4 + 0x08] +11: stx %g3, [%o4 + 0x00] retl - nop + stx %g7, [%o4 + 0x08] ENDPROC(aes_sparc64_ctr_crypt_128) .align 32 @@ -1431,9 +1465,44 @@ ENTRY(aes_sparc64_ctr_crypt_192) /* %o0=key, %o1=input, %o2=output, %o3=len, %o4=IV */ ldx [%o4 + 0x00], %g3 ldx [%o4 + 0x08], %g7 + subcc %o3, 0x10, %o3 ldx [%o0 + 0x00], %g1 - ldx [%o0 + 0x08], %g2 + be 10f + ldx [%o0 + 0x08], %g2 1: xor %g1, %g3, %o5 + MOVXTOD_O5_F0 + xor %g2, %g7, %o5 + MOVXTOD_O5_F2 + add %g7, 1, %g7 + add %g3, 1, %o5 + movrz %g7, %o5, %g3 + xor %g1, %g3, %o5 + MOVXTOD_O5_F4 + xor %g2, %g7, %o5 + MOVXTOD_O5_F6 + add %g7, 1, %g7 + add %g3, 1, %o5 + movrz %g7, %o5, %g3 + ENCRYPT_192_2(8, 0, 2, 4, 6, 56, 58, 60, 62) + ldd [%o1 + 0x00], %f56 + ldd [%o1 + 0x08], %f58 + ldd [%o1 + 0x10], %f60 + ldd [%o1 + 0x18], %f62 + fxor %f56, %f0, %f56 + fxor %f58, %f2, %f58 + fxor %f60, %f4, %f60 + fxor %f62, %f6, %f62 + std %f56, [%o2 + 0x00] + std %f58, [%o2 + 0x08] + std %f60, [%o2 + 0x10] + std %f62, [%o2 + 0x18] + subcc %o3, 0x20, %o3 + add %o1, 0x20, %o1 + brgz %o3, 1b + add %o2, 0x20, %o2 + brlz,pt %o3, 11f + nop +10: xor %g1, %g3, %o5 MOVXTOD_O5_F0 xor %g2, %g7, %o5 MOVXTOD_O5_F2 @@ -1447,14 +1516,9 @@ ENTRY(aes_sparc64_ctr_crypt_192) fxor %f6, %f2, %f6 std %f4, [%o2 + 0x00] std %f6, [%o2 + 0x08] - subcc %o3, 0x10, %o3 - add %o1, 0x10, %o1 - bne,pt %xcc, 1b - add %o2, 0x10, %o2 - stx %g3, [%o4 + 0x00] - stx %g7, [%o4 + 0x08] +11: stx %g3, [%o4 + 0x00] retl - nop + stx %g7, [%o4 + 0x08] ENDPROC(aes_sparc64_ctr_crypt_192) .align 32 @@ -1462,9 +1526,44 @@ ENTRY(aes_sparc64_ctr_crypt_256) /* %o0=key, %o1=input, %o2=output, %o3=len, %o4=IV */ ldx [%o4 + 0x00], %g3 ldx [%o4 + 0x08], %g7 + subcc %o3, 0x10, %o3 ldx [%o0 + 
0x00], %g1 - ldx [%o0 + 0x08], %g2 + be 10f + ldx [%o0 + 0x08], %g2 1: xor %g1, %g3, %o5 + MOVXTOD_O5_F0 + xor %g2, %g7, %o5 + MOVXTOD_O5_F2 + add %g7, 1, %g7 + add %g3, 1, %o5 + movrz %g7, %o5, %g3 + xor %g1, %g3, %o5 + MOVXTOD_O5_F4 + xor %g2, %g7, %o5 + MOVXTOD_O5_F6 + add %g7, 1, %g7 + add %g3, 1, %o5 + movrz %g7, %o5, %g3 + ENCRYPT_256_2(8, 0, 2, 4, 6) + ldd [%o1 + 0x00], %f56 + ldd [%o1 + 0x08], %f58 + ldd [%o1 + 0x10], %f60 + ldd [%o1 + 0x18], %f62 + fxor %f56, %f0, %f56 + fxor %f58, %f2, %f58 + fxor %f60, %f4, %f60 + fxor %f62, %f6, %f62 + std %f56, [%o2 + 0x00] + std %f58, [%o2 + 0x08] + std %f60, [%o2 + 0x10] + std %f62, [%o2 + 0x18] + subcc %o3, 0x20, %o3 + add %o1, 0x20, %o1 + brgz %o3, 1b + add %o2, 0x20, %o2 + brlz,pt %o3, 11f + nop +10: xor %g1, %g3, %o5 MOVXTOD_O5_F0 xor %g2, %g7, %o5 MOVXTOD_O5_F2 @@ -1478,12 +1577,7 @@ ENTRY(aes_sparc64_ctr_crypt_256) fxor %f6, %f2, %f6 std %f4, [%o2 + 0x00] std %f6, [%o2 + 0x08] - subcc %o3, 0x10, %o3 - add %o1, 0x10, %o1 - bne,pt %xcc, 1b - add %o2, 0x10, %o2 - stx %g3, [%o4 + 0x00] - stx %g7, [%o4 + 0x08] +11: stx %g3, [%o4 + 0x00] retl - nop + stx %g7, [%o4 + 0x08] ENDPROC(aes_sparc64_ctr_crypt_256) -- cgit v1.2.3 From 7cff82f5f42a938a1b633e121a41d29c81de18bb Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Fri, 31 Aug 2012 12:11:51 -0700 Subject: sparc64: Avoid code duplication in crypto assembler. Put the opcode macros in a common header Signed-off-by: David S. Miller --- arch/sparc/crypto/aes_asm.S | 56 +---------------------- arch/sparc/crypto/camellia_asm.S | 22 +-------- arch/sparc/crypto/crc32c_asm.S | 11 +---- arch/sparc/crypto/des_asm.S | 24 +--------- arch/sparc/crypto/md5_asm.S | 8 ++-- arch/sparc/crypto/opcodes.h | 97 ++++++++++++++++++++++++++++++++++++++++ arch/sparc/crypto/sha1_asm.S | 8 ++-- arch/sparc/crypto/sha256_asm.S | 8 ++-- arch/sparc/crypto/sha512_asm.S | 8 ++-- 9 files changed, 117 insertions(+), 125 deletions(-) create mode 100644 arch/sparc/crypto/opcodes.h (limited to 'arch/sparc/crypto') diff --git a/arch/sparc/crypto/aes_asm.S b/arch/sparc/crypto/aes_asm.S index 0fadad0c60ad..20860885f068 100644 --- a/arch/sparc/crypto/aes_asm.S +++ b/arch/sparc/crypto/aes_asm.S @@ -1,61 +1,7 @@ #include #include -#define F3F(x,y,z) (((x)<<30)|((y)<<19)|((z)<<5)) - -#define FPD_ENCODE(x) (((x) >> 5) | ((x) & ~(0x20))) - -#define RS1(x) (FPD_ENCODE(x) << 14) -#define RS2(x) (FPD_ENCODE(x) << 0) -#define RS3(x) (FPD_ENCODE(x) << 9) -#define RD(x) (FPD_ENCODE(x) << 25) -#define IMM5(x) ((x) << 9) - -#define AES_EROUND01(a,b,c,d) \ - .word (F3F(2, 0x19, 0)|RS1(a)|RS2(b)|RS3(c)|RD(d)); -#define AES_EROUND23(a,b,c,d) \ - .word (F3F(2, 0x19, 1)|RS1(a)|RS2(b)|RS3(c)|RD(d)); -#define AES_DROUND01(a,b,c,d) \ - .word (F3F(2, 0x19, 2)|RS1(a)|RS2(b)|RS3(c)|RD(d)); -#define AES_DROUND23(a,b,c,d) \ - .word (F3F(2, 0x19, 3)|RS1(a)|RS2(b)|RS3(c)|RD(d)); -#define AES_EROUND01_L(a,b,c,d) \ - .word (F3F(2, 0x19, 4)|RS1(a)|RS2(b)|RS3(c)|RD(d)); -#define AES_EROUND23_L(a,b,c,d) \ - .word (F3F(2, 0x19, 5)|RS1(a)|RS2(b)|RS3(c)|RD(d)); -#define AES_DROUND01_L(a,b,c,d) \ - .word (F3F(2, 0x19, 6)|RS1(a)|RS2(b)|RS3(c)|RD(d)); -#define AES_DROUND23_L(a,b,c,d) \ - .word (F3F(2, 0x19, 7)|RS1(a)|RS2(b)|RS3(c)|RD(d)); -#define AES_KEXPAND1(a,b,c,d) \ - .word (F3F(2, 0x19, 8)|RS1(a)|RS2(b)|IMM5(c)|RD(d)); -#define AES_KEXPAND0(a,b,c) \ - .word (F3F(2, 0x36, 0x130)|RS1(a)|RS2(b)|RD(c)); -#define AES_KEXPAND2(a,b,c) \ - .word (F3F(2, 0x36, 0x131)|RS1(a)|RS2(b)|RD(c)); - -#define MOVXTOD_G3_F4 \ - .word 0x89b02303; -#define MOVXTOD_G7_F6 \ - .word 
0x8db02307; -#define MOVXTOD_G3_F0 \ - .word 0x81b02303; -#define MOVXTOD_G7_F2 \ - .word 0x85b02307; -#define MOVXTOD_O0_F0 \ - .word 0x81b02308; -#define MOVXTOD_O5_F0 \ - .word 0x81b0230d; -#define MOVXTOD_O5_F2 \ - .word 0x85b0230d; -#define MOVXTOD_O5_F4 \ - .word 0x89b0230d; -#define MOVXTOD_O5_F6 \ - .word 0x8db0230d; -#define MOVXTOD_G3_F60 \ - .word 0xbbb02303; -#define MOVXTOD_G7_F62 \ - .word 0xbfb02307; +#include "opcodes.h" #define ENCRYPT_TWO_ROUNDS(KEY_BASE, I0, I1, T0, T1) \ AES_EROUND01(KEY_BASE + 0, I0, I1, T0) \ diff --git a/arch/sparc/crypto/camellia_asm.S b/arch/sparc/crypto/camellia_asm.S index b0ddb5bcfe5f..d3ef9c7bbd02 100644 --- a/arch/sparc/crypto/camellia_asm.S +++ b/arch/sparc/crypto/camellia_asm.S @@ -1,27 +1,7 @@ #include #include -#define F3F(x,y,z) (((x)<<30)|((y)<<19)|((z)<<5)) - -#define FPD_ENCODE(x) (((x) >> 5) | ((x) & ~(0x20))) - -#define RS1(x) (FPD_ENCODE(x) << 14) -#define RS2(x) (FPD_ENCODE(x) << 0) -#define RS3(x) (FPD_ENCODE(x) << 9) -#define RD(x) (FPD_ENCODE(x) << 25) -#define IMM5(x) ((x) << 0) - -#define CAMELLIA_F(a,b,c,d) \ - .word (F3F(2, 0x19, 0x00c)|RS1(a)|RS2(b)|RS3(c)|RD(d)); -#define CAMELLIA_FL(a,b,c) \ - .word (F3F(2, 0x36, 0x13c)|RS1(a)|RS2(b)|RD(c)); -#define CAMELLIA_FLI(a,b,c) \ - .word (F3F(2, 0x36, 0x13d)|RS1(a)|RS2(b)|RD(c)); - -#define MOVDTOX_F0_O4 \ - .word 0x99b02200 -#define MOVDTOX_F2_O5 \ - .word 0x9bb02202 +#include "opcodes.h" #define CAMELLIA_6ROUNDS(KEY_BASE, I0, I1) \ CAMELLIA_F(KEY_BASE + 0, I1, I0, I1) \ diff --git a/arch/sparc/crypto/crc32c_asm.S b/arch/sparc/crypto/crc32c_asm.S index cb479ec72433..2b1976e765b5 100644 --- a/arch/sparc/crypto/crc32c_asm.S +++ b/arch/sparc/crypto/crc32c_asm.S @@ -2,16 +2,7 @@ #include #include -#define F3F(x,y,z) (((x)<<30)|((y)<<19)|((z)<<5)) - -#define FPD_ENCODE(x) (((x) >> 5) | ((x) & ~(0x20))) - -#define RS1(x) (FPD_ENCODE(x) << 14) -#define RS2(x) (FPD_ENCODE(x) << 0) -#define RD(x) (FPD_ENCODE(x) << 25) - -#define CRC32C(a,b,c) \ - .word (F3F(2,0x36,0x147)|RS1(a)|RS2(b)|RD(c)); +#include "opcodes.h" ENTRY(crc32c_sparc64) /* %o0=crc32p, %o1=data_ptr, %o2=len */ diff --git a/arch/sparc/crypto/des_asm.S b/arch/sparc/crypto/des_asm.S index 589481e53d07..a7da935796e1 100644 --- a/arch/sparc/crypto/des_asm.S +++ b/arch/sparc/crypto/des_asm.S @@ -1,29 +1,7 @@ #include #include -#define F3F(x,y,z) (((x)<<30)|((y)<<19)|((z)<<5)) - -#define FPD_ENCODE(x) (((x) >> 5) | ((x) & ~(0x20))) - -#define RS1(x) (FPD_ENCODE(x) << 14) -#define RS2(x) (FPD_ENCODE(x) << 0) -#define RS3(x) (FPD_ENCODE(x) << 9) -#define RD(x) (FPD_ENCODE(x) << 25) -#define IMM5(x) ((x) << 0) - -#define DES_IP(a,b) \ - .word (F3F(2, 0x36, 0x134)|RS1(a)|RD(b)); -#define DES_IIP(a,b) \ - .word (F3F(2, 0x36, 0x135)|RS1(a)|RD(b)); -#define DES_KEXPAND(a,b,c) \ - .word (F3F(2, 0x36, 0x136)|RS1(a)|IMM5(b)|RD(c)); -#define DES_ROUND(a,b,c,d) \ - .word (F3F(2, 0x19, 0x009)|RS1(a)|RS2(b)|RS3(c)|RD(d)); - -#define MOVXTOD_G1_F60 \ - .word 0xbbb02301 -#define MOVXTOD_G1_F62 \ - .word 0xbfb02301 +#include "opcodes.h" .align 32 ENTRY(des_sparc64_key_expand) diff --git a/arch/sparc/crypto/md5_asm.S b/arch/sparc/crypto/md5_asm.S index ff90903180eb..3150404e602e 100644 --- a/arch/sparc/crypto/md5_asm.S +++ b/arch/sparc/crypto/md5_asm.S @@ -1,6 +1,8 @@ #include #include +#include "opcodes.h" + ENTRY(md5_sparc64_transform) /* %o0 = digest, %o1 = data, %o2 = rounds */ VISEntryHalf @@ -21,8 +23,7 @@ ENTRY(md5_sparc64_transform) ldd [%o1 + 0x30], %f20 ldd [%o1 + 0x38], %f22 - /* md5 */ - .word 0x81b02800 + MD5 subcc %o2, 1, %o2 bne,pt 
%xcc, 1b @@ -58,8 +59,7 @@ ENTRY(md5_sparc64_transform) faligndata %f22, %f24, %f20 faligndata %f24, %f26, %f22 - /* md5 */ - .word 0x81b02800 + MD5 subcc %o2, 1, %o2 fsrc2 %f26, %f10 diff --git a/arch/sparc/crypto/opcodes.h b/arch/sparc/crypto/opcodes.h new file mode 100644 index 000000000000..b6c4a1556354 --- /dev/null +++ b/arch/sparc/crypto/opcodes.h @@ -0,0 +1,97 @@ +#ifndef _OPCODES_H +#define _OPCODES_H + +#define F3F(x,y,z) (((x)<<30)|((y)<<19)|((z)<<5)) + +#define FPD_ENCODE(x) (((x) >> 5) | ((x) & ~(0x20))) + +#define RS1(x) (FPD_ENCODE(x) << 14) +#define RS2(x) (FPD_ENCODE(x) << 0) +#define RS3(x) (FPD_ENCODE(x) << 9) +#define RD(x) (FPD_ENCODE(x) << 25) +#define IMM5_0(x) ((x) << 0) +#define IMM5_9(x) ((x) << 9) + +#define CRC32C(a,b,c) \ + .word (F3F(2,0x36,0x147)|RS1(a)|RS2(b)|RD(c)); + +#define MD5 \ + .word 0x81b02800; +#define SHA1 \ + .word 0x81b02820; +#define SHA256 \ + .word 0x81b02840; +#define SHA512 \ + .word 0x81b02860; + +#define AES_EROUND01(a,b,c,d) \ + .word (F3F(2, 0x19, 0)|RS1(a)|RS2(b)|RS3(c)|RD(d)); +#define AES_EROUND23(a,b,c,d) \ + .word (F3F(2, 0x19, 1)|RS1(a)|RS2(b)|RS3(c)|RD(d)); +#define AES_DROUND01(a,b,c,d) \ + .word (F3F(2, 0x19, 2)|RS1(a)|RS2(b)|RS3(c)|RD(d)); +#define AES_DROUND23(a,b,c,d) \ + .word (F3F(2, 0x19, 3)|RS1(a)|RS2(b)|RS3(c)|RD(d)); +#define AES_EROUND01_L(a,b,c,d) \ + .word (F3F(2, 0x19, 4)|RS1(a)|RS2(b)|RS3(c)|RD(d)); +#define AES_EROUND23_L(a,b,c,d) \ + .word (F3F(2, 0x19, 5)|RS1(a)|RS2(b)|RS3(c)|RD(d)); +#define AES_DROUND01_L(a,b,c,d) \ + .word (F3F(2, 0x19, 6)|RS1(a)|RS2(b)|RS3(c)|RD(d)); +#define AES_DROUND23_L(a,b,c,d) \ + .word (F3F(2, 0x19, 7)|RS1(a)|RS2(b)|RS3(c)|RD(d)); +#define AES_KEXPAND1(a,b,c,d) \ + .word (F3F(2, 0x19, 8)|RS1(a)|RS2(b)|IMM5_9(c)|RD(d)); +#define AES_KEXPAND0(a,b,c) \ + .word (F3F(2, 0x36, 0x130)|RS1(a)|RS2(b)|RD(c)); +#define AES_KEXPAND2(a,b,c) \ + .word (F3F(2, 0x36, 0x131)|RS1(a)|RS2(b)|RD(c)); + +#define DES_IP(a,b) \ + .word (F3F(2, 0x36, 0x134)|RS1(a)|RD(b)); +#define DES_IIP(a,b) \ + .word (F3F(2, 0x36, 0x135)|RS1(a)|RD(b)); +#define DES_KEXPAND(a,b,c) \ + .word (F3F(2, 0x36, 0x136)|RS1(a)|IMM5_0(b)|RD(c)); +#define DES_ROUND(a,b,c,d) \ + .word (F3F(2, 0x19, 0x009)|RS1(a)|RS2(b)|RS3(c)|RD(d)); + +#define CAMELLIA_F(a,b,c,d) \ + .word (F3F(2, 0x19, 0x00c)|RS1(a)|RS2(b)|RS3(c)|RD(d)); +#define CAMELLIA_FL(a,b,c) \ + .word (F3F(2, 0x36, 0x13c)|RS1(a)|RS2(b)|RD(c)); +#define CAMELLIA_FLI(a,b,c) \ + .word (F3F(2, 0x36, 0x13d)|RS1(a)|RS2(b)|RD(c)); + +#define MOVDTOX_F0_O4 \ + .word 0x99b02200 +#define MOVDTOX_F2_O5 \ + .word 0x9bb02202 +#define MOVXTOD_G1_F60 \ + .word 0xbbb02301 +#define MOVXTOD_G1_F62 \ + .word 0xbfb02301 +#define MOVXTOD_G3_F4 \ + .word 0x89b02303; +#define MOVXTOD_G7_F6 \ + .word 0x8db02307; +#define MOVXTOD_G3_F0 \ + .word 0x81b02303; +#define MOVXTOD_G7_F2 \ + .word 0x85b02307; +#define MOVXTOD_O0_F0 \ + .word 0x81b02308; +#define MOVXTOD_O5_F0 \ + .word 0x81b0230d; +#define MOVXTOD_O5_F2 \ + .word 0x85b0230d; +#define MOVXTOD_O5_F4 \ + .word 0x89b0230d; +#define MOVXTOD_O5_F6 \ + .word 0x8db0230d; +#define MOVXTOD_G3_F60 \ + .word 0xbbb02303; +#define MOVXTOD_G7_F62 \ + .word 0xbfb02307; + +#endif /* _OPCODES_H */ diff --git a/arch/sparc/crypto/sha1_asm.S b/arch/sparc/crypto/sha1_asm.S index 3cd218ac631b..219d10c5ae0e 100644 --- a/arch/sparc/crypto/sha1_asm.S +++ b/arch/sparc/crypto/sha1_asm.S @@ -1,6 +1,8 @@ #include #include +#include "opcodes.h" + ENTRY(sha1_sparc64_transform) /* %o0 = digest, %o1 = data, %o2 = rounds */ VISEntryHalf @@ -22,8 +24,7 @@ 
ENTRY(sha1_sparc64_transform) ldd [%o1 + 0x30], %f20 ldd [%o1 + 0x38], %f22 - /* sha1 */ - .word 0x81b02820 + SHA1 subcc %o2, 1, %o2 bne,pt %xcc, 1b @@ -60,8 +61,7 @@ ENTRY(sha1_sparc64_transform) faligndata %f22, %f24, %f20 faligndata %f24, %f26, %f22 - /* sha1 */ - .word 0x81b02820 + SHA1 subcc %o2, 1, %o2 fsrc2 %f26, %f10 diff --git a/arch/sparc/crypto/sha256_asm.S b/arch/sparc/crypto/sha256_asm.S index 9da84f5b61f5..b5f3d5826eb4 100644 --- a/arch/sparc/crypto/sha256_asm.S +++ b/arch/sparc/crypto/sha256_asm.S @@ -1,6 +1,8 @@ #include #include +#include "opcodes.h" + ENTRY(sha256_sparc64_transform) /* %o0 = digest, %o1 = data, %o2 = rounds */ VISEntryHalf @@ -25,8 +27,7 @@ ENTRY(sha256_sparc64_transform) ldd [%o1 + 0x30], %f20 ldd [%o1 + 0x38], %f22 - /* sha256 */ - .word 0x81b02840 + SHA256 subcc %o2, 1, %o2 bne,pt %xcc, 1b @@ -66,8 +67,7 @@ ENTRY(sha256_sparc64_transform) faligndata %f22, %f24, %f20 faligndata %f24, %f26, %f22 - /* sha256 */ - .word 0x81b02840 + SHA256 subcc %o2, 1, %o2 fsrc2 %f26, %f10 diff --git a/arch/sparc/crypto/sha512_asm.S b/arch/sparc/crypto/sha512_asm.S index 650ba5742f0e..54bfba713c0e 100644 --- a/arch/sparc/crypto/sha512_asm.S +++ b/arch/sparc/crypto/sha512_asm.S @@ -1,6 +1,8 @@ #include #include +#include "opcodes.h" + ENTRY(sha512_sparc64_transform) /* %o0 = digest, %o1 = data, %o2 = rounds */ VISEntry @@ -33,8 +35,7 @@ ENTRY(sha512_sparc64_transform) ldd [%o1 + 0x70], %f44 ldd [%o1 + 0x78], %f46 - /* sha512 */ - .word 0x81b02860 + SHA512 subcc %o2, 1, %o2 bne,pt %xcc, 1b @@ -90,8 +91,7 @@ ENTRY(sha512_sparc64_transform) faligndata %f46, %f48, %f44 faligndata %f48, %f50, %f46 - /* sha512 */ - .word 0x81b02860 + SHA512 subcc %o2, 1, %o2 fsrc2 %f50, %f18 -- cgit v1.2.3 From 699871bc943be418be13208526bc613d68017fab Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Sat, 1 Sep 2012 23:05:43 -0700 Subject: sparc64: Fix bugs in unrolled 256-bit loops. Some dm-crypt testing revealed several bugs in the 256-bit unrolled loops. The DECRYPT_256_2() macro had two errors: 1) Missing reload of KEY registers %f60 and %f62 2) Missing "\" in penultimate line of definition. In aes_sparc64_ecb_decrypt_256, we were storing the second half of the encryption result from the wrong source registers. In aes_sparc64_ctr_crypt_256 we have to be careful when we fall out of the 32-byte-at-a-time loop and handle a trailing 16-byte chunk. In that case we've clobbered the final key holding registers and have to restore them before executing the ENCRYPT_256() macro. Inside of the 32-byte-at-a-time loop things are OK, because we do this key register restoring during the first few rounds of the ENCRYPT_256_2() macro. Signed-off-by: David S. 
Miller --- arch/sparc/crypto/aes_asm.S | 12 +++++++++--- 1 file changed, 9 insertions(+), 3 deletions(-) (limited to 'arch/sparc/crypto') diff --git a/arch/sparc/crypto/aes_asm.S b/arch/sparc/crypto/aes_asm.S index 20860885f068..23f6cbb910d3 100644 --- a/arch/sparc/crypto/aes_asm.S +++ b/arch/sparc/crypto/aes_asm.S @@ -188,6 +188,8 @@ ldd [%o0 + 0x18], %f56; \ ldd [%o0 + 0x10], %f58; \ DECRYPT_256_TWO_ROUNDS_2(KEY_BASE + 8, I0, I1, I2, I3, KEY_BASE + 0) \ + ldd [%o0 + 0x08], %f60; \ + ldd [%o0 + 0x00], %f62; \ DECRYPT_256_TWO_ROUNDS_2(KEY_BASE + 16, I0, I1, I2, I3, KEY_BASE + 0) \ DECRYPT_256_TWO_ROUNDS_2(KEY_BASE + 24, I0, I1, I2, I3, KEY_BASE + 0) \ DECRYPT_256_TWO_ROUNDS_2(KEY_BASE + 32, I0, I1, I2, I3, KEY_BASE + 0) \ @@ -201,7 +203,7 @@ ldd [%o0 + 0xd8], %f8; \ ldd [%o0 + 0xd0], %f10; \ AES_DROUND23_L(KEY_BASE + 52, KEY_BASE + 4, KEY_BASE + 6, I3) \ - AES_DROUND01_L(KEY_BASE + 54, KEY_BASE + 4, KEY_BASE + 6, I2) + AES_DROUND01_L(KEY_BASE + 54, KEY_BASE + 4, KEY_BASE + 6, I2) \ ldd [%o0 + 0xc8], %f12; \ ldd [%o0 + 0xc0], %f14; @@ -1144,8 +1146,8 @@ ENTRY(aes_sparc64_ecb_decrypt_256) DECRYPT_256_2(8, 4, 6, 0, 2) std %f4, [%o2 + 0x00] std %f6, [%o2 + 0x08] - std %f60, [%o2 + 0x10] - std %f62, [%o2 + 0x18] + std %f0, [%o2 + 0x10] + std %f2, [%o2 + 0x18] sub %o3, 0x20, %o3 add %o1, 0x20, %o1 brgz,pt %o3, 1b @@ -1509,6 +1511,10 @@ ENTRY(aes_sparc64_ctr_crypt_256) add %o2, 0x20, %o2 brlz,pt %o3, 11f nop + ldd [%o0 + 0xd0], %f56 + ldd [%o0 + 0xd8], %f58 + ldd [%o0 + 0xe0], %f60 + ldd [%o0 + 0xe8], %f62 10: xor %g1, %g3, %o5 MOVXTOD_O5_F0 xor %g2, %g7, %o5 -- cgit v1.2.3 From 1080362425793f67e36dc690973e13e4a9631b4d Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Sat, 15 Sep 2012 09:06:30 -0700 Subject: sparc64: Adjust crypto priorities. Make the crypto opcode implementations have a higher priority than those provided by the ring buffer based Niagara crypto device. Also, several crypto opcode hashes were not setting the priority value at all. Signed-off-by: David S.
Miller --- arch/sparc/crypto/aes_glue.c | 10 ++++++---- arch/sparc/crypto/camellia_glue.c | 8 +++++--- arch/sparc/crypto/crc32c_glue.c | 4 +++- arch/sparc/crypto/des_glue.c | 14 ++++++++------ arch/sparc/crypto/md5_glue.c | 4 +++- arch/sparc/crypto/opcodes.h | 2 ++ arch/sparc/crypto/sha1_glue.c | 4 +++- arch/sparc/crypto/sha256_glue.c | 4 ++++ arch/sparc/crypto/sha512_glue.c | 4 ++++ drivers/crypto/n2_core.c | 2 +- 10 files changed, 39 insertions(+), 17 deletions(-) (limited to 'arch/sparc/crypto') diff --git a/arch/sparc/crypto/aes_glue.c b/arch/sparc/crypto/aes_glue.c index f457fc69edeb..180bed441aa0 100644 --- a/arch/sparc/crypto/aes_glue.c +++ b/arch/sparc/crypto/aes_glue.c @@ -26,6 +26,8 @@ #include #include +#include "opcodes.h" + struct aes_ops { void (*encrypt)(const u64 *key, const u32 *input, u32 *output); void (*decrypt)(const u64 *key, const u32 *input, u32 *output); @@ -356,7 +358,7 @@ static int ctr_crypt(struct blkcipher_desc *desc, static struct crypto_alg algs[] = { { .cra_name = "aes", .cra_driver_name = "aes-sparc64", - .cra_priority = 150, + .cra_priority = SPARC_CR_OPCODE_PRIORITY, .cra_flags = CRYPTO_ALG_TYPE_CIPHER, .cra_blocksize = AES_BLOCK_SIZE, .cra_ctxsize = sizeof(struct crypto_sparc64_aes_ctx), @@ -374,7 +376,7 @@ static struct crypto_alg algs[] = { { }, { .cra_name = "ecb(aes)", .cra_driver_name = "ecb-aes-sparc64", - .cra_priority = 150, + .cra_priority = SPARC_CR_OPCODE_PRIORITY, .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER, .cra_blocksize = AES_BLOCK_SIZE, .cra_ctxsize = sizeof(struct crypto_sparc64_aes_ctx), @@ -393,7 +395,7 @@ static struct crypto_alg algs[] = { { }, { .cra_name = "cbc(aes)", .cra_driver_name = "cbc-aes-sparc64", - .cra_priority = 150, + .cra_priority = SPARC_CR_OPCODE_PRIORITY, .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER, .cra_blocksize = AES_BLOCK_SIZE, .cra_ctxsize = sizeof(struct crypto_sparc64_aes_ctx), @@ -412,7 +414,7 @@ static struct crypto_alg algs[] = { { }, { .cra_name = "ctr(aes)", .cra_driver_name = "ctr-aes-sparc64", - .cra_priority = 150, + .cra_priority = SPARC_CR_OPCODE_PRIORITY, .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER, .cra_blocksize = AES_BLOCK_SIZE, .cra_ctxsize = sizeof(struct crypto_sparc64_aes_ctx), diff --git a/arch/sparc/crypto/camellia_glue.c b/arch/sparc/crypto/camellia_glue.c index c258cc550a6b..f45ae69d0d1a 100644 --- a/arch/sparc/crypto/camellia_glue.c +++ b/arch/sparc/crypto/camellia_glue.c @@ -14,6 +14,8 @@ #include #include +#include "opcodes.h" + #define CAMELLIA_MIN_KEY_SIZE 16 #define CAMELLIA_MAX_KEY_SIZE 32 #define CAMELLIA_BLOCK_SIZE 16 @@ -219,7 +221,7 @@ static int cbc_decrypt(struct blkcipher_desc *desc, static struct crypto_alg algs[] = { { .cra_name = "camellia", .cra_driver_name = "camellia-sparc64", - .cra_priority = 150, + .cra_priority = SPARC_CR_OPCODE_PRIORITY, .cra_flags = CRYPTO_ALG_TYPE_CIPHER, .cra_blocksize = CAMELLIA_BLOCK_SIZE, .cra_ctxsize = sizeof(struct camellia_sparc64_ctx), @@ -237,7 +239,7 @@ static struct crypto_alg algs[] = { { }, { .cra_name = "ecb(camellia)", .cra_driver_name = "ecb-camellia-sparc64", - .cra_priority = 150, + .cra_priority = SPARC_CR_OPCODE_PRIORITY, .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER, .cra_blocksize = CAMELLIA_BLOCK_SIZE, .cra_ctxsize = sizeof(struct camellia_sparc64_ctx), @@ -256,7 +258,7 @@ static struct crypto_alg algs[] = { { }, { .cra_name = "cbc(camellia)", .cra_driver_name = "cbc-camellia-sparc64", - .cra_priority = 150, + .cra_priority = SPARC_CR_OPCODE_PRIORITY, .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER, .cra_blocksize = CAMELLIA_BLOCK_SIZE, .cra_ctxsize = 
sizeof(struct camellia_sparc64_ctx), diff --git a/arch/sparc/crypto/crc32c_glue.c b/arch/sparc/crypto/crc32c_glue.c index ec31cdb20a14..0bd89cea8d8e 100644 --- a/arch/sparc/crypto/crc32c_glue.c +++ b/arch/sparc/crypto/crc32c_glue.c @@ -20,6 +20,8 @@ #include #include +#include "opcodes.h" + /* * Setting the seed allows arbitrary accumulators and flexible XOR policy * If your algorithm starts with ~0, then XOR with ~0 before you set @@ -130,7 +132,7 @@ static struct shash_alg alg = { .base = { .cra_name = "crc32c", .cra_driver_name = "crc32c-sparc64", - .cra_priority = 150, + .cra_priority = SPARC_CR_OPCODE_PRIORITY, .cra_blocksize = CHKSUM_BLOCK_SIZE, .cra_ctxsize = sizeof(u32), .cra_alignmask = 7, diff --git a/arch/sparc/crypto/des_glue.c b/arch/sparc/crypto/des_glue.c index 5ec0309e48c0..77d2ad6e899e 100644 --- a/arch/sparc/crypto/des_glue.c +++ b/arch/sparc/crypto/des_glue.c @@ -15,6 +15,8 @@ #include #include +#include "opcodes.h" + struct des_sparc64_ctx { u64 encrypt_expkey[DES_EXPKEY_WORDS / 2]; u64 decrypt_expkey[DES_EXPKEY_WORDS / 2]; @@ -371,7 +373,7 @@ static int cbc3_decrypt(struct blkcipher_desc *desc, static struct crypto_alg algs[] = { { .cra_name = "des", .cra_driver_name = "des-sparc64", - .cra_priority = 150, + .cra_priority = SPARC_CR_OPCODE_PRIORITY, .cra_flags = CRYPTO_ALG_TYPE_CIPHER, .cra_blocksize = DES_BLOCK_SIZE, .cra_ctxsize = sizeof(struct des_sparc64_ctx), @@ -389,7 +391,7 @@ static struct crypto_alg algs[] = { { }, { .cra_name = "ecb(des)", .cra_driver_name = "ecb-des-sparc64", - .cra_priority = 150, + .cra_priority = SPARC_CR_OPCODE_PRIORITY, .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER, .cra_blocksize = DES_BLOCK_SIZE, .cra_ctxsize = sizeof(struct des_sparc64_ctx), @@ -408,7 +410,7 @@ static struct crypto_alg algs[] = { { }, { .cra_name = "cbc(des)", .cra_driver_name = "cbc-des-sparc64", - .cra_priority = 150, + .cra_priority = SPARC_CR_OPCODE_PRIORITY, .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER, .cra_blocksize = DES_BLOCK_SIZE, .cra_ctxsize = sizeof(struct des_sparc64_ctx), @@ -427,7 +429,7 @@ static struct crypto_alg algs[] = { { }, { .cra_name = "des3_ede", .cra_driver_name = "des3_ede-sparc64", - .cra_priority = 150, + .cra_priority = SPARC_CR_OPCODE_PRIORITY, .cra_flags = CRYPTO_ALG_TYPE_CIPHER, .cra_blocksize = DES3_EDE_BLOCK_SIZE, .cra_ctxsize = sizeof(struct des3_ede_sparc64_ctx), @@ -445,7 +447,7 @@ static struct crypto_alg algs[] = { { }, { .cra_name = "ecb(des3_ede)", .cra_driver_name = "ecb-des3_ede-sparc64", - .cra_priority = 150, + .cra_priority = SPARC_CR_OPCODE_PRIORITY, .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER, .cra_blocksize = DES3_EDE_BLOCK_SIZE, .cra_ctxsize = sizeof(struct des3_ede_sparc64_ctx), @@ -464,7 +466,7 @@ static struct crypto_alg algs[] = { { }, { .cra_name = "cbc(des3_ede)", .cra_driver_name = "cbc-des3_ede-sparc64", - .cra_priority = 150, + .cra_priority = SPARC_CR_OPCODE_PRIORITY, .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER, .cra_blocksize = DES3_EDE_BLOCK_SIZE, .cra_ctxsize = sizeof(struct des3_ede_sparc64_ctx), diff --git a/arch/sparc/crypto/md5_glue.c b/arch/sparc/crypto/md5_glue.c index bdfcfefbd4fd..603d723038ce 100644 --- a/arch/sparc/crypto/md5_glue.c +++ b/arch/sparc/crypto/md5_glue.c @@ -24,6 +24,8 @@ #include #include +#include "opcodes.h" + asmlinkage void md5_sparc64_transform(u32 *digest, const char *data, unsigned int rounds); @@ -141,7 +143,7 @@ static struct shash_alg alg = { .base = { .cra_name = "md5", .cra_driver_name= "md5-sparc64", - .cra_priority = 150, + .cra_priority = SPARC_CR_OPCODE_PRIORITY, .cra_flags = 
CRYPTO_ALG_TYPE_SHASH, .cra_blocksize = MD5_HMAC_BLOCK_SIZE, .cra_module = THIS_MODULE, diff --git a/arch/sparc/crypto/opcodes.h b/arch/sparc/crypto/opcodes.h index b6c4a1556354..19cbaea6976f 100644 --- a/arch/sparc/crypto/opcodes.h +++ b/arch/sparc/crypto/opcodes.h @@ -1,6 +1,8 @@ #ifndef _OPCODES_H #define _OPCODES_H +#define SPARC_CR_OPCODE_PRIORITY 300 + #define F3F(x,y,z) (((x)<<30)|((y)<<19)|((z)<<5)) #define FPD_ENCODE(x) (((x) >> 5) | ((x) & ~(0x20))) diff --git a/arch/sparc/crypto/sha1_glue.c b/arch/sparc/crypto/sha1_glue.c index 6bd1abc5489d..2bbb20bee9f1 100644 --- a/arch/sparc/crypto/sha1_glue.c +++ b/arch/sparc/crypto/sha1_glue.c @@ -21,6 +21,8 @@ #include #include +#include "opcodes.h" + asmlinkage void sha1_sparc64_transform(u32 *digest, const char *data, unsigned int rounds); @@ -136,7 +138,7 @@ static struct shash_alg alg = { .base = { .cra_name = "sha1", .cra_driver_name= "sha1-sparc64", - .cra_priority = 150, + .cra_priority = SPARC_CR_OPCODE_PRIORITY, .cra_flags = CRYPTO_ALG_TYPE_SHASH, .cra_blocksize = SHA1_BLOCK_SIZE, .cra_module = THIS_MODULE, diff --git a/arch/sparc/crypto/sha256_glue.c b/arch/sparc/crypto/sha256_glue.c index 75e1adeeb024..591e656bd891 100644 --- a/arch/sparc/crypto/sha256_glue.c +++ b/arch/sparc/crypto/sha256_glue.c @@ -21,6 +21,8 @@ #include #include +#include "opcodes.h" + asmlinkage void sha256_sparc64_transform(u32 *digest, const char *data, unsigned int rounds); @@ -166,6 +168,7 @@ static struct shash_alg sha256 = { .base = { .cra_name = "sha256", .cra_driver_name= "sha256-sparc64", + .cra_priority = SPARC_CR_OPCODE_PRIORITY, .cra_flags = CRYPTO_ALG_TYPE_SHASH, .cra_blocksize = SHA256_BLOCK_SIZE, .cra_module = THIS_MODULE, @@ -181,6 +184,7 @@ static struct shash_alg sha224 = { .base = { .cra_name = "sha224", .cra_driver_name= "sha224-sparc64", + .cra_priority = SPARC_CR_OPCODE_PRIORITY, .cra_flags = CRYPTO_ALG_TYPE_SHASH, .cra_blocksize = SHA224_BLOCK_SIZE, .cra_module = THIS_MODULE, diff --git a/arch/sparc/crypto/sha512_glue.c b/arch/sparc/crypto/sha512_glue.c index 4d960be401c4..486f0a2b7001 100644 --- a/arch/sparc/crypto/sha512_glue.c +++ b/arch/sparc/crypto/sha512_glue.c @@ -20,6 +20,8 @@ #include #include +#include "opcodes.h" + asmlinkage void sha512_sparc64_transform(u64 *digest, const char *data, unsigned int rounds); @@ -151,6 +153,7 @@ static struct shash_alg sha512 = { .base = { .cra_name = "sha512", .cra_driver_name= "sha512-sparc64", + .cra_priority = SPARC_CR_OPCODE_PRIORITY, .cra_flags = CRYPTO_ALG_TYPE_SHASH, .cra_blocksize = SHA512_BLOCK_SIZE, .cra_module = THIS_MODULE, @@ -166,6 +169,7 @@ static struct shash_alg sha384 = { .base = { .cra_name = "sha384", .cra_driver_name= "sha384-sparc64", + .cra_priority = SPARC_CR_OPCODE_PRIORITY, .cra_flags = CRYPTO_ALG_TYPE_SHASH, .cra_blocksize = SHA384_BLOCK_SIZE, .cra_module = THIS_MODULE, diff --git a/drivers/crypto/n2_core.c b/drivers/crypto/n2_core.c index a8bd0310f8fe..aab257403b4a 100644 --- a/drivers/crypto/n2_core.c +++ b/drivers/crypto/n2_core.c @@ -42,7 +42,7 @@ MODULE_DESCRIPTION("Niagara2 Crypto driver"); MODULE_LICENSE("GPL"); MODULE_VERSION(DRV_MODULE_VERSION); -#define N2_CRA_PRIORITY 300 +#define N2_CRA_PRIORITY 200 static DEFINE_MUTEX(spu_lock); -- cgit v1.2.3 From 71741680a9858a7fcba54acf5b321d185abef1e8 Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Sat, 15 Sep 2012 09:17:10 -0700 Subject: sparc64: Add missing pr_fmt define to crypto opcode drivers. The hashes and crc32c had it, only the AES/DES/CAMELLIA drivers were missing it. Signed-off-by: David S. 
Miller --- arch/sparc/crypto/aes_glue.c | 2 ++ arch/sparc/crypto/camellia_glue.c | 2 ++ arch/sparc/crypto/des_glue.c | 2 ++ 3 files changed, 6 insertions(+) (limited to 'arch/sparc/crypto') diff --git a/arch/sparc/crypto/aes_glue.c b/arch/sparc/crypto/aes_glue.c index 180bed441aa0..8f1c9980f637 100644 --- a/arch/sparc/crypto/aes_glue.c +++ b/arch/sparc/crypto/aes_glue.c @@ -14,6 +14,8 @@ * Copyright (c) 2010, Intel Corporation. */ +#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt + #include #include #include diff --git a/arch/sparc/crypto/camellia_glue.c b/arch/sparc/crypto/camellia_glue.c index f45ae69d0d1a..42905c084299 100644 --- a/arch/sparc/crypto/camellia_glue.c +++ b/arch/sparc/crypto/camellia_glue.c @@ -3,6 +3,8 @@ * Copyright (C) 2012 David S. Miller */ +#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt + #include #include #include diff --git a/arch/sparc/crypto/des_glue.c b/arch/sparc/crypto/des_glue.c index 77d2ad6e899e..c4940c2d3073 100644 --- a/arch/sparc/crypto/des_glue.c +++ b/arch/sparc/crypto/des_glue.c @@ -3,6 +3,8 @@ * Copyright (C) 2012 David S. Miller */ +#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt + #include #include #include -- cgit v1.2.3 From 979e93ca24e8336acb660529676dcda6817d8a1f Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Sat, 15 Sep 2012 09:46:25 -0700 Subject: sparc64: Add auto-loading mechanism to crypto-opcode drivers. Simply provide a device table containing an entry for sun4v cpus; the capability mask checks in the drivers themselves will take care of the rest. This makes the bootup logs on pre-T4 cpus slightly more verbose, with each driver indicating lack of support for the associated opcode(s). But this isn't too much of a real problem. I toyed with the idea of using explicit entries with compatibility fields of "SPARC-T4", "SPARC-T5", etc. but all future cpus will have some subset of these opcodes available and this would just be one more pointless thing to do as each new cpu is released with a new string. Signed-off-by: David S.
Miller --- arch/sparc/crypto/Makefile | 16 ++++++++-------- arch/sparc/crypto/crop_devid.c | 14 ++++++++++++++ 2 files changed, 22 insertions(+), 8 deletions(-) create mode 100644 arch/sparc/crypto/crop_devid.c (limited to 'arch/sparc/crypto') diff --git a/arch/sparc/crypto/Makefile b/arch/sparc/crypto/Makefile index 5d469d81761f..6ae1ad5e502b 100644 --- a/arch/sparc/crypto/Makefile +++ b/arch/sparc/crypto/Makefile @@ -13,13 +13,13 @@ obj-$(CONFIG_CRYPTO_DES_SPARC64) += camellia-sparc64.o obj-$(CONFIG_CRYPTO_CRC32C_SPARC64) += crc32c-sparc64.o -sha1-sparc64-y := sha1_asm.o sha1_glue.o -sha256-sparc64-y := sha256_asm.o sha256_glue.o -sha512-sparc64-y := sha512_asm.o sha512_glue.o -md5-sparc64-y := md5_asm.o md5_glue.o +sha1-sparc64-y := sha1_asm.o sha1_glue.o crop_devid.o +sha256-sparc64-y := sha256_asm.o sha256_glue.o crop_devid.o +sha512-sparc64-y := sha512_asm.o sha512_glue.o crop_devid.o +md5-sparc64-y := md5_asm.o md5_glue.o crop_devid.o -aes-sparc64-y := aes_asm.o aes_glue.o -des-sparc64-y := des_asm.o des_glue.o -camellia-sparc64-y := camellia_asm.o camellia_glue.o +aes-sparc64-y := aes_asm.o aes_glue.o crop_devid.o +des-sparc64-y := des_asm.o des_glue.o crop_devid.o +camellia-sparc64-y := camellia_asm.o camellia_glue.o crop_devid.o -crc32c-sparc64-y := crc32c_asm.o crc32c_glue.o +crc32c-sparc64-y := crc32c_asm.o crc32c_glue.o crop_devid.o diff --git a/arch/sparc/crypto/crop_devid.c b/arch/sparc/crypto/crop_devid.c new file mode 100644 index 000000000000..5f5724a0ae22 --- /dev/null +++ b/arch/sparc/crypto/crop_devid.c @@ -0,0 +1,14 @@ +#include +#include + +/* This is a dummy device table linked into all of the crypto + * opcode drivers. It serves to trigger the module autoloading + * mechanisms in userspace which scan the OF device tree and + * load any modules which have device table entries that + * match OF device nodes. + */ +static const struct of_device_id crypto_opcode_match[] = { + { .name = "cpu", .compatible = "sun4v", }, + {}, +}; +MODULE_DEVICE_TABLE(of, crypto_opcode_match); -- cgit v1.2.3 From 65d5fcf5c284735d3a4254cb17691842385984bb Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Mon, 17 Sep 2012 19:57:30 -0700 Subject: sparc64: Fix IV handling bug in des_sparc64_cbc_decrypt The IV wasn't being propagated properly past the first loop iteration. This bug lived only because the crypto layer tests for cbc(des) do not have any cases that go more than one loop. Signed-off-by: David S. Miller --- arch/sparc/crypto/des_asm.S | 1 + 1 file changed, 1 insertion(+) (limited to 'arch/sparc/crypto') diff --git a/arch/sparc/crypto/des_asm.S b/arch/sparc/crypto/des_asm.S index a7da935796e1..30b6e90b28b2 100644 --- a/arch/sparc/crypto/des_asm.S +++ b/arch/sparc/crypto/des_asm.S @@ -168,6 +168,7 @@ ENTRY(des_sparc64_cbc_decrypt) DES_ROUND(28, 30, 32, 32) DES_IIP(32, 32) fxor %f32, %f34, %f32 + fsrc2 %f36, %f34 std %f32, [%o1 + 0x00] add %o0, 0x08, %o0 subcc %o2, 0x08, %o2 -- cgit v1.2.3 From 274504f5e62882bb43ea6599b1308607956f20d9 Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Fri, 21 Sep 2012 12:04:51 -0700 Subject: sparc64: Fix function argument comment in camellia_sparc64_key_expand asm. Signed-off-by: David S. 
Miller --- arch/sparc/crypto/camellia_asm.S | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch/sparc/crypto') diff --git a/arch/sparc/crypto/camellia_asm.S b/arch/sparc/crypto/camellia_asm.S index d3ef9c7bbd02..cc39553a4e43 100644 --- a/arch/sparc/crypto/camellia_asm.S +++ b/arch/sparc/crypto/camellia_asm.S @@ -30,7 +30,7 @@ SIGMA: .xword 0xA09E667F3BCC908B .align 32 ENTRY(camellia_sparc64_key_expand) - /* %o0=in_key, %o1=out_key, %o2=key_len */ + /* %o0=in_key, %o1=encrypt_key, %o2=key_len, %o3=decrypt_key */ VISEntry ld [%o0 + 0x00], %f0 ! i0, k[0] ld [%o0 + 0x04], %f1 ! i1, k[1] -- cgit v1.2.3
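A few C sketches of the techniques used across this series may help when reading the flattened diffs above. The unrolled ECB and CTR loops all share one control-flow shape: branch straight to a single-block path when only 16 bytes remain, run a main loop that covers two independent 16-byte blocks per iteration so the 3-cycle AES round latency is hidden by overlapping work, then handle a trailing block when the length is an odd number of blocks. A minimal sketch of that shape, assuming a length that is a non-zero multiple of 16 and using aes_encrypt_block()/aes_encrypt_2blocks() as hypothetical stand-ins for the ENCRYPT_128 and ENCRYPT_128_2 macro paths:

    #include <stddef.h>
    #include <stdint.h>

    /* Hypothetical stand-ins for the single- and dual-block macro paths. */
    void aes_encrypt_block(const uint64_t *key, const uint8_t *in, uint8_t *out);
    void aes_encrypt_2blocks(const uint64_t *key, const uint8_t *in, uint8_t *out);

    static void ecb_encrypt_walk(const uint64_t *key, const uint8_t *in,
                                 uint8_t *out, size_t len)
    {
        if (len == 16) {                        /* "be 10f": single block */
            aes_encrypt_block(key, in, out);
            return;
        }
        while (len >= 32) {                     /* two blocks per pass */
            aes_encrypt_2blocks(key, in, out);  /* independent streams
                                                 * overlap in the pipeline */
            in += 32; out += 32; len -= 32;
        }
        if (len)                                /* trailing 16-byte block */
            aes_encrypt_block(key, in, out);
    }

The same skeleton recurs in the 192- and 256-bit entry points and in the decrypt side; only the round-macro body changes.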
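The CTR entry points keep the 128-bit big-endian counter in two 64-bit registers, %g3 (high word) and %g7 (low word). The add/add/movrz triple increments the low word, pre-computes the carried high word, and commits it only when the low word wraps to zero, giving a branchless carry. The same increment as a self-contained sketch:

    #include <stdint.h>

    /* 128-bit big-endian counter increment, mirroring
     *   add   %g7, 1, %g7
     *   add   %g3, 1, %o5
     *   movrz %g7, %o5, %g3
     * bump the low word; take the carried high word only on wrap to zero.
     */
    static void ctr128_inc(uint64_t *hi, uint64_t *lo)
    {
        *lo += 1;
        if (*lo == 0)
            *hi += 1;
    }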
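The opcodes.h consolidation also makes the encodings easier to audit: every crypto opcode is F3F(op, op3, opf) plus FPD-encoded register fields, and the hard-coded hash words appear to fit the same scheme with all register fields zero, with opf counting up from 0x140 for MD5 through SHA512. A small host-side check of that reading (the expected values are the .word constants from opcodes.h):

    #include <stdint.h>
    #include <stdio.h>

    /* The F3F() field packer from opcodes.h, transcribed to C. */
    #define F3F(x, y, z) (((uint32_t)(x) << 30) | ((uint32_t)(y) << 19) | \
                          ((uint32_t)(z) << 5))

    int main(void)
    {
        printf("MD5    0x%08x (expect 0x81b02800)\n", (unsigned)F3F(2, 0x36, 0x140));
        printf("SHA1   0x%08x (expect 0x81b02820)\n", (unsigned)F3F(2, 0x36, 0x141));
        printf("SHA256 0x%08x (expect 0x81b02840)\n", (unsigned)F3F(2, 0x36, 0x142));
        printf("SHA512 0x%08x (expect 0x81b02860)\n", (unsigned)F3F(2, 0x36, 0x143));
        return 0;
    }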
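Finally, the des_sparc64_cbc_decrypt fix restores the CBC chaining invariant that the ciphertext of block i becomes the IV for block i+1; the added fsrc2 %f36, %f34 is that copy, presumably with %f36 holding the just-loaded ciphertext and %f34 the running IV. A sketch of the loop the assembly implements, with des_decrypt_block() a hypothetical stand-in for the DES_ROUND/DES_IIP sequence:

    #include <stddef.h>
    #include <stdint.h>

    /* Hypothetical stand-in for the raw DES block decryption. */
    uint64_t des_decrypt_block(const uint64_t *key, uint64_t in);

    static void cbc_decrypt(const uint64_t *key, const uint64_t *in,
                            uint64_t *out, size_t len, uint64_t iv)
    {
        while (len >= 8) {
            uint64_t ct = *in++;
            *out++ = des_decrypt_block(key, ct) ^ iv;
            iv = ct;        /* the propagation the original loop missed */
            len -= 8;
        }
    }

As the commit message notes, the in-kernel cbc(des) test vectors never ran the loop more than once, which is why the missing propagation went unnoticed.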