From 58990986f1cba40c23c0c10592ace08616de3ffa Mon Sep 17 00:00:00 2001 From: Jussi Kivilinna Date: Sat, 20 Oct 2012 15:06:36 +0300 Subject: crypto: x86/glue_helper - use le128 instead of u128 for CTR mode 'u128' currently used for CTR mode is on little-endian 'long long' swapped and would require extra swap operations by SSE/AVX code. Use of le128 instead of u128 allows IV calculations to be done with vector registers easier. Signed-off-by: Jussi Kivilinna Signed-off-by: Herbert Xu --- arch/x86/include/asm/crypto/glue_helper.h | 28 +++++++++++++++++----------- arch/x86/include/asm/crypto/twofish.h | 4 ++-- 2 files changed, 19 insertions(+), 13 deletions(-) (limited to 'arch/x86/include') diff --git a/arch/x86/include/asm/crypto/glue_helper.h b/arch/x86/include/asm/crypto/glue_helper.h index 3e408bddc96f..e2d65b061d27 100644 --- a/arch/x86/include/asm/crypto/glue_helper.h +++ b/arch/x86/include/asm/crypto/glue_helper.h @@ -13,7 +13,7 @@ typedef void (*common_glue_func_t)(void *ctx, u8 *dst, const u8 *src); typedef void (*common_glue_cbc_func_t)(void *ctx, u128 *dst, const u128 *src); typedef void (*common_glue_ctr_func_t)(void *ctx, u128 *dst, const u128 *src, - u128 *iv); + le128 *iv); #define GLUE_FUNC_CAST(fn) ((common_glue_func_t)(fn)) #define GLUE_CBC_FUNC_CAST(fn) ((common_glue_cbc_func_t)(fn)) @@ -71,23 +71,29 @@ static inline void glue_fpu_end(bool fpu_enabled) kernel_fpu_end(); } -static inline void u128_to_be128(be128 *dst, const u128 *src) +static inline void le128_to_be128(be128 *dst, const le128 *src) { - dst->a = cpu_to_be64(src->a); - dst->b = cpu_to_be64(src->b); + dst->a = cpu_to_be64(le64_to_cpu(src->a)); + dst->b = cpu_to_be64(le64_to_cpu(src->b)); } -static inline void be128_to_u128(u128 *dst, const be128 *src) +static inline void be128_to_le128(le128 *dst, const be128 *src) { - dst->a = be64_to_cpu(src->a); - dst->b = be64_to_cpu(src->b); + dst->a = cpu_to_le64(be64_to_cpu(src->a)); + dst->b = cpu_to_le64(be64_to_cpu(src->b)); } -static inline void u128_inc(u128 *i) +static inline void le128_inc(le128 *i) { - i->b++; - if (!i->b) - i->a++; + u64 a = le64_to_cpu(i->a); + u64 b = le64_to_cpu(i->b); + + b++; + if (!b) + a++; + + i->a = cpu_to_le64(a); + i->b = cpu_to_le64(b); } extern int glue_ecb_crypt_128bit(const struct common_glue_ctx *gctx, diff --git a/arch/x86/include/asm/crypto/twofish.h b/arch/x86/include/asm/crypto/twofish.h index 9d2c514bd5f9..878c51ceebb5 100644 --- a/arch/x86/include/asm/crypto/twofish.h +++ b/arch/x86/include/asm/crypto/twofish.h @@ -31,9 +31,9 @@ asmlinkage void twofish_dec_blk_3way(struct twofish_ctx *ctx, u8 *dst, /* helpers from twofish_x86_64-3way module */ extern void twofish_dec_blk_cbc_3way(void *ctx, u128 *dst, const u128 *src); extern void twofish_enc_blk_ctr(void *ctx, u128 *dst, const u128 *src, - u128 *iv); + le128 *iv); extern void twofish_enc_blk_ctr_3way(void *ctx, u128 *dst, const u128 *src, - u128 *iv); + le128 *iv); extern int lrw_twofish_setkey(struct crypto_tfm *tfm, const u8 *key, unsigned int keylen); -- cgit v1.2.3 From facd416fbc1cdee357730909a414898934f16ae1 Mon Sep 17 00:00:00 2001 From: Jussi Kivilinna Date: Sat, 20 Oct 2012 15:06:51 +0300 Subject: crypto: serpent/avx - avoid using temporary stack buffers Introduce new assembler functions to avoid use temporary stack buffers in glue code. This also allows use of vector instructions for xoring output in CTR and CBC modes and construction of IVs for CTR mode. ECB mode sees ~0.5% decrease in speed because added one extra function call. CBC mode decryption and CTR mode benefit from vector operations and gain ~3%. Signed-off-by: Jussi Kivilinna Signed-off-by: Herbert Xu --- arch/x86/crypto/serpent-avx-x86_64-asm_64.S | 166 ++++++++++++++++++---------- arch/x86/crypto/serpent_avx_glue.c | 43 +------ arch/x86/include/asm/crypto/serpent-avx.h | 27 ++--- 3 files changed, 121 insertions(+), 115 deletions(-) (limited to 'arch/x86/include') diff --git a/arch/x86/crypto/serpent-avx-x86_64-asm_64.S b/arch/x86/crypto/serpent-avx-x86_64-asm_64.S index 504106bf04a2..02b0e9fe997c 100644 --- a/arch/x86/crypto/serpent-avx-x86_64-asm_64.S +++ b/arch/x86/crypto/serpent-avx-x86_64-asm_64.S @@ -24,7 +24,16 @@ * */ +#include "glue_helper-asm-avx.S" + .file "serpent-avx-x86_64-asm_64.S" + +.data +.align 16 + +.Lbswap128_mask: + .byte 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0 + .text #define CTX %rdi @@ -550,51 +559,27 @@ vpunpcklqdq x3, t2, x2; \ vpunpckhqdq x3, t2, x3; -#define read_blocks(in, x0, x1, x2, x3, t0, t1, t2) \ - vmovdqu (0*4*4)(in), x0; \ - vmovdqu (1*4*4)(in), x1; \ - vmovdqu (2*4*4)(in), x2; \ - vmovdqu (3*4*4)(in), x3; \ - \ +#define read_blocks(x0, x1, x2, x3, t0, t1, t2) \ transpose_4x4(x0, x1, x2, x3, t0, t1, t2) -#define write_blocks(out, x0, x1, x2, x3, t0, t1, t2) \ - transpose_4x4(x0, x1, x2, x3, t0, t1, t2) \ - \ - vmovdqu x0, (0*4*4)(out); \ - vmovdqu x1, (1*4*4)(out); \ - vmovdqu x2, (2*4*4)(out); \ - vmovdqu x3, (3*4*4)(out); - -#define xor_blocks(out, x0, x1, x2, x3, t0, t1, t2) \ - transpose_4x4(x0, x1, x2, x3, t0, t1, t2) \ - \ - vpxor (0*4*4)(out), x0, x0; \ - vmovdqu x0, (0*4*4)(out); \ - vpxor (1*4*4)(out), x1, x1; \ - vmovdqu x1, (1*4*4)(out); \ - vpxor (2*4*4)(out), x2, x2; \ - vmovdqu x2, (2*4*4)(out); \ - vpxor (3*4*4)(out), x3, x3; \ - vmovdqu x3, (3*4*4)(out); +#define write_blocks(x0, x1, x2, x3, t0, t1, t2) \ + transpose_4x4(x0, x1, x2, x3, t0, t1, t2) .align 8 -.global __serpent_enc_blk_8way_avx -.type __serpent_enc_blk_8way_avx,@function; +.type __serpent_enc_blk8_avx,@function; -__serpent_enc_blk_8way_avx: +__serpent_enc_blk8_avx: /* input: * %rdi: ctx, CTX - * %rsi: dst - * %rdx: src - * %rcx: bool, if true: xor output + * RA1, RB1, RC1, RD1, RA2, RB2, RC2, RD2: blocks + * output: + * RA1, RB1, RC1, RD1, RA2, RB2, RC2, RD2: encrypted blocks */ vpcmpeqd RNOT, RNOT, RNOT; - leaq (4*4*4)(%rdx), %rax; - read_blocks(%rdx, RA1, RB1, RC1, RD1, RK0, RK1, RK2); - read_blocks(%rax, RA2, RB2, RC2, RD2, RK0, RK1, RK2); + read_blocks(RA1, RB1, RC1, RD1, RK0, RK1, RK2); + read_blocks(RA2, RB2, RC2, RD2, RK0, RK1, RK2); K2(RA, RB, RC, RD, RE, 0); S(S0, RA, RB, RC, RD, RE); LK2(RC, RB, RD, RA, RE, 1); @@ -630,38 +615,26 @@ __serpent_enc_blk_8way_avx: S(S6, RA, RB, RD, RC, RE); LK2(RD, RE, RB, RC, RA, 31); S(S7, RD, RE, RB, RC, RA); K2(RA, RB, RC, RD, RE, 32); - leaq (4*4*4)(%rsi), %rax; - - testb %cl, %cl; - jnz __enc_xor8; - - write_blocks(%rsi, RA1, RB1, RC1, RD1, RK0, RK1, RK2); - write_blocks(%rax, RA2, RB2, RC2, RD2, RK0, RK1, RK2); - - ret; - -__enc_xor8: - xor_blocks(%rsi, RA1, RB1, RC1, RD1, RK0, RK1, RK2); - xor_blocks(%rax, RA2, RB2, RC2, RD2, RK0, RK1, RK2); + write_blocks(RA1, RB1, RC1, RD1, RK0, RK1, RK2); + write_blocks(RA2, RB2, RC2, RD2, RK0, RK1, RK2); ret; .align 8 -.global serpent_dec_blk_8way_avx -.type serpent_dec_blk_8way_avx,@function; +.type __serpent_dec_blk8_avx,@function; -serpent_dec_blk_8way_avx: +__serpent_dec_blk8_avx: /* input: * %rdi: ctx, CTX - * %rsi: dst - * %rdx: src + * RA1, RB1, RC1, RD1, RA2, RB2, RC2, RD2: encrypted blocks + * output: + * RC1, RD1, RB1, RE1, RC2, RD2, RB2, RE2: decrypted blocks */ vpcmpeqd RNOT, RNOT, RNOT; - leaq (4*4*4)(%rdx), %rax; - read_blocks(%rdx, RA1, RB1, RC1, RD1, RK0, RK1, RK2); - read_blocks(%rax, RA2, RB2, RC2, RD2, RK0, RK1, RK2); + read_blocks(RA1, RB1, RC1, RD1, RK0, RK1, RK2); + read_blocks(RA2, RB2, RC2, RD2, RK0, RK1, RK2); K2(RA, RB, RC, RD, RE, 32); SP(SI7, RA, RB, RC, RD, RE, 31); KL2(RB, RD, RA, RE, RC, 31); @@ -697,8 +670,85 @@ serpent_dec_blk_8way_avx: SP(SI1, RD, RB, RC, RA, RE, 1); KL2(RE, RB, RC, RA, RD, 1); S(SI0, RE, RB, RC, RA, RD); K2(RC, RD, RB, RE, RA, 0); - leaq (4*4*4)(%rsi), %rax; - write_blocks(%rsi, RC1, RD1, RB1, RE1, RK0, RK1, RK2); - write_blocks(%rax, RC2, RD2, RB2, RE2, RK0, RK1, RK2); + write_blocks(RC1, RD1, RB1, RE1, RK0, RK1, RK2); + write_blocks(RC2, RD2, RB2, RE2, RK0, RK1, RK2); + + ret; + +.align 8 +.global serpent_ecb_enc_8way_avx +.type serpent_ecb_enc_8way_avx,@function; + +serpent_ecb_enc_8way_avx: + /* input: + * %rdi: ctx, CTX + * %rsi: dst + * %rdx: src + */ + + load_8way(%rdx, RA1, RB1, RC1, RD1, RA2, RB2, RC2, RD2); + + call __serpent_enc_blk8_avx; + + store_8way(%rsi, RA1, RB1, RC1, RD1, RA2, RB2, RC2, RD2); + + ret; + +.align 8 +.global serpent_ecb_dec_8way_avx +.type serpent_ecb_dec_8way_avx,@function; + +serpent_ecb_dec_8way_avx: + /* input: + * %rdi: ctx, CTX + * %rsi: dst + * %rdx: src + */ + + load_8way(%rdx, RA1, RB1, RC1, RD1, RA2, RB2, RC2, RD2); + + call __serpent_dec_blk8_avx; + + store_8way(%rsi, RC1, RD1, RB1, RE1, RC2, RD2, RB2, RE2); + + ret; + +.align 8 +.global serpent_cbc_dec_8way_avx +.type serpent_cbc_dec_8way_avx,@function; + +serpent_cbc_dec_8way_avx: + /* input: + * %rdi: ctx, CTX + * %rsi: dst + * %rdx: src + */ + + load_8way(%rdx, RA1, RB1, RC1, RD1, RA2, RB2, RC2, RD2); + + call __serpent_dec_blk8_avx; + + store_cbc_8way(%rdx, %rsi, RC1, RD1, RB1, RE1, RC2, RD2, RB2, RE2); + + ret; + +.align 8 +.global serpent_ctr_8way_avx +.type serpent_ctr_8way_avx,@function; + +serpent_ctr_8way_avx: + /* input: + * %rdi: ctx, CTX + * %rsi: dst + * %rdx: src + * %rcx: iv (little endian, 128bit) + */ + + load_ctr_8way(%rcx, .Lbswap128_mask, RA1, RB1, RC1, RD1, RA2, RB2, RC2, + RD2, RK0, RK1, RK2); + + call __serpent_enc_blk8_avx; + + store_ctr_8way(%rdx, %rsi, RA1, RB1, RC1, RD1, RA2, RB2, RC2, RD2); ret; diff --git a/arch/x86/crypto/serpent_avx_glue.c b/arch/x86/crypto/serpent_avx_glue.c index 2aa31ade1e68..52abaaf28e7f 100644 --- a/arch/x86/crypto/serpent_avx_glue.c +++ b/arch/x86/crypto/serpent_avx_glue.c @@ -42,20 +42,6 @@ #include #include -static void serpent_decrypt_cbc_xway(void *ctx, u128 *dst, const u128 *src) -{ - u128 ivs[SERPENT_PARALLEL_BLOCKS - 1]; - unsigned int j; - - for (j = 0; j < SERPENT_PARALLEL_BLOCKS - 1; j++) - ivs[j] = src[j]; - - serpent_dec_blk_xway(ctx, (u8 *)dst, (u8 *)src); - - for (j = 0; j < SERPENT_PARALLEL_BLOCKS - 1; j++) - u128_xor(dst + (j + 1), dst + (j + 1), ivs + j); -} - static void serpent_crypt_ctr(void *ctx, u128 *dst, const u128 *src, le128 *iv) { be128 ctrblk; @@ -67,30 +53,13 @@ static void serpent_crypt_ctr(void *ctx, u128 *dst, const u128 *src, le128 *iv) u128_xor(dst, src, (u128 *)&ctrblk); } -static void serpent_crypt_ctr_xway(void *ctx, u128 *dst, const u128 *src, - le128 *iv) -{ - be128 ctrblks[SERPENT_PARALLEL_BLOCKS]; - unsigned int i; - - for (i = 0; i < SERPENT_PARALLEL_BLOCKS; i++) { - if (dst != src) - dst[i] = src[i]; - - le128_to_be128(&ctrblks[i], iv); - le128_inc(iv); - } - - serpent_enc_blk_xway_xor(ctx, (u8 *)dst, (u8 *)ctrblks); -} - static const struct common_glue_ctx serpent_enc = { .num_funcs = 2, .fpu_blocks_limit = SERPENT_PARALLEL_BLOCKS, .funcs = { { .num_blocks = SERPENT_PARALLEL_BLOCKS, - .fn_u = { .ecb = GLUE_FUNC_CAST(serpent_enc_blk_xway) } + .fn_u = { .ecb = GLUE_FUNC_CAST(serpent_ecb_enc_8way_avx) } }, { .num_blocks = 1, .fn_u = { .ecb = GLUE_FUNC_CAST(__serpent_encrypt) } @@ -103,7 +72,7 @@ static const struct common_glue_ctx serpent_ctr = { .funcs = { { .num_blocks = SERPENT_PARALLEL_BLOCKS, - .fn_u = { .ctr = GLUE_CTR_FUNC_CAST(serpent_crypt_ctr_xway) } + .fn_u = { .ctr = GLUE_CTR_FUNC_CAST(serpent_ctr_8way_avx) } }, { .num_blocks = 1, .fn_u = { .ctr = GLUE_CTR_FUNC_CAST(serpent_crypt_ctr) } @@ -116,7 +85,7 @@ static const struct common_glue_ctx serpent_dec = { .funcs = { { .num_blocks = SERPENT_PARALLEL_BLOCKS, - .fn_u = { .ecb = GLUE_FUNC_CAST(serpent_dec_blk_xway) } + .fn_u = { .ecb = GLUE_FUNC_CAST(serpent_ecb_dec_8way_avx) } }, { .num_blocks = 1, .fn_u = { .ecb = GLUE_FUNC_CAST(__serpent_decrypt) } @@ -129,7 +98,7 @@ static const struct common_glue_ctx serpent_dec_cbc = { .funcs = { { .num_blocks = SERPENT_PARALLEL_BLOCKS, - .fn_u = { .cbc = GLUE_CBC_FUNC_CAST(serpent_decrypt_cbc_xway) } + .fn_u = { .cbc = GLUE_CBC_FUNC_CAST(serpent_cbc_dec_8way_avx) } }, { .num_blocks = 1, .fn_u = { .cbc = GLUE_CBC_FUNC_CAST(__serpent_decrypt) } @@ -193,7 +162,7 @@ static void encrypt_callback(void *priv, u8 *srcdst, unsigned int nbytes) ctx->fpu_enabled = serpent_fpu_begin(ctx->fpu_enabled, nbytes); if (nbytes == bsize * SERPENT_PARALLEL_BLOCKS) { - serpent_enc_blk_xway(ctx->ctx, srcdst, srcdst); + serpent_ecb_enc_8way_avx(ctx->ctx, srcdst, srcdst); return; } @@ -210,7 +179,7 @@ static void decrypt_callback(void *priv, u8 *srcdst, unsigned int nbytes) ctx->fpu_enabled = serpent_fpu_begin(ctx->fpu_enabled, nbytes); if (nbytes == bsize * SERPENT_PARALLEL_BLOCKS) { - serpent_dec_blk_xway(ctx->ctx, srcdst, srcdst); + serpent_ecb_dec_8way_avx(ctx->ctx, srcdst, srcdst); return; } diff --git a/arch/x86/include/asm/crypto/serpent-avx.h b/arch/x86/include/asm/crypto/serpent-avx.h index 432deedd2945..0da1d3e2a55c 100644 --- a/arch/x86/include/asm/crypto/serpent-avx.h +++ b/arch/x86/include/asm/crypto/serpent-avx.h @@ -6,27 +6,14 @@ #define SERPENT_PARALLEL_BLOCKS 8 -asmlinkage void __serpent_enc_blk_8way_avx(struct serpent_ctx *ctx, u8 *dst, - const u8 *src, bool xor); -asmlinkage void serpent_dec_blk_8way_avx(struct serpent_ctx *ctx, u8 *dst, +asmlinkage void serpent_ecb_enc_8way_avx(struct serpent_ctx *ctx, u8 *dst, + const u8 *src); +asmlinkage void serpent_ecb_dec_8way_avx(struct serpent_ctx *ctx, u8 *dst, const u8 *src); -static inline void serpent_enc_blk_xway(struct serpent_ctx *ctx, u8 *dst, - const u8 *src) -{ - __serpent_enc_blk_8way_avx(ctx, dst, src, false); -} - -static inline void serpent_enc_blk_xway_xor(struct serpent_ctx *ctx, u8 *dst, - const u8 *src) -{ - __serpent_enc_blk_8way_avx(ctx, dst, src, true); -} - -static inline void serpent_dec_blk_xway(struct serpent_ctx *ctx, u8 *dst, - const u8 *src) -{ - serpent_dec_blk_8way_avx(ctx, dst, src); -} +asmlinkage void serpent_cbc_dec_8way_avx(struct serpent_ctx *ctx, u8 *dst, + const u8 *src); +asmlinkage void serpent_ctr_8way_avx(struct serpent_ctx *ctx, u8 *dst, + const u8 *src, le128 *iv); #endif -- cgit v1.2.3 From cf582ccedad02eb9bfdcdb25adfc800dd117b428 Mon Sep 17 00:00:00 2001 From: Jussi Kivilinna Date: Fri, 26 Oct 2012 14:48:56 +0300 Subject: crypto: camellia-x86_64 - share common functions and move structures and function definitions to header file Prepare camellia-x86_64 functions to be reused from AVX/AESNI implementation module. Signed-off-by: Jussi Kivilinna Acked-by: David S. Miller Signed-off-by: Herbert Xu --- arch/x86/crypto/camellia_glue.c | 80 ++++++++++----------------------- arch/x86/include/asm/crypto/camellia.h | 82 ++++++++++++++++++++++++++++++++++ 2 files changed, 105 insertions(+), 57 deletions(-) create mode 100644 arch/x86/include/asm/crypto/camellia.h (limited to 'arch/x86/include') diff --git a/arch/x86/crypto/camellia_glue.c b/arch/x86/crypto/camellia_glue.c index 021a0086186b..5cb86ccd4acb 100644 --- a/arch/x86/crypto/camellia_glue.c +++ b/arch/x86/crypto/camellia_glue.c @@ -32,53 +32,24 @@ #include #include #include +#include #include -#define CAMELLIA_MIN_KEY_SIZE 16 -#define CAMELLIA_MAX_KEY_SIZE 32 -#define CAMELLIA_BLOCK_SIZE 16 -#define CAMELLIA_TABLE_BYTE_LEN 272 - -struct camellia_ctx { - u64 key_table[CAMELLIA_TABLE_BYTE_LEN / sizeof(u64)]; - u32 key_length; -}; - /* regular block cipher functions */ asmlinkage void __camellia_enc_blk(struct camellia_ctx *ctx, u8 *dst, const u8 *src, bool xor); +EXPORT_SYMBOL_GPL(__camellia_enc_blk); asmlinkage void camellia_dec_blk(struct camellia_ctx *ctx, u8 *dst, const u8 *src); +EXPORT_SYMBOL_GPL(camellia_dec_blk); /* 2-way parallel cipher functions */ asmlinkage void __camellia_enc_blk_2way(struct camellia_ctx *ctx, u8 *dst, const u8 *src, bool xor); +EXPORT_SYMBOL_GPL(__camellia_enc_blk_2way); asmlinkage void camellia_dec_blk_2way(struct camellia_ctx *ctx, u8 *dst, const u8 *src); - -static inline void camellia_enc_blk(struct camellia_ctx *ctx, u8 *dst, - const u8 *src) -{ - __camellia_enc_blk(ctx, dst, src, false); -} - -static inline void camellia_enc_blk_xor(struct camellia_ctx *ctx, u8 *dst, - const u8 *src) -{ - __camellia_enc_blk(ctx, dst, src, true); -} - -static inline void camellia_enc_blk_2way(struct camellia_ctx *ctx, u8 *dst, - const u8 *src) -{ - __camellia_enc_blk_2way(ctx, dst, src, false); -} - -static inline void camellia_enc_blk_xor_2way(struct camellia_ctx *ctx, u8 *dst, - const u8 *src) -{ - __camellia_enc_blk_2way(ctx, dst, src, true); -} +EXPORT_SYMBOL_GPL(camellia_dec_blk_2way); static void camellia_encrypt(struct crypto_tfm *tfm, u8 *dst, const u8 *src) { @@ -1275,9 +1246,8 @@ static void camellia_setup192(const unsigned char *key, u64 *subkey) camellia_setup256(kk, subkey); } -static int __camellia_setkey(struct camellia_ctx *cctx, - const unsigned char *key, - unsigned int key_len, u32 *flags) +int __camellia_setkey(struct camellia_ctx *cctx, const unsigned char *key, + unsigned int key_len, u32 *flags) { if (key_len != 16 && key_len != 24 && key_len != 32) { *flags |= CRYPTO_TFM_RES_BAD_KEY_LEN; @@ -1300,6 +1270,7 @@ static int __camellia_setkey(struct camellia_ctx *cctx, return 0; } +EXPORT_SYMBOL_GPL(__camellia_setkey); static int camellia_setkey(struct crypto_tfm *tfm, const u8 *in_key, unsigned int key_len) @@ -1308,7 +1279,7 @@ static int camellia_setkey(struct crypto_tfm *tfm, const u8 *in_key, &tfm->crt_flags); } -static void camellia_decrypt_cbc_2way(void *ctx, u128 *dst, const u128 *src) +void camellia_decrypt_cbc_2way(void *ctx, u128 *dst, const u128 *src) { u128 iv = *src; @@ -1316,8 +1287,9 @@ static void camellia_decrypt_cbc_2way(void *ctx, u128 *dst, const u128 *src) u128_xor(&dst[1], &dst[1], &iv); } +EXPORT_SYMBOL_GPL(camellia_decrypt_cbc_2way); -static void camellia_crypt_ctr(void *ctx, u128 *dst, const u128 *src, le128 *iv) +void camellia_crypt_ctr(void *ctx, u128 *dst, const u128 *src, le128 *iv) { be128 ctrblk; @@ -1329,9 +1301,9 @@ static void camellia_crypt_ctr(void *ctx, u128 *dst, const u128 *src, le128 *iv) camellia_enc_blk_xor(ctx, (u8 *)dst, (u8 *)&ctrblk); } +EXPORT_SYMBOL_GPL(camellia_crypt_ctr); -static void camellia_crypt_ctr_2way(void *ctx, u128 *dst, const u128 *src, - le128 *iv) +void camellia_crypt_ctr_2way(void *ctx, u128 *dst, const u128 *src, le128 *iv) { be128 ctrblks[2]; @@ -1347,6 +1319,7 @@ static void camellia_crypt_ctr_2way(void *ctx, u128 *dst, const u128 *src, camellia_enc_blk_xor_2way(ctx, (u8 *)dst, (u8 *)ctrblks); } +EXPORT_SYMBOL_GPL(camellia_crypt_ctr_2way); static const struct common_glue_ctx camellia_enc = { .num_funcs = 2, @@ -1464,13 +1437,8 @@ static void decrypt_callback(void *priv, u8 *srcdst, unsigned int nbytes) camellia_dec_blk(ctx, srcdst, srcdst); } -struct camellia_lrw_ctx { - struct lrw_table_ctx lrw_table; - struct camellia_ctx camellia_ctx; -}; - -static int lrw_camellia_setkey(struct crypto_tfm *tfm, const u8 *key, - unsigned int keylen) +int lrw_camellia_setkey(struct crypto_tfm *tfm, const u8 *key, + unsigned int keylen) { struct camellia_lrw_ctx *ctx = crypto_tfm_ctx(tfm); int err; @@ -1484,6 +1452,7 @@ static int lrw_camellia_setkey(struct crypto_tfm *tfm, const u8 *key, return lrw_init_table(&ctx->lrw_table, key + keylen - CAMELLIA_BLOCK_SIZE); } +EXPORT_SYMBOL_GPL(lrw_camellia_setkey); static int lrw_encrypt(struct blkcipher_desc *desc, struct scatterlist *dst, struct scatterlist *src, unsigned int nbytes) @@ -1519,20 +1488,16 @@ static int lrw_decrypt(struct blkcipher_desc *desc, struct scatterlist *dst, return lrw_crypt(desc, dst, src, nbytes, &req); } -static void lrw_exit_tfm(struct crypto_tfm *tfm) +void lrw_camellia_exit_tfm(struct crypto_tfm *tfm) { struct camellia_lrw_ctx *ctx = crypto_tfm_ctx(tfm); lrw_free_table(&ctx->lrw_table); } +EXPORT_SYMBOL_GPL(lrw_camellia_exit_tfm); -struct camellia_xts_ctx { - struct camellia_ctx tweak_ctx; - struct camellia_ctx crypt_ctx; -}; - -static int xts_camellia_setkey(struct crypto_tfm *tfm, const u8 *key, - unsigned int keylen) +int xts_camellia_setkey(struct crypto_tfm *tfm, const u8 *key, + unsigned int keylen) { struct camellia_xts_ctx *ctx = crypto_tfm_ctx(tfm); u32 *flags = &tfm->crt_flags; @@ -1555,6 +1520,7 @@ static int xts_camellia_setkey(struct crypto_tfm *tfm, const u8 *key, return __camellia_setkey(&ctx->tweak_ctx, key + keylen / 2, keylen / 2, flags); } +EXPORT_SYMBOL_GPL(xts_camellia_setkey); static int xts_encrypt(struct blkcipher_desc *desc, struct scatterlist *dst, struct scatterlist *src, unsigned int nbytes) @@ -1679,7 +1645,7 @@ static struct crypto_alg camellia_algs[6] = { { .cra_alignmask = 0, .cra_type = &crypto_blkcipher_type, .cra_module = THIS_MODULE, - .cra_exit = lrw_exit_tfm, + .cra_exit = lrw_camellia_exit_tfm, .cra_u = { .blkcipher = { .min_keysize = CAMELLIA_MIN_KEY_SIZE + diff --git a/arch/x86/include/asm/crypto/camellia.h b/arch/x86/include/asm/crypto/camellia.h new file mode 100644 index 000000000000..98038add801e --- /dev/null +++ b/arch/x86/include/asm/crypto/camellia.h @@ -0,0 +1,82 @@ +#ifndef ASM_X86_CAMELLIA_H +#define ASM_X86_CAMELLIA_H + +#include +#include + +#define CAMELLIA_MIN_KEY_SIZE 16 +#define CAMELLIA_MAX_KEY_SIZE 32 +#define CAMELLIA_BLOCK_SIZE 16 +#define CAMELLIA_TABLE_BYTE_LEN 272 +#define CAMELLIA_PARALLEL_BLOCKS 2 + +struct camellia_ctx { + u64 key_table[CAMELLIA_TABLE_BYTE_LEN / sizeof(u64)]; + u32 key_length; +}; + +struct camellia_lrw_ctx { + struct lrw_table_ctx lrw_table; + struct camellia_ctx camellia_ctx; +}; + +struct camellia_xts_ctx { + struct camellia_ctx tweak_ctx; + struct camellia_ctx crypt_ctx; +}; + +extern int __camellia_setkey(struct camellia_ctx *cctx, + const unsigned char *key, + unsigned int key_len, u32 *flags); + +extern int lrw_camellia_setkey(struct crypto_tfm *tfm, const u8 *key, + unsigned int keylen); +extern void lrw_camellia_exit_tfm(struct crypto_tfm *tfm); + +extern int xts_camellia_setkey(struct crypto_tfm *tfm, const u8 *key, + unsigned int keylen); + +/* regular block cipher functions */ +asmlinkage void __camellia_enc_blk(struct camellia_ctx *ctx, u8 *dst, + const u8 *src, bool xor); +asmlinkage void camellia_dec_blk(struct camellia_ctx *ctx, u8 *dst, + const u8 *src); + +/* 2-way parallel cipher functions */ +asmlinkage void __camellia_enc_blk_2way(struct camellia_ctx *ctx, u8 *dst, + const u8 *src, bool xor); +asmlinkage void camellia_dec_blk_2way(struct camellia_ctx *ctx, u8 *dst, + const u8 *src); + +static inline void camellia_enc_blk(struct camellia_ctx *ctx, u8 *dst, + const u8 *src) +{ + __camellia_enc_blk(ctx, dst, src, false); +} + +static inline void camellia_enc_blk_xor(struct camellia_ctx *ctx, u8 *dst, + const u8 *src) +{ + __camellia_enc_blk(ctx, dst, src, true); +} + +static inline void camellia_enc_blk_2way(struct camellia_ctx *ctx, u8 *dst, + const u8 *src) +{ + __camellia_enc_blk_2way(ctx, dst, src, false); +} + +static inline void camellia_enc_blk_xor_2way(struct camellia_ctx *ctx, u8 *dst, + const u8 *src) +{ + __camellia_enc_blk_2way(ctx, dst, src, true); +} + +/* glue helpers */ +extern void camellia_decrypt_cbc_2way(void *ctx, u128 *dst, const u128 *src); +extern void camellia_crypt_ctr(void *ctx, u128 *dst, const u128 *src, + le128 *iv); +extern void camellia_crypt_ctr_2way(void *ctx, u128 *dst, const u128 *src, + le128 *iv); + +#endif /* ASM_X86_CAMELLIA_H */ -- cgit v1.2.3