summaryrefslogtreecommitdiffstats
path: root/arch/arm64/crypto/ghash-ce-core.S
diff options
context:
space:
mode:
authorArd Biesheuvel <ard.biesheuvel@linaro.org>2014-03-26 20:53:05 +0100
committerArd Biesheuvel <ard.biesheuvel@linaro.org>2014-05-14 10:04:07 -0700
commitfdd2389457b209a9723c3be818fcf301f35db906 (patch)
tree086cdabe6082cf998ef5ca07fb9235ad04c1d8d6 /arch/arm64/crypto/ghash-ce-core.S
parent6ba6c74dfc6bcf43312ef572592f7d4ebb3aedfa (diff)
downloadlinux-fdd2389457b209a9723c3be818fcf301f35db906.tar.bz2
arm64/crypto: GHASH secure hash using ARMv8 Crypto Extensions
This is a port to ARMv8 (Crypto Extensions) of the Intel implementation of the GHASH Secure Hash (used in the Galois/Counter chaining mode). It relies on the optional PMULL/PMULL2 instruction (polynomial multiply long, what Intel call carry-less multiply). Signed-off-by: Ard Biesheuvel <ard.biesheuvel@linaro.org> Acked-by: Herbert Xu <herbert@gondor.apana.org.au>
Diffstat (limited to 'arch/arm64/crypto/ghash-ce-core.S')
-rw-r--r--arch/arm64/crypto/ghash-ce-core.S95
1 files changed, 95 insertions, 0 deletions
diff --git a/arch/arm64/crypto/ghash-ce-core.S b/arch/arm64/crypto/ghash-ce-core.S
new file mode 100644
index 000000000000..b9e6eaf41c9b
--- /dev/null
+++ b/arch/arm64/crypto/ghash-ce-core.S
@@ -0,0 +1,95 @@
+/*
+ * Accelerated GHASH implementation with ARMv8 PMULL instructions.
+ *
+ * Copyright (C) 2014 Linaro Ltd. <ard.biesheuvel@linaro.org>
+ *
+ * Based on arch/x86/crypto/ghash-pmullni-intel_asm.S
+ *
+ * Copyright (c) 2009 Intel Corp.
+ * Author: Huang Ying <ying.huang@intel.com>
+ * Vinodh Gopal
+ * Erdinc Ozturk
+ * Deniz Karakoyunlu
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 as published
+ * by the Free Software Foundation.
+ */
+
+#include <linux/linkage.h>
+#include <asm/assembler.h>
+
+ DATA .req v0
+ SHASH .req v1
+ IN1 .req v2
+ T1 .req v2
+ T2 .req v3
+ T3 .req v4
+ VZR .req v5
+
+ .text
+ .arch armv8-a+crypto
+
+ /*
+ * void pmull_ghash_update(int blocks, u64 dg[], const char *src,
+ * struct ghash_key const *k, const char *head)
+ */
+ENTRY(pmull_ghash_update)
+ ld1 {DATA.16b}, [x1]
+ ld1 {SHASH.16b}, [x3]
+ eor VZR.16b, VZR.16b, VZR.16b
+
+ /* do the head block first, if supplied */
+ cbz x4, 0f
+ ld1 {IN1.2d}, [x4]
+ b 1f
+
+0: ld1 {IN1.2d}, [x2], #16
+ sub w0, w0, #1
+1: ext IN1.16b, IN1.16b, IN1.16b, #8
+CPU_LE( rev64 IN1.16b, IN1.16b )
+ eor DATA.16b, DATA.16b, IN1.16b
+
+ /* multiply DATA by SHASH in GF(2^128) */
+ ext T2.16b, DATA.16b, DATA.16b, #8
+ ext T3.16b, SHASH.16b, SHASH.16b, #8
+ eor T2.16b, T2.16b, DATA.16b
+ eor T3.16b, T3.16b, SHASH.16b
+
+ pmull2 T1.1q, SHASH.2d, DATA.2d // a1 * b1
+ pmull DATA.1q, SHASH.1d, DATA.1d // a0 * b0
+ pmull T2.1q, T2.1d, T3.1d // (a1 + a0)(b1 + b0)
+ eor T2.16b, T2.16b, T1.16b // (a0 * b1) + (a1 * b0)
+ eor T2.16b, T2.16b, DATA.16b
+
+ ext T3.16b, VZR.16b, T2.16b, #8
+ ext T2.16b, T2.16b, VZR.16b, #8
+ eor DATA.16b, DATA.16b, T3.16b
+ eor T1.16b, T1.16b, T2.16b // <T1:DATA> is result of
+ // carry-less multiplication
+
+ /* first phase of the reduction */
+ shl T3.2d, DATA.2d, #1
+ eor T3.16b, T3.16b, DATA.16b
+ shl T3.2d, T3.2d, #5
+ eor T3.16b, T3.16b, DATA.16b
+ shl T3.2d, T3.2d, #57
+ ext T2.16b, VZR.16b, T3.16b, #8
+ ext T3.16b, T3.16b, VZR.16b, #8
+ eor DATA.16b, DATA.16b, T2.16b
+ eor T1.16b, T1.16b, T3.16b
+
+ /* second phase of the reduction */
+ ushr T2.2d, DATA.2d, #5
+ eor T2.16b, T2.16b, DATA.16b
+ ushr T2.2d, T2.2d, #1
+ eor T2.16b, T2.16b, DATA.16b
+ ushr T2.2d, T2.2d, #1
+ eor T1.16b, T1.16b, T2.16b
+ eor DATA.16b, DATA.16b, T1.16b
+
+ cbnz w0, 0b
+
+ st1 {DATA.16b}, [x1]
+ ret
+ENDPROC(pmull_ghash_update)