summary | refs | log | tree | commit | diff | stats
path: root/crypto/polyval-generic.c
diff options
context:
space:
mode:
author: Nathan Huckleberry <nhuck@google.com> 2022-05-20 18:14:59 +0000
committer: Herbert Xu <herbert@gondor.apana.org.au> 2022-06-10 16:40:17 +0800
commit: 34f7f6c3011276313383099156be287ac745bcea (patch)
tree: 54cceaf783df3b379afd1b6c52a699c5ed8301c4 /crypto/polyval-generic.c
parent: c0eb7591c1ed9cbdb0ad796bb56aed13748b55fa (diff)
download: linux-34f7f6c3011276313383099156be287ac745bcea.tar.bz2
crypto: x86/polyval - Add PCLMULQDQ accelerated implementation of POLYVAL
Add hardware accelerated version of POLYVAL for x86-64 CPUs with PCLMULQDQ support.

This implementation is accelerated using PCLMULQDQ instructions to perform the finite field computations. For added efficiency, 8 blocks of the message are processed simultaneously by precomputing the first 8 powers of the key.

Schoolbook multiplication is used instead of Karatsuba multiplication because it was found to be slightly faster on x86-64 machines. Montgomery reduction must be used instead of Barrett reduction due to the difference in modulus between POLYVAL's field and other finite fields.

More information on POLYVAL can be found in the HCTR2 paper: "Length-preserving encryption with HCTR2": https://eprint.iacr.org/2021/1441.pdf

Signed-off-by: Nathan Huckleberry <nhuck@google.com>
Reviewed-by: Ard Biesheuvel <ardb@kernel.org>
Reviewed-by: Eric Biggers <ebiggers@google.com>
Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
Diffstat (limited to 'crypto/polyval-generic.c')
-rw-r--r-- crypto/polyval-generic.c 40
1 files changed, 40 insertions, 0 deletions
diff --git a/crypto/polyval-generic.c b/crypto/polyval-generic.c
index bf2b03b7bfc0..16bfa6925b31 100644
--- a/crypto/polyval-generic.c
+++ b/crypto/polyval-generic.c
@@ -76,6 +76,46 @@ static void copy_and_reverse(u8 dst[POLYVAL_BLOCK_SIZE],
put_unaligned(swab64(b), (u64 *)&dst[0]);
}
+/*
+ * Performs multiplication in the POLYVAL field using the GHASH field as a
+ * subroutine. This function is used as a fallback for hardware accelerated
+ * implementations when simd registers are unavailable.
+ *
+ * @op1: first operand; overwritten in place with the product. It is handed to
+ *       copy_and_reverse() as the destination, so it must hold
+ *       POLYVAL_BLOCK_SIZE bytes.
+ * @op2: second operand; only read.
+ *
+ * Note: This function is not used for polyval-generic; instead, we use the 4k
+ * lookup table implementation for finite field multiplication.
+ */
+void polyval_mul_non4k(u8 *op1, const u8 *op2)
+{
+ be128 a, b;
+
+ /*
+  * Assume one argument is in Montgomery form and one is not.
+  *
+  * Both operands pass through copy_and_reverse() into be128 temporaries,
+  * are multiplied with the gf128mul_*_lle() helpers, and the product
+  * passes through copy_and_reverse() again on its way back into op1.
+  * NOTE(review): the gf128mul_x_lle() step presumably supplies the extra
+  * factor of x needed to map between the POLYVAL and GHASH
+  * representations -- confirm against RFC 8452.
+  */
+ copy_and_reverse((u8 *)&a, op1);
+ copy_and_reverse((u8 *)&b, op2);
+ gf128mul_x_lle(&a, &a);
+ gf128mul_lle(&a, &b);
+ copy_and_reverse(op1, (u8 *)&a);
+}
+EXPORT_SYMBOL_GPL(polyval_mul_non4k);
+
+/*
+ * Perform a POLYVAL update using non4k multiplication. This function is used
+ * as a fallback for hardware accelerated implementations when simd registers
+ * are unavailable.
+ *
+ * For each of the nblocks input blocks, XORs POLYVAL_BLOCK_SIZE bytes of
+ * input into the accumulator, then multiplies the accumulator by the key
+ * with polyval_mul_non4k(), and advances to the next input block.
+ *
+ * @key: multiplier, passed as the second operand of polyval_mul_non4k().
+ * @in: input data; nblocks * POLYVAL_BLOCK_SIZE bytes are read.
+ * @nblocks: number of POLYVAL_BLOCK_SIZE-byte blocks to process; when it is
+ *           0 the accumulator is left untouched.
+ * @accumulator: running state, updated in place.
+ *
+ * Note: This function is not used for polyval-generic; instead, we use the 4k
+ * lookup table implementation of finite field multiplication.
+ */
+void polyval_update_non4k(const u8 *key, const u8 *in,
+ size_t nblocks, u8 *accumulator)
+{
+ while (nblocks--) {
+ crypto_xor(accumulator, in, POLYVAL_BLOCK_SIZE);
+ polyval_mul_non4k(accumulator, key);
+ in += POLYVAL_BLOCK_SIZE;
+ }
+}
+EXPORT_SYMBOL_GPL(polyval_update_non4k);
+
static int polyval_setkey(struct crypto_shash *tfm,
const u8 *key, unsigned int keylen)
{