crypto: x86/polyval - Add PCLMULQDQ accelerated implementation of POLYVAL

Add hardware accelerated version of POLYVAL for x86-64 CPUs with PCLMULQDQ support. This implementation is accelerated using PCLMULQDQ instructions to perform the finite field computations. For added efficiency, 8 blocks of the message are processed simultaneously by precomputing the first 8 powers of the key. Schoolbook multiplication is used instead of Karatsuba multiplication because it was found to be slightly faster on x86-64 machines. Montgomery reduction must be used instead of Barrett reduction due to the difference in modulus between POLYVAL's field and other finite fields. More information on POLYVAL can be found in the HCTR2 paper: "Length-preserving encryption with HCTR2": https://eprint.iacr.org/2021/1441.pdf Signed-off-by: Nathan Huckleberry <nhuck@google.com> Reviewed-by: Ard Biesheuvel <ardb@kernel.org> Reviewed-by: Eric Biggers <ebiggers@google.com> Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
author: Nathan Huckleberry <nhuck@google.com> 2022-05-20 18:14:59 +0000
committer: Herbert Xu <herbert@gondor.apana.org.au> 2022-06-10 16:40:17 +0800
commit: 34f7f6c3011276313383099156be287ac745bcea (patch)
tree: 54cceaf783df3b379afd1b6c52a699c5ed8301c4 /crypto/polyval-generic.c
parent: c0eb7591c1ed9cbdb0ad796bb56aed13748b55fa (diff)
download: linux-34f7f6c3011276313383099156be287ac745bcea.tar.bz2
1 files changed, 40 insertions, 0 deletions
diff --git a/crypto/polyval-generic.c b/crypto/polyval-generic.c
index bf2b03b7bfc0..16bfa6925b31 100644
--- a/crypto/polyval-generic.c
+++ b/crypto/polyval-generic.c
@@ -76,6 +76,46 @@ static void copy_and_reverse(u8 dst[POLYVAL_BLOCK_SIZE],
 	put_unaligned(swab64(b), (u64 *)&dst[0]);
 }
 
+/*
+ * Performs multiplication in the POLYVAL field using the GHASH field as a
+ * subroutine.  This function is used as a fallback for hardware accelerated
+ * implementations when simd registers are unavailable.
+ *
+ * Note: This function is not used for polyval-generic, instead we use the 4k
+ * lookup table implementation for finite field multiplication.
+ */
+void polyval_mul_non4k(u8 *op1, const u8 *op2)
+{
+	be128 a, b;
+
+	// Assume one argument is in Montgomery form and one is not.
+	copy_and_reverse((u8 *)&a, op1);
+	copy_and_reverse((u8 *)&b, op2);
+	gf128mul_x_lle(&a, &a);
+	gf128mul_lle(&a, &b);
+	copy_and_reverse(op1, (u8 *)&a);
+}
+EXPORT_SYMBOL_GPL(polyval_mul_non4k);
+
+/*
+ * Perform a POLYVAL update using non4k multiplication.  This function is used
+ * as a fallback for hardware accelerated implementations when simd registers
+ * are unavailable.
+ *
+ * Note: This function is not used for polyval-generic, instead we use the 4k
+ * lookup table implementation of finite field multiplication.
+ */
+void polyval_update_non4k(const u8 *key, const u8 *in,
+			  size_t nblocks, u8 *accumulator)
+{
+	while (nblocks--) {
+		crypto_xor(accumulator, in, POLYVAL_BLOCK_SIZE);
+		polyval_mul_non4k(accumulator, key);
+		in += POLYVAL_BLOCK_SIZE;
+	}
+}
+EXPORT_SYMBOL_GPL(polyval_update_non4k);
+
 static int polyval_setkey(struct crypto_shash *tfm,
 			  const u8 *key, unsigned int keylen)
 {
author	Nathan Huckleberry <nhuck@google.com>	2022-05-20 18:14:59 +0000
committer	Herbert Xu <herbert@gondor.apana.org.au>	2022-06-10 16:40:17 +0800
commit	34f7f6c3011276313383099156be287ac745bcea (patch)
tree	54cceaf783df3b379afd1b6c52a699c5ed8301c4 /crypto/polyval-generic.c
parent	c0eb7591c1ed9cbdb0ad796bb56aed13748b55fa (diff)
download	linux-34f7f6c3011276313383099156be287ac745bcea.tar.bz2