author    Linus Torvalds <torvalds@linux-foundation.org>  2020-10-13 08:50:16 -0700
committer Linus Torvalds <torvalds@linux-foundation.org>  2020-10-13 08:50:16 -0700
commit    39a5101f989e8d2be557136704d53990f9b402c8 (patch)
tree      b9c16c6f32508939111fb6d0159d7450713a5f33 /arch/x86
parent    865c50e1d279671728c2936cb7680eb89355eeea (diff)
parent    3093e7c16e12d729c325adb3c53dde7308cefbd8 (diff)
download  linux-39a5101f989e8d2be557136704d53990f9b402c8.tar.bz2
Merge branch 'linus' of git://git.kernel.org/pub/scm/linux/kernel/git/herbert/crypto-2.6
Pull crypto updates from Herbert Xu:
 "API:
   - Allow DRBG testing through user-space af_alg
   - Add tcrypt speed testing support for keyed hashes
   - Add type-safe init/exit hooks for ahash

  Algorithms:
   - Mark arc4 as obsolete and pending for future removal
   - Mark anubis, khazad, seed and tea as obsolete
   - Improve boot-time xor benchmark
   - Add OSCCA SM2 asymmetric cipher algorithm and use it for integrity

  Drivers:
   - Fixes and enhancement for XTS in caam
   - Add support for XIP8001B hwrng in xiphera-trng
   - Add RNG and hash support in sun8i-ce/sun8i-ss
   - Allow imx-rngc to be used by kernel entropy pool
   - Use crypto engine in omap-sham
   - Add support for Ingenic X1830 with ingenic"

* 'linus' of git://git.kernel.org/pub/scm/linux/kernel/git/herbert/crypto-2.6: (205 commits)
  X.509: Fix modular build of public_key_sm2
  crypto: xor - Remove unused variable count in do_xor_speed
  X.509: fix error return value on the failed path
  crypto: bcm - Verify GCM/CCM key length in setkey
  crypto: qat - drop input parameter from adf_enable_aer()
  crypto: qat - fix function parameters descriptions
  crypto: atmel-tdes - use semicolons rather than commas to separate statements
  crypto: drivers - use semicolons rather than commas to separate statements
  hwrng: mxc-rnga - use semicolons rather than commas to separate statements
  hwrng: iproc-rng200 - use semicolons rather than commas to separate statements
  hwrng: stm32 - use semicolons rather than commas to separate statements
  crypto: xor - use ktime for template benchmarking
  crypto: xor - defer load time benchmark to a later time
  crypto: hisilicon/zip - fix the uninitialized 'curr_qm_qp_num'
  crypto: hisilicon/zip - fix the return value when device is busy
  crypto: hisilicon/zip - fix zero length input in GZIP decompress
  crypto: hisilicon/zip - fix the uncleared debug registers
  lib/mpi: Fix unused variable warnings
  crypto: x86/poly1305 - Remove assignments with no effect
  hwrng: npcm - modify readl to readb
  ...
Diffstat (limited to 'arch/x86')
-rw-r--r--  arch/x86/crypto/blake2s-glue.c                  1
-rw-r--r--  arch/x86/crypto/chacha_glue.c                   1
-rw-r--r--  arch/x86/crypto/crc32c-intel_glue.c            18
-rw-r--r--  arch/x86/crypto/curve25519-x86_64.c            69
-rw-r--r--  arch/x86/crypto/nhpoly1305-avx2-glue.c          1
-rw-r--r--  arch/x86/crypto/nhpoly1305-sse2-glue.c          1
-rw-r--r--  arch/x86/crypto/poly1305-x86_64-cryptogams.pl   8
-rw-r--r--  arch/x86/crypto/poly1305_glue.c                 4
8 files changed, 50 insertions(+), 53 deletions(-)
diff --git a/arch/x86/crypto/blake2s-glue.c b/arch/x86/crypto/blake2s-glue.c
index 6737bcea1fa1..c025a01cf708 100644
--- a/arch/x86/crypto/blake2s-glue.c
+++ b/arch/x86/crypto/blake2s-glue.c
@@ -11,6 +11,7 @@
#include <linux/jump_label.h>
#include <linux/kernel.h>
#include <linux/module.h>
+#include <linux/sizes.h>
#include <asm/cpufeature.h>
#include <asm/fpu/api.h>
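Note on the <linux/sizes.h> additions (here and in the chacha, nhpoly1305 and poly1305 glue files below): these wrappers cap how much data they process inside a single kernel_fpu_begin()/kernel_fpu_end() section at SZ_4K, and the new include presumably pulls that constant in directly instead of relying on an indirect include. A minimal sketch of that chunking pattern, with example_simd_update() and example_simd_blocks() as hypothetical stand-ins for the per-algorithm code:

/* Hedged sketch, not part of this diff: process at most SZ_4K bytes per
 * FPU-enabled section so preemption is not held off for too long. */
#include <linux/kernel.h>
#include <linux/types.h>
#include <linux/sizes.h>
#include <asm/fpu/api.h>

static void example_simd_blocks(u8 *dst, const u8 *src, unsigned int n);

static void example_simd_update(u8 *dst, const u8 *src, unsigned int len)
{
	do {
		unsigned int n = min_t(unsigned int, len, SZ_4K);

		kernel_fpu_begin();
		example_simd_blocks(dst, src, n);	/* real SIMD work goes here */
		kernel_fpu_end();

		dst += n;
		src += n;
		len -= n;
	} while (len);
}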
diff --git a/arch/x86/crypto/chacha_glue.c b/arch/x86/crypto/chacha_glue.c
index e67a59130025..7b3a1cf0984b 100644
--- a/arch/x86/crypto/chacha_glue.c
+++ b/arch/x86/crypto/chacha_glue.c
@@ -12,6 +12,7 @@
#include <crypto/internal/skcipher.h>
#include <linux/kernel.h>
#include <linux/module.h>
+#include <linux/sizes.h>
#include <asm/simd.h>
asmlinkage void chacha_block_xor_ssse3(u32 *state, u8 *dst, const u8 *src,
diff --git a/arch/x86/crypto/crc32c-intel_glue.c b/arch/x86/crypto/crc32c-intel_glue.c
index d2d069bd459b..feccb5254c7e 100644
--- a/arch/x86/crypto/crc32c-intel_glue.c
+++ b/arch/x86/crypto/crc32c-intel_glue.c
@@ -28,9 +28,9 @@
#define SCALE_F sizeof(unsigned long)
#ifdef CONFIG_X86_64
-#define REX_PRE "0x48, "
+#define CRC32_INST "crc32q %1, %q0"
#else
-#define REX_PRE
+#define CRC32_INST "crc32l %1, %0"
#endif
#ifdef CONFIG_X86_64
@@ -48,11 +48,8 @@ asmlinkage unsigned int crc_pcl(const u8 *buffer, int len,
static u32 crc32c_intel_le_hw_byte(u32 crc, unsigned char const *data, size_t length)
{
while (length--) {
- __asm__ __volatile__(
- ".byte 0xf2, 0xf, 0x38, 0xf0, 0xf1"
- :"=S"(crc)
- :"0"(crc), "c"(*data)
- );
+ asm("crc32b %1, %0"
+ : "+r" (crc) : "rm" (*data));
data++;
}
@@ -66,11 +63,8 @@ static u32 __pure crc32c_intel_le_hw(u32 crc, unsigned char const *p, size_t len
unsigned long *ptmp = (unsigned long *)p;
while (iquotient--) {
- __asm__ __volatile__(
- ".byte 0xf2, " REX_PRE "0xf, 0x38, 0xf1, 0xf1;"
- :"=S"(crc)
- :"0"(crc), "c"(*ptmp)
- );
+ asm(CRC32_INST
+ : "+r" (crc) : "rm" (*ptmp));
ptmp++;
}
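The crc32c change above replaces hand-encoded .byte opcode sequences (with a REX_PRE macro supplying the 64-bit prefix) by the crc32b/crc32l/crc32q mnemonics, and relaxes the operand constraints from fixed registers to "+r"/"rm" so the compiler can choose registers and use a memory operand directly. A stand-alone user-space sketch of the byte-at-a-time form, assuming an SSE4.2-capable x86 CPU and a GNU-style compiler (an illustration, not the kernel code itself):

#include <stddef.h>
#include <stdint.h>
#include <stdio.h>

/* Byte-at-a-time CRC32C using the same "+r"/"rm" constraint style as the
 * patched kernel helper. */
static uint32_t crc32c_le_bytes(uint32_t crc, const unsigned char *data, size_t len)
{
	while (len--) {
		asm("crc32b %1, %0" : "+r" (crc) : "rm" (*data));
		data++;
	}
	return crc;
}

int main(void)
{
	const unsigned char msg[] = "123456789";

	/* CRC32C is conventionally seeded with all ones and inverted at the end;
	 * the standard check value for "123456789" is e3069283. */
	printf("%08x\n", ~crc32c_le_bytes(~0u, msg, sizeof(msg) - 1));
	return 0;
}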
diff --git a/arch/x86/crypto/curve25519-x86_64.c b/arch/x86/crypto/curve25519-x86_64.c
index 8acbb6584a37..5af8021b98ce 100644
--- a/arch/x86/crypto/curve25519-x86_64.c
+++ b/arch/x86/crypto/curve25519-x86_64.c
@@ -11,6 +11,7 @@
#include <linux/jump_label.h>
#include <linux/kernel.h>
#include <linux/module.h>
+#include <linux/scatterlist.h>
#include <asm/cpufeature.h>
#include <asm/processor.h>
@@ -45,11 +46,11 @@ static inline u64 add_scalar(u64 *out, const u64 *f1, u64 f2)
asm volatile(
/* Clear registers to propagate the carry bit */
- " xor %%r8, %%r8;"
- " xor %%r9, %%r9;"
- " xor %%r10, %%r10;"
- " xor %%r11, %%r11;"
- " xor %1, %1;"
+ " xor %%r8d, %%r8d;"
+ " xor %%r9d, %%r9d;"
+ " xor %%r10d, %%r10d;"
+ " xor %%r11d, %%r11d;"
+ " xor %k1, %k1;"
/* Begin addition chain */
" addq 0(%3), %0;"
@@ -93,7 +94,7 @@ static inline void fadd(u64 *out, const u64 *f1, const u64 *f2)
" cmovc %0, %%rax;"
/* Step 2: Add carry*38 to the original sum */
- " xor %%rcx, %%rcx;"
+ " xor %%ecx, %%ecx;"
" add %%rax, %%r8;"
" adcx %%rcx, %%r9;"
" movq %%r9, 8(%1);"
@@ -165,28 +166,28 @@ static inline void fmul(u64 *out, const u64 *f1, const u64 *f2, u64 *tmp)
/* Compute src1[0] * src2 */
" movq 0(%1), %%rdx;"
- " mulxq 0(%3), %%r8, %%r9;" " xor %%r10, %%r10;" " movq %%r8, 0(%0);"
+ " mulxq 0(%3), %%r8, %%r9;" " xor %%r10d, %%r10d;" " movq %%r8, 0(%0);"
" mulxq 8(%3), %%r10, %%r11;" " adox %%r9, %%r10;" " movq %%r10, 8(%0);"
" mulxq 16(%3), %%rbx, %%r13;" " adox %%r11, %%rbx;"
" mulxq 24(%3), %%r14, %%rdx;" " adox %%r13, %%r14;" " mov $0, %%rax;"
" adox %%rdx, %%rax;"
/* Compute src1[1] * src2 */
" movq 8(%1), %%rdx;"
- " mulxq 0(%3), %%r8, %%r9;" " xor %%r10, %%r10;" " adcxq 8(%0), %%r8;" " movq %%r8, 8(%0);"
+ " mulxq 0(%3), %%r8, %%r9;" " xor %%r10d, %%r10d;" " adcxq 8(%0), %%r8;" " movq %%r8, 8(%0);"
" mulxq 8(%3), %%r10, %%r11;" " adox %%r9, %%r10;" " adcx %%rbx, %%r10;" " movq %%r10, 16(%0);"
" mulxq 16(%3), %%rbx, %%r13;" " adox %%r11, %%rbx;" " adcx %%r14, %%rbx;" " mov $0, %%r8;"
" mulxq 24(%3), %%r14, %%rdx;" " adox %%r13, %%r14;" " adcx %%rax, %%r14;" " mov $0, %%rax;"
" adox %%rdx, %%rax;" " adcx %%r8, %%rax;"
/* Compute src1[2] * src2 */
" movq 16(%1), %%rdx;"
- " mulxq 0(%3), %%r8, %%r9;" " xor %%r10, %%r10;" " adcxq 16(%0), %%r8;" " movq %%r8, 16(%0);"
+ " mulxq 0(%3), %%r8, %%r9;" " xor %%r10d, %%r10d;" " adcxq 16(%0), %%r8;" " movq %%r8, 16(%0);"
" mulxq 8(%3), %%r10, %%r11;" " adox %%r9, %%r10;" " adcx %%rbx, %%r10;" " movq %%r10, 24(%0);"
" mulxq 16(%3), %%rbx, %%r13;" " adox %%r11, %%rbx;" " adcx %%r14, %%rbx;" " mov $0, %%r8;"
" mulxq 24(%3), %%r14, %%rdx;" " adox %%r13, %%r14;" " adcx %%rax, %%r14;" " mov $0, %%rax;"
" adox %%rdx, %%rax;" " adcx %%r8, %%rax;"
/* Compute src1[3] * src2 */
" movq 24(%1), %%rdx;"
- " mulxq 0(%3), %%r8, %%r9;" " xor %%r10, %%r10;" " adcxq 24(%0), %%r8;" " movq %%r8, 24(%0);"
+ " mulxq 0(%3), %%r8, %%r9;" " xor %%r10d, %%r10d;" " adcxq 24(%0), %%r8;" " movq %%r8, 24(%0);"
" mulxq 8(%3), %%r10, %%r11;" " adox %%r9, %%r10;" " adcx %%rbx, %%r10;" " movq %%r10, 32(%0);"
" mulxq 16(%3), %%rbx, %%r13;" " adox %%r11, %%rbx;" " adcx %%r14, %%rbx;" " movq %%rbx, 40(%0);" " mov $0, %%r8;"
" mulxq 24(%3), %%r14, %%rdx;" " adox %%r13, %%r14;" " adcx %%rax, %%r14;" " movq %%r14, 48(%0);" " mov $0, %%rax;"
@@ -200,7 +201,7 @@ static inline void fmul(u64 *out, const u64 *f1, const u64 *f2, u64 *tmp)
/* Step 1: Compute dst + carry == tmp_hi * 38 + tmp_lo */
" mov $38, %%rdx;"
" mulxq 32(%1), %%r8, %%r13;"
- " xor %3, %3;"
+ " xor %k3, %k3;"
" adoxq 0(%1), %%r8;"
" mulxq 40(%1), %%r9, %%rbx;"
" adcx %%r13, %%r9;"
@@ -246,28 +247,28 @@ static inline void fmul2(u64 *out, const u64 *f1, const u64 *f2, u64 *tmp)
/* Compute src1[0] * src2 */
" movq 0(%1), %%rdx;"
- " mulxq 0(%3), %%r8, %%r9;" " xor %%r10, %%r10;" " movq %%r8, 0(%0);"
+ " mulxq 0(%3), %%r8, %%r9;" " xor %%r10d, %%r10d;" " movq %%r8, 0(%0);"
" mulxq 8(%3), %%r10, %%r11;" " adox %%r9, %%r10;" " movq %%r10, 8(%0);"
" mulxq 16(%3), %%rbx, %%r13;" " adox %%r11, %%rbx;"
" mulxq 24(%3), %%r14, %%rdx;" " adox %%r13, %%r14;" " mov $0, %%rax;"
" adox %%rdx, %%rax;"
/* Compute src1[1] * src2 */
" movq 8(%1), %%rdx;"
- " mulxq 0(%3), %%r8, %%r9;" " xor %%r10, %%r10;" " adcxq 8(%0), %%r8;" " movq %%r8, 8(%0);"
+ " mulxq 0(%3), %%r8, %%r9;" " xor %%r10d, %%r10d;" " adcxq 8(%0), %%r8;" " movq %%r8, 8(%0);"
" mulxq 8(%3), %%r10, %%r11;" " adox %%r9, %%r10;" " adcx %%rbx, %%r10;" " movq %%r10, 16(%0);"
" mulxq 16(%3), %%rbx, %%r13;" " adox %%r11, %%rbx;" " adcx %%r14, %%rbx;" " mov $0, %%r8;"
" mulxq 24(%3), %%r14, %%rdx;" " adox %%r13, %%r14;" " adcx %%rax, %%r14;" " mov $0, %%rax;"
" adox %%rdx, %%rax;" " adcx %%r8, %%rax;"
/* Compute src1[2] * src2 */
" movq 16(%1), %%rdx;"
- " mulxq 0(%3), %%r8, %%r9;" " xor %%r10, %%r10;" " adcxq 16(%0), %%r8;" " movq %%r8, 16(%0);"
+ " mulxq 0(%3), %%r8, %%r9;" " xor %%r10d, %%r10d;" " adcxq 16(%0), %%r8;" " movq %%r8, 16(%0);"
" mulxq 8(%3), %%r10, %%r11;" " adox %%r9, %%r10;" " adcx %%rbx, %%r10;" " movq %%r10, 24(%0);"
" mulxq 16(%3), %%rbx, %%r13;" " adox %%r11, %%rbx;" " adcx %%r14, %%rbx;" " mov $0, %%r8;"
" mulxq 24(%3), %%r14, %%rdx;" " adox %%r13, %%r14;" " adcx %%rax, %%r14;" " mov $0, %%rax;"
" adox %%rdx, %%rax;" " adcx %%r8, %%rax;"
/* Compute src1[3] * src2 */
" movq 24(%1), %%rdx;"
- " mulxq 0(%3), %%r8, %%r9;" " xor %%r10, %%r10;" " adcxq 24(%0), %%r8;" " movq %%r8, 24(%0);"
+ " mulxq 0(%3), %%r8, %%r9;" " xor %%r10d, %%r10d;" " adcxq 24(%0), %%r8;" " movq %%r8, 24(%0);"
" mulxq 8(%3), %%r10, %%r11;" " adox %%r9, %%r10;" " adcx %%rbx, %%r10;" " movq %%r10, 32(%0);"
" mulxq 16(%3), %%rbx, %%r13;" " adox %%r11, %%rbx;" " adcx %%r14, %%rbx;" " movq %%rbx, 40(%0);" " mov $0, %%r8;"
" mulxq 24(%3), %%r14, %%rdx;" " adox %%r13, %%r14;" " adcx %%rax, %%r14;" " movq %%r14, 48(%0);" " mov $0, %%rax;"
@@ -277,29 +278,29 @@ static inline void fmul2(u64 *out, const u64 *f1, const u64 *f2, u64 *tmp)
/* Compute src1[0] * src2 */
" movq 32(%1), %%rdx;"
- " mulxq 32(%3), %%r8, %%r9;" " xor %%r10, %%r10;" " movq %%r8, 64(%0);"
- " mulxq 40(%3), %%r10, %%r11;" " adox %%r9, %%r10;" " movq %%r10, 72(%0);"
+ " mulxq 32(%3), %%r8, %%r9;" " xor %%r10d, %%r10d;" " movq %%r8, 64(%0);"
+ " mulxq 40(%3), %%r10, %%r11;" " adox %%r9, %%r10;" " movq %%r10, 72(%0);"
" mulxq 48(%3), %%rbx, %%r13;" " adox %%r11, %%rbx;"
" mulxq 56(%3), %%r14, %%rdx;" " adox %%r13, %%r14;" " mov $0, %%rax;"
" adox %%rdx, %%rax;"
/* Compute src1[1] * src2 */
" movq 40(%1), %%rdx;"
- " mulxq 32(%3), %%r8, %%r9;" " xor %%r10, %%r10;" " adcxq 72(%0), %%r8;" " movq %%r8, 72(%0);"
- " mulxq 40(%3), %%r10, %%r11;" " adox %%r9, %%r10;" " adcx %%rbx, %%r10;" " movq %%r10, 80(%0);"
+ " mulxq 32(%3), %%r8, %%r9;" " xor %%r10d, %%r10d;" " adcxq 72(%0), %%r8;" " movq %%r8, 72(%0);"
+ " mulxq 40(%3), %%r10, %%r11;" " adox %%r9, %%r10;" " adcx %%rbx, %%r10;" " movq %%r10, 80(%0);"
" mulxq 48(%3), %%rbx, %%r13;" " adox %%r11, %%rbx;" " adcx %%r14, %%rbx;" " mov $0, %%r8;"
" mulxq 56(%3), %%r14, %%rdx;" " adox %%r13, %%r14;" " adcx %%rax, %%r14;" " mov $0, %%rax;"
" adox %%rdx, %%rax;" " adcx %%r8, %%rax;"
/* Compute src1[2] * src2 */
" movq 48(%1), %%rdx;"
- " mulxq 32(%3), %%r8, %%r9;" " xor %%r10, %%r10;" " adcxq 80(%0), %%r8;" " movq %%r8, 80(%0);"
- " mulxq 40(%3), %%r10, %%r11;" " adox %%r9, %%r10;" " adcx %%rbx, %%r10;" " movq %%r10, 88(%0);"
+ " mulxq 32(%3), %%r8, %%r9;" " xor %%r10d, %%r10d;" " adcxq 80(%0), %%r8;" " movq %%r8, 80(%0);"
+ " mulxq 40(%3), %%r10, %%r11;" " adox %%r9, %%r10;" " adcx %%rbx, %%r10;" " movq %%r10, 88(%0);"
" mulxq 48(%3), %%rbx, %%r13;" " adox %%r11, %%rbx;" " adcx %%r14, %%rbx;" " mov $0, %%r8;"
" mulxq 56(%3), %%r14, %%rdx;" " adox %%r13, %%r14;" " adcx %%rax, %%r14;" " mov $0, %%rax;"
" adox %%rdx, %%rax;" " adcx %%r8, %%rax;"
/* Compute src1[3] * src2 */
" movq 56(%1), %%rdx;"
- " mulxq 32(%3), %%r8, %%r9;" " xor %%r10, %%r10;" " adcxq 88(%0), %%r8;" " movq %%r8, 88(%0);"
- " mulxq 40(%3), %%r10, %%r11;" " adox %%r9, %%r10;" " adcx %%rbx, %%r10;" " movq %%r10, 96(%0);"
+ " mulxq 32(%3), %%r8, %%r9;" " xor %%r10d, %%r10d;" " adcxq 88(%0), %%r8;" " movq %%r8, 88(%0);"
+ " mulxq 40(%3), %%r10, %%r11;" " adox %%r9, %%r10;" " adcx %%rbx, %%r10;" " movq %%r10, 96(%0);"
" mulxq 48(%3), %%rbx, %%r13;" " adox %%r11, %%rbx;" " adcx %%r14, %%rbx;" " movq %%rbx, 104(%0);" " mov $0, %%r8;"
" mulxq 56(%3), %%r14, %%rdx;" " adox %%r13, %%r14;" " adcx %%rax, %%r14;" " movq %%r14, 112(%0);" " mov $0, %%rax;"
" adox %%rdx, %%rax;" " adcx %%r8, %%rax;" " movq %%rax, 120(%0);"
@@ -312,7 +313,7 @@ static inline void fmul2(u64 *out, const u64 *f1, const u64 *f2, u64 *tmp)
/* Step 1: Compute dst + carry == tmp_hi * 38 + tmp_lo */
" mov $38, %%rdx;"
" mulxq 32(%1), %%r8, %%r13;"
- " xor %3, %3;"
+ " xor %k3, %k3;"
" adoxq 0(%1), %%r8;"
" mulxq 40(%1), %%r9, %%rbx;"
" adcx %%r13, %%r9;"
@@ -345,7 +346,7 @@ static inline void fmul2(u64 *out, const u64 *f1, const u64 *f2, u64 *tmp)
/* Step 1: Compute dst + carry == tmp_hi * 38 + tmp_lo */
" mov $38, %%rdx;"
" mulxq 96(%1), %%r8, %%r13;"
- " xor %3, %3;"
+ " xor %k3, %k3;"
" adoxq 64(%1), %%r8;"
" mulxq 104(%1), %%r9, %%rbx;"
" adcx %%r13, %%r9;"
@@ -516,7 +517,7 @@ static inline void fsqr(u64 *out, const u64 *f, u64 *tmp)
/* Step 1: Compute all partial products */
" movq 0(%1), %%rdx;" /* f[0] */
- " mulxq 8(%1), %%r8, %%r14;" " xor %%r15, %%r15;" /* f[1]*f[0] */
+ " mulxq 8(%1), %%r8, %%r14;" " xor %%r15d, %%r15d;" /* f[1]*f[0] */
" mulxq 16(%1), %%r9, %%r10;" " adcx %%r14, %%r9;" /* f[2]*f[0] */
" mulxq 24(%1), %%rax, %%rcx;" " adcx %%rax, %%r10;" /* f[3]*f[0] */
" movq 24(%1), %%rdx;" /* f[3] */
@@ -526,7 +527,7 @@ static inline void fsqr(u64 *out, const u64 *f, u64 *tmp)
" mulxq 16(%1), %%rax, %%rcx;" " mov $0, %%r14;" /* f[2]*f[1] */
/* Step 2: Compute two parallel carry chains */
- " xor %%r15, %%r15;"
+ " xor %%r15d, %%r15d;"
" adox %%rax, %%r10;"
" adcx %%r8, %%r8;"
" adox %%rcx, %%r11;"
@@ -563,7 +564,7 @@ static inline void fsqr(u64 *out, const u64 *f, u64 *tmp)
/* Step 1: Compute dst + carry == tmp_hi * 38 + tmp_lo */
" mov $38, %%rdx;"
" mulxq 32(%1), %%r8, %%r13;"
- " xor %%rcx, %%rcx;"
+ " xor %%ecx, %%ecx;"
" adoxq 0(%1), %%r8;"
" mulxq 40(%1), %%r9, %%rbx;"
" adcx %%r13, %%r9;"
@@ -607,7 +608,7 @@ static inline void fsqr2(u64 *out, const u64 *f, u64 *tmp)
asm volatile(
/* Step 1: Compute all partial products */
" movq 0(%1), %%rdx;" /* f[0] */
- " mulxq 8(%1), %%r8, %%r14;" " xor %%r15, %%r15;" /* f[1]*f[0] */
+ " mulxq 8(%1), %%r8, %%r14;" " xor %%r15d, %%r15d;" /* f[1]*f[0] */
" mulxq 16(%1), %%r9, %%r10;" " adcx %%r14, %%r9;" /* f[2]*f[0] */
" mulxq 24(%1), %%rax, %%rcx;" " adcx %%rax, %%r10;" /* f[3]*f[0] */
" movq 24(%1), %%rdx;" /* f[3] */
@@ -617,7 +618,7 @@ static inline void fsqr2(u64 *out, const u64 *f, u64 *tmp)
" mulxq 16(%1), %%rax, %%rcx;" " mov $0, %%r14;" /* f[2]*f[1] */
/* Step 2: Compute two parallel carry chains */
- " xor %%r15, %%r15;"
+ " xor %%r15d, %%r15d;"
" adox %%rax, %%r10;"
" adcx %%r8, %%r8;"
" adox %%rcx, %%r11;"
@@ -647,7 +648,7 @@ static inline void fsqr2(u64 *out, const u64 *f, u64 *tmp)
/* Step 1: Compute all partial products */
" movq 32(%1), %%rdx;" /* f[0] */
- " mulxq 40(%1), %%r8, %%r14;" " xor %%r15, %%r15;" /* f[1]*f[0] */
+ " mulxq 40(%1), %%r8, %%r14;" " xor %%r15d, %%r15d;" /* f[1]*f[0] */
" mulxq 48(%1), %%r9, %%r10;" " adcx %%r14, %%r9;" /* f[2]*f[0] */
" mulxq 56(%1), %%rax, %%rcx;" " adcx %%rax, %%r10;" /* f[3]*f[0] */
" movq 56(%1), %%rdx;" /* f[3] */
@@ -657,7 +658,7 @@ static inline void fsqr2(u64 *out, const u64 *f, u64 *tmp)
" mulxq 48(%1), %%rax, %%rcx;" " mov $0, %%r14;" /* f[2]*f[1] */
/* Step 2: Compute two parallel carry chains */
- " xor %%r15, %%r15;"
+ " xor %%r15d, %%r15d;"
" adox %%rax, %%r10;"
" adcx %%r8, %%r8;"
" adox %%rcx, %%r11;"
@@ -692,7 +693,7 @@ static inline void fsqr2(u64 *out, const u64 *f, u64 *tmp)
/* Step 1: Compute dst + carry == tmp_hi * 38 + tmp_lo */
" mov $38, %%rdx;"
" mulxq 32(%1), %%r8, %%r13;"
- " xor %%rcx, %%rcx;"
+ " xor %%ecx, %%ecx;"
" adoxq 0(%1), %%r8;"
" mulxq 40(%1), %%r9, %%rbx;"
" adcx %%r13, %%r9;"
@@ -725,7 +726,7 @@ static inline void fsqr2(u64 *out, const u64 *f, u64 *tmp)
/* Step 1: Compute dst + carry == tmp_hi * 38 + tmp_lo */
" mov $38, %%rdx;"
" mulxq 96(%1), %%r8, %%r13;"
- " xor %%rcx, %%rcx;"
+ " xor %%ecx, %%ecx;"
" adoxq 64(%1), %%r8;"
" mulxq 104(%1), %%r9, %%rbx;"
" adcx %%r13, %%r9;"
diff --git a/arch/x86/crypto/nhpoly1305-avx2-glue.c b/arch/x86/crypto/nhpoly1305-avx2-glue.c
index 80fcb85736e1..8ea5ab0f1ca7 100644
--- a/arch/x86/crypto/nhpoly1305-avx2-glue.c
+++ b/arch/x86/crypto/nhpoly1305-avx2-glue.c
@@ -10,6 +10,7 @@
#include <crypto/internal/simd.h>
#include <crypto/nhpoly1305.h>
#include <linux/module.h>
+#include <linux/sizes.h>
#include <asm/simd.h>
asmlinkage void nh_avx2(const u32 *key, const u8 *message, size_t message_len,
diff --git a/arch/x86/crypto/nhpoly1305-sse2-glue.c b/arch/x86/crypto/nhpoly1305-sse2-glue.c
index cc6b7c1a2705..2b353d42ed13 100644
--- a/arch/x86/crypto/nhpoly1305-sse2-glue.c
+++ b/arch/x86/crypto/nhpoly1305-sse2-glue.c
@@ -10,6 +10,7 @@
#include <crypto/internal/simd.h>
#include <crypto/nhpoly1305.h>
#include <linux/module.h>
+#include <linux/sizes.h>
#include <asm/simd.h>
asmlinkage void nh_sse2(const u32 *key, const u8 *message, size_t message_len,
diff --git a/arch/x86/crypto/poly1305-x86_64-cryptogams.pl b/arch/x86/crypto/poly1305-x86_64-cryptogams.pl
index 137edcf038cb..7d568012cc15 100644
--- a/arch/x86/crypto/poly1305-x86_64-cryptogams.pl
+++ b/arch/x86/crypto/poly1305-x86_64-cryptogams.pl
@@ -246,7 +246,7 @@ $code.=<<___ if (!$kernel);
___
&declare_function("poly1305_init_x86_64", 32, 3);
$code.=<<___;
- xor %rax,%rax
+ xor %eax,%eax
mov %rax,0($ctx) # initialize hash value
mov %rax,8($ctx)
mov %rax,16($ctx)
@@ -2853,7 +2853,7 @@ $code.=<<___;
.type poly1305_init_base2_44,\@function,3
.align 32
poly1305_init_base2_44:
- xor %rax,%rax
+ xor %eax,%eax
mov %rax,0($ctx) # initialize hash value
mov %rax,8($ctx)
mov %rax,16($ctx)
@@ -3947,7 +3947,7 @@ xor128_decrypt_n_pad:
mov \$16,$len
sub %r10,$len
xor %eax,%eax
- xor %r11,%r11
+ xor %r11d,%r11d
.Loop_dec_byte:
mov ($inp,$otp),%r11b
mov ($otp),%al
@@ -4085,7 +4085,7 @@ avx_handler:
.long 0xa548f3fc # cld; rep movsq
mov $disp,%rsi
- xor %rcx,%rcx # arg1, UNW_FLAG_NHANDLER
+ xor %ecx,%ecx # arg1, UNW_FLAG_NHANDLER
mov 8(%rsi),%rdx # arg2, disp->ImageBase
mov 0(%rsi),%r8 # arg3, disp->ControlPc
mov 16(%rsi),%r9 # arg4, disp->FunctionEntry
diff --git a/arch/x86/crypto/poly1305_glue.c b/arch/x86/crypto/poly1305_glue.c
index dfe921efa9b2..e508dbd91813 100644
--- a/arch/x86/crypto/poly1305_glue.c
+++ b/arch/x86/crypto/poly1305_glue.c
@@ -11,6 +11,7 @@
#include <linux/jump_label.h>
#include <linux/kernel.h>
#include <linux/module.h>
+#include <linux/sizes.h>
#include <asm/intel-family.h>
#include <asm/simd.h>
@@ -157,9 +158,6 @@ static unsigned int crypto_poly1305_setdctxkey(struct poly1305_desc_ctx *dctx,
dctx->s[1] = get_unaligned_le32(&inp[4]);
dctx->s[2] = get_unaligned_le32(&inp[8]);
dctx->s[3] = get_unaligned_le32(&inp[12]);
- inp += POLY1305_BLOCK_SIZE;
- len -= POLY1305_BLOCK_SIZE;
- acc += POLY1305_BLOCK_SIZE;
dctx->sset = true;
}
}