summary | refs | log | tree | commit | diff | stats
path: root/arch/powerpc/include/asm/ppc-opcode.h
diff options
context:
space:
mode:
author: Anton Blanchard <anton@samba.org>, 2016-07-01 08:19:45 +1000
committer: Herbert Xu <herbert@gondor.apana.org.au>, 2016-07-05 23:05:19 +0800
commit: 6dd7a82cc54ebd2936763befd3dcd4beb727a704 (patch)
tree: 4077ad30a801a6e7dcc0b915612e2c815239fd9b /arch/powerpc/include/asm/ppc-opcode.h
parent: 151f25112ff7befc134ed3fc58b0ff8792b3169e (diff)
download: linux-6dd7a82cc54ebd2936763befd3dcd4beb727a704.tar.bz2
crypto: powerpc - Add POWER8 optimised crc32c

Use the vector polynomial multiply-sum instructions in POWER8 to
speed up crc32c.

This is just over 41x faster than the slice-by-8 method that it
replaces. Measurements on a 4.1 GHz POWER8 show it sustaining
52 GiB/sec.

A simple btrfs write performance test:

    dd if=/dev/zero of=/mnt/tmpfile bs=1M count=4096
    sync

is over 3.7x faster.

Signed-off-by: Anton Blanchard <anton@samba.org>
Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
Diffstat (limited to 'arch/powerpc/include/asm/ppc-opcode.h')
-rw-r--r-- arch/powerpc/include/asm/ppc-opcode.h | 12
1 file changed, 12 insertions, 0 deletions
diff --git a/arch/powerpc/include/asm/ppc-opcode.h b/arch/powerpc/include/asm/ppc-opcode.h
index 1d035c1cc889..49cd8760aa7c 100644
--- a/arch/powerpc/include/asm/ppc-opcode.h
+++ b/arch/powerpc/include/asm/ppc-opcode.h
@@ -174,6 +174,8 @@
#define PPC_INST_MFSPR_DSCR_USER_MASK 0xfc1fffff
#define PPC_INST_MTSPR_DSCR_USER 0x7c0303a6
#define PPC_INST_MTSPR_DSCR_USER_MASK 0xfc1fffff
+#define PPC_INST_MFVSRD 0x7c000066
+#define PPC_INST_MTVSRD 0x7c000166
#define PPC_INST_SLBFEE 0x7c0007a7
#define PPC_INST_STRING 0x7c00042a
@@ -188,6 +190,8 @@
#define PPC_INST_WAIT 0x7c00007c
#define PPC_INST_TLBIVAX 0x7c000624
#define PPC_INST_TLBSRX_DOT 0x7c0006a5
+#define PPC_INST_VPMSUMW 0x10000488
+#define PPC_INST_VPMSUMD 0x100004c8
#define PPC_INST_XXLOR 0xf0000510
#define PPC_INST_XXSWAPD 0xf0000250
#define PPC_INST_XVCPSGNDP 0xf0000780
@@ -359,6 +363,14 @@
VSX_XX1((s), a, b))
#define LXVD2X(s, a, b) stringify_in_c(.long PPC_INST_LXVD2X | \
VSX_XX1((s), a, b))
+#define MFVRD(a, t) stringify_in_c(.long PPC_INST_MFVSRD | \
+ VSX_XX1((t)+32, a, R0))
+#define MTVRD(t, a) stringify_in_c(.long PPC_INST_MTVSRD | \
+ VSX_XX1((t)+32, a, R0))
+#define VPMSUMW(t, a, b) stringify_in_c(.long PPC_INST_VPMSUMW | \
+ VSX_XX3((t), a, b))
+#define VPMSUMD(t, a, b) stringify_in_c(.long PPC_INST_VPMSUMD | \
+ VSX_XX3((t), a, b))
#define XXLOR(t, a, b) stringify_in_c(.long PPC_INST_XXLOR | \
VSX_XX3((t), a, b))
#define XXSWAPD(t, a) stringify_in_c(.long PPC_INST_XXSWAPD | \