summaryrefslogtreecommitdiffstats
path: root/lib/raid6/x86.h
diff options
context:
space:
mode:
authorJim Kukunas <james.t.kukunas@linux.intel.com>2012-11-08 13:47:44 -0800
committerNeilBrown <neilb@suse.de>2012-12-13 16:42:01 +1100
commit7056741fd9fc14a65608549a4657cf5178f05f63 (patch)
treeb30504208f8261c4a0a2625169eaff9aa9de544e /lib/raid6/x86.h
parent54f89341e8b8da0cdac8a7b873491739de19f098 (diff)
downloadlinux-7056741fd9fc14a65608549a4657cf5178f05f63.tar.bz2
lib/raid6: Add AVX2 optimized recovery functions
Optimize RAID6 recovery functions to take advantage of the 256-bit YMM integer instructions introduced in AVX2. The patch was tested and benchmarked before submission. However hardware is not yet released so benchmark numbers cannot be reported. Acked-by: "H. Peter Anvin" <hpa@zytor.com> Signed-off-by: Jim Kukunas <james.t.kukunas@linux.intel.com> Signed-off-by: NeilBrown <neilb@suse.de>
Diffstat (limited to 'lib/raid6/x86.h')
-rw-r--r--lib/raid6/x86.h14
1 files changed, 9 insertions, 5 deletions
diff --git a/lib/raid6/x86.h b/lib/raid6/x86.h
index d55d63232c55..b7595484a815 100644
--- a/lib/raid6/x86.h
+++ b/lib/raid6/x86.h
@@ -45,19 +45,23 @@ static inline void kernel_fpu_end(void)
#define X86_FEATURE_XMM3 (4*32+ 0) /* "pni" SSE-3 */
#define X86_FEATURE_SSSE3 (4*32+ 9) /* Supplemental SSE-3 */
#define X86_FEATURE_AVX (4*32+28) /* Advanced Vector Extensions */
+#define X86_FEATURE_AVX2 (9*32+ 5) /* AVX2 instructions */
#define X86_FEATURE_MMXEXT (1*32+22) /* AMD MMX extensions */
/* Should work well enough on modern CPUs for testing */
static inline int boot_cpu_has(int flag)
{
- u32 eax = (flag & 0x20) ? 0x80000001 : 1;
- u32 ecx, edx;
+ u32 eax, ebx, ecx, edx;
+
+ eax = (flag & 0x100) ? 7 :
+ (flag & 0x20) ? 0x80000001 : 1;
+ ecx = 0;
asm volatile("cpuid"
- : "+a" (eax), "=d" (edx), "=c" (ecx)
- : : "ebx");
+ : "+a" (eax), "=b" (ebx), "=d" (edx), "+c" (ecx));
- return ((flag & 0x80 ? ecx : edx) >> (flag & 31)) & 1;
+ return ((flag & 0x100 ? ebx :
+ (flag & 0x80) ? ecx : edx) >> (flag & 31)) & 1;
}
#endif /* ndef __KERNEL__ */