From 86919f9dd2dbb88bbbe4e381dc8e6b9e9a1b9eb5 Mon Sep 17 00:00:00 2001 From: Daniel Verkamp Date: Mon, 12 Nov 2018 15:22:18 -0800 Subject: lib/raid6: check for assembler SSSE3 support Allow the x86 SSSE3 recovery function to be tested in raid6test. Signed-off-by: Daniel Verkamp Signed-off-by: Shaohua Li --- lib/raid6/test/Makefile | 3 +++ 1 file changed, 3 insertions(+) (limited to 'lib') diff --git a/lib/raid6/test/Makefile b/lib/raid6/test/Makefile index 79777645cac9..3ab8720aa2f8 100644 --- a/lib/raid6/test/Makefile +++ b/lib/raid6/test/Makefile @@ -34,6 +34,9 @@ endif ifeq ($(IS_X86),yes) OBJS += mmx.o sse1.o sse2.o avx2.o recov_ssse3.o recov_avx2.o avx512.o recov_avx512.o + CFLAGS += $(shell echo "pshufb %xmm0, %xmm0" | \ + gcc -c -x assembler - >&/dev/null && \ + rm ./-.o && echo -DCONFIG_AS_SSSE3=1) CFLAGS += $(shell echo "vpbroadcastb %xmm0, %ymm1" | \ gcc -c -x assembler - >&/dev/null && \ rm ./-.o && echo -DCONFIG_AS_AVX2=1) -- cgit v1.2.3 From 0437de4fa09fe59b57d12b785e4afb73b0f34c05 Mon Sep 17 00:00:00 2001 From: Daniel Verkamp Date: Mon, 12 Nov 2018 15:26:51 -0800 Subject: lib/raid6: sort algos in rough performance order Sort the list of RAID6 algorithms in roughly decreasing order of expected performance: newer instruction sets first (within each architecture) and wider unrollings first. This doesn't make any difference right now, since all functions are benchmarked; a follow-up change will make use of this by optionally choosing the first valid function rather than testing all of them. The Itanium raid6_intx{16,32} entries are also moved down to be near the other raid6_intx entries for clarity. Signed-off-by: Daniel Verkamp Signed-off-by: Shaohua Li --- lib/raid6/algos.c | 76 +++++++++++++++++++++++++++---------------------------- 1 file changed, 38 insertions(+), 38 deletions(-) (limited to 'lib') diff --git a/lib/raid6/algos.c b/lib/raid6/algos.c index 5065b1e7e327..a753ff56670f 100644 --- a/lib/raid6/algos.c +++ b/lib/raid6/algos.c @@ -34,64 +34,64 @@ struct raid6_calls raid6_call; EXPORT_SYMBOL_GPL(raid6_call); const struct raid6_calls * const raid6_algos[] = { -#if defined(__ia64__) - &raid6_intx16, - &raid6_intx32, -#endif #if defined(__i386__) && !defined(__arch_um__) - &raid6_mmxx1, - &raid6_mmxx2, - &raid6_sse1x1, - &raid6_sse1x2, - &raid6_sse2x1, - &raid6_sse2x2, -#ifdef CONFIG_AS_AVX2 - &raid6_avx2x1, - &raid6_avx2x2, -#endif #ifdef CONFIG_AS_AVX512 - &raid6_avx512x1, &raid6_avx512x2, + &raid6_avx512x1, #endif -#endif -#if defined(__x86_64__) && !defined(__arch_um__) - &raid6_sse2x1, - &raid6_sse2x2, - &raid6_sse2x4, #ifdef CONFIG_AS_AVX2 - &raid6_avx2x1, &raid6_avx2x2, - &raid6_avx2x4, + &raid6_avx2x1, #endif + &raid6_sse2x2, + &raid6_sse2x1, + &raid6_sse1x2, + &raid6_sse1x1, + &raid6_mmxx2, + &raid6_mmxx1, +#endif +#if defined(__x86_64__) && !defined(__arch_um__) #ifdef CONFIG_AS_AVX512 - &raid6_avx512x1, - &raid6_avx512x2, &raid6_avx512x4, + &raid6_avx512x2, + &raid6_avx512x1, +#endif +#ifdef CONFIG_AS_AVX2 + &raid6_avx2x4, + &raid6_avx2x2, + &raid6_avx2x1, #endif + &raid6_sse2x4, + &raid6_sse2x2, + &raid6_sse2x1, #endif #ifdef CONFIG_ALTIVEC - &raid6_altivec1, - &raid6_altivec2, - &raid6_altivec4, - &raid6_altivec8, - &raid6_vpermxor1, - &raid6_vpermxor2, - &raid6_vpermxor4, &raid6_vpermxor8, + &raid6_vpermxor4, + &raid6_vpermxor2, + &raid6_vpermxor1, + &raid6_altivec8, + &raid6_altivec4, + &raid6_altivec2, + &raid6_altivec1, #endif #if defined(CONFIG_S390) &raid6_s390vx8, #endif - &raid6_intx1, - &raid6_intx2, - &raid6_intx4, - &raid6_intx8, #ifdef CONFIG_KERNEL_MODE_NEON - &raid6_neonx1, - &raid6_neonx2, - &raid6_neonx4, &raid6_neonx8, + &raid6_neonx4, + &raid6_neonx2, + &raid6_neonx1, +#endif +#if defined(__ia64__) + &raid6_intx32, + &raid6_intx16, #endif + &raid6_intx8, + &raid6_intx4, + &raid6_intx2, + &raid6_intx1, NULL }; -- cgit v1.2.3 From be85f93ae2df32dea0b20908316f1d894c3e0f64 Mon Sep 17 00:00:00 2001 From: Daniel Verkamp Date: Mon, 12 Nov 2018 15:26:52 -0800 Subject: lib/raid6: add option to skip algo benchmarking This is helpful for systems where fast startup time is important. It is especially nice to avoid benchmarking RAID functions that are never used (for example, BTRFS selects RAID6_PQ even if the parity RAID mode is not in use). This saves 250+ milliseconds of boot time on modern x86 and ARM systems with a dozen or more available implementations. The new option is defaulted to 'y' to match the previous behavior of always benchmarking on init. Signed-off-by: Daniel Verkamp Signed-off-by: Shaohua Li --- include/linux/raid/pq.h | 3 +++ lib/Kconfig | 8 ++++++++ lib/raid6/algos.c | 5 +++++ 3 files changed, 16 insertions(+) (limited to 'lib') diff --git a/include/linux/raid/pq.h b/include/linux/raid/pq.h index d7c99161bba2..605cf46c17bd 100644 --- a/include/linux/raid/pq.h +++ b/include/linux/raid/pq.h @@ -70,6 +70,9 @@ extern const char raid6_empty_zero_page[PAGE_SIZE]; #define MODULE_DESCRIPTION(desc) #define subsys_initcall(x) #define module_exit(x) + +#define IS_ENABLED(x) (x) +#define CONFIG_RAID6_PQ_BENCHMARK 1 #endif /* __KERNEL__ */ /* Routine choices */ diff --git a/lib/Kconfig b/lib/Kconfig index a9965f4af4dd..fcb05305a5a2 100644 --- a/lib/Kconfig +++ b/lib/Kconfig @@ -10,6 +10,14 @@ menu "Library routines" config RAID6_PQ tristate +config RAID6_PQ_BENCHMARK + bool "Automatically choose fastest RAID6 PQ functions" + depends on RAID6_PQ + default y + help + Benchmark all available RAID6 PQ functions on init and choose the + fastest one. + config BITREVERSE tristate diff --git a/lib/raid6/algos.c b/lib/raid6/algos.c index a753ff56670f..7e4f7a8ffa8e 100644 --- a/lib/raid6/algos.c +++ b/lib/raid6/algos.c @@ -163,6 +163,11 @@ static inline const struct raid6_calls *raid6_choose_gen( if ((*algo)->valid && !(*algo)->valid()) continue; + if (!IS_ENABLED(CONFIG_RAID6_PQ_BENCHMARK)) { + best = *algo; + break; + } + perf = 0; preempt_disable(); -- cgit v1.2.3