From 34644524bce91883d5051a7eaf3ec5464ed149bf Mon Sep 17 00:00:00 2001 From: Abhilash Kesavan Date: Fri, 6 Feb 2015 19:15:27 +0530 Subject: lib: devres: add a helper function for ioremap_wc Implement a resource managed writecombine ioremap function. Signed-off-by: Abhilash Kesavan Acked-by: Catalin Marinas Signed-off-by: Greg Kroah-Hartman --- lib/devres.c | 28 ++++++++++++++++++++++++++++ 1 file changed, 28 insertions(+) (limited to 'lib') diff --git a/lib/devres.c b/lib/devres.c index 0f1dd2e9d2c1..fbe2aac522e6 100644 --- a/lib/devres.c +++ b/lib/devres.c @@ -71,6 +71,34 @@ void __iomem *devm_ioremap_nocache(struct device *dev, resource_size_t offset, } EXPORT_SYMBOL(devm_ioremap_nocache); +/** + * devm_ioremap_wc - Managed ioremap_wc() + * @dev: Generic device to remap IO address for + * @offset: BUS offset to map + * @size: Size of map + * + * Managed ioremap_wc(). Map is automatically unmapped on driver detach. + */ +void __iomem *devm_ioremap_wc(struct device *dev, resource_size_t offset, + resource_size_t size) +{ + void __iomem **ptr, *addr; + + ptr = devres_alloc(devm_ioremap_release, sizeof(*ptr), GFP_KERNEL); + if (!ptr) + return NULL; + + addr = ioremap_wc(offset, size); + if (addr) { + *ptr = addr; + devres_add(dev, ptr); + } else + devres_free(ptr); + + return addr; +} +EXPORT_SYMBOL(devm_ioremap_wc); + /** * devm_iounmap - Managed iounmap() * @dev: Generic device to unmap for -- cgit v1.2.3 From 8d7dc9283f399e1fda4e48a1c453f689326d9396 Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Tue, 14 Apr 2015 19:33:59 -0700 Subject: rcu: Control grace-period delays directly from value In a misguided attempt to avoid an #ifdef, the use of the gp_init_delay module parameter was conditioned on the corresponding RCU_TORTURE_TEST_SLOW_INIT Kconfig variable, using IS_ENABLED() at the point of use in the code. This meant that the compiler always saw the delay, which meant that RCU_TORTURE_TEST_SLOW_INIT_DELAY had to be unconditionally defined. 
This in turn caused "make oldconfig" to ask pointless questions about the value of RCU_TORTURE_TEST_SLOW_INIT_DELAY in cases where it was not even used. This commit avoids these pointless questions by defining gp_init_delay under #ifdef. In one branch, gp_init_delay is initialized to RCU_TORTURE_TEST_SLOW_INIT_DELAY and is also a module parameter (thus allowing boot-time modification), and in the other branch gp_init_delay is a const variable initialized by default to zero. This approach also simplifies the code at the delay point by eliminating the IS_ENABLED(). Because gp_init_delay is constant zero in the no-delay case intended for production use, the "gp_init_delay > 0" check causes the delay to become dead code, as desired in this case. In addition, this commit replaces magic constant "10" with the preprocessor variable PER_RCU_NODE_PERIOD, which controls the number of grace periods that are allowed to elapse at full speed before a delay is inserted. Reported-by: Linus Torvalds Signed-off-by: Paul E. McKenney --- kernel/rcu/tree.c | 16 +++++++++------- lib/Kconfig.debug | 1 + 2 files changed, 10 insertions(+), 7 deletions(-) (limited to 'lib') diff --git a/kernel/rcu/tree.c b/kernel/rcu/tree.c index 233165da782f..8cf7304b2867 100644 --- a/kernel/rcu/tree.c +++ b/kernel/rcu/tree.c @@ -162,11 +162,14 @@ static void invoke_rcu_callbacks(struct rcu_state *rsp, struct rcu_data *rdp); static int kthread_prio = CONFIG_RCU_KTHREAD_PRIO; module_param(kthread_prio, int, 0644); -/* Delay in jiffies for grace-period initialization delays. */ -static int gp_init_delay = IS_ENABLED(CONFIG_RCU_TORTURE_TEST_SLOW_INIT) - ? CONFIG_RCU_TORTURE_TEST_SLOW_INIT_DELAY - : 0; +/* Delay in jiffies for grace-period initialization delays, debug only. 
*/ +#ifdef CONFIG_RCU_TORTURE_TEST_SLOW_INIT +static int gp_init_delay = CONFIG_RCU_TORTURE_TEST_SLOW_INIT_DELAY; module_param(gp_init_delay, int, 0644); +#else /* #ifdef CONFIG_RCU_TORTURE_TEST_SLOW_INIT */ +static const int gp_init_delay; +#endif /* #else #ifdef CONFIG_RCU_TORTURE_TEST_SLOW_INIT */ +#define PER_RCU_NODE_PERIOD 10 /* Number of grace periods between delays. */ /* * Track the rcutorture test sequence number and the update version @@ -1843,9 +1846,8 @@ static int rcu_gp_init(struct rcu_state *rsp) raw_spin_unlock_irq(&rnp->lock); cond_resched_rcu_qs(); ACCESS_ONCE(rsp->gp_activity) = jiffies; - if (IS_ENABLED(CONFIG_RCU_TORTURE_TEST_SLOW_INIT) && - gp_init_delay > 0 && - !(rsp->gpnum % (rcu_num_nodes * 10))) + if (gp_init_delay > 0 && + !(rsp->gpnum % (rcu_num_nodes * PER_RCU_NODE_PERIOD))) schedule_timeout_uninterruptible(gp_init_delay); } diff --git a/lib/Kconfig.debug b/lib/Kconfig.debug index 1ad74c0df01f..5f5ff7d7e5eb 100644 --- a/lib/Kconfig.debug +++ b/lib/Kconfig.debug @@ -1268,6 +1268,7 @@ config RCU_TORTURE_TEST_SLOW_INIT_DELAY int "How much to slow down RCU grace-period initialization" range 0 5 default 3 + depends on RCU_TORTURE_TEST_SLOW_INIT help This option specifies the number of jiffies to wait between each rcu_node structure initialization. -- cgit v1.2.3 From b0cc836d306c12462a60e72aae8f6d2318f10817 Mon Sep 17 00:00:00 2001 From: Sowmini Varadhan Date: Sun, 19 Apr 2015 13:13:30 -0400 Subject: iommu-common: fix x86_64 compiler warnings Declare iommu_large_alloc as static. Remove extern definition for iommu_tbl_pool_init(). Signed-off-by: Sowmini Varadhan Tested-by: Guenter Roeck Reviewed-by: Guenter Roeck Signed-off-by: David S. 
Miller --- lib/iommu-common.c | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) (limited to 'lib') diff --git a/lib/iommu-common.c b/lib/iommu-common.c index a1a517cba7ec..a9a53f566237 100644 --- a/lib/iommu-common.c +++ b/lib/iommu-common.c @@ -15,7 +15,7 @@ #define DMA_ERROR_CODE (~(dma_addr_t)0x0) #endif -unsigned long iommu_large_alloc = 15; +static unsigned long iommu_large_alloc = 15; static DEFINE_PER_CPU(unsigned int, iommu_pool_hash); @@ -53,12 +53,12 @@ static void setup_iommu_pool_hash(void) * the top 1/4 of the table will be set aside for pool allocations * of more than iommu_large_alloc pages. */ -extern void iommu_tbl_pool_init(struct iommu_map_table *iommu, - unsigned long num_entries, - u32 table_shift, - void (*lazy_flush)(struct iommu_map_table *), - bool large_pool, u32 npools, - bool skip_span_boundary_check) +void iommu_tbl_pool_init(struct iommu_map_table *iommu, + unsigned long num_entries, + u32 table_shift, + void (*lazy_flush)(struct iommu_map_table *), + bool large_pool, u32 npools, + bool skip_span_boundary_check) { unsigned int start, i; struct iommu_pool *p = &(iommu->large_pool); -- cgit v1.2.3 From 7b3372d4c2bced80598771aab8fea87c40ebb52a Mon Sep 17 00:00:00 2001 From: Sowmini Varadhan Date: Sun, 19 Apr 2015 13:13:31 -0400 Subject: iommu-common: rename iommu_pool_hash to iommu_hash_common When CONFIG_DEBUG_FORCE_WEAK_PER_CPU is set, the DEFINE_PER_CPU_SECTION macro will define an extern __pcpu_unique_##name variable that could conflict with the same definition in powerpc at this time. Avoid that conflict by renaming iommu_pool_hash in iommu-common.c Thanks to Guenter Roeck for catching this, and helping to test the fix. Signed-off-by: Sowmini Varadhan Tested-by: Guenter Roeck Reviewed-by: Guenter Roeck Signed-off-by: David S. 
Miller --- lib/iommu-common.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'lib') diff --git a/lib/iommu-common.c b/lib/iommu-common.c index a9a53f566237..df30632f0bef 100644 --- a/lib/iommu-common.c +++ b/lib/iommu-common.c @@ -17,7 +17,7 @@ static unsigned long iommu_large_alloc = 15; -static DEFINE_PER_CPU(unsigned int, iommu_pool_hash); +static DEFINE_PER_CPU(unsigned int, iommu_hash_common); static inline bool need_flush(struct iommu_map_table *iommu) { @@ -44,7 +44,7 @@ static void setup_iommu_pool_hash(void) return; do_once = true; for_each_possible_cpu(i) - per_cpu(iommu_pool_hash, i) = hash_32(i, IOMMU_POOL_HASHBITS); + per_cpu(iommu_hash_common, i) = hash_32(i, IOMMU_POOL_HASHBITS); } /* @@ -106,7 +106,7 @@ unsigned long iommu_tbl_range_alloc(struct device *dev, unsigned long mask, unsigned int align_order) { - unsigned int pool_hash = __this_cpu_read(iommu_pool_hash); + unsigned int pool_hash = __this_cpu_read(iommu_hash_common); unsigned long n, end, start, limit, boundary_size; struct iommu_pool *pool; int pass = 0; -- cgit v1.2.3 From fe5cbc6e06c7d8b3a86f6f5491d74766bb5c2827 Mon Sep 17 00:00:00 2001 From: Markus Stockhausen Date: Mon, 15 Dec 2014 12:57:04 +1100 Subject: md/raid6 algorithms: delta syndrome functions v3: s-o-b comment, explanation of performance and descision for the start/stop implementation Implementing rmw functionality for RAID6 requires optimized syndrome calculation. Up to now we can only generate a complete syndrome. The target P/Q pages are always overwritten. With this patch we provide a framework for inplace P/Q modification. In the first place simply fill those functions with NULL values. xor_syndrome() has two additional parameters: start & stop. These will indicate the first and last page that are changing during a rmw run. That makes it possible to avoid several unneccessary loops and speed up calculation. The caller needs to implement the following logic to make the functions work. 
1) xor_syndrome(disks, start, stop, ...): "Remove" all data of source blocks inside P/Q between (and including) start and stop. 2) modify any block with start <= block <= stop 3) xor_syndrome(disks, start, stop, ...): "Reinsert" all data of source blocks into P/Q between (and including) start and stop. Pages between start and stop that won't be changed should be filled with a pointer to the kernel zero page. The reasons for not taking NULL pages are: 1) Algorithms cross the whole source data line by line. Thus avoid additional branches. 2) Having a NULL page avoids calculating the XOR P parity but still needs calculation steps for the Q parity. Depending on the algorithm unrolling that might be only a difference of 2 instructions per loop. The benchmark numbers of the gen_syndrome() functions are displayed in the kernel log. Do the same for the xor_syndrome() functions. This will help to analyze performance problems and give a rough estimate of how well the algorithm works. The choice of the fastest algorithm will still depend on the gen_syndrome() performance. With the start/stop page implementation the speed can vary a lot in real life. E.g. a change of page 0 & page 15 on a stripe will be harder to compute than the case where page 0 & page 1 are XOR candidates. So as not to be too enthusiastic about the expected speeds we will run a worst-case test that simulates a change on the upper half of the stripe. 
So we do: 1) calculation of P/Q for the upper pages 2) continuation of Q for the lower (empty) pages Signed-off-by: Markus Stockhausen Signed-off-by: NeilBrown --- include/linux/raid/pq.h | 1 + lib/raid6/algos.c | 41 ++++++++++++++++++++++++++++++++++------- lib/raid6/altivec.uc | 1 + lib/raid6/avx2.c | 3 +++ lib/raid6/int.uc | 3 ++- lib/raid6/mmx.c | 2 ++ lib/raid6/neon.c | 1 + lib/raid6/sse1.c | 2 ++ lib/raid6/sse2.c | 3 +++ lib/raid6/tilegx.uc | 1 + 10 files changed, 50 insertions(+), 8 deletions(-) (limited to 'lib') diff --git a/include/linux/raid/pq.h b/include/linux/raid/pq.h index 73069cb6c54a..a7a06d1dcf9c 100644 --- a/include/linux/raid/pq.h +++ b/include/linux/raid/pq.h @@ -72,6 +72,7 @@ extern const char raid6_empty_zero_page[PAGE_SIZE]; /* Routine choices */ struct raid6_calls { void (*gen_syndrome)(int, size_t, void **); + void (*xor_syndrome)(int, int, int, size_t, void **); int (*valid)(void); /* Returns 1 if this routine set is usable */ const char *name; /* Name of this routine set */ int prefer; /* Has special performance attribute */ diff --git a/lib/raid6/algos.c b/lib/raid6/algos.c index dbef2314901e..975c6e0434bd 100644 --- a/lib/raid6/algos.c +++ b/lib/raid6/algos.c @@ -131,11 +131,12 @@ static inline const struct raid6_recov_calls *raid6_choose_recov(void) static inline const struct raid6_calls *raid6_choose_gen( void *(*const dptrs)[(65536/PAGE_SIZE)+2], const int disks) { - unsigned long perf, bestperf, j0, j1; + unsigned long perf, bestgenperf, bestxorperf, j0, j1; + int start = (disks>>1)-1, stop = disks-3; /* work on the second half of the disks */ const struct raid6_calls *const *algo; const struct raid6_calls *best; - for (bestperf = 0, best = NULL, algo = raid6_algos; *algo; algo++) { + for (bestgenperf = 0, bestxorperf = 0, best = NULL, algo = raid6_algos; *algo; algo++) { if (!best || (*algo)->prefer >= best->prefer) { if ((*algo)->valid && !(*algo)->valid()) continue; @@ -153,19 +154,45 @@ static inline const struct raid6_calls 
*raid6_choose_gen( } preempt_enable(); - if (perf > bestperf) { - bestperf = perf; + if (perf > bestgenperf) { + bestgenperf = perf; best = *algo; } - pr_info("raid6: %-8s %5ld MB/s\n", (*algo)->name, + pr_info("raid6: %-8s gen() %5ld MB/s\n", (*algo)->name, (perf*HZ) >> (20-16+RAID6_TIME_JIFFIES_LG2)); + + if (!(*algo)->xor_syndrome) + continue; + + perf = 0; + + preempt_disable(); + j0 = jiffies; + while ((j1 = jiffies) == j0) + cpu_relax(); + while (time_before(jiffies, + j1 + (1<xor_syndrome(disks, start, stop, + PAGE_SIZE, *dptrs); + perf++; + } + preempt_enable(); + + if (best == *algo) + bestxorperf = perf; + + pr_info("raid6: %-8s xor() %5ld MB/s\n", (*algo)->name, + (perf*HZ) >> (20-16+RAID6_TIME_JIFFIES_LG2+1)); } } if (best) { - pr_info("raid6: using algorithm %s (%ld MB/s)\n", + pr_info("raid6: using algorithm %s gen() %ld MB/s\n", best->name, - (bestperf*HZ) >> (20-16+RAID6_TIME_JIFFIES_LG2)); + (bestgenperf*HZ) >> (20-16+RAID6_TIME_JIFFIES_LG2)); + if (best->xor_syndrome) + pr_info("raid6: .... xor() %ld MB/s, rmw enabled\n", + (bestxorperf*HZ) >> (20-16+RAID6_TIME_JIFFIES_LG2+1)); raid6_call = *best; } else pr_err("raid6: Yikes! 
No algorithm found!\n"); diff --git a/lib/raid6/altivec.uc b/lib/raid6/altivec.uc index 7cc12b532e95..bec27fce7501 100644 --- a/lib/raid6/altivec.uc +++ b/lib/raid6/altivec.uc @@ -119,6 +119,7 @@ int raid6_have_altivec(void) const struct raid6_calls raid6_altivec$# = { raid6_altivec$#_gen_syndrome, + NULL, /* XOR not yet implemented */ raid6_have_altivec, "altivecx$#", 0 diff --git a/lib/raid6/avx2.c b/lib/raid6/avx2.c index bc3b1dd436eb..76734004358d 100644 --- a/lib/raid6/avx2.c +++ b/lib/raid6/avx2.c @@ -89,6 +89,7 @@ static void raid6_avx21_gen_syndrome(int disks, size_t bytes, void **ptrs) const struct raid6_calls raid6_avx2x1 = { raid6_avx21_gen_syndrome, + NULL, /* XOR not yet implemented */ raid6_have_avx2, "avx2x1", 1 /* Has cache hints */ @@ -150,6 +151,7 @@ static void raid6_avx22_gen_syndrome(int disks, size_t bytes, void **ptrs) const struct raid6_calls raid6_avx2x2 = { raid6_avx22_gen_syndrome, + NULL, /* XOR not yet implemented */ raid6_have_avx2, "avx2x2", 1 /* Has cache hints */ @@ -242,6 +244,7 @@ static void raid6_avx24_gen_syndrome(int disks, size_t bytes, void **ptrs) const struct raid6_calls raid6_avx2x4 = { raid6_avx24_gen_syndrome, + NULL, /* XOR not yet implemented */ raid6_have_avx2, "avx2x4", 1 /* Has cache hints */ diff --git a/lib/raid6/int.uc b/lib/raid6/int.uc index 5b50f8dfc5d2..5ca60bee1388 100644 --- a/lib/raid6/int.uc +++ b/lib/raid6/int.uc @@ -109,7 +109,8 @@ static void raid6_int$#_gen_syndrome(int disks, size_t bytes, void **ptrs) const struct raid6_calls raid6_intx$# = { raid6_int$#_gen_syndrome, - NULL, /* always valid */ + NULL, /* XOR not yet implemented */ + NULL, /* always valid */ "int" NSTRING "x$#", 0 }; diff --git a/lib/raid6/mmx.c b/lib/raid6/mmx.c index 590c71c9e200..b3b0e1fcd3af 100644 --- a/lib/raid6/mmx.c +++ b/lib/raid6/mmx.c @@ -76,6 +76,7 @@ static void raid6_mmx1_gen_syndrome(int disks, size_t bytes, void **ptrs) const struct raid6_calls raid6_mmxx1 = { raid6_mmx1_gen_syndrome, + NULL, /* XOR not yet 
implemented */ raid6_have_mmx, "mmxx1", 0 @@ -134,6 +135,7 @@ static void raid6_mmx2_gen_syndrome(int disks, size_t bytes, void **ptrs) const struct raid6_calls raid6_mmxx2 = { raid6_mmx2_gen_syndrome, + NULL, /* XOR not yet implemented */ raid6_have_mmx, "mmxx2", 0 diff --git a/lib/raid6/neon.c b/lib/raid6/neon.c index 36ad4705df1a..d9ad6ee284f4 100644 --- a/lib/raid6/neon.c +++ b/lib/raid6/neon.c @@ -42,6 +42,7 @@ } \ struct raid6_calls const raid6_neonx ## _n = { \ raid6_neon ## _n ## _gen_syndrome, \ + NULL, /* XOR not yet implemented */ \ raid6_have_neon, \ "neonx" #_n, \ 0 \ diff --git a/lib/raid6/sse1.c b/lib/raid6/sse1.c index f76297139445..9025b8ca9aa3 100644 --- a/lib/raid6/sse1.c +++ b/lib/raid6/sse1.c @@ -92,6 +92,7 @@ static void raid6_sse11_gen_syndrome(int disks, size_t bytes, void **ptrs) const struct raid6_calls raid6_sse1x1 = { raid6_sse11_gen_syndrome, + NULL, /* XOR not yet implemented */ raid6_have_sse1_or_mmxext, "sse1x1", 1 /* Has cache hints */ @@ -154,6 +155,7 @@ static void raid6_sse12_gen_syndrome(int disks, size_t bytes, void **ptrs) const struct raid6_calls raid6_sse1x2 = { raid6_sse12_gen_syndrome, + NULL, /* XOR not yet implemented */ raid6_have_sse1_or_mmxext, "sse1x2", 1 /* Has cache hints */ diff --git a/lib/raid6/sse2.c b/lib/raid6/sse2.c index 85b82c85f28e..31acd59a0ef7 100644 --- a/lib/raid6/sse2.c +++ b/lib/raid6/sse2.c @@ -90,6 +90,7 @@ static void raid6_sse21_gen_syndrome(int disks, size_t bytes, void **ptrs) const struct raid6_calls raid6_sse2x1 = { raid6_sse21_gen_syndrome, + NULL, /* XOR not yet implemented */ raid6_have_sse2, "sse2x1", 1 /* Has cache hints */ @@ -152,6 +153,7 @@ static void raid6_sse22_gen_syndrome(int disks, size_t bytes, void **ptrs) const struct raid6_calls raid6_sse2x2 = { raid6_sse22_gen_syndrome, + NULL, /* XOR not yet implemented */ raid6_have_sse2, "sse2x2", 1 /* Has cache hints */ @@ -250,6 +252,7 @@ static void raid6_sse24_gen_syndrome(int disks, size_t bytes, void **ptrs) const struct 
raid6_calls raid6_sse2x4 = { raid6_sse24_gen_syndrome, + NULL, /* XOR not yet implemented */ raid6_have_sse2, "sse2x4", 1 /* Has cache hints */ diff --git a/lib/raid6/tilegx.uc b/lib/raid6/tilegx.uc index e7c29459cbcd..2dd291a11264 100644 --- a/lib/raid6/tilegx.uc +++ b/lib/raid6/tilegx.uc @@ -80,6 +80,7 @@ void raid6_tilegx$#_gen_syndrome(int disks, size_t bytes, void **ptrs) const struct raid6_calls raid6_tilegx$# = { raid6_tilegx$#_gen_syndrome, + NULL, /* XOR not yet implemented */ NULL, "tilegx$#", 0 -- cgit v1.2.3 From 7e92e1d7629b00578cef22b1f4c6ada726663701 Mon Sep 17 00:00:00 2001 From: Markus Stockhausen Date: Mon, 15 Dec 2014 12:57:04 +1100 Subject: md/raid6 algorithms: improve test program It is always helpful to have a test tool in place if we implement new data critical algorithms. So add some test routines to the raid6 checker that can prove if the new xor_syndrome() works as expected. Run through all permutations of start/stop pages per algorithm and simulate a xor_syndrome() assisted rmw run. After each rmw check if the recovery algorithm still confirms that the stripe is fine. 
Signed-off-by: Markus Stockhausen Signed-off-by: NeilBrown --- lib/raid6/test/test.c | 51 ++++++++++++++++++++++++++++++++++++--------------- 1 file changed, 36 insertions(+), 15 deletions(-) (limited to 'lib') diff --git a/lib/raid6/test/test.c b/lib/raid6/test/test.c index 5a485b7a7d3c..3bebbabdb510 100644 --- a/lib/raid6/test/test.c +++ b/lib/raid6/test/test.c @@ -28,11 +28,11 @@ char *dataptrs[NDISKS]; char data[NDISKS][PAGE_SIZE]; char recovi[PAGE_SIZE], recovj[PAGE_SIZE]; -static void makedata(void) +static void makedata(int start, int stop) { int i, j; - for (i = 0; i < NDISKS; i++) { + for (i = start; i <= stop; i++) { for (j = 0; j < PAGE_SIZE; j++) data[i][j] = rand(); @@ -91,34 +91,55 @@ int main(int argc, char *argv[]) { const struct raid6_calls *const *algo; const struct raid6_recov_calls *const *ra; - int i, j; + int i, j, p1, p2; int err = 0; - makedata(); + makedata(0, NDISKS-1); for (ra = raid6_recov_algos; *ra; ra++) { if ((*ra)->valid && !(*ra)->valid()) continue; + raid6_2data_recov = (*ra)->data2; raid6_datap_recov = (*ra)->datap; printf("using recovery %s\n", (*ra)->name); for (algo = raid6_algos; *algo; algo++) { - if (!(*algo)->valid || (*algo)->valid()) { - raid6_call = **algo; + if ((*algo)->valid && !(*algo)->valid()) + continue; + + raid6_call = **algo; + + /* Nuke syndromes */ + memset(data[NDISKS-2], 0xee, 2*PAGE_SIZE); + + /* Generate assumed good syndrome */ + raid6_call.gen_syndrome(NDISKS, PAGE_SIZE, + (void **)&dataptrs); + + for (i = 0; i < NDISKS-1; i++) + for (j = i+1; j < NDISKS; j++) + err += test_disks(i, j); + + if (!raid6_call.xor_syndrome) + continue; + + for (p1 = 0; p1 < NDISKS-2; p1++) + for (p2 = p1; p2 < NDISKS-2; p2++) { - /* Nuke syndromes */ - memset(data[NDISKS-2], 0xee, 2*PAGE_SIZE); + /* Simulate rmw run */ + raid6_call.xor_syndrome(NDISKS, p1, p2, PAGE_SIZE, + (void **)&dataptrs); + makedata(p1, p2); + raid6_call.xor_syndrome(NDISKS, p1, p2, PAGE_SIZE, + (void **)&dataptrs); - /* Generate assumed good syndrome 
*/ - raid6_call.gen_syndrome(NDISKS, PAGE_SIZE, - (void **)&dataptrs); + for (i = 0; i < NDISKS-1; i++) + for (j = i+1; j < NDISKS; j++) + err += test_disks(i, j); + } - for (i = 0; i < NDISKS-1; i++) - for (j = i+1; j < NDISKS; j++) - err += test_disks(i, j); - } } printf("\n"); } -- cgit v1.2.3 From 9a5ce91d053961b7cc8fa56bd083819a9fc92734 Mon Sep 17 00:00:00 2001 From: Markus Stockhausen Date: Mon, 15 Dec 2014 12:57:04 +1100 Subject: md/raid6 algorithms: xor_syndrome() for generic int Start the algorithms with the very basic one. It is left and right optimized. That means we can avoid all calculations for unneeded pages above the right stop offset. For pages below the left start offset we still need the syndrome multiplication but without reading data pages. Signed-off-by: Markus Stockhausen Signed-off-by: NeilBrown --- lib/raid6/int.uc | 40 +++++++++++++++++++++++++++++++++++++++- 1 file changed, 39 insertions(+), 1 deletion(-) (limited to 'lib') diff --git a/lib/raid6/int.uc b/lib/raid6/int.uc index 5ca60bee1388..558aeac9342a 100644 --- a/lib/raid6/int.uc +++ b/lib/raid6/int.uc @@ -107,9 +107,47 @@ static void raid6_int$#_gen_syndrome(int disks, size_t bytes, void **ptrs) } } +static void raid6_int$#_xor_syndrome(int disks, int start, int stop, + size_t bytes, void **ptrs) +{ + u8 **dptr = (u8 **)ptrs; + u8 *p, *q; + int d, z, z0; + + unative_t wd$$, wq$$, wp$$, w1$$, w2$$; + + z0 = stop; /* P/Q right side optimization */ + p = dptr[disks-2]; /* XOR parity */ + q = dptr[disks-1]; /* RS syndrome */ + + for ( d = 0 ; d < bytes ; d += NSIZE*$# ) { + /* P/Q data pages */ + wq$$ = wp$$ = *(unative_t *)&dptr[z0][d+$$*NSIZE]; + for ( z = z0-1 ; z >= start ; z-- ) { + wd$$ = *(unative_t *)&dptr[z][d+$$*NSIZE]; + wp$$ ^= wd$$; + w2$$ = MASK(wq$$); + w1$$ = SHLBYTE(wq$$); + w2$$ &= NBYTES(0x1d); + w1$$ ^= w2$$; + wq$$ = w1$$ ^ wd$$; + } + /* P/Q left side optimization */ + for ( z = start-1 ; z >= 0 ; z-- ) { + w2$$ = MASK(wq$$); + w1$$ = SHLBYTE(wq$$); + w2$$ &= 
NBYTES(0x1d); + wq$$ = w1$$ ^ w2$$; + } + *(unative_t *)&p[d+NSIZE*$$] ^= wp$$; + *(unative_t *)&q[d+NSIZE*$$] ^= wq$$; + } + +} + const struct raid6_calls raid6_intx$# = { raid6_int$#_gen_syndrome, - NULL, /* XOR not yet implemented */ + raid6_int$#_xor_syndrome, NULL, /* always valid */ "int" NSTRING "x$#", 0 -- cgit v1.2.3 From a582564b24bec0443b5c5ff43ee6d1258f8bd658 Mon Sep 17 00:00:00 2001 From: Markus Stockhausen Date: Mon, 15 Dec 2014 12:57:05 +1100 Subject: md/raid6 algorithms: xor_syndrome() for SSE2 The second and (last) optimized XOR syndrome calculation. This version supports right and left side optimization. All CPUs with architecture older than Haswell will benefit from it. It should be noted that SSE2 movntdq kills performance for memory areas that are read and written simultaneously in chunks smaller than cache line size. So use movdqa instead for P/Q writes in sse21 and sse22 XOR functions. Signed-off-by: Markus Stockhausen Signed-off-by: NeilBrown --- lib/raid6/sse2.c | 230 ++++++++++++++++++++++++++++++++++++++++++++++++++++++- 1 file changed, 227 insertions(+), 3 deletions(-) (limited to 'lib') diff --git a/lib/raid6/sse2.c b/lib/raid6/sse2.c index 31acd59a0ef7..1d2276b007ee 100644 --- a/lib/raid6/sse2.c +++ b/lib/raid6/sse2.c @@ -88,9 +88,58 @@ static void raid6_sse21_gen_syndrome(int disks, size_t bytes, void **ptrs) kernel_fpu_end(); } + +static void raid6_sse21_xor_syndrome(int disks, int start, int stop, + size_t bytes, void **ptrs) + { + u8 **dptr = (u8 **)ptrs; + u8 *p, *q; + int d, z, z0; + + z0 = stop; /* P/Q right side optimization */ + p = dptr[disks-2]; /* XOR parity */ + q = dptr[disks-1]; /* RS syndrome */ + + kernel_fpu_begin(); + + asm volatile("movdqa %0,%%xmm0" : : "m" (raid6_sse_constants.x1d[0])); + + for ( d = 0 ; d < bytes ; d += 16 ) { + asm volatile("movdqa %0,%%xmm4" :: "m" (dptr[z0][d])); + asm volatile("movdqa %0,%%xmm2" : : "m" (p[d])); + asm volatile("pxor %xmm4,%xmm2"); + /* P/Q data pages */ + for ( z = z0-1 ; z 
>= start ; z-- ) { + asm volatile("pxor %xmm5,%xmm5"); + asm volatile("pcmpgtb %xmm4,%xmm5"); + asm volatile("paddb %xmm4,%xmm4"); + asm volatile("pand %xmm0,%xmm5"); + asm volatile("pxor %xmm5,%xmm4"); + asm volatile("movdqa %0,%%xmm5" :: "m" (dptr[z][d])); + asm volatile("pxor %xmm5,%xmm2"); + asm volatile("pxor %xmm5,%xmm4"); + } + /* P/Q left side optimization */ + for ( z = start-1 ; z >= 0 ; z-- ) { + asm volatile("pxor %xmm5,%xmm5"); + asm volatile("pcmpgtb %xmm4,%xmm5"); + asm volatile("paddb %xmm4,%xmm4"); + asm volatile("pand %xmm0,%xmm5"); + asm volatile("pxor %xmm5,%xmm4"); + } + asm volatile("pxor %0,%%xmm4" : : "m" (q[d])); + /* Don't use movntdq for r/w memory area < cache line */ + asm volatile("movdqa %%xmm4,%0" : "=m" (q[d])); + asm volatile("movdqa %%xmm2,%0" : "=m" (p[d])); + } + + asm volatile("sfence" : : : "memory"); + kernel_fpu_end(); +} + const struct raid6_calls raid6_sse2x1 = { raid6_sse21_gen_syndrome, - NULL, /* XOR not yet implemented */ + raid6_sse21_xor_syndrome, raid6_have_sse2, "sse2x1", 1 /* Has cache hints */ @@ -151,9 +200,76 @@ static void raid6_sse22_gen_syndrome(int disks, size_t bytes, void **ptrs) kernel_fpu_end(); } + static void raid6_sse22_xor_syndrome(int disks, int start, int stop, + size_t bytes, void **ptrs) + { + u8 **dptr = (u8 **)ptrs; + u8 *p, *q; + int d, z, z0; + + z0 = stop; /* P/Q right side optimization */ + p = dptr[disks-2]; /* XOR parity */ + q = dptr[disks-1]; /* RS syndrome */ + + kernel_fpu_begin(); + + asm volatile("movdqa %0,%%xmm0" : : "m" (raid6_sse_constants.x1d[0])); + + for ( d = 0 ; d < bytes ; d += 32 ) { + asm volatile("movdqa %0,%%xmm4" :: "m" (dptr[z0][d])); + asm volatile("movdqa %0,%%xmm6" :: "m" (dptr[z0][d+16])); + asm volatile("movdqa %0,%%xmm2" : : "m" (p[d])); + asm volatile("movdqa %0,%%xmm3" : : "m" (p[d+16])); + asm volatile("pxor %xmm4,%xmm2"); + asm volatile("pxor %xmm6,%xmm3"); + /* P/Q data pages */ + for ( z = z0-1 ; z >= start ; z-- ) { + asm volatile("pxor %xmm5,%xmm5"); + 
asm volatile("pxor %xmm7,%xmm7"); + asm volatile("pcmpgtb %xmm4,%xmm5"); + asm volatile("pcmpgtb %xmm6,%xmm7"); + asm volatile("paddb %xmm4,%xmm4"); + asm volatile("paddb %xmm6,%xmm6"); + asm volatile("pand %xmm0,%xmm5"); + asm volatile("pand %xmm0,%xmm7"); + asm volatile("pxor %xmm5,%xmm4"); + asm volatile("pxor %xmm7,%xmm6"); + asm volatile("movdqa %0,%%xmm5" :: "m" (dptr[z][d])); + asm volatile("movdqa %0,%%xmm7" :: "m" (dptr[z][d+16])); + asm volatile("pxor %xmm5,%xmm2"); + asm volatile("pxor %xmm7,%xmm3"); + asm volatile("pxor %xmm5,%xmm4"); + asm volatile("pxor %xmm7,%xmm6"); + } + /* P/Q left side optimization */ + for ( z = start-1 ; z >= 0 ; z-- ) { + asm volatile("pxor %xmm5,%xmm5"); + asm volatile("pxor %xmm7,%xmm7"); + asm volatile("pcmpgtb %xmm4,%xmm5"); + asm volatile("pcmpgtb %xmm6,%xmm7"); + asm volatile("paddb %xmm4,%xmm4"); + asm volatile("paddb %xmm6,%xmm6"); + asm volatile("pand %xmm0,%xmm5"); + asm volatile("pand %xmm0,%xmm7"); + asm volatile("pxor %xmm5,%xmm4"); + asm volatile("pxor %xmm7,%xmm6"); + } + asm volatile("pxor %0,%%xmm4" : : "m" (q[d])); + asm volatile("pxor %0,%%xmm6" : : "m" (q[d+16])); + /* Don't use movntdq for r/w memory area < cache line */ + asm volatile("movdqa %%xmm4,%0" : "=m" (q[d])); + asm volatile("movdqa %%xmm6,%0" : "=m" (q[d+16])); + asm volatile("movdqa %%xmm2,%0" : "=m" (p[d])); + asm volatile("movdqa %%xmm3,%0" : "=m" (p[d+16])); + } + + asm volatile("sfence" : : : "memory"); + kernel_fpu_end(); + } + const struct raid6_calls raid6_sse2x2 = { raid6_sse22_gen_syndrome, - NULL, /* XOR not yet implemented */ + raid6_sse22_xor_syndrome, raid6_have_sse2, "sse2x2", 1 /* Has cache hints */ @@ -250,9 +366,117 @@ static void raid6_sse24_gen_syndrome(int disks, size_t bytes, void **ptrs) kernel_fpu_end(); } + static void raid6_sse24_xor_syndrome(int disks, int start, int stop, + size_t bytes, void **ptrs) + { + u8 **dptr = (u8 **)ptrs; + u8 *p, *q; + int d, z, z0; + + z0 = stop; /* P/Q right side optimization */ + p = 
dptr[disks-2]; /* XOR parity */ + q = dptr[disks-1]; /* RS syndrome */ + + kernel_fpu_begin(); + + asm volatile("movdqa %0,%%xmm0" :: "m" (raid6_sse_constants.x1d[0])); + + for ( d = 0 ; d < bytes ; d += 64 ) { + asm volatile("movdqa %0,%%xmm4" :: "m" (dptr[z0][d])); + asm volatile("movdqa %0,%%xmm6" :: "m" (dptr[z0][d+16])); + asm volatile("movdqa %0,%%xmm12" :: "m" (dptr[z0][d+32])); + asm volatile("movdqa %0,%%xmm14" :: "m" (dptr[z0][d+48])); + asm volatile("movdqa %0,%%xmm2" : : "m" (p[d])); + asm volatile("movdqa %0,%%xmm3" : : "m" (p[d+16])); + asm volatile("movdqa %0,%%xmm10" : : "m" (p[d+32])); + asm volatile("movdqa %0,%%xmm11" : : "m" (p[d+48])); + asm volatile("pxor %xmm4,%xmm2"); + asm volatile("pxor %xmm6,%xmm3"); + asm volatile("pxor %xmm12,%xmm10"); + asm volatile("pxor %xmm14,%xmm11"); + /* P/Q data pages */ + for ( z = z0-1 ; z >= start ; z-- ) { + asm volatile("prefetchnta %0" :: "m" (dptr[z][d])); + asm volatile("prefetchnta %0" :: "m" (dptr[z][d+32])); + asm volatile("pxor %xmm5,%xmm5"); + asm volatile("pxor %xmm7,%xmm7"); + asm volatile("pxor %xmm13,%xmm13"); + asm volatile("pxor %xmm15,%xmm15"); + asm volatile("pcmpgtb %xmm4,%xmm5"); + asm volatile("pcmpgtb %xmm6,%xmm7"); + asm volatile("pcmpgtb %xmm12,%xmm13"); + asm volatile("pcmpgtb %xmm14,%xmm15"); + asm volatile("paddb %xmm4,%xmm4"); + asm volatile("paddb %xmm6,%xmm6"); + asm volatile("paddb %xmm12,%xmm12"); + asm volatile("paddb %xmm14,%xmm14"); + asm volatile("pand %xmm0,%xmm5"); + asm volatile("pand %xmm0,%xmm7"); + asm volatile("pand %xmm0,%xmm13"); + asm volatile("pand %xmm0,%xmm15"); + asm volatile("pxor %xmm5,%xmm4"); + asm volatile("pxor %xmm7,%xmm6"); + asm volatile("pxor %xmm13,%xmm12"); + asm volatile("pxor %xmm15,%xmm14"); + asm volatile("movdqa %0,%%xmm5" :: "m" (dptr[z][d])); + asm volatile("movdqa %0,%%xmm7" :: "m" (dptr[z][d+16])); + asm volatile("movdqa %0,%%xmm13" :: "m" (dptr[z][d+32])); + asm volatile("movdqa %0,%%xmm15" :: "m" (dptr[z][d+48])); + asm volatile("pxor 
%xmm5,%xmm2"); + asm volatile("pxor %xmm7,%xmm3"); + asm volatile("pxor %xmm13,%xmm10"); + asm volatile("pxor %xmm15,%xmm11"); + asm volatile("pxor %xmm5,%xmm4"); + asm volatile("pxor %xmm7,%xmm6"); + asm volatile("pxor %xmm13,%xmm12"); + asm volatile("pxor %xmm15,%xmm14"); + } + asm volatile("prefetchnta %0" :: "m" (q[d])); + asm volatile("prefetchnta %0" :: "m" (q[d+32])); + /* P/Q left side optimization */ + for ( z = start-1 ; z >= 0 ; z-- ) { + asm volatile("pxor %xmm5,%xmm5"); + asm volatile("pxor %xmm7,%xmm7"); + asm volatile("pxor %xmm13,%xmm13"); + asm volatile("pxor %xmm15,%xmm15"); + asm volatile("pcmpgtb %xmm4,%xmm5"); + asm volatile("pcmpgtb %xmm6,%xmm7"); + asm volatile("pcmpgtb %xmm12,%xmm13"); + asm volatile("pcmpgtb %xmm14,%xmm15"); + asm volatile("paddb %xmm4,%xmm4"); + asm volatile("paddb %xmm6,%xmm6"); + asm volatile("paddb %xmm12,%xmm12"); + asm volatile("paddb %xmm14,%xmm14"); + asm volatile("pand %xmm0,%xmm5"); + asm volatile("pand %xmm0,%xmm7"); + asm volatile("pand %xmm0,%xmm13"); + asm volatile("pand %xmm0,%xmm15"); + asm volatile("pxor %xmm5,%xmm4"); + asm volatile("pxor %xmm7,%xmm6"); + asm volatile("pxor %xmm13,%xmm12"); + asm volatile("pxor %xmm15,%xmm14"); + } + asm volatile("movntdq %%xmm2,%0" : "=m" (p[d])); + asm volatile("movntdq %%xmm3,%0" : "=m" (p[d+16])); + asm volatile("movntdq %%xmm10,%0" : "=m" (p[d+32])); + asm volatile("movntdq %%xmm11,%0" : "=m" (p[d+48])); + asm volatile("pxor %0,%%xmm4" : : "m" (q[d])); + asm volatile("pxor %0,%%xmm6" : : "m" (q[d+16])); + asm volatile("pxor %0,%%xmm12" : : "m" (q[d+32])); + asm volatile("pxor %0,%%xmm14" : : "m" (q[d+48])); + asm volatile("movntdq %%xmm4,%0" : "=m" (q[d])); + asm volatile("movntdq %%xmm6,%0" : "=m" (q[d+16])); + asm volatile("movntdq %%xmm12,%0" : "=m" (q[d+32])); + asm volatile("movntdq %%xmm14,%0" : "=m" (q[d+48])); + } + asm volatile("sfence" : : : "memory"); + kernel_fpu_end(); + } + + const struct raid6_calls raid6_sse2x4 = { raid6_sse24_gen_syndrome, - NULL, /* 
XOR not yet implemented */ + raid6_sse24_xor_syndrome, raid6_have_sse2, "sse2x4", 1 /* Has cache hints */ -- cgit v1.2.3 From e2307ed6cbe71c74e291681aaa7e92ab98bc3177 Mon Sep 17 00:00:00 2001 From: Thomas Graf Date: Wed, 22 Apr 2015 09:41:45 +0200 Subject: rhashtable: Schedule async resize when sync realloc fails When rhashtable_insert_rehash() fails with ENOMEM, this indicates that we can't allocate the necessary memory in the current context but the limits as set by the user would still allow to grow. Thus attempt an async resize in the background where we can allocate using GFP_KERNEL which is more likely to succeed. The insertion itself will still fail to indicate pressure. This fixes a bug where the table would never continue growing once the utilization is above 100%. Fixes: ccd57b1bd324 ("rhashtable: Add immediate rehash during insertion") Signed-off-by: Thomas Graf Acked-by: Herbert Xu Signed-off-by: David S. Miller --- lib/rhashtable.c | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) (limited to 'lib') diff --git a/lib/rhashtable.c b/lib/rhashtable.c index 4898442b837f..f648cfde8520 100644 --- a/lib/rhashtable.c +++ b/lib/rhashtable.c @@ -410,8 +410,13 @@ int rhashtable_insert_rehash(struct rhashtable *ht) return -EBUSY; new_tbl = bucket_table_alloc(ht, size, GFP_ATOMIC); - if (new_tbl == NULL) + if (new_tbl == NULL) { + /* Schedule async resize/rehash to try allocation + * non-atomic context. + */ + schedule_work(&ht->run_work); return -ENOMEM; + } err = rhashtable_rehash_attach(ht, tbl, new_tbl); if (err) { -- cgit v1.2.3 From a87b9ebf1709687ff213091d0fdb4254b1564803 Mon Sep 17 00:00:00 2001 From: Thomas Graf Date: Wed, 22 Apr 2015 09:41:46 +0200 Subject: rhashtable: Do not schedule more than one rehash if we can't grow further The current code currently only stops inserting rehashes into the chain when no resizes are currently scheduled. 
As long as resizes are scheduled and while inserting above the utilization watermark, more and more rehashes will be scheduled. This led to a perfect DoS storm with thousands of rehashes scheduled which led to thousands of spinlocks to be taken sequentially. Instead, only allow either a series of resizes or a single rehash. Drop any further rehashes and return -EBUSY. Fixes: ccd57b1bd324 ("rhashtable: Add immediate rehash during insertion") Signed-off-by: Thomas Graf Acked-by: Herbert Xu Signed-off-by: David S. Miller --- lib/rhashtable.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'lib') diff --git a/lib/rhashtable.c b/lib/rhashtable.c index f648cfde8520..b28df4019ade 100644 --- a/lib/rhashtable.c +++ b/lib/rhashtable.c @@ -405,8 +405,8 @@ int rhashtable_insert_rehash(struct rhashtable *ht) if (rht_grow_above_75(ht, tbl)) size *= 2; - /* More than two rehashes (not resizes) detected. */ - else if (WARN_ON(old_tbl != tbl && old_tbl->size == size)) + /* Do not schedule more than one rehash */ + else if (old_tbl != tbl) return -EBUSY; new_tbl = bucket_table_alloc(ht, size, GFP_ATOMIC); -- cgit v1.2.3 From 7829fb09a2b4268b30dd9bc782fa5ebee278b137 Mon Sep 17 00:00:00 2001 From: Daniel Borkmann Date: Thu, 30 Apr 2015 04:13:52 +0200 Subject: lib: make memzero_explicit more robust against dead store elimination In commit 0b053c951829 ("lib: memzero_explicit: use barrier instead of OPTIMIZER_HIDE_VAR"), we made memzero_explicit() more robust in case LTO would decide to inline memzero_explicit() and eventually find out it could be eliminated as dead store. While using barrier() works well for the case of gcc, recent efforts from LLVMLinux people suggest to use llvm as an alternative to gcc, and there, Stephan found in a simple stand-alone user space example that llvm could nevertheless optimize and thus eliminate the memset(). A similar issue has been observed in the referenced llvm bug report, which is regarded as not-a-bug. 
Based on some experiments, icc is a bit special on its own, while it doesn't seem to eliminate the memset(), it could do so with an own implementation, and then result in similar findings as with llvm. The fix in this patch now works for all three compilers (also tested with more aggressive optimization levels). Arguably, in the current kernel tree it's more of a theoretical issue, but imho, it's better to be pedantic about it. It's clearly visible with gcc/llvm though, with the below code: if we would have used barrier() only here, llvm would have omitted clearing, not so with barrier_data() variant: static inline void memzero_explicit(void *s, size_t count) { memset(s, 0, count); barrier_data(s); } int main(void) { char buff[20]; memzero_explicit(buff, sizeof(buff)); return 0; } $ gcc -O2 test.c $ gdb a.out (gdb) disassemble main Dump of assembler code for function main: 0x0000000000400400 <+0>: lea -0x28(%rsp),%rax 0x0000000000400405 <+5>: movq $0x0,-0x28(%rsp) 0x000000000040040e <+14>: movq $0x0,-0x20(%rsp) 0x0000000000400417 <+23>: movl $0x0,-0x18(%rsp) 0x000000000040041f <+31>: xor %eax,%eax 0x0000000000400421 <+33>: retq End of assembler dump. $ clang -O2 test.c $ gdb a.out (gdb) disassemble main Dump of assembler code for function main: 0x00000000004004f0 <+0>: xorps %xmm0,%xmm0 0x00000000004004f3 <+3>: movaps %xmm0,-0x18(%rsp) 0x00000000004004f8 <+8>: movl $0x0,-0x8(%rsp) 0x0000000000400500 <+16>: lea -0x18(%rsp),%rax 0x0000000000400505 <+21>: xor %eax,%eax 0x0000000000400507 <+23>: retq End of assembler dump. As gcc, clang, but also icc defines __GNUC__, it's sufficient to define this in compiler-gcc.h only to be picked up. For a fallback or otherwise unsupported compiler, we define it as a barrier. Similarly, for ecc which does not support gcc inline asm. 
Reference: https://llvm.org/bugs/show_bug.cgi?id=15495 Reported-by: Stephan Mueller Tested-by: Stephan Mueller Signed-off-by: Daniel Borkmann Cc: Theodore Ts'o Cc: Stephan Mueller Cc: Hannes Frederic Sowa Cc: mancha security Cc: Mark Charlebois Cc: Behan Webster Signed-off-by: Herbert Xu --- include/linux/compiler-gcc.h | 16 +++++++++++++++- include/linux/compiler-intel.h | 3 +++ include/linux/compiler.h | 4 ++++ lib/string.c | 2 +- 4 files changed, 23 insertions(+), 2 deletions(-) (limited to 'lib') diff --git a/include/linux/compiler-gcc.h b/include/linux/compiler-gcc.h index cdf13ca7cac3..371e560d13cf 100644 --- a/include/linux/compiler-gcc.h +++ b/include/linux/compiler-gcc.h @@ -9,10 +9,24 @@ + __GNUC_MINOR__ * 100 \ + __GNUC_PATCHLEVEL__) - /* Optimization barrier */ + /* The "volatile" is due to gcc bugs */ #define barrier() __asm__ __volatile__("": : :"memory") +/* + * This version is i.e. to prevent dead stores elimination on @ptr + * where gcc and llvm may behave differently when otherwise using + * normal barrier(): while gcc behavior gets along with a normal + * barrier(), llvm needs an explicit input variable to be assumed + * clobbered. The issue is as follows: while the inline asm might + * access any memory it wants, the compiler could have fit all of + * @ptr into memory registers instead, and since @ptr never escaped + * from that, it proofed that the inline asm wasn't touching any of + * it. This version works well with both compilers, i.e. we're telling + * the compiler that the inline asm absolutely may see the contents + * of @ptr. 
See also: https://llvm.org/bugs/show_bug.cgi?id=15495 + */ +#define barrier_data(ptr) __asm__ __volatile__("": :"r"(ptr) :"memory") /* * This macro obfuscates arithmetic on a variable address so that gcc diff --git a/include/linux/compiler-intel.h b/include/linux/compiler-intel.h index ba147a1727e6..0c9a2f2c2802 100644 --- a/include/linux/compiler-intel.h +++ b/include/linux/compiler-intel.h @@ -13,9 +13,12 @@ /* Intel ECC compiler doesn't support gcc specific asm stmts. * It uses intrinsics to do the equivalent things. */ +#undef barrier_data #undef RELOC_HIDE #undef OPTIMIZER_HIDE_VAR +#define barrier_data(ptr) barrier() + #define RELOC_HIDE(ptr, off) \ ({ unsigned long __ptr; \ __ptr = (unsigned long) (ptr); \ diff --git a/include/linux/compiler.h b/include/linux/compiler.h index 0e41ca0e5927..867722591be2 100644 --- a/include/linux/compiler.h +++ b/include/linux/compiler.h @@ -169,6 +169,10 @@ void ftrace_likely_update(struct ftrace_branch_data *f, int val, int expect); # define barrier() __memory_barrier() #endif +#ifndef barrier_data +# define barrier_data(ptr) barrier() +#endif + /* Unreachable code */ #ifndef unreachable # define unreachable() do { } while (1) diff --git a/lib/string.c b/lib/string.c index a5792019193c..bb3d4b6993c4 100644 --- a/lib/string.c +++ b/lib/string.c @@ -607,7 +607,7 @@ EXPORT_SYMBOL(memset); void memzero_explicit(void *s, size_t count) { memset(s, 0, count); - barrier(); + barrier_data(s); } EXPORT_SYMBOL(memzero_explicit); -- cgit v1.2.3 From 7d616e4ddb9c0754ed6245a43332d5b867e4db11 Mon Sep 17 00:00:00 2001 From: Yury Norov Date: Tue, 5 May 2015 16:23:33 -0700 Subject: lib: delete lib/find_last_bit.c The file lib/find_last_bit.c was no longer used and supposed to be deleted by commit 8f6f19dd51 ("lib: move find_last_bit to lib/find_next_bit.c") but that delete didn't happen. This gets rid of it. 
Signed-off-by: Yury Norov Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- lib/find_last_bit.c | 41 ----------------------------------------- 1 file changed, 41 deletions(-) delete mode 100644 lib/find_last_bit.c (limited to 'lib') diff --git a/lib/find_last_bit.c b/lib/find_last_bit.c deleted file mode 100644 index 3e3be40c6a6e..000000000000 --- a/lib/find_last_bit.c +++ /dev/null @@ -1,41 +0,0 @@ -/* find_last_bit.c: fallback find next bit implementation - * - * Copyright (C) 2008 IBM Corporation - * Written by Rusty Russell - * (Inspired by David Howell's find_next_bit implementation) - * - * Rewritten by Yury Norov to decrease - * size and improve performance, 2015. - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public License - * as published by the Free Software Foundation; either version - * 2 of the License, or (at your option) any later version. - */ - -#include -#include -#include -#include - -#ifndef find_last_bit - -unsigned long find_last_bit(const unsigned long *addr, unsigned long size) -{ - if (size) { - unsigned long val = BITMAP_LAST_WORD_MASK(size); - unsigned long idx = (size-1) / BITS_PER_LONG; - - do { - val &= addr[idx]; - if (val) - return idx * BITS_PER_LONG + __fls(val); - - val = ~0ul; - } while (idx--); - } - return size; -} -EXPORT_SYMBOL(find_last_bit); - -#endif -- cgit v1.2.3 From 01e76903f655a4d88c2e09d3182436c65f6e1213 Mon Sep 17 00:00:00 2001 From: Joe Perches Date: Tue, 5 May 2015 16:23:38 -0700 Subject: kasan: show gcc version requirements in Kconfig and Documentation The documentation shows a need for gcc > 4.9.2, but it's really >=. The Kconfig entries don't show required versions so add them. Correct a latter/later typo too. Also mention that gcc 5 is required to catch out of bounds accesses to global and stack variables. 
Signed-off-by: Joe Perches Signed-off-by: Andrey Ryabinin Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- Documentation/kasan.txt | 8 +++++--- lib/Kconfig.kasan | 8 ++++++-- 2 files changed, 11 insertions(+), 5 deletions(-) (limited to 'lib') diff --git a/Documentation/kasan.txt b/Documentation/kasan.txt index 092fc10961fe..4692241789b1 100644 --- a/Documentation/kasan.txt +++ b/Documentation/kasan.txt @@ -9,7 +9,9 @@ a fast and comprehensive solution for finding use-after-free and out-of-bounds bugs. KASan uses compile-time instrumentation for checking every memory access, -therefore you will need a certain version of GCC > 4.9.2 +therefore you will need a gcc version of 4.9.2 or later. KASan could detect out +of bounds accesses to stack or global variables, but only if gcc 5.0 or later was +used to built the kernel. Currently KASan is supported only for x86_64 architecture and requires that the kernel be built with the SLUB allocator. @@ -23,8 +25,8 @@ To enable KASAN configure kernel with: and choose between CONFIG_KASAN_OUTLINE and CONFIG_KASAN_INLINE. Outline/inline is compiler instrumentation types. The former produces smaller binary the -latter is 1.1 - 2 times faster. Inline instrumentation requires GCC 5.0 or -latter. +latter is 1.1 - 2 times faster. Inline instrumentation requires a gcc version +of 5.0 or later. Currently KASAN works only with the SLUB memory allocator. For better bug detection and nicer report, enable CONFIG_STACKTRACE and put diff --git a/lib/Kconfig.kasan b/lib/Kconfig.kasan index 4fecaedc80a2..777eda7d1ab4 100644 --- a/lib/Kconfig.kasan +++ b/lib/Kconfig.kasan @@ -10,8 +10,11 @@ config KASAN help Enables kernel address sanitizer - runtime memory debugger, designed to find out-of-bounds accesses and use-after-free bugs. - This is strictly debugging feature. It consumes about 1/8 - of available memory and brings about ~x3 performance slowdown. 
+ This is strictly a debugging feature and it requires a gcc version + of 4.9.2 or later. Detection of out of bounds accesses to stack or + global variables requires gcc 5.0 or later. + This feature consumes about 1/8 of available memory and brings about + ~x3 performance slowdown. For better error detection enable CONFIG_STACKTRACE, and add slub_debug=U to boot cmdline. @@ -40,6 +43,7 @@ config KASAN_INLINE memory accesses. This is faster than outline (in some workloads it gives about x2 boost over outline instrumentation), but make kernel's .text size much bigger. + This requires a gcc version of 5.0 or later. endchoice -- cgit v1.2.3