From 4e5dff41be7b5201c1c47ceb3a2a8d698516bc2b Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Tue, 14 Nov 2017 10:24:58 -0700 Subject: blk-mq: improve heavily contended tag case Even with a number of waitqueues, we can get into a situation where we are heavily contended on the waitqueue lock. I got a report on spc1 where we're spending seconds doing this. Arguably the use case is nasty, I reproduce it with one device and 1000 threads banging on the device. But that doesn't mean we shouldn't be handling it better. What ends up happening is that a thread will fail to get a tag, add itself to the waitqueue, and subsequently get woken up when a tag is freed - only to find itself going back to sleep on the waitqueue. Instead of waking all threads, use an exclusive wait and wake up our sbitmap batch count instead. This seems to work well for me (massive improvement for this use case), and it survives basic testing. But I haven't fully verified it yet. An additional improvement is running the queue and checking for a new tag BEFORE needing to add ourselves to the waitqueue. Signed-off-by: Jens Axboe --- lib/sbitmap.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'lib') diff --git a/lib/sbitmap.c b/lib/sbitmap.c index 80aa8d5463fa..42b5ca0acf93 100644 --- a/lib/sbitmap.c +++ b/lib/sbitmap.c @@ -462,7 +462,7 @@ static void sbq_wake_up(struct sbitmap_queue *sbq) */ atomic_cmpxchg(&ws->wait_cnt, wait_cnt, wait_cnt + wake_batch); sbq_index_atomic_inc(&sbq->wake_index); - wake_up(&ws->wait); + wake_up_nr(&ws->wait, wake_batch); } } -- cgit v1.2.3 From e80a0af4759a164214f02da157a3800753ce135f Mon Sep 17 00:00:00 2001 From: Bart Van Assche Date: Fri, 5 Jan 2018 08:26:46 -0800 Subject: lib/scatterlist: Introduce sgl_alloc() and sgl_free() Many kernel drivers contain code that allocates and frees both a scatterlist and the pages that populate that scatterlist. Introduce functions in lib/scatterlist.c that perform these tasks instead of duplicating this functionality in multiple drivers. Only include these functions in the build if CONFIG_SGL_ALLOC=y to avoid that the kernel size increases if this functionality is not used. Signed-off-by: Bart Van Assche Reviewed-by: Hannes Reinecke Reviewed-by: Johannes Thumshirn Signed-off-by: Jens Axboe --- include/linux/scatterlist.h | 10 +++++ lib/Kconfig | 4 ++ lib/scatterlist.c | 105 ++++++++++++++++++++++++++++++++++++++++++++ 3 files changed, 119 insertions(+) (limited to 'lib') diff --git a/include/linux/scatterlist.h b/include/linux/scatterlist.h index b7c83254c566..b8a7c1d1dbe3 100644 --- a/include/linux/scatterlist.h +++ b/include/linux/scatterlist.h @@ -276,6 +276,16 @@ int sg_alloc_table_from_pages(struct sg_table *sgt, struct page **pages, unsigned int n_pages, unsigned int offset, unsigned long size, gfp_t gfp_mask); +#ifdef CONFIG_SGL_ALLOC +struct scatterlist *sgl_alloc_order(unsigned long long length, + unsigned int order, bool chainable, + gfp_t gfp, unsigned int *nent_p); +struct scatterlist *sgl_alloc(unsigned long long length, gfp_t gfp, + unsigned int *nent_p); +void sgl_free_order(struct scatterlist *sgl, int order); +void sgl_free(struct scatterlist *sgl); +#endif /* CONFIG_SGL_ALLOC */ + size_t sg_copy_buffer(struct scatterlist *sgl, unsigned int nents, void *buf, size_t buflen, off_t skip, bool to_buffer); diff --git a/lib/Kconfig b/lib/Kconfig index c5e84fbcb30b..4dd5c11366f9 100644 --- a/lib/Kconfig +++ b/lib/Kconfig @@ -409,6 +409,10 @@ config HAS_DMA depends on !NO_DMA default y +config SGL_ALLOC + bool + default n + config DMA_NOOP_OPS bool depends on HAS_DMA && (!64BIT || ARCH_DMA_ADDR_T_64BIT) diff --git a/lib/scatterlist.c b/lib/scatterlist.c index 7c1c55f7daaa..9afc9b432083 100644 --- a/lib/scatterlist.c +++ b/lib/scatterlist.c @@ -474,6 +474,111 @@ int sg_alloc_table_from_pages(struct sg_table *sgt, struct page **pages, } EXPORT_SYMBOL(sg_alloc_table_from_pages); +#ifdef CONFIG_SGL_ALLOC + +/** + * sgl_alloc_order - allocate a scatterlist and its pages + * @length: Length in bytes of the scatterlist. Must be at least one + * @order: Second argument for alloc_pages() + * @chainable: Whether or not to allocate an extra element in the scatterlist + * for scatterlist chaining purposes + * @gfp: Memory allocation flags + * @nent_p: [out] Number of entries in the scatterlist that have pages + * + * Returns: A pointer to an initialized scatterlist or %NULL upon failure. + */ +struct scatterlist *sgl_alloc_order(unsigned long long length, + unsigned int order, bool chainable, + gfp_t gfp, unsigned int *nent_p) +{ + struct scatterlist *sgl, *sg; + struct page *page; + unsigned int nent, nalloc; + u32 elem_len; + + nent = round_up(length, PAGE_SIZE << order) >> (PAGE_SHIFT + order); + /* Check for integer overflow */ + if (length > (nent << (PAGE_SHIFT + order))) + return NULL; + nalloc = nent; + if (chainable) { + /* Check for integer overflow */ + if (nalloc + 1 < nalloc) + return NULL; + nalloc++; + } + sgl = kmalloc_array(nalloc, sizeof(struct scatterlist), + (gfp & ~GFP_DMA) | __GFP_ZERO); + if (!sgl) + return NULL; + + sg_init_table(sgl, nent); + sg = sgl; + while (length) { + elem_len = min_t(u64, length, PAGE_SIZE << order); + page = alloc_pages(gfp, order); + if (!page) { + sgl_free(sgl); + return NULL; + } + + sg_set_page(sg, page, elem_len, 0); + length -= elem_len; + sg = sg_next(sg); + } + WARN_ON_ONCE(sg); + if (nent_p) + *nent_p = nent; + return sgl; +} +EXPORT_SYMBOL(sgl_alloc_order); + +/** + * sgl_alloc - allocate a scatterlist and its pages + * @length: Length in bytes of the scatterlist + * @gfp: Memory allocation flags + * @nent_p: [out] Number of entries in the scatterlist + * + * Returns: A pointer to an initialized scatterlist or %NULL upon failure. + */ +struct scatterlist *sgl_alloc(unsigned long long length, gfp_t gfp, + unsigned int *nent_p) +{ + return sgl_alloc_order(length, 0, false, gfp, nent_p); +} +EXPORT_SYMBOL(sgl_alloc); + +/** + * sgl_free_order - free a scatterlist and its pages + * @sgl: Scatterlist with one or more elements + * @order: Second argument for __free_pages() + */ +void sgl_free_order(struct scatterlist *sgl, int order) +{ + struct scatterlist *sg; + struct page *page; + + for (sg = sgl; sg; sg = sg_next(sg)) { + page = sg_page(sg); + if (page) + __free_pages(page, order); + } + kfree(sgl); +} +EXPORT_SYMBOL(sgl_free_order); + +/** + * sgl_free - free a scatterlist and its pages + * @sgl: Scatterlist with one or more elements + */ +void sgl_free(struct scatterlist *sgl) +{ + sgl_free_order(sgl, 0); +} +EXPORT_SYMBOL(sgl_free); + +#endif /* CONFIG_SGL_ALLOC */ + void __sg_page_iter_start(struct sg_page_iter *piter, struct scatterlist *sglist, unsigned int nents, unsigned long pgoffset) -- cgit v1.2.3 From 8c7a8d1c4b9c30a2be3b31a2e6af1cefd45574eb Mon Sep 17 00:00:00 2001 From: Bart Van Assche Date: Fri, 19 Jan 2018 11:00:54 -0800 Subject: lib/scatterlist: Fix chaining support in sgl_alloc_order() This patch avoids that workloads with large block sizes (megabytes) can trigger the following call stack with the ib_srpt driver (that driver is the only driver that chains scatterlists allocated by sgl_alloc_order()): BUG: Bad page state in process kworker/0:1H pfn:2423a78 page:fffffb03d08e9e00 count:-3 mapcount:0 mapping: (null) index:0x0 flags: 0x57ffffc0000000() raw: 0057ffffc0000000 0000000000000000 0000000000000000 fffffffdffffffff raw: dead000000000100 dead000000000200 0000000000000000 0000000000000000 page dumped because: nonzero _count CPU: 0 PID: 733 Comm: kworker/0:1H Tainted: G I 4.15.0-rc7.bart+ #1 Hardware name: HP ProLiant DL380 G7, BIOS P67 08/16/2015 Workqueue: ib-comp-wq ib_cq_poll_work [ib_core] Call Trace: dump_stack+0x5c/0x83 bad_page+0xf5/0x10f get_page_from_freelist+0xa46/0x11b0 __alloc_pages_nodemask+0x103/0x290 sgl_alloc_order+0x101/0x180 target_alloc_sgl+0x2c/0x40 [target_core_mod] srpt_alloc_rw_ctxs+0x173/0x2d0 [ib_srpt] srpt_handle_new_iu+0x61e/0x7f0 [ib_srpt] __ib_process_cq+0x55/0xa0 [ib_core] ib_cq_poll_work+0x1b/0x60 [ib_core] process_one_work+0x141/0x340 worker_thread+0x47/0x3e0 kthread+0xf5/0x130 ret_from_fork+0x1f/0x30 Fixes: e80a0af4759a ("lib/scatterlist: Introduce sgl_alloc() and sgl_free()") Reported-by: Laurence Oberman Tested-by: Laurence Oberman Signed-off-by: Bart Van Assche Cc: Nicholas A. Bellinger Cc: Laurence Oberman Signed-off-by: Jens Axboe --- drivers/target/target_core_transport.c | 2 +- include/linux/scatterlist.h | 1 + lib/scatterlist.c | 32 +++++++++++++++++++++++++++----- 3 files changed, 29 insertions(+), 6 deletions(-) (limited to 'lib') diff --git a/drivers/target/target_core_transport.c b/drivers/target/target_core_transport.c index a001ba711cca..c03a78ee26cd 100644 --- a/drivers/target/target_core_transport.c +++ b/drivers/target/target_core_transport.c @@ -2300,7 +2300,7 @@ queue_full: void target_free_sgl(struct scatterlist *sgl, int nents) { - sgl_free(sgl); + sgl_free_n_order(sgl, nents, 0); } EXPORT_SYMBOL(target_free_sgl); diff --git a/include/linux/scatterlist.h b/include/linux/scatterlist.h index b8a7c1d1dbe3..22b2131bcdcd 100644 --- a/include/linux/scatterlist.h +++ b/include/linux/scatterlist.h @@ -282,6 +282,7 @@ struct scatterlist *sgl_alloc_order(unsigned long long length, gfp_t gfp, unsigned int *nent_p); struct scatterlist *sgl_alloc(unsigned long long length, gfp_t gfp, unsigned int *nent_p); +void sgl_free_n_order(struct scatterlist *sgl, int nents, int order); void sgl_free_order(struct scatterlist *sgl, int order); void sgl_free(struct scatterlist *sgl); #endif /* CONFIG_SGL_ALLOC */ diff --git a/lib/scatterlist.c b/lib/scatterlist.c index 9afc9b432083..53728d391d3a 100644 --- a/lib/scatterlist.c +++ b/lib/scatterlist.c @@ -512,7 +512,7 @@ struct scatterlist *sgl_alloc_order(unsigned long long length, if (!sgl) return NULL; - sg_init_table(sgl, nent); + sg_init_table(sgl, nalloc); sg = sgl; while (length) { elem_len = min_t(u64, length, PAGE_SIZE << order); @@ -526,7 +526,7 @@ struct scatterlist *sgl_alloc_order(unsigned long long length, length -= elem_len; sg = sg_next(sg); } - WARN_ON_ONCE(sg); + WARN_ONCE(length, "length = %lld\n", length); if (nent_p) *nent_p = nent; return sgl; @@ -549,22 +549,44 @@ struct scatterlist *sgl_alloc(unsigned long long length, gfp_t gfp, EXPORT_SYMBOL(sgl_alloc); /** - * sgl_free_order - free a scatterlist and its pages + * sgl_free_n_order - free a scatterlist and its pages * @sgl: Scatterlist with one or more elements + * @nents: Maximum number of elements to free * @order: Second argument for __free_pages() + * + * Notes: + * - If several scatterlists have been chained and each chain element is + * freed separately then it's essential to set nents correctly to avoid that a + * page would get freed twice. + * - All pages in a chained scatterlist can be freed at once by setting @nents + * to a high number. */ -void sgl_free_order(struct scatterlist *sgl, int order) +void sgl_free_n_order(struct scatterlist *sgl, int nents, int order) { struct scatterlist *sg; struct page *page; + int i; - for (sg = sgl; sg; sg = sg_next(sg)) { + for_each_sg(sgl, sg, nents, i) { + if (!sg) + break; page = sg_page(sg); if (page) __free_pages(page, order); } kfree(sgl); } +EXPORT_SYMBOL(sgl_free_n_order); + +/** + * sgl_free_order - free a scatterlist and its pages + * @sgl: Scatterlist with one or more elements + * @order: Second argument for __free_pages() + */ +void sgl_free_order(struct scatterlist *sgl, int order) +{ + sgl_free_n_order(sgl, INT_MAX, order); +} EXPORT_SYMBOL(sgl_free_order); /** -- cgit v1.2.3