diff options
author | Israel Rukshin <israelr@mellanox.com> | 2019-11-24 18:38:32 +0200 |
---|---|---|
committer | Keith Busch <kbusch@kernel.org> | 2019-11-27 02:14:19 +0900 |
commit | 52e6d8ed16fdf9f1d2923a2b036222a5ac834b1d (patch) | |
tree | 23b7589ed985efafe34fe252e1e5e96a54197398 /drivers/nvme | |
parent | b1ae1a238900474a9f51431c0f7f169ade1faa19 (diff) | |
download | linux-52e6d8ed16fdf9f1d2923a2b036222a5ac834b1d.tar.bz2 |
nvmet-loop: Avoid preallocating big SGL for data
nvme_loop_create_io_queues() preallocates a big buffer for the IO SGL based
on SG_CHUNK_SIZE.
Modern DMA engines are often capable of dealing with very big segments so
the SG_CHUNK_SIZE is often too big. SG_CHUNK_SIZE results in a static 4KB
SGL allocation per command.
If a controller has lots of deep queues, preallocation for the sg list can
consume substantial amounts of memory. For nvmet-loop, nr_hw_queues can be
128 and each queue's depth 128. This means the resulting preallocation
for the data SGL is 128*128*4K = 64MB per controller.
Switch to runtime allocation for SGL for lists longer than 2 entries. This
is the approach used by NVMe PCI so it should be reasonable for NVMeOF as
well. Runtime SGL allocation has always been the case for the legacy I/O
path so this is nothing new.
Tested-by: Chaitanya Kulkarni <chaitanya.kulkarni@wdc.com>
Reviewed-by: Christoph Hellwig <hch@lst.de>
Reviewed-by: Chaitanya Kulkarni <chaitanya.kulkarni@wdc.com>
Reviewed-by: Max Gurtovoy <maxg@mellanox.com>
Signed-off-by: Israel Rukshin <israelr@mellanox.com>
Signed-off-by: Keith Busch <kbusch@kernel.org>
Diffstat (limited to 'drivers/nvme')
-rw-r--r-- | drivers/nvme/target/loop.c | 8 |
1 files changed, 4 insertions, 4 deletions
diff --git a/drivers/nvme/target/loop.c b/drivers/nvme/target/loop.c index a758bb3d5dd4..4df4ebde208a 100644 --- a/drivers/nvme/target/loop.c +++ b/drivers/nvme/target/loop.c @@ -76,7 +76,7 @@ static void nvme_loop_complete_rq(struct request *req) { struct nvme_loop_iod *iod = blk_mq_rq_to_pdu(req); - sg_free_table_chained(&iod->sg_table, SG_CHUNK_SIZE); + sg_free_table_chained(&iod->sg_table, NVME_INLINE_SG_CNT); nvme_complete_rq(req); } @@ -156,7 +156,7 @@ static blk_status_t nvme_loop_queue_rq(struct blk_mq_hw_ctx *hctx, iod->sg_table.sgl = iod->first_sgl; if (sg_alloc_table_chained(&iod->sg_table, blk_rq_nr_phys_segments(req), - iod->sg_table.sgl, SG_CHUNK_SIZE)) { + iod->sg_table.sgl, NVME_INLINE_SG_CNT)) { nvme_cleanup_cmd(req); return BLK_STS_RESOURCE; } @@ -342,7 +342,7 @@ static int nvme_loop_configure_admin_queue(struct nvme_loop_ctrl *ctrl) ctrl->admin_tag_set.reserved_tags = 2; /* connect + keep-alive */ ctrl->admin_tag_set.numa_node = NUMA_NO_NODE; ctrl->admin_tag_set.cmd_size = sizeof(struct nvme_loop_iod) + - SG_CHUNK_SIZE * sizeof(struct scatterlist); + NVME_INLINE_SG_CNT * sizeof(struct scatterlist); ctrl->admin_tag_set.driver_data = ctrl; ctrl->admin_tag_set.nr_hw_queues = 1; ctrl->admin_tag_set.timeout = ADMIN_TIMEOUT; @@ -516,7 +516,7 @@ static int nvme_loop_create_io_queues(struct nvme_loop_ctrl *ctrl) ctrl->tag_set.numa_node = NUMA_NO_NODE; ctrl->tag_set.flags = BLK_MQ_F_SHOULD_MERGE; ctrl->tag_set.cmd_size = sizeof(struct nvme_loop_iod) + - SG_CHUNK_SIZE * sizeof(struct scatterlist); + NVME_INLINE_SG_CNT * sizeof(struct scatterlist); ctrl->tag_set.driver_data = ctrl; ctrl->tag_set.nr_hw_queues = ctrl->ctrl.queue_count - 1; ctrl->tag_set.timeout = NVME_IO_TIMEOUT; |