From b4f42e2831ff9b9fa19252265d7c8985d47eefb9 Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Thu, 10 Apr 2014 09:46:28 -0600 Subject: block: remove struct request buffer member This was used in the olden days, back when onions were proper yellow. Basically it mapped to the current buffer to be transferred. With highmem being added more than a decade ago, most drivers map pages out of a bio, and rq->buffer isn't pointing at anything valid. Convert old style drivers to just use bio_data(). For the discard payload use case, just reference the page in the bio. Signed-off-by: Jens Axboe --- drivers/block/amiflop.c | 2 +- drivers/block/ataflop.c | 2 +- drivers/block/floppy.c | 18 +++++++++--------- drivers/block/hd.c | 10 +++++----- drivers/block/mg_disk.c | 12 ++++++------ drivers/block/paride/pcd.c | 2 +- drivers/block/paride/pd.c | 4 ++-- drivers/block/paride/pf.c | 4 ++-- drivers/block/skd_main.c | 5 ++--- drivers/block/swim.c | 2 +- drivers/block/swim3.c | 6 +++--- drivers/block/xen-blkfront.c | 4 ++-- drivers/block/xsysace.c | 4 ++-- drivers/block/z2ram.c | 6 ++++-- 14 files changed, 41 insertions(+), 40 deletions(-) (limited to 'drivers/block') diff --git a/drivers/block/amiflop.c b/drivers/block/amiflop.c index 748dea4f34dc..758da2287d9a 100644 --- a/drivers/block/amiflop.c +++ b/drivers/block/amiflop.c @@ -1406,7 +1406,7 @@ next_segment: track = block / (floppy->dtype->sects * floppy->type->sect_mult); sector = block % (floppy->dtype->sects * floppy->type->sect_mult); - data = rq->buffer + 512 * cnt; + data = bio_data(rq->bio) + 512 * cnt; #ifdef DEBUG printk("access to track %d, sector %d, with buffer at " "0x%08lx\n", track, sector, data); diff --git a/drivers/block/ataflop.c b/drivers/block/ataflop.c index 96b629e1f0c9..7e8a55f8917c 100644 --- a/drivers/block/ataflop.c +++ b/drivers/block/ataflop.c @@ -1484,7 +1484,7 @@ repeat: ReqCnt = 0; ReqCmd = rq_data_dir(fd_request); ReqBlock = blk_rq_pos(fd_request); - ReqBuffer = fd_request->buffer; + ReqBuffer = bio_data(fd_request->bio); setup_req_params( drive ); do_fd_action( drive ); diff --git a/drivers/block/floppy.c b/drivers/block/floppy.c index 8f5565bf34cd..5f69c910c3ac 100644 --- a/drivers/block/floppy.c +++ b/drivers/block/floppy.c @@ -2351,7 +2351,7 @@ static void rw_interrupt(void) } if (CT(COMMAND) != FD_READ || - raw_cmd->kernel_data == current_req->buffer) { + raw_cmd->kernel_data == bio_data(current_req->bio)) { /* transfer directly from buffer */ cont->done(1); } else if (CT(COMMAND) == FD_READ) { @@ -2640,7 +2640,7 @@ static int make_raw_rw_request(void) raw_cmd->flags &= ~FD_RAW_WRITE; raw_cmd->flags |= FD_RAW_READ; COMMAND = FM_MODE(_floppy, FD_READ); - } else if ((unsigned long)current_req->buffer < MAX_DMA_ADDRESS) { + } else if ((unsigned long)bio_data(current_req->bio) < MAX_DMA_ADDRESS) { unsigned long dma_limit; int direct, indirect; @@ -2654,13 +2654,13 @@ static int make_raw_rw_request(void) */ max_size = buffer_chain_size(); dma_limit = (MAX_DMA_ADDRESS - - ((unsigned long)current_req->buffer)) >> 9; + ((unsigned long)bio_data(current_req->bio))) >> 9; if ((unsigned long)max_size > dma_limit) max_size = dma_limit; /* 64 kb boundaries */ - if (CROSS_64KB(current_req->buffer, max_size << 9)) + if (CROSS_64KB(bio_data(current_req->bio), max_size << 9)) max_size = (K_64 - - ((unsigned long)current_req->buffer) % + ((unsigned long)bio_data(current_req->bio)) % K_64) >> 9; direct = transfer_size(ssize, max_sector, max_size) - fsector_t; /* @@ -2677,7 +2677,7 @@ static int make_raw_rw_request(void) (DP->read_track & 
(1 << DRS->probed_format)))))) { max_size = blk_rq_sectors(current_req); } else { - raw_cmd->kernel_data = current_req->buffer; + raw_cmd->kernel_data = bio_data(current_req->bio); raw_cmd->length = current_count_sectors << 9; if (raw_cmd->length == 0) { DPRINT("%s: zero dma transfer attempted\n", __func__); @@ -2731,7 +2731,7 @@ static int make_raw_rw_request(void) raw_cmd->length = ((raw_cmd->length - 1) | (ssize - 1)) + 1; raw_cmd->length <<= 9; if ((raw_cmd->length < current_count_sectors << 9) || - (raw_cmd->kernel_data != current_req->buffer && + (raw_cmd->kernel_data != bio_data(current_req->bio) && CT(COMMAND) == FD_WRITE && (aligned_sector_t + (raw_cmd->length >> 9) > buffer_max || aligned_sector_t < buffer_min)) || @@ -2739,7 +2739,7 @@ static int make_raw_rw_request(void) raw_cmd->length <= 0 || current_count_sectors <= 0) { DPRINT("fractionary current count b=%lx s=%lx\n", raw_cmd->length, current_count_sectors); - if (raw_cmd->kernel_data != current_req->buffer) + if (raw_cmd->kernel_data != bio_data(current_req->bio)) pr_info("addr=%d, length=%ld\n", (int)((raw_cmd->kernel_data - floppy_track_buffer) >> 9), @@ -2756,7 +2756,7 @@ static int make_raw_rw_request(void) return 0; } - if (raw_cmd->kernel_data != current_req->buffer) { + if (raw_cmd->kernel_data != bio_data(current_req->bio)) { if (raw_cmd->kernel_data < floppy_track_buffer || current_count_sectors < 0 || raw_cmd->length < 0 || diff --git a/drivers/block/hd.c b/drivers/block/hd.c index bf397bf108b7..8a290c08262f 100644 --- a/drivers/block/hd.c +++ b/drivers/block/hd.c @@ -464,11 +464,11 @@ static void read_intr(void) ok_to_read: req = hd_req; - insw(HD_DATA, req->buffer, 256); + insw(HD_DATA, bio_data(req->bio), 256); #ifdef DEBUG printk("%s: read: sector %ld, remaining = %u, buffer=%p\n", req->rq_disk->disk_name, blk_rq_pos(req) + 1, - blk_rq_sectors(req) - 1, req->buffer+512); + blk_rq_sectors(req) - 1, bio_data(req->bio)+512); #endif if (hd_end_request(0, 512)) { SET_HANDLER(&read_intr); @@ -505,7 +505,7 @@ static void write_intr(void) ok_to_write: if (hd_end_request(0, 512)) { SET_HANDLER(&write_intr); - outsw(HD_DATA, req->buffer, 256); + outsw(HD_DATA, bio_data(req->bio), 256); return; } @@ -624,7 +624,7 @@ repeat: printk("%s: %sing: CHS=%d/%d/%d, sectors=%d, buffer=%p\n", req->rq_disk->disk_name, req_data_dir(req) == READ ? 
"read" : "writ", - cyl, head, sec, nsect, req->buffer); + cyl, head, sec, nsect, bio_data(req->bio)); #endif if (req->cmd_type == REQ_TYPE_FS) { switch (rq_data_dir(req)) { @@ -643,7 +643,7 @@ repeat: bad_rw_intr(); goto repeat; } - outsw(HD_DATA, req->buffer, 256); + outsw(HD_DATA, bio_data(req->bio), 256); break; default: printk("unknown hd-command\n"); diff --git a/drivers/block/mg_disk.c b/drivers/block/mg_disk.c index eb59b1241366..e352cac707e8 100644 --- a/drivers/block/mg_disk.c +++ b/drivers/block/mg_disk.c @@ -479,7 +479,7 @@ static unsigned int mg_out(struct mg_host *host, static void mg_read_one(struct mg_host *host, struct request *req) { - u16 *buff = (u16 *)req->buffer; + u16 *buff = (u16 *)bio_data(req->bio); u32 i; for (i = 0; i < MG_SECTOR_SIZE >> 1; i++) @@ -496,7 +496,7 @@ static void mg_read(struct request *req) mg_bad_rw_intr(host); MG_DBG("requested %d sects (from %ld), buffer=0x%p\n", - blk_rq_sectors(req), blk_rq_pos(req), req->buffer); + blk_rq_sectors(req), blk_rq_pos(req), bio_data(req->bio)); do { if (mg_wait(host, ATA_DRQ, @@ -514,7 +514,7 @@ static void mg_read(struct request *req) static void mg_write_one(struct mg_host *host, struct request *req) { - u16 *buff = (u16 *)req->buffer; + u16 *buff = (u16 *)bio_data(req->bio); u32 i; for (i = 0; i < MG_SECTOR_SIZE >> 1; i++) @@ -534,7 +534,7 @@ static void mg_write(struct request *req) } MG_DBG("requested %d sects (from %ld), buffer=0x%p\n", - rem, blk_rq_pos(req), req->buffer); + rem, blk_rq_pos(req), bio_data(req->bio)); if (mg_wait(host, ATA_DRQ, MG_TMAX_WAIT_WR_DRQ) != MG_ERR_NONE) { @@ -585,7 +585,7 @@ ok_to_read: mg_read_one(host, req); MG_DBG("sector %ld, remaining=%ld, buffer=0x%p\n", - blk_rq_pos(req), blk_rq_sectors(req) - 1, req->buffer); + blk_rq_pos(req), blk_rq_sectors(req) - 1, bio_data(req->bio)); /* send read confirm */ outb(MG_CMD_RD_CONF, (unsigned long)host->dev_base + MG_REG_COMMAND); @@ -624,7 +624,7 @@ ok_to_write: /* write 1 sector and set handler if remains */ mg_write_one(host, req); MG_DBG("sector %ld, remaining=%ld, buffer=0x%p\n", - blk_rq_pos(req), blk_rq_sectors(req), req->buffer); + blk_rq_pos(req), blk_rq_sectors(req), bio_data(req->bio)); host->mg_do_intr = mg_write_intr; mod_timer(&host->timer, jiffies + 3 * HZ); } diff --git a/drivers/block/paride/pcd.c b/drivers/block/paride/pcd.c index e76bdc074dbe..719cb1bc1640 100644 --- a/drivers/block/paride/pcd.c +++ b/drivers/block/paride/pcd.c @@ -747,7 +747,7 @@ static void do_pcd_request(struct request_queue * q) pcd_current = cd; pcd_sector = blk_rq_pos(pcd_req); pcd_count = blk_rq_cur_sectors(pcd_req); - pcd_buf = pcd_req->buffer; + pcd_buf = bio_data(pcd_req->bio); pcd_busy = 1; ps_set_intr(do_pcd_read, NULL, 0, nice); return; diff --git a/drivers/block/paride/pd.c b/drivers/block/paride/pd.c index 19ad8f0c83ef..fea7e76a00de 100644 --- a/drivers/block/paride/pd.c +++ b/drivers/block/paride/pd.c @@ -454,7 +454,7 @@ static enum action do_pd_io_start(void) if (pd_block + pd_count > get_capacity(pd_req->rq_disk)) return Fail; pd_run = blk_rq_sectors(pd_req); - pd_buf = pd_req->buffer; + pd_buf = bio_data(pd_req->bio); pd_retries = 0; if (pd_cmd == READ) return do_pd_read_start(); @@ -485,7 +485,7 @@ static int pd_next_buf(void) spin_lock_irqsave(&pd_lock, saved_flags); __blk_end_request_cur(pd_req, 0); pd_count = blk_rq_cur_sectors(pd_req); - pd_buf = pd_req->buffer; + pd_buf = bio_data(pd_req->bio); spin_unlock_irqrestore(&pd_lock, saved_flags); return 0; } diff --git a/drivers/block/paride/pf.c b/drivers/block/paride/pf.c index 
f5c86d523ba0..9a15fd3c9349 100644 --- a/drivers/block/paride/pf.c +++ b/drivers/block/paride/pf.c @@ -795,7 +795,7 @@ repeat: } pf_cmd = rq_data_dir(pf_req); - pf_buf = pf_req->buffer; + pf_buf = bio_data(pf_req->bio); pf_retries = 0; pf_busy = 1; @@ -827,7 +827,7 @@ static int pf_next_buf(void) if (!pf_req) return 1; pf_count = blk_rq_cur_sectors(pf_req); - pf_buf = pf_req->buffer; + pf_buf = bio_data(pf_req->bio); } return 0; } diff --git a/drivers/block/skd_main.c b/drivers/block/skd_main.c index a69dd93d1bd5..36bcedfd930c 100644 --- a/drivers/block/skd_main.c +++ b/drivers/block/skd_main.c @@ -563,7 +563,6 @@ skd_prep_discard_cdb(struct skd_scsi_request *scsi_req, req = skreq->req; blk_add_request_payload(req, page, len); - req->buffer = buf; } static void skd_request_fn_not_online(struct request_queue *q); @@ -856,10 +855,10 @@ static void skd_end_request(struct skd_device *skdev, if ((io_flags & REQ_DISCARD) && (skreq->discard_page == 1)) { + struct bio *bio = req->bio; pr_debug("%s:%s:%d, free the page!", skdev->name, __func__, __LINE__); - free_page((unsigned long)req->buffer); - req->buffer = NULL; + __free_page(bio->bi_io_vec->bv_page); } if (unlikely(error)) { diff --git a/drivers/block/swim.c b/drivers/block/swim.c index b02d53a399f3..6b44bbe528b7 100644 --- a/drivers/block/swim.c +++ b/drivers/block/swim.c @@ -549,7 +549,7 @@ static void redo_fd_request(struct request_queue *q) case READ: err = floppy_read_sectors(fs, blk_rq_pos(req), blk_rq_cur_sectors(req), - req->buffer); + bio_data(req->bio)); break; } done: diff --git a/drivers/block/swim3.c b/drivers/block/swim3.c index c74f7b56e7c4..523ee8fd4c15 100644 --- a/drivers/block/swim3.c +++ b/drivers/block/swim3.c @@ -342,7 +342,7 @@ static void start_request(struct floppy_state *fs) swim3_dbg("do_fd_req: dev=%s cmd=%d sec=%ld nr_sec=%u buf=%p\n", req->rq_disk->disk_name, req->cmd, (long)blk_rq_pos(req), blk_rq_sectors(req), - req->buffer); + bio_data(req->bio)); swim3_dbg(" errors=%d current_nr_sectors=%u\n", req->errors, blk_rq_cur_sectors(req)); #endif @@ -479,11 +479,11 @@ static inline void setup_transfer(struct floppy_state *fs) /* Set up 3 dma commands: write preamble, data, postamble */ init_dma(cp, OUTPUT_MORE, write_preamble, sizeof(write_preamble)); ++cp; - init_dma(cp, OUTPUT_MORE, req->buffer, 512); + init_dma(cp, OUTPUT_MORE, bio_data(req->bio), 512); ++cp; init_dma(cp, OUTPUT_LAST, write_postamble, sizeof(write_postamble)); } else { - init_dma(cp, INPUT_LAST, req->buffer, n * 512); + init_dma(cp, INPUT_LAST, bio_data(req->bio), n * 512); } ++cp; out_le16(&cp->command, DBDMA_STOP); diff --git a/drivers/block/xen-blkfront.c b/drivers/block/xen-blkfront.c index efe1b4761735..283a30e88287 100644 --- a/drivers/block/xen-blkfront.c +++ b/drivers/block/xen-blkfront.c @@ -612,10 +612,10 @@ static void do_blkif_request(struct request_queue *rq) } pr_debug("do_blk_req %p: cmd %p, sec %lx, " - "(%u/%u) buffer:%p [%s]\n", + "(%u/%u) [%s]\n", req, req->cmd, (unsigned long)blk_rq_pos(req), blk_rq_cur_sectors(req), blk_rq_sectors(req), - req->buffer, rq_data_dir(req) ? "write" : "read"); + rq_data_dir(req) ? 
"write" : "read"); if (blkif_queue_request(req)) { blk_requeue_request(rq, req); diff --git a/drivers/block/xsysace.c b/drivers/block/xsysace.c index 1393b8871a28..ab3ea62e5dfc 100644 --- a/drivers/block/xsysace.c +++ b/drivers/block/xsysace.c @@ -661,7 +661,7 @@ static void ace_fsm_dostate(struct ace_device *ace) rq_data_dir(req)); ace->req = req; - ace->data_ptr = req->buffer; + ace->data_ptr = bio_data(req->bio); ace->data_count = blk_rq_cur_sectors(req) * ACE_BUF_PER_SECTOR; ace_out32(ace, ACE_MPULBA, blk_rq_pos(req) & 0x0FFFFFFF); @@ -733,7 +733,7 @@ static void ace_fsm_dostate(struct ace_device *ace) * blk_rq_sectors(ace->req), * blk_rq_cur_sectors(ace->req)); */ - ace->data_ptr = ace->req->buffer; + ace->data_ptr = bio_data(ace->req->bio); ace->data_count = blk_rq_cur_sectors(ace->req) * 16; ace_fsm_yieldirq(ace); break; diff --git a/drivers/block/z2ram.c b/drivers/block/z2ram.c index 27de5046708a..968f9e52effa 100644 --- a/drivers/block/z2ram.c +++ b/drivers/block/z2ram.c @@ -87,13 +87,15 @@ static void do_z2_request(struct request_queue *q) while (len) { unsigned long addr = start & Z2RAM_CHUNKMASK; unsigned long size = Z2RAM_CHUNKSIZE - addr; + void *buffer = bio_data(req->bio); + if (len < size) size = len; addr += z2ram_map[ start >> Z2RAM_CHUNKSHIFT ]; if (rq_data_dir(req) == READ) - memcpy(req->buffer, (char *)addr, size); + memcpy(buffer, (char *)addr, size); else - memcpy((char *)addr, req->buffer, size); + memcpy((char *)addr, buffer, size); start += size; len -= size; } -- cgit v1.2.3 From 9d74e25737d73e93ccddeb5a61bcd56b7b8eb57b Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Mon, 14 Apr 2014 10:30:07 +0200 Subject: blk-mq: do not initialize req->special Drivers can reach their private data easily using the blk_mq_rq_to_pdu helper and don't need req->special. By not initializing it code can be simplified nicely, and we also shave off a few more instructions from the I/O path. Signed-off-by: Christoph Hellwig Signed-off-by: Jens Axboe --- block/blk-flush.c | 10 ++-------- block/blk-mq.c | 15 ++------------- block/blk-mq.h | 1 - drivers/block/null_blk.c | 4 ++-- drivers/block/virtio_blk.c | 6 +++--- 5 files changed, 9 insertions(+), 27 deletions(-) (limited to 'drivers/block') diff --git a/block/blk-flush.c b/block/blk-flush.c index 77f20458910c..0e42adcfb55e 100644 --- a/block/blk-flush.c +++ b/block/blk-flush.c @@ -306,22 +306,16 @@ static bool blk_kick_flush(struct request_queue *q) */ q->flush_pending_idx ^= 1; + blk_rq_init(q, q->flush_rq); if (q->mq_ops) { - struct blk_mq_ctx *ctx = first_rq->mq_ctx; - struct blk_mq_hw_ctx *hctx = q->mq_ops->map_queue(q, ctx->cpu); - - blk_mq_rq_init(hctx, q->flush_rq); - q->flush_rq->mq_ctx = ctx; - /* * Reuse the tag value from the fist waiting request, * with blk-mq the tag is generated during request * allocation and drivers can rely on it being inside * the range they asked for. 
*/ + q->flush_rq->mq_ctx = first_rq->mq_ctx; q->flush_rq->tag = first_rq->tag; - } else { - blk_rq_init(q, q->flush_rq); } q->flush_rq->cmd_type = REQ_TYPE_FS; diff --git a/block/blk-mq.c b/block/blk-mq.c index 658428a28faf..d9d0984d2f01 100644 --- a/block/blk-mq.c +++ b/block/blk-mq.c @@ -251,24 +251,13 @@ struct request *blk_mq_alloc_reserved_request(struct request_queue *q, int rw, } EXPORT_SYMBOL(blk_mq_alloc_reserved_request); -/* - * Re-init and set pdu, if we have it - */ -void blk_mq_rq_init(struct blk_mq_hw_ctx *hctx, struct request *rq) -{ - blk_rq_init(hctx->queue, rq); - - if (hctx->cmd_size) - rq->special = blk_mq_rq_to_pdu(rq); -} - static void __blk_mq_free_request(struct blk_mq_hw_ctx *hctx, struct blk_mq_ctx *ctx, struct request *rq) { const int tag = rq->tag; struct request_queue *q = rq->q; - blk_mq_rq_init(hctx, rq); + blk_rq_init(hctx->queue, rq); blk_mq_put_tag(hctx->tags, tag); blk_mq_queue_exit(q); @@ -1165,7 +1154,7 @@ static int blk_mq_init_rq_map(struct blk_mq_hw_ctx *hctx, left -= to_do * rq_size; for (j = 0; j < to_do; j++) { hctx->rqs[i] = p; - blk_mq_rq_init(hctx, hctx->rqs[i]); + blk_rq_init(hctx->queue, hctx->rqs[i]); p += rq_size; i++; } diff --git a/block/blk-mq.h b/block/blk-mq.h index ebbe6bac9d61..238379a612e4 100644 --- a/block/blk-mq.h +++ b/block/blk-mq.h @@ -27,7 +27,6 @@ void blk_mq_run_hw_queue(struct blk_mq_hw_ctx *hctx, bool async); void blk_mq_init_flush(struct request_queue *q); void blk_mq_drain_queue(struct request_queue *q); void blk_mq_free_queue(struct request_queue *q); -void blk_mq_rq_init(struct blk_mq_hw_ctx *hctx, struct request *rq); /* * CPU hotplug helpers diff --git a/drivers/block/null_blk.c b/drivers/block/null_blk.c index 091b9ea14feb..71df69d90900 100644 --- a/drivers/block/null_blk.c +++ b/drivers/block/null_blk.c @@ -226,7 +226,7 @@ static void null_cmd_end_timer(struct nullb_cmd *cmd) static void null_softirq_done_fn(struct request *rq) { - end_cmd(rq->special); + end_cmd(blk_mq_rq_to_pdu(rq)); } static inline void null_handle_cmd(struct nullb_cmd *cmd) @@ -311,7 +311,7 @@ static void null_request_fn(struct request_queue *q) static int null_queue_rq(struct blk_mq_hw_ctx *hctx, struct request *rq) { - struct nullb_cmd *cmd = rq->special; + struct nullb_cmd *cmd = blk_mq_rq_to_pdu(rq); cmd->rq = rq; cmd->nq = hctx->driver_data; diff --git a/drivers/block/virtio_blk.c b/drivers/block/virtio_blk.c index 6d8a87f252de..c7d02bc9d945 100644 --- a/drivers/block/virtio_blk.c +++ b/drivers/block/virtio_blk.c @@ -112,7 +112,7 @@ static int __virtblk_add_req(struct virtqueue *vq, static inline void virtblk_request_done(struct request *req) { - struct virtblk_req *vbr = req->special; + struct virtblk_req *vbr = blk_mq_rq_to_pdu(req); int error = virtblk_result(vbr); if (req->cmd_type == REQ_TYPE_BLOCK_PC) { @@ -154,7 +154,7 @@ static void virtblk_done(struct virtqueue *vq) static int virtio_queue_rq(struct blk_mq_hw_ctx *hctx, struct request *req) { struct virtio_blk *vblk = hctx->queue->queuedata; - struct virtblk_req *vbr = req->special; + struct virtblk_req *vbr = blk_mq_rq_to_pdu(req); unsigned long flags; unsigned int num; const bool last = (req->cmd_flags & REQ_END) != 0; @@ -501,7 +501,7 @@ static int virtblk_init_vbr(void *data, struct blk_mq_hw_ctx *hctx, struct request *rq, unsigned int nr) { struct virtio_blk *vblk = data; - struct virtblk_req *vbr = rq->special; + struct virtblk_req *vbr = blk_mq_rq_to_pdu(rq); sg_init_table(vbr->sg, vblk->sg_elems); return 0; -- cgit v1.2.3 From e9b267d91f6ddbc694cb40aa962b0b2cec03971d Mon 
Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Tue, 15 Apr 2014 13:59:10 -0600 Subject: blk-mq: add ->init_request and ->exit_request methods The current blk_mq_init_commands/blk_mq_free_commands interface has a two problems: 1) Because only the constructor is passed to blk_mq_init_commands there is no easy way to clean up when a comman initialization failed. The current code simply leaks the allocations done in the constructor. 2) There is no good place to call blk_mq_free_commands: before blk_cleanup_queue there is no guarantee that all outstanding commands have completed, so we can't free them yet. After blk_cleanup_queue the queue has usually been freed. This can be worked around by grabbing an unconditional reference before calling blk_cleanup_queue and dropping it after blk_mq_free_commands is done, although that's not exatly pretty and driver writers are guaranteed to get it wrong sooner or later. Both issues are easily fixed by making the request constructor and destructor normal blk_mq_ops methods. Signed-off-by: Christoph Hellwig Signed-off-by: Jens Axboe --- block/blk-mq.c | 105 ++++++++++++++------------------------------- drivers/block/virtio_blk.c | 23 +++++----- include/linux/blk-mq.h | 14 +++++- 3 files changed, 55 insertions(+), 87 deletions(-) (limited to 'drivers/block') diff --git a/block/blk-mq.c b/block/blk-mq.c index e644feec068c..48d2d8495f5e 100644 --- a/block/blk-mq.c +++ b/block/blk-mq.c @@ -1031,74 +1031,20 @@ static void blk_mq_hctx_notify(void *data, unsigned long action, blk_mq_put_ctx(ctx); } -static int blk_mq_init_hw_commands(struct blk_mq_hw_ctx *hctx, - int (*init)(void *, struct blk_mq_hw_ctx *, - struct request *, unsigned int), - void *data) +static void blk_mq_free_rq_map(struct blk_mq_hw_ctx *hctx, void *driver_data) { - unsigned int i; - int ret = 0; - - for (i = 0; i < hctx->queue_depth; i++) { - struct request *rq = hctx->rqs[i]; - - ret = init(data, hctx, rq, i); - if (ret) - break; - } - - return ret; -} - -int blk_mq_init_commands(struct request_queue *q, - int (*init)(void *, struct blk_mq_hw_ctx *, - struct request *, unsigned int), - void *data) -{ - struct blk_mq_hw_ctx *hctx; - unsigned int i; - int ret = 0; - - queue_for_each_hw_ctx(q, hctx, i) { - ret = blk_mq_init_hw_commands(hctx, init, data); - if (ret) - break; - } - - return ret; -} -EXPORT_SYMBOL(blk_mq_init_commands); - -static void blk_mq_free_hw_commands(struct blk_mq_hw_ctx *hctx, - void (*free)(void *, struct blk_mq_hw_ctx *, - struct request *, unsigned int), - void *data) -{ - unsigned int i; + struct page *page; - for (i = 0; i < hctx->queue_depth; i++) { - struct request *rq = hctx->rqs[i]; + if (hctx->rqs && hctx->queue->mq_ops->exit_request) { + int i; - free(data, hctx, rq, i); + for (i = 0; i < hctx->queue_depth; i++) { + if (!hctx->rqs[i]) + continue; + hctx->queue->mq_ops->exit_request(driver_data, hctx, + hctx->rqs[i], i); + } } -} - -void blk_mq_free_commands(struct request_queue *q, - void (*free)(void *, struct blk_mq_hw_ctx *, - struct request *, unsigned int), - void *data) -{ - struct blk_mq_hw_ctx *hctx; - unsigned int i; - - queue_for_each_hw_ctx(q, hctx, i) - blk_mq_free_hw_commands(hctx, free, data); -} -EXPORT_SYMBOL(blk_mq_free_commands); - -static void blk_mq_free_rq_map(struct blk_mq_hw_ctx *hctx) -{ - struct page *page; while (!list_empty(&hctx->page_list)) { page = list_first_entry(&hctx->page_list, struct page, lru); @@ -1123,10 +1069,12 @@ static size_t order_to_size(unsigned int order) } static int blk_mq_init_rq_map(struct blk_mq_hw_ctx 
*hctx, - unsigned int reserved_tags, int node) + struct blk_mq_reg *reg, void *driver_data, int node) { + unsigned int reserved_tags = reg->reserved_tags; unsigned int i, j, entries_per_page, max_order = 4; size_t rq_size, left; + int error; INIT_LIST_HEAD(&hctx->page_list); @@ -1175,14 +1123,23 @@ static int blk_mq_init_rq_map(struct blk_mq_hw_ctx *hctx, for (j = 0; j < to_do; j++) { hctx->rqs[i] = p; blk_rq_init(hctx->queue, hctx->rqs[i]); + if (reg->ops->init_request) { + error = reg->ops->init_request(driver_data, + hctx, hctx->rqs[i], i); + if (error) + goto err_rq_map; + } + p += rq_size; i++; } } - if (i < (reserved_tags + BLK_MQ_TAG_MIN)) + if (i < (reserved_tags + BLK_MQ_TAG_MIN)) { + error = -ENOMEM; goto err_rq_map; - else if (i != hctx->queue_depth) { + } + if (i != hctx->queue_depth) { hctx->queue_depth = i; pr_warn("%s: queue depth set to %u because of low memory\n", __func__, i); @@ -1190,12 +1147,14 @@ static int blk_mq_init_rq_map(struct blk_mq_hw_ctx *hctx, hctx->tags = blk_mq_init_tags(hctx->queue_depth, reserved_tags, node); if (!hctx->tags) { -err_rq_map: - blk_mq_free_rq_map(hctx); - return -ENOMEM; + error = -ENOMEM; + goto err_rq_map; } return 0; +err_rq_map: + blk_mq_free_rq_map(hctx, driver_data); + return error; } static int blk_mq_init_hw_queues(struct request_queue *q, @@ -1228,7 +1187,7 @@ static int blk_mq_init_hw_queues(struct request_queue *q, blk_mq_hctx_notify, hctx); blk_mq_register_cpu_notifier(&hctx->cpu_notifier); - if (blk_mq_init_rq_map(hctx, reg->reserved_tags, node)) + if (blk_mq_init_rq_map(hctx, reg, driver_data, node)) break; /* @@ -1268,7 +1227,7 @@ static int blk_mq_init_hw_queues(struct request_queue *q, reg->ops->exit_hctx(hctx, j); blk_mq_unregister_cpu_notifier(&hctx->cpu_notifier); - blk_mq_free_rq_map(hctx); + blk_mq_free_rq_map(hctx, driver_data); kfree(hctx->ctxs); } @@ -1455,7 +1414,7 @@ void blk_mq_free_queue(struct request_queue *q) queue_for_each_hw_ctx(q, hctx, i) { kfree(hctx->ctx_map); kfree(hctx->ctxs); - blk_mq_free_rq_map(hctx); + blk_mq_free_rq_map(hctx, q->queuedata); blk_mq_unregister_cpu_notifier(&hctx->cpu_notifier); if (q->mq_ops->exit_hctx) q->mq_ops->exit_hctx(hctx, i); diff --git a/drivers/block/virtio_blk.c b/drivers/block/virtio_blk.c index c7d02bc9d945..d06206abd340 100644 --- a/drivers/block/virtio_blk.c +++ b/drivers/block/virtio_blk.c @@ -480,11 +480,22 @@ static const struct device_attribute dev_attr_cache_type_rw = __ATTR(cache_type, S_IRUGO|S_IWUSR, virtblk_cache_type_show, virtblk_cache_type_store); +static int virtblk_init_request(void *data, struct blk_mq_hw_ctx *hctx, + struct request *rq, unsigned int nr) +{ + struct virtio_blk *vblk = data; + struct virtblk_req *vbr = blk_mq_rq_to_pdu(rq); + + sg_init_table(vbr->sg, vblk->sg_elems); + return 0; +} + static struct blk_mq_ops virtio_mq_ops = { .queue_rq = virtio_queue_rq, .map_queue = blk_mq_map_queue, .alloc_hctx = blk_mq_alloc_single_hw_queue, .free_hctx = blk_mq_free_single_hw_queue, + .init_request = virtblk_init_request, .complete = virtblk_request_done, }; @@ -497,16 +508,6 @@ static struct blk_mq_reg virtio_mq_reg = { }; module_param_named(queue_depth, virtio_mq_reg.queue_depth, uint, 0444); -static int virtblk_init_vbr(void *data, struct blk_mq_hw_ctx *hctx, - struct request *rq, unsigned int nr) -{ - struct virtio_blk *vblk = data; - struct virtblk_req *vbr = blk_mq_rq_to_pdu(rq); - - sg_init_table(vbr->sg, vblk->sg_elems); - return 0; -} - static int virtblk_probe(struct virtio_device *vdev) { struct virtio_blk *vblk; @@ -577,8 +578,6 @@ 
static int virtblk_probe(struct virtio_device *vdev) goto out_put_disk; } - blk_mq_init_commands(q, virtblk_init_vbr, vblk); - q->queuedata = vblk; virtblk_name_format("vd", index, vblk->disk->disk_name, DISK_NAME_LEN); diff --git a/include/linux/blk-mq.h b/include/linux/blk-mq.h index b6ee48740458..29c1a6e83814 100644 --- a/include/linux/blk-mq.h +++ b/include/linux/blk-mq.h @@ -67,6 +67,10 @@ typedef struct blk_mq_hw_ctx *(alloc_hctx_fn)(struct blk_mq_reg *,unsigned int); typedef void (free_hctx_fn)(struct blk_mq_hw_ctx *, unsigned int); typedef int (init_hctx_fn)(struct blk_mq_hw_ctx *, void *, unsigned int); typedef void (exit_hctx_fn)(struct blk_mq_hw_ctx *, unsigned int); +typedef int (init_request_fn)(void *, struct blk_mq_hw_ctx *, + struct request *, unsigned int); +typedef void (exit_request_fn)(void *, struct blk_mq_hw_ctx *, + struct request *, unsigned int); struct blk_mq_ops { /* @@ -99,6 +103,14 @@ struct blk_mq_ops { */ init_hctx_fn *init_hctx; exit_hctx_fn *exit_hctx; + + /* + * Called for every command allocated by the block layer to allow + * the driver to set up driver specific data. + * Ditto for exit/teardown. + */ + init_request_fn *init_request; + exit_request_fn *exit_request; }; enum { @@ -118,8 +130,6 @@ enum { struct request_queue *blk_mq_init_queue(struct blk_mq_reg *, void *); int blk_mq_register_disk(struct gendisk *); void blk_mq_unregister_disk(struct gendisk *); -int blk_mq_init_commands(struct request_queue *, int (*init)(void *data, struct blk_mq_hw_ctx *, struct request *, unsigned int), void *data); -void blk_mq_free_commands(struct request_queue *, void (*free)(void *data, struct blk_mq_hw_ctx *, struct request *, unsigned int), void *data); void blk_mq_flush_plug_list(struct blk_plug *plug, bool from_schedule); -- cgit v1.2.3 From 24d2f90309b23f2cfe016b2aebc5f0d6e01c57fd Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Tue, 15 Apr 2014 14:14:00 -0600 Subject: blk-mq: split out tag initialization, support shared tags Add a new blk_mq_tag_set structure that gets set up before we initialize the queue. A single blk_mq_tag_set structure can be shared by multiple queues. Signed-off-by: Christoph Hellwig Modular export of blk_mq_{alloc,free}_tagset added by me. 
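
For reference, the new driver-side setup sequence looks roughly like this (a
minimal sketch distilled from the null_blk and virtio_blk conversions in this
patch; my_mq_ops, my_cmd and my_dev are illustrative placeholders and the
queue depth is arbitrary):

	struct blk_mq_tag_set tag_set;
	struct request_queue *q;
	int err;

	memset(&tag_set, 0, sizeof(tag_set));
	tag_set.ops		= &my_mq_ops;		/* queue_rq, map_queue, ... */
	tag_set.nr_hw_queues	= 1;
	tag_set.queue_depth	= 64;
	tag_set.numa_node	= NUMA_NO_NODE;
	tag_set.cmd_size	= sizeof(struct my_cmd);	/* per-request driver payload */
	tag_set.flags		= BLK_MQ_F_SHOULD_MERGE;
	tag_set.driver_data	= my_dev;

	/* allocates the tag map and requests, calling ->init_request for each */
	err = blk_mq_alloc_tag_set(&tag_set);
	if (err)
		return err;

	q = blk_mq_init_queue(&tag_set);
	if (!q) {
		blk_mq_free_tag_set(&tag_set);
		return -ENOMEM;
	}

	/* teardown: blk_cleanup_queue(q), then blk_mq_free_tag_set(&tag_set) */

Every queue that should share the same tag space is simply created against
the same blk_mq_tag_set.
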
Signed-off-by: Jens Axboe --- block/blk-mq-cpumap.c | 6 +- block/blk-mq-tag.c | 14 --- block/blk-mq-tag.h | 19 +++- block/blk-mq.c | 244 +++++++++++++++++++++++++-------------------- block/blk-mq.h | 5 +- drivers/block/null_blk.c | 92 ++++++++++------- drivers/block/virtio_blk.c | 48 +++++---- include/linux/blk-mq.h | 34 +++---- 8 files changed, 262 insertions(+), 200 deletions(-) (limited to 'drivers/block') diff --git a/block/blk-mq-cpumap.c b/block/blk-mq-cpumap.c index 097921329619..5d0f93cf358c 100644 --- a/block/blk-mq-cpumap.c +++ b/block/blk-mq-cpumap.c @@ -80,17 +80,17 @@ int blk_mq_update_queue_map(unsigned int *map, unsigned int nr_queues) return 0; } -unsigned int *blk_mq_make_queue_map(struct blk_mq_reg *reg) +unsigned int *blk_mq_make_queue_map(struct blk_mq_tag_set *set) { unsigned int *map; /* If cpus are offline, map them to first hctx */ map = kzalloc_node(sizeof(*map) * num_possible_cpus(), GFP_KERNEL, - reg->numa_node); + set->numa_node); if (!map) return NULL; - if (!blk_mq_update_queue_map(map, reg->nr_hw_queues)) + if (!blk_mq_update_queue_map(map, set->nr_hw_queues)) return map; kfree(map); diff --git a/block/blk-mq-tag.c b/block/blk-mq-tag.c index 83ae96c51a27..7a799c46c32d 100644 --- a/block/blk-mq-tag.c +++ b/block/blk-mq-tag.c @@ -1,25 +1,11 @@ #include #include -#include #include #include "blk.h" #include "blk-mq.h" #include "blk-mq-tag.h" -/* - * Per tagged queue (tag address space) map - */ -struct blk_mq_tags { - unsigned int nr_tags; - unsigned int nr_reserved_tags; - unsigned int nr_batch_move; - unsigned int nr_max_cache; - - struct percpu_ida free_tags; - struct percpu_ida reserved_tags; -}; - void blk_mq_wait_for_tags(struct blk_mq_tags *tags) { int tag = blk_mq_get_tag(tags, __GFP_WAIT, false); diff --git a/block/blk-mq-tag.h b/block/blk-mq-tag.h index 947ba2c6148e..b602e3fa66ea 100644 --- a/block/blk-mq-tag.h +++ b/block/blk-mq-tag.h @@ -1,7 +1,24 @@ #ifndef INT_BLK_MQ_TAG_H #define INT_BLK_MQ_TAG_H -struct blk_mq_tags; +#include + +/* + * Tag address space map. 
+ */ +struct blk_mq_tags { + unsigned int nr_tags; + unsigned int nr_reserved_tags; + unsigned int nr_batch_move; + unsigned int nr_max_cache; + + struct percpu_ida free_tags; + struct percpu_ida reserved_tags; + + struct request **rqs; + struct list_head page_list; +}; + extern struct blk_mq_tags *blk_mq_init_tags(unsigned int nr_tags, unsigned int reserved_tags, int node); extern void blk_mq_free_tags(struct blk_mq_tags *tags); diff --git a/block/blk-mq.c b/block/blk-mq.c index 2a5a0fed10a3..9180052d42cc 100644 --- a/block/blk-mq.c +++ b/block/blk-mq.c @@ -81,7 +81,7 @@ static struct request *__blk_mq_alloc_request(struct blk_mq_hw_ctx *hctx, tag = blk_mq_get_tag(hctx->tags, gfp, reserved); if (tag != BLK_MQ_TAG_FAIL) { - rq = hctx->rqs[tag]; + rq = hctx->tags->rqs[tag]; blk_rq_init(hctx->queue, rq); rq->tag = tag; @@ -404,6 +404,12 @@ static void blk_mq_requeue_request(struct request *rq) rq->nr_phys_segments--; } +struct request *blk_mq_tag_to_rq(struct blk_mq_tags *tags, unsigned int tag) +{ + return tags->rqs[tag]; +} +EXPORT_SYMBOL(blk_mq_tag_to_rq); + struct blk_mq_timeout_data { struct blk_mq_hw_ctx *hctx; unsigned long *next; @@ -425,12 +431,13 @@ static void blk_mq_timeout_check(void *__data, unsigned long *free_tags) do { struct request *rq; - tag = find_next_zero_bit(free_tags, hctx->queue_depth, tag); - if (tag >= hctx->queue_depth) + tag = find_next_zero_bit(free_tags, hctx->tags->nr_tags, tag); + if (tag >= hctx->tags->nr_tags) break; - rq = hctx->rqs[tag++]; - + rq = blk_mq_tag_to_rq(hctx->tags, tag++); + if (rq->q != hctx->queue) + continue; if (!test_bit(REQ_ATOM_STARTED, &rq->atomic_flags)) continue; @@ -969,11 +976,11 @@ struct blk_mq_hw_ctx *blk_mq_map_queue(struct request_queue *q, const int cpu) } EXPORT_SYMBOL(blk_mq_map_queue); -struct blk_mq_hw_ctx *blk_mq_alloc_single_hw_queue(struct blk_mq_reg *reg, +struct blk_mq_hw_ctx *blk_mq_alloc_single_hw_queue(struct blk_mq_tag_set *set, unsigned int hctx_index) { return kmalloc_node(sizeof(struct blk_mq_hw_ctx), - GFP_KERNEL | __GFP_ZERO, reg->numa_node); + GFP_KERNEL | __GFP_ZERO, set->numa_node); } EXPORT_SYMBOL(blk_mq_alloc_single_hw_queue); @@ -1030,31 +1037,31 @@ static void blk_mq_hctx_notify(void *data, unsigned long action, blk_mq_put_ctx(ctx); } -static void blk_mq_free_rq_map(struct blk_mq_hw_ctx *hctx, void *driver_data) +static void blk_mq_free_rq_map(struct blk_mq_tag_set *set, + struct blk_mq_tags *tags, unsigned int hctx_idx) { struct page *page; - if (hctx->rqs && hctx->queue->mq_ops->exit_request) { + if (tags->rqs && set->ops->exit_request) { int i; - for (i = 0; i < hctx->queue_depth; i++) { - if (!hctx->rqs[i]) + for (i = 0; i < tags->nr_tags; i++) { + if (!tags->rqs[i]) continue; - hctx->queue->mq_ops->exit_request(driver_data, hctx, - hctx->rqs[i], i); + set->ops->exit_request(set->driver_data, tags->rqs[i], + hctx_idx, i); } } - while (!list_empty(&hctx->page_list)) { - page = list_first_entry(&hctx->page_list, struct page, lru); + while (!list_empty(&tags->page_list)) { + page = list_first_entry(&tags->page_list, struct page, lru); list_del_init(&page->lru); __free_pages(page, page->private); } - kfree(hctx->rqs); + kfree(tags->rqs); - if (hctx->tags) - blk_mq_free_tags(hctx->tags); + blk_mq_free_tags(tags); } static size_t order_to_size(unsigned int order) @@ -1067,30 +1074,36 @@ static size_t order_to_size(unsigned int order) return ret; } -static int blk_mq_init_rq_map(struct blk_mq_hw_ctx *hctx, - struct blk_mq_reg *reg, void *driver_data, int node) +static struct blk_mq_tags 
*blk_mq_init_rq_map(struct blk_mq_tag_set *set, + unsigned int hctx_idx) { - unsigned int reserved_tags = reg->reserved_tags; + struct blk_mq_tags *tags; unsigned int i, j, entries_per_page, max_order = 4; size_t rq_size, left; - int error; - INIT_LIST_HEAD(&hctx->page_list); + tags = blk_mq_init_tags(set->queue_depth, set->reserved_tags, + set->numa_node); + if (!tags) + return NULL; - hctx->rqs = kmalloc_node(hctx->queue_depth * sizeof(struct request *), - GFP_KERNEL, node); - if (!hctx->rqs) - return -ENOMEM; + INIT_LIST_HEAD(&tags->page_list); + + tags->rqs = kmalloc_node(set->queue_depth * sizeof(struct request *), + GFP_KERNEL, set->numa_node); + if (!tags->rqs) { + blk_mq_free_tags(tags); + return NULL; + } /* * rq_size is the size of the request plus driver payload, rounded * to the cacheline size */ - rq_size = round_up(sizeof(struct request) + hctx->cmd_size, + rq_size = round_up(sizeof(struct request) + set->cmd_size, cache_line_size()); - left = rq_size * hctx->queue_depth; + left = rq_size * set->queue_depth; - for (i = 0; i < hctx->queue_depth;) { + for (i = 0; i < set->queue_depth; ) { int this_order = max_order; struct page *page; int to_do; @@ -1100,7 +1113,8 @@ static int blk_mq_init_rq_map(struct blk_mq_hw_ctx *hctx, this_order--; do { - page = alloc_pages_node(node, GFP_KERNEL, this_order); + page = alloc_pages_node(set->numa_node, GFP_KERNEL, + this_order); if (page) break; if (!this_order--) @@ -1110,22 +1124,22 @@ static int blk_mq_init_rq_map(struct blk_mq_hw_ctx *hctx, } while (1); if (!page) - break; + goto fail; page->private = this_order; - list_add_tail(&page->lru, &hctx->page_list); + list_add_tail(&page->lru, &tags->page_list); p = page_address(page); entries_per_page = order_to_size(this_order) / rq_size; - to_do = min(entries_per_page, hctx->queue_depth - i); + to_do = min(entries_per_page, set->queue_depth - i); left -= to_do * rq_size; for (j = 0; j < to_do; j++) { - hctx->rqs[i] = p; - if (reg->ops->init_request) { - error = reg->ops->init_request(driver_data, - hctx, hctx->rqs[i], i); - if (error) - goto err_rq_map; + tags->rqs[i] = p; + if (set->ops->init_request) { + if (set->ops->init_request(set->driver_data, + tags->rqs[i], hctx_idx, i, + set->numa_node)) + goto fail; } p += rq_size; @@ -1133,30 +1147,16 @@ static int blk_mq_init_rq_map(struct blk_mq_hw_ctx *hctx, } } - if (i < (reserved_tags + BLK_MQ_TAG_MIN)) { - error = -ENOMEM; - goto err_rq_map; - } - if (i != hctx->queue_depth) { - hctx->queue_depth = i; - pr_warn("%s: queue depth set to %u because of low memory\n", - __func__, i); - } + return tags; - hctx->tags = blk_mq_init_tags(hctx->queue_depth, reserved_tags, node); - if (!hctx->tags) { - error = -ENOMEM; - goto err_rq_map; - } - - return 0; -err_rq_map: - blk_mq_free_rq_map(hctx, driver_data); - return error; +fail: + pr_warn("%s: failed to allocate requests\n", __func__); + blk_mq_free_rq_map(set, tags, hctx_idx); + return NULL; } static int blk_mq_init_hw_queues(struct request_queue *q, - struct blk_mq_reg *reg, void *driver_data) + struct blk_mq_tag_set *set) { struct blk_mq_hw_ctx *hctx; unsigned int i, j; @@ -1170,23 +1170,21 @@ static int blk_mq_init_hw_queues(struct request_queue *q, node = hctx->numa_node; if (node == NUMA_NO_NODE) - node = hctx->numa_node = reg->numa_node; + node = hctx->numa_node = set->numa_node; INIT_DELAYED_WORK(&hctx->delayed_work, blk_mq_work_fn); spin_lock_init(&hctx->lock); INIT_LIST_HEAD(&hctx->dispatch); hctx->queue = q; hctx->queue_num = i; - hctx->flags = reg->flags; - hctx->queue_depth = 
reg->queue_depth; - hctx->cmd_size = reg->cmd_size; + hctx->flags = set->flags; + hctx->cmd_size = set->cmd_size; blk_mq_init_cpu_notifier(&hctx->cpu_notifier, blk_mq_hctx_notify, hctx); blk_mq_register_cpu_notifier(&hctx->cpu_notifier); - if (blk_mq_init_rq_map(hctx, reg, driver_data, node)) - break; + hctx->tags = set->tags[i]; /* * Allocate space for all possible cpus to avoid allocation in @@ -1206,8 +1204,8 @@ static int blk_mq_init_hw_queues(struct request_queue *q, hctx->nr_ctx_map = num_maps; hctx->nr_ctx = 0; - if (reg->ops->init_hctx && - reg->ops->init_hctx(hctx, driver_data, i)) + if (set->ops->init_hctx && + set->ops->init_hctx(hctx, set->driver_data, i)) break; } @@ -1221,11 +1219,10 @@ static int blk_mq_init_hw_queues(struct request_queue *q, if (i == j) break; - if (reg->ops->exit_hctx) - reg->ops->exit_hctx(hctx, j); + if (set->ops->exit_hctx) + set->ops->exit_hctx(hctx, j); blk_mq_unregister_cpu_notifier(&hctx->cpu_notifier); - blk_mq_free_rq_map(hctx, driver_data); kfree(hctx->ctxs); } @@ -1290,41 +1287,25 @@ static void blk_mq_map_swqueue(struct request_queue *q) } } -struct request_queue *blk_mq_init_queue(struct blk_mq_reg *reg, - void *driver_data) +struct request_queue *blk_mq_init_queue(struct blk_mq_tag_set *set) { struct blk_mq_hw_ctx **hctxs; struct blk_mq_ctx *ctx; struct request_queue *q; int i; - if (!reg->nr_hw_queues || - !reg->ops->queue_rq || !reg->ops->map_queue || - !reg->ops->alloc_hctx || !reg->ops->free_hctx) - return ERR_PTR(-EINVAL); - - if (!reg->queue_depth) - reg->queue_depth = BLK_MQ_MAX_DEPTH; - else if (reg->queue_depth > BLK_MQ_MAX_DEPTH) { - pr_err("blk-mq: queuedepth too large (%u)\n", reg->queue_depth); - reg->queue_depth = BLK_MQ_MAX_DEPTH; - } - - if (reg->queue_depth < (reg->reserved_tags + BLK_MQ_TAG_MIN)) - return ERR_PTR(-EINVAL); - ctx = alloc_percpu(struct blk_mq_ctx); if (!ctx) return ERR_PTR(-ENOMEM); - hctxs = kmalloc_node(reg->nr_hw_queues * sizeof(*hctxs), GFP_KERNEL, - reg->numa_node); + hctxs = kmalloc_node(set->nr_hw_queues * sizeof(*hctxs), GFP_KERNEL, + set->numa_node); if (!hctxs) goto err_percpu; - for (i = 0; i < reg->nr_hw_queues; i++) { - hctxs[i] = reg->ops->alloc_hctx(reg, i); + for (i = 0; i < set->nr_hw_queues; i++) { + hctxs[i] = set->ops->alloc_hctx(set, i); if (!hctxs[i]) goto err_hctxs; @@ -1335,11 +1316,11 @@ struct request_queue *blk_mq_init_queue(struct blk_mq_reg *reg, hctxs[i]->queue_num = i; } - q = blk_alloc_queue_node(GFP_KERNEL, reg->numa_node); + q = blk_alloc_queue_node(GFP_KERNEL, set->numa_node); if (!q) goto err_hctxs; - q->mq_map = blk_mq_make_queue_map(reg); + q->mq_map = blk_mq_make_queue_map(set); if (!q->mq_map) goto err_map; @@ -1347,33 +1328,34 @@ struct request_queue *blk_mq_init_queue(struct blk_mq_reg *reg, blk_queue_rq_timeout(q, 30000); q->nr_queues = nr_cpu_ids; - q->nr_hw_queues = reg->nr_hw_queues; + q->nr_hw_queues = set->nr_hw_queues; q->queue_ctx = ctx; q->queue_hw_ctx = hctxs; - q->mq_ops = reg->ops; + q->mq_ops = set->ops; q->queue_flags |= QUEUE_FLAG_MQ_DEFAULT; q->sg_reserved_size = INT_MAX; blk_queue_make_request(q, blk_mq_make_request); - blk_queue_rq_timed_out(q, reg->ops->timeout); - if (reg->timeout) - blk_queue_rq_timeout(q, reg->timeout); + blk_queue_rq_timed_out(q, set->ops->timeout); + if (set->timeout) + blk_queue_rq_timeout(q, set->timeout); - if (reg->ops->complete) - blk_queue_softirq_done(q, reg->ops->complete); + if (set->ops->complete) + blk_queue_softirq_done(q, set->ops->complete); blk_mq_init_flush(q); - blk_mq_init_cpu_queues(q, reg->nr_hw_queues); + 
blk_mq_init_cpu_queues(q, set->nr_hw_queues); - q->flush_rq = kzalloc(round_up(sizeof(struct request) + reg->cmd_size, - cache_line_size()), GFP_KERNEL); + q->flush_rq = kzalloc(round_up(sizeof(struct request) + + set->cmd_size, cache_line_size()), + GFP_KERNEL); if (!q->flush_rq) goto err_hw; - if (blk_mq_init_hw_queues(q, reg, driver_data)) + if (blk_mq_init_hw_queues(q, set)) goto err_flush_rq; blk_mq_map_swqueue(q); @@ -1391,11 +1373,11 @@ err_hw: err_map: blk_cleanup_queue(q); err_hctxs: - for (i = 0; i < reg->nr_hw_queues; i++) { + for (i = 0; i < set->nr_hw_queues; i++) { if (!hctxs[i]) break; free_cpumask_var(hctxs[i]->cpumask); - reg->ops->free_hctx(hctxs[i], i); + set->ops->free_hctx(hctxs[i], i); } kfree(hctxs); err_percpu: @@ -1412,7 +1394,6 @@ void blk_mq_free_queue(struct request_queue *q) queue_for_each_hw_ctx(q, hctx, i) { kfree(hctx->ctx_map); kfree(hctx->ctxs); - blk_mq_free_rq_map(hctx, q->queuedata); blk_mq_unregister_cpu_notifier(&hctx->cpu_notifier); if (q->mq_ops->exit_hctx) q->mq_ops->exit_hctx(hctx, i); @@ -1473,6 +1454,53 @@ static int blk_mq_queue_reinit_notify(struct notifier_block *nb, return NOTIFY_OK; } +int blk_mq_alloc_tag_set(struct blk_mq_tag_set *set) +{ + int i; + + if (!set->nr_hw_queues) + return -EINVAL; + if (!set->queue_depth || set->queue_depth > BLK_MQ_MAX_DEPTH) + return -EINVAL; + if (set->queue_depth < set->reserved_tags + BLK_MQ_TAG_MIN) + return -EINVAL; + + if (!set->nr_hw_queues || + !set->ops->queue_rq || !set->ops->map_queue || + !set->ops->alloc_hctx || !set->ops->free_hctx) + return -EINVAL; + + + set->tags = kmalloc_node(set->nr_hw_queues * sizeof(struct blk_mq_tags), + GFP_KERNEL, set->numa_node); + if (!set->tags) + goto out; + + for (i = 0; i < set->nr_hw_queues; i++) { + set->tags[i] = blk_mq_init_rq_map(set, i); + if (!set->tags[i]) + goto out_unwind; + } + + return 0; + +out_unwind: + while (--i >= 0) + blk_mq_free_rq_map(set, set->tags[i], i); +out: + return -ENOMEM; +} +EXPORT_SYMBOL(blk_mq_alloc_tag_set); + +void blk_mq_free_tag_set(struct blk_mq_tag_set *set) +{ + int i; + + for (i = 0; i < set->nr_hw_queues; i++) + blk_mq_free_rq_map(set, set->tags[i], i); +} +EXPORT_SYMBOL(blk_mq_free_tag_set); + void blk_mq_disable_hotplug(void) { mutex_lock(&all_q_mutex); diff --git a/block/blk-mq.h b/block/blk-mq.h index 7964dadb7d64..5fa14f19f752 100644 --- a/block/blk-mq.h +++ b/block/blk-mq.h @@ -1,6 +1,8 @@ #ifndef INT_BLK_MQ_H #define INT_BLK_MQ_H +struct blk_mq_tag_set; + struct blk_mq_ctx { struct { spinlock_t lock; @@ -46,8 +48,7 @@ void blk_mq_disable_hotplug(void); /* * CPU -> queue mappings */ -struct blk_mq_reg; -extern unsigned int *blk_mq_make_queue_map(struct blk_mq_reg *reg); +extern unsigned int *blk_mq_make_queue_map(struct blk_mq_tag_set *set); extern int blk_mq_update_queue_map(unsigned int *map, unsigned int nr_queues); void blk_mq_add_timer(struct request *rq); diff --git a/drivers/block/null_blk.c b/drivers/block/null_blk.c index 71df69d90900..8e7e3a0b0d24 100644 --- a/drivers/block/null_blk.c +++ b/drivers/block/null_blk.c @@ -32,6 +32,7 @@ struct nullb { unsigned int index; struct request_queue *q; struct gendisk *disk; + struct blk_mq_tag_set tag_set; struct hrtimer timer; unsigned int queue_depth; spinlock_t lock; @@ -320,10 +321,11 @@ static int null_queue_rq(struct blk_mq_hw_ctx *hctx, struct request *rq) return BLK_MQ_RQ_QUEUE_OK; } -static struct blk_mq_hw_ctx *null_alloc_hctx(struct blk_mq_reg *reg, unsigned int hctx_index) +static struct blk_mq_hw_ctx *null_alloc_hctx(struct blk_mq_tag_set *set, + 
unsigned int hctx_index) { - int b_size = DIV_ROUND_UP(reg->nr_hw_queues, nr_online_nodes); - int tip = (reg->nr_hw_queues % nr_online_nodes); + int b_size = DIV_ROUND_UP(set->nr_hw_queues, nr_online_nodes); + int tip = (set->nr_hw_queues % nr_online_nodes); int node = 0, i, n; /* @@ -338,7 +340,7 @@ static struct blk_mq_hw_ctx *null_alloc_hctx(struct blk_mq_reg *reg, unsigned in tip--; if (!tip) - b_size = reg->nr_hw_queues / nr_online_nodes; + b_size = set->nr_hw_queues / nr_online_nodes; } } @@ -387,13 +389,17 @@ static struct blk_mq_ops null_mq_ops = { .map_queue = blk_mq_map_queue, .init_hctx = null_init_hctx, .complete = null_softirq_done_fn, + .alloc_hctx = blk_mq_alloc_single_hw_queue, + .free_hctx = blk_mq_free_single_hw_queue, }; -static struct blk_mq_reg null_mq_reg = { - .ops = &null_mq_ops, - .queue_depth = 64, - .cmd_size = sizeof(struct nullb_cmd), - .flags = BLK_MQ_F_SHOULD_MERGE, +static struct blk_mq_ops null_mq_ops_pernode = { + .queue_rq = null_queue_rq, + .map_queue = blk_mq_map_queue, + .init_hctx = null_init_hctx, + .complete = null_softirq_done_fn, + .alloc_hctx = null_alloc_hctx, + .free_hctx = null_free_hctx, }; static void null_del_dev(struct nullb *nullb) @@ -402,6 +408,8 @@ static void null_del_dev(struct nullb *nullb) del_gendisk(nullb->disk); blk_cleanup_queue(nullb->q); + if (queue_mode == NULL_Q_MQ) + blk_mq_free_tag_set(&nullb->tag_set); put_disk(nullb->disk); kfree(nullb); } @@ -506,7 +514,7 @@ static int null_add_dev(void) nullb = kzalloc_node(sizeof(*nullb), GFP_KERNEL, home_node); if (!nullb) - return -ENOMEM; + goto out; spin_lock_init(&nullb->lock); @@ -514,49 +522,47 @@ static int null_add_dev(void) submit_queues = nr_online_nodes; if (setup_queues(nullb)) - goto err; + goto out_free_nullb; if (queue_mode == NULL_Q_MQ) { - null_mq_reg.numa_node = home_node; - null_mq_reg.queue_depth = hw_queue_depth; - null_mq_reg.nr_hw_queues = submit_queues; - - if (use_per_node_hctx) { - null_mq_reg.ops->alloc_hctx = null_alloc_hctx; - null_mq_reg.ops->free_hctx = null_free_hctx; - } else { - null_mq_reg.ops->alloc_hctx = blk_mq_alloc_single_hw_queue; - null_mq_reg.ops->free_hctx = blk_mq_free_single_hw_queue; - } - - nullb->q = blk_mq_init_queue(&null_mq_reg, nullb); + if (use_per_node_hctx) + nullb->tag_set.ops = &null_mq_ops_pernode; + else + nullb->tag_set.ops = &null_mq_ops; + nullb->tag_set.nr_hw_queues = submit_queues; + nullb->tag_set.queue_depth = hw_queue_depth; + nullb->tag_set.numa_node = home_node; + nullb->tag_set.cmd_size = sizeof(struct nullb_cmd); + nullb->tag_set.flags = BLK_MQ_F_SHOULD_MERGE; + nullb->tag_set.driver_data = nullb; + + if (blk_mq_alloc_tag_set(&nullb->tag_set)) + goto out_cleanup_queues; + + nullb->q = blk_mq_init_queue(&nullb->tag_set); + if (!nullb->q) + goto out_cleanup_tags; } else if (queue_mode == NULL_Q_BIO) { nullb->q = blk_alloc_queue_node(GFP_KERNEL, home_node); + if (!nullb->q) + goto out_cleanup_queues; blk_queue_make_request(nullb->q, null_queue_bio); init_driver_queues(nullb); } else { nullb->q = blk_init_queue_node(null_request_fn, &nullb->lock, home_node); + if (!nullb->q) + goto out_cleanup_queues; blk_queue_prep_rq(nullb->q, null_rq_prep_fn); - if (nullb->q) - blk_queue_softirq_done(nullb->q, null_softirq_done_fn); + blk_queue_softirq_done(nullb->q, null_softirq_done_fn); init_driver_queues(nullb); } - if (!nullb->q) - goto queue_fail; - nullb->q->queuedata = nullb; queue_flag_set_unlocked(QUEUE_FLAG_NONROT, nullb->q); disk = nullb->disk = alloc_disk_node(1, home_node); - if (!disk) { -queue_fail: - 
blk_cleanup_queue(nullb->q); - cleanup_queues(nullb); -err: - kfree(nullb); - return -ENOMEM; - } + if (!disk) + goto out_cleanup_blk_queue; mutex_lock(&lock); list_add_tail(&nullb->list, &nullb_list); @@ -579,6 +585,18 @@ err: sprintf(disk->disk_name, "nullb%d", nullb->index); add_disk(disk); return 0; + +out_cleanup_blk_queue: + blk_cleanup_queue(nullb->q); +out_cleanup_tags: + if (queue_mode == NULL_Q_MQ) + blk_mq_free_tag_set(&nullb->tag_set); +out_cleanup_queues: + cleanup_queues(nullb); +out_free_nullb: + kfree(nullb); +out: + return -ENOMEM; } static int __init null_init(void) diff --git a/drivers/block/virtio_blk.c b/drivers/block/virtio_blk.c index d06206abd340..f909a8821e65 100644 --- a/drivers/block/virtio_blk.c +++ b/drivers/block/virtio_blk.c @@ -30,6 +30,9 @@ struct virtio_blk /* The disk structure for the kernel. */ struct gendisk *disk; + /* Block layer tags. */ + struct blk_mq_tag_set tag_set; + /* Process context for config space updates */ struct work_struct config_work; @@ -480,8 +483,9 @@ static const struct device_attribute dev_attr_cache_type_rw = __ATTR(cache_type, S_IRUGO|S_IWUSR, virtblk_cache_type_show, virtblk_cache_type_store); -static int virtblk_init_request(void *data, struct blk_mq_hw_ctx *hctx, - struct request *rq, unsigned int nr) +static int virtblk_init_request(void *data, struct request *rq, + unsigned int hctx_idx, unsigned int request_idx, + unsigned int numa_node) { struct virtio_blk *vblk = data; struct virtblk_req *vbr = blk_mq_rq_to_pdu(rq); @@ -495,18 +499,12 @@ static struct blk_mq_ops virtio_mq_ops = { .map_queue = blk_mq_map_queue, .alloc_hctx = blk_mq_alloc_single_hw_queue, .free_hctx = blk_mq_free_single_hw_queue, - .init_request = virtblk_init_request, .complete = virtblk_request_done, + .init_request = virtblk_init_request, }; -static struct blk_mq_reg virtio_mq_reg = { - .ops = &virtio_mq_ops, - .nr_hw_queues = 1, - .queue_depth = 0, /* Set in virtblk_probe */ - .numa_node = NUMA_NO_NODE, - .flags = BLK_MQ_F_SHOULD_MERGE, -}; -module_param_named(queue_depth, virtio_mq_reg.queue_depth, uint, 0444); +static unsigned int virtblk_queue_depth; +module_param_named(queue_depth, virtblk_queue_depth, uint, 0444); static int virtblk_probe(struct virtio_device *vdev) { @@ -562,20 +560,32 @@ static int virtblk_probe(struct virtio_device *vdev) } /* Default queue sizing is to fill the ring. */ - if (!virtio_mq_reg.queue_depth) { - virtio_mq_reg.queue_depth = vblk->vq->num_free; + if (!virtblk_queue_depth) { + virtblk_queue_depth = vblk->vq->num_free; /* ... 
but without indirect descs, we use 2 descs per req */ if (!virtio_has_feature(vdev, VIRTIO_RING_F_INDIRECT_DESC)) - virtio_mq_reg.queue_depth /= 2; + virtblk_queue_depth /= 2; } - virtio_mq_reg.cmd_size = + + memset(&vblk->tag_set, 0, sizeof(vblk->tag_set)); + vblk->tag_set.ops = &virtio_mq_ops; + vblk->tag_set.nr_hw_queues = 1; + vblk->tag_set.queue_depth = virtblk_queue_depth; + vblk->tag_set.numa_node = NUMA_NO_NODE; + vblk->tag_set.flags = BLK_MQ_F_SHOULD_MERGE; + vblk->tag_set.cmd_size = sizeof(struct virtblk_req) + sizeof(struct scatterlist) * sg_elems; + vblk->tag_set.driver_data = vblk; - q = vblk->disk->queue = blk_mq_init_queue(&virtio_mq_reg, vblk); + err = blk_mq_alloc_tag_set(&vblk->tag_set); + if (err) + goto out_put_disk; + + q = vblk->disk->queue = blk_mq_init_queue(&vblk->tag_set); if (!q) { err = -ENOMEM; - goto out_put_disk; + goto out_free_tags; } q->queuedata = vblk; @@ -678,6 +688,8 @@ static int virtblk_probe(struct virtio_device *vdev) out_del_disk: del_gendisk(vblk->disk); blk_cleanup_queue(vblk->disk->queue); +out_free_tags: + blk_mq_free_tag_set(&vblk->tag_set); out_put_disk: put_disk(vblk->disk); out_free_vq: @@ -704,6 +716,8 @@ static void virtblk_remove(struct virtio_device *vdev) del_gendisk(vblk->disk); blk_cleanup_queue(vblk->disk->queue); + blk_mq_free_tag_set(&vblk->tag_set); + /* Stop all the virtqueues. */ vdev->config->reset(vdev); diff --git a/include/linux/blk-mq.h b/include/linux/blk-mq.h index 29c1a6e83814..a4ea0ce83b07 100644 --- a/include/linux/blk-mq.h +++ b/include/linux/blk-mq.h @@ -33,8 +33,6 @@ struct blk_mq_hw_ctx { unsigned int nr_ctx_map; unsigned long *ctx_map; - struct request **rqs; - struct list_head page_list; struct blk_mq_tags *tags; unsigned long queued; @@ -42,7 +40,6 @@ struct blk_mq_hw_ctx { #define BLK_MQ_MAX_DISPATCH_ORDER 10 unsigned long dispatched[BLK_MQ_MAX_DISPATCH_ORDER]; - unsigned int queue_depth; unsigned int numa_node; unsigned int cmd_size; /* per-request extra data */ @@ -50,7 +47,7 @@ struct blk_mq_hw_ctx { struct kobject kobj; }; -struct blk_mq_reg { +struct blk_mq_tag_set { struct blk_mq_ops *ops; unsigned int nr_hw_queues; unsigned int queue_depth; @@ -59,18 +56,22 @@ struct blk_mq_reg { int numa_node; unsigned int timeout; unsigned int flags; /* BLK_MQ_F_* */ + void *driver_data; + + struct blk_mq_tags **tags; }; typedef int (queue_rq_fn)(struct blk_mq_hw_ctx *, struct request *); typedef struct blk_mq_hw_ctx *(map_queue_fn)(struct request_queue *, const int); -typedef struct blk_mq_hw_ctx *(alloc_hctx_fn)(struct blk_mq_reg *,unsigned int); +typedef struct blk_mq_hw_ctx *(alloc_hctx_fn)(struct blk_mq_tag_set *, + unsigned int); typedef void (free_hctx_fn)(struct blk_mq_hw_ctx *, unsigned int); typedef int (init_hctx_fn)(struct blk_mq_hw_ctx *, void *, unsigned int); typedef void (exit_hctx_fn)(struct blk_mq_hw_ctx *, unsigned int); -typedef int (init_request_fn)(void *, struct blk_mq_hw_ctx *, - struct request *, unsigned int); -typedef void (exit_request_fn)(void *, struct blk_mq_hw_ctx *, - struct request *, unsigned int); +typedef int (init_request_fn)(void *, struct request *, unsigned int, + unsigned int, unsigned int); +typedef void (exit_request_fn)(void *, struct request *, unsigned int, + unsigned int); struct blk_mq_ops { /* @@ -127,10 +128,13 @@ enum { BLK_MQ_MAX_DEPTH = 2048, }; -struct request_queue *blk_mq_init_queue(struct blk_mq_reg *, void *); +struct request_queue *blk_mq_init_queue(struct blk_mq_tag_set *); int blk_mq_register_disk(struct gendisk *); void blk_mq_unregister_disk(struct 
gendisk *); +int blk_mq_alloc_tag_set(struct blk_mq_tag_set *set); +void blk_mq_free_tag_set(struct blk_mq_tag_set *set); + void blk_mq_flush_plug_list(struct blk_plug *plug, bool from_schedule); void blk_mq_insert_request(struct request *, bool, bool, bool); @@ -139,10 +143,10 @@ void blk_mq_free_request(struct request *rq); bool blk_mq_can_queue(struct blk_mq_hw_ctx *); struct request *blk_mq_alloc_request(struct request_queue *q, int rw, gfp_t gfp); struct request *blk_mq_alloc_reserved_request(struct request_queue *q, int rw, gfp_t gfp); -struct request *blk_mq_rq_from_tag(struct request_queue *q, unsigned int tag); +struct request *blk_mq_tag_to_rq(struct blk_mq_tags *tags, unsigned int tag); struct blk_mq_hw_ctx *blk_mq_map_queue(struct request_queue *, const int ctx_index); -struct blk_mq_hw_ctx *blk_mq_alloc_single_hw_queue(struct blk_mq_reg *, unsigned int); +struct blk_mq_hw_ctx *blk_mq_alloc_single_hw_queue(struct blk_mq_tag_set *, unsigned int); void blk_mq_free_single_hw_queue(struct blk_mq_hw_ctx *, unsigned int); bool blk_mq_end_io_partial(struct request *rq, int error, @@ -173,12 +177,6 @@ static inline void *blk_mq_rq_to_pdu(struct request *rq) return (void *) rq + sizeof(*rq); } -static inline struct request *blk_mq_tag_to_rq(struct blk_mq_hw_ctx *hctx, - unsigned int tag) -{ - return hctx->rqs[tag]; -} - #define queue_for_each_hw_ctx(q, hctx, i) \ for ((i) = 0; (i) < (q)->nr_hw_queues && \ ({ hctx = (q)->queue_hw_ctx[i]; 1; }); (i)++) -- cgit v1.2.3 From 1b4a325858f695a9b5041313602d34b36f463724 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Wed, 16 Apr 2014 09:44:54 +0200 Subject: blk-mq: add async parameter to blk_mq_start_stopped_hw_queues Signed-off-by: Christoph Hellwig Signed-off-by: Jens Axboe --- block/blk-mq.c | 4 ++-- drivers/block/virtio_blk.c | 4 ++-- include/linux/blk-mq.h | 2 +- 3 files changed, 5 insertions(+), 5 deletions(-) (limited to 'drivers/block') diff --git a/block/blk-mq.c b/block/blk-mq.c index 963a82109386..da3808823e44 100644 --- a/block/blk-mq.c +++ b/block/blk-mq.c @@ -700,7 +700,7 @@ void blk_mq_start_hw_queue(struct blk_mq_hw_ctx *hctx) } EXPORT_SYMBOL(blk_mq_start_hw_queue); -void blk_mq_start_stopped_hw_queues(struct request_queue *q) +void blk_mq_start_stopped_hw_queues(struct request_queue *q, bool async) { struct blk_mq_hw_ctx *hctx; int i; @@ -711,7 +711,7 @@ void blk_mq_start_stopped_hw_queues(struct request_queue *q) clear_bit(BLK_MQ_S_STOPPED, &hctx->state); preempt_disable(); - blk_mq_run_hw_queue(hctx, true); + blk_mq_run_hw_queue(hctx, async); preempt_enable(); } } diff --git a/drivers/block/virtio_blk.c b/drivers/block/virtio_blk.c index f909a8821e65..7a51f065edcd 100644 --- a/drivers/block/virtio_blk.c +++ b/drivers/block/virtio_blk.c @@ -151,7 +151,7 @@ static void virtblk_done(struct virtqueue *vq) /* In case queue is stopped waiting for more buffers. 
*/ if (req_done) - blk_mq_start_stopped_hw_queues(vblk->disk->queue); + blk_mq_start_stopped_hw_queues(vblk->disk->queue, true); } static int virtio_queue_rq(struct blk_mq_hw_ctx *hctx, struct request *req) @@ -762,7 +762,7 @@ static int virtblk_restore(struct virtio_device *vdev) vblk->config_enable = true; ret = init_vq(vdev->priv); if (!ret) - blk_mq_start_stopped_hw_queues(vblk->disk->queue); + blk_mq_start_stopped_hw_queues(vblk->disk->queue, true); return ret; } diff --git a/include/linux/blk-mq.h b/include/linux/blk-mq.h index a81b474b794f..9ecfab96d8c9 100644 --- a/include/linux/blk-mq.h +++ b/include/linux/blk-mq.h @@ -157,7 +157,7 @@ void blk_mq_complete_request(struct request *rq); void blk_mq_stop_hw_queue(struct blk_mq_hw_ctx *hctx); void blk_mq_start_hw_queue(struct blk_mq_hw_ctx *hctx); void blk_mq_stop_hw_queues(struct request_queue *q); -void blk_mq_start_stopped_hw_queues(struct request_queue *q); +void blk_mq_start_stopped_hw_queues(struct request_queue *q, bool async); /* * Driver command data is immediately after the request. So subtract request -- cgit v1.2.3 From dc4a93078b8a6a10d2dcaba76ab488d6dbe73922 Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Wed, 16 Apr 2014 21:37:30 -0600 Subject: sd/skd: stuff discard page in request->completion_data Store the pointer to the page there, so we can always safely reference it from end_io context where ->bio may have been cleared. Signed-off-by: Jens Axboe --- drivers/block/skd_main.c | 4 ++-- drivers/scsi/sd.c | 7 +++---- 2 files changed, 5 insertions(+), 6 deletions(-) (limited to 'drivers/block') diff --git a/drivers/block/skd_main.c b/drivers/block/skd_main.c index 36bcedfd930c..c48d9084c965 100644 --- a/drivers/block/skd_main.c +++ b/drivers/block/skd_main.c @@ -743,6 +743,7 @@ static void skd_request_fn(struct request_queue *q) break; } skreq->discard_page = 1; + req->completion_data = page; skd_prep_discard_cdb(scsi_req, skreq, page, lba, count); } else if (flush == SKD_FLUSH_ZERO_SIZE_FIRST) { @@ -855,10 +856,9 @@ static void skd_end_request(struct skd_device *skdev, if ((io_flags & REQ_DISCARD) && (skreq->discard_page == 1)) { - struct bio *bio = req->bio; pr_debug("%s:%s:%d, free the page!", skdev->name, __func__, __LINE__); - __free_page(bio->bi_io_vec->bv_page); + __free_page(req->completion_data); } if (unlikely(error)) { diff --git a/drivers/scsi/sd.c b/drivers/scsi/sd.c index 06d154d20faa..96af195224f2 100644 --- a/drivers/scsi/sd.c +++ b/drivers/scsi/sd.c @@ -737,6 +737,7 @@ static int sd_setup_discard_cmnd(struct scsi_device *sdp, struct request *rq) goto out; } + rq->completion_data = page; blk_add_request_payload(rq, page, len); ret = scsi_setup_blk_pc_cmnd(sdp, rq); rq->__data_len = nr_bytes; @@ -839,11 +840,9 @@ static void sd_unprep_fn(struct request_queue *q, struct request *rq) { struct scsi_cmnd *SCpnt = rq->special; - if (rq->cmd_flags & REQ_DISCARD) { - struct bio *bio = rq->bio; + if (rq->cmd_flags & REQ_DISCARD) + __free_page(rq->completion_data); - __free_page(bio->bi_io_vec->bv_page); - } if (SCpnt->cmnd != rq->cmd) { mempool_free(SCpnt->cmnd, sd_cdb_pool); SCpnt->cmnd = NULL; -- cgit v1.2.3 From f14bbe77a96bb979dc539d8308ee18a9363a544f Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Tue, 27 May 2014 12:06:53 -0600 Subject: blk-mq: pass in suggested NUMA node to ->alloc_hctx() Drivers currently have to figure this out on their own, and they are missing information to do it properly. The ones that did attempt to do it, do it wrong. So just pass in the suggested node directly to the alloc function. 
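(For illustration only, not part of the patch text: under the new three-argument prototype a driver-side hook reduces to a plain node-aware allocation. The function name below is made up; the node argument is the one blk_mq_hw_queue_to_node() derives from the CPU-to-queue map.)

static struct blk_mq_hw_ctx *example_alloc_hctx(struct blk_mq_tag_set *set,
						unsigned int hctx_index,
						int node)
{
	/* allocate the hardware context on the node the core suggested */
	return kzalloc_node(sizeof(struct blk_mq_hw_ctx), GFP_KERNEL, node);
}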
Signed-off-by: Jens Axboe --- block/blk-mq-cpumap.c | 16 ++++++++++++++++ block/blk-mq.c | 26 +++++++++++++++----------- block/blk-mq.h | 1 + drivers/block/null_blk.c | 35 +++-------------------------------- include/linux/blk-mq.h | 4 ++-- 5 files changed, 37 insertions(+), 45 deletions(-) (limited to 'drivers/block') diff --git a/block/blk-mq-cpumap.c b/block/blk-mq-cpumap.c index 5d0f93cf358c..0daacb927be1 100644 --- a/block/blk-mq-cpumap.c +++ b/block/blk-mq-cpumap.c @@ -96,3 +96,19 @@ unsigned int *blk_mq_make_queue_map(struct blk_mq_tag_set *set) kfree(map); return NULL; } + +/* + * We have no quick way of doing reverse lookups. This is only used at + * queue init time, so runtime isn't important. + */ +int blk_mq_hw_queue_to_node(unsigned int *mq_map, unsigned int index) +{ + int i; + + for_each_possible_cpu(i) { + if (index == mq_map[i]) + return cpu_to_node(i); + } + + return NUMA_NO_NODE; +} diff --git a/block/blk-mq.c b/block/blk-mq.c index e8b5f74dc1a1..30bad930e661 100644 --- a/block/blk-mq.c +++ b/block/blk-mq.c @@ -1297,10 +1297,10 @@ struct blk_mq_hw_ctx *blk_mq_map_queue(struct request_queue *q, const int cpu) EXPORT_SYMBOL(blk_mq_map_queue); struct blk_mq_hw_ctx *blk_mq_alloc_single_hw_queue(struct blk_mq_tag_set *set, - unsigned int hctx_index) + unsigned int hctx_index, + int node) { - return kzalloc_node(sizeof(struct blk_mq_hw_ctx), GFP_KERNEL, - set->numa_node); + return kzalloc_node(sizeof(struct blk_mq_hw_ctx), GFP_KERNEL, node); } EXPORT_SYMBOL(blk_mq_alloc_single_hw_queue); @@ -1752,6 +1752,7 @@ struct request_queue *blk_mq_init_queue(struct blk_mq_tag_set *set) struct blk_mq_hw_ctx **hctxs; struct blk_mq_ctx *ctx; struct request_queue *q; + unsigned int *map; int i; ctx = alloc_percpu(struct blk_mq_ctx); @@ -1764,8 +1765,14 @@ struct request_queue *blk_mq_init_queue(struct blk_mq_tag_set *set) if (!hctxs) goto err_percpu; + map = blk_mq_make_queue_map(set); + if (!map) + goto err_map; + for (i = 0; i < set->nr_hw_queues; i++) { - hctxs[i] = set->ops->alloc_hctx(set, i); + int node = blk_mq_hw_queue_to_node(map, i); + + hctxs[i] = set->ops->alloc_hctx(set, i, node); if (!hctxs[i]) goto err_hctxs; @@ -1773,7 +1780,7 @@ struct request_queue *blk_mq_init_queue(struct blk_mq_tag_set *set) goto err_hctxs; atomic_set(&hctxs[i]->nr_active, 0); - hctxs[i]->numa_node = NUMA_NO_NODE; + hctxs[i]->numa_node = node; hctxs[i]->queue_num = i; } @@ -1784,15 +1791,12 @@ struct request_queue *blk_mq_init_queue(struct blk_mq_tag_set *set) if (percpu_counter_init(&q->mq_usage_counter, 0)) goto err_map; - q->mq_map = blk_mq_make_queue_map(set); - if (!q->mq_map) - goto err_map; - setup_timer(&q->timeout, blk_mq_rq_timer, (unsigned long) q); blk_queue_rq_timeout(q, 30000); q->nr_queues = nr_cpu_ids; q->nr_hw_queues = set->nr_hw_queues; + q->mq_map = map; q->queue_ctx = ctx; q->queue_hw_ctx = hctxs; @@ -1844,16 +1848,16 @@ struct request_queue *blk_mq_init_queue(struct blk_mq_tag_set *set) err_flush_rq: kfree(q->flush_rq); err_hw: - kfree(q->mq_map); -err_map: blk_cleanup_queue(q); err_hctxs: + kfree(map); for (i = 0; i < set->nr_hw_queues; i++) { if (!hctxs[i]) break; free_cpumask_var(hctxs[i]->cpumask); set->ops->free_hctx(hctxs[i], i); } +err_map: kfree(hctxs); err_percpu: free_percpu(ctx); diff --git a/block/blk-mq.h b/block/blk-mq.h index 491dbd4e93f5..ff5e6bf0f691 100644 --- a/block/blk-mq.h +++ b/block/blk-mq.h @@ -52,6 +52,7 @@ void blk_mq_disable_hotplug(void); */ extern unsigned int *blk_mq_make_queue_map(struct blk_mq_tag_set *set); extern int blk_mq_update_queue_map(unsigned 
int *map, unsigned int nr_queues); +extern int blk_mq_hw_queue_to_node(unsigned int *map, unsigned int); /* * Basic implementation of sparser bitmap, allowing the user to spread diff --git a/drivers/block/null_blk.c b/drivers/block/null_blk.c index 8e7e3a0b0d24..4d33c8c25fbf 100644 --- a/drivers/block/null_blk.c +++ b/drivers/block/null_blk.c @@ -322,39 +322,10 @@ static int null_queue_rq(struct blk_mq_hw_ctx *hctx, struct request *rq) } static struct blk_mq_hw_ctx *null_alloc_hctx(struct blk_mq_tag_set *set, - unsigned int hctx_index) + unsigned int hctx_index, + int node) { - int b_size = DIV_ROUND_UP(set->nr_hw_queues, nr_online_nodes); - int tip = (set->nr_hw_queues % nr_online_nodes); - int node = 0, i, n; - - /* - * Split submit queues evenly wrt to the number of nodes. If uneven, - * fill the first buckets with one extra, until the rest is filled with - * no extra. - */ - for (i = 0, n = 1; i < hctx_index; i++, n++) { - if (n % b_size == 0) { - n = 0; - node++; - - tip--; - if (!tip) - b_size = set->nr_hw_queues / nr_online_nodes; - } - } - - /* - * A node might not be online, therefore map the relative node id to the - * real node id. - */ - for_each_online_node(n) { - if (!node) - break; - node--; - } - - return kzalloc_node(sizeof(struct blk_mq_hw_ctx), GFP_KERNEL, n); + return kzalloc_node(sizeof(struct blk_mq_hw_ctx), GFP_KERNEL, node); } static void null_free_hctx(struct blk_mq_hw_ctx *hctx, unsigned int hctx_index) diff --git a/include/linux/blk-mq.h b/include/linux/blk-mq.h index f76bb18350af..afeb93496907 100644 --- a/include/linux/blk-mq.h +++ b/include/linux/blk-mq.h @@ -80,7 +80,7 @@ struct blk_mq_tag_set { typedef int (queue_rq_fn)(struct blk_mq_hw_ctx *, struct request *); typedef struct blk_mq_hw_ctx *(map_queue_fn)(struct request_queue *, const int); typedef struct blk_mq_hw_ctx *(alloc_hctx_fn)(struct blk_mq_tag_set *, - unsigned int); + unsigned int, int); typedef void (free_hctx_fn)(struct blk_mq_hw_ctx *, unsigned int); typedef int (init_hctx_fn)(struct blk_mq_hw_ctx *, void *, unsigned int); typedef void (exit_hctx_fn)(struct blk_mq_hw_ctx *, unsigned int); @@ -165,7 +165,7 @@ struct request *blk_mq_alloc_reserved_request(struct request_queue *q, int rw, g struct request *blk_mq_tag_to_rq(struct blk_mq_tags *tags, unsigned int tag); struct blk_mq_hw_ctx *blk_mq_map_queue(struct request_queue *, const int ctx_index); -struct blk_mq_hw_ctx *blk_mq_alloc_single_hw_queue(struct blk_mq_tag_set *, unsigned int); +struct blk_mq_hw_ctx *blk_mq_alloc_single_hw_queue(struct blk_mq_tag_set *, unsigned int, int); void blk_mq_free_single_hw_queue(struct blk_mq_hw_ctx *, unsigned int); void blk_mq_end_io(struct request *rq, int error); -- cgit v1.2.3 From cdef54dd85ad66e77262ea57796a3e81683dd5d6 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Wed, 28 May 2014 18:11:06 +0200 Subject: blk-mq: remove alloc_hctx and free_hctx methods There is no need for drivers to control hardware context allocation now that we do the context to node mapping in common code. 
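(Illustrative note, not part of the patch text: with the hooks gone, a driver's blk_mq_ops no longer carries any hctx allocation callbacks; blk-mq itself kzalloc_node()s each hardware context on the node picked from the queue map. The table below is null_blk's ops table as it reads once this patch is applied.)

static struct blk_mq_ops null_mq_ops = {
	.queue_rq	= null_queue_rq,
	.map_queue	= blk_mq_map_queue,
	.init_hctx	= null_init_hctx,
	.complete	= null_softirq_done_fn,
};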
Signed-off-by: Christoph Hellwig Signed-off-by: Jens Axboe --- block/blk-mq.c | 26 +++++--------------------- drivers/block/null_blk.c | 28 +--------------------------- drivers/block/virtio_blk.c | 2 -- include/linux/blk-mq.h | 10 ---------- 4 files changed, 6 insertions(+), 60 deletions(-) (limited to 'drivers/block') diff --git a/block/blk-mq.c b/block/blk-mq.c index 5cc4b871cb11..f27fe44230c2 100644 --- a/block/blk-mq.c +++ b/block/blk-mq.c @@ -1335,21 +1335,6 @@ struct blk_mq_hw_ctx *blk_mq_map_queue(struct request_queue *q, const int cpu) } EXPORT_SYMBOL(blk_mq_map_queue); -struct blk_mq_hw_ctx *blk_mq_alloc_single_hw_queue(struct blk_mq_tag_set *set, - unsigned int hctx_index, - int node) -{ - return kzalloc_node(sizeof(struct blk_mq_hw_ctx), GFP_KERNEL, node); -} -EXPORT_SYMBOL(blk_mq_alloc_single_hw_queue); - -void blk_mq_free_single_hw_queue(struct blk_mq_hw_ctx *hctx, - unsigned int hctx_index) -{ - kfree(hctx); -} -EXPORT_SYMBOL(blk_mq_free_single_hw_queue); - static void blk_mq_free_rq_map(struct blk_mq_tag_set *set, struct blk_mq_tags *tags, unsigned int hctx_idx) { @@ -1590,7 +1575,7 @@ static void blk_mq_free_hw_queues(struct request_queue *q, queue_for_each_hw_ctx(q, hctx, i) { free_cpumask_var(hctx->cpumask); - set->ops->free_hctx(hctx, i); + kfree(hctx); } } @@ -1811,7 +1796,8 @@ struct request_queue *blk_mq_init_queue(struct blk_mq_tag_set *set) for (i = 0; i < set->nr_hw_queues; i++) { int node = blk_mq_hw_queue_to_node(map, i); - hctxs[i] = set->ops->alloc_hctx(set, i, node); + hctxs[i] = kzalloc_node(sizeof(struct blk_mq_hw_ctx), + GFP_KERNEL, node); if (!hctxs[i]) goto err_hctxs; @@ -1898,7 +1884,7 @@ err_hctxs: if (!hctxs[i]) break; free_cpumask_var(hctxs[i]->cpumask); - set->ops->free_hctx(hctxs[i], i); + kfree(hctxs[i]); } err_map: kfree(hctxs); @@ -1983,9 +1969,7 @@ int blk_mq_alloc_tag_set(struct blk_mq_tag_set *set) if (set->queue_depth < set->reserved_tags + BLK_MQ_TAG_MIN) return -EINVAL; - if (!set->nr_hw_queues || - !set->ops->queue_rq || !set->ops->map_queue || - !set->ops->alloc_hctx || !set->ops->free_hctx) + if (!set->nr_hw_queues || !set->ops->queue_rq || !set->ops->map_queue) return -EINVAL; diff --git a/drivers/block/null_blk.c b/drivers/block/null_blk.c index 4d33c8c25fbf..b40af63a5476 100644 --- a/drivers/block/null_blk.c +++ b/drivers/block/null_blk.c @@ -321,18 +321,6 @@ static int null_queue_rq(struct blk_mq_hw_ctx *hctx, struct request *rq) return BLK_MQ_RQ_QUEUE_OK; } -static struct blk_mq_hw_ctx *null_alloc_hctx(struct blk_mq_tag_set *set, - unsigned int hctx_index, - int node) -{ - return kzalloc_node(sizeof(struct blk_mq_hw_ctx), GFP_KERNEL, node); -} - -static void null_free_hctx(struct blk_mq_hw_ctx *hctx, unsigned int hctx_index) -{ - kfree(hctx); -} - static void null_init_queue(struct nullb *nullb, struct nullb_queue *nq) { BUG_ON(!nullb); @@ -360,17 +348,6 @@ static struct blk_mq_ops null_mq_ops = { .map_queue = blk_mq_map_queue, .init_hctx = null_init_hctx, .complete = null_softirq_done_fn, - .alloc_hctx = blk_mq_alloc_single_hw_queue, - .free_hctx = blk_mq_free_single_hw_queue, -}; - -static struct blk_mq_ops null_mq_ops_pernode = { - .queue_rq = null_queue_rq, - .map_queue = blk_mq_map_queue, - .init_hctx = null_init_hctx, - .complete = null_softirq_done_fn, - .alloc_hctx = null_alloc_hctx, - .free_hctx = null_free_hctx, }; static void null_del_dev(struct nullb *nullb) @@ -496,10 +473,7 @@ static int null_add_dev(void) goto out_free_nullb; if (queue_mode == NULL_Q_MQ) { - if (use_per_node_hctx) - nullb->tag_set.ops = 
&null_mq_ops_pernode; - else - nullb->tag_set.ops = &null_mq_ops; + nullb->tag_set.ops = &null_mq_ops; nullb->tag_set.nr_hw_queues = submit_queues; nullb->tag_set.queue_depth = hw_queue_depth; nullb->tag_set.numa_node = home_node; diff --git a/drivers/block/virtio_blk.c b/drivers/block/virtio_blk.c index 7a51f065edcd..16c21c0cb14d 100644 --- a/drivers/block/virtio_blk.c +++ b/drivers/block/virtio_blk.c @@ -497,8 +497,6 @@ static int virtblk_init_request(void *data, struct request *rq, static struct blk_mq_ops virtio_mq_ops = { .queue_rq = virtio_queue_rq, .map_queue = blk_mq_map_queue, - .alloc_hctx = blk_mq_alloc_single_hw_queue, - .free_hctx = blk_mq_free_single_hw_queue, .complete = virtblk_request_done, .init_request = virtblk_init_request, }; diff --git a/include/linux/blk-mq.h b/include/linux/blk-mq.h index 2bd82f399128..91dfb75ce39f 100644 --- a/include/linux/blk-mq.h +++ b/include/linux/blk-mq.h @@ -79,9 +79,6 @@ struct blk_mq_tag_set { typedef int (queue_rq_fn)(struct blk_mq_hw_ctx *, struct request *); typedef struct blk_mq_hw_ctx *(map_queue_fn)(struct request_queue *, const int); -typedef struct blk_mq_hw_ctx *(alloc_hctx_fn)(struct blk_mq_tag_set *, - unsigned int, int); -typedef void (free_hctx_fn)(struct blk_mq_hw_ctx *, unsigned int); typedef int (init_hctx_fn)(struct blk_mq_hw_ctx *, void *, unsigned int); typedef void (exit_hctx_fn)(struct blk_mq_hw_ctx *, unsigned int); typedef int (init_request_fn)(void *, struct request *, unsigned int, @@ -107,12 +104,6 @@ struct blk_mq_ops { softirq_done_fn *complete; - /* - * Override for hctx allocations (should probably go) - */ - alloc_hctx_fn *alloc_hctx; - free_hctx_fn *free_hctx; - /* * Called when the block layer side of a hardware queue has been * set up, allowing the driver to allocate/init matching structures. @@ -166,7 +157,6 @@ struct request *blk_mq_tag_to_rq(struct blk_mq_tags *tags, unsigned int tag); struct blk_mq_hw_ctx *blk_mq_map_queue(struct request_queue *, const int ctx_index); struct blk_mq_hw_ctx *blk_mq_alloc_single_hw_queue(struct blk_mq_tag_set *, unsigned int, int); -void blk_mq_free_single_hw_queue(struct blk_mq_hw_ctx *, unsigned int); void blk_mq_end_io(struct request *rq, int error); void __blk_mq_end_io(struct request *rq, int error); -- cgit v1.2.3
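For reference, a condensed sketch of the driver-facing setup flow that the tag-set conversion above leaves behind, taken from the virtio_blk hunks at the start of this series (identifiers are virtio_blk's own; error handling is abbreviated):

	memset(&vblk->tag_set, 0, sizeof(vblk->tag_set));
	vblk->tag_set.ops = &virtio_mq_ops;
	vblk->tag_set.nr_hw_queues = 1;
	vblk->tag_set.queue_depth = virtblk_queue_depth;
	vblk->tag_set.numa_node = NUMA_NO_NODE;
	vblk->tag_set.flags = BLK_MQ_F_SHOULD_MERGE;
	vblk->tag_set.cmd_size = sizeof(struct virtblk_req) +
				 sizeof(struct scatterlist) * sg_elems;
	vblk->tag_set.driver_data = vblk;

	err = blk_mq_alloc_tag_set(&vblk->tag_set);
	if (err)
		goto out_put_disk;

	q = vblk->disk->queue = blk_mq_init_queue(&vblk->tag_set);
	if (!q) {
		err = -ENOMEM;
		goto out_free_tags;
	}

	/* teardown runs in reverse: blk_cleanup_queue(vblk->disk->queue),
	 * then blk_mq_free_tag_set(&vblk->tag_set), then put_disk() */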