summary refs log tree commit diff stats
path: root/block
diff options
context:
space:
mode:
Diffstat (limited to 'block')
-rw-r--r-- block/Kconfig 5
-rw-r--r-- block/Makefile 1
-rw-r--r-- block/blk-mq-debugfs.c 3
-rw-r--r-- block/blk-mq-rdma.c 52
-rw-r--r-- block/blk-throttle.c 18
-rw-r--r-- block/bsg-lib.c 74
-rw-r--r-- block/compat_ioctl.c 2
7 files changed, 120 insertions, 35 deletions
diff --git a/block/Kconfig b/block/Kconfig
index 89cd28f8d051..3ab42bbb06d5 100644
--- a/block/Kconfig
+++ b/block/Kconfig
@@ -206,4 +206,9 @@ config BLK_MQ_VIRTIO
depends on BLOCK && VIRTIO
default y
+config BLK_MQ_RDMA
+ bool
+ depends on BLOCK && INFINIBAND
+ default y
+
source block/Kconfig.iosched
diff --git a/block/Makefile b/block/Makefile
index 2b281cf258a0..9396ebc85d24 100644
--- a/block/Makefile
+++ b/block/Makefile
@@ -29,6 +29,7 @@ obj-$(CONFIG_BLK_CMDLINE_PARSER) += cmdline-parser.o
obj-$(CONFIG_BLK_DEV_INTEGRITY) += bio-integrity.o blk-integrity.o t10-pi.o
obj-$(CONFIG_BLK_MQ_PCI) += blk-mq-pci.o
obj-$(CONFIG_BLK_MQ_VIRTIO) += blk-mq-virtio.o
+obj-$(CONFIG_BLK_MQ_RDMA) += blk-mq-rdma.o
obj-$(CONFIG_BLK_DEV_ZONED) += blk-zoned.o
obj-$(CONFIG_BLK_WBT) += blk-wbt.o
obj-$(CONFIG_BLK_DEBUG_FS) += blk-mq-debugfs.o
diff --git a/block/blk-mq-debugfs.c b/block/blk-mq-debugfs.c
index 9ebc2945f991..4f927a58dff8 100644
--- a/block/blk-mq-debugfs.c
+++ b/block/blk-mq-debugfs.c
@@ -75,6 +75,8 @@ static const char *const blk_queue_flag_name[] = {
QUEUE_FLAG_NAME(STATS),
QUEUE_FLAG_NAME(POLL_STATS),
QUEUE_FLAG_NAME(REGISTERED),
+ QUEUE_FLAG_NAME(SCSI_PASSTHROUGH),
+ QUEUE_FLAG_NAME(QUIESCED),
};
#undef QUEUE_FLAG_NAME
@@ -265,6 +267,7 @@ static const char *const cmd_flag_name[] = {
CMD_FLAG_NAME(RAHEAD),
CMD_FLAG_NAME(BACKGROUND),
CMD_FLAG_NAME(NOUNMAP),
+ CMD_FLAG_NAME(NOWAIT),
};
#undef CMD_FLAG_NAME
diff --git a/block/blk-mq-rdma.c b/block/blk-mq-rdma.c
new file mode 100644
index 000000000000..996167f1de18
--- /dev/null
+++ b/block/blk-mq-rdma.c
@@ -0,0 +1,52 @@
+/*
+ * Copyright (c) 2017 Sagi Grimberg.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
+ * more details.
+ */
+#include <linux/blk-mq.h>
+#include <linux/blk-mq-rdma.h>
+#include <rdma/ib_verbs.h>
+
+/**
+ * blk_mq_rdma_map_queues - provide a default queue mapping for rdma device
+ * @set: tagset to provide the mapping for
+ * @dev: rdma device associated with @set.
+ * @first_vec: first interrupt vector to use for queues (usually 0)
+ *
+ * This function assumes the rdma device @dev has at least as many available
+ * interrupt vectors as @set has queues. It will then query its affinity mask
+ * and build a queue mapping that maps a queue to the CPUs that have irq
+ * affinity for the corresponding vector.
+ *
+ * In case either the driver passed a @dev with fewer vectors than
+ * @set->nr_hw_queues, or @dev does not provide an affinity mask for a
+ * vector, we fall back to the naive mapping.
+ */
+int blk_mq_rdma_map_queues(struct blk_mq_tag_set *set,
+ struct ib_device *dev, int first_vec)
+{
+ const struct cpumask *mask;
+ unsigned int queue, cpu;
+
+ for (queue = 0; queue < set->nr_hw_queues; queue++) {
+ mask = ib_get_vector_affinity(dev, first_vec + queue);
+ if (!mask)
+ goto fallback;
+
+ for_each_cpu(cpu, mask)
+ set->mq_map[cpu] = queue;
+ }
+
+ return 0;
+
+fallback:
+ return blk_mq_map_queues(set);
+}
+EXPORT_SYMBOL_GPL(blk_mq_rdma_map_queues);
diff --git a/block/blk-throttle.c b/block/blk-throttle.c
index a7285bf2831c..80f5481fe9f6 100644
--- a/block/blk-throttle.c
+++ b/block/blk-throttle.c
@@ -382,6 +382,14 @@ static unsigned int tg_iops_limit(struct throtl_grp *tg, int rw)
} \
} while (0)
+static inline unsigned int throtl_bio_data_size(struct bio *bio)
+{
+ /* assume it's one sector */
+ if (unlikely(bio_op(bio) == REQ_OP_DISCARD))
+ return 512;
+ return bio->bi_iter.bi_size;
+}
+
static void throtl_qnode_init(struct throtl_qnode *qn, struct throtl_grp *tg)
{
INIT_LIST_HEAD(&qn->node);
@@ -934,6 +942,7 @@ static bool tg_with_in_bps_limit(struct throtl_grp *tg, struct bio *bio,
bool rw = bio_data_dir(bio);
u64 bytes_allowed, extra_bytes, tmp;
unsigned long jiffy_elapsed, jiffy_wait, jiffy_elapsed_rnd;
+ unsigned int bio_size = throtl_bio_data_size(bio);
jiffy_elapsed = jiffy_elapsed_rnd = jiffies - tg->slice_start[rw];
@@ -947,14 +956,14 @@ static bool tg_with_in_bps_limit(struct throtl_grp *tg, struct bio *bio,
do_div(tmp, HZ);
bytes_allowed = tmp;
- if (tg->bytes_disp[rw] + bio->bi_iter.bi_size <= bytes_allowed) {
+ if (tg->bytes_disp[rw] + bio_size <= bytes_allowed) {
if (wait)
*wait = 0;
return true;
}
/* Calc approx time to dispatch */
- extra_bytes = tg->bytes_disp[rw] + bio->bi_iter.bi_size - bytes_allowed;
+ extra_bytes = tg->bytes_disp[rw] + bio_size - bytes_allowed;
jiffy_wait = div64_u64(extra_bytes * HZ, tg_bps_limit(tg, rw));
if (!jiffy_wait)
@@ -1034,11 +1043,12 @@ static bool tg_may_dispatch(struct throtl_grp *tg, struct bio *bio,
static void throtl_charge_bio(struct throtl_grp *tg, struct bio *bio)
{
bool rw = bio_data_dir(bio);
+ unsigned int bio_size = throtl_bio_data_size(bio);
/* Charge the bio to the group */
- tg->bytes_disp[rw] += bio->bi_iter.bi_size;
+ tg->bytes_disp[rw] += bio_size;
tg->io_disp[rw]++;
- tg->last_bytes_disp[rw] += bio->bi_iter.bi_size;
+ tg->last_bytes_disp[rw] += bio_size;
tg->last_io_disp[rw]++;
/*
diff --git a/block/bsg-lib.c b/block/bsg-lib.c
index c4513b23f57a..dd56d7460cb9 100644
--- a/block/bsg-lib.c
+++ b/block/bsg-lib.c
@@ -29,26 +29,25 @@
#include <scsi/scsi_cmnd.h>
/**
- * bsg_destroy_job - routine to teardown/delete a bsg job
+ * bsg_teardown_job - routine to teardown a bsg job
* @job: bsg_job that is to be torn down
*/
-static void bsg_destroy_job(struct kref *kref)
+static void bsg_teardown_job(struct kref *kref)
{
struct bsg_job *job = container_of(kref, struct bsg_job, kref);
struct request *rq = job->req;
- blk_end_request_all(rq, BLK_STS_OK);
-
put_device(job->dev); /* release reference for the request */
kfree(job->request_payload.sg_list);
kfree(job->reply_payload.sg_list);
- kfree(job);
+
+ blk_end_request_all(rq, BLK_STS_OK);
}
void bsg_job_put(struct bsg_job *job)
{
- kref_put(&job->kref, bsg_destroy_job);
+ kref_put(&job->kref, bsg_teardown_job);
}
EXPORT_SYMBOL_GPL(bsg_job_put);
@@ -100,7 +99,7 @@ EXPORT_SYMBOL_GPL(bsg_job_done);
*/
static void bsg_softirq_done(struct request *rq)
{
- struct bsg_job *job = rq->special;
+ struct bsg_job *job = blk_mq_rq_to_pdu(rq);
bsg_job_put(job);
}
@@ -122,33 +121,20 @@ static int bsg_map_buffer(struct bsg_buffer *buf, struct request *req)
}
/**
- * bsg_create_job - create the bsg_job structure for the bsg request
+ * bsg_prepare_job - prepare the bsg_job structure for the bsg request
* @dev: device that is being sent the bsg request
* @req: BSG request that needs a job structure
*/
-static int bsg_create_job(struct device *dev, struct request *req)
+static int bsg_prepare_job(struct device *dev, struct request *req)
{
struct request *rsp = req->next_rq;
- struct request_queue *q = req->q;
struct scsi_request *rq = scsi_req(req);
- struct bsg_job *job;
+ struct bsg_job *job = blk_mq_rq_to_pdu(req);
int ret;
- BUG_ON(req->special);
-
- job = kzalloc(sizeof(struct bsg_job) + q->bsg_job_size, GFP_KERNEL);
- if (!job)
- return -ENOMEM;
-
- req->special = job;
- job->req = req;
- if (q->bsg_job_size)
- job->dd_data = (void *)&job[1];
job->request = rq->cmd;
job->request_len = rq->cmd_len;
- job->reply = rq->sense;
- job->reply_len = SCSI_SENSE_BUFFERSIZE; /* Size of sense buffer
- * allocated */
+
if (req->bio) {
ret = bsg_map_buffer(&job->request_payload, req);
if (ret)
@@ -187,7 +173,6 @@ static void bsg_request_fn(struct request_queue *q)
{
struct device *dev = q->queuedata;
struct request *req;
- struct bsg_job *job;
int ret;
if (!get_device(dev))
@@ -199,7 +184,7 @@ static void bsg_request_fn(struct request_queue *q)
break;
spin_unlock_irq(q->queue_lock);
- ret = bsg_create_job(dev, req);
+ ret = bsg_prepare_job(dev, req);
if (ret) {
scsi_req(req)->result = ret;
blk_end_request_all(req, BLK_STS_OK);
@@ -207,8 +192,7 @@ static void bsg_request_fn(struct request_queue *q)
continue;
}
- job = req->special;
- ret = q->bsg_job_fn(job);
+ ret = q->bsg_job_fn(blk_mq_rq_to_pdu(req));
spin_lock_irq(q->queue_lock);
if (ret)
break;
@@ -219,6 +203,35 @@ static void bsg_request_fn(struct request_queue *q)
spin_lock_irq(q->queue_lock);
}
+static int bsg_init_rq(struct request_queue *q, struct request *req, gfp_t gfp)
+{
+ struct bsg_job *job = blk_mq_rq_to_pdu(req);
+ struct scsi_request *sreq = &job->sreq;
+
+ memset(job, 0, sizeof(*job));
+
+ scsi_req_init(sreq);
+ sreq->sense_len = SCSI_SENSE_BUFFERSIZE;
+ sreq->sense = kzalloc(sreq->sense_len, gfp);
+ if (!sreq->sense)
+ return -ENOMEM;
+
+ job->req = req;
+ job->reply = sreq->sense;
+ job->reply_len = sreq->sense_len;
+ job->dd_data = job + 1;
+
+ return 0;
+}
+
+static void bsg_exit_rq(struct request_queue *q, struct request *req)
+{
+ struct bsg_job *job = blk_mq_rq_to_pdu(req);
+ struct scsi_request *sreq = &job->sreq;
+
+ kfree(sreq->sense);
+}
+
/**
* bsg_setup_queue - Create and add the bsg hooks so we can receive requests
* @dev: device to attach bsg device to
@@ -235,7 +248,9 @@ struct request_queue *bsg_setup_queue(struct device *dev, char *name,
q = blk_alloc_queue(GFP_KERNEL);
if (!q)
return ERR_PTR(-ENOMEM);
- q->cmd_size = sizeof(struct scsi_request);
+ q->cmd_size = sizeof(struct bsg_job) + dd_job_size;
+ q->init_rq_fn = bsg_init_rq;
+ q->exit_rq_fn = bsg_exit_rq;
q->request_fn = bsg_request_fn;
ret = blk_init_allocated_queue(q);
@@ -243,7 +258,6 @@ struct request_queue *bsg_setup_queue(struct device *dev, char *name,
goto out_cleanup_queue;
q->queuedata = dev;
- q->bsg_job_size = dd_job_size;
q->bsg_job_fn = job_fn;
queue_flag_set_unlocked(QUEUE_FLAG_BIDI, q);
queue_flag_set_unlocked(QUEUE_FLAG_SCSI_PASSTHROUGH, q);
diff --git a/block/compat_ioctl.c b/block/compat_ioctl.c
index 38554c2ea38a..abaf9d78a206 100644
--- a/block/compat_ioctl.c
+++ b/block/compat_ioctl.c
@@ -79,7 +79,7 @@ static int compat_hdio_getgeo(struct gendisk *disk, struct block_device *bdev,
static int compat_hdio_ioctl(struct block_device *bdev, fmode_t mode,
unsigned int cmd, unsigned long arg)
{
- unsigned long *__user p;
+ unsigned long __user *p;
int error;
p = compat_alloc_user_space(sizeof(unsigned long));