summaryrefslogtreecommitdiffstats
path: root/drivers
diff options
context:
space:
mode:
authorBart Van Assche <bart.vanassche@wdc.com>2017-08-17 13:13:26 -0700
committerJens Axboe <axboe@kernel.dk>2017-08-18 08:45:29 -0600
commitf18c17c889e2d3a9fa079ca883534b41d9dd3155 (patch)
tree5fab9bf17bbd2d62a1a897fbe162be3c086571b6 /drivers
parent32494df9a5ae9aaf8bed23068e6ec1aeb3196ebf (diff)
downloadlinux-f18c17c889e2d3a9fa079ca883534b41d9dd3155.tar.bz2
skd: Enable request tags for the block layer queue
Use the request tag when allocating a skd_fitmsg_context or skd_request_context such that the lists used to track free elements can be eliminated. Swap the skd_end_request() and skd_release_req() calls to avoid triggering a use-after-free. Remove skd_fitmsg_context.state and .outstanding because FIT messages are shared among requests and because updating a FIT message after a request has finished whould trigger a use-after-free. Signed-off-by: Bart Van Assche <bart.vanassche@wdc.com> Cc: Christoph Hellwig <hch@lst.de> Cc: Hannes Reinecke <hare@suse.de> Cc: Johannes Thumshirn <jthumshirn@suse.de> Signed-off-by: Jens Axboe <axboe@kernel.dk>
Diffstat (limited to 'drivers')
-rw-r--r--drivers/block/skd_main.c267
1 files changed, 73 insertions, 194 deletions
diff --git a/drivers/block/skd_main.c b/drivers/block/skd_main.c
index 392c898d86e2..35343fbf4144 100644
--- a/drivers/block/skd_main.c
+++ b/drivers/block/skd_main.c
@@ -16,6 +16,7 @@
#include <linux/slab.h>
#include <linux/spinlock.h>
#include <linux/blkdev.h>
+#include <linux/blk-mq.h>
#include <linux/sched.h>
#include <linux/interrupt.h>
#include <linux/compiler.h>
@@ -154,11 +155,6 @@ enum skd_req_state {
SKD_REQ_STATE_TIMEOUT,
};
-enum skd_fit_msg_state {
- SKD_MSG_STATE_IDLE,
- SKD_MSG_STATE_BUSY,
-};
-
enum skd_check_status_action {
SKD_CHECK_STATUS_REPORT_GOOD,
SKD_CHECK_STATUS_REPORT_SMART_ALERT,
@@ -173,12 +169,7 @@ struct skd_msg_buf {
};
struct skd_fitmsg_context {
- enum skd_fit_msg_state state;
-
- struct skd_fitmsg_context *next;
-
u32 id;
- u16 outstanding;
u32 length;
@@ -189,8 +180,6 @@ struct skd_fitmsg_context {
struct skd_request_context {
enum skd_req_state state;
- struct skd_request_context *next;
-
u16 id;
u32 fitmsg_id;
@@ -264,10 +253,8 @@ struct skd_device {
u32 timeout_slot[SKD_N_TIMEOUT_SLOT];
u32 timeout_stamp;
- struct skd_fitmsg_context *skmsg_free_list;
struct skd_fitmsg_context *skmsg_table;
- struct skd_request_context *skreq_free_list;
struct skd_request_context *skreq_table;
struct skd_special_context internal_skspcl;
@@ -387,8 +374,8 @@ static void skd_send_fitmsg(struct skd_device *skdev,
static void skd_send_special_fitmsg(struct skd_device *skdev,
struct skd_special_context *skspcl);
static void skd_request_fn(struct request_queue *rq);
-static void skd_end_request(struct skd_device *skdev,
- struct skd_request_context *skreq, blk_status_t status);
+static void skd_end_request(struct skd_device *skdev, struct request *req,
+ blk_status_t status);
static bool skd_preop_sg_list(struct skd_device *skdev,
struct skd_request_context *skreq);
static void skd_postop_sg_list(struct skd_device *skdev,
@@ -405,8 +392,6 @@ static void skd_soft_reset(struct skd_device *skdev);
const char *skd_drive_state_to_str(int state);
const char *skd_skdev_state_to_str(enum skd_drvr_state state);
static void skd_log_skdev(struct skd_device *skdev, const char *event);
-static void skd_log_skmsg(struct skd_device *skdev,
- struct skd_fitmsg_context *skmsg, const char *event);
static void skd_log_skreq(struct skd_device *skdev,
struct skd_request_context *skreq, const char *event);
@@ -424,7 +409,7 @@ static void skd_fail_all_pending(struct skd_device *skdev)
req = blk_peek_request(q);
if (req == NULL)
break;
- blk_start_request(req);
+ WARN_ON_ONCE(blk_queue_start_tag(q, req));
__blk_end_request_all(req, BLK_STS_IOERR);
}
}
@@ -523,6 +508,7 @@ static void skd_request_fn(struct request_queue *q)
u64 cmdctxt;
u32 timo_slot;
int flush, fua;
+ u32 tag;
if (skdev->state != SKD_DRVR_STATE_ONLINE) {
if (skd_fail_all(q))
@@ -531,9 +517,7 @@ static void skd_request_fn(struct request_queue *q)
}
if (blk_queue_stopped(skdev->queue)) {
- if (skdev->skmsg_free_list == NULL ||
- skdev->skreq_free_list == NULL ||
- skdev->in_flight >= skdev->queue_low_water_mark)
+ if (skdev->in_flight >= skdev->queue_low_water_mark)
/* There is still some kind of shortage */
return;
@@ -581,27 +565,6 @@ static void skd_request_fn(struct request_queue *q)
break;
}
- /* Is a skd_request_context available? */
- skreq = skdev->skreq_free_list;
- if (skreq == NULL) {
- dev_dbg(&skdev->pdev->dev, "Out of req=%p\n", q);
- break;
- }
- SKD_ASSERT(skreq->state == SKD_REQ_STATE_IDLE);
- SKD_ASSERT((skreq->id & SKD_ID_INCR) == 0);
-
- /* Now we check to see if we can get a fit msg */
- if (skmsg == NULL) {
- if (skdev->skmsg_free_list == NULL) {
- dev_dbg(&skdev->pdev->dev, "Out of msg\n");
- break;
- }
- }
-
- skreq->flush_cmd = 0;
- skreq->n_sg = 0;
- skreq->sg_byte_count = 0;
-
/*
* OK to now dequeue request from q.
*
@@ -609,7 +572,22 @@ static void skd_request_fn(struct request_queue *q)
* the native request. Note that skd_request_context is
* available but is still at the head of the free list.
*/
- blk_start_request(req);
+ WARN_ON_ONCE(blk_queue_start_tag(q, req));
+
+ tag = blk_mq_unique_tag(req);
+ WARN_ONCE(tag >= skd_max_queue_depth,
+ "%#x > %#x (nr_requests = %lu)\n", tag,
+ skd_max_queue_depth, q->nr_requests);
+
+ skreq = &skdev->skreq_table[tag];
+ SKD_ASSERT(skreq->state == SKD_REQ_STATE_IDLE);
+ SKD_ASSERT((skreq->id & SKD_ID_INCR) == 0);
+
+ skreq->id = tag + SKD_ID_RW_REQUEST;
+ skreq->flush_cmd = 0;
+ skreq->n_sg = 0;
+ skreq->sg_byte_count = 0;
+
skreq->req = req;
skreq->fitmsg_id = 0;
@@ -618,27 +596,13 @@ static void skd_request_fn(struct request_queue *q)
if (req->bio && !skd_preop_sg_list(skdev, skreq)) {
dev_dbg(&skdev->pdev->dev, "error Out\n");
- skd_end_request(skdev, skreq, BLK_STS_RESOURCE);
+ skd_end_request(skdev, skreq->req, BLK_STS_RESOURCE);
continue;
}
/* Either a FIT msg is in progress or we have to start one. */
if (skmsg == NULL) {
- /* Are there any FIT msg buffers available? */
- skmsg = skdev->skmsg_free_list;
- if (skmsg == NULL) {
- dev_dbg(&skdev->pdev->dev,
- "Out of msg skdev=%p\n",
- skdev);
- break;
- }
- SKD_ASSERT(skmsg->state == SKD_MSG_STATE_IDLE);
- SKD_ASSERT((skmsg->id & SKD_ID_INCR) == 0);
-
- skdev->skmsg_free_list = skmsg->next;
-
- skmsg->state = SKD_MSG_STATE_BUSY;
- skmsg->id += SKD_ID_INCR;
+ skmsg = &skdev->skmsg_table[tag];
/* Initialize the FIT msg header */
fmh = &skmsg->msg_buf->fmh;
@@ -673,7 +637,6 @@ static void skd_request_fn(struct request_queue *q)
cpu_to_be32(skreq->sg_byte_count);
/* Complete resource allocations. */
- skdev->skreq_free_list = skreq->next;
skreq->state = SKD_REQ_STATE_BUSY;
skreq->id += SKD_ID_INCR;
@@ -717,23 +680,22 @@ static void skd_request_fn(struct request_queue *q)
blk_stop_queue(skdev->queue);
}
-static void skd_end_request(struct skd_device *skdev,
- struct skd_request_context *skreq, blk_status_t error)
+static void skd_end_request(struct skd_device *skdev, struct request *req,
+ blk_status_t error)
{
if (unlikely(error)) {
- struct request *req = skreq->req;
char *cmd = (rq_data_dir(req) == READ) ? "read" : "write";
u32 lba = (u32)blk_rq_pos(req);
u32 count = blk_rq_sectors(req);
dev_err(&skdev->pdev->dev,
"Error cmd=%s sect=%u count=%u id=0x%x\n", cmd, lba,
- count, skreq->id);
+ count, req->tag);
} else
- dev_dbg(&skdev->pdev->dev, "id=0x%x error=%d\n", skreq->id,
+ dev_dbg(&skdev->pdev->dev, "id=0x%x error=%d\n", req->tag,
error);
- __blk_end_request_all(skreq->req, error);
+ __blk_end_request_all(req, error);
}
static bool skd_preop_sg_list(struct skd_device *skdev,
@@ -1346,7 +1308,6 @@ static void skd_send_fitmsg(struct skd_device *skdev,
struct skd_fitmsg_context *skmsg)
{
u64 qcmd;
- struct fit_msg_hdr *fmh;
dev_dbg(&skdev->pdev->dev, "dma address 0x%llx, busy=%d\n",
skmsg->mb_dma_address, skdev->in_flight);
@@ -1355,9 +1316,6 @@ static void skd_send_fitmsg(struct skd_device *skdev,
qcmd = skmsg->mb_dma_address;
qcmd |= FIT_QCMD_QID_NORMAL;
- fmh = &skmsg->msg_buf->fmh;
- skmsg->outstanding = fmh->num_protocol_cmds_coalesced;
-
if (unlikely(skdev->dbg_level > 1)) {
u8 *bp = (u8 *)skmsg->msg_buf;
int i;
@@ -1547,19 +1505,20 @@ skd_check_status(struct skd_device *skdev,
}
static void skd_resolve_req_exception(struct skd_device *skdev,
- struct skd_request_context *skreq)
+ struct skd_request_context *skreq,
+ struct request *req)
{
u8 cmp_status = skreq->completion.status;
switch (skd_check_status(skdev, cmp_status, &skreq->err_info)) {
case SKD_CHECK_STATUS_REPORT_GOOD:
case SKD_CHECK_STATUS_REPORT_SMART_ALERT:
- skd_end_request(skdev, skreq, BLK_STS_OK);
+ skd_end_request(skdev, req, BLK_STS_OK);
break;
case SKD_CHECK_STATUS_BUSY_IMMINENT:
skd_log_skreq(skdev, skreq, "retry(busy)");
- blk_requeue_request(skdev->queue, skreq->req);
+ blk_requeue_request(skdev->queue, req);
dev_info(&skdev->pdev->dev, "drive BUSY imminent\n");
skdev->state = SKD_DRVR_STATE_BUSY_IMMINENT;
skdev->timer_countdown = SKD_TIMER_MINUTES(20);
@@ -1567,16 +1526,16 @@ static void skd_resolve_req_exception(struct skd_device *skdev,
break;
case SKD_CHECK_STATUS_REQUEUE_REQUEST:
- if ((unsigned long) ++skreq->req->special < SKD_MAX_RETRIES) {
+ if ((unsigned long) ++req->special < SKD_MAX_RETRIES) {
skd_log_skreq(skdev, skreq, "retry");
- blk_requeue_request(skdev->queue, skreq->req);
+ blk_requeue_request(skdev->queue, req);
break;
}
/* fall through */
case SKD_CHECK_STATUS_REPORT_ERROR:
default:
- skd_end_request(skdev, skreq, BLK_STS_IOERR);
+ skd_end_request(skdev, req, BLK_STS_IOERR);
break;
}
}
@@ -1585,45 +1544,9 @@ static void skd_resolve_req_exception(struct skd_device *skdev,
static void skd_release_skreq(struct skd_device *skdev,
struct skd_request_context *skreq)
{
- u32 msg_slot;
- struct skd_fitmsg_context *skmsg;
-
u32 timo_slot;
/*
- * Reclaim the FIT msg buffer if this is
- * the first of the requests it carried to
- * be completed. The FIT msg buffer used to
- * send this request cannot be reused until
- * we are sure the s1120 card has copied
- * it to its memory. The FIT msg might have
- * contained several requests. As soon as
- * any of them are completed we know that
- * the entire FIT msg was transferred.
- * Only the first completed request will
- * match the FIT msg buffer id. The FIT
- * msg buffer id is immediately updated.
- * When subsequent requests complete the FIT
- * msg buffer id won't match, so we know
- * quite cheaply that it is already done.
- */
- msg_slot = skreq->fitmsg_id & SKD_ID_SLOT_MASK;
- SKD_ASSERT(msg_slot < skdev->num_fitmsg_context);
-
- skmsg = &skdev->skmsg_table[msg_slot];
- if (skmsg->id == skreq->fitmsg_id) {
- SKD_ASSERT(skmsg->state == SKD_MSG_STATE_BUSY);
- SKD_ASSERT(skmsg->outstanding > 0);
- skmsg->outstanding--;
- if (skmsg->outstanding == 0) {
- skmsg->state = SKD_MSG_STATE_IDLE;
- skmsg->id += SKD_ID_INCR;
- skmsg->next = skdev->skmsg_free_list;
- skdev->skmsg_free_list = skmsg;
- }
- }
-
- /*
* Decrease the number of active requests.
* Also decrements the count in the timeout slot.
*/
@@ -1644,8 +1567,20 @@ static void skd_release_skreq(struct skd_device *skdev,
*/
skreq->state = SKD_REQ_STATE_IDLE;
skreq->id += SKD_ID_INCR;
- skreq->next = skdev->skreq_free_list;
- skdev->skreq_free_list = skreq;
+}
+
+static struct skd_request_context *skd_skreq_from_rq(struct skd_device *skdev,
+ struct request *rq)
+{
+ struct skd_request_context *skreq;
+ int i;
+
+ for (i = 0, skreq = skdev->skreq_table; i < skdev->num_fitmsg_context;
+ i++, skreq++)
+ if (skreq->req == rq)
+ return skreq;
+
+ return NULL;
}
static int skd_isr_completion_posted(struct skd_device *skdev,
@@ -1654,7 +1589,8 @@ static int skd_isr_completion_posted(struct skd_device *skdev,
struct fit_completion_entry_v1 *skcmp;
struct fit_comp_error_info *skerr;
u16 req_id;
- u32 req_slot;
+ u32 tag;
+ struct request *rq;
struct skd_request_context *skreq;
u16 cmp_cntxt;
u8 cmp_status;
@@ -1702,18 +1638,24 @@ static int skd_isr_completion_posted(struct skd_device *skdev,
* r/w request (see skd_start() above) or a special request.
*/
req_id = cmp_cntxt;
- req_slot = req_id & SKD_ID_SLOT_AND_TABLE_MASK;
+ tag = req_id & SKD_ID_SLOT_AND_TABLE_MASK;
/* Is this other than a r/w request? */
- if (req_slot >= skdev->num_req_context) {
+ if (tag >= skdev->num_req_context) {
/*
* This is not a completion for a r/w request.
*/
+ WARN_ON_ONCE(blk_map_queue_find_tag(skdev->queue->
+ queue_tags, tag));
skd_complete_other(skdev, skcmp, skerr);
continue;
}
- skreq = &skdev->skreq_table[req_slot];
+ rq = blk_map_queue_find_tag(skdev->queue->queue_tags, tag);
+ if (WARN(!rq, "No request for tag %#x -> %#x\n", cmp_cntxt,
+ tag))
+ continue;
+ skreq = skd_skreq_from_rq(skdev, rq);
/*
* Make sure the request ID for the slot matches.
@@ -1745,26 +1687,16 @@ static int skd_isr_completion_posted(struct skd_device *skdev,
if (skreq->n_sg > 0)
skd_postop_sg_list(skdev, skreq);
- if (!skreq->req) {
- dev_dbg(&skdev->pdev->dev,
- "NULL backptr skdreq %p, req=0x%x req_id=0x%x\n",
- skreq, skreq->id, req_id);
- } else {
- /*
- * Capture the outcome and post it back to the
- * native request.
- */
- if (likely(cmp_status == SAM_STAT_GOOD))
- skd_end_request(skdev, skreq, BLK_STS_OK);
- else
- skd_resolve_req_exception(skdev, skreq);
- }
+ /* Mark the FIT msg and timeout slot as free. */
+ skd_release_skreq(skdev, skreq);
/*
- * Release the skreq, its FIT msg (if one), timeout slot,
- * and queue depth.
+ * Capture the outcome and post it back to the native request.
*/
- skd_release_skreq(skdev, skreq);
+ if (likely(cmp_status == SAM_STAT_GOOD))
+ skd_end_request(skdev, rq, BLK_STS_OK);
+ else
+ skd_resolve_req_exception(skdev, skreq, rq);
/* skd_isr_comp_limit equal zero means no limit */
if (limit) {
@@ -2099,44 +2031,26 @@ static void skd_recover_requests(struct skd_device *skdev)
for (i = 0; i < skdev->num_req_context; i++) {
struct skd_request_context *skreq = &skdev->skreq_table[i];
+ struct request *req = skreq->req;
if (skreq->state == SKD_REQ_STATE_BUSY) {
skd_log_skreq(skdev, skreq, "recover");
SKD_ASSERT((skreq->id & SKD_ID_INCR) != 0);
- SKD_ASSERT(skreq->req != NULL);
+ SKD_ASSERT(req != NULL);
/* Release DMA resources for the request. */
if (skreq->n_sg > 0)
skd_postop_sg_list(skdev, skreq);
- skd_end_request(skdev, skreq, BLK_STS_IOERR);
-
skreq->req = NULL;
skreq->state = SKD_REQ_STATE_IDLE;
skreq->id += SKD_ID_INCR;
- }
- if (i > 0)
- skreq[-1].next = skreq;
- skreq->next = NULL;
- }
- skdev->skreq_free_list = skdev->skreq_table;
-
- for (i = 0; i < skdev->num_fitmsg_context; i++) {
- struct skd_fitmsg_context *skmsg = &skdev->skmsg_table[i];
- if (skmsg->state == SKD_MSG_STATE_BUSY) {
- skd_log_skmsg(skdev, skmsg, "salvaged");
- SKD_ASSERT((skmsg->id & SKD_ID_INCR) != 0);
- skmsg->state = SKD_MSG_STATE_IDLE;
- skmsg->id += SKD_ID_INCR;
+ skd_end_request(skdev, req, BLK_STS_IOERR);
}
- if (i > 0)
- skmsg[-1].next = skmsg;
- skmsg->next = NULL;
}
- skdev->skmsg_free_list = skdev->skmsg_table;
for (i = 0; i < SKD_N_TIMEOUT_SLOT; i++)
skdev->timeout_slot[i] = 0;
@@ -2876,7 +2790,6 @@ static int skd_cons_skmsg(struct skd_device *skdev)
skmsg->id = i + SKD_ID_FIT_MSG;
- skmsg->state = SKD_MSG_STATE_IDLE;
skmsg->msg_buf = pci_alloc_consistent(skdev->pdev,
SKD_N_FITMSG_BYTES,
&skmsg->mb_dma_address);
@@ -2891,14 +2804,8 @@ static int skd_cons_skmsg(struct skd_device *skdev)
"not aligned: msg_buf %p mb_dma_address %#llx\n",
skmsg->msg_buf, skmsg->mb_dma_address);
memset(skmsg->msg_buf, 0, SKD_N_FITMSG_BYTES);
-
- skmsg->next = &skmsg[1];
}
- /* Free list is in order starting with the 0th entry. */
- skdev->skmsg_table[i - 1].next = NULL;
- skdev->skmsg_free_list = skdev->skmsg_table;
-
err_out:
return rc;
}
@@ -2958,10 +2865,7 @@ static int skd_cons_skreq(struct skd_device *skdev)
struct skd_request_context *skreq;
skreq = &skdev->skreq_table[i];
-
- skreq->id = i + SKD_ID_RW_REQUEST;
skreq->state = SKD_REQ_STATE_IDLE;
-
skreq->sg = kcalloc(skdev->sgs_per_request,
sizeof(struct scatterlist), GFP_KERNEL);
if (skreq->sg == NULL) {
@@ -2978,14 +2882,8 @@ static int skd_cons_skreq(struct skd_device *skdev)
rc = -ENOMEM;
goto err_out;
}
-
- skreq->next = &skreq[1];
}
- /* Free list is in order starting with the 0th entry. */
- skdev->skreq_table[i - 1].next = NULL;
- skdev->skreq_free_list = skdev->skreq_table;
-
err_out:
return rc;
}
@@ -3061,6 +2959,8 @@ static int skd_cons_disk(struct skd_device *skdev)
goto err_out;
}
blk_queue_bounce_limit(q, BLK_BOUNCE_HIGH);
+ q->nr_requests = skd_max_queue_depth / 2;
+ blk_queue_init_tags(q, skd_max_queue_depth, NULL, BLK_TAG_ALLOC_FIFO);
skdev->queue = q;
disk->queue = q;
@@ -3789,18 +3689,6 @@ const char *skd_skdev_state_to_str(enum skd_drvr_state state)
}
}
-static const char *skd_skmsg_state_to_str(enum skd_fit_msg_state state)
-{
- switch (state) {
- case SKD_MSG_STATE_IDLE:
- return "IDLE";
- case SKD_MSG_STATE_BUSY:
- return "BUSY";
- default:
- return "???";
- }
-}
-
static const char *skd_skreq_state_to_str(enum skd_req_state state)
{
switch (state) {
@@ -3832,15 +3720,6 @@ static void skd_log_skdev(struct skd_device *skdev, const char *event)
skdev->timeout_stamp, skdev->skcomp_cycle, skdev->skcomp_ix);
}
-static void skd_log_skmsg(struct skd_device *skdev,
- struct skd_fitmsg_context *skmsg, const char *event)
-{
- dev_dbg(&skdev->pdev->dev, "skmsg=%p event='%s'\n", skmsg, event);
- dev_dbg(&skdev->pdev->dev, " state=%s(%d) id=0x%04x length=%d\n",
- skd_skmsg_state_to_str(skmsg->state), skmsg->state, skmsg->id,
- skmsg->length);
-}
-
static void skd_log_skreq(struct skd_device *skdev,
struct skd_request_context *skreq, const char *event)
{