summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorTony Battersby <tonyb@cybernetics.com>2015-02-13 12:09:44 -0500
committerJames Bottomley <JBottomley@Parallels.com>2015-02-17 06:55:32 -0800
commit7568615c1054907ea8c7701ab86dad51aa099888 (patch)
tree9a8fbf348961fd4970423b51a41ce7ae7b3e0421
parent3b524a683af8991b4eab4182b947c65f0ce1421b (diff)
downloadlinux-7568615c1054907ea8c7701ab86dad51aa099888.tar.bz2
sg: fix unkillable I/O wait deadlock with scsi-mq
When using the write()/read() interface for submitting commands, the SCSI generic driver does not call blk_put_request() on a completed SCSI command until userspace calls read() to get the command completion. Since scsi-mq uses a fixed number of preallocated requests, this makes it possible for userspace to exhaust the entire preallocated supply of requests. For places in the kernel that call blk_get_request() with GFP_KERNEL, this can cause the calling process to deadlock in a permanent unkillable I/O wait in blk_get_request() -> ... -> bt_get(). For places in the kernel that call blk_get_request() with GFP_ATOMIC, this can cause blk_get_request() always to return -EWOULDBLOCK. Note that these problems happen only if scsi-mq is enabled. Prevent the problems by calling blk_put_request() as soon as the SCSI command completes instead of waiting for userspace to call read(). Cc: <stable@vger.kernel.org> # 3.17+ Signed-off-by: Tony Battersby <tonyb@cybernetics.com> Acked-by: Douglas Gilbert <dgilbert@interlog.com> Tested-by: Douglas Gilbert <dgilbert@interlog.com> Signed-off-by: James Bottomley <JBottomley@Parallels.com>
-rw-r--r--drivers/scsi/sg.c17
1 files changed, 14 insertions, 3 deletions
diff --git a/drivers/scsi/sg.c b/drivers/scsi/sg.c
index 6ad1480e87b7..208bf3c8a16c 100644
--- a/drivers/scsi/sg.c
+++ b/drivers/scsi/sg.c
@@ -1335,6 +1335,17 @@ sg_rq_end_io(struct request *rq, int uptodate)
}
/* Rely on write phase to clean out srp status values, so no "else" */
+ /*
+ * Free the request as soon as it is complete so that its resources
+ * can be reused without waiting for userspace to read() the
+ * result. But keep the associated bio (if any) around until
+ * blk_rq_unmap_user() can be called from user context.
+ */
+ srp->rq = NULL;
+ if (rq->cmd != rq->__cmd)
+ kfree(rq->cmd);
+ __blk_put_request(rq->q, rq);
+
write_lock_irqsave(&sfp->rq_list_lock, iflags);
if (unlikely(srp->orphan)) {
if (sfp->keep_orphan)
@@ -1762,10 +1773,10 @@ sg_finish_rem_req(Sg_request *srp)
SCSI_LOG_TIMEOUT(4, sg_printk(KERN_INFO, sfp->parentdp,
"sg_finish_rem_req: res_used=%d\n",
(int) srp->res_used));
- if (srp->rq) {
- if (srp->bio)
- ret = blk_rq_unmap_user(srp->bio);
+ if (srp->bio)
+ ret = blk_rq_unmap_user(srp->bio);
+ if (srp->rq) {
if (srp->rq->cmd != srp->rq->__cmd)
kfree(srp->rq->cmd);
blk_put_request(srp->rq);