From 1b8d26206134458044b0689f48194af00c96d406 Mon Sep 17 00:00:00 2001 From: Mike Christie Date: Thu, 17 May 2012 23:56:56 -0500 Subject: [SCSI] add new SDEV_TRANSPORT_OFFLINE state This patch adds a new state SDEV_TRANSPORT_OFFLINE. It will be used by transport classes to offline devices for cases like when the fast_io_fail/recovery_tmo fires. In those cases we want all IO to fail, and we have not yet escalated to dev_loss_tmo behavior where we are removing the devices. Currently to handle this state, transport classes are setting the scsi_device's state to running, setting their internal session/port structs state to something that indicates failed, and then failing IO from some transport check in the queuecommand. The reason for the new value is so that users can distinguish between a device failure that is a result of a transport problem vs the wide range of errors that devices get offlined for when a scsi command times out and we offline the devices there. It also fixes the confusion as to why the transport class is failing IO, but has set the device state from blocked to running. Signed-off-by: Mike Christie Signed-off-by: James Bottomley --- drivers/scsi/scsi_sysfs.c | 1 + 1 file changed, 1 insertion(+) (limited to 'drivers/scsi/scsi_sysfs.c') diff --git a/drivers/scsi/scsi_sysfs.c b/drivers/scsi/scsi_sysfs.c index 04c2a278076e..5747478a2bf8 100644 --- a/drivers/scsi/scsi_sysfs.c +++ b/drivers/scsi/scsi_sysfs.c @@ -35,6 +35,7 @@ static const struct { { SDEV_DEL, "deleted" }, { SDEV_QUIESCE, "quiesce" }, { SDEV_OFFLINE, "offline" }, + { SDEV_TRANSPORT_OFFLINE, "transport-offline" }, { SDEV_BLOCK, "blocked" }, { SDEV_CREATED_BLOCK, "created-blocked" }, }; -- cgit v1.2.3 From 67bd94130015c507011af37858989b199c52e1de Mon Sep 17 00:00:00 2001 From: Bart Van Assche Date: Fri, 29 Jun 2012 15:33:22 +0000 Subject: [SCSI] Fix device removal NULL pointer dereference Use blk_queue_dead() to test whether the queue is dead instead of !sdev. Since scsi_prep_fn() may be invoked concurrently with __scsi_remove_device(), keep the queuedata (sdev) pointer in __scsi_remove_device(). This patch fixes a kernel oops that can be triggered by USB device removal. See also http://www.spinics.net/lists/linux-scsi/msg56254.html. Other changes included in this patch: - Swap the blk_cleanup_queue() and kfree() calls in scsi_host_dev_release() to make that code easier to grasp. - Remove the queue dead check from scsi_run_queue() since the queue state can change anyway at any point in that function where the queue lock is not held. - Remove the queue dead check from the start of scsi_request_fn() since it is redundant with the scsi_device_online() check. Reported-by: Jun'ichi Nomura Signed-off-by: Bart Van Assche Reviewed-by: Mike Christie Reviewed-by: Tejun Heo Cc: Signed-off-by: James Bottomley --- drivers/scsi/hosts.c | 7 ++++--- drivers/scsi/scsi_lib.c | 32 ++++---------------------------- drivers/scsi/scsi_priv.h | 1 - drivers/scsi/scsi_sysfs.c | 5 +---- 4 files changed, 9 insertions(+), 36 deletions(-) (limited to 'drivers/scsi/scsi_sysfs.c') diff --git a/drivers/scsi/hosts.c b/drivers/scsi/hosts.c index 2b6a03de5787..593085a52275 100644 --- a/drivers/scsi/hosts.c +++ b/drivers/scsi/hosts.c @@ -290,6 +290,7 @@ static void scsi_host_dev_release(struct device *dev) struct Scsi_Host *shost = dev_to_shost(dev); struct device *parent = dev->parent; struct request_queue *q; + void *queuedata; scsi_proc_hostdir_rm(shost->hostt); @@ -299,9 +300,9 @@ static void scsi_host_dev_release(struct device *dev) destroy_workqueue(shost->work_q); q = shost->uspace_req_q; if (q) { - kfree(q->queuedata); - q->queuedata = NULL; - scsi_free_queue(q); + queuedata = q->queuedata; + blk_cleanup_queue(q); + kfree(queuedata); } scsi_destroy_command_freelist(shost); diff --git a/drivers/scsi/scsi_lib.c b/drivers/scsi/scsi_lib.c index 9f00c128e4d1..4acf5c284c2e 100644 --- a/drivers/scsi/scsi_lib.c +++ b/drivers/scsi/scsi_lib.c @@ -406,10 +406,6 @@ static void scsi_run_queue(struct request_queue *q) LIST_HEAD(starved_list); unsigned long flags; - /* if the device is dead, sdev will be NULL, so no queue to run */ - if (!sdev) - return; - shost = sdev->host; if (scsi_target(sdev)->single_lun) scsi_single_lun_run(sdev); @@ -1371,16 +1367,16 @@ static inline int scsi_host_queue_ready(struct request_queue *q, * may be changed after request stacking drivers call the function, * regardless of taking lock or not. * - * When scsi can't dispatch I/Os anymore and needs to kill I/Os - * (e.g. !sdev), scsi needs to return 'not busy'. - * Otherwise, request stacking drivers may hold requests forever. + * When scsi can't dispatch I/Os anymore and needs to kill I/Os scsi + * needs to return 'not busy'. Otherwise, request stacking drivers + * may hold requests forever. */ static int scsi_lld_busy(struct request_queue *q) { struct scsi_device *sdev = q->queuedata; struct Scsi_Host *shost; - if (!sdev) + if (blk_queue_dead(q)) return 0; shost = sdev->host; @@ -1491,12 +1487,6 @@ static void scsi_request_fn(struct request_queue *q) struct scsi_cmnd *cmd; struct request *req; - if (!sdev) { - while ((req = blk_peek_request(q)) != NULL) - scsi_kill_request(req, q); - return; - } - if(!get_device(&sdev->sdev_gendev)) /* We must be tearing the block queue down already */ return; @@ -1698,20 +1688,6 @@ struct request_queue *scsi_alloc_queue(struct scsi_device *sdev) return q; } -void scsi_free_queue(struct request_queue *q) -{ - unsigned long flags; - - WARN_ON(q->queuedata); - - /* cause scsi_request_fn() to kill all non-finished requests */ - spin_lock_irqsave(q->queue_lock, flags); - q->request_fn(q); - spin_unlock_irqrestore(q->queue_lock, flags); - - blk_cleanup_queue(q); -} - /* * Function: scsi_block_requests() * diff --git a/drivers/scsi/scsi_priv.h b/drivers/scsi/scsi_priv.h index cbfe5df24a3b..291db6effffb 100644 --- a/drivers/scsi/scsi_priv.h +++ b/drivers/scsi/scsi_priv.h @@ -85,7 +85,6 @@ extern void scsi_next_command(struct scsi_cmnd *cmd); extern void scsi_io_completion(struct scsi_cmnd *, unsigned int); extern void scsi_run_host_queues(struct Scsi_Host *shost); extern struct request_queue *scsi_alloc_queue(struct scsi_device *sdev); -extern void scsi_free_queue(struct request_queue *q); extern int scsi_init_queue(void); extern void scsi_exit_queue(void); struct request_queue; diff --git a/drivers/scsi/scsi_sysfs.c b/drivers/scsi/scsi_sysfs.c index 5747478a2bf8..9aa578a5da11 100644 --- a/drivers/scsi/scsi_sysfs.c +++ b/drivers/scsi/scsi_sysfs.c @@ -972,11 +972,8 @@ void __scsi_remove_device(struct scsi_device *sdev) sdev->host->hostt->slave_destroy(sdev); transport_destroy_device(dev); - /* cause the request function to reject all I/O requests */ - sdev->request_queue->queuedata = NULL; - /* Freeing the queue signals to block that we're done */ - scsi_free_queue(sdev->request_queue); + blk_cleanup_queue(sdev->request_queue); put_device(dev); } -- cgit v1.2.3 From b485462aca7df4e32bcf7efb6f84a69e8b640243 Mon Sep 17 00:00:00 2001 From: Bart Van Assche Date: Fri, 29 Jun 2012 15:36:07 +0000 Subject: [SCSI] Stop accepting SCSI requests before removing a device Avoid that the code for requeueing SCSI requests triggers a crash by making sure that that code isn't scheduled anymore after a device has been removed. Also, source code inspection of __scsi_remove_device() revealed a race condition in this function: no new SCSI requests must be accepted for a SCSI device after device removal started. Signed-off-by: Bart Van Assche Reviewed-by: Mike Christie Acked-by: Tejun Heo Signed-off-by: James Bottomley --- drivers/scsi/scsi_lib.c | 7 ++++--- drivers/scsi/scsi_sysfs.c | 11 +++++++++-- 2 files changed, 13 insertions(+), 5 deletions(-) (limited to 'drivers/scsi/scsi_sysfs.c') diff --git a/drivers/scsi/scsi_lib.c b/drivers/scsi/scsi_lib.c index 8af6a6497c4f..b58327758c58 100644 --- a/drivers/scsi/scsi_lib.c +++ b/drivers/scsi/scsi_lib.c @@ -155,13 +155,14 @@ static void __scsi_queue_insert(struct scsi_cmnd *cmd, int reason, int unbusy) /* * Requeue this command. It will go before all other commands - * that are already in the queue. + * that are already in the queue. Schedule requeue work under + * lock such that the kblockd_schedule_work() call happens + * before blk_cleanup_queue() finishes. */ spin_lock_irqsave(q->queue_lock, flags); blk_requeue_request(q, cmd->request); - spin_unlock_irqrestore(q->queue_lock, flags); - kblockd_schedule_work(q, &device->requeue_work); + spin_unlock_irqrestore(q->queue_lock, flags); } /* diff --git a/drivers/scsi/scsi_sysfs.c b/drivers/scsi/scsi_sysfs.c index 9aa578a5da11..d19d7e99626d 100644 --- a/drivers/scsi/scsi_sysfs.c +++ b/drivers/scsi/scsi_sysfs.c @@ -967,13 +967,20 @@ void __scsi_remove_device(struct scsi_device *sdev) device_del(dev); } else put_device(&sdev->sdev_dev); + + /* + * Stop accepting new requests and wait until all queuecommand() and + * scsi_run_queue() invocations have finished before tearing down the + * device. + */ scsi_device_set_state(sdev, SDEV_DEL); + blk_cleanup_queue(sdev->request_queue); + cancel_work_sync(&sdev->requeue_work); + if (sdev->host->hostt->slave_destroy) sdev->host->hostt->slave_destroy(sdev); transport_destroy_device(dev); - /* Freeing the queue signals to block that we're done */ - blk_cleanup_queue(sdev->request_queue); put_device(dev); } -- cgit v1.2.3 From 3b661a92e869ebe2358de8f4b3230ad84f7fce51 Mon Sep 17 00:00:00 2001 From: Dan Williams Date: Thu, 21 Jun 2012 23:47:28 -0700 Subject: [SCSI] fix hot unplug vs async scan race The following crash results from cases where the end_device has been removed before scsi_sysfs_add_sdev has had a chance to run. BUG: unable to handle kernel NULL pointer dereference at 0000000000000098 IP: [] sysfs_create_dir+0x32/0xb6 ... Call Trace: [] kobject_add_internal+0x120/0x1e3 [] ? trace_hardirqs_on+0xd/0xf [] kobject_add_varg+0x41/0x50 [] kobject_add+0x64/0x66 [] device_add+0x12d/0x63a [] ? _raw_spin_unlock_irqrestore+0x47/0x56 [] ? module_refcount+0x89/0xa0 [] scsi_sysfs_add_sdev+0x4e/0x28a [] do_scan_async+0x9c/0x145 ...teach scsi_sysfs_add_devices() to check for deleted devices() before trying to add them, and teach scsi_remove_target() how to remove targets that have not been added via device_add(). Cc: Reported-by: Dariusz Majchrzak Signed-off-by: Dan Williams Signed-off-by: James Bottomley --- drivers/scsi/scsi_scan.c | 3 +++ drivers/scsi/scsi_sysfs.c | 41 ++++++++++++++++++++++++++--------------- 2 files changed, 29 insertions(+), 15 deletions(-) (limited to 'drivers/scsi/scsi_sysfs.c') diff --git a/drivers/scsi/scsi_scan.c b/drivers/scsi/scsi_scan.c index 2e5fe584aad3..f55e5f166973 100644 --- a/drivers/scsi/scsi_scan.c +++ b/drivers/scsi/scsi_scan.c @@ -1717,6 +1717,9 @@ static void scsi_sysfs_add_devices(struct Scsi_Host *shost) { struct scsi_device *sdev; shost_for_each_device(sdev, shost) { + /* target removed before the device could be added */ + if (sdev->sdev_state == SDEV_DEL) + continue; if (!scsi_host_scan_allowed(shost) || scsi_sysfs_add_sdev(sdev) != 0) __scsi_remove_device(sdev); diff --git a/drivers/scsi/scsi_sysfs.c b/drivers/scsi/scsi_sysfs.c index d19d7e99626d..093d4f6a54d2 100644 --- a/drivers/scsi/scsi_sysfs.c +++ b/drivers/scsi/scsi_sysfs.c @@ -1005,7 +1005,6 @@ static void __scsi_remove_target(struct scsi_target *starget) struct scsi_device *sdev; spin_lock_irqsave(shost->host_lock, flags); - starget->reap_ref++; restart: list_for_each_entry(sdev, &shost->__devices, siblings) { if (sdev->channel != starget->channel || @@ -1019,14 +1018,6 @@ static void __scsi_remove_target(struct scsi_target *starget) goto restart; } spin_unlock_irqrestore(shost->host_lock, flags); - scsi_target_reap(starget); -} - -static int __remove_child (struct device * dev, void * data) -{ - if (scsi_is_target_device(dev)) - __scsi_remove_target(to_scsi_target(dev)); - return 0; } /** @@ -1039,14 +1030,34 @@ static int __remove_child (struct device * dev, void * data) */ void scsi_remove_target(struct device *dev) { - if (scsi_is_target_device(dev)) { - __scsi_remove_target(to_scsi_target(dev)); - return; + struct Scsi_Host *shost = dev_to_shost(dev->parent); + struct scsi_target *starget, *found; + unsigned long flags; + + restart: + found = NULL; + spin_lock_irqsave(shost->host_lock, flags); + list_for_each_entry(starget, &shost->__targets, siblings) { + if (starget->state == STARGET_DEL) + continue; + if (starget->dev.parent == dev || &starget->dev == dev) { + found = starget; + found->reap_ref++; + break; + } } + spin_unlock_irqrestore(shost->host_lock, flags); - get_device(dev); - device_for_each_child(dev, NULL, __remove_child); - put_device(dev); + if (found) { + __scsi_remove_target(found); + scsi_target_reap(found); + /* in the case where @dev has multiple starget children, + * continue removing. + * + * FIXME: does such a case exist? + */ + goto restart; + } } EXPORT_SYMBOL(scsi_remove_target); -- cgit v1.2.3