From 831e3405c2a344018a18fcc2665acc5a38c3a707 Mon Sep 17 00:00:00 2001 From: Ming Lei Date: Sat, 10 Oct 2020 11:25:39 +0800 Subject: scsi: core: Don't start concurrent async scan on same host The current scanning mechanism is supposed to fall back to a synchronous host scan if an asynchronous scan is in progress. However, this rule isn't strictly respected, scsi_prep_async_scan() doesn't hold scan_mutex when checking shost->async_scan. When scsi_scan_host() is called concurrently, two async scans on same host can be started and a hang in do_scan_async() is observed. Fixes this issue by checking & setting shost->async_scan atomically with shost->scan_mutex. Link: https://lore.kernel.org/r/20201010032539.426615-1-ming.lei@redhat.com Cc: Christoph Hellwig Cc: Ewan D. Milne Cc: Hannes Reinecke Cc: Bart Van Assche Reviewed-by: Lee Duncan Reviewed-by: Bart Van Assche Signed-off-by: Ming Lei Signed-off-by: Martin K. Petersen --- drivers/scsi/scsi_scan.c | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) (limited to 'drivers/scsi') diff --git a/drivers/scsi/scsi_scan.c b/drivers/scsi/scsi_scan.c index f2437a7570ce..9af50e6f94c4 100644 --- a/drivers/scsi/scsi_scan.c +++ b/drivers/scsi/scsi_scan.c @@ -1714,15 +1714,16 @@ static void scsi_sysfs_add_devices(struct Scsi_Host *shost) */ static struct async_scan_data *scsi_prep_async_scan(struct Scsi_Host *shost) { - struct async_scan_data *data; + struct async_scan_data *data = NULL; unsigned long flags; if (strncmp(scsi_scan_type, "sync", 4) == 0) return NULL; + mutex_lock(&shost->scan_mutex); if (shost->async_scan) { shost_printk(KERN_DEBUG, shost, "%s called twice\n", __func__); - return NULL; + goto err; } data = kmalloc(sizeof(*data), GFP_KERNEL); @@ -1733,7 +1734,6 @@ static struct async_scan_data *scsi_prep_async_scan(struct Scsi_Host *shost) goto err; init_completion(&data->prev_finished); - mutex_lock(&shost->scan_mutex); spin_lock_irqsave(shost->host_lock, flags); shost->async_scan = 1; spin_unlock_irqrestore(shost->host_lock, flags); @@ -1748,6 +1748,7 @@ static struct async_scan_data *scsi_prep_async_scan(struct Scsi_Host *shost) return data; err: + mutex_unlock(&shost->scan_mutex); kfree(data); return NULL; } -- cgit v1.2.3 From 98f9d33c5d0d779e6c96a4a8ac995085b11924f0 Mon Sep 17 00:00:00 2001 From: Daniel Wagner Date: Wed, 14 Oct 2020 09:30:48 +0200 Subject: scsi: qla2xxx: Return EBUSY on fcport deletion When the fcport is about to be deleted we should return EBUSY instead of ENODEV. Only for EBUSY will the request be requeued in a multipath setup. Also return EBUSY when the firmware has not yet started to avoid dropping the request. Link: https://lore.kernel.org/r/20201014073048.36219-1-dwagner@suse.de Reviewed-by: Arun Easi Signed-off-by: Daniel Wagner Signed-off-by: Martin K. Petersen --- drivers/scsi/qla2xxx/qla_nvme.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) (limited to 'drivers/scsi') diff --git a/drivers/scsi/qla2xxx/qla_nvme.c b/drivers/scsi/qla2xxx/qla_nvme.c index 1f9005125313..b7a1dc24db38 100644 --- a/drivers/scsi/qla2xxx/qla_nvme.c +++ b/drivers/scsi/qla2xxx/qla_nvme.c @@ -554,10 +554,12 @@ static int qla_nvme_post_cmd(struct nvme_fc_local_port *lport, fcport = qla_rport->fcport; - if (!qpair || !fcport || (qpair && !qpair->fw_started) || - (fcport && fcport->deleted)) + if (!qpair || !fcport) return -ENODEV; + if (!qpair->fw_started || fcport->deleted) + return -EBUSY; + vha = fcport->vha; if (!(fcport->nvme_flag & NVME_FLAG_REGISTERED)) -- cgit v1.2.3 From 665e0224a3d76f36da40bd9012270fa629aa42ed Mon Sep 17 00:00:00 2001 From: Tyrel Datwyler Date: Sat, 24 Oct 2020 19:13:55 -0500 Subject: scsi: ibmvscsi: Fix potential race after loss of transport After a loss of transport due to an adapter migration or crash/disconnect from the host partner there is a tiny window where we can race adjusting the request_limit of the adapter. The request limit is atomically increased/decreased to track the number of inflight requests against the allowed limit of our VIOS partner. After a transport loss we set the request_limit to zero to reflect this state. However, there is a window where the adapter may attempt to queue a command because the transport loss event hasn't been fully processed yet and request_limit is still greater than zero. The hypercall to send the event will fail and the error path will increment the request_limit as a result. If the adapter processes the transport event prior to this increment the request_limit becomes out of sync with the adapter state and can result in SCSI commands being submitted on the now reset connection prior to an SRP Login resulting in a protocol violation. Fix this race by protecting request_limit with the host lock when changing the value via atomic_set() to indicate no transport. Link: https://lore.kernel.org/r/20201025001355.4527-1-tyreld@linux.ibm.com Signed-off-by: Tyrel Datwyler Signed-off-by: Martin K. Petersen --- drivers/scsi/ibmvscsi/ibmvscsi.c | 36 ++++++++++++++++++++++++++---------- 1 file changed, 26 insertions(+), 10 deletions(-) (limited to 'drivers/scsi') diff --git a/drivers/scsi/ibmvscsi/ibmvscsi.c b/drivers/scsi/ibmvscsi/ibmvscsi.c index b1f3017b6547..29fcc44be2d5 100644 --- a/drivers/scsi/ibmvscsi/ibmvscsi.c +++ b/drivers/scsi/ibmvscsi/ibmvscsi.c @@ -806,6 +806,22 @@ static void purge_requests(struct ibmvscsi_host_data *hostdata, int error_code) spin_unlock_irqrestore(hostdata->host->host_lock, flags); } +/** + * ibmvscsi_set_request_limit - Set the adapter request_limit in response to + * an adapter failure, reset, or SRP Login. Done under host lock to prevent + * race with SCSI command submission. + * @hostdata: adapter to adjust + * @limit: new request limit + */ +static void ibmvscsi_set_request_limit(struct ibmvscsi_host_data *hostdata, int limit) +{ + unsigned long flags; + + spin_lock_irqsave(hostdata->host->host_lock, flags); + atomic_set(&hostdata->request_limit, limit); + spin_unlock_irqrestore(hostdata->host->host_lock, flags); +} + /** * ibmvscsi_reset_host - Reset the connection to the server * @hostdata: struct ibmvscsi_host_data to reset @@ -813,7 +829,7 @@ static void purge_requests(struct ibmvscsi_host_data *hostdata, int error_code) static void ibmvscsi_reset_host(struct ibmvscsi_host_data *hostdata) { scsi_block_requests(hostdata->host); - atomic_set(&hostdata->request_limit, 0); + ibmvscsi_set_request_limit(hostdata, 0); purge_requests(hostdata, DID_ERROR); hostdata->action = IBMVSCSI_HOST_ACTION_RESET; @@ -1146,13 +1162,13 @@ static void login_rsp(struct srp_event_struct *evt_struct) dev_info(hostdata->dev, "SRP_LOGIN_REJ reason %u\n", evt_struct->xfer_iu->srp.login_rej.reason); /* Login failed. */ - atomic_set(&hostdata->request_limit, -1); + ibmvscsi_set_request_limit(hostdata, -1); return; default: dev_err(hostdata->dev, "Invalid login response typecode 0x%02x!\n", evt_struct->xfer_iu->srp.login_rsp.opcode); /* Login failed. */ - atomic_set(&hostdata->request_limit, -1); + ibmvscsi_set_request_limit(hostdata, -1); return; } @@ -1163,7 +1179,7 @@ static void login_rsp(struct srp_event_struct *evt_struct) * This value is set rather than added to request_limit because * request_limit could have been set to -1 by this client. */ - atomic_set(&hostdata->request_limit, + ibmvscsi_set_request_limit(hostdata, be32_to_cpu(evt_struct->xfer_iu->srp.login_rsp.req_lim_delta)); /* If we had any pending I/Os, kick them */ @@ -1195,13 +1211,13 @@ static int send_srp_login(struct ibmvscsi_host_data *hostdata) login->req_buf_fmt = cpu_to_be16(SRP_BUF_FORMAT_DIRECT | SRP_BUF_FORMAT_INDIRECT); - spin_lock_irqsave(hostdata->host->host_lock, flags); /* Start out with a request limit of 0, since this is negotiated in * the login request we are just sending and login requests always * get sent by the driver regardless of request_limit. */ - atomic_set(&hostdata->request_limit, 0); + ibmvscsi_set_request_limit(hostdata, 0); + spin_lock_irqsave(hostdata->host->host_lock, flags); rc = ibmvscsi_send_srp_event(evt_struct, hostdata, login_timeout * 2); spin_unlock_irqrestore(hostdata->host->host_lock, flags); dev_info(hostdata->dev, "sent SRP login\n"); @@ -1781,7 +1797,7 @@ static void ibmvscsi_handle_crq(struct viosrp_crq *crq, return; case VIOSRP_CRQ_XPORT_EVENT: /* Hypervisor telling us the connection is closed */ scsi_block_requests(hostdata->host); - atomic_set(&hostdata->request_limit, 0); + ibmvscsi_set_request_limit(hostdata, 0); if (crq->format == 0x06) { /* We need to re-setup the interpartition connection */ dev_info(hostdata->dev, "Re-enabling adapter!\n"); @@ -2137,12 +2153,12 @@ static void ibmvscsi_do_work(struct ibmvscsi_host_data *hostdata) } hostdata->action = IBMVSCSI_HOST_ACTION_NONE; + spin_unlock_irqrestore(hostdata->host->host_lock, flags); if (rc) { - atomic_set(&hostdata->request_limit, -1); + ibmvscsi_set_request_limit(hostdata, -1); dev_err(hostdata->dev, "error after %s\n", action); } - spin_unlock_irqrestore(hostdata->host->host_lock, flags); scsi_unblock_requests(hostdata->host); } @@ -2226,7 +2242,7 @@ static int ibmvscsi_probe(struct vio_dev *vdev, const struct vio_device_id *id) init_waitqueue_head(&hostdata->work_wait_q); hostdata->host = host; hostdata->dev = dev; - atomic_set(&hostdata->request_limit, -1); + ibmvscsi_set_request_limit(hostdata, -1); hostdata->host->max_sectors = IBMVSCSI_MAX_SECTORS_DEFAULT; if (map_persist_bufs(hostdata)) { -- cgit v1.2.3 From fab09aaee80389a37d8ab49396afbb77fa86583a Mon Sep 17 00:00:00 2001 From: John Garry Date: Thu, 15 Oct 2020 16:27:05 +0800 Subject: scsi: hisi_sas: Stop using queue #0 always for v2 hw In commit 8d98416a55eb ("scsi: hisi_sas: Switch v3 hw to MQ"), the dispatch function was changed to choose the delivery queue based on the request tag HW queue index. This heavily degrades performance for v2 hw, since the HW queues are not exposed there, and, as such, HW queue #0 is used for every command. Revert to previous behaviour for when nr_hw_queues is not set, that being to choose the HW queue based on target device index. Link: https://lore.kernel.org/r/1602750425-240341-1-git-send-email-john.garry@huawei.com Fixes: 8d98416a55eb ("scsi: hisi_sas: Switch v3 hw to MQ") Signed-off-by: John Garry Signed-off-by: Martin K. Petersen --- drivers/scsi/hisi_sas/hisi_sas_main.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'drivers/scsi') diff --git a/drivers/scsi/hisi_sas/hisi_sas_main.c b/drivers/scsi/hisi_sas/hisi_sas_main.c index 128583dfccf2..c8dd8588f800 100644 --- a/drivers/scsi/hisi_sas/hisi_sas_main.c +++ b/drivers/scsi/hisi_sas/hisi_sas_main.c @@ -445,7 +445,7 @@ static int hisi_sas_task_prep(struct sas_task *task, } } - if (scmd) { + if (scmd && hisi_hba->shost->nr_hw_queues) { unsigned int dq_index; u32 blk_tag; -- cgit v1.2.3