summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorLinus Torvalds <torvalds@linux-foundation.org>2021-01-24 12:24:35 -0800
committerLinus Torvalds <torvalds@linux-foundation.org>2021-01-24 12:24:35 -0800
commita692a610d7ed632cab31b61d6c350db68a10e574 (patch)
tree15f6cc8a130bd51d3856cd40820a6b091805c069
parent51306806426d0ffa4f9b11e65447092ae7d57ee7 (diff)
parent97784481757fba7570121a70dd37ca74a29f50a8 (diff)
downloadlinux-a692a610d7ed632cab31b61d6c350db68a10e574.tar.bz2
Merge tag 'block-5.11-2021-01-24' of git://git.kernel.dk/linux-block
Pull block fixes from Jens Axboe: - NVMe pull request from Christoph: - fix a status code in nvmet (Chaitanya Kulkarni) - avoid double completions in nvme-rdma/nvme-tcp (Chao Leng) - fix the CMB support to cope with NVMe 1.4 controllers (Klaus Jensen) - fix PRINFO handling in the passthrough ioctl (Revanth Rajashekar) - fix a double DMA unmap in nvme-pci - lightnvm error path leak fix (Pan) - MD pull request from Song: - Flush request fix (Xiao) * tag 'block-5.11-2021-01-24' of git://git.kernel.dk/linux-block: lightnvm: fix memory leak when submit fails nvme-pci: fix error unwind in nvme_map_data nvme-pci: refactor nvme_unmap_data md: Set prev_flush_start and flush_bio in an atomic way nvmet: set right status on error in id-ns handler nvme-pci: allow use of cmb on v1.4 controllers nvme-tcp: avoid request double completion for concurrent nvme_tcp_timeout nvme-rdma: avoid request double completion for concurrent nvme_rdma_timeout nvme: check the PRINFO bit before deciding the host buffer length
-rw-r--r--drivers/lightnvm/core.c3
-rw-r--r--drivers/md/md.c2
-rw-r--r--drivers/nvme/host/core.c17
-rw-r--r--drivers/nvme/host/pci.c119
-rw-r--r--drivers/nvme/host/rdma.c15
-rw-r--r--drivers/nvme/host/tcp.c14
-rw-r--r--drivers/nvme/target/admin-cmd.c8
-rw-r--r--include/linux/nvme.h6
8 files changed, 132 insertions, 52 deletions
diff --git a/drivers/lightnvm/core.c b/drivers/lightnvm/core.c
index c1bcac71008c..28ddcaa5358b 100644
--- a/drivers/lightnvm/core.c
+++ b/drivers/lightnvm/core.c
@@ -844,11 +844,10 @@ static int nvm_bb_chunk_sense(struct nvm_dev *dev, struct ppa_addr ppa)
rqd.ppa_addr = generic_to_dev_addr(dev, ppa);
ret = nvm_submit_io_sync_raw(dev, &rqd);
+ __free_page(page);
if (ret)
return ret;
- __free_page(page);
-
return rqd.error;
}
diff --git a/drivers/md/md.c b/drivers/md/md.c
index ca409428b4fc..04384452a7ab 100644
--- a/drivers/md/md.c
+++ b/drivers/md/md.c
@@ -639,8 +639,10 @@ static void md_submit_flush_data(struct work_struct *ws)
* could wait for this and below md_handle_request could wait for those
* bios because of suspend check
*/
+ spin_lock_irq(&mddev->lock);
mddev->prev_flush_start = mddev->start_flush;
mddev->flush_bio = NULL;
+ spin_unlock_irq(&mddev->lock);
wake_up(&mddev->sb_wait);
if (bio->bi_iter.bi_size == 0) {
diff --git a/drivers/nvme/host/core.c b/drivers/nvme/host/core.c
index 200bdd672c28..8caf9b34734d 100644
--- a/drivers/nvme/host/core.c
+++ b/drivers/nvme/host/core.c
@@ -1543,8 +1543,21 @@ static int nvme_submit_io(struct nvme_ns *ns, struct nvme_user_io __user *uio)
}
length = (io.nblocks + 1) << ns->lba_shift;
- meta_len = (io.nblocks + 1) * ns->ms;
- metadata = nvme_to_user_ptr(io.metadata);
+
+ if ((io.control & NVME_RW_PRINFO_PRACT) &&
+ ns->ms == sizeof(struct t10_pi_tuple)) {
+ /*
+ * Protection information is stripped/inserted by the
+ * controller.
+ */
+ if (nvme_to_user_ptr(io.metadata))
+ return -EINVAL;
+ meta_len = 0;
+ metadata = NULL;
+ } else {
+ meta_len = (io.nblocks + 1) * ns->ms;
+ metadata = nvme_to_user_ptr(io.metadata);
+ }
if (ns->features & NVME_NS_EXT_LBAS) {
length += meta_len;
diff --git a/drivers/nvme/host/pci.c b/drivers/nvme/host/pci.c
index 50d9a20568a2..856aa31931c1 100644
--- a/drivers/nvme/host/pci.c
+++ b/drivers/nvme/host/pci.c
@@ -23,6 +23,7 @@
#include <linux/t10-pi.h>
#include <linux/types.h>
#include <linux/io-64-nonatomic-lo-hi.h>
+#include <linux/io-64-nonatomic-hi-lo.h>
#include <linux/sed-opal.h>
#include <linux/pci-p2pdma.h>
@@ -542,50 +543,71 @@ static inline bool nvme_pci_use_sgls(struct nvme_dev *dev, struct request *req)
return true;
}
-static void nvme_unmap_data(struct nvme_dev *dev, struct request *req)
+static void nvme_free_prps(struct nvme_dev *dev, struct request *req)
{
- struct nvme_iod *iod = blk_mq_rq_to_pdu(req);
const int last_prp = NVME_CTRL_PAGE_SIZE / sizeof(__le64) - 1;
- dma_addr_t dma_addr = iod->first_dma, next_dma_addr;
+ struct nvme_iod *iod = blk_mq_rq_to_pdu(req);
+ dma_addr_t dma_addr = iod->first_dma;
int i;
- if (iod->dma_len) {
- dma_unmap_page(dev->dev, dma_addr, iod->dma_len,
- rq_dma_dir(req));
- return;
+ for (i = 0; i < iod->npages; i++) {
+ __le64 *prp_list = nvme_pci_iod_list(req)[i];
+ dma_addr_t next_dma_addr = le64_to_cpu(prp_list[last_prp]);
+
+ dma_pool_free(dev->prp_page_pool, prp_list, dma_addr);
+ dma_addr = next_dma_addr;
}
- WARN_ON_ONCE(!iod->nents);
+}
- if (is_pci_p2pdma_page(sg_page(iod->sg)))
- pci_p2pdma_unmap_sg(dev->dev, iod->sg, iod->nents,
- rq_dma_dir(req));
- else
- dma_unmap_sg(dev->dev, iod->sg, iod->nents, rq_dma_dir(req));
+static void nvme_free_sgls(struct nvme_dev *dev, struct request *req)
+{
+ const int last_sg = SGES_PER_PAGE - 1;
+ struct nvme_iod *iod = blk_mq_rq_to_pdu(req);
+ dma_addr_t dma_addr = iod->first_dma;
+ int i;
+ for (i = 0; i < iod->npages; i++) {
+ struct nvme_sgl_desc *sg_list = nvme_pci_iod_list(req)[i];
+ dma_addr_t next_dma_addr = le64_to_cpu((sg_list[last_sg]).addr);
- if (iod->npages == 0)
- dma_pool_free(dev->prp_small_pool, nvme_pci_iod_list(req)[0],
- dma_addr);
+ dma_pool_free(dev->prp_page_pool, sg_list, dma_addr);
+ dma_addr = next_dma_addr;
+ }
- for (i = 0; i < iod->npages; i++) {
- void *addr = nvme_pci_iod_list(req)[i];
+}
- if (iod->use_sgl) {
- struct nvme_sgl_desc *sg_list = addr;
+static void nvme_unmap_sg(struct nvme_dev *dev, struct request *req)
+{
+ struct nvme_iod *iod = blk_mq_rq_to_pdu(req);
- next_dma_addr =
- le64_to_cpu((sg_list[SGES_PER_PAGE - 1]).addr);
- } else {
- __le64 *prp_list = addr;
+ if (is_pci_p2pdma_page(sg_page(iod->sg)))
+ pci_p2pdma_unmap_sg(dev->dev, iod->sg, iod->nents,
+ rq_dma_dir(req));
+ else
+ dma_unmap_sg(dev->dev, iod->sg, iod->nents, rq_dma_dir(req));
+}
- next_dma_addr = le64_to_cpu(prp_list[last_prp]);
- }
+static void nvme_unmap_data(struct nvme_dev *dev, struct request *req)
+{
+ struct nvme_iod *iod = blk_mq_rq_to_pdu(req);
- dma_pool_free(dev->prp_page_pool, addr, dma_addr);
- dma_addr = next_dma_addr;
+ if (iod->dma_len) {
+ dma_unmap_page(dev->dev, iod->first_dma, iod->dma_len,
+ rq_dma_dir(req));
+ return;
}
+ WARN_ON_ONCE(!iod->nents);
+
+ nvme_unmap_sg(dev, req);
+ if (iod->npages == 0)
+ dma_pool_free(dev->prp_small_pool, nvme_pci_iod_list(req)[0],
+ iod->first_dma);
+ else if (iod->use_sgl)
+ nvme_free_sgls(dev, req);
+ else
+ nvme_free_prps(dev, req);
mempool_free(iod->sg, dev->iod_mempool);
}
@@ -661,7 +683,7 @@ static blk_status_t nvme_pci_setup_prps(struct nvme_dev *dev,
__le64 *old_prp_list = prp_list;
prp_list = dma_pool_alloc(pool, GFP_ATOMIC, &prp_dma);
if (!prp_list)
- return BLK_STS_RESOURCE;
+ goto free_prps;
list[iod->npages++] = prp_list;
prp_list[0] = old_prp_list[i - 1];
old_prp_list[i - 1] = cpu_to_le64(prp_dma);
@@ -681,14 +703,14 @@ static blk_status_t nvme_pci_setup_prps(struct nvme_dev *dev,
dma_addr = sg_dma_address(sg);
dma_len = sg_dma_len(sg);
}
-
done:
cmnd->dptr.prp1 = cpu_to_le64(sg_dma_address(iod->sg));
cmnd->dptr.prp2 = cpu_to_le64(iod->first_dma);
-
return BLK_STS_OK;
-
- bad_sgl:
+free_prps:
+ nvme_free_prps(dev, req);
+ return BLK_STS_RESOURCE;
+bad_sgl:
WARN(DO_ONCE(nvme_print_sgl, iod->sg, iod->nents),
"Invalid SGL for payload:%d nents:%d\n",
blk_rq_payload_bytes(req), iod->nents);
@@ -760,7 +782,7 @@ static blk_status_t nvme_pci_setup_sgls(struct nvme_dev *dev,
sg_list = dma_pool_alloc(pool, GFP_ATOMIC, &sgl_dma);
if (!sg_list)
- return BLK_STS_RESOURCE;
+ goto free_sgls;
i = 0;
nvme_pci_iod_list(req)[iod->npages++] = sg_list;
@@ -773,6 +795,9 @@ static blk_status_t nvme_pci_setup_sgls(struct nvme_dev *dev,
} while (--entries > 0);
return BLK_STS_OK;
+free_sgls:
+ nvme_free_sgls(dev, req);
+ return BLK_STS_RESOURCE;
}
static blk_status_t nvme_setup_prp_simple(struct nvme_dev *dev,
@@ -841,7 +866,7 @@ static blk_status_t nvme_map_data(struct nvme_dev *dev, struct request *req,
sg_init_table(iod->sg, blk_rq_nr_phys_segments(req));
iod->nents = blk_rq_map_sg(req->q, req, iod->sg);
if (!iod->nents)
- goto out;
+ goto out_free_sg;
if (is_pci_p2pdma_page(sg_page(iod->sg)))
nr_mapped = pci_p2pdma_map_sg_attrs(dev->dev, iod->sg,
@@ -850,16 +875,21 @@ static blk_status_t nvme_map_data(struct nvme_dev *dev, struct request *req,
nr_mapped = dma_map_sg_attrs(dev->dev, iod->sg, iod->nents,
rq_dma_dir(req), DMA_ATTR_NO_WARN);
if (!nr_mapped)
- goto out;
+ goto out_free_sg;
iod->use_sgl = nvme_pci_use_sgls(dev, req);
if (iod->use_sgl)
ret = nvme_pci_setup_sgls(dev, req, &cmnd->rw, nr_mapped);
else
ret = nvme_pci_setup_prps(dev, req, &cmnd->rw);
-out:
if (ret != BLK_STS_OK)
- nvme_unmap_data(dev, req);
+ goto out_unmap_sg;
+ return BLK_STS_OK;
+
+out_unmap_sg:
+ nvme_unmap_sg(dev, req);
+out_free_sg:
+ mempool_free(iod->sg, dev->iod_mempool);
return ret;
}
@@ -1795,6 +1825,9 @@ static void nvme_map_cmb(struct nvme_dev *dev)
if (dev->cmb_size)
return;
+ if (NVME_CAP_CMBS(dev->ctrl.cap))
+ writel(NVME_CMBMSC_CRE, dev->bar + NVME_REG_CMBMSC);
+
dev->cmbsz = readl(dev->bar + NVME_REG_CMBSZ);
if (!dev->cmbsz)
return;
@@ -1809,6 +1842,16 @@ static void nvme_map_cmb(struct nvme_dev *dev)
return;
/*
+ * Tell the controller about the host side address mapping the CMB,
+ * and enable CMB decoding for the NVMe 1.4+ scheme:
+ */
+ if (NVME_CAP_CMBS(dev->ctrl.cap)) {
+ hi_lo_writeq(NVME_CMBMSC_CRE | NVME_CMBMSC_CMSE |
+ (pci_bus_address(pdev, bar) + offset),
+ dev->bar + NVME_REG_CMBMSC);
+ }
+
+ /*
* Controllers may support a CMB size larger than their BAR,
* for example, due to being behind a bridge. Reduce the CMB to
* the reported size of the BAR
diff --git a/drivers/nvme/host/rdma.c b/drivers/nvme/host/rdma.c
index cf6c49d09c82..b7ce4f221d99 100644
--- a/drivers/nvme/host/rdma.c
+++ b/drivers/nvme/host/rdma.c
@@ -97,6 +97,7 @@ struct nvme_rdma_queue {
struct completion cm_done;
bool pi_support;
int cq_size;
+ struct mutex queue_lock;
};
struct nvme_rdma_ctrl {
@@ -579,6 +580,7 @@ static int nvme_rdma_alloc_queue(struct nvme_rdma_ctrl *ctrl,
int ret;
queue = &ctrl->queues[idx];
+ mutex_init(&queue->queue_lock);
queue->ctrl = ctrl;
if (idx && ctrl->ctrl.max_integrity_segments)
queue->pi_support = true;
@@ -598,7 +600,8 @@ static int nvme_rdma_alloc_queue(struct nvme_rdma_ctrl *ctrl,
if (IS_ERR(queue->cm_id)) {
dev_info(ctrl->ctrl.device,
"failed to create CM ID: %ld\n", PTR_ERR(queue->cm_id));
- return PTR_ERR(queue->cm_id);
+ ret = PTR_ERR(queue->cm_id);
+ goto out_destroy_mutex;
}
if (ctrl->ctrl.opts->mask & NVMF_OPT_HOST_TRADDR)
@@ -628,6 +631,8 @@ static int nvme_rdma_alloc_queue(struct nvme_rdma_ctrl *ctrl,
out_destroy_cm_id:
rdma_destroy_id(queue->cm_id);
nvme_rdma_destroy_queue_ib(queue);
+out_destroy_mutex:
+ mutex_destroy(&queue->queue_lock);
return ret;
}
@@ -639,9 +644,10 @@ static void __nvme_rdma_stop_queue(struct nvme_rdma_queue *queue)
static void nvme_rdma_stop_queue(struct nvme_rdma_queue *queue)
{
- if (!test_and_clear_bit(NVME_RDMA_Q_LIVE, &queue->flags))
- return;
- __nvme_rdma_stop_queue(queue);
+ mutex_lock(&queue->queue_lock);
+ if (test_and_clear_bit(NVME_RDMA_Q_LIVE, &queue->flags))
+ __nvme_rdma_stop_queue(queue);
+ mutex_unlock(&queue->queue_lock);
}
static void nvme_rdma_free_queue(struct nvme_rdma_queue *queue)
@@ -651,6 +657,7 @@ static void nvme_rdma_free_queue(struct nvme_rdma_queue *queue)
nvme_rdma_destroy_queue_ib(queue);
rdma_destroy_id(queue->cm_id);
+ mutex_destroy(&queue->queue_lock);
}
static void nvme_rdma_free_io_queues(struct nvme_rdma_ctrl *ctrl)
diff --git a/drivers/nvme/host/tcp.c b/drivers/nvme/host/tcp.c
index 216619926563..881d28eb15e9 100644
--- a/drivers/nvme/host/tcp.c
+++ b/drivers/nvme/host/tcp.c
@@ -76,6 +76,7 @@ struct nvme_tcp_queue {
struct work_struct io_work;
int io_cpu;
+ struct mutex queue_lock;
struct mutex send_mutex;
struct llist_head req_list;
struct list_head send_list;
@@ -1219,6 +1220,7 @@ static void nvme_tcp_free_queue(struct nvme_ctrl *nctrl, int qid)
sock_release(queue->sock);
kfree(queue->pdu);
+ mutex_destroy(&queue->queue_lock);
}
static int nvme_tcp_init_connection(struct nvme_tcp_queue *queue)
@@ -1380,6 +1382,7 @@ static int nvme_tcp_alloc_queue(struct nvme_ctrl *nctrl,
struct nvme_tcp_queue *queue = &ctrl->queues[qid];
int ret, rcv_pdu_size;
+ mutex_init(&queue->queue_lock);
queue->ctrl = ctrl;
init_llist_head(&queue->req_list);
INIT_LIST_HEAD(&queue->send_list);
@@ -1398,7 +1401,7 @@ static int nvme_tcp_alloc_queue(struct nvme_ctrl *nctrl,
if (ret) {
dev_err(nctrl->device,
"failed to create socket: %d\n", ret);
- return ret;
+ goto err_destroy_mutex;
}
/* Single syn retry */
@@ -1507,6 +1510,8 @@ err_crypto:
err_sock:
sock_release(queue->sock);
queue->sock = NULL;
+err_destroy_mutex:
+ mutex_destroy(&queue->queue_lock);
return ret;
}
@@ -1534,9 +1539,10 @@ static void nvme_tcp_stop_queue(struct nvme_ctrl *nctrl, int qid)
struct nvme_tcp_ctrl *ctrl = to_tcp_ctrl(nctrl);
struct nvme_tcp_queue *queue = &ctrl->queues[qid];
- if (!test_and_clear_bit(NVME_TCP_Q_LIVE, &queue->flags))
- return;
- __nvme_tcp_stop_queue(queue);
+ mutex_lock(&queue->queue_lock);
+ if (test_and_clear_bit(NVME_TCP_Q_LIVE, &queue->flags))
+ __nvme_tcp_stop_queue(queue);
+ mutex_unlock(&queue->queue_lock);
}
static int nvme_tcp_start_queue(struct nvme_ctrl *nctrl, int idx)
diff --git a/drivers/nvme/target/admin-cmd.c b/drivers/nvme/target/admin-cmd.c
index 8d90235e4fcc..dc1ea468b182 100644
--- a/drivers/nvme/target/admin-cmd.c
+++ b/drivers/nvme/target/admin-cmd.c
@@ -487,8 +487,10 @@ static void nvmet_execute_identify_ns(struct nvmet_req *req)
/* return an all zeroed buffer if we can't find an active namespace */
ns = nvmet_find_namespace(ctrl, req->cmd->identify.nsid);
- if (!ns)
+ if (!ns) {
+ status = NVME_SC_INVALID_NS;
goto done;
+ }
nvmet_ns_revalidate(ns);
@@ -541,7 +543,9 @@ static void nvmet_execute_identify_ns(struct nvmet_req *req)
id->nsattr |= (1 << 0);
nvmet_put_namespace(ns);
done:
- status = nvmet_copy_to_sgl(req, 0, id, sizeof(*id));
+ if (!status)
+ status = nvmet_copy_to_sgl(req, 0, id, sizeof(*id));
+
kfree(id);
out:
nvmet_req_complete(req, status);
diff --git a/include/linux/nvme.h b/include/linux/nvme.h
index d92535997687..bfed36e342cc 100644
--- a/include/linux/nvme.h
+++ b/include/linux/nvme.h
@@ -116,6 +116,9 @@ enum {
NVME_REG_BPMBL = 0x0048, /* Boot Partition Memory Buffer
* Location
*/
+ NVME_REG_CMBMSC = 0x0050, /* Controller Memory Buffer Memory
+ * Space Control
+ */
NVME_REG_PMRCAP = 0x0e00, /* Persistent Memory Capabilities */
NVME_REG_PMRCTL = 0x0e04, /* Persistent Memory Region Control */
NVME_REG_PMRSTS = 0x0e08, /* Persistent Memory Region Status */
@@ -135,6 +138,7 @@ enum {
#define NVME_CAP_CSS(cap) (((cap) >> 37) & 0xff)
#define NVME_CAP_MPSMIN(cap) (((cap) >> 48) & 0xf)
#define NVME_CAP_MPSMAX(cap) (((cap) >> 52) & 0xf)
+#define NVME_CAP_CMBS(cap) (((cap) >> 57) & 0x1)
#define NVME_CMB_BIR(cmbloc) ((cmbloc) & 0x7)
#define NVME_CMB_OFST(cmbloc) (((cmbloc) >> 12) & 0xfffff)
@@ -192,6 +196,8 @@ enum {
NVME_CSTS_SHST_OCCUR = 1 << 2,
NVME_CSTS_SHST_CMPLT = 2 << 2,
NVME_CSTS_SHST_MASK = 3 << 2,
+ NVME_CMBMSC_CRE = 1 << 0,
+ NVME_CMBMSC_CMSE = 1 << 1,
};
struct nvme_id_power_state {