-rw-r--r--  block/bio-integrity.c             |   3
-rw-r--r--  drivers/ata/sata_rcar.c           |   1
-rw-r--r--  drivers/block/brd.c               |   6
-rw-r--r--  drivers/lightnvm/core.c           |  82
-rw-r--r--  drivers/lightnvm/pblk-cache.c     |   8
-rw-r--r--  drivers/lightnvm/pblk-core.c      |  65
-rw-r--r--  drivers/lightnvm/pblk-gc.c        |  52
-rw-r--r--  drivers/lightnvm/pblk-init.c      |  65
-rw-r--r--  drivers/lightnvm/pblk-map.c       |   1
-rw-r--r--  drivers/lightnvm/pblk-rb.c        |  13
-rw-r--r--  drivers/lightnvm/pblk-read.c      | 394
-rw-r--r--  drivers/lightnvm/pblk-recovery.c  |  74
-rw-r--r--  drivers/lightnvm/pblk-write.c     |   1
-rw-r--r--  drivers/lightnvm/pblk.h           |  28
-rw-r--r--  drivers/nvme/host/core.c          |  79
-rw-r--r--  drivers/nvme/host/fabrics.c       |   4
-rw-r--r--  drivers/nvme/host/fc.c            |  14
-rw-r--r--  drivers/nvme/host/lightnvm.c      |   1
-rw-r--r--  drivers/nvme/host/multipath.c     |   2
-rw-r--r--  drivers/nvme/host/pci.c           |   4
-rw-r--r--  drivers/nvme/host/rdma.c          |  34
-rw-r--r--  drivers/nvme/host/trace.h         |   1
-rw-r--r--  drivers/s390/block/dasd_eckd.c    |   2
-rw-r--r--  include/linux/lightnvm.h          |   2
-rw-r--r--  include/linux/nvme.h              |   4
25 files changed, 398 insertions(+), 542 deletions(-)
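Note on the lightnvm core.c changes below: device teardown moves from a direct nvm_free(dev) call to kref-based lifetime management. nvm_alloc_dev() does kref_init(&dev->ref), each created target takes a reference in __nvm_configure_create(), and both nvm_remove_tgt() and nvm_unregister() drop one with kref_put(&dev->ref, nvm_free), so the device is only freed when the last user is gone. The following is a minimal userspace sketch of that get/put pattern using C11 atomics; the struct, function names, and the stand-in release hook (fake_dev_free) are illustrative only and are not part of the kernel API, which uses struct kref and container_of() in the release callback.

    #include <stdatomic.h>
    #include <stdio.h>
    #include <stdlib.h>

    /* Userspace analog of the kernel's kref: an atomic refcount whose final
     * put invokes a release callback, mirroring kref_put(&dev->ref, nvm_free). */
    struct ref {
            atomic_int count;
    };

    static void ref_init(struct ref *r) { atomic_init(&r->count, 1); }
    static void ref_get(struct ref *r)  { atomic_fetch_add(&r->count, 1); }

    static void ref_put(struct ref *r, void (*release)(struct ref *r))
    {
            /* atomic_fetch_sub returns the old value: 1 means we dropped
             * the last reference and must release the owning object. */
            if (atomic_fetch_sub(&r->count, 1) == 1)
                    release(r);
    }

    struct fake_dev {
            struct ref ref;         /* first member, so the cast below is valid */
            /* ... device state ... */
    };

    static void fake_dev_free(struct ref *r)
    {
            /* container_of-style recovery; ref is the first member here */
            struct fake_dev *dev = (struct fake_dev *)r;
            printf("releasing device\n");
            free(dev);
    }

    int main(void)
    {
            struct fake_dev *dev = calloc(1, sizeof(*dev));
            if (!dev)
                    return 1;
            ref_init(&dev->ref);                 /* registration holds the initial ref */
            ref_get(&dev->ref);                  /* a target created on top of the device */
            ref_put(&dev->ref, fake_dev_free);   /* target removed */
            ref_put(&dev->ref, fake_dev_free);   /* device unregistered: frees here */
            return 0;
    }

The error paths in nvm_register() follow the same rule: on failure they drop the initial reference with kref_put() instead of calling the free routine directly, which keeps a single teardown path regardless of how many targets still hold the device.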
diff --git a/block/bio-integrity.c b/block/bio-integrity.c index 42536674020a..4db620849515 100644 --- a/block/bio-integrity.c +++ b/block/bio-integrity.c @@ -43,8 +43,7 @@ struct bio_integrity_payload *bio_integrity_alloc(struct bio *bio, unsigned inline_vecs; if (!bs || !mempool_initialized(&bs->bio_integrity_pool)) { - bip = kmalloc(sizeof(struct bio_integrity_payload) + - sizeof(struct bio_vec) * nr_vecs, gfp_mask); + bip = kmalloc(struct_size(bip, bip_inline_vecs, nr_vecs), gfp_mask); inline_vecs = nr_vecs; } else { bip = mempool_alloc(&bs->bio_integrity_pool, gfp_mask); diff --git a/drivers/ata/sata_rcar.c b/drivers/ata/sata_rcar.c index 59b2317acea9..3495e1733a8e 100644 --- a/drivers/ata/sata_rcar.c +++ b/drivers/ata/sata_rcar.c @@ -909,7 +909,6 @@ static int sata_rcar_probe(struct platform_device *pdev) host = ata_host_alloc(dev, 1); if (!host) { - dev_err(dev, "ata_host_alloc failed\n"); ret = -ENOMEM; goto err_pm_put; } diff --git a/drivers/block/brd.c b/drivers/block/brd.c index 17defbf4f332..2da615b45b31 100644 --- a/drivers/block/brd.c +++ b/drivers/block/brd.c @@ -153,6 +153,12 @@ static void brd_free_pages(struct brd_device *brd) pos++; /* + * It takes 3.4 seconds to remove 80GiB ramdisk. + * So, we need cond_resched to avoid stalling the CPU. + */ + cond_resched(); + + /* * This assumes radix_tree_gang_lookup always returns as * many pages as possible. If the radix-tree code changes, * so will this have to. diff --git a/drivers/lightnvm/core.c b/drivers/lightnvm/core.c index 5f82036fe322..0df7454832ef 100644 --- a/drivers/lightnvm/core.c +++ b/drivers/lightnvm/core.c @@ -45,6 +45,8 @@ struct nvm_dev_map { int num_ch; }; +static void nvm_free(struct kref *ref); + static struct nvm_target *nvm_find_target(struct nvm_dev *dev, const char *name) { struct nvm_target *tgt; @@ -325,6 +327,7 @@ static int nvm_create_tgt(struct nvm_dev *dev, struct nvm_ioctl_create *create) struct nvm_target *t; struct nvm_tgt_dev *tgt_dev; void *targetdata; + unsigned int mdts; int ret; switch (create->conf.type) { @@ -412,8 +415,12 @@ static int nvm_create_tgt(struct nvm_dev *dev, struct nvm_ioctl_create *create) tdisk->private_data = targetdata; tqueue->queuedata = targetdata; - blk_queue_max_hw_sectors(tqueue, - (dev->geo.csecs >> 9) * NVM_MAX_VLBA); + mdts = (dev->geo.csecs >> 9) * NVM_MAX_VLBA; + if (dev->geo.mdts) { + mdts = min_t(u32, dev->geo.mdts, + (dev->geo.csecs >> 9) * NVM_MAX_VLBA); + } + blk_queue_max_hw_sectors(tqueue, mdts); set_capacity(tdisk, tt->capacity(targetdata)); add_disk(tdisk); @@ -476,7 +483,6 @@ static void __nvm_remove_target(struct nvm_target *t, bool graceful) /** * nvm_remove_tgt - Removes a target from the media manager - * @dev: device * @remove: ioctl structure with target name to remove. 
* * Returns: @@ -484,18 +490,28 @@ static void __nvm_remove_target(struct nvm_target *t, bool graceful) * 1: on not found * <0: on error */ -static int nvm_remove_tgt(struct nvm_dev *dev, struct nvm_ioctl_remove *remove) +static int nvm_remove_tgt(struct nvm_ioctl_remove *remove) { struct nvm_target *t; + struct nvm_dev *dev; - mutex_lock(&dev->mlock); - t = nvm_find_target(dev, remove->tgtname); - if (!t) { + down_read(&nvm_lock); + list_for_each_entry(dev, &nvm_devices, devices) { + mutex_lock(&dev->mlock); + t = nvm_find_target(dev, remove->tgtname); + if (t) { + mutex_unlock(&dev->mlock); + break; + } mutex_unlock(&dev->mlock); - return 1; } + up_read(&nvm_lock); + + if (!t) + return 1; + __nvm_remove_target(t, true); - mutex_unlock(&dev->mlock); + kref_put(&dev->ref, nvm_free); return 0; } @@ -1089,15 +1105,16 @@ err_fmtype: return ret; } -static void nvm_free(struct nvm_dev *dev) +static void nvm_free(struct kref *ref) { - if (!dev) - return; + struct nvm_dev *dev = container_of(ref, struct nvm_dev, ref); if (dev->dma_pool) dev->ops->destroy_dma_pool(dev->dma_pool); - nvm_unregister_map(dev); + if (dev->rmap) + nvm_unregister_map(dev); + kfree(dev->lun_map); kfree(dev); } @@ -1134,7 +1151,13 @@ err: struct nvm_dev *nvm_alloc_dev(int node) { - return kzalloc_node(sizeof(struct nvm_dev), GFP_KERNEL, node); + struct nvm_dev *dev; + + dev = kzalloc_node(sizeof(struct nvm_dev), GFP_KERNEL, node); + if (dev) + kref_init(&dev->ref); + + return dev; } EXPORT_SYMBOL(nvm_alloc_dev); @@ -1142,12 +1165,16 @@ int nvm_register(struct nvm_dev *dev) { int ret, exp_pool_size; - if (!dev->q || !dev->ops) + if (!dev->q || !dev->ops) { + kref_put(&dev->ref, nvm_free); return -EINVAL; + } ret = nvm_init(dev); - if (ret) + if (ret) { + kref_put(&dev->ref, nvm_free); return ret; + } exp_pool_size = max_t(int, PAGE_SIZE, (NVM_MAX_VLBA * (sizeof(u64) + dev->geo.sos))); @@ -1157,7 +1184,7 @@ int nvm_register(struct nvm_dev *dev) exp_pool_size); if (!dev->dma_pool) { pr_err("nvm: could not create dma pool\n"); - nvm_free(dev); + kref_put(&dev->ref, nvm_free); return -ENOMEM; } @@ -1179,6 +1206,7 @@ void nvm_unregister(struct nvm_dev *dev) if (t->dev->parent != dev) continue; __nvm_remove_target(t, false); + kref_put(&dev->ref, nvm_free); } mutex_unlock(&dev->mlock); @@ -1186,13 +1214,14 @@ void nvm_unregister(struct nvm_dev *dev) list_del(&dev->devices); up_write(&nvm_lock); - nvm_free(dev); + kref_put(&dev->ref, nvm_free); } EXPORT_SYMBOL(nvm_unregister); static int __nvm_configure_create(struct nvm_ioctl_create *create) { struct nvm_dev *dev; + int ret; down_write(&nvm_lock); dev = nvm_find_nvm_dev(create->dev); @@ -1203,7 +1232,12 @@ static int __nvm_configure_create(struct nvm_ioctl_create *create) return -EINVAL; } - return nvm_create_tgt(dev, create); + kref_get(&dev->ref); + ret = nvm_create_tgt(dev, create); + if (ret) + kref_put(&dev->ref, nvm_free); + + return ret; } static long nvm_ioctl_info(struct file *file, void __user *arg) @@ -1322,8 +1356,6 @@ static long nvm_ioctl_dev_create(struct file *file, void __user *arg) static long nvm_ioctl_dev_remove(struct file *file, void __user *arg) { struct nvm_ioctl_remove remove; - struct nvm_dev *dev; - int ret = 0; if (copy_from_user(&remove, arg, sizeof(struct nvm_ioctl_remove))) return -EFAULT; @@ -1335,13 +1367,7 @@ static long nvm_ioctl_dev_remove(struct file *file, void __user *arg) return -EINVAL; } - list_for_each_entry(dev, &nvm_devices, devices) { - ret = nvm_remove_tgt(dev, &remove); - if (!ret) - break; - } - - return ret; + return 
nvm_remove_tgt(&remove); } /* kept for compatibility reasons */ diff --git a/drivers/lightnvm/pblk-cache.c b/drivers/lightnvm/pblk-cache.c index c9fa26f95659..5c1034c22197 100644 --- a/drivers/lightnvm/pblk-cache.c +++ b/drivers/lightnvm/pblk-cache.c @@ -18,7 +18,8 @@ #include "pblk.h" -int pblk_write_to_cache(struct pblk *pblk, struct bio *bio, unsigned long flags) +void pblk_write_to_cache(struct pblk *pblk, struct bio *bio, + unsigned long flags) { struct request_queue *q = pblk->dev->q; struct pblk_w_ctx w_ctx; @@ -43,6 +44,7 @@ retry: goto retry; case NVM_IO_ERR: pblk_pipeline_stop(pblk); + bio_io_error(bio); goto out; } @@ -79,7 +81,9 @@ retry: out: generic_end_io_acct(q, REQ_OP_WRITE, &pblk->disk->part0, start_time); pblk_write_should_kick(pblk); - return ret; + + if (ret == NVM_IO_DONE) + bio_endio(bio); } /* diff --git a/drivers/lightnvm/pblk-core.c b/drivers/lightnvm/pblk-core.c index 6ca868868fee..773537804319 100644 --- a/drivers/lightnvm/pblk-core.c +++ b/drivers/lightnvm/pblk-core.c @@ -562,11 +562,9 @@ int pblk_submit_io_sync(struct pblk *pblk, struct nvm_rq *rqd) int pblk_submit_io_sync_sem(struct pblk *pblk, struct nvm_rq *rqd) { - struct ppa_addr *ppa_list; + struct ppa_addr *ppa_list = nvm_rq_to_ppa_list(rqd); int ret; - ppa_list = (rqd->nr_ppas > 1) ? rqd->ppa_list : &rqd->ppa_addr; - pblk_down_chunk(pblk, ppa_list[0]); ret = pblk_submit_io_sync(pblk, rqd); pblk_up_chunk(pblk, ppa_list[0]); @@ -725,6 +723,7 @@ int pblk_line_smeta_read(struct pblk *pblk, struct pblk_line *line) struct nvm_tgt_dev *dev = pblk->dev; struct pblk_line_meta *lm = &pblk->lm; struct bio *bio; + struct ppa_addr *ppa_list; struct nvm_rq rqd; u64 paddr = pblk_line_smeta_start(pblk, line); int i, ret; @@ -748,9 +747,10 @@ int pblk_line_smeta_read(struct pblk *pblk, struct pblk_line *line) rqd.opcode = NVM_OP_PREAD; rqd.nr_ppas = lm->smeta_sec; rqd.is_seq = 1; + ppa_list = nvm_rq_to_ppa_list(&rqd); for (i = 0; i < lm->smeta_sec; i++, paddr++) - rqd.ppa_list[i] = addr_to_gen_ppa(pblk, paddr, line->id); + ppa_list[i] = addr_to_gen_ppa(pblk, paddr, line->id); ret = pblk_submit_io_sync(pblk, &rqd); if (ret) { @@ -761,8 +761,10 @@ int pblk_line_smeta_read(struct pblk *pblk, struct pblk_line *line) atomic_dec(&pblk->inflight_io); - if (rqd.error) + if (rqd.error && rqd.error != NVM_RSP_WARN_HIGHECC) { pblk_log_read_err(pblk, &rqd); + ret = -EIO; + } clear_rqd: pblk_free_rqd_meta(pblk, &rqd); @@ -775,6 +777,7 @@ static int pblk_line_smeta_write(struct pblk *pblk, struct pblk_line *line, struct nvm_tgt_dev *dev = pblk->dev; struct pblk_line_meta *lm = &pblk->lm; struct bio *bio; + struct ppa_addr *ppa_list; struct nvm_rq rqd; __le64 *lba_list = emeta_to_lbas(pblk, line->emeta->buf); __le64 addr_empty = cpu_to_le64(ADDR_EMPTY); @@ -799,12 +802,13 @@ static int pblk_line_smeta_write(struct pblk *pblk, struct pblk_line *line, rqd.opcode = NVM_OP_PWRITE; rqd.nr_ppas = lm->smeta_sec; rqd.is_seq = 1; + ppa_list = nvm_rq_to_ppa_list(&rqd); for (i = 0; i < lm->smeta_sec; i++, paddr++) { struct pblk_sec_meta *meta = pblk_get_meta(pblk, rqd.meta_list, i); - rqd.ppa_list[i] = addr_to_gen_ppa(pblk, paddr, line->id); + ppa_list[i] = addr_to_gen_ppa(pblk, paddr, line->id); meta->lba = lba_list[paddr] = addr_empty; } @@ -834,8 +838,9 @@ int pblk_line_emeta_read(struct pblk *pblk, struct pblk_line *line, struct nvm_geo *geo = &dev->geo; struct pblk_line_mgmt *l_mg = &pblk->l_mg; struct pblk_line_meta *lm = &pblk->lm; - void *ppa_list, *meta_list; + void *ppa_list_buf, *meta_list; struct bio *bio; + struct ppa_addr 
*ppa_list; struct nvm_rq rqd; u64 paddr = line->emeta_ssec; dma_addr_t dma_ppa_list, dma_meta_list; @@ -851,7 +856,7 @@ int pblk_line_emeta_read(struct pblk *pblk, struct pblk_line *line, if (!meta_list) return -ENOMEM; - ppa_list = meta_list + pblk_dma_meta_size(pblk); + ppa_list_buf = meta_list + pblk_dma_meta_size(pblk); dma_ppa_list = dma_meta_list + pblk_dma_meta_size(pblk); next_rq: @@ -872,11 +877,12 @@ next_rq: rqd.bio = bio; rqd.meta_list = meta_list; - rqd.ppa_list = ppa_list; + rqd.ppa_list = ppa_list_buf; rqd.dma_meta_list = dma_meta_list; rqd.dma_ppa_list = dma_ppa_list; rqd.opcode = NVM_OP_PREAD; rqd.nr_ppas = rq_ppas; + ppa_list = nvm_rq_to_ppa_list(&rqd); for (i = 0; i < rqd.nr_ppas; ) { struct ppa_addr ppa = addr_to_gen_ppa(pblk, paddr, line_id); @@ -904,7 +910,7 @@ next_rq: } for (j = 0; j < min; j++, i++, paddr++) - rqd.ppa_list[i] = addr_to_gen_ppa(pblk, paddr, line_id); + ppa_list[i] = addr_to_gen_ppa(pblk, paddr, line_id); } ret = pblk_submit_io_sync(pblk, &rqd); @@ -916,8 +922,11 @@ next_rq: atomic_dec(&pblk->inflight_io); - if (rqd.error) + if (rqd.error && rqd.error != NVM_RSP_WARN_HIGHECC) { pblk_log_read_err(pblk, &rqd); + ret = -EIO; + goto free_rqd_dma; + } emeta_buf += rq_len; left_ppas -= rq_ppas; @@ -1162,7 +1171,6 @@ static int pblk_line_init_bb(struct pblk *pblk, struct pblk_line *line, off = bit * geo->ws_opt; bitmap_set(line->map_bitmap, off, lm->smeta_sec); line->sec_in_line -= lm->smeta_sec; - line->smeta_ssec = off; line->cur_sec = off + lm->smeta_sec; if (init && pblk_line_smeta_write(pblk, line, off)) { @@ -1521,11 +1529,9 @@ void pblk_ppa_to_line_put(struct pblk *pblk, struct ppa_addr ppa) void pblk_rq_to_line_put(struct pblk *pblk, struct nvm_rq *rqd) { - struct ppa_addr *ppa_list; + struct ppa_addr *ppa_list = nvm_rq_to_ppa_list(rqd); int i; - ppa_list = (rqd->nr_ppas > 1) ? rqd->ppa_list : &rqd->ppa_addr; - for (i = 0; i < rqd->nr_ppas; i++) pblk_ppa_to_line_put(pblk, ppa_list[i]); } @@ -1699,6 +1705,14 @@ static void __pblk_line_put(struct pblk *pblk, struct pblk_line *line) spin_lock(&line->lock); WARN_ON(line->state != PBLK_LINESTATE_GC); + if (line->w_err_gc->has_gc_err) { + spin_unlock(&line->lock); + pblk_err(pblk, "line %d had errors during GC\n", line->id); + pblk_put_line_back(pblk, line); + line->w_err_gc->has_gc_err = 0; + return; + } + line->state = PBLK_LINESTATE_FREE; trace_pblk_line_state(pblk_disk_name(pblk), line->id, line->state); @@ -2023,7 +2037,7 @@ void pblk_update_map(struct pblk *pblk, sector_t lba, struct ppa_addr ppa) struct ppa_addr ppa_l2p; /* logic error: lba out-of-bounds. Ignore update */ - if (!(lba < pblk->rl.nr_secs)) { + if (!(lba < pblk->capacity)) { WARN(1, "pblk: corrupted L2P map request\n"); return; } @@ -2063,7 +2077,7 @@ int pblk_update_map_gc(struct pblk *pblk, sector_t lba, struct ppa_addr ppa_new, #endif /* logic error: lba out-of-bounds. Ignore update */ - if (!(lba < pblk->rl.nr_secs)) { + if (!(lba < pblk->capacity)) { WARN(1, "pblk: corrupted L2P map request\n"); return 0; } @@ -2109,7 +2123,7 @@ void pblk_update_map_dev(struct pblk *pblk, sector_t lba, } /* logic error: lba out-of-bounds. 
Ignore update */ - if (!(lba < pblk->rl.nr_secs)) { + if (!(lba < pblk->capacity)) { WARN(1, "pblk: corrupted L2P map request\n"); return; } @@ -2135,8 +2149,8 @@ out: spin_unlock(&pblk->trans_lock); } -void pblk_lookup_l2p_seq(struct pblk *pblk, struct ppa_addr *ppas, - sector_t blba, int nr_secs) +int pblk_lookup_l2p_seq(struct pblk *pblk, struct ppa_addr *ppas, + sector_t blba, int nr_secs, bool *from_cache) { int i; @@ -2150,10 +2164,19 @@ void pblk_lookup_l2p_seq(struct pblk *pblk, struct ppa_addr *ppas, if (!pblk_ppa_empty(ppa) && !pblk_addr_in_cache(ppa)) { struct pblk_line *line = pblk_ppa_to_line(pblk, ppa); + if (i > 0 && *from_cache) + break; + *from_cache = false; + kref_get(&line->ref); + } else { + if (i > 0 && !*from_cache) + break; + *from_cache = true; } } spin_unlock(&pblk->trans_lock); + return i; } void pblk_lookup_l2p_rand(struct pblk *pblk, struct ppa_addr *ppas, @@ -2167,7 +2190,7 @@ void pblk_lookup_l2p_rand(struct pblk *pblk, struct ppa_addr *ppas, lba = lba_list[i]; if (lba != ADDR_EMPTY) { /* logic error: lba out-of-bounds. Ignore update */ - if (!(lba < pblk->rl.nr_secs)) { + if (!(lba < pblk->capacity)) { WARN(1, "pblk: corrupted L2P map request\n"); continue; } diff --git a/drivers/lightnvm/pblk-gc.c b/drivers/lightnvm/pblk-gc.c index 26a52ea7ec45..63ee205b41c4 100644 --- a/drivers/lightnvm/pblk-gc.c +++ b/drivers/lightnvm/pblk-gc.c @@ -59,24 +59,28 @@ static void pblk_gc_writer_kick(struct pblk_gc *gc) wake_up_process(gc->gc_writer_ts); } -static void pblk_put_line_back(struct pblk *pblk, struct pblk_line *line) +void pblk_put_line_back(struct pblk *pblk, struct pblk_line *line) { struct pblk_line_mgmt *l_mg = &pblk->l_mg; struct list_head *move_list; + spin_lock(&l_mg->gc_lock); spin_lock(&line->lock); WARN_ON(line->state != PBLK_LINESTATE_GC); line->state = PBLK_LINESTATE_CLOSED; trace_pblk_line_state(pblk_disk_name(pblk), line->id, line->state); + + /* We need to reset gc_group in order to ensure that + * pblk_line_gc_list will return proper move_list + * since right now current line is not on any of the + * gc lists. 
+ */ + line->gc_group = PBLK_LINEGC_NONE; move_list = pblk_line_gc_list(pblk, line); spin_unlock(&line->lock); - - if (move_list) { - spin_lock(&l_mg->gc_lock); - list_add_tail(&line->list, move_list); - spin_unlock(&l_mg->gc_lock); - } + list_add_tail(&line->list, move_list); + spin_unlock(&l_mg->gc_lock); } static void pblk_gc_line_ws(struct work_struct *work) @@ -84,8 +88,6 @@ static void pblk_gc_line_ws(struct work_struct *work) struct pblk_line_ws *gc_rq_ws = container_of(work, struct pblk_line_ws, ws); struct pblk *pblk = gc_rq_ws->pblk; - struct nvm_tgt_dev *dev = pblk->dev; - struct nvm_geo *geo = &dev->geo; struct pblk_gc *gc = &pblk->gc; struct pblk_line *line = gc_rq_ws->line; struct pblk_gc_rq *gc_rq = gc_rq_ws->priv; @@ -93,18 +95,10 @@ static void pblk_gc_line_ws(struct work_struct *work) up(&gc->gc_sem); - gc_rq->data = vmalloc(array_size(gc_rq->nr_secs, geo->csecs)); - if (!gc_rq->data) { - pblk_err(pblk, "could not GC line:%d (%d/%d)\n", - line->id, *line->vsc, gc_rq->nr_secs); - goto out; - } - /* Read from GC victim block */ ret = pblk_submit_read_gc(pblk, gc_rq); if (ret) { - pblk_err(pblk, "failed GC read in line:%d (err:%d)\n", - line->id, ret); + line->w_err_gc->has_gc_err = 1; goto out; } @@ -189,6 +183,8 @@ static void pblk_gc_line_prepare_ws(struct work_struct *work) struct pblk_line *line = line_ws->line; struct pblk_line_mgmt *l_mg = &pblk->l_mg; struct pblk_line_meta *lm = &pblk->lm; + struct nvm_tgt_dev *dev = pblk->dev; + struct nvm_geo *geo = &dev->geo; struct pblk_gc *gc = &pblk->gc; struct pblk_line_ws *gc_rq_ws; struct pblk_gc_rq *gc_rq; @@ -247,9 +243,13 @@ next_rq: gc_rq->nr_secs = nr_secs; gc_rq->line = line; + gc_rq->data = vmalloc(array_size(gc_rq->nr_secs, geo->csecs)); + if (!gc_rq->data) + goto fail_free_gc_rq; + gc_rq_ws = kmalloc(sizeof(struct pblk_line_ws), GFP_KERNEL); if (!gc_rq_ws) - goto fail_free_gc_rq; + goto fail_free_gc_data; gc_rq_ws->pblk = pblk; gc_rq_ws->line = line; @@ -281,6 +281,8 @@ out: return; +fail_free_gc_data: + vfree(gc_rq->data); fail_free_gc_rq: kfree(gc_rq); fail_free_lba_list: @@ -290,8 +292,11 @@ fail_free_invalid_bitmap: fail_free_ws: kfree(line_ws); + /* Line goes back to closed state, so we cannot release additional + * reference for line, since we do that only when we want to do + * gc to free line state transition. + */ pblk_put_line_back(pblk, line); - kref_put(&line->ref, pblk_line_put); atomic_dec(&gc->read_inflight_gc); pblk_err(pblk, "failed to GC line %d\n", line->id); @@ -355,8 +360,13 @@ static int pblk_gc_read(struct pblk *pblk) pblk_gc_kick(pblk); - if (pblk_gc_line(pblk, line)) + if (pblk_gc_line(pblk, line)) { pblk_err(pblk, "failed to GC line %d\n", line->id); + /* rollback */ + spin_lock(&gc->r_lock); + list_add_tail(&line->list, &gc->r_list); + spin_unlock(&gc->r_lock); + } return 0; } diff --git a/drivers/lightnvm/pblk-init.c b/drivers/lightnvm/pblk-init.c index 8b643d0bffae..b351c7f002de 100644 --- a/drivers/lightnvm/pblk-init.c +++ b/drivers/lightnvm/pblk-init.c @@ -47,33 +47,6 @@ static struct pblk_global_caches pblk_caches = { struct bio_set pblk_bio_set; -static int pblk_rw_io(struct request_queue *q, struct pblk *pblk, - struct bio *bio) -{ - int ret; - - /* Read requests must be <= 256kb due to NVMe's 64 bit completion bitmap - * constraint. Writes can be of arbitrary size. 
- */ - if (bio_data_dir(bio) == READ) { - blk_queue_split(q, &bio); - ret = pblk_submit_read(pblk, bio); - if (ret == NVM_IO_DONE && bio_flagged(bio, BIO_CLONED)) - bio_put(bio); - - return ret; - } - - /* Prevent deadlock in the case of a modest LUN configuration and large - * user I/Os. Unless stalled, the rate limiter leaves at least 256KB - * available for user I/O. - */ - if (pblk_get_secs(bio) > pblk_rl_max_io(&pblk->rl)) - blk_queue_split(q, &bio); - - return pblk_write_to_cache(pblk, bio, PBLK_IOTYPE_USER); -} - static blk_qc_t pblk_make_rq(struct request_queue *q, struct bio *bio) { struct pblk *pblk = q->queuedata; @@ -86,13 +59,21 @@ static blk_qc_t pblk_make_rq(struct request_queue *q, struct bio *bio) } } - switch (pblk_rw_io(q, pblk, bio)) { - case NVM_IO_ERR: - bio_io_error(bio); - break; - case NVM_IO_DONE: - bio_endio(bio); - break; + /* Read requests must be <= 256kb due to NVMe's 64 bit completion bitmap + * constraint. Writes can be of arbitrary size. + */ + if (bio_data_dir(bio) == READ) { + blk_queue_split(q, &bio); + pblk_submit_read(pblk, bio); + } else { + /* Prevent deadlock in the case of a modest LUN configuration + * and large user I/Os. Unless stalled, the rate limiter + * leaves at least 256KB available for user I/O. + */ + if (pblk_get_secs(bio) > pblk_rl_max_io(&pblk->rl)) + blk_queue_split(q, &bio); + + pblk_write_to_cache(pblk, bio, PBLK_IOTYPE_USER); } return BLK_QC_T_NONE; @@ -105,7 +86,7 @@ static size_t pblk_trans_map_size(struct pblk *pblk) if (pblk->addrf_len < 32) entry_size = 4; - return entry_size * pblk->rl.nr_secs; + return entry_size * pblk->capacity; } #ifdef CONFIG_NVM_PBLK_DEBUG @@ -164,13 +145,18 @@ static int pblk_l2p_init(struct pblk *pblk, bool factory_init) int ret = 0; map_size = pblk_trans_map_size(pblk); - pblk->trans_map = vmalloc(map_size); - if (!pblk->trans_map) + pblk->trans_map = __vmalloc(map_size, GFP_KERNEL | __GFP_NOWARN + | __GFP_RETRY_MAYFAIL | __GFP_HIGHMEM, + PAGE_KERNEL); + if (!pblk->trans_map) { + pblk_err(pblk, "failed to allocate L2P (need %zu of memory)\n", + map_size); return -ENOMEM; + } pblk_ppa_set_empty(&ppa); - for (i = 0; i < pblk->rl.nr_secs; i++) + for (i = 0; i < pblk->capacity; i++) pblk_trans_map_set(pblk, i, ppa); ret = pblk_l2p_recover(pblk, factory_init); @@ -701,7 +687,6 @@ static int pblk_set_provision(struct pblk *pblk, int nr_free_chks) * on user capacity consider only provisioned blocks */ pblk->rl.total_blocks = nr_free_chks; - pblk->rl.nr_secs = nr_free_chks * geo->clba; /* Consider sectors used for metadata */ sec_meta = (lm->smeta_sec + lm->emeta_sec[0]) * l_mg->nr_free_lines; @@ -1284,7 +1269,7 @@ static void *pblk_init(struct nvm_tgt_dev *dev, struct gendisk *tdisk, pblk_info(pblk, "luns:%u, lines:%d, secs:%llu, buf entries:%u\n", geo->all_luns, pblk->l_mg.nr_lines, - (unsigned long long)pblk->rl.nr_secs, + (unsigned long long)pblk->capacity, pblk->rwb.nr_entries); wake_up_process(pblk->writer_ts); diff --git a/drivers/lightnvm/pblk-map.c b/drivers/lightnvm/pblk-map.c index 7fbc99b60cac..5408e32b2f13 100644 --- a/drivers/lightnvm/pblk-map.c +++ b/drivers/lightnvm/pblk-map.c @@ -162,6 +162,7 @@ int pblk_map_erase_rq(struct pblk *pblk, struct nvm_rq *rqd, *erase_ppa = ppa_list[i]; erase_ppa->a.blk = e_line->id; + erase_ppa->a.reserved = 0; spin_unlock(&e_line->lock); diff --git a/drivers/lightnvm/pblk-rb.c b/drivers/lightnvm/pblk-rb.c index 03c241b340ea..5abb1705b039 100644 --- a/drivers/lightnvm/pblk-rb.c +++ b/drivers/lightnvm/pblk-rb.c @@ -642,7 +642,7 @@ try: * be directed to disk. 
*/ int pblk_rb_copy_to_bio(struct pblk_rb *rb, struct bio *bio, sector_t lba, - struct ppa_addr ppa, int bio_iter, bool advanced_bio) + struct ppa_addr ppa) { struct pblk *pblk = container_of(rb, struct pblk, rwb); struct pblk_rb_entry *entry; @@ -673,15 +673,6 @@ int pblk_rb_copy_to_bio(struct pblk_rb *rb, struct bio *bio, sector_t lba, ret = 0; goto out; } - - /* Only advance the bio if it hasn't been advanced already. If advanced, - * this bio is at least a partial bio (i.e., it has partially been - * filled with data from the cache). If part of the data resides on the - * media, we will read later on - */ - if (unlikely(!advanced_bio)) - bio_advance(bio, bio_iter * PBLK_EXPOSED_PAGE_SIZE); - data = bio_data(bio); memcpy(data, entry->data, rb->seg_size); @@ -799,8 +790,8 @@ int pblk_rb_tear_down_check(struct pblk_rb *rb) } out: - spin_unlock(&rb->w_lock); spin_unlock_irq(&rb->s_lock); + spin_unlock(&rb->w_lock); return ret; } diff --git a/drivers/lightnvm/pblk-read.c b/drivers/lightnvm/pblk-read.c index 0b7d5fb4548d..d98ea392fe33 100644 --- a/drivers/lightnvm/pblk-read.c +++ b/drivers/lightnvm/pblk-read.c @@ -26,8 +26,7 @@ * issued. */ static int pblk_read_from_cache(struct pblk *pblk, struct bio *bio, - sector_t lba, struct ppa_addr ppa, - int bio_iter, bool advanced_bio) + sector_t lba, struct ppa_addr ppa) { #ifdef CONFIG_NVM_PBLK_DEBUG /* Callers must ensure that the ppa points to a cache address */ @@ -35,73 +34,75 @@ static int pblk_read_from_cache(struct pblk *pblk, struct bio *bio, BUG_ON(!pblk_addr_in_cache(ppa)); #endif - return pblk_rb_copy_to_bio(&pblk->rwb, bio, lba, ppa, - bio_iter, advanced_bio); + return pblk_rb_copy_to_bio(&pblk->rwb, bio, lba, ppa); } -static void pblk_read_ppalist_rq(struct pblk *pblk, struct nvm_rq *rqd, +static int pblk_read_ppalist_rq(struct pblk *pblk, struct nvm_rq *rqd, struct bio *bio, sector_t blba, - unsigned long *read_bitmap) + bool *from_cache) { void *meta_list = rqd->meta_list; - struct ppa_addr ppas[NVM_MAX_VLBA]; - int nr_secs = rqd->nr_ppas; - bool advanced_bio = false; - int i, j = 0; + int nr_secs, i; - pblk_lookup_l2p_seq(pblk, ppas, blba, nr_secs); +retry: + nr_secs = pblk_lookup_l2p_seq(pblk, rqd->ppa_list, blba, rqd->nr_ppas, + from_cache); + + if (!*from_cache) + goto end; for (i = 0; i < nr_secs; i++) { - struct ppa_addr p = ppas[i]; struct pblk_sec_meta *meta = pblk_get_meta(pblk, meta_list, i); sector_t lba = blba + i; -retry: - if (pblk_ppa_empty(p)) { + if (pblk_ppa_empty(rqd->ppa_list[i])) { __le64 addr_empty = cpu_to_le64(ADDR_EMPTY); - WARN_ON(test_and_set_bit(i, read_bitmap)); meta->lba = addr_empty; - - if (unlikely(!advanced_bio)) { - bio_advance(bio, (i) * PBLK_EXPOSED_PAGE_SIZE); - advanced_bio = true; + } else if (pblk_addr_in_cache(rqd->ppa_list[i])) { + /* + * Try to read from write buffer. The address is later + * checked on the write buffer to prevent retrieving + * overwritten data. + */ + if (!pblk_read_from_cache(pblk, bio, lba, + rqd->ppa_list[i])) { + if (i == 0) { + /* + * We didn't call with bio_advance() + * yet, so we can just retry. + */ + goto retry; + } else { + /* + * We already call bio_advance() + * so we cannot retry and we need + * to quit that function in order + * to allow caller to handle the bio + * splitting in the current sector + * position. + */ + nr_secs = i; + goto end; + } } - - goto next; - } - - /* Try to read from write buffer. The address is later checked - * on the write buffer to prevent retrieving overwritten data. 
- */ - if (pblk_addr_in_cache(p)) { - if (!pblk_read_from_cache(pblk, bio, lba, p, i, - advanced_bio)) { - pblk_lookup_l2p_seq(pblk, &p, lba, 1); - goto retry; - } - WARN_ON(test_and_set_bit(i, read_bitmap)); meta->lba = cpu_to_le64(lba); - advanced_bio = true; #ifdef CONFIG_NVM_PBLK_DEBUG atomic_long_inc(&pblk->cache_reads); #endif - } else { - /* Read from media non-cached sectors */ - rqd->ppa_list[j++] = p; } - -next: - if (advanced_bio) - bio_advance(bio, PBLK_EXPOSED_PAGE_SIZE); + bio_advance(bio, PBLK_EXPOSED_PAGE_SIZE); } +end: if (pblk_io_aligned(pblk, nr_secs)) rqd->is_seq = 1; #ifdef CONFIG_NVM_PBLK_DEBUG atomic_long_add(nr_secs, &pblk->inflight_reads); #endif + + return nr_secs; } @@ -175,12 +176,12 @@ static void pblk_read_check_rand(struct pblk *pblk, struct nvm_rq *rqd, WARN_ONCE(j != rqd->nr_ppas, "pblk: corrupted random request\n"); } -static void pblk_end_user_read(struct bio *bio) +static void pblk_end_user_read(struct bio *bio, int error) { -#ifdef CONFIG_NVM_PBLK_DEBUG - WARN_ONCE(bio->bi_status, "pblk: corrupted read bio\n"); -#endif - bio_endio(bio); + if (error && error != NVM_RSP_WARN_HIGHECC) + bio_io_error(bio); + else + bio_endio(bio); } static void __pblk_end_io_read(struct pblk *pblk, struct nvm_rq *rqd, @@ -197,9 +198,7 @@ static void __pblk_end_io_read(struct pblk *pblk, struct nvm_rq *rqd, pblk_log_read_err(pblk, rqd); pblk_read_check_seq(pblk, rqd, r_ctx->lba); - - if (int_bio) - bio_put(int_bio); + bio_put(int_bio); if (put_line) pblk_rq_to_line_put(pblk, rqd); @@ -219,188 +218,17 @@ static void pblk_end_io_read(struct nvm_rq *rqd) struct pblk_g_ctx *r_ctx = nvm_rq_to_pdu(rqd); struct bio *bio = (struct bio *)r_ctx->private; - pblk_end_user_read(bio); + pblk_end_user_read(bio, rqd->error); __pblk_end_io_read(pblk, rqd, true); } -static void pblk_end_partial_read(struct nvm_rq *rqd) -{ - struct pblk *pblk = rqd->private; - struct pblk_g_ctx *r_ctx = nvm_rq_to_pdu(rqd); - struct pblk_pr_ctx *pr_ctx = r_ctx->private; - struct pblk_sec_meta *meta; - struct bio *new_bio = rqd->bio; - struct bio *bio = pr_ctx->orig_bio; - void *meta_list = rqd->meta_list; - unsigned long *read_bitmap = pr_ctx->bitmap; - struct bvec_iter orig_iter = BVEC_ITER_ALL_INIT; - struct bvec_iter new_iter = BVEC_ITER_ALL_INIT; - int nr_secs = pr_ctx->orig_nr_secs; - int nr_holes = nr_secs - bitmap_weight(read_bitmap, nr_secs); - void *src_p, *dst_p; - int bit, i; - - if (unlikely(nr_holes == 1)) { - struct ppa_addr ppa; - - ppa = rqd->ppa_addr; - rqd->ppa_list = pr_ctx->ppa_ptr; - rqd->dma_ppa_list = pr_ctx->dma_ppa_list; - rqd->ppa_list[0] = ppa; - } - - for (i = 0; i < nr_secs; i++) { - meta = pblk_get_meta(pblk, meta_list, i); - pr_ctx->lba_list_media[i] = le64_to_cpu(meta->lba); - meta->lba = cpu_to_le64(pr_ctx->lba_list_mem[i]); - } - - /* Fill the holes in the original bio */ - i = 0; - for (bit = 0; bit < nr_secs; bit++) { - if (!test_bit(bit, read_bitmap)) { - struct bio_vec dst_bv, src_bv; - struct pblk_line *line; - - line = pblk_ppa_to_line(pblk, rqd->ppa_list[i]); - kref_put(&line->ref, pblk_line_put); - - meta = pblk_get_meta(pblk, meta_list, bit); - meta->lba = cpu_to_le64(pr_ctx->lba_list_media[i]); - - dst_bv = bio_iter_iovec(bio, orig_iter); - src_bv = bio_iter_iovec(new_bio, new_iter); - - src_p = kmap_atomic(src_bv.bv_page); - dst_p = kmap_atomic(dst_bv.bv_page); - - memcpy(dst_p + dst_bv.bv_offset, - src_p + src_bv.bv_offset, - PBLK_EXPOSED_PAGE_SIZE); - - kunmap_atomic(src_p); - kunmap_atomic(dst_p); - - flush_dcache_page(dst_bv.bv_page); - 
mempool_free(src_bv.bv_page, &pblk->page_bio_pool); - - bio_advance_iter(new_bio, &new_iter, - PBLK_EXPOSED_PAGE_SIZE); - i++; - } - bio_advance_iter(bio, &orig_iter, PBLK_EXPOSED_PAGE_SIZE); - } - - bio_put(new_bio); - kfree(pr_ctx); - - /* restore original request */ - rqd->bio = NULL; - rqd->nr_ppas = nr_secs; - - bio_endio(bio); - __pblk_end_io_read(pblk, rqd, false); -} - -static int pblk_setup_partial_read(struct pblk *pblk, struct nvm_rq *rqd, - unsigned int bio_init_idx, - unsigned long *read_bitmap, - int nr_holes) -{ - void *meta_list = rqd->meta_list; - struct pblk_g_ctx *r_ctx = nvm_rq_to_pdu(rqd); - struct pblk_pr_ctx *pr_ctx; - struct bio *new_bio, *bio = r_ctx->private; - int nr_secs = rqd->nr_ppas; - int i; - - new_bio = bio_alloc(GFP_KERNEL, nr_holes); - - if (pblk_bio_add_pages(pblk, new_bio, GFP_KERNEL, nr_holes)) - goto fail_bio_put; - - if (nr_holes != new_bio->bi_vcnt) { - WARN_ONCE(1, "pblk: malformed bio\n"); - goto fail_free_pages; - } - - pr_ctx = kzalloc(sizeof(struct pblk_pr_ctx), GFP_KERNEL); - if (!pr_ctx) - goto fail_free_pages; - - for (i = 0; i < nr_secs; i++) { - struct pblk_sec_meta *meta = pblk_get_meta(pblk, meta_list, i); - - pr_ctx->lba_list_mem[i] = le64_to_cpu(meta->lba); - } - - new_bio->bi_iter.bi_sector = 0; /* internal bio */ - bio_set_op_attrs(new_bio, REQ_OP_READ, 0); - - rqd->bio = new_bio; - rqd->nr_ppas = nr_holes; - - pr_ctx->orig_bio = bio; - bitmap_copy(pr_ctx->bitmap, read_bitmap, NVM_MAX_VLBA); - pr_ctx->bio_init_idx = bio_init_idx; - pr_ctx->orig_nr_secs = nr_secs; - r_ctx->private = pr_ctx; - - if (unlikely(nr_holes == 1)) { - pr_ctx->ppa_ptr = rqd->ppa_list; - pr_ctx->dma_ppa_list = rqd->dma_ppa_list; - rqd->ppa_addr = rqd->ppa_list[0]; - } - return 0; - -fail_free_pages: - pblk_bio_free_pages(pblk, new_bio, 0, new_bio->bi_vcnt); -fail_bio_put: - bio_put(new_bio); - - return -ENOMEM; -} - -static int pblk_partial_read_bio(struct pblk *pblk, struct nvm_rq *rqd, - unsigned int bio_init_idx, - unsigned long *read_bitmap, int nr_secs) -{ - int nr_holes; - int ret; - - nr_holes = nr_secs - bitmap_weight(read_bitmap, nr_secs); - - if (pblk_setup_partial_read(pblk, rqd, bio_init_idx, read_bitmap, - nr_holes)) - return NVM_IO_ERR; - - rqd->end_io = pblk_end_partial_read; - - ret = pblk_submit_io(pblk, rqd); - if (ret) { - bio_put(rqd->bio); - pblk_err(pblk, "partial read IO submission failed\n"); - goto err; - } - - return NVM_IO_OK; - -err: - pblk_err(pblk, "failed to perform partial read\n"); - - /* Free allocated pages in new bio */ - pblk_bio_free_pages(pblk, rqd->bio, 0, rqd->bio->bi_vcnt); - __pblk_end_io_read(pblk, rqd, false); - return NVM_IO_ERR; -} - static void pblk_read_rq(struct pblk *pblk, struct nvm_rq *rqd, struct bio *bio, - sector_t lba, unsigned long *read_bitmap) + sector_t lba, bool *from_cache) { struct pblk_sec_meta *meta = pblk_get_meta(pblk, rqd->meta_list, 0); struct ppa_addr ppa; - pblk_lookup_l2p_seq(pblk, &ppa, lba, 1); + pblk_lookup_l2p_seq(pblk, &ppa, lba, 1, from_cache); #ifdef CONFIG_NVM_PBLK_DEBUG atomic_long_inc(&pblk->inflight_reads); @@ -410,7 +238,6 @@ retry: if (pblk_ppa_empty(ppa)) { __le64 addr_empty = cpu_to_le64(ADDR_EMPTY); - WARN_ON(test_and_set_bit(0, read_bitmap)); meta->lba = addr_empty; return; } @@ -419,12 +246,11 @@ retry: * write buffer to prevent retrieving overwritten data. 
*/ if (pblk_addr_in_cache(ppa)) { - if (!pblk_read_from_cache(pblk, bio, lba, ppa, 0, 1)) { - pblk_lookup_l2p_seq(pblk, &ppa, lba, 1); + if (!pblk_read_from_cache(pblk, bio, lba, ppa)) { + pblk_lookup_l2p_seq(pblk, &ppa, lba, 1, from_cache); goto retry; } - WARN_ON(test_and_set_bit(0, read_bitmap)); meta->lba = cpu_to_le64(lba); #ifdef CONFIG_NVM_PBLK_DEBUG @@ -435,95 +261,92 @@ retry: } } -int pblk_submit_read(struct pblk *pblk, struct bio *bio) +void pblk_submit_read(struct pblk *pblk, struct bio *bio) { struct nvm_tgt_dev *dev = pblk->dev; struct request_queue *q = dev->q; sector_t blba = pblk_get_lba(bio); unsigned int nr_secs = pblk_get_secs(bio); + bool from_cache; struct pblk_g_ctx *r_ctx; struct nvm_rq *rqd; - unsigned int bio_init_idx; - DECLARE_BITMAP(read_bitmap, NVM_MAX_VLBA); - int ret = NVM_IO_ERR; + struct bio *int_bio, *split_bio; generic_start_io_acct(q, REQ_OP_READ, bio_sectors(bio), &pblk->disk->part0); - bitmap_zero(read_bitmap, nr_secs); - rqd = pblk_alloc_rqd(pblk, PBLK_READ); rqd->opcode = NVM_OP_PREAD; rqd->nr_ppas = nr_secs; - rqd->bio = NULL; /* cloned bio if needed */ rqd->private = pblk; rqd->end_io = pblk_end_io_read; r_ctx = nvm_rq_to_pdu(rqd); r_ctx->start_time = jiffies; r_ctx->lba = blba; - r_ctx->private = bio; /* original bio */ - /* Save the index for this bio's start. This is needed in case - * we need to fill a partial read. - */ - bio_init_idx = pblk_get_bi_idx(bio); + if (pblk_alloc_rqd_meta(pblk, rqd)) { + bio_io_error(bio); + pblk_free_rqd(pblk, rqd, PBLK_READ); + return; + } - if (pblk_alloc_rqd_meta(pblk, rqd)) - goto fail_rqd_free; + /* Clone read bio to deal internally with: + * -read errors when reading from drive + * -bio_advance() calls during cache reads + */ + int_bio = bio_clone_fast(bio, GFP_KERNEL, &pblk_bio_set); if (nr_secs > 1) - pblk_read_ppalist_rq(pblk, rqd, bio, blba, read_bitmap); + nr_secs = pblk_read_ppalist_rq(pblk, rqd, int_bio, blba, + &from_cache); else - pblk_read_rq(pblk, rqd, bio, blba, read_bitmap); + pblk_read_rq(pblk, rqd, int_bio, blba, &from_cache); - if (bitmap_full(read_bitmap, nr_secs)) { +split_retry: + r_ctx->private = bio; /* original bio */ + rqd->bio = int_bio; /* internal bio */ + + if (from_cache && nr_secs == rqd->nr_ppas) { + /* All data was read from cache, we can complete the IO. */ + pblk_end_user_read(bio, 0); atomic_inc(&pblk->inflight_io); __pblk_end_io_read(pblk, rqd, false); - return NVM_IO_DONE; - } - - /* All sectors are to be read from the device */ - if (bitmap_empty(read_bitmap, rqd->nr_ppas)) { - struct bio *int_bio = NULL; + } else if (nr_secs != rqd->nr_ppas) { + /* The read bio request could be partially filled by the write + * buffer, but there are some holes that need to be read from + * the drive. In order to handle this, we will use block layer + * mechanism to split this request in to smaller ones and make + * a chain of it. + */ + split_bio = bio_split(bio, nr_secs * NR_PHY_IN_LOG, GFP_KERNEL, + &pblk_bio_set); + bio_chain(split_bio, bio); + generic_make_request(bio); + + /* New bio contains first N sectors of the previous one, so + * we can continue to use existing rqd, but we need to shrink + * the number of PPAs in it. New bio is also guaranteed that + * it contains only either data from cache or from drive, newer + * mix of them. 
+ */ + bio = split_bio; + rqd->nr_ppas = nr_secs; + if (rqd->nr_ppas == 1) + rqd->ppa_addr = rqd->ppa_list[0]; - /* Clone read bio to deal with read errors internally */ + /* Recreate int_bio - existing might have some needed internal + * fields modified already. + */ + bio_put(int_bio); int_bio = bio_clone_fast(bio, GFP_KERNEL, &pblk_bio_set); - if (!int_bio) { - pblk_err(pblk, "could not clone read bio\n"); - goto fail_end_io; - } - - rqd->bio = int_bio; - - if (pblk_submit_io(pblk, rqd)) { - pblk_err(pblk, "read IO submission failed\n"); - ret = NVM_IO_ERR; - goto fail_end_io; - } - - return NVM_IO_OK; + goto split_retry; + } else if (pblk_submit_io(pblk, rqd)) { + /* Submitting IO to drive failed, let's report an error */ + rqd->error = -ENODEV; + pblk_end_io_read(rqd); } - - /* The read bio request could be partially filled by the write buffer, - * but there are some holes that need to be read from the drive. - */ - ret = pblk_partial_read_bio(pblk, rqd, bio_init_idx, read_bitmap, - nr_secs); - if (ret) - goto fail_meta_free; - - return NVM_IO_OK; - -fail_meta_free: - nvm_dev_dma_free(dev->parent, rqd->meta_list, rqd->dma_meta_list); -fail_rqd_free: - pblk_free_rqd(pblk, rqd, PBLK_READ); - return ret; -fail_end_io: - __pblk_end_io_read(pblk, rqd, false); - return ret; } static int read_ppalist_rq_gc(struct pblk *pblk, struct nvm_rq *rqd, @@ -568,7 +391,7 @@ static int read_rq_gc(struct pblk *pblk, struct nvm_rq *rqd, goto out; /* logic error: lba out-of-bounds */ - if (lba >= pblk->rl.nr_secs) { + if (lba >= pblk->capacity) { WARN(1, "pblk: read lba out of bounds\n"); goto out; } @@ -642,7 +465,6 @@ int pblk_submit_read_gc(struct pblk *pblk, struct pblk_gc_rq *gc_rq) if (pblk_submit_io_sync(pblk, &rqd)) { ret = -EIO; - pblk_err(pblk, "GC read request failed\n"); goto err_free_bio; } diff --git a/drivers/lightnvm/pblk-recovery.c b/drivers/lightnvm/pblk-recovery.c index d86f580036d3..e6dda04de144 100644 --- a/drivers/lightnvm/pblk-recovery.c +++ b/drivers/lightnvm/pblk-recovery.c @@ -93,10 +93,24 @@ static int pblk_recov_l2p_from_emeta(struct pblk *pblk, struct pblk_line *line) static void pblk_update_line_wp(struct pblk *pblk, struct pblk_line *line, u64 written_secs) { + struct pblk_line_mgmt *l_mg = &pblk->l_mg; int i; for (i = 0; i < written_secs; i += pblk->min_write_pgs) - pblk_alloc_page(pblk, line, pblk->min_write_pgs); + __pblk_alloc_page(pblk, line, pblk->min_write_pgs); + + spin_lock(&l_mg->free_lock); + if (written_secs > line->left_msecs) { + /* + * We have all data sectors written + * and some emeta sectors written too. + */ + line->left_msecs = 0; + } else { + /* We have only some data sectors written. 
*/ + line->left_msecs -= written_secs; + } + spin_unlock(&l_mg->free_lock); } static u64 pblk_sec_in_open_line(struct pblk *pblk, struct pblk_line *line) @@ -165,6 +179,7 @@ static int pblk_recov_pad_line(struct pblk *pblk, struct pblk_line *line, struct pblk_pad_rq *pad_rq; struct nvm_rq *rqd; struct bio *bio; + struct ppa_addr *ppa_list; void *data; __le64 *lba_list = emeta_to_lbas(pblk, line->emeta->buf); u64 w_ptr = line->cur_sec; @@ -194,7 +209,7 @@ next_pad_rq: rq_ppas = pblk_calc_secs(pblk, left_ppas, 0, false); if (rq_ppas < pblk->min_write_pgs) { pblk_err(pblk, "corrupted pad line %d\n", line->id); - goto fail_free_pad; + goto fail_complete; } rq_len = rq_ppas * geo->csecs; @@ -203,7 +218,7 @@ next_pad_rq: PBLK_VMALLOC_META, GFP_KERNEL); if (IS_ERR(bio)) { ret = PTR_ERR(bio); - goto fail_free_pad; + goto fail_complete; } bio->bi_iter.bi_sector = 0; /* internal bio */ @@ -212,8 +227,11 @@ next_pad_rq: rqd = pblk_alloc_rqd(pblk, PBLK_WRITE_INT); ret = pblk_alloc_rqd_meta(pblk, rqd); - if (ret) - goto fail_free_rqd; + if (ret) { + pblk_free_rqd(pblk, rqd, PBLK_WRITE_INT); + bio_put(bio); + goto fail_complete; + } rqd->bio = bio; rqd->opcode = NVM_OP_PWRITE; @@ -222,6 +240,7 @@ next_pad_rq: rqd->end_io = pblk_end_io_recov; rqd->private = pad_rq; + ppa_list = nvm_rq_to_ppa_list(rqd); meta_list = rqd->meta_list; for (i = 0; i < rqd->nr_ppas; ) { @@ -249,18 +268,21 @@ next_pad_rq: lba_list[w_ptr] = addr_empty; meta = pblk_get_meta(pblk, meta_list, i); meta->lba = addr_empty; - rqd->ppa_list[i] = dev_ppa; + ppa_list[i] = dev_ppa; } } kref_get(&pad_rq->ref); - pblk_down_chunk(pblk, rqd->ppa_list[0]); + pblk_down_chunk(pblk, ppa_list[0]); ret = pblk_submit_io(pblk, rqd); if (ret) { pblk_err(pblk, "I/O submission failed: %d\n", ret); - pblk_up_chunk(pblk, rqd->ppa_list[0]); - goto fail_free_rqd; + pblk_up_chunk(pblk, ppa_list[0]); + kref_put(&pad_rq->ref, pblk_recov_complete); + pblk_free_rqd(pblk, rqd, PBLK_WRITE_INT); + bio_put(bio); + goto fail_complete; } left_line_ppas -= rq_ppas; @@ -268,13 +290,9 @@ next_pad_rq: if (left_ppas && left_line_ppas) goto next_pad_rq; +fail_complete: kref_put(&pad_rq->ref, pblk_recov_complete); - - if (!wait_for_completion_io_timeout(&pad_rq->wait, - msecs_to_jiffies(PBLK_COMMAND_TIMEOUT_MS))) { - pblk_err(pblk, "pad write timed out\n"); - ret = -ETIME; - } + wait_for_completion(&pad_rq->wait); if (!pblk_line_is_full(line)) pblk_err(pblk, "corrupted padded line: %d\n", line->id); @@ -283,14 +301,6 @@ next_pad_rq: free_rq: kfree(pad_rq); return ret; - -fail_free_rqd: - pblk_free_rqd(pblk, rqd, PBLK_WRITE_INT); - bio_put(bio); -fail_free_pad: - kfree(pad_rq); - vfree(data); - return ret; } static int pblk_pad_distance(struct pblk *pblk, struct pblk_line *line) @@ -412,6 +422,7 @@ retry_rq: rqd->ppa_list = ppa_list; rqd->dma_ppa_list = dma_ppa_list; rqd->dma_meta_list = dma_meta_list; + ppa_list = nvm_rq_to_ppa_list(rqd); if (pblk_io_aligned(pblk, rq_ppas)) rqd->is_seq = 1; @@ -430,7 +441,7 @@ retry_rq: } for (j = 0; j < pblk->min_write_pgs; j++, i++) - rqd->ppa_list[i] = + ppa_list[i] = addr_to_gen_ppa(pblk, paddr + j, line->id); } @@ -444,7 +455,7 @@ retry_rq: atomic_dec(&pblk->inflight_io); /* If a read fails, do a best effort by padding the line and retrying */ - if (rqd->error) { + if (rqd->error && rqd->error != NVM_RSP_WARN_HIGHECC) { int pad_distance, ret; if (padded) { @@ -474,11 +485,11 @@ retry_rq: lba_list[paddr++] = cpu_to_le64(lba); - if (lba == ADDR_EMPTY || lba > pblk->rl.nr_secs) + if (lba == ADDR_EMPTY || lba >= pblk->capacity) continue; 
line->nr_valid_lbas++; - pblk_update_map(pblk, lba, rqd->ppa_list[i]); + pblk_update_map(pblk, lba, ppa_list[i]); } left_ppas -= rq_ppas; @@ -647,10 +658,12 @@ static int pblk_line_was_written(struct pblk_line *line, bppa = pblk->luns[smeta_blk].bppa; chunk = &line->chks[pblk_ppa_to_pos(geo, bppa)]; - if (chunk->state & NVM_CHK_ST_FREE) - return 0; + if (chunk->state & NVM_CHK_ST_CLOSED || + (chunk->state & NVM_CHK_ST_OPEN + && chunk->wp >= lm->smeta_sec)) + return 1; - return 1; + return 0; } static bool pblk_line_is_open(struct pblk *pblk, struct pblk_line *line) @@ -844,6 +857,7 @@ next: spin_unlock(&l_mg->free_lock); } else { spin_lock(&l_mg->free_lock); + l_mg->data_line = data_line; /* Allocate next line for preparation */ l_mg->data_next = pblk_line_get(pblk); if (l_mg->data_next) { diff --git a/drivers/lightnvm/pblk-write.c b/drivers/lightnvm/pblk-write.c index 6593deab52da..4e63f9b5954c 100644 --- a/drivers/lightnvm/pblk-write.c +++ b/drivers/lightnvm/pblk-write.c @@ -228,6 +228,7 @@ static void pblk_submit_rec(struct work_struct *work) mempool_free(recovery, &pblk->rec_pool); atomic_dec(&pblk->inflight_io); + pblk_write_kick(pblk); } diff --git a/drivers/lightnvm/pblk.h b/drivers/lightnvm/pblk.h index ac3ab778e976..a67855387f53 100644 --- a/drivers/lightnvm/pblk.h +++ b/drivers/lightnvm/pblk.h @@ -43,8 +43,6 @@ #define PBLK_CACHE_NAME_LEN (DISK_NAME_LEN + 16) -#define PBLK_COMMAND_TIMEOUT_MS 30000 - /* Max 512 LUNs per device */ #define PBLK_MAX_LUNS_BITMAP (4) @@ -123,18 +121,6 @@ struct pblk_g_ctx { u64 lba; }; -/* partial read context */ -struct pblk_pr_ctx { - struct bio *orig_bio; - DECLARE_BITMAP(bitmap, NVM_MAX_VLBA); - unsigned int orig_nr_secs; - unsigned int bio_init_idx; - void *ppa_ptr; - dma_addr_t dma_ppa_list; - u64 lba_list_mem[NVM_MAX_VLBA]; - u64 lba_list_media[NVM_MAX_VLBA]; -}; - /* Pad context */ struct pblk_pad_rq { struct pblk *pblk; @@ -305,7 +291,6 @@ struct pblk_rl { struct timer_list u_timer; - unsigned long long nr_secs; unsigned long total_blocks; atomic_t free_blocks; /* Total number of free blocks (+ OP) */ @@ -440,6 +425,7 @@ struct pblk_smeta { struct pblk_w_err_gc { int has_write_err; + int has_gc_err; __le64 *lba_list; }; @@ -465,7 +451,6 @@ struct pblk_line { int meta_line; /* Metadata line id */ int meta_distance; /* Distance between data and metadata */ - u64 smeta_ssec; /* Sector where smeta starts */ u64 emeta_ssec; /* Sector where emeta starts */ unsigned int sec_in_line; /* Number of usable secs in line */ @@ -762,7 +747,7 @@ unsigned int pblk_rb_read_to_bio(struct pblk_rb *rb, struct nvm_rq *rqd, unsigned int pos, unsigned int nr_entries, unsigned int count); int pblk_rb_copy_to_bio(struct pblk_rb *rb, struct bio *bio, sector_t lba, - struct ppa_addr ppa, int bio_iter, bool advanced_bio); + struct ppa_addr ppa); unsigned int pblk_rb_read_commit(struct pblk_rb *rb, unsigned int entries); unsigned int pblk_rb_sync_init(struct pblk_rb *rb, unsigned long *flags); @@ -862,15 +847,15 @@ int pblk_update_map_gc(struct pblk *pblk, sector_t lba, struct ppa_addr ppa, struct pblk_line *gc_line, u64 paddr); void pblk_lookup_l2p_rand(struct pblk *pblk, struct ppa_addr *ppas, u64 *lba_list, int nr_secs); -void pblk_lookup_l2p_seq(struct pblk *pblk, struct ppa_addr *ppas, - sector_t blba, int nr_secs); +int pblk_lookup_l2p_seq(struct pblk *pblk, struct ppa_addr *ppas, + sector_t blba, int nr_secs, bool *from_cache); void *pblk_get_meta_for_writes(struct pblk *pblk, struct nvm_rq *rqd); void pblk_get_packed_meta(struct pblk *pblk, struct nvm_rq *rqd); /* 
* pblk user I/O write path */ -int pblk_write_to_cache(struct pblk *pblk, struct bio *bio, +void pblk_write_to_cache(struct pblk *pblk, struct bio *bio, unsigned long flags); int pblk_write_gc_to_cache(struct pblk *pblk, struct pblk_gc_rq *gc_rq); @@ -896,7 +881,7 @@ void pblk_write_kick(struct pblk *pblk); * pblk read path */ extern struct bio_set pblk_bio_set; -int pblk_submit_read(struct pblk *pblk, struct bio *bio); +void pblk_submit_read(struct pblk *pblk, struct bio *bio); int pblk_submit_read_gc(struct pblk *pblk, struct pblk_gc_rq *gc_rq); /* * pblk recovery @@ -921,6 +906,7 @@ void pblk_gc_free_full_lines(struct pblk *pblk); void pblk_gc_sysfs_state_show(struct pblk *pblk, int *gc_enabled, int *gc_active); int pblk_gc_sysfs_force(struct pblk *pblk, int force); +void pblk_put_line_back(struct pblk *pblk, struct pblk_line *line); /* * pblk rate limiter diff --git a/drivers/nvme/host/core.c b/drivers/nvme/host/core.c index a6644a2c3ef7..7da80f375315 100644 --- a/drivers/nvme/host/core.c +++ b/drivers/nvme/host/core.c @@ -1257,10 +1257,9 @@ static u32 nvme_passthru_start(struct nvme_ctrl *ctrl, struct nvme_ns *ns, return 0; } + effects |= nvme_known_admin_effects(opcode); if (ctrl->effects) effects = le32_to_cpu(ctrl->effects->acs[opcode]); - else - effects = nvme_known_admin_effects(opcode); /* * For simplicity, IO to all namespaces is quiesced even if the command @@ -2342,20 +2341,35 @@ static const struct attribute_group *nvme_subsys_attrs_groups[] = { NULL, }; -static int nvme_active_ctrls(struct nvme_subsystem *subsys) +static bool nvme_validate_cntlid(struct nvme_subsystem *subsys, + struct nvme_ctrl *ctrl, struct nvme_id_ctrl *id) { - int count = 0; - struct nvme_ctrl *ctrl; + struct nvme_ctrl *tmp; + + lockdep_assert_held(&nvme_subsystems_lock); + + list_for_each_entry(tmp, &subsys->ctrls, subsys_entry) { + if (ctrl->state == NVME_CTRL_DELETING || + ctrl->state == NVME_CTRL_DEAD) + continue; + + if (tmp->cntlid == ctrl->cntlid) { + dev_err(ctrl->device, + "Duplicate cntlid %u with %s, rejecting\n", + ctrl->cntlid, dev_name(tmp->device)); + return false; + } - mutex_lock(&subsys->lock); - list_for_each_entry(ctrl, &subsys->ctrls, subsys_entry) { - if (ctrl->state != NVME_CTRL_DELETING && - ctrl->state != NVME_CTRL_DEAD) - count++; + if ((id->cmic & (1 << 1)) || + (ctrl->opts && ctrl->opts->discovery_nqn)) + continue; + + dev_err(ctrl->device, + "Subsystem does not support multiple controllers\n"); + return false; } - mutex_unlock(&subsys->lock); - return count; + return true; } static int nvme_init_subsystem(struct nvme_ctrl *ctrl, struct nvme_id_ctrl *id) @@ -2395,22 +2409,13 @@ static int nvme_init_subsystem(struct nvme_ctrl *ctrl, struct nvme_id_ctrl *id) mutex_lock(&nvme_subsystems_lock); found = __nvme_find_get_subsystem(subsys->subnqn); if (found) { - /* - * Verify that the subsystem actually supports multiple - * controllers, else bail out. 
- */ - if (!(ctrl->opts && ctrl->opts->discovery_nqn) && - nvme_active_ctrls(found) && !(id->cmic & (1 << 1))) { - dev_err(ctrl->device, - "ignoring ctrl due to duplicate subnqn (%s).\n", - found->subnqn); - nvme_put_subsystem(found); - ret = -EINVAL; - goto out_unlock; - } - __nvme_release_subsystem(subsys); subsys = found; + + if (!nvme_validate_cntlid(subsys, ctrl, id)) { + ret = -EINVAL; + goto out_put_subsystem; + } } else { ret = device_add(&subsys->dev); if (ret) { @@ -2422,23 +2427,20 @@ static int nvme_init_subsystem(struct nvme_ctrl *ctrl, struct nvme_id_ctrl *id) list_add_tail(&subsys->entry, &nvme_subsystems); } - ctrl->subsys = subsys; - mutex_unlock(&nvme_subsystems_lock); - if (sysfs_create_link(&subsys->dev.kobj, &ctrl->device->kobj, dev_name(ctrl->device))) { dev_err(ctrl->device, "failed to create sysfs link from subsystem.\n"); - /* the transport driver will eventually put the subsystem */ - return -EINVAL; + goto out_put_subsystem; } - mutex_lock(&subsys->lock); + ctrl->subsys = subsys; list_add_tail(&ctrl->subsys_entry, &subsys->ctrls); - mutex_unlock(&subsys->lock); - + mutex_unlock(&nvme_subsystems_lock); return 0; +out_put_subsystem: + nvme_put_subsystem(subsys); out_unlock: mutex_unlock(&nvme_subsystems_lock); put_device(&subsys->dev); @@ -3605,19 +3607,18 @@ static void nvme_handle_aen_notice(struct nvme_ctrl *ctrl, u32 result) { u32 aer_notice_type = (result & 0xff00) >> 8; + trace_nvme_async_event(ctrl, aer_notice_type); + switch (aer_notice_type) { case NVME_AER_NOTICE_NS_CHANGED: - trace_nvme_async_event(ctrl, aer_notice_type); set_bit(NVME_AER_NOTICE_NS_CHANGED, &ctrl->events); nvme_queue_scan(ctrl); break; case NVME_AER_NOTICE_FW_ACT_STARTING: - trace_nvme_async_event(ctrl, aer_notice_type); queue_work(nvme_wq, &ctrl->fw_act_work); break; #ifdef CONFIG_NVME_MULTIPATH case NVME_AER_NOTICE_ANA: - trace_nvme_async_event(ctrl, aer_notice_type); if (!ctrl->ana_log_buf) break; queue_work(nvme_wq, &ctrl->ana_work); @@ -3696,10 +3697,10 @@ static void nvme_free_ctrl(struct device *dev) __free_page(ctrl->discard_page); if (subsys) { - mutex_lock(&subsys->lock); + mutex_lock(&nvme_subsystems_lock); list_del(&ctrl->subsys_entry); - mutex_unlock(&subsys->lock); sysfs_remove_link(&subsys->dev.kobj, dev_name(ctrl->device)); + mutex_unlock(&nvme_subsystems_lock); } ctrl->ops->free_ctrl(ctrl); diff --git a/drivers/nvme/host/fabrics.c b/drivers/nvme/host/fabrics.c index 592d1e61ef7e..5838f7cd53ac 100644 --- a/drivers/nvme/host/fabrics.c +++ b/drivers/nvme/host/fabrics.c @@ -978,7 +978,7 @@ EXPORT_SYMBOL_GPL(nvmf_free_options); NVMF_OPT_DISABLE_SQFLOW) static struct nvme_ctrl * -nvmf_create_ctrl(struct device *dev, const char *buf, size_t count) +nvmf_create_ctrl(struct device *dev, const char *buf) { struct nvmf_ctrl_options *opts; struct nvmf_transport_ops *ops; @@ -1073,7 +1073,7 @@ static ssize_t nvmf_dev_write(struct file *file, const char __user *ubuf, goto out_unlock; } - ctrl = nvmf_create_ctrl(nvmf_device, buf, count); + ctrl = nvmf_create_ctrl(nvmf_device, buf); if (IS_ERR(ctrl)) { ret = PTR_ERR(ctrl); goto out_unlock; diff --git a/drivers/nvme/host/fc.c b/drivers/nvme/host/fc.c index 9544eb60f725..dd8169bbf0d2 100644 --- a/drivers/nvme/host/fc.c +++ b/drivers/nvme/host/fc.c @@ -202,7 +202,7 @@ static LIST_HEAD(nvme_fc_lport_list); static DEFINE_IDA(nvme_fc_local_port_cnt); static DEFINE_IDA(nvme_fc_ctrl_cnt); - +static struct workqueue_struct *nvme_fc_wq; /* * These items are short-term. 
They will eventually be moved into @@ -2054,7 +2054,7 @@ nvme_fc_error_recovery(struct nvme_fc_ctrl *ctrl, char *errmsg) */ if (ctrl->ctrl.state == NVME_CTRL_CONNECTING) { active = atomic_xchg(&ctrl->err_work_active, 1); - if (!active && !schedule_work(&ctrl->err_work)) { + if (!active && !queue_work(nvme_fc_wq, &ctrl->err_work)) { atomic_set(&ctrl->err_work_active, 0); WARN_ON(1); } @@ -3399,6 +3399,10 @@ static int __init nvme_fc_init_module(void) { int ret; + nvme_fc_wq = alloc_workqueue("nvme_fc_wq", WQ_MEM_RECLAIM, 0); + if (!nvme_fc_wq) + return -ENOMEM; + /* * NOTE: * It is expected that in the future the kernel will combine @@ -3416,7 +3420,7 @@ static int __init nvme_fc_init_module(void) ret = class_register(&fc_class); if (ret) { pr_err("couldn't register class fc\n"); - return ret; + goto out_destroy_wq; } /* @@ -3440,6 +3444,9 @@ out_destroy_device: device_destroy(&fc_class, MKDEV(0, 0)); out_destroy_class: class_unregister(&fc_class); +out_destroy_wq: + destroy_workqueue(nvme_fc_wq); + return ret; } @@ -3456,6 +3463,7 @@ static void __exit nvme_fc_exit_module(void) device_destroy(&fc_class, MKDEV(0, 0)); class_unregister(&fc_class); + destroy_workqueue(nvme_fc_wq); } module_init(nvme_fc_init_module); diff --git a/drivers/nvme/host/lightnvm.c b/drivers/nvme/host/lightnvm.c index 949e29e1d782..4f20a10b39d3 100644 --- a/drivers/nvme/host/lightnvm.c +++ b/drivers/nvme/host/lightnvm.c @@ -977,6 +977,7 @@ int nvme_nvm_register(struct nvme_ns *ns, char *disk_name, int node) geo->csecs = 1 << ns->lba_shift; geo->sos = ns->ms; geo->ext = ns->ext; + geo->mdts = ns->ctrl->max_hw_sectors; dev->q = q; memcpy(dev->name, disk_name, DISK_NAME_LEN); diff --git a/drivers/nvme/host/multipath.c b/drivers/nvme/host/multipath.c index 5c9429d41120..499acf07d61a 100644 --- a/drivers/nvme/host/multipath.c +++ b/drivers/nvme/host/multipath.c @@ -31,7 +31,7 @@ void nvme_set_disk_name(char *disk_name, struct nvme_ns *ns, sprintf(disk_name, "nvme%dn%d", ctrl->instance, ns->head->instance); } else if (ns->head->disk) { sprintf(disk_name, "nvme%dc%dn%d", ctrl->subsys->instance, - ctrl->cntlid, ns->head->instance); + ctrl->instance, ns->head->instance); *flags = GENHD_FL_HIDDEN; } else { sprintf(disk_name, "nvme%dn%d", ctrl->subsys->instance, diff --git a/drivers/nvme/host/pci.c b/drivers/nvme/host/pci.c index 3e4fb891a95a..2a8708c9ac18 100644 --- a/drivers/nvme/host/pci.c +++ b/drivers/nvme/host/pci.c @@ -1296,6 +1296,7 @@ static enum blk_eh_timer_return nvme_timeout(struct request *req, bool reserved) switch (dev->ctrl.state) { case NVME_CTRL_DELETING: shutdown = true; + /* fall through */ case NVME_CTRL_CONNECTING: case NVME_CTRL_RESETTING: dev_warn_ratelimited(dev->ctrl.device, @@ -2280,8 +2281,6 @@ static int nvme_dev_add(struct nvme_dev *dev) return ret; } dev->ctrl.tagset = &dev->tagset; - - nvme_dbbuf_set(dev); } else { blk_mq_update_nr_hw_queues(&dev->tagset, dev->online_queues - 1); @@ -2289,6 +2288,7 @@ static int nvme_dev_add(struct nvme_dev *dev) nvme_free_queues(dev, dev->online_queues); } + nvme_dbbuf_set(dev); return 0; } diff --git a/drivers/nvme/host/rdma.c b/drivers/nvme/host/rdma.c index e1824c2e0a1c..f383146e7d0f 100644 --- a/drivers/nvme/host/rdma.c +++ b/drivers/nvme/host/rdma.c @@ -697,15 +697,6 @@ out_free_queues: return ret; } -static void nvme_rdma_free_tagset(struct nvme_ctrl *nctrl, - struct blk_mq_tag_set *set) -{ - struct nvme_rdma_ctrl *ctrl = to_rdma_ctrl(nctrl); - - blk_mq_free_tag_set(set); - nvme_rdma_dev_put(ctrl->device); -} - static struct blk_mq_tag_set 
*nvme_rdma_alloc_tagset(struct nvme_ctrl *nctrl, bool admin) { @@ -744,24 +735,9 @@ static struct blk_mq_tag_set *nvme_rdma_alloc_tagset(struct nvme_ctrl *nctrl, ret = blk_mq_alloc_tag_set(set); if (ret) - goto out; - - /* - * We need a reference on the device as long as the tag_set is alive, - * as the MRs in the request structures need a valid ib_device. - */ - ret = nvme_rdma_dev_get(ctrl->device); - if (!ret) { - ret = -EINVAL; - goto out_free_tagset; - } + return ERR_PTR(ret); return set; - -out_free_tagset: - blk_mq_free_tag_set(set); -out: - return ERR_PTR(ret); } static void nvme_rdma_destroy_admin_queue(struct nvme_rdma_ctrl *ctrl, @@ -769,7 +745,7 @@ static void nvme_rdma_destroy_admin_queue(struct nvme_rdma_ctrl *ctrl, { if (remove) { blk_cleanup_queue(ctrl->ctrl.admin_q); - nvme_rdma_free_tagset(&ctrl->ctrl, ctrl->ctrl.admin_tagset); + blk_mq_free_tag_set(ctrl->ctrl.admin_tagset); } if (ctrl->async_event_sqe.data) { nvme_rdma_free_qe(ctrl->device->dev, &ctrl->async_event_sqe, @@ -847,7 +823,7 @@ out_cleanup_queue: blk_cleanup_queue(ctrl->ctrl.admin_q); out_free_tagset: if (new) - nvme_rdma_free_tagset(&ctrl->ctrl, ctrl->ctrl.admin_tagset); + blk_mq_free_tag_set(ctrl->ctrl.admin_tagset); out_free_async_qe: nvme_rdma_free_qe(ctrl->device->dev, &ctrl->async_event_sqe, sizeof(struct nvme_command), DMA_TO_DEVICE); @@ -862,7 +838,7 @@ static void nvme_rdma_destroy_io_queues(struct nvme_rdma_ctrl *ctrl, { if (remove) { blk_cleanup_queue(ctrl->ctrl.connect_q); - nvme_rdma_free_tagset(&ctrl->ctrl, ctrl->ctrl.tagset); + blk_mq_free_tag_set(ctrl->ctrl.tagset); } nvme_rdma_free_io_queues(ctrl); } @@ -903,7 +879,7 @@ out_cleanup_connect_q: blk_cleanup_queue(ctrl->ctrl.connect_q); out_free_tag_set: if (new) - nvme_rdma_free_tagset(&ctrl->ctrl, ctrl->ctrl.tagset); + blk_mq_free_tag_set(ctrl->ctrl.tagset); out_free_io_queues: nvme_rdma_free_io_queues(ctrl); return ret; diff --git a/drivers/nvme/host/trace.h b/drivers/nvme/host/trace.h index 97d3c77365b8..e71502d141ed 100644 --- a/drivers/nvme/host/trace.h +++ b/drivers/nvme/host/trace.h @@ -167,6 +167,7 @@ TRACE_EVENT(nvme_async_event, aer_name(NVME_AER_NOTICE_NS_CHANGED), aer_name(NVME_AER_NOTICE_ANA), aer_name(NVME_AER_NOTICE_FW_ACT_STARTING), + aer_name(NVME_AER_NOTICE_DISC_CHANGED), aer_name(NVME_AER_ERROR), aer_name(NVME_AER_SMART), aer_name(NVME_AER_CSS), diff --git a/drivers/s390/block/dasd_eckd.c b/drivers/s390/block/dasd_eckd.c index f89f9d02e788..c09039eea707 100644 --- a/drivers/s390/block/dasd_eckd.c +++ b/drivers/s390/block/dasd_eckd.c @@ -3827,7 +3827,7 @@ static struct dasd_ccw_req *dasd_eckd_build_cp_raw(struct dasd_device *startdev, if ((start_padding_sectors || end_padding_sectors) && (rq_data_dir(req) == WRITE)) { DBF_DEV_EVENT(DBF_ERR, basedev, - "raw write not track aligned (%lu,%lu) req %p", + "raw write not track aligned (%llu,%llu) req %p", start_padding_sectors, end_padding_sectors, req); return ERR_PTR(-EINVAL); } diff --git a/include/linux/lightnvm.h b/include/linux/lightnvm.h index 5d865a5d5cdc..4d0d5655c7b2 100644 --- a/include/linux/lightnvm.h +++ b/include/linux/lightnvm.h @@ -358,6 +358,7 @@ struct nvm_geo { u16 csecs; /* sector size */ u16 sos; /* out-of-band area size */ bool ext; /* metadata in extended data buffer */ + u32 mdts; /* Max data transfer size*/ /* device write constrains */ u32 ws_min; /* minimum write size */ @@ -427,6 +428,7 @@ struct nvm_dev { char name[DISK_NAME_LEN]; void *private_data; + struct kref ref; void *rmap; struct mutex mlock; diff --git a/include/linux/nvme.h 
b/include/linux/nvme.h index c40720cb59ac..8028adacaff3 100644 --- a/include/linux/nvme.h +++ b/include/linux/nvme.h @@ -1246,9 +1246,9 @@ enum { NVME_SC_FW_NEEDS_SUBSYS_RESET = 0x110, NVME_SC_FW_NEEDS_RESET = 0x111, NVME_SC_FW_NEEDS_MAX_TIME = 0x112, - NVME_SC_FW_ACIVATE_PROHIBITED = 0x113, + NVME_SC_FW_ACTIVATE_PROHIBITED = 0x113, NVME_SC_OVERLAPPING_RANGE = 0x114, - NVME_SC_NS_INSUFFICENT_CAP = 0x115, + NVME_SC_NS_INSUFFICIENT_CAP = 0x115, NVME_SC_NS_ID_UNAVAILABLE = 0x116, NVME_SC_NS_ALREADY_ATTACHED = 0x118, NVME_SC_NS_IS_PRIVATE = 0x119, |