From 900148296b78c61aa8c443dc594c0da968c3be53 Mon Sep 17 00:00:00 2001 From: Rakesh Pandit Date: Fri, 13 Oct 2017 14:45:50 +0200 Subject: lightnvm: prevent target type module removal when in use MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit If target type module e.g. pblk here is unloaded (rmmod) while module is in use (after creating target) system crashes. We fix this by using module API refcnt. Signed-off-by: Rakesh Pandit Signed-off-by: Matias Bjørling Signed-off-by: Jens Axboe --- drivers/lightnvm/core.c | 4 ++++ drivers/lightnvm/pblk-init.c | 1 + 2 files changed, 5 insertions(+) (limited to 'drivers/lightnvm') diff --git a/drivers/lightnvm/core.c b/drivers/lightnvm/core.c index ddae430b6eae..60e163be5a89 100644 --- a/drivers/lightnvm/core.c +++ b/drivers/lightnvm/core.c @@ -22,6 +22,7 @@ #include #include #include +#include #include #include #include @@ -316,6 +317,8 @@ static int nvm_create_tgt(struct nvm_dev *dev, struct nvm_ioctl_create *create) list_add_tail(&t->list, &dev->targets); mutex_unlock(&dev->mlock); + __module_get(tt->owner); + return 0; err_sysfs: if (tt->exit) @@ -351,6 +354,7 @@ static void __nvm_remove_target(struct nvm_target *t) nvm_remove_tgt_dev(t->dev, 1); put_disk(tdisk); + module_put(t->type->owner); list_del(&t->list); kfree(t); diff --git a/drivers/lightnvm/pblk-init.c b/drivers/lightnvm/pblk-init.c index 1b0f61233c21..6df65d14a2c5 100644 --- a/drivers/lightnvm/pblk-init.c +++ b/drivers/lightnvm/pblk-init.c @@ -1044,6 +1044,7 @@ static struct nvm_tgt_type tt_pblk = { .sysfs_init = pblk_sysfs_init, .sysfs_exit = pblk_sysfs_exit, + .owner = THIS_MODULE, }; static int __init pblk_module_init(void) -- cgit v1.2.3 From bb6aa6f08268bbce4e0185b18cab9e04505d6695 Mon Sep 17 00:00:00 2001 From: Rakesh Pandit Date: Fri, 13 Oct 2017 14:45:51 +0200 Subject: lightnvm: prevent bd removal if busy MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit When a virtual 
block device is formatted and mounted after being created with "nvme lnvm create... -t pblk", a removal via "nvme lnvm remove" would result in this: [446416.309757] bdi-block not registered [446416.309773] ------------[ cut here ]------------ [446416.309780] WARNING: CPU: 3 PID: 4319 at fs/fs-writeback.c:2159 __mark_inode_dirty+0x268/0x340 Ideally, removal should return -EBUSY, as the block device is still mounted. This patch addresses this by checking, before removal, whether the whole device or any partition of it is mounted. The whole device is checked using the "bd_super" member of the block device. This member is always set once the block device has been mounted with a filesystem. Another member, "bd_part_count", takes care of checking whether any partitions are in use. "bd_part_count" is only updated under locks when partitions are opened or closed (first open and last release). This at least takes care of returning -EBUSY if removal is attempted while the whole block device or any partition is mounted. 
Signed-off-by: Rakesh Pandit Reviewed-by: Javier González Signed-off-by: Matias Bjørling Signed-off-by: Jens Axboe --- drivers/lightnvm/core.c | 14 ++++++++++++++ 1 file changed, 14 insertions(+) (limited to 'drivers/lightnvm') diff --git a/drivers/lightnvm/core.c b/drivers/lightnvm/core.c index 60e163be5a89..c490711cf0f4 100644 --- a/drivers/lightnvm/core.c +++ b/drivers/lightnvm/core.c @@ -373,6 +373,7 @@ static void __nvm_remove_target(struct nvm_target *t) static int nvm_remove_tgt(struct nvm_dev *dev, struct nvm_ioctl_remove *remove) { struct nvm_target *t; + struct block_device *bdev; mutex_lock(&dev->mlock); t = nvm_find_target(dev, remove->tgtname); @@ -380,6 +381,19 @@ static int nvm_remove_tgt(struct nvm_dev *dev, struct nvm_ioctl_remove *remove) mutex_unlock(&dev->mlock); return 1; } + bdev = bdget_disk(t->disk, 0); + if (!bdev) { + pr_err("nvm: removal failed, allocating bd failed\n"); + mutex_unlock(&dev->mlock); + return -ENOMEM; + } + if (bdev->bd_super || bdev->bd_part_count) { + pr_err("nvm: removal failed, block device busy\n"); + bdput(bdev); + mutex_unlock(&dev->mlock); + return -EBUSY; + } + bdput(bdev); __nvm_remove_target(t); mutex_unlock(&dev->mlock); -- cgit v1.2.3 From 88d31ea2676696ad0802a361c8b824f0762fa34c Mon Sep 17 00:00:00 2001 From: Rakesh Pandit Date: Fri, 13 Oct 2017 14:45:52 +0200 Subject: lightnvm: protect target type list with correct locks MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit nvm_tgt_types list was protected by wrong lock for NVM_INFO ioctl call and can race with addition or removal of target types. Also unregistering target type was not protected correctly. 
Fixes: 5cd907853 ("lightnvm: remove nested lock conflict with mm") Signed-off-by: Rakesh Pandit Reviewed-by: Javier González Signed-off-by: Matias Bjørling Signed-off-by: Jens Axboe --- drivers/lightnvm/core.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) (limited to 'drivers/lightnvm') diff --git a/drivers/lightnvm/core.c b/drivers/lightnvm/core.c index c490711cf0f4..ee2b6d771990 100644 --- a/drivers/lightnvm/core.c +++ b/drivers/lightnvm/core.c @@ -589,9 +589,9 @@ void nvm_unregister_tgt_type(struct nvm_tgt_type *tt) if (!tt) return; - down_write(&nvm_lock); + down_write(&nvm_tgtt_lock); list_del(&tt->list); - up_write(&nvm_lock); + up_write(&nvm_tgtt_lock); } EXPORT_SYMBOL(nvm_unregister_tgt_type); @@ -1195,7 +1195,7 @@ static long nvm_ioctl_info(struct file *file, void __user *arg) info->version[1] = NVM_VERSION_MINOR; info->version[2] = NVM_VERSION_PATCH; - down_write(&nvm_lock); + down_write(&nvm_tgtt_lock); list_for_each_entry(tt, &nvm_tgt_types, list) { struct nvm_ioctl_info_tgt *tgt = &info->tgts[tgt_iter]; @@ -1208,7 +1208,7 @@ static long nvm_ioctl_info(struct file *file, void __user *arg) } info->tgtsize = tgt_iter; - up_write(&nvm_lock); + up_write(&nvm_tgtt_lock); if (copy_to_user(arg, info, sizeof(struct nvm_ioctl_info))) { kfree(info); -- cgit v1.2.3 From a96d50fa0c8d3e399c49a0a90ddbbaabf8a46bb3 Mon Sep 17 00:00:00 2001 From: Rakesh Pandit Date: Fri, 13 Oct 2017 14:45:53 +0200 Subject: lightnvm: remove already calculated nr_chnls MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Remove repeated calculation for number of channels while creating a target device. 
Signed-off-by: Rakesh Pandit Reviewed-by: Javier González Signed-off-by: Matias Bjørling Signed-off-by: Jens Axboe --- drivers/lightnvm/core.c | 1 - 1 file changed, 1 deletion(-) (limited to 'drivers/lightnvm') diff --git a/drivers/lightnvm/core.c b/drivers/lightnvm/core.c index ee2b6d771990..798964f511cd 100644 --- a/drivers/lightnvm/core.c +++ b/drivers/lightnvm/core.c @@ -139,7 +139,6 @@ static struct nvm_tgt_dev *nvm_create_tgt_dev(struct nvm_dev *dev, int prev_nr_luns; int i, j; - nr_chnls = nr_luns / dev->geo.luns_per_chnl; nr_chnls = (nr_chnls_mod == 0) ? nr_chnls : nr_chnls + 1; dev_map = kmalloc(sizeof(struct nvm_dev_map), GFP_KERNEL); -- cgit v1.2.3 From c9d84b350f9b253872fc24f4dcfea166c884ee15 Mon Sep 17 00:00:00 2001 From: Rakesh Pandit Date: Fri, 13 Oct 2017 14:45:54 +0200 Subject: lightnvm: pblk: fix error path in pblk_lines_alloc_metadata MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Use appropriate memory free calls based on allocation type used and also fix number of times free is called if kmalloc fails. 
Signed-off-by: Rakesh Pandit Reviewed-by: Javier González Signed-off-by: Matias Bjørling Signed-off-by: Jens Axboe --- drivers/lightnvm/pblk-init.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) (limited to 'drivers/lightnvm') diff --git a/drivers/lightnvm/pblk-init.c b/drivers/lightnvm/pblk-init.c index 6df65d14a2c5..05665a7e648c 100644 --- a/drivers/lightnvm/pblk-init.c +++ b/drivers/lightnvm/pblk-init.c @@ -630,7 +630,10 @@ static int pblk_lines_alloc_metadata(struct pblk *pblk) fail_free_emeta: while (--i >= 0) { - vfree(l_mg->eline_meta[i]->buf); + if (l_mg->emeta_alloc_type == PBLK_VMALLOC_META) + vfree(l_mg->eline_meta[i]->buf); + else + kfree(l_mg->eline_meta[i]->buf); kfree(l_mg->eline_meta[i]); } -- cgit v1.2.3 From 32c662c58a9b9d0c99e713a14ca323a9a91c73a0 Mon Sep 17 00:00:00 2001 From: Rakesh Pandit Date: Fri, 13 Oct 2017 14:45:55 +0200 Subject: lightnvm: include NVM Express driver if OCSSD is selected for build MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Because NVM needs BLK_DEV_NVME, select it automatically if we mark NVM in config file before building kernel. Also append PCI to depends as select doesn't automatically add dependencies. Signed-off-by: Rakesh Pandit Signed-off-by: Matias Bjørling Signed-off-by: Jens Axboe --- drivers/lightnvm/Kconfig | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'drivers/lightnvm') diff --git a/drivers/lightnvm/Kconfig b/drivers/lightnvm/Kconfig index ead61a93cb4e..2a953efec4e1 100644 --- a/drivers/lightnvm/Kconfig +++ b/drivers/lightnvm/Kconfig @@ -4,7 +4,8 @@ menuconfig NVM bool "Open-Channel SSD target support" - depends on BLOCK && HAS_DMA + depends on BLOCK && HAS_DMA && PCI + select BLK_DEV_NVME help Say Y here to get to enable Open-channel SSDs. 
-- cgit v1.2.3 From e57903fd972a398b7140d0bc055714e13a0e58c5 Mon Sep 17 00:00:00 2001 From: Rakesh Pandit Date: Fri, 13 Oct 2017 14:45:56 +0200 Subject: lightnvm: pblk: protect line bitmap while submitting meta io MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit It seems pblk_dealloc_page would race against pblk_alloc_pages for line bitmap for sector allocation. The chances are very low, but we might as well protect the bitmap properly. Signed-off-by: Rakesh Pandit Reviewed-by: Javier González Signed-off-by: Matias Bjørling Signed-off-by: Jens Axboe --- drivers/lightnvm/pblk-core.c | 2 ++ 1 file changed, 2 insertions(+) (limited to 'drivers/lightnvm') diff --git a/drivers/lightnvm/pblk-core.c b/drivers/lightnvm/pblk-core.c index 81501644fb15..b53bb00a9918 100644 --- a/drivers/lightnvm/pblk-core.c +++ b/drivers/lightnvm/pblk-core.c @@ -486,12 +486,14 @@ void pblk_dealloc_page(struct pblk *pblk, struct pblk_line *line, int nr_secs) u64 addr; int i; + spin_lock(&line->lock); addr = find_next_zero_bit(line->map_bitmap, pblk->lm.sec_per_line, line->cur_sec); line->cur_sec = addr - nr_secs; for (i = 0; i < nr_secs; i++, line->cur_sec--) WARN_ON(!test_and_clear_bit(line->cur_sec, line->map_bitmap)); + spin_unlock(&line->lock); } u64 __pblk_alloc_page(struct pblk *pblk, struct pblk_line *line, int nr_secs) -- cgit v1.2.3 From 4e76af53e132bff9e2b94f018457fadabf5ab419 Mon Sep 17 00:00:00 2001 From: Rakesh Pandit Date: Fri, 13 Oct 2017 14:45:57 +0200 Subject: lightnvm: pblk: fix message if L2P MAP is in device MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This usually happens if we are developing with qemu and ll2pmode has its default value. Improve the description. 
Signed-off-by: Rakesh Pandit Reviewed-by: Javier González Signed-off-by: Matias Bjørling Signed-off-by: Jens Axboe --- drivers/lightnvm/pblk-init.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'drivers/lightnvm') diff --git a/drivers/lightnvm/pblk-init.c b/drivers/lightnvm/pblk-init.c index 05665a7e648c..8c85779e9635 100644 --- a/drivers/lightnvm/pblk-init.c +++ b/drivers/lightnvm/pblk-init.c @@ -914,7 +914,7 @@ static void *pblk_init(struct nvm_tgt_dev *dev, struct gendisk *tdisk, int ret; if (dev->identity.dom & NVM_RSP_L2P) { - pr_err("pblk: device-side L2P table not supported. (%x)\n", + pr_err("pblk: host-side L2P table not supported. (%x)\n", dev->identity.dom); return ERR_PTR(-EINVAL); } -- cgit v1.2.3 From c5493845b7b303315118fb4ab96654bf7cb897f0 Mon Sep 17 00:00:00 2001 From: Rakesh Pandit Date: Fri, 13 Oct 2017 14:45:58 +0200 Subject: lightnvm: pblk: improve error message if down_timeout fails MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The two pr_err messages are useless as they don't differentiate error code. 
Signed-off-by: Rakesh Pandit Reviewed-by: Javier González Signed-off-by: Matias Bjørling Signed-off-by: Jens Axboe --- drivers/lightnvm/pblk-core.c | 12 ++---------- 1 file changed, 2 insertions(+), 10 deletions(-) (limited to 'drivers/lightnvm') diff --git a/drivers/lightnvm/pblk-core.c b/drivers/lightnvm/pblk-core.c index b53bb00a9918..027c42bb1ab9 100644 --- a/drivers/lightnvm/pblk-core.c +++ b/drivers/lightnvm/pblk-core.c @@ -1691,16 +1691,8 @@ static void __pblk_down_page(struct pblk *pblk, struct ppa_addr *ppa_list, #endif ret = down_timeout(&rlun->wr_sem, msecs_to_jiffies(30000)); - if (ret) { - switch (ret) { - case -ETIME: - pr_err("pblk: lun semaphore timed out\n"); - break; - case -EINTR: - pr_err("pblk: lun semaphore timed out\n"); - break; - } - } + if (ret == -ETIME || ret == -EINTR) + pr_err("pblk: taking lun semaphore timed out: err %d\n", -ret); } void pblk_down_page(struct pblk *pblk, struct ppa_addr *ppa_list, int nr_ppas) -- cgit v1.2.3 From c79819bc0877e4cbed8013b1abc9697e8805b21b Mon Sep 17 00:00:00 2001 From: Rakesh Pandit Date: Fri, 13 Oct 2017 14:45:59 +0200 Subject: lightnvm: pblk: print incompatible line version correctly MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Correct it by converting little endian to cpu endian and also define a macro for line version so that maintenance is easy. 
Signed-off-by: Rakesh Pandit Reviewed-by: Javier González Signed-off-by: Matias Bjørling Signed-off-by: Jens Axboe --- drivers/lightnvm/pblk-core.c | 2 +- drivers/lightnvm/pblk-recovery.c | 4 ++-- drivers/lightnvm/pblk.h | 1 + 3 files changed, 4 insertions(+), 3 deletions(-) (limited to 'drivers/lightnvm') diff --git a/drivers/lightnvm/pblk-core.c b/drivers/lightnvm/pblk-core.c index 027c42bb1ab9..8536d38ef97e 100644 --- a/drivers/lightnvm/pblk-core.c +++ b/drivers/lightnvm/pblk-core.c @@ -978,7 +978,7 @@ static int pblk_line_init_metadata(struct pblk *pblk, struct pblk_line *line, memcpy(smeta_buf->header.uuid, pblk->instance_uuid, 16); smeta_buf->header.id = cpu_to_le32(line->id); smeta_buf->header.type = cpu_to_le16(line->type); - smeta_buf->header.version = cpu_to_le16(1); + smeta_buf->header.version = SMETA_VERSION; /* Start metadata */ smeta_buf->seq_nr = cpu_to_le64(line->seq_nr); diff --git a/drivers/lightnvm/pblk-recovery.c b/drivers/lightnvm/pblk-recovery.c index cb556e06673e..caf124279575 100644 --- a/drivers/lightnvm/pblk-recovery.c +++ b/drivers/lightnvm/pblk-recovery.c @@ -900,9 +900,9 @@ struct pblk_line *pblk_recov_l2p(struct pblk *pblk) if (le32_to_cpu(smeta_buf->header.identifier) != PBLK_MAGIC) continue; - if (le16_to_cpu(smeta_buf->header.version) != 1) { + if (smeta_buf->header.version != SMETA_VERSION) { pr_err("pblk: found incompatible line version %u\n", - smeta_buf->header.version); + le16_to_cpu(smeta_buf->header.version)); return ERR_PTR(-EINVAL); } diff --git a/drivers/lightnvm/pblk.h b/drivers/lightnvm/pblk.h index 67e623bd5c2d..9ece409993fe 100644 --- a/drivers/lightnvm/pblk.h +++ b/drivers/lightnvm/pblk.h @@ -310,6 +310,7 @@ enum { }; #define PBLK_MAGIC 0x70626c6b /*pblk*/ +#define SMETA_VERSION cpu_to_le16(1) struct line_header { __le32 crc; -- cgit v1.2.3 From 32825ebb06fafeff463ed23e9d0dea459ebd30fe Mon Sep 17 00:00:00 2001 From: Rakesh Pandit Date: Fri, 13 Oct 2017 14:46:00 +0200 Subject: lightnvm: pblk: reuse pblk_gc_should_kick 
MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This is a trivial change which reuses pblk_gc_should_kick instead of repeating it again in pblk_rl_free_lines_inc. Signed-off-by: Rakesh Pandit Made it apply to the common case. Reviewed-by: Javier González Signed-off-by: Matias Bjørling Signed-off-by: Jens Axboe --- drivers/lightnvm/pblk-core.c | 2 -- drivers/lightnvm/pblk-rl.c | 33 +++++++++------------------------ drivers/lightnvm/pblk.h | 1 - 3 files changed, 9 insertions(+), 27 deletions(-) (limited to 'drivers/lightnvm') diff --git a/drivers/lightnvm/pblk-core.c b/drivers/lightnvm/pblk-core.c index 8536d38ef97e..a68c6ae536e5 100644 --- a/drivers/lightnvm/pblk-core.c +++ b/drivers/lightnvm/pblk-core.c @@ -1591,8 +1591,6 @@ void pblk_line_close(struct pblk *pblk, struct pblk_line *line) spin_unlock(&line->lock); spin_unlock(&l_mg->gc_lock); - - pblk_gc_should_kick(pblk); } void pblk_line_close_meta(struct pblk *pblk, struct pblk_line *line) diff --git a/drivers/lightnvm/pblk-rl.c b/drivers/lightnvm/pblk-rl.c index 2e6a5361baf0..9565c3bc4d0b 100644 --- a/drivers/lightnvm/pblk-rl.c +++ b/drivers/lightnvm/pblk-rl.c @@ -96,9 +96,11 @@ unsigned long pblk_rl_nr_free_blks(struct pblk_rl *rl) * * Only the total number of free blocks is used to configure the rate limiter. 
*/ -static int pblk_rl_update_rates(struct pblk_rl *rl, unsigned long max) +static void pblk_rl_update_rates(struct pblk_rl *rl) { + struct pblk *pblk = container_of(rl, struct pblk, rl); unsigned long free_blocks = pblk_rl_nr_free_blks(rl); + int max = rl->rb_budget; if (free_blocks >= rl->high) { rl->rb_user_max = max; @@ -124,23 +126,18 @@ static int pblk_rl_update_rates(struct pblk_rl *rl, unsigned long max) rl->rb_state = PBLK_RL_LOW; } - return rl->rb_state; + if (rl->rb_state == (PBLK_RL_MID | PBLK_RL_LOW)) + pblk_gc_should_start(pblk); + else + pblk_gc_should_stop(pblk); } void pblk_rl_free_lines_inc(struct pblk_rl *rl, struct pblk_line *line) { - struct pblk *pblk = container_of(rl, struct pblk, rl); int blk_in_line = atomic_read(&line->blk_in_line); - int ret; atomic_add(blk_in_line, &rl->free_blocks); - /* Rates will not change that often - no need to lock update */ - ret = pblk_rl_update_rates(rl, rl->rb_budget); - - if (ret == (PBLK_RL_MID | PBLK_RL_LOW)) - pblk_gc_should_start(pblk); - else - pblk_gc_should_stop(pblk); + pblk_rl_update_rates(rl); } void pblk_rl_free_lines_dec(struct pblk_rl *rl, struct pblk_line *line) @@ -148,19 +145,7 @@ void pblk_rl_free_lines_dec(struct pblk_rl *rl, struct pblk_line *line) int blk_in_line = atomic_read(&line->blk_in_line); atomic_sub(blk_in_line, &rl->free_blocks); -} - -void pblk_gc_should_kick(struct pblk *pblk) -{ - struct pblk_rl *rl = &pblk->rl; - int ret; - - /* Rates will not change that often - no need to lock update */ - ret = pblk_rl_update_rates(rl, rl->rb_budget); - if (ret == (PBLK_RL_MID | PBLK_RL_LOW)) - pblk_gc_should_start(pblk); - else - pblk_gc_should_stop(pblk); + pblk_rl_update_rates(rl); } int pblk_rl_high_thrs(struct pblk_rl *rl) diff --git a/drivers/lightnvm/pblk.h b/drivers/lightnvm/pblk.h index 9ece409993fe..3a07c5b61a0c 100644 --- a/drivers/lightnvm/pblk.h +++ b/drivers/lightnvm/pblk.h @@ -824,7 +824,6 @@ int pblk_gc_init(struct pblk *pblk); void pblk_gc_exit(struct pblk *pblk); void 
pblk_gc_should_start(struct pblk *pblk); void pblk_gc_should_stop(struct pblk *pblk); -void pblk_gc_should_kick(struct pblk *pblk); void pblk_gc_kick(struct pblk *pblk); void pblk_gc_sysfs_state_show(struct pblk *pblk, int *gc_enabled, int *gc_active); -- cgit v1.2.3 From a1121176ff757e3c073490a69608ea0b18a00ec1 Mon Sep 17 00:00:00 2001 From: Javier González Date: Fri, 13 Oct 2017 14:46:01 +0200 Subject: lightnvm: pblk: initialize debug stat counter MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Initialize the stat counter for garbage collected reads. Fixes: a4bd217b43268 ("lightnvm: physical block device (pblk) target") Signed-off-by: Javier González Signed-off-by: Matias Bjørling Signed-off-by: Jens Axboe --- drivers/lightnvm/pblk-init.c | 1 + 1 file changed, 1 insertion(+) (limited to 'drivers/lightnvm') diff --git a/drivers/lightnvm/pblk-init.c b/drivers/lightnvm/pblk-init.c index 8c85779e9635..83445115a922 100644 --- a/drivers/lightnvm/pblk-init.c +++ b/drivers/lightnvm/pblk-init.c @@ -947,6 +947,7 @@ static void *pblk_init(struct nvm_tgt_dev *dev, struct gendisk *tdisk, atomic_long_set(&pblk->recov_writes, 0); atomic_long_set(&pblk->recov_writes, 0); atomic_long_set(&pblk->recov_gc_writes, 0); + atomic_long_set(&pblk->recov_gc_reads, 0); #endif atomic_long_set(&pblk->read_failed, 0); -- cgit v1.2.3 From 7d327a9ed6c4dca341ebf99012e0a6b80a3050e6 Mon Sep 17 00:00:00 2001 From: Javier González Date: Fri, 13 Oct 2017 14:46:02 +0200 Subject: lightnvm: pblk: use right flag for GC allocation MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The data buffer for the GC path allocates virtual memory through vmalloc. When this change was introduced, a flag signaling kmalloc'ed memory was wrongly introduced. Use the right flag when creating a bio from this buffer. 
Fixes: de54e703a422 ("lightnvm: pblk: use vmalloc for GC data buffer") Signed-off-by: Javier González Signed-off-by: Matias Bjørling Signed-off-by: Jens Axboe --- drivers/lightnvm/pblk-read.c | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) (limited to 'drivers/lightnvm') diff --git a/drivers/lightnvm/pblk-read.c b/drivers/lightnvm/pblk-read.c index d682e89e6493..ee8efb55b330 100644 --- a/drivers/lightnvm/pblk-read.c +++ b/drivers/lightnvm/pblk-read.c @@ -499,7 +499,7 @@ int pblk_submit_read_gc(struct pblk *pblk, u64 *lba_list, void *data, data_len = (*secs_to_gc) * geo->sec_size; bio = pblk_bio_map_addr(pblk, data, *secs_to_gc, data_len, - PBLK_KMALLOC_META, GFP_KERNEL); + PBLK_VMALLOC_META, GFP_KERNEL); if (IS_ERR(bio)) { pr_err("pblk: could not allocate GC bio (%lu)\n", PTR_ERR(bio)); goto err_free_dma; @@ -519,7 +519,7 @@ int pblk_submit_read_gc(struct pblk *pblk, u64 *lba_list, void *data, if (ret) { bio_endio(bio); pr_err("pblk: GC read request failed\n"); - goto err_free_dma; + goto err_free_bio; } if (!wait_for_completion_io_timeout(&wait, @@ -541,10 +541,13 @@ int pblk_submit_read_gc(struct pblk *pblk, u64 *lba_list, void *data, atomic_long_sub(*secs_to_gc, &pblk->inflight_reads); #endif + bio_put(bio); out: nvm_dev_dma_free(dev->parent, rqd.meta_list, rqd.dma_meta_list); return NVM_IO_OK; +err_free_bio: + bio_put(bio); err_free_dma: nvm_dev_dma_free(dev->parent, rqd.meta_list, rqd.dma_meta_list); return NVM_IO_ERR; -- cgit v1.2.3 From cd8ddbf7a5e206fe6995ab0aee245d597dd6a7f2 Mon Sep 17 00:00:00 2001 From: Javier González Date: Fri, 13 Oct 2017 14:46:03 +0200 Subject: lightnvm: pblk: free padded entries in write buffer MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit When a REQ_FLUSH reaches pblk, the bio cannot be directly completed. Instead, data on the write buffer is flushed and the bio is completed on the completion path. 
This might require some sectors to be padded in order to guarantee a successful write. This patch fixes a memory leak on the padded pages. A consequence of this bad free was that internal bios not containing data (only a flush) were not being completed. Fixes: a4bd217b4326 ("lightnvm: physical block device (pblk) target") Signed-off-by: Javier González Signed-off-by: Matias Bjørling Signed-off-by: Jens Axboe --- drivers/lightnvm/pblk-core.c | 1 - drivers/lightnvm/pblk-write.c | 7 ++++++- 2 files changed, 6 insertions(+), 2 deletions(-) (limited to 'drivers/lightnvm') diff --git a/drivers/lightnvm/pblk-core.c b/drivers/lightnvm/pblk-core.c index a68c6ae536e5..9299a5a75a18 100644 --- a/drivers/lightnvm/pblk-core.c +++ b/drivers/lightnvm/pblk-core.c @@ -190,7 +190,6 @@ void pblk_bio_free_pages(struct pblk *pblk, struct bio *bio, int off, WARN_ON(off + nr_pages != bio->bi_vcnt); - bio_advance(bio, off * PBLK_EXPOSED_PAGE_SIZE); for (i = off; i < nr_pages + off; i++) { bv = bio->bi_io_vec[i]; mempool_free(bv.bv_page, pblk->page_pool); diff --git a/drivers/lightnvm/pblk-write.c b/drivers/lightnvm/pblk-write.c index 3ad9e56d2473..d89ac573f8d8 100644 --- a/drivers/lightnvm/pblk-write.c +++ b/drivers/lightnvm/pblk-write.c @@ -33,6 +33,10 @@ static unsigned long pblk_end_w_bio(struct pblk *pblk, struct nvm_rq *rqd, bio_endio(original_bio); } + if (c_ctx->nr_padded) + pblk_bio_free_pages(pblk, rqd->bio, c_ctx->nr_valid, + c_ctx->nr_padded); + #ifdef CONFIG_NVM_DEBUG atomic_long_add(c_ctx->nr_valid, &pblk->sync_writes); #endif @@ -521,7 +525,8 @@ static void pblk_free_write_rqd(struct pblk *pblk, struct nvm_rq *rqd) struct bio *bio = rqd->bio; if (c_ctx->nr_padded) - pblk_bio_free_pages(pblk, bio, rqd->nr_ppas, c_ctx->nr_padded); + pblk_bio_free_pages(pblk, bio, c_ctx->nr_valid, + c_ctx->nr_padded); } static int pblk_submit_write(struct pblk *pblk) -- cgit v1.2.3 From e0e12a707f02fcde1b77a2417d9fb0ae1ce3b003 Mon Sep 17 00:00:00 2001 From: Javier González Date: Fri, 13 Oct 2017 
14:46:04 +0200 Subject: lightnvm: pblk: fix write I/O sync stat MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Fix stat counter to collect the right number of I/Os being synced on the completion path. Fixes: 0880a9aa2d91f ("lightnvm: pblk: delete redundant buffer pointer") Signed-off-by: Javier González Signed-off-by: Matias Bjørling Signed-off-by: Jens Axboe --- drivers/lightnvm/pblk-write.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'drivers/lightnvm') diff --git a/drivers/lightnvm/pblk-write.c b/drivers/lightnvm/pblk-write.c index d89ac573f8d8..d82ca8bd8390 100644 --- a/drivers/lightnvm/pblk-write.c +++ b/drivers/lightnvm/pblk-write.c @@ -38,7 +38,7 @@ static unsigned long pblk_end_w_bio(struct pblk *pblk, struct nvm_rq *rqd, c_ctx->nr_padded); #ifdef CONFIG_NVM_DEBUG - atomic_long_add(c_ctx->nr_valid, &pblk->sync_writes); + atomic_long_add(rqd->nr_ppas, &pblk->sync_writes); #endif ret = pblk_rb_sync_advance(&pblk->rwb, c_ctx->nr_valid); -- cgit v1.2.3 From da67e68fb9d37fb9072b20cc75d4337a73bc01b4 Mon Sep 17 00:00:00 2001 From: Javier González Date: Fri, 13 Oct 2017 14:46:05 +0200 Subject: lightnvm: pblk: avoid deadlock on low LUN config MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit On low LUN configurations, make sure not to send bios that are bigger than the buffer size. 
Fixes: a4bd217b4326 ("lightnvm: physical block device (pblk) target") Signed-off-by: Javier González Signed-off-by: Matias Bjørling Signed-off-by: Jens Axboe --- drivers/lightnvm/pblk-init.c | 2 +- drivers/lightnvm/pblk-rl.c | 6 ++++++ drivers/lightnvm/pblk.h | 2 ++ 3 files changed, 9 insertions(+), 1 deletion(-) (limited to 'drivers/lightnvm') diff --git a/drivers/lightnvm/pblk-init.c b/drivers/lightnvm/pblk-init.c index 83445115a922..eee4eeb47d07 100644 --- a/drivers/lightnvm/pblk-init.c +++ b/drivers/lightnvm/pblk-init.c @@ -46,7 +46,7 @@ static int pblk_rw_io(struct request_queue *q, struct pblk *pblk, * user I/Os. Unless stalled, the rate limiter leaves at least 256KB * available for user I/O. */ - if (unlikely(pblk_get_secs(bio) >= pblk_rl_sysfs_rate_show(&pblk->rl))) + if (pblk_get_secs(bio) > pblk_rl_max_io(&pblk->rl)) blk_queue_split(q, &bio); return pblk_write_to_cache(pblk, bio, PBLK_IOTYPE_USER); diff --git a/drivers/lightnvm/pblk-rl.c b/drivers/lightnvm/pblk-rl.c index 9565c3bc4d0b..0896439a91b0 100644 --- a/drivers/lightnvm/pblk-rl.c +++ b/drivers/lightnvm/pblk-rl.c @@ -163,6 +163,11 @@ int pblk_rl_sysfs_rate_show(struct pblk_rl *rl) return rl->rb_user_max; } +int pblk_rl_max_io(struct pblk_rl *rl) +{ + return rl->rb_max_io; +} + static void pblk_rl_u_timer(unsigned long data) { struct pblk_rl *rl = (struct pblk_rl *)data; @@ -199,6 +204,7 @@ void pblk_rl_init(struct pblk_rl *rl, int budget) /* To start with, all buffer is available to user I/O writers */ rl->rb_budget = budget; rl->rb_user_max = budget; + rl->rb_max_io = budget >> 1; rl->rb_gc_max = 0; rl->rb_state = PBLK_RL_HIGH; diff --git a/drivers/lightnvm/pblk.h b/drivers/lightnvm/pblk.h index 3a07c5b61a0c..b592e5194b0f 100644 --- a/drivers/lightnvm/pblk.h +++ b/drivers/lightnvm/pblk.h @@ -267,6 +267,7 @@ struct pblk_rl { int rb_gc_max; /* Max buffer entries available for GC I/O */ int rb_gc_rsv; /* Reserved buffer entries for GC I/O */ int rb_state; /* Rate-limiter current state */ + int 
rb_max_io; /* Maximum size for an I/O giving the config */ atomic_t rb_user_cnt; /* User I/O buffer counter */ atomic_t rb_gc_cnt; /* GC I/O buffer counter */ @@ -844,6 +845,7 @@ int pblk_rl_gc_may_insert(struct pblk_rl *rl, int nr_entries); void pblk_rl_gc_in(struct pblk_rl *rl, int nr_entries); void pblk_rl_out(struct pblk_rl *rl, int nr_user, int nr_gc); int pblk_rl_sysfs_rate_show(struct pblk_rl *rl); +int pblk_rl_max_io(struct pblk_rl *rl); void pblk_rl_free_lines_inc(struct pblk_rl *rl, struct pblk_line *line); void pblk_rl_free_lines_dec(struct pblk_rl *rl, struct pblk_line *line); void pblk_rl_set_space_limit(struct pblk_rl *rl, int entries_left); -- cgit v1.2.3 From bd432417681a224d9fa4a9d43be7d4edc82135b2 Mon Sep 17 00:00:00 2001 From: Javier González Date: Fri, 13 Oct 2017 14:46:06 +0200 Subject: lightnvm: pblk: fix min size for page mempool MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit pblk uses an internal page mempool for allocating pages on internal bios. The main two users of this memory pool are partial reads (reads with some sectors in cache and some on media) and padded writes, which need to add dummy pages to an existing bio already containing valid data (and with a large enough bioset allocated). In both cases, the maximum number of pages per bio is defined by the maximum number of physical sectors supported by the underlying device. This patch fixes a bad mempool allocation, where the min_nr of elements on the pool was fixed (to 16), which is lower than the maximum number of sectors supported by NVMe (as of the time for this patch). Instead, use the maximum number of allowed sectors reported by the device. 
Reported-by: Jens Axboe Signed-off-by: Javier González Signed-off-by: Matias Bjørling Signed-off-by: Jens Axboe --- drivers/lightnvm/pblk-core.c | 6 +++--- drivers/lightnvm/pblk-init.c | 15 ++++++++------- drivers/lightnvm/pblk-read.c | 2 +- drivers/lightnvm/pblk.h | 2 +- 4 files changed, 13 insertions(+), 12 deletions(-) (limited to 'drivers/lightnvm') diff --git a/drivers/lightnvm/pblk-core.c b/drivers/lightnvm/pblk-core.c index 9299a5a75a18..f5fbb9a46784 100644 --- a/drivers/lightnvm/pblk-core.c +++ b/drivers/lightnvm/pblk-core.c @@ -192,7 +192,7 @@ void pblk_bio_free_pages(struct pblk *pblk, struct bio *bio, int off, for (i = off; i < nr_pages + off; i++) { bv = bio->bi_io_vec[i]; - mempool_free(bv.bv_page, pblk->page_pool); + mempool_free(bv.bv_page, pblk->page_bio_pool); } } @@ -204,14 +204,14 @@ int pblk_bio_add_pages(struct pblk *pblk, struct bio *bio, gfp_t flags, int i, ret; for (i = 0; i < nr_pages; i++) { - page = mempool_alloc(pblk->page_pool, flags); + page = mempool_alloc(pblk->page_bio_pool, flags); if (!page) goto err; ret = bio_add_pc_page(q, bio, page, PBLK_EXPOSED_PAGE_SIZE, 0); if (ret != PBLK_EXPOSED_PAGE_SIZE) { pr_err("pblk: could not add page to bio\n"); - mempool_free(page, pblk->page_pool); + mempool_free(page, pblk->page_bio_pool); goto err; } } diff --git a/drivers/lightnvm/pblk-init.c b/drivers/lightnvm/pblk-init.c index eee4eeb47d07..7b1f29c71338 100644 --- a/drivers/lightnvm/pblk-init.c +++ b/drivers/lightnvm/pblk-init.c @@ -132,7 +132,6 @@ static int pblk_rwb_init(struct pblk *pblk) } /* Minimum pages needed within a lun */ -#define PAGE_POOL_SIZE 16 #define ADDR_POOL_SIZE 64 static int pblk_set_ppaf(struct pblk *pblk) @@ -247,14 +246,16 @@ static int pblk_core_init(struct pblk *pblk) if (pblk_init_global_caches(pblk)) return -ENOMEM; - pblk->page_pool = mempool_create_page_pool(PAGE_POOL_SIZE, 0); - if (!pblk->page_pool) + /* internal bios can be at most the sectors signaled by the device. 
*/ + pblk->page_bio_pool = mempool_create_page_pool(nvm_max_phys_sects(dev), + 0); + if (!pblk->page_bio_pool) return -ENOMEM; pblk->line_ws_pool = mempool_create_slab_pool(PBLK_WS_POOL_SIZE, pblk_blk_ws_cache); if (!pblk->line_ws_pool) - goto free_page_pool; + goto free_page_bio_pool; pblk->rec_pool = mempool_create_slab_pool(geo->nr_luns, pblk_rec_cache); if (!pblk->rec_pool) @@ -309,8 +310,8 @@ free_rec_pool: mempool_destroy(pblk->rec_pool); free_blk_ws_pool: mempool_destroy(pblk->line_ws_pool); -free_page_pool: - mempool_destroy(pblk->page_pool); +free_page_bio_pool: + mempool_destroy(pblk->page_bio_pool); return -ENOMEM; } @@ -322,7 +323,7 @@ static void pblk_core_free(struct pblk *pblk) if (pblk->bb_wq) destroy_workqueue(pblk->bb_wq); - mempool_destroy(pblk->page_pool); + mempool_destroy(pblk->page_bio_pool); mempool_destroy(pblk->line_ws_pool); mempool_destroy(pblk->rec_pool); mempool_destroy(pblk->g_rq_pool); diff --git a/drivers/lightnvm/pblk-read.c b/drivers/lightnvm/pblk-read.c index ee8efb55b330..402c732f0970 100644 --- a/drivers/lightnvm/pblk-read.c +++ b/drivers/lightnvm/pblk-read.c @@ -238,7 +238,7 @@ static int pblk_fill_partial_read_bio(struct pblk *pblk, struct nvm_rq *rqd, kunmap_atomic(src_p); kunmap_atomic(dst_p); - mempool_free(src_bv.bv_page, pblk->page_pool); + mempool_free(src_bv.bv_page, pblk->page_bio_pool); hole = find_next_zero_bit(read_bitmap, nr_secs, hole + 1); } while (hole < nr_secs); diff --git a/drivers/lightnvm/pblk.h b/drivers/lightnvm/pblk.h index b592e5194b0f..229f6020ad8a 100644 --- a/drivers/lightnvm/pblk.h +++ b/drivers/lightnvm/pblk.h @@ -620,7 +620,7 @@ struct pblk { struct list_head compl_list; - mempool_t *page_pool; + mempool_t *page_bio_pool; mempool_t *line_ws_pool; mempool_t *rec_pool; mempool_t *g_rq_pool; -- cgit v1.2.3 From b84ae4a8b883b96b95fff0e3979ff2c65bbf96b0 Mon Sep 17 00:00:00 2001 From: Javier González Date: Fri, 13 Oct 2017 14:46:07 +0200 Subject: lightnvm: pblk: simplify work_queue mempool 
MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit In pblk, we have a mempool to allocate a generic structure that we pass along workqueues. This is heavily used in the GC path in order to have enough inflight reads and fully utilize the GC bandwidth. However, the current GC path copies data to the host memory and puts it back into the write buffer. This requires a vmalloc allocation for the data and a memory copy. Thus, guaranteeing the allocation by using a mempool for the structure in itself does not give us much. Until we implement support for vector copy to avoid moving data through the host, just allocate the workqueue structure using kmalloc. This allows us to have a much smaller mempool. Reported-by: Jens Axboe Signed-off-by: Javier González Signed-off-by: Matias Bjørling Signed-off-by: Jens Axboe --- drivers/lightnvm/pblk-core.c | 13 +++++++------ drivers/lightnvm/pblk-gc.c | 32 ++++++++++++++++---------------- drivers/lightnvm/pblk-init.c | 32 ++++++++++++++++---------------- drivers/lightnvm/pblk-write.c | 4 ++-- drivers/lightnvm/pblk.h | 11 ++++++----- 5 files changed, 47 insertions(+), 45 deletions(-) (limited to 'drivers/lightnvm') diff --git a/drivers/lightnvm/pblk-core.c b/drivers/lightnvm/pblk-core.c index f5fbb9a46784..b92532211866 100644 --- a/drivers/lightnvm/pblk-core.c +++ b/drivers/lightnvm/pblk-core.c @@ -33,7 +33,8 @@ static void pblk_mark_bb(struct pblk *pblk, struct pblk_line *line, pr_err("pblk: attempted to erase bb: line:%d, pos:%d\n", line->id, pos); - pblk_line_run_ws(pblk, NULL, ppa, pblk_line_mark_bb, pblk->bb_wq); + pblk_gen_run_ws(pblk, NULL, ppa, pblk_line_mark_bb, + GFP_ATOMIC, pblk->bb_wq); } static void __pblk_end_io_erase(struct pblk *pblk, struct nvm_rq *rqd) @@ -1623,7 +1624,7 @@ void pblk_line_close_ws(struct work_struct *work) struct pblk_line *line = line_ws->line; pblk_line_close(pblk, line); - mempool_free(line_ws, pblk->line_ws_pool); + mempool_free(line_ws, pblk->gen_ws_pool); 
} void pblk_line_mark_bb(struct work_struct *work) @@ -1648,16 +1649,16 @@ void pblk_line_mark_bb(struct work_struct *work) } kfree(ppa); - mempool_free(line_ws, pblk->line_ws_pool); + mempool_free(line_ws, pblk->gen_ws_pool); } -void pblk_line_run_ws(struct pblk *pblk, struct pblk_line *line, void *priv, - void (*work)(struct work_struct *), +void pblk_gen_run_ws(struct pblk *pblk, struct pblk_line *line, void *priv, + void (*work)(struct work_struct *), gfp_t gfp_mask, struct workqueue_struct *wq) { struct pblk_line_ws *line_ws; - line_ws = mempool_alloc(pblk->line_ws_pool, GFP_ATOMIC); + line_ws = mempool_alloc(pblk->gen_ws_pool, gfp_mask); if (!line_ws) return; diff --git a/drivers/lightnvm/pblk-gc.c b/drivers/lightnvm/pblk-gc.c index 6090d28f7995..f163829ecca8 100644 --- a/drivers/lightnvm/pblk-gc.c +++ b/drivers/lightnvm/pblk-gc.c @@ -136,12 +136,12 @@ static void pblk_put_line_back(struct pblk *pblk, struct pblk_line *line) static void pblk_gc_line_ws(struct work_struct *work) { - struct pblk_line_ws *line_rq_ws = container_of(work, + struct pblk_line_ws *gc_rq_ws = container_of(work, struct pblk_line_ws, ws); - struct pblk *pblk = line_rq_ws->pblk; + struct pblk *pblk = gc_rq_ws->pblk; struct pblk_gc *gc = &pblk->gc; - struct pblk_line *line = line_rq_ws->line; - struct pblk_gc_rq *gc_rq = line_rq_ws->priv; + struct pblk_line *line = gc_rq_ws->line; + struct pblk_gc_rq *gc_rq = gc_rq_ws->priv; up(&gc->gc_sem); @@ -151,7 +151,7 @@ static void pblk_gc_line_ws(struct work_struct *work) gc_rq->nr_secs); } - mempool_free(line_rq_ws, pblk->line_ws_pool); + kfree(gc_rq_ws); } static void pblk_gc_line_prepare_ws(struct work_struct *work) @@ -164,7 +164,7 @@ static void pblk_gc_line_prepare_ws(struct work_struct *work) struct pblk_line_meta *lm = &pblk->lm; struct pblk_gc *gc = &pblk->gc; struct line_emeta *emeta_buf; - struct pblk_line_ws *line_rq_ws; + struct pblk_line_ws *gc_rq_ws; struct pblk_gc_rq *gc_rq; __le64 *lba_list; int sec_left, nr_secs, bit; @@ -223,19 
+223,19 @@ next_rq: gc_rq->nr_secs = nr_secs; gc_rq->line = line; - line_rq_ws = mempool_alloc(pblk->line_ws_pool, GFP_KERNEL); - if (!line_rq_ws) + gc_rq_ws = kmalloc(sizeof(struct pblk_line_ws), GFP_KERNEL); + if (!gc_rq_ws) goto fail_free_gc_rq; - line_rq_ws->pblk = pblk; - line_rq_ws->line = line; - line_rq_ws->priv = gc_rq; + gc_rq_ws->pblk = pblk; + gc_rq_ws->line = line; + gc_rq_ws->priv = gc_rq; down(&gc->gc_sem); kref_get(&line->ref); - INIT_WORK(&line_rq_ws->ws, pblk_gc_line_ws); - queue_work(gc->gc_line_reader_wq, &line_rq_ws->ws); + INIT_WORK(&gc_rq_ws->ws, pblk_gc_line_ws); + queue_work(gc->gc_line_reader_wq, &gc_rq_ws->ws); sec_left -= nr_secs; if (sec_left > 0) @@ -243,7 +243,7 @@ next_rq: out: pblk_mfree(emeta_buf, l_mg->emeta_alloc_type); - mempool_free(line_ws, pblk->line_ws_pool); + kfree(line_ws); kref_put(&line->ref, pblk_line_put); atomic_dec(&gc->inflight_gc); @@ -256,7 +256,7 @@ fail_free_emeta: pblk_mfree(emeta_buf, l_mg->emeta_alloc_type); pblk_put_line_back(pblk, line); kref_put(&line->ref, pblk_line_put); - mempool_free(line_ws, pblk->line_ws_pool); + kfree(line_ws); atomic_dec(&gc->inflight_gc); pr_err("pblk: Failed to GC line %d\n", line->id); @@ -269,7 +269,7 @@ static int pblk_gc_line(struct pblk *pblk, struct pblk_line *line) pr_debug("pblk: line '%d' being reclaimed for GC\n", line->id); - line_ws = mempool_alloc(pblk->line_ws_pool, GFP_KERNEL); + line_ws = kmalloc(sizeof(struct pblk_line_ws), GFP_KERNEL); if (!line_ws) return -ENOMEM; diff --git a/drivers/lightnvm/pblk-init.c b/drivers/lightnvm/pblk-init.c index 7b1f29c71338..340552253580 100644 --- a/drivers/lightnvm/pblk-init.c +++ b/drivers/lightnvm/pblk-init.c @@ -20,7 +20,7 @@ #include "pblk.h" -static struct kmem_cache *pblk_blk_ws_cache, *pblk_rec_cache, *pblk_g_rq_cache, +static struct kmem_cache *pblk_ws_cache, *pblk_rec_cache, *pblk_g_rq_cache, *pblk_w_rq_cache, *pblk_line_meta_cache; static DECLARE_RWSEM(pblk_lock); struct bio_set *pblk_bio_set; @@ -184,9 +184,9 @@ 
static int pblk_init_global_caches(struct pblk *pblk) char cache_name[PBLK_CACHE_NAME_LEN]; down_write(&pblk_lock); - pblk_blk_ws_cache = kmem_cache_create("pblk_blk_ws", + pblk_ws_cache = kmem_cache_create("pblk_blk_ws", sizeof(struct pblk_line_ws), 0, 0, NULL); - if (!pblk_blk_ws_cache) { + if (!pblk_ws_cache) { up_write(&pblk_lock); return -ENOMEM; } @@ -194,7 +194,7 @@ static int pblk_init_global_caches(struct pblk *pblk) pblk_rec_cache = kmem_cache_create("pblk_rec", sizeof(struct pblk_rec_ctx), 0, 0, NULL); if (!pblk_rec_cache) { - kmem_cache_destroy(pblk_blk_ws_cache); + kmem_cache_destroy(pblk_ws_cache); up_write(&pblk_lock); return -ENOMEM; } @@ -202,7 +202,7 @@ static int pblk_init_global_caches(struct pblk *pblk) pblk_g_rq_cache = kmem_cache_create("pblk_g_rq", pblk_g_rq_size, 0, 0, NULL); if (!pblk_g_rq_cache) { - kmem_cache_destroy(pblk_blk_ws_cache); + kmem_cache_destroy(pblk_ws_cache); kmem_cache_destroy(pblk_rec_cache); up_write(&pblk_lock); return -ENOMEM; @@ -211,7 +211,7 @@ static int pblk_init_global_caches(struct pblk *pblk) pblk_w_rq_cache = kmem_cache_create("pblk_w_rq", pblk_w_rq_size, 0, 0, NULL); if (!pblk_w_rq_cache) { - kmem_cache_destroy(pblk_blk_ws_cache); + kmem_cache_destroy(pblk_ws_cache); kmem_cache_destroy(pblk_rec_cache); kmem_cache_destroy(pblk_g_rq_cache); up_write(&pblk_lock); @@ -223,7 +223,7 @@ static int pblk_init_global_caches(struct pblk *pblk) pblk_line_meta_cache = kmem_cache_create(cache_name, pblk->lm.sec_bitmap_len, 0, 0, NULL); if (!pblk_line_meta_cache) { - kmem_cache_destroy(pblk_blk_ws_cache); + kmem_cache_destroy(pblk_ws_cache); kmem_cache_destroy(pblk_rec_cache); kmem_cache_destroy(pblk_g_rq_cache); kmem_cache_destroy(pblk_w_rq_cache); @@ -246,20 +246,20 @@ static int pblk_core_init(struct pblk *pblk) if (pblk_init_global_caches(pblk)) return -ENOMEM; - /* internal bios can be at most the sectors signaled by the device. */ + /* Internal bios can be at most the sectors signaled by the device. 
*/ pblk->page_bio_pool = mempool_create_page_pool(nvm_max_phys_sects(dev), 0); if (!pblk->page_bio_pool) return -ENOMEM; - pblk->line_ws_pool = mempool_create_slab_pool(PBLK_WS_POOL_SIZE, - pblk_blk_ws_cache); - if (!pblk->line_ws_pool) + pblk->gen_ws_pool = mempool_create_slab_pool(PBLK_GEN_WS_POOL_SIZE, + pblk_ws_cache); + if (!pblk->gen_ws_pool) goto free_page_bio_pool; pblk->rec_pool = mempool_create_slab_pool(geo->nr_luns, pblk_rec_cache); if (!pblk->rec_pool) - goto free_blk_ws_pool; + goto free_gen_ws_pool; pblk->g_rq_pool = mempool_create_slab_pool(PBLK_READ_REQ_POOL_SIZE, pblk_g_rq_cache); @@ -308,8 +308,8 @@ free_g_rq_pool: mempool_destroy(pblk->g_rq_pool); free_rec_pool: mempool_destroy(pblk->rec_pool); -free_blk_ws_pool: - mempool_destroy(pblk->line_ws_pool); +free_gen_ws_pool: + mempool_destroy(pblk->gen_ws_pool); free_page_bio_pool: mempool_destroy(pblk->page_bio_pool); return -ENOMEM; @@ -324,13 +324,13 @@ static void pblk_core_free(struct pblk *pblk) destroy_workqueue(pblk->bb_wq); mempool_destroy(pblk->page_bio_pool); - mempool_destroy(pblk->line_ws_pool); + mempool_destroy(pblk->gen_ws_pool); mempool_destroy(pblk->rec_pool); mempool_destroy(pblk->g_rq_pool); mempool_destroy(pblk->w_rq_pool); mempool_destroy(pblk->line_meta_pool); - kmem_cache_destroy(pblk_blk_ws_cache); + kmem_cache_destroy(pblk_ws_cache); kmem_cache_destroy(pblk_rec_cache); kmem_cache_destroy(pblk_g_rq_cache); kmem_cache_destroy(pblk_w_rq_cache); diff --git a/drivers/lightnvm/pblk-write.c b/drivers/lightnvm/pblk-write.c index d82ca8bd8390..c73b17bca06b 100644 --- a/drivers/lightnvm/pblk-write.c +++ b/drivers/lightnvm/pblk-write.c @@ -198,8 +198,8 @@ static void pblk_end_io_write_meta(struct nvm_rq *rqd) sync = atomic_add_return(rqd->nr_ppas, &emeta->sync); if (sync == emeta->nr_entries) - pblk_line_run_ws(pblk, line, NULL, pblk_line_close_ws, - pblk->close_wq); + pblk_gen_run_ws(pblk, line, NULL, pblk_line_close_ws, + GFP_ATOMIC, pblk->close_wq); bio_put(rqd->bio); 
nvm_dev_dma_free(dev->parent, rqd->meta_list, rqd->dma_meta_list); diff --git a/drivers/lightnvm/pblk.h b/drivers/lightnvm/pblk.h index 229f6020ad8a..efaa781abb06 100644 --- a/drivers/lightnvm/pblk.h +++ b/drivers/lightnvm/pblk.h @@ -40,7 +40,6 @@ #define PBLK_MAX_REQ_ADDRS (64) #define PBLK_MAX_REQ_ADDRS_PW (6) -#define PBLK_WS_POOL_SIZE (128) #define PBLK_META_POOL_SIZE (128) #define PBLK_READ_REQ_POOL_SIZE (1024) @@ -61,6 +60,8 @@ #define ERASE 2 /* READ = 0, WRITE = 1 */ +#define PBLK_GEN_WS_POOL_SIZE (2) + enum { /* IO Types */ PBLK_IOTYPE_USER = 1 << 0, @@ -621,7 +622,7 @@ struct pblk { struct list_head compl_list; mempool_t *page_bio_pool; - mempool_t *line_ws_pool; + mempool_t *gen_ws_pool; mempool_t *rec_pool; mempool_t *g_rq_pool; mempool_t *w_rq_pool; @@ -725,9 +726,9 @@ void pblk_line_close_meta_sync(struct pblk *pblk); void pblk_line_close_ws(struct work_struct *work); void pblk_pipeline_stop(struct pblk *pblk); void pblk_line_mark_bb(struct work_struct *work); -void pblk_line_run_ws(struct pblk *pblk, struct pblk_line *line, void *priv, - void (*work)(struct work_struct *), - struct workqueue_struct *wq); +void pblk_gen_run_ws(struct pblk *pblk, struct pblk_line *line, void *priv, + void (*work)(struct work_struct *), gfp_t gfp_mask, + struct workqueue_struct *wq); u64 pblk_line_smeta_start(struct pblk *pblk, struct pblk_line *line); int pblk_line_read_smeta(struct pblk *pblk, struct pblk_line *line); int pblk_line_read_emeta(struct pblk *pblk, struct pblk_line *line, -- cgit v1.2.3 From 0d880398cb6254ab3e110e2a8a659da65a56ffee Mon Sep 17 00:00:00 2001 From: Javier González Date: Fri, 13 Oct 2017 14:46:08 +0200 Subject: lightnvm: pblk: decouple read/erase mempools MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Since read and erase paths offer different guarantees for inflight I/Os, separate the mempools to set the right min_nr for each on creation. 
Reported-by: Jens Axboe Signed-off-by: Javier González Signed-off-by: Matias Bjørling Signed-off-by: Jens Axboe --- drivers/lightnvm/pblk-core.c | 8 ++++---- drivers/lightnvm/pblk-init.c | 22 +++++++++++++++------- drivers/lightnvm/pblk.h | 5 +++-- 3 files changed, 22 insertions(+), 13 deletions(-) (limited to 'drivers/lightnvm') diff --git a/drivers/lightnvm/pblk-core.c b/drivers/lightnvm/pblk-core.c index b92532211866..0c22e5ccdfdd 100644 --- a/drivers/lightnvm/pblk-core.c +++ b/drivers/lightnvm/pblk-core.c @@ -64,7 +64,7 @@ static void pblk_end_io_erase(struct nvm_rq *rqd) struct pblk *pblk = rqd->private; __pblk_end_io_erase(pblk, rqd); - mempool_free(rqd, pblk->g_rq_pool); + mempool_free(rqd, pblk->e_rq_pool); } void __pblk_map_invalidate(struct pblk *pblk, struct pblk_line *line, @@ -161,7 +161,7 @@ struct nvm_rq *pblk_alloc_rqd(struct pblk *pblk, int rw) pool = pblk->w_rq_pool; rq_size = pblk_w_rq_size; } else { - pool = pblk->g_rq_pool; + pool = pblk->r_rq_pool; rq_size = pblk_g_rq_size; } @@ -178,7 +178,7 @@ void pblk_free_rqd(struct pblk *pblk, struct nvm_rq *rqd, int rw) if (rw == WRITE) pool = pblk->w_rq_pool; else - pool = pblk->g_rq_pool; + pool = pblk->r_rq_pool; mempool_free(rqd, pool); } @@ -1479,7 +1479,7 @@ int pblk_blk_erase_async(struct pblk *pblk, struct ppa_addr ppa) struct nvm_rq *rqd; int err; - rqd = mempool_alloc(pblk->g_rq_pool, GFP_KERNEL); + rqd = mempool_alloc(pblk->e_rq_pool, GFP_KERNEL); memset(rqd, 0, pblk_g_rq_size); pblk_setup_e_rq(pblk, rqd, ppa); diff --git a/drivers/lightnvm/pblk-init.c b/drivers/lightnvm/pblk-init.c index 340552253580..2f8d3f9ffbaf 100644 --- a/drivers/lightnvm/pblk-init.c +++ b/drivers/lightnvm/pblk-init.c @@ -261,15 +261,20 @@ static int pblk_core_init(struct pblk *pblk) if (!pblk->rec_pool) goto free_gen_ws_pool; - pblk->g_rq_pool = mempool_create_slab_pool(PBLK_READ_REQ_POOL_SIZE, + pblk->r_rq_pool = mempool_create_slab_pool(geo->nr_luns, pblk_g_rq_cache); - if (!pblk->g_rq_pool) + if (!pblk->r_rq_pool) 
goto free_rec_pool; - pblk->w_rq_pool = mempool_create_slab_pool(geo->nr_luns * 2, + pblk->e_rq_pool = mempool_create_slab_pool(geo->nr_luns, + pblk_g_rq_cache); + if (!pblk->e_rq_pool) + goto free_r_rq_pool; + + pblk->w_rq_pool = mempool_create_slab_pool(geo->nr_luns, pblk_w_rq_cache); if (!pblk->w_rq_pool) - goto free_g_rq_pool; + goto free_e_rq_pool; pblk->line_meta_pool = mempool_create_slab_pool(PBLK_META_POOL_SIZE, @@ -304,8 +309,10 @@ free_line_meta_pool: mempool_destroy(pblk->line_meta_pool); free_w_rq_pool: mempool_destroy(pblk->w_rq_pool); -free_g_rq_pool: - mempool_destroy(pblk->g_rq_pool); +free_e_rq_pool: + mempool_destroy(pblk->e_rq_pool); +free_r_rq_pool: + mempool_destroy(pblk->r_rq_pool); free_rec_pool: mempool_destroy(pblk->rec_pool); free_gen_ws_pool: @@ -326,7 +333,8 @@ static void pblk_core_free(struct pblk *pblk) mempool_destroy(pblk->page_bio_pool); mempool_destroy(pblk->gen_ws_pool); mempool_destroy(pblk->rec_pool); - mempool_destroy(pblk->g_rq_pool); + mempool_destroy(pblk->r_rq_pool); + mempool_destroy(pblk->e_rq_pool); mempool_destroy(pblk->w_rq_pool); mempool_destroy(pblk->line_meta_pool); diff --git a/drivers/lightnvm/pblk.h b/drivers/lightnvm/pblk.h index efaa781abb06..419e1b7328e4 100644 --- a/drivers/lightnvm/pblk.h +++ b/drivers/lightnvm/pblk.h @@ -41,7 +41,6 @@ #define PBLK_MAX_REQ_ADDRS_PW (6) #define PBLK_META_POOL_SIZE (128) -#define PBLK_READ_REQ_POOL_SIZE (1024) #define PBLK_NR_CLOSE_JOBS (4) @@ -60,6 +59,7 @@ #define ERASE 2 /* READ = 0, WRITE = 1 */ +/* Static pool sizes */ #define PBLK_GEN_WS_POOL_SIZE (2) enum { @@ -624,8 +624,9 @@ struct pblk { mempool_t *page_bio_pool; mempool_t *gen_ws_pool; mempool_t *rec_pool; - mempool_t *g_rq_pool; + mempool_t *r_rq_pool; mempool_t *w_rq_pool; + mempool_t *e_rq_pool; mempool_t *line_meta_pool; struct workqueue_struct *close_wq; -- cgit v1.2.3 From e72ec1d31bcb6dffe325418c6d96f2fcab7c2654 Mon Sep 17 00:00:00 2001 From: Javier González Date: Fri, 13 Oct 2017 14:46:09 +0200 Subject: 
lightnvm: pblk: do not use a mempool for line bitmaps MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit pblk holds two sector bitmaps: one to keep track of the mapped sectors while the line is active and another one to keep track of the invalid sectors. The latter is kept during the whole life of the line, until it is recycled. Since we cannot guarantee forward progress for the mempool in this case, get rid of the mempool and simply allocate memory through kmalloc. Reported-by: Jens Axboe Signed-off-by: Javier González Signed-off-by: Matias Bjørling Signed-off-by: Jens Axboe --- drivers/lightnvm/pblk-core.c | 26 ++++++++++---------------- drivers/lightnvm/pblk-init.c | 29 ++--------------------------- drivers/lightnvm/pblk-recovery.c | 2 +- drivers/lightnvm/pblk-write.c | 4 +--- drivers/lightnvm/pblk.h | 3 --- 5 files changed, 14 insertions(+), 50 deletions(-) (limited to 'drivers/lightnvm') diff --git a/drivers/lightnvm/pblk-core.c b/drivers/lightnvm/pblk-core.c index 0c22e5ccdfdd..215aadb84c6e 100644 --- a/drivers/lightnvm/pblk-core.c +++ b/drivers/lightnvm/pblk-core.c @@ -1095,25 +1095,21 @@ static int pblk_line_prepare(struct pblk *pblk, struct pblk_line *line) struct pblk_line_meta *lm = &pblk->lm; int blk_in_line = atomic_read(&line->blk_in_line); - line->map_bitmap = mempool_alloc(pblk->line_meta_pool, GFP_ATOMIC); + line->map_bitmap = kzalloc(lm->sec_bitmap_len, GFP_ATOMIC); if (!line->map_bitmap) return -ENOMEM; - memset(line->map_bitmap, 0, lm->sec_bitmap_len); - /* invalid_bitmap is special since it is used when line is closed. 
No - * need to zeroized; it will be initialized using bb info form - * map_bitmap - */ - line->invalid_bitmap = mempool_alloc(pblk->line_meta_pool, GFP_ATOMIC); + /* will be initialized using bb info from map_bitmap */ + line->invalid_bitmap = kmalloc(lm->sec_bitmap_len, GFP_ATOMIC); if (!line->invalid_bitmap) { - mempool_free(line->map_bitmap, pblk->line_meta_pool); + kfree(line->map_bitmap); return -ENOMEM; } spin_lock(&line->lock); if (line->state != PBLK_LINESTATE_FREE) { - mempool_free(line->invalid_bitmap, pblk->line_meta_pool); - mempool_free(line->map_bitmap, pblk->line_meta_pool); + kfree(line->map_bitmap); + kfree(line->invalid_bitmap); spin_unlock(&line->lock); WARN(1, "pblk: corrupted line %d, state %d\n", line->id, line->state); @@ -1165,7 +1161,7 @@ int pblk_line_recov_alloc(struct pblk *pblk, struct pblk_line *line) void pblk_line_recov_close(struct pblk *pblk, struct pblk_line *line) { - mempool_free(line->map_bitmap, pblk->line_meta_pool); + kfree(line->map_bitmap); line->map_bitmap = NULL; line->smeta = NULL; line->emeta = NULL; @@ -1440,10 +1436,8 @@ retry_setup: void pblk_line_free(struct pblk *pblk, struct pblk_line *line) { - if (line->map_bitmap) - mempool_free(line->map_bitmap, pblk->line_meta_pool); - if (line->invalid_bitmap) - mempool_free(line->invalid_bitmap, pblk->line_meta_pool); + kfree(line->map_bitmap); + kfree(line->invalid_bitmap); *line->vsc = cpu_to_le32(EMPTY_ENTRY); @@ -1584,7 +1578,7 @@ void pblk_line_close(struct pblk *pblk, struct pblk_line *line) list_add_tail(&line->list, move_list); - mempool_free(line->map_bitmap, pblk->line_meta_pool); + kfree(line->map_bitmap); line->map_bitmap = NULL; line->smeta = NULL; line->emeta = NULL; diff --git a/drivers/lightnvm/pblk-init.c b/drivers/lightnvm/pblk-init.c index 2f8d3f9ffbaf..4d719782f65b 100644 --- a/drivers/lightnvm/pblk-init.c +++ b/drivers/lightnvm/pblk-init.c @@ -21,7 +21,7 @@ #include "pblk.h" static struct kmem_cache *pblk_ws_cache, *pblk_rec_cache, *pblk_g_rq_cache, - 
*pblk_w_rq_cache, *pblk_line_meta_cache; + *pblk_w_rq_cache; static DECLARE_RWSEM(pblk_lock); struct bio_set *pblk_bio_set; @@ -181,8 +181,6 @@ static int pblk_set_ppaf(struct pblk *pblk) static int pblk_init_global_caches(struct pblk *pblk) { - char cache_name[PBLK_CACHE_NAME_LEN]; - down_write(&pblk_lock); pblk_ws_cache = kmem_cache_create("pblk_blk_ws", sizeof(struct pblk_line_ws), 0, 0, NULL); @@ -217,19 +215,6 @@ static int pblk_init_global_caches(struct pblk *pblk) up_write(&pblk_lock); return -ENOMEM; } - - snprintf(cache_name, sizeof(cache_name), "pblk_line_m_%s", - pblk->disk->disk_name); - pblk_line_meta_cache = kmem_cache_create(cache_name, - pblk->lm.sec_bitmap_len, 0, 0, NULL); - if (!pblk_line_meta_cache) { - kmem_cache_destroy(pblk_ws_cache); - kmem_cache_destroy(pblk_rec_cache); - kmem_cache_destroy(pblk_g_rq_cache); - kmem_cache_destroy(pblk_w_rq_cache); - up_write(&pblk_lock); - return -ENOMEM; - } up_write(&pblk_lock); return 0; @@ -276,16 +261,10 @@ static int pblk_core_init(struct pblk *pblk) if (!pblk->w_rq_pool) goto free_e_rq_pool; - pblk->line_meta_pool = - mempool_create_slab_pool(PBLK_META_POOL_SIZE, - pblk_line_meta_cache); - if (!pblk->line_meta_pool) - goto free_w_rq_pool; - pblk->close_wq = alloc_workqueue("pblk-close-wq", WQ_MEM_RECLAIM | WQ_UNBOUND, PBLK_NR_CLOSE_JOBS); if (!pblk->close_wq) - goto free_line_meta_pool; + goto free_w_rq_pool; pblk->bb_wq = alloc_workqueue("pblk-bb-wq", WQ_MEM_RECLAIM | WQ_UNBOUND, 0); @@ -305,8 +284,6 @@ free_bb_wq: destroy_workqueue(pblk->bb_wq); free_close_wq: destroy_workqueue(pblk->close_wq); -free_line_meta_pool: - mempool_destroy(pblk->line_meta_pool); free_w_rq_pool: mempool_destroy(pblk->w_rq_pool); free_e_rq_pool: @@ -336,13 +313,11 @@ static void pblk_core_free(struct pblk *pblk) mempool_destroy(pblk->r_rq_pool); mempool_destroy(pblk->e_rq_pool); mempool_destroy(pblk->w_rq_pool); - mempool_destroy(pblk->line_meta_pool); kmem_cache_destroy(pblk_ws_cache); kmem_cache_destroy(pblk_rec_cache); 
kmem_cache_destroy(pblk_g_rq_cache); kmem_cache_destroy(pblk_w_rq_cache); - kmem_cache_destroy(pblk_line_meta_cache); } static void pblk_luns_free(struct pblk *pblk) diff --git a/drivers/lightnvm/pblk-recovery.c b/drivers/lightnvm/pblk-recovery.c index caf124279575..de5270712be7 100644 --- a/drivers/lightnvm/pblk-recovery.c +++ b/drivers/lightnvm/pblk-recovery.c @@ -987,7 +987,7 @@ next: list_move_tail(&line->list, move_list); spin_unlock(&l_mg->gc_lock); - mempool_free(line->map_bitmap, pblk->line_meta_pool); + kfree(line->map_bitmap); line->map_bitmap = NULL; line->smeta = NULL; line->emeta = NULL; diff --git a/drivers/lightnvm/pblk-write.c b/drivers/lightnvm/pblk-write.c index c73b17bca06b..26c2b8345149 100644 --- a/drivers/lightnvm/pblk-write.c +++ b/drivers/lightnvm/pblk-write.c @@ -411,8 +411,6 @@ int pblk_submit_meta_io(struct pblk *pblk, struct pblk_line *meta_line) if (emeta->mem >= lm->emeta_len[0]) { spin_lock(&l_mg->close_lock); list_del(&meta_line->list); - WARN(!bitmap_full(meta_line->map_bitmap, lm->sec_per_line), - "pblk: corrupt meta line %d\n", meta_line->id); spin_unlock(&l_mg->close_lock); } @@ -456,7 +454,7 @@ retry: return 0; } meta_line = list_first_entry(&l_mg->emeta_list, struct pblk_line, list); - if (bitmap_full(meta_line->map_bitmap, lm->sec_per_line)) + if (meta_line->emeta->mem >= lm->emeta_len[0]) goto retry; spin_unlock(&l_mg->close_lock); diff --git a/drivers/lightnvm/pblk.h b/drivers/lightnvm/pblk.h index 419e1b7328e4..60edcda0fc7f 100644 --- a/drivers/lightnvm/pblk.h +++ b/drivers/lightnvm/pblk.h @@ -40,8 +40,6 @@ #define PBLK_MAX_REQ_ADDRS (64) #define PBLK_MAX_REQ_ADDRS_PW (6) -#define PBLK_META_POOL_SIZE (128) - #define PBLK_NR_CLOSE_JOBS (4) #define PBLK_CACHE_NAME_LEN (DISK_NAME_LEN + 16) @@ -627,7 +625,6 @@ struct pblk { mempool_t *r_rq_pool; mempool_t *w_rq_pool; mempool_t *e_rq_pool; - mempool_t *line_meta_pool; struct workqueue_struct *close_wq; struct workqueue_struct *bb_wq; -- cgit v1.2.3 From 
2942f50fa389a62865572452dce6214a8aed69dc Mon Sep 17 00:00:00 2001 From: Javier González Date: Fri, 13 Oct 2017 14:46:10 +0200 Subject: lightnvm: pblk: remove checks on mempool alloc. MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit As part of the mempool audit on pblk, remove unnecessary mempool allocation checks on mempools. Reported-by: Jens Axboe Signed-off-by: Javier González Signed-off-by: Matias Bjørling Signed-off-by: Jens Axboe --- drivers/lightnvm/pblk-core.c | 4 ---- drivers/lightnvm/pblk-read.c | 8 -------- drivers/lightnvm/pblk-recovery.c | 35 +++++++---------------------------- drivers/lightnvm/pblk-write.c | 24 +++++------------------- 4 files changed, 12 insertions(+), 59 deletions(-) (limited to 'drivers/lightnvm') diff --git a/drivers/lightnvm/pblk-core.c b/drivers/lightnvm/pblk-core.c index 215aadb84c6e..0da58869006b 100644 --- a/drivers/lightnvm/pblk-core.c +++ b/drivers/lightnvm/pblk-core.c @@ -206,8 +206,6 @@ int pblk_bio_add_pages(struct pblk *pblk, struct bio *bio, gfp_t flags, for (i = 0; i < nr_pages; i++) { page = mempool_alloc(pblk->page_bio_pool, flags); - if (!page) - goto err; ret = bio_add_pc_page(q, bio, page, PBLK_EXPOSED_PAGE_SIZE, 0); if (ret != PBLK_EXPOSED_PAGE_SIZE) { @@ -1653,8 +1651,6 @@ void pblk_gen_run_ws(struct pblk *pblk, struct pblk_line *line, void *priv, struct pblk_line_ws *line_ws; line_ws = mempool_alloc(pblk->gen_ws_pool, gfp_mask); - if (!line_ws) - return; line_ws->pblk = pblk; line_ws->line = line; diff --git a/drivers/lightnvm/pblk-read.c b/drivers/lightnvm/pblk-read.c index 402c732f0970..d2b6e2a7d7d5 100644 --- a/drivers/lightnvm/pblk-read.c +++ b/drivers/lightnvm/pblk-read.c @@ -168,10 +168,6 @@ static int pblk_fill_partial_read_bio(struct pblk *pblk, struct nvm_rq *rqd, DECLARE_COMPLETION_ONSTACK(wait); new_bio = bio_alloc(GFP_KERNEL, nr_holes); - if (!new_bio) { - pr_err("pblk: could not alloc read bio\n"); - return NVM_IO_ERR; - } if (pblk_bio_add_pages(pblk, 
new_bio, GFP_KERNEL, nr_holes)) goto err; @@ -321,10 +317,6 @@ int pblk_submit_read(struct pblk *pblk, struct bio *bio) bitmap_zero(&read_bitmap, nr_secs); rqd = pblk_alloc_rqd(pblk, READ); - if (IS_ERR(rqd)) { - pr_err_ratelimited("pblk: not able to alloc rqd"); - return NVM_IO_ERR; - } rqd->opcode = NVM_OP_PREAD; rqd->bio = bio; diff --git a/drivers/lightnvm/pblk-recovery.c b/drivers/lightnvm/pblk-recovery.c index de5270712be7..6b6b4183b41e 100644 --- a/drivers/lightnvm/pblk-recovery.c +++ b/drivers/lightnvm/pblk-recovery.c @@ -34,10 +34,6 @@ void pblk_submit_rec(struct work_struct *work) max_secs); bio = bio_alloc(GFP_KERNEL, nr_rec_secs); - if (!bio) { - pr_err("pblk: not able to create recovery bio\n"); - return; - } bio->bi_iter.bi_sector = 0; bio_set_op_attrs(bio, REQ_OP_WRITE, 0); @@ -85,11 +81,6 @@ int pblk_recov_setup_rq(struct pblk *pblk, struct pblk_c_ctx *c_ctx, int nr_entries = c_ctx->nr_valid + c_ctx->nr_padded; rec_rqd = pblk_alloc_rqd(pblk, WRITE); - if (IS_ERR(rec_rqd)) { - pr_err("pblk: could not create recovery req.\n"); - return -ENOMEM; - } - rec_ctx = nvm_rq_to_pdu(rec_rqd); /* Copy completion bitmap, but exclude the first X completed entries */ @@ -404,22 +395,18 @@ next_pad_rq: ppa_list = (void *)(meta_list) + pblk_dma_meta_size; dma_ppa_list = dma_meta_list + pblk_dma_meta_size; - rqd = pblk_alloc_rqd(pblk, WRITE); - if (IS_ERR(rqd)) { - ret = PTR_ERR(rqd); - goto fail_free_meta; - } - bio = pblk_bio_map_addr(pblk, data, rq_ppas, rq_len, PBLK_VMALLOC_META, GFP_KERNEL); if (IS_ERR(bio)) { ret = PTR_ERR(bio); - goto fail_free_rqd; + goto fail_free_meta; } bio->bi_iter.bi_sector = 0; /* internal bio */ bio_set_op_attrs(bio, REQ_OP_WRITE, 0); + rqd = pblk_alloc_rqd(pblk, WRITE); + rqd->bio = bio; rqd->opcode = NVM_OP_PWRITE; rqd->flags = pblk_set_progr_mode(pblk, WRITE); @@ -490,8 +477,6 @@ free_rq: fail_free_bio: bio_put(bio); -fail_free_rqd: - pblk_free_rqd(pblk, rqd, WRITE); fail_free_meta: nvm_dev_dma_free(dev->parent, meta_list, 
dma_meta_list); fail_free_pad: @@ -785,15 +770,9 @@ static int pblk_recov_l2p_from_oob(struct pblk *pblk, struct pblk_line *line) dma_addr_t dma_ppa_list, dma_meta_list; int done, ret = 0; - rqd = pblk_alloc_rqd(pblk, READ); - if (IS_ERR(rqd)) - return PTR_ERR(rqd); - meta_list = nvm_dev_dma_alloc(dev->parent, GFP_KERNEL, &dma_meta_list); - if (!meta_list) { - ret = -ENOMEM; - goto free_rqd; - } + if (!meta_list) + return -ENOMEM; ppa_list = (void *)(meta_list) + pblk_dma_meta_size; dma_ppa_list = dma_meta_list + pblk_dma_meta_size; @@ -804,6 +783,8 @@ static int pblk_recov_l2p_from_oob(struct pblk *pblk, struct pblk_line *line) goto free_meta_list; } + rqd = pblk_alloc_rqd(pblk, READ); + p.ppa_list = ppa_list; p.meta_list = meta_list; p.rqd = rqd; @@ -832,8 +813,6 @@ out: kfree(data); free_meta_list: nvm_dev_dma_free(dev->parent, meta_list, dma_meta_list); -free_rqd: - pblk_free_rqd(pblk, rqd, READ); return ret; } diff --git a/drivers/lightnvm/pblk-write.c b/drivers/lightnvm/pblk-write.c index 26c2b8345149..0fb8f26a6311 100644 --- a/drivers/lightnvm/pblk-write.c +++ b/drivers/lightnvm/pblk-write.c @@ -111,10 +111,7 @@ static void pblk_end_w_fail(struct pblk *pblk, struct nvm_rq *rqd) ppa_list = &rqd->ppa_addr; recovery = mempool_alloc(pblk->rec_pool, GFP_ATOMIC); - if (!recovery) { - pr_err("pblk: could not allocate recovery context\n"); - return; - } + INIT_LIST_HEAD(&recovery->failed); bit = -1; @@ -375,10 +372,7 @@ int pblk_submit_meta_io(struct pblk *pblk, struct pblk_line *meta_line) int ret; rqd = pblk_alloc_rqd(pblk, READ); - if (IS_ERR(rqd)) { - pr_err("pblk: cannot allocate write req.\n"); - return PTR_ERR(rqd); - } + m_ctx = nvm_rq_to_pdu(rqd); m_ctx->private = meta_line; @@ -546,19 +540,12 @@ static int pblk_submit_write(struct pblk *pblk) if (!secs_to_flush && secs_avail < pblk->min_write_pgs) return 1; - rqd = pblk_alloc_rqd(pblk, WRITE); - if (IS_ERR(rqd)) { - pr_err("pblk: cannot allocate write req.\n"); - return 1; - } - bio = bio_alloc(GFP_KERNEL, 
pblk->max_write_pgs); - if (!bio) { - pr_err("pblk: cannot allocate write bio\n"); - goto fail_free_rqd; - } + bio->bi_iter.bi_sector = 0; /* internal bio */ bio_set_op_attrs(bio, REQ_OP_WRITE, 0); + + rqd = pblk_alloc_rqd(pblk, WRITE); rqd->bio = bio; secs_to_sync = pblk_calc_secs_to_sync(pblk, secs_avail, secs_to_flush); @@ -589,7 +576,6 @@ fail_free_bio: pblk_free_write_rqd(pblk, rqd); fail_put_bio: bio_put(bio); -fail_free_rqd: pblk_free_rqd(pblk, rqd, WRITE); return 1; -- cgit v1.2.3 From 3627896a4b12ea6bb9e0ff77724a24f53726db2d Mon Sep 17 00:00:00 2001 From: Javier González Date: Fri, 13 Oct 2017 14:46:11 +0200 Subject: lightnvm: pblk: use constant for GC max inflight MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Use a constant to set the maximum number of inflight GC requests allowed. Signed-off-by: Javier González Signed-off-by: Matias Bjørling Signed-off-by: Jens Axboe --- drivers/lightnvm/pblk-gc.c | 4 ++-- drivers/lightnvm/pblk.h | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) (limited to 'drivers/lightnvm') diff --git a/drivers/lightnvm/pblk-gc.c b/drivers/lightnvm/pblk-gc.c index f163829ecca8..c21b2077432a 100644 --- a/drivers/lightnvm/pblk-gc.c +++ b/drivers/lightnvm/pblk-gc.c @@ -93,7 +93,7 @@ static int pblk_gc_move_valid_secs(struct pblk *pblk, struct pblk_gc_rq *gc_rq) retry: spin_lock(&gc->w_lock); - if (gc->w_entries >= PBLK_GC_W_QD) { + if (gc->w_entries >= PBLK_GC_RQ_QD) { spin_unlock(&gc->w_lock); pblk_gc_writer_kick(&pblk->gc); usleep_range(128, 256); @@ -602,7 +602,7 @@ int pblk_gc_init(struct pblk *pblk) spin_lock_init(&gc->w_lock); spin_lock_init(&gc->r_lock); - sema_init(&gc->gc_sem, 128); + sema_init(&gc->gc_sem, PBLK_GC_RQ_QD); INIT_LIST_HEAD(&gc->w_list); INIT_LIST_HEAD(&gc->r_list); diff --git a/drivers/lightnvm/pblk.h b/drivers/lightnvm/pblk.h index 60edcda0fc7f..baa6a633990f 100644 --- a/drivers/lightnvm/pblk.h +++ b/drivers/lightnvm/pblk.h @@ -816,7 +816,7 @@ int 
pblk_recov_setup_rq(struct pblk *pblk, struct pblk_c_ctx *c_ctx, * pblk gc */ #define PBLK_GC_MAX_READERS 8 /* Max number of outstanding GC reader jobs */ -#define PBLK_GC_W_QD 128 /* Queue depth for inflight GC write I/Os */ +#define PBLK_GC_RQ_QD 128 /* Queue depth for inflight GC requests */ #define PBLK_GC_L_QD 4 /* Queue depth for inflight GC lines */ #define PBLK_GC_RSV_LINE 1 /* Reserved lines for GC */ -- cgit v1.2.3 From 9f6cb13bb40bd9067498e908a3272aba998c0309 Mon Sep 17 00:00:00 2001 From: Javier González Date: Fri, 13 Oct 2017 14:46:12 +0200 Subject: lightnvm: pblk: normalize ppa namings MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Normalize the way we name ppa variables to improve code readability. Signed-off-by: Javier González Signed-off-by: Matias Bjørling Signed-off-by: Jens Axboe --- drivers/lightnvm/pblk-core.c | 48 +++++++++++++++++++++++--------------------- 1 file changed, 25 insertions(+), 23 deletions(-) (limited to 'drivers/lightnvm') diff --git a/drivers/lightnvm/pblk-core.c b/drivers/lightnvm/pblk-core.c index 0da58869006b..b6d7c6660149 100644 --- a/drivers/lightnvm/pblk-core.c +++ b/drivers/lightnvm/pblk-core.c @@ -1746,7 +1746,7 @@ void pblk_up_rq(struct pblk *pblk, struct ppa_addr *ppa_list, int nr_ppas, void pblk_update_map(struct pblk *pblk, sector_t lba, struct ppa_addr ppa) { - struct ppa_addr l2p_ppa; + struct ppa_addr ppa_l2p; /* logic error: lba out-of-bounds. 
Ignore update */ if (!(lba < pblk->rl.nr_secs)) { @@ -1755,10 +1755,10 @@ void pblk_update_map(struct pblk *pblk, sector_t lba, struct ppa_addr ppa) } spin_lock(&pblk->trans_lock); - l2p_ppa = pblk_trans_map_get(pblk, lba); + ppa_l2p = pblk_trans_map_get(pblk, lba); - if (!pblk_addr_in_cache(l2p_ppa) && !pblk_ppa_empty(l2p_ppa)) - pblk_map_invalidate(pblk, l2p_ppa); + if (!pblk_addr_in_cache(ppa_l2p) && !pblk_ppa_empty(ppa_l2p)) + pblk_map_invalidate(pblk, ppa_l2p); pblk_trans_map_set(pblk, lba, ppa); spin_unlock(&pblk->trans_lock); @@ -1775,16 +1775,16 @@ void pblk_update_map_cache(struct pblk *pblk, sector_t lba, struct ppa_addr ppa) pblk_update_map(pblk, lba, ppa); } -int pblk_update_map_gc(struct pblk *pblk, sector_t lba, struct ppa_addr ppa, +int pblk_update_map_gc(struct pblk *pblk, sector_t lba, struct ppa_addr ppa_new, struct pblk_line *gc_line) { - struct ppa_addr l2p_ppa; + struct ppa_addr ppa_l2p; int ret = 1; #ifdef CONFIG_NVM_DEBUG /* Callers must ensure that the ppa points to a cache address */ - BUG_ON(!pblk_addr_in_cache(ppa)); - BUG_ON(pblk_rb_pos_oob(&pblk->rwb, pblk_addr_to_cacheline(ppa))); + BUG_ON(!pblk_addr_in_cache(ppa_new)); + BUG_ON(pblk_rb_pos_oob(&pblk->rwb, pblk_addr_to_cacheline(ppa_new))); #endif /* logic error: lba out-of-bounds. 
Ignore update */ @@ -1794,36 +1794,38 @@ int pblk_update_map_gc(struct pblk *pblk, sector_t lba, struct ppa_addr ppa, } spin_lock(&pblk->trans_lock); - l2p_ppa = pblk_trans_map_get(pblk, lba); + ppa_l2p = pblk_trans_map_get(pblk, lba); /* Prevent updated entries to be overwritten by GC */ - if (pblk_addr_in_cache(l2p_ppa) || pblk_ppa_empty(l2p_ppa) || - pblk_tgt_ppa_to_line(l2p_ppa) != gc_line->id) { + if (pblk_addr_in_cache(ppa_l2p) || pblk_ppa_empty(ppa_l2p) || + pblk_tgt_ppa_to_line(ppa_l2p) != gc_line->id) { + ret = 0; goto out; } - pblk_trans_map_set(pblk, lba, ppa); + pblk_trans_map_set(pblk, lba, ppa_new); out: spin_unlock(&pblk->trans_lock); return ret; } -void pblk_update_map_dev(struct pblk *pblk, sector_t lba, struct ppa_addr ppa, - struct ppa_addr entry_line) +void pblk_update_map_dev(struct pblk *pblk, sector_t lba, + struct ppa_addr ppa_mapped, struct ppa_addr ppa_cache) { - struct ppa_addr l2p_line; + struct ppa_addr ppa_l2p; #ifdef CONFIG_NVM_DEBUG /* Callers must ensure that the ppa points to a device address */ - BUG_ON(pblk_addr_in_cache(ppa)); + BUG_ON(pblk_addr_in_cache(ppa_mapped)); #endif /* Invalidate and discard padded entries */ if (lba == ADDR_EMPTY) { #ifdef CONFIG_NVM_DEBUG atomic_long_inc(&pblk->padded_wb); #endif - pblk_map_invalidate(pblk, ppa); + if (!pblk_ppa_empty(ppa_mapped)) + pblk_map_invalidate(pblk, ppa_mapped); return; } @@ -1834,22 +1836,22 @@ void pblk_update_map_dev(struct pblk *pblk, sector_t lba, struct ppa_addr ppa, } spin_lock(&pblk->trans_lock); - l2p_line = pblk_trans_map_get(pblk, lba); + ppa_l2p = pblk_trans_map_get(pblk, lba); /* Do not update L2P if the cacheline has been updated. 
In this case, * the mapped ppa must be invalidated */ - if (l2p_line.ppa != entry_line.ppa) { - if (!pblk_ppa_empty(ppa)) - pblk_map_invalidate(pblk, ppa); + if (!pblk_ppa_comp(ppa_l2p, ppa_cache)) { + if (!pblk_ppa_empty(ppa_mapped)) + pblk_map_invalidate(pblk, ppa_mapped); goto out; } #ifdef CONFIG_NVM_DEBUG - WARN_ON(!pblk_addr_in_cache(l2p_line) && !pblk_ppa_empty(l2p_line)); + WARN_ON(!pblk_addr_in_cache(ppa_l2p) && !pblk_ppa_empty(ppa_l2p)); #endif - pblk_trans_map_set(pblk, lba, ppa); + pblk_trans_map_set(pblk, lba, ppa_mapped); out: spin_unlock(&pblk->trans_lock); } -- cgit v1.2.3 From 84454e6de56bb5c8629c41ed09aaf5750ff56f5f Mon Sep 17 00:00:00 2001 From: Javier González Date: Fri, 13 Oct 2017 14:46:13 +0200 Subject: lightnvm: pblk: refactor read lba sanity check MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Refactor lba sanity check on read path to avoid code duplication. Signed-off-by: Javier González Signed-off-by: Matias Bjørling Signed-off-by: Jens Axboe --- drivers/lightnvm/pblk-read.c | 29 ++++++++++------------------- 1 file changed, 10 insertions(+), 19 deletions(-) (limited to 'drivers/lightnvm') diff --git a/drivers/lightnvm/pblk-read.c b/drivers/lightnvm/pblk-read.c index d2b6e2a7d7d5..eaaf9d55ba97 100644 --- a/drivers/lightnvm/pblk-read.c +++ b/drivers/lightnvm/pblk-read.c @@ -39,21 +39,14 @@ static int pblk_read_from_cache(struct pblk *pblk, struct bio *bio, } static void pblk_read_ppalist_rq(struct pblk *pblk, struct nvm_rq *rqd, - unsigned long *read_bitmap) + sector_t blba, unsigned long *read_bitmap) { struct bio *bio = rqd->bio; struct ppa_addr ppas[PBLK_MAX_REQ_ADDRS]; - sector_t blba = pblk_get_lba(bio); int nr_secs = rqd->nr_ppas; bool advanced_bio = false; int i, j = 0; - /* logic error: lba out-of-bounds. 
Ignore read request */ - if (blba + nr_secs >= pblk->rl.nr_secs) { - WARN(1, "pblk: read lbas out of bounds\n"); - return; - } - pblk_lookup_l2p_seq(pblk, ppas, blba, nr_secs); for (i = 0; i < nr_secs; i++) { @@ -259,17 +252,10 @@ err: } static void pblk_read_rq(struct pblk *pblk, struct nvm_rq *rqd, - unsigned long *read_bitmap) + sector_t lba, unsigned long *read_bitmap) { struct bio *bio = rqd->bio; struct ppa_addr ppa; - sector_t lba = pblk_get_lba(bio); - - /* logic error: lba out-of-bounds. Ignore read request */ - if (lba >= pblk->rl.nr_secs) { - WARN(1, "pblk: read lba out of bounds\n"); - return; - } pblk_lookup_l2p_seq(pblk, &ppa, lba, 1); @@ -305,14 +291,19 @@ retry: int pblk_submit_read(struct pblk *pblk, struct bio *bio) { struct nvm_tgt_dev *dev = pblk->dev; + sector_t blba = pblk_get_lba(bio); unsigned int nr_secs = pblk_get_secs(bio); struct nvm_rq *rqd; unsigned long read_bitmap; /* Max 64 ppas per request */ unsigned int bio_init_idx; int ret = NVM_IO_ERR; - if (nr_secs > PBLK_MAX_REQ_ADDRS) + /* logic error: lba out-of-bounds. 
Ignore read request */ + if (blba >= pblk->rl.nr_secs || nr_secs > PBLK_MAX_REQ_ADDRS) { + WARN(1, "pblk: read lba out of bounds (lba:%llu, nr:%d)\n", + (unsigned long long)blba, nr_secs); return NVM_IO_ERR; + } bitmap_zero(&read_bitmap, nr_secs); @@ -340,9 +331,9 @@ int pblk_submit_read(struct pblk *pblk, struct bio *bio) rqd->ppa_list = rqd->meta_list + pblk_dma_meta_size; rqd->dma_ppa_list = rqd->dma_meta_list + pblk_dma_meta_size; - pblk_read_ppalist_rq(pblk, rqd, &read_bitmap); + pblk_read_ppalist_rq(pblk, rqd, blba, &read_bitmap); } else { - pblk_read_rq(pblk, rqd, &read_bitmap); + pblk_read_rq(pblk, rqd, blba, &read_bitmap); } bio_get(bio); -- cgit v1.2.3 From d340121eb770de3b02bfc73c5f2b00f5345090c2 Mon Sep 17 00:00:00 2001 From: Javier González Date: Fri, 13 Oct 2017 14:46:14 +0200 Subject: lightnvm: pblk: simplify data validity check on GC MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit When a line is selected for recycling by the garbage collector (GC), the line state changes and the invalid bitmap is frozen, preventing invalidations from happening. Throughout the GC, the L2P map is checked to verify that no data being recycled has been updated. The last check is done before the new map is being stored on the L2P table. Though this algorithm works, it requires a number of corner cases to be checked each time the L2P table is being updated. This complicates readability and is error-prone in case the recycling algorithm is modified. Instead, this patch makes the invalid bitmap accessible even when the line is being recycled. When recycled data is being remapped, it is enough to check the invalid bitmap for the line before updating the L2P table.
Signed-off-by: Javier González Signed-off-by: Matias Bjørling Signed-off-by: Jens Axboe --- drivers/lightnvm/pblk-cache.c | 20 +++++------ drivers/lightnvm/pblk-core.c | 29 +++++++--------- drivers/lightnvm/pblk-gc.c | 58 +++++++++++++++++-------------- drivers/lightnvm/pblk-rb.c | 6 ++-- drivers/lightnvm/pblk-read.c | 79 +++++++++++++++++++++++-------------------- drivers/lightnvm/pblk.h | 23 ++++--------- 6 files changed, 109 insertions(+), 106 deletions(-) (limited to 'drivers/lightnvm') diff --git a/drivers/lightnvm/pblk-cache.c b/drivers/lightnvm/pblk-cache.c index 024a8fc93069..1d6b8e3585f1 100644 --- a/drivers/lightnvm/pblk-cache.c +++ b/drivers/lightnvm/pblk-cache.c @@ -73,12 +73,11 @@ out: * On GC the incoming lbas are not necessarily sequential. Also, some of the * lbas might not be valid entries, which are marked as empty by the GC thread */ -int pblk_write_gc_to_cache(struct pblk *pblk, void *data, u64 *lba_list, - unsigned int nr_entries, unsigned int nr_rec_entries, - struct pblk_line *gc_line, unsigned long flags) +int pblk_write_gc_to_cache(struct pblk *pblk, struct pblk_gc_rq *gc_rq) { struct pblk_w_ctx w_ctx; unsigned int bpos, pos; + void *data = gc_rq->data; int i, valid_entries; /* Update the write buffer head (mem) with the entries that we can @@ -86,28 +85,29 @@ int pblk_write_gc_to_cache(struct pblk *pblk, void *data, u64 *lba_list, * rollback from here on. 
*/ retry: - if (!pblk_rb_may_write_gc(&pblk->rwb, nr_rec_entries, &bpos)) { + if (!pblk_rb_may_write_gc(&pblk->rwb, gc_rq->secs_to_gc, &bpos)) { io_schedule(); goto retry; } - w_ctx.flags = flags; + w_ctx.flags = PBLK_IOTYPE_GC; pblk_ppa_set_empty(&w_ctx.ppa); - for (i = 0, valid_entries = 0; i < nr_entries; i++) { - if (lba_list[i] == ADDR_EMPTY) + for (i = 0, valid_entries = 0; i < gc_rq->nr_secs; i++) { + if (gc_rq->lba_list[i] == ADDR_EMPTY) continue; - w_ctx.lba = lba_list[i]; + w_ctx.lba = gc_rq->lba_list[i]; pos = pblk_rb_wrap_pos(&pblk->rwb, bpos + valid_entries); - pblk_rb_write_entry_gc(&pblk->rwb, data, w_ctx, gc_line, pos); + pblk_rb_write_entry_gc(&pblk->rwb, data, w_ctx, gc_rq->line, + gc_rq->paddr_list[i], pos); data += PBLK_EXPOSED_PAGE_SIZE; valid_entries++; } - WARN_ONCE(nr_rec_entries != valid_entries, + WARN_ONCE(gc_rq->secs_to_gc != valid_entries, "pblk: inconsistent GC write\n"); #ifdef CONFIG_NVM_DEBUG diff --git a/drivers/lightnvm/pblk-core.c b/drivers/lightnvm/pblk-core.c index b6d7c6660149..6dd4866e579c 100644 --- a/drivers/lightnvm/pblk-core.c +++ b/drivers/lightnvm/pblk-core.c @@ -78,11 +78,7 @@ void __pblk_map_invalidate(struct pblk *pblk, struct pblk_line *line, * that newer updates are not overwritten. 
*/ spin_lock(&line->lock); - if (line->state == PBLK_LINESTATE_GC || - line->state == PBLK_LINESTATE_FREE) { - spin_unlock(&line->lock); - return; - } + WARN_ON(line->state == PBLK_LINESTATE_FREE); if (test_and_set_bit(paddr, line->invalid_bitmap)) { WARN_ONCE(1, "pblk: double invalidate\n"); @@ -99,8 +95,7 @@ void __pblk_map_invalidate(struct pblk *pblk, struct pblk_line *line, spin_lock(&l_mg->gc_lock); spin_lock(&line->lock); /* Prevent moving a line that has just been chosen for GC */ - if (line->state == PBLK_LINESTATE_GC || - line->state == PBLK_LINESTATE_FREE) { + if (line->state == PBLK_LINESTATE_GC) { spin_unlock(&line->lock); spin_unlock(&l_mg->gc_lock); return; @@ -1766,6 +1761,7 @@ void pblk_update_map(struct pblk *pblk, sector_t lba, struct ppa_addr ppa) void pblk_update_map_cache(struct pblk *pblk, sector_t lba, struct ppa_addr ppa) { + #ifdef CONFIG_NVM_DEBUG /* Callers must ensure that the ppa points to a cache address */ BUG_ON(!pblk_addr_in_cache(ppa)); @@ -1776,9 +1772,9 @@ void pblk_update_map_cache(struct pblk *pblk, sector_t lba, struct ppa_addr ppa) } int pblk_update_map_gc(struct pblk *pblk, sector_t lba, struct ppa_addr ppa_new, - struct pblk_line *gc_line) + struct pblk_line *gc_line, u64 paddr_gc) { - struct ppa_addr ppa_l2p; + struct ppa_addr ppa_l2p, ppa_gc; int ret = 1; #ifdef CONFIG_NVM_DEBUG @@ -1795,10 +1791,13 @@ int pblk_update_map_gc(struct pblk *pblk, sector_t lba, struct ppa_addr ppa_new, spin_lock(&pblk->trans_lock); ppa_l2p = pblk_trans_map_get(pblk, lba); + ppa_gc = addr_to_gen_ppa(pblk, paddr_gc, gc_line->id); - /* Prevent updated entries to be overwritten by GC */ - if (pblk_addr_in_cache(ppa_l2p) || pblk_ppa_empty(ppa_l2p) || - pblk_tgt_ppa_to_line(ppa_l2p) != gc_line->id) { + if (!pblk_ppa_comp(ppa_l2p, ppa_gc)) { + spin_lock(&gc_line->lock); + WARN(!test_bit(paddr_gc, gc_line->invalid_bitmap), + "pblk: corrupted GC update"); + spin_unlock(&gc_line->lock); ret = 0; goto out; @@ -1870,15 +1869,13 @@ void 
pblk_lookup_l2p_seq(struct pblk *pblk, struct ppa_addr *ppas, void pblk_lookup_l2p_rand(struct pblk *pblk, struct ppa_addr *ppas, u64 *lba_list, int nr_secs) { - sector_t lba; + u64 lba; int i; spin_lock(&pblk->trans_lock); for (i = 0; i < nr_secs; i++) { lba = lba_list[i]; - if (lba == ADDR_EMPTY) { - ppas[i].ppa = ADDR_EMPTY; - } else { + if (lba != ADDR_EMPTY) { /* logic error: lba out-of-bounds. Ignore update */ if (!(lba < pblk->rl.nr_secs)) { WARN(1, "pblk: corrupted L2P map request\n"); diff --git a/drivers/lightnvm/pblk-gc.c b/drivers/lightnvm/pblk-gc.c index c21b2077432a..7ad0cfe58a21 100644 --- a/drivers/lightnvm/pblk-gc.c +++ b/drivers/lightnvm/pblk-gc.c @@ -20,7 +20,8 @@ static void pblk_gc_free_gc_rq(struct pblk_gc_rq *gc_rq) { - vfree(gc_rq->data); + if (gc_rq->data) + vfree(gc_rq->data); kfree(gc_rq); } @@ -41,10 +42,7 @@ static int pblk_gc_write(struct pblk *pblk) spin_unlock(&gc->w_lock); list_for_each_entry_safe(gc_rq, tgc_rq, &w_list, list) { - pblk_write_gc_to_cache(pblk, gc_rq->data, gc_rq->lba_list, - gc_rq->nr_secs, gc_rq->secs_to_gc, - gc_rq->line, PBLK_IOTYPE_GC); - + pblk_write_gc_to_cache(pblk, gc_rq); list_del(&gc_rq->list); kref_put(&gc_rq->line->ref, pblk_line_put); pblk_gc_free_gc_rq(gc_rq); @@ -69,27 +67,23 @@ static int pblk_gc_move_valid_secs(struct pblk *pblk, struct pblk_gc_rq *gc_rq) struct pblk_gc *gc = &pblk->gc; struct pblk_line *line = gc_rq->line; void *data; - unsigned int secs_to_gc; int ret = 0; data = vmalloc(gc_rq->nr_secs * geo->sec_size); if (!data) { ret = -ENOMEM; - goto out; + goto fail; } - /* Read from GC victim block */ - if (pblk_submit_read_gc(pblk, gc_rq->lba_list, data, gc_rq->nr_secs, - &secs_to_gc, line)) { - ret = -EFAULT; - goto free_data; - } + gc_rq->data = data; - if (!secs_to_gc) - goto free_rq; + /* Read from GC victim block */ + ret = pblk_submit_read_gc(pblk, gc_rq); + if (ret) + goto fail; - gc_rq->data = data; - gc_rq->secs_to_gc = secs_to_gc; + if (!gc_rq->secs_to_gc) + goto fail; retry: 
spin_lock(&gc->w_lock); @@ -107,11 +101,8 @@ retry: return 0; -free_rq: - kfree(gc_rq); -free_data: - vfree(data); -out: +fail: + pblk_gc_free_gc_rq(gc_rq); kref_put(&line->ref, pblk_line_put); return ret; } @@ -167,14 +158,21 @@ static void pblk_gc_line_prepare_ws(struct work_struct *work) struct pblk_line_ws *gc_rq_ws; struct pblk_gc_rq *gc_rq; __le64 *lba_list; + unsigned long *invalid_bitmap; int sec_left, nr_secs, bit; int ret; + invalid_bitmap = kmalloc(lm->sec_bitmap_len, GFP_KERNEL); + if (!invalid_bitmap) { + pr_err("pblk: could not allocate GC invalid bitmap\n"); + goto fail_free_ws; + } + emeta_buf = pblk_malloc(lm->emeta_len[0], l_mg->emeta_alloc_type, GFP_KERNEL); if (!emeta_buf) { pr_err("pblk: cannot use GC emeta\n"); - return; + goto fail_free_bitmap; } ret = pblk_line_read_emeta(pblk, line, emeta_buf); @@ -193,7 +191,11 @@ static void pblk_gc_line_prepare_ws(struct work_struct *work) goto fail_free_emeta; } + spin_lock(&line->lock); + bitmap_copy(invalid_bitmap, line->invalid_bitmap, lm->sec_per_line); sec_left = pblk_line_vsc(line); + spin_unlock(&line->lock); + if (sec_left < 0) { pr_err("pblk: corrupted GC line (%d)\n", line->id); goto fail_free_emeta; @@ -207,11 +209,12 @@ next_rq: nr_secs = 0; do { - bit = find_next_zero_bit(line->invalid_bitmap, lm->sec_per_line, + bit = find_next_zero_bit(invalid_bitmap, lm->sec_per_line, bit + 1); if (bit > line->emeta_ssec) break; + gc_rq->paddr_list[nr_secs] = bit; gc_rq->lba_list[nr_secs++] = le64_to_cpu(lba_list[bit]); } while (nr_secs < pblk->max_write_pgs); @@ -244,6 +247,7 @@ next_rq: out: pblk_mfree(emeta_buf, l_mg->emeta_alloc_type); kfree(line_ws); + kfree(invalid_bitmap); kref_put(&line->ref, pblk_line_put); atomic_dec(&gc->inflight_gc); @@ -254,9 +258,13 @@ fail_free_gc_rq: kfree(gc_rq); fail_free_emeta: pblk_mfree(emeta_buf, l_mg->emeta_alloc_type); +fail_free_bitmap: + kfree(invalid_bitmap); +fail_free_ws: + kfree(line_ws); + pblk_put_line_back(pblk, line); kref_put(&line->ref, pblk_line_put); 
- kfree(line_ws); atomic_dec(&gc->inflight_gc); pr_err("pblk: Failed to GC line %d\n", line->id); diff --git a/drivers/lightnvm/pblk-rb.c b/drivers/lightnvm/pblk-rb.c index 9bc32578a766..74c768ce09ef 100644 --- a/drivers/lightnvm/pblk-rb.c +++ b/drivers/lightnvm/pblk-rb.c @@ -325,8 +325,8 @@ void pblk_rb_write_entry_user(struct pblk_rb *rb, void *data, } void pblk_rb_write_entry_gc(struct pblk_rb *rb, void *data, - struct pblk_w_ctx w_ctx, struct pblk_line *gc_line, - unsigned int ring_pos) + struct pblk_w_ctx w_ctx, struct pblk_line *line, + u64 paddr, unsigned int ring_pos) { struct pblk *pblk = container_of(rb, struct pblk, rwb); struct pblk_rb_entry *entry; @@ -341,7 +341,7 @@ void pblk_rb_write_entry_gc(struct pblk_rb *rb, void *data, __pblk_rb_write_entry(rb, data, w_ctx, entry); - if (!pblk_update_map_gc(pblk, w_ctx.lba, entry->cacheline, gc_line)) + if (!pblk_update_map_gc(pblk, w_ctx.lba, entry->cacheline, line, paddr)) entry->w_ctx.lba = ADDR_EMPTY; flags = w_ctx.flags | PBLK_WRITTEN_DATA; diff --git a/drivers/lightnvm/pblk-read.c b/drivers/lightnvm/pblk-read.c index eaaf9d55ba97..c28d6509312e 100644 --- a/drivers/lightnvm/pblk-read.c +++ b/drivers/lightnvm/pblk-read.c @@ -388,34 +388,40 @@ fail_rqd_free: static int read_ppalist_rq_gc(struct pblk *pblk, struct nvm_rq *rqd, struct pblk_line *line, u64 *lba_list, - unsigned int nr_secs) + u64 *paddr_list_gc, unsigned int nr_secs) { - struct ppa_addr ppas[PBLK_MAX_REQ_ADDRS]; + struct ppa_addr ppa_list_l2p[PBLK_MAX_REQ_ADDRS]; + struct ppa_addr ppa_gc; int valid_secs = 0; int i; - pblk_lookup_l2p_rand(pblk, ppas, lba_list, nr_secs); + pblk_lookup_l2p_rand(pblk, ppa_list_l2p, lba_list, nr_secs); for (i = 0; i < nr_secs; i++) { - if (pblk_addr_in_cache(ppas[i]) || ppas[i].g.blk != line->id || - pblk_ppa_empty(ppas[i])) { - lba_list[i] = ADDR_EMPTY; + if (lba_list[i] == ADDR_EMPTY) + continue; + + ppa_gc = addr_to_gen_ppa(pblk, paddr_list_gc[i], line->id); + if (!pblk_ppa_comp(ppa_list_l2p[i], ppa_gc)) { + 
paddr_list_gc[i] = lba_list[i] = ADDR_EMPTY; continue; } - rqd->ppa_list[valid_secs++] = ppas[i]; + rqd->ppa_list[valid_secs++] = ppa_list_l2p[i]; } #ifdef CONFIG_NVM_DEBUG atomic_long_add(valid_secs, &pblk->inflight_reads); #endif + return valid_secs; } static int read_rq_gc(struct pblk *pblk, struct nvm_rq *rqd, - struct pblk_line *line, sector_t lba) + struct pblk_line *line, sector_t lba, + u64 paddr_gc) { - struct ppa_addr ppa; + struct ppa_addr ppa_l2p, ppa_gc; int valid_secs = 0; if (lba == ADDR_EMPTY) @@ -428,15 +434,14 @@ static int read_rq_gc(struct pblk *pblk, struct nvm_rq *rqd, } spin_lock(&pblk->trans_lock); - ppa = pblk_trans_map_get(pblk, lba); + ppa_l2p = pblk_trans_map_get(pblk, lba); spin_unlock(&pblk->trans_lock); - /* Ignore updated values until the moment */ - if (pblk_addr_in_cache(ppa) || ppa.g.blk != line->id || - pblk_ppa_empty(ppa)) + ppa_gc = addr_to_gen_ppa(pblk, paddr_gc, line->id); + if (!pblk_ppa_comp(ppa_l2p, ppa_gc)) goto out; - rqd->ppa_addr = ppa; + rqd->ppa_addr = ppa_l2p; valid_secs = 1; #ifdef CONFIG_NVM_DEBUG @@ -447,15 +452,14 @@ out: return valid_secs; } -int pblk_submit_read_gc(struct pblk *pblk, u64 *lba_list, void *data, - unsigned int nr_secs, unsigned int *secs_to_gc, - struct pblk_line *line) +int pblk_submit_read_gc(struct pblk *pblk, struct pblk_gc_rq *gc_rq) { struct nvm_tgt_dev *dev = pblk->dev; struct nvm_geo *geo = &dev->geo; struct bio *bio; struct nvm_rq rqd; - int ret, data_len; + int data_len; + int ret = NVM_IO_OK; DECLARE_COMPLETION_ONSTACK(wait); memset(&rqd, 0, sizeof(struct nvm_rq)); @@ -463,25 +467,29 @@ int pblk_submit_read_gc(struct pblk *pblk, u64 *lba_list, void *data, rqd.meta_list = nvm_dev_dma_alloc(dev->parent, GFP_KERNEL, &rqd.dma_meta_list); if (!rqd.meta_list) - return NVM_IO_ERR; + return -ENOMEM; - if (nr_secs > 1) { + if (gc_rq->nr_secs > 1) { rqd.ppa_list = rqd.meta_list + pblk_dma_meta_size; rqd.dma_ppa_list = rqd.dma_meta_list + pblk_dma_meta_size; - *secs_to_gc = 
read_ppalist_rq_gc(pblk, &rqd, line, lba_list, - nr_secs); - if (*secs_to_gc == 1) + gc_rq->secs_to_gc = read_ppalist_rq_gc(pblk, &rqd, gc_rq->line, + gc_rq->lba_list, + gc_rq->paddr_list, + gc_rq->nr_secs); + if (gc_rq->secs_to_gc == 1) rqd.ppa_addr = rqd.ppa_list[0]; } else { - *secs_to_gc = read_rq_gc(pblk, &rqd, line, lba_list[0]); + gc_rq->secs_to_gc = read_rq_gc(pblk, &rqd, gc_rq->line, + gc_rq->lba_list[0], + gc_rq->paddr_list[0]); } - if (!(*secs_to_gc)) + if (!(gc_rq->secs_to_gc)) goto out; - data_len = (*secs_to_gc) * geo->sec_size; - bio = pblk_bio_map_addr(pblk, data, *secs_to_gc, data_len, + data_len = (gc_rq->secs_to_gc) * geo->sec_size; + bio = pblk_bio_map_addr(pblk, gc_rq->data, gc_rq->secs_to_gc, data_len, PBLK_VMALLOC_META, GFP_KERNEL); if (IS_ERR(bio)) { pr_err("pblk: could not allocate GC bio (%lu)\n", PTR_ERR(bio)); @@ -494,13 +502,12 @@ int pblk_submit_read_gc(struct pblk *pblk, u64 *lba_list, void *data, rqd.opcode = NVM_OP_PREAD; rqd.end_io = pblk_end_io_sync; rqd.private = &wait; - rqd.nr_ppas = *secs_to_gc; + rqd.nr_ppas = gc_rq->secs_to_gc; rqd.flags = pblk_set_read_mode(pblk, PBLK_READ_RANDOM); rqd.bio = bio; - ret = pblk_submit_read_io(pblk, &rqd); - if (ret) { - bio_endio(bio); + if (pblk_submit_read_io(pblk, &rqd)) { + ret = -EIO; pr_err("pblk: GC read request failed\n"); goto err_free_bio; } @@ -519,19 +526,19 @@ int pblk_submit_read_gc(struct pblk *pblk, u64 *lba_list, void *data, } #ifdef CONFIG_NVM_DEBUG - atomic_long_add(*secs_to_gc, &pblk->sync_reads); - atomic_long_add(*secs_to_gc, &pblk->recov_gc_reads); - atomic_long_sub(*secs_to_gc, &pblk->inflight_reads); + atomic_long_add(gc_rq->secs_to_gc, &pblk->sync_reads); + atomic_long_add(gc_rq->secs_to_gc, &pblk->recov_gc_reads); + atomic_long_sub(gc_rq->secs_to_gc, &pblk->inflight_reads); #endif bio_put(bio); out: nvm_dev_dma_free(dev->parent, rqd.meta_list, rqd.dma_meta_list); - return NVM_IO_OK; + return ret; err_free_bio: bio_put(bio); err_free_dma: 
nvm_dev_dma_free(dev->parent, rqd.meta_list, rqd.dma_meta_list); - return NVM_IO_ERR; + return ret; } diff --git a/drivers/lightnvm/pblk.h b/drivers/lightnvm/pblk.h index baa6a633990f..876b50f97234 100644 --- a/drivers/lightnvm/pblk.h +++ b/drivers/lightnvm/pblk.h @@ -206,6 +206,7 @@ struct pblk_lun { struct pblk_gc_rq { struct pblk_line *line; void *data; + u64 paddr_list[PBLK_MAX_REQ_ADDRS]; u64 lba_list[PBLK_MAX_REQ_ADDRS]; int nr_secs; int secs_to_gc; @@ -658,8 +659,8 @@ int pblk_rb_may_write_gc(struct pblk_rb *rb, unsigned int nr_entries, void pblk_rb_write_entry_user(struct pblk_rb *rb, void *data, struct pblk_w_ctx w_ctx, unsigned int pos); void pblk_rb_write_entry_gc(struct pblk_rb *rb, void *data, - struct pblk_w_ctx w_ctx, struct pblk_line *gc_line, - unsigned int pos); + struct pblk_w_ctx w_ctx, struct pblk_line *line, + u64 paddr, unsigned int pos); struct pblk_w_ctx *pblk_rb_w_ctx(struct pblk_rb *rb, unsigned int pos); void pblk_rb_flush(struct pblk_rb *rb); @@ -761,7 +762,7 @@ void pblk_update_map_cache(struct pblk *pblk, sector_t lba, void pblk_update_map_dev(struct pblk *pblk, sector_t lba, struct ppa_addr ppa, struct ppa_addr entry_line); int pblk_update_map_gc(struct pblk *pblk, sector_t lba, struct ppa_addr ppa, - struct pblk_line *gc_line); + struct pblk_line *gc_line, u64 paddr); void pblk_lookup_l2p_rand(struct pblk *pblk, struct ppa_addr *ppas, u64 *lba_list, int nr_secs); void pblk_lookup_l2p_seq(struct pblk *pblk, struct ppa_addr *ppas, @@ -772,9 +773,7 @@ void pblk_lookup_l2p_seq(struct pblk *pblk, struct ppa_addr *ppas, */ int pblk_write_to_cache(struct pblk *pblk, struct bio *bio, unsigned long flags); -int pblk_write_gc_to_cache(struct pblk *pblk, void *data, u64 *lba_list, - unsigned int nr_entries, unsigned int nr_rec_entries, - struct pblk_line *gc_line, unsigned long flags); +int pblk_write_gc_to_cache(struct pblk *pblk, struct pblk_gc_rq *gc_rq); /* * pblk map @@ -798,9 +797,7 @@ void pblk_write_should_kick(struct pblk *pblk); */ 
extern struct bio_set *pblk_bio_set; int pblk_submit_read(struct pblk *pblk, struct bio *bio); -int pblk_submit_read_gc(struct pblk *pblk, u64 *lba_list, void *data, - unsigned int nr_secs, unsigned int *secs_to_gc, - struct pblk_line *line); +int pblk_submit_read_gc(struct pblk *pblk, struct pblk_gc_rq *gc_rq); /* * pblk recovery */ @@ -893,13 +890,7 @@ static inline void *emeta_to_vsc(struct pblk *pblk, struct line_emeta *emeta) static inline int pblk_line_vsc(struct pblk_line *line) { - int vsc; - - spin_lock(&line->lock); - vsc = le32_to_cpu(*line->vsc); - spin_unlock(&line->lock); - - return vsc; + return le32_to_cpu(*line->vsc); } #define NVM_MEM_PAGE_WRITE (8) -- cgit v1.2.3 From 2a19b10d423c6dc47449e905ed3a8eabb49c48a0 Mon Sep 17 00:00:00 2001 From: Javier González Date: Fri, 13 Oct 2017 14:46:15 +0200 Subject: lightnvm: pblk: refactor read path on GC MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Simplify the part of the garbage collector where data is read from the line being recycled and moved into an internal queue before being copied to the memory buffer. This allows us to get rid of a dedicated function, which introduces an unnecessary dependency in the code. Signed-off-by: Javier González Signed-off-by: Matias Bjørling Signed-off-by: Jens Axboe --- drivers/lightnvm/pblk-gc.c | 94 +++++++++++++++++++--------------------------- 1 file changed, 39 insertions(+), 55 deletions(-) (limited to 'drivers/lightnvm') diff --git a/drivers/lightnvm/pblk-gc.c b/drivers/lightnvm/pblk-gc.c index 7ad0cfe58a21..7b103bce58bf 100644 --- a/drivers/lightnvm/pblk-gc.c +++ b/drivers/lightnvm/pblk-gc.c @@ -56,57 +56,6 @@ static void pblk_gc_writer_kick(struct pblk_gc *gc) wake_up_process(gc->gc_writer_ts); } -/* - * Responsible for managing all memory related to a gc request.
Also in case of - * failure - */ -static int pblk_gc_move_valid_secs(struct pblk *pblk, struct pblk_gc_rq *gc_rq) -{ - struct nvm_tgt_dev *dev = pblk->dev; - struct nvm_geo *geo = &dev->geo; - struct pblk_gc *gc = &pblk->gc; - struct pblk_line *line = gc_rq->line; - void *data; - int ret = 0; - - data = vmalloc(gc_rq->nr_secs * geo->sec_size); - if (!data) { - ret = -ENOMEM; - goto fail; - } - - gc_rq->data = data; - - /* Read from GC victim block */ - ret = pblk_submit_read_gc(pblk, gc_rq); - if (ret) - goto fail; - - if (!gc_rq->secs_to_gc) - goto fail; - -retry: - spin_lock(&gc->w_lock); - if (gc->w_entries >= PBLK_GC_RQ_QD) { - spin_unlock(&gc->w_lock); - pblk_gc_writer_kick(&pblk->gc); - usleep_range(128, 256); - goto retry; - } - gc->w_entries++; - list_add_tail(&gc_rq->list, &gc->w_list); - spin_unlock(&gc->w_lock); - - pblk_gc_writer_kick(&pblk->gc); - - return 0; - -fail: - pblk_gc_free_gc_rq(gc_rq); - kref_put(&line->ref, pblk_line_put); - return ret; -} - static void pblk_put_line_back(struct pblk *pblk, struct pblk_line *line) { struct pblk_line_mgmt *l_mg = &pblk->l_mg; @@ -130,18 +79,53 @@ static void pblk_gc_line_ws(struct work_struct *work) struct pblk_line_ws *gc_rq_ws = container_of(work, struct pblk_line_ws, ws); struct pblk *pblk = gc_rq_ws->pblk; + struct nvm_tgt_dev *dev = pblk->dev; + struct nvm_geo *geo = &dev->geo; struct pblk_gc *gc = &pblk->gc; struct pblk_line *line = gc_rq_ws->line; struct pblk_gc_rq *gc_rq = gc_rq_ws->priv; + int ret; up(&gc->gc_sem); - if (pblk_gc_move_valid_secs(pblk, gc_rq)) { - pr_err("pblk: could not GC all sectors: line:%d (%d/%d)\n", - line->id, *line->vsc, - gc_rq->nr_secs); + gc_rq->data = vmalloc(gc_rq->nr_secs * geo->sec_size); + if (!gc_rq->data) { + pr_err("pblk: could not GC line:%d (%d/%d)\n", + line->id, *line->vsc, gc_rq->nr_secs); + goto out; + } + + /* Read from GC victim block */ + ret = pblk_submit_read_gc(pblk, gc_rq); + if (ret) { + pr_err("pblk: failed GC read in line:%d (err:%d)\n", + line->id, 
ret); + goto out; } + if (!gc_rq->secs_to_gc) + goto out; + +retry: + spin_lock(&gc->w_lock); + if (gc->w_entries >= PBLK_GC_RQ_QD) { + spin_unlock(&gc->w_lock); + pblk_gc_writer_kick(&pblk->gc); + usleep_range(128, 256); + goto retry; + } + gc->w_entries++; + list_add_tail(&gc_rq->list, &gc->w_list); + spin_unlock(&gc->w_lock); + + pblk_gc_writer_kick(&pblk->gc); + + kfree(gc_rq_ws); + return; + +out: + pblk_gc_free_gc_rq(gc_rq); + kref_put(&line->ref, pblk_line_put); kfree(gc_rq_ws); } -- cgit v1.2.3 From 55e836d401601e7903b36db015ce899dc11085ab Mon Sep 17 00:00:00 2001 From: Javier González Date: Fri, 13 Oct 2017 14:46:16 +0200 Subject: lightnvm: pblk: put bio on bio completion MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Simplify put bio by doing it on bio end_io instead of manually putting it on the completion path. Signed-off-by: Javier González Signed-off-by: Matias Bjørling Signed-off-by: Jens Axboe --- drivers/lightnvm/pblk-core.c | 10 +++++++--- drivers/lightnvm/pblk-read.c | 1 - drivers/lightnvm/pblk-recovery.c | 1 - drivers/lightnvm/pblk-write.c | 8 +------- 4 files changed, 8 insertions(+), 12 deletions(-) (limited to 'drivers/lightnvm') diff --git a/drivers/lightnvm/pblk-core.c b/drivers/lightnvm/pblk-core.c index 6dd4866e579c..3d27d24baa0b 100644 --- a/drivers/lightnvm/pblk-core.c +++ b/drivers/lightnvm/pblk-core.c @@ -417,6 +417,11 @@ int pblk_submit_io(struct pblk *pblk, struct nvm_rq *rqd) return nvm_submit_io(dev, rqd); } +static void pblk_bio_map_addr_endio(struct bio *bio) +{ + bio_put(bio); +} + struct bio *pblk_bio_map_addr(struct pblk *pblk, void *data, unsigned int nr_secs, unsigned int len, int alloc_type, gfp_t gfp_mask) @@ -453,6 +458,8 @@ struct bio *pblk_bio_map_addr(struct pblk *pblk, void *data, kaddr += PAGE_SIZE; } + + bio->bi_end_io = pblk_bio_map_addr_endio; out: return bio; } @@ -671,9 +678,6 @@ next_rq: atomic_dec(&pblk->inflight_io); reinit_completion(&wait); - if 
(likely(pblk->l_mg.emeta_alloc_type == PBLK_VMALLOC_META)) - bio_put(bio); - if (rqd.error) { if (dir == WRITE) pblk_log_write_err(pblk, &rqd); diff --git a/drivers/lightnvm/pblk-read.c b/drivers/lightnvm/pblk-read.c index c28d6509312e..e7141b1aaded 100644 --- a/drivers/lightnvm/pblk-read.c +++ b/drivers/lightnvm/pblk-read.c @@ -531,7 +531,6 @@ int pblk_submit_read_gc(struct pblk *pblk, struct pblk_gc_rq *gc_rq) atomic_long_sub(gc_rq->secs_to_gc, &pblk->inflight_reads); #endif - bio_put(bio); out: nvm_dev_dma_free(dev->parent, rqd.meta_list, rqd.dma_meta_list); return ret; diff --git a/drivers/lightnvm/pblk-recovery.c b/drivers/lightnvm/pblk-recovery.c index 6b6b4183b41e..e59270e60b58 100644 --- a/drivers/lightnvm/pblk-recovery.c +++ b/drivers/lightnvm/pblk-recovery.c @@ -333,7 +333,6 @@ static void pblk_end_io_recov(struct nvm_rq *rqd) pblk_up_page(pblk, rqd->ppa_list, rqd->nr_ppas); - bio_put(rqd->bio); nvm_dev_dma_free(dev->parent, rqd->meta_list, rqd->dma_meta_list); pblk_free_rqd(pblk, rqd, WRITE); diff --git a/drivers/lightnvm/pblk-write.c b/drivers/lightnvm/pblk-write.c index 0fb8f26a6311..0c0481cf9f5d 100644 --- a/drivers/lightnvm/pblk-write.c +++ b/drivers/lightnvm/pblk-write.c @@ -188,17 +188,12 @@ static void pblk_end_io_write_meta(struct nvm_rq *rqd) pblk_log_write_err(pblk, rqd); pr_err("pblk: metadata I/O failed. 
Line %d\n", line->id); } -#ifdef CONFIG_NVM_DEBUG - else - WARN_ONCE(rqd->bio->bi_status, "pblk: corrupted write error\n"); -#endif sync = atomic_add_return(rqd->nr_ppas, &emeta->sync); if (sync == emeta->nr_entries) pblk_gen_run_ws(pblk, line, NULL, pblk_line_close_ws, GFP_ATOMIC, pblk->close_wq); - bio_put(rqd->bio); nvm_dev_dma_free(dev->parent, rqd->meta_list, rqd->dma_meta_list); pblk_free_rqd(pblk, rqd, READ); @@ -427,8 +422,7 @@ fail_rollback: nvm_dev_dma_free(dev->parent, rqd->meta_list, rqd->dma_meta_list); fail_free_bio: - if (likely(l_mg->emeta_alloc_type == PBLK_VMALLOC_META)) - bio_put(bio); + bio_put(bio); fail_free_rqd: pblk_free_rqd(pblk, rqd, READ); return ret; -- cgit v1.2.3 From 6ca2f71f3e3d94d188000b420ce0529b07f3ce95 Mon Sep 17 00:00:00 2001 From: Javier González Date: Fri, 13 Oct 2017 14:46:17 +0200 Subject: lightnvm: pblk: simplify path on REQ_PREFLUSH MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit On REQ_PREFLUSH, directly tag the I/O context flags to signal a flush in the write to cache path, instead of finding the correct entry context and imposing a memory barrier. This simplifies the code and might potentially prevent race conditions when adding functionality to the write path. 
Signed-off-by: Javier González Signed-off-by: Matias Bjørling Signed-off-by: Jens Axboe --- drivers/lightnvm/pblk-cache.c | 4 +++- drivers/lightnvm/pblk-rb.c | 8 +------- 2 files changed, 4 insertions(+), 8 deletions(-) (limited to 'drivers/lightnvm') diff --git a/drivers/lightnvm/pblk-cache.c b/drivers/lightnvm/pblk-cache.c index 1d6b8e3585f1..0d227ef7d1b9 100644 --- a/drivers/lightnvm/pblk-cache.c +++ b/drivers/lightnvm/pblk-cache.c @@ -43,8 +43,10 @@ retry: if (unlikely(!bio_has_data(bio))) goto out; - w_ctx.flags = flags; pblk_ppa_set_empty(&w_ctx.ppa); + w_ctx.flags = flags; + if (bio->bi_opf & REQ_PREFLUSH) + w_ctx.flags |= PBLK_FLUSH_ENTRY; for (i = 0; i < nr_entries; i++) { void *data = bio_data(bio); diff --git a/drivers/lightnvm/pblk-rb.c b/drivers/lightnvm/pblk-rb.c index 74c768ce09ef..05e6b2e9221d 100644 --- a/drivers/lightnvm/pblk-rb.c +++ b/drivers/lightnvm/pblk-rb.c @@ -355,7 +355,6 @@ static int pblk_rb_sync_point_set(struct pblk_rb *rb, struct bio *bio, { struct pblk_rb_entry *entry; unsigned int subm, sync_point; - int flags; subm = READ_ONCE(rb->subm); @@ -369,12 +368,6 @@ static int pblk_rb_sync_point_set(struct pblk_rb *rb, struct bio *bio, sync_point = (pos == 0) ? (rb->nr_entries - 1) : (pos - 1); entry = &rb->entries[sync_point]; - flags = READ_ONCE(entry->w_ctx.flags); - flags |= PBLK_FLUSH_ENTRY; - - /* Release flags on context. 
Protect from writes */ - smp_store_release(&entry->w_ctx.flags, flags); - /* Protect syncs */ smp_store_release(&rb->sync_point, sync_point); @@ -454,6 +447,7 @@ static int pblk_rb_may_write_flush(struct pblk_rb *rb, unsigned int nr_entries, /* Protect from read count */ smp_store_release(&rb->mem, mem); + return 1; } -- cgit v1.2.3 From 875d94f3a4838f2243334e5ce55ac8153f9bbf5b Mon Sep 17 00:00:00 2001 From: Javier González Date: Fri, 13 Oct 2017 14:46:18 +0200 Subject: lightnvm: pblk: allocate bio size more accurately MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Wait until we know the exact number of ppas to be sent to the device, before allocating the bio. Signed-off-by: Javier González Signed-off-by: Matias Bjørling Signed-off-by: Jens Axboe --- drivers/lightnvm/pblk-rb.c | 5 +++-- drivers/lightnvm/pblk-write.c | 20 ++++++++++---------- drivers/lightnvm/pblk.h | 4 ++-- 3 files changed, 15 insertions(+), 14 deletions(-) (limited to 'drivers/lightnvm') diff --git a/drivers/lightnvm/pblk-rb.c b/drivers/lightnvm/pblk-rb.c index 05e6b2e9221d..1173e2380137 100644 --- a/drivers/lightnvm/pblk-rb.c +++ b/drivers/lightnvm/pblk-rb.c @@ -552,12 +552,13 @@ out: * persist data on the write buffer to the media. 
*/ unsigned int pblk_rb_read_to_bio(struct pblk_rb *rb, struct nvm_rq *rqd, - struct bio *bio, unsigned int pos, - unsigned int nr_entries, unsigned int count) + unsigned int pos, unsigned int nr_entries, + unsigned int count) { struct pblk *pblk = container_of(rb, struct pblk, rwb); struct request_queue *q = pblk->dev->q; struct pblk_c_ctx *c_ctx = nvm_rq_to_pdu(rqd); + struct bio *bio = rqd->bio; struct pblk_rb_entry *entry; struct page *page; unsigned int pad = 0, to_read = nr_entries; diff --git a/drivers/lightnvm/pblk-write.c b/drivers/lightnvm/pblk-write.c index 0c0481cf9f5d..140a26edd1d3 100644 --- a/drivers/lightnvm/pblk-write.c +++ b/drivers/lightnvm/pblk-write.c @@ -534,24 +534,24 @@ static int pblk_submit_write(struct pblk *pblk) if (!secs_to_flush && secs_avail < pblk->min_write_pgs) return 1; - bio = bio_alloc(GFP_KERNEL, pblk->max_write_pgs); - - bio->bi_iter.bi_sector = 0; /* internal bio */ - bio_set_op_attrs(bio, REQ_OP_WRITE, 0); - - rqd = pblk_alloc_rqd(pblk, WRITE); - rqd->bio = bio; - secs_to_sync = pblk_calc_secs_to_sync(pblk, secs_avail, secs_to_flush); if (secs_to_sync > pblk->max_write_pgs) { pr_err("pblk: bad buffer sync calculation\n"); - goto fail_put_bio; + return 1; } secs_to_com = (secs_to_sync > secs_avail) ? 
secs_avail : secs_to_sync; pos = pblk_rb_read_commit(&pblk->rwb, secs_to_com); - if (pblk_rb_read_to_bio(&pblk->rwb, rqd, bio, pos, secs_to_sync, + bio = bio_alloc(GFP_KERNEL, secs_to_sync); + + bio->bi_iter.bi_sector = 0; /* internal bio */ + bio_set_op_attrs(bio, REQ_OP_WRITE, 0); + + rqd = pblk_alloc_rqd(pblk, WRITE); + rqd->bio = bio; + + if (pblk_rb_read_to_bio(&pblk->rwb, rqd, pos, secs_to_sync, secs_avail)) { pr_err("pblk: corrupted write bio\n"); goto fail_put_bio; diff --git a/drivers/lightnvm/pblk.h b/drivers/lightnvm/pblk.h index 876b50f97234..9f162057d497 100644 --- a/drivers/lightnvm/pblk.h +++ b/drivers/lightnvm/pblk.h @@ -666,8 +666,8 @@ void pblk_rb_flush(struct pblk_rb *rb); void pblk_rb_sync_l2p(struct pblk_rb *rb); unsigned int pblk_rb_read_to_bio(struct pblk_rb *rb, struct nvm_rq *rqd, - struct bio *bio, unsigned int pos, - unsigned int nr_entries, unsigned int count); + unsigned int pos, unsigned int nr_entries, + unsigned int count); unsigned int pblk_rb_read_to_bio_list(struct pblk_rb *rb, struct bio *bio, struct list_head *list, unsigned int max); -- cgit v1.2.3 From e2cddf2082e700218b898b1c899f6a1c2130074a Mon Sep 17 00:00:00 2001 From: Javier González Date: Fri, 13 Oct 2017 14:46:19 +0200 Subject: lightnvm: pblk: improve naming for internal req. MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Each request type sent to the LightNVM subsystem requires different metadata. Until now, we have tailored this metadata based on write, read and erase commands. However, pblk uses different metadata for internal writes that do not hit the write buffer. Instead of abusing the metadata for reads, create a new request type - internal write to improve code readability. In the process, create internal values for each I/O type instead of abusing the READ/WRITE macros, as suggested by Christoph. 
Signed-off-by: Javier González Signed-off-by: Matias Bjørling Signed-off-by: Jens Axboe --- drivers/lightnvm/pblk-core.c | 32 ++++++++++++++++---------------- drivers/lightnvm/pblk-read.c | 6 +++--- drivers/lightnvm/pblk-recovery.c | 12 ++++++------ drivers/lightnvm/pblk-write.c | 16 ++++++++-------- drivers/lightnvm/pblk.h | 11 ++++++++--- 5 files changed, 41 insertions(+), 36 deletions(-) (limited to 'drivers/lightnvm') diff --git a/drivers/lightnvm/pblk-core.c b/drivers/lightnvm/pblk-core.c index 3d27d24baa0b..a492964abea8 100644 --- a/drivers/lightnvm/pblk-core.c +++ b/drivers/lightnvm/pblk-core.c @@ -152,7 +152,7 @@ struct nvm_rq *pblk_alloc_rqd(struct pblk *pblk, int rw) struct nvm_rq *rqd; int rq_size; - if (rw == WRITE) { + if (rw == PBLK_WRITE) { pool = pblk->w_rq_pool; rq_size = pblk_w_rq_size; } else { @@ -170,7 +170,7 @@ void pblk_free_rqd(struct pblk *pblk, struct nvm_rq *rqd, int rw) { mempool_t *pool; - if (rw == WRITE) + if (rw == PBLK_WRITE) pool = pblk->w_rq_pool; else pool = pblk->r_rq_pool; @@ -569,10 +569,10 @@ static int pblk_line_submit_emeta_io(struct pblk *pblk, struct pblk_line *line, int ret; DECLARE_COMPLETION_ONSTACK(wait); - if (dir == WRITE) { + if (dir == PBLK_WRITE) { bio_op = REQ_OP_WRITE; cmd_op = NVM_OP_PWRITE; - } else if (dir == READ) { + } else if (dir == PBLK_READ) { bio_op = REQ_OP_READ; cmd_op = NVM_OP_PREAD; } else @@ -612,10 +612,10 @@ next_rq: rqd.end_io = pblk_end_io_sync; rqd.private = &wait; - if (dir == WRITE) { + if (dir == PBLK_WRITE) { struct pblk_sec_meta *meta_list = rqd.meta_list; - rqd.flags = pblk_set_progr_mode(pblk, WRITE); + rqd.flags = pblk_set_progr_mode(pblk, PBLK_WRITE); for (i = 0; i < rqd.nr_ppas; ) { spin_lock(&line->lock); paddr = __pblk_alloc_page(pblk, line, min); @@ -679,7 +679,7 @@ next_rq: reinit_completion(&wait); if (rqd.error) { - if (dir == WRITE) + if (dir == PBLK_WRITE) pblk_log_write_err(pblk, &rqd); else pblk_log_read_err(pblk, &rqd); @@ -722,12 +722,12 @@ static int 
pblk_line_submit_smeta_io(struct pblk *pblk, struct pblk_line *line, int flags; DECLARE_COMPLETION_ONSTACK(wait); - if (dir == WRITE) { + if (dir == PBLK_WRITE) { bio_op = REQ_OP_WRITE; cmd_op = NVM_OP_PWRITE; - flags = pblk_set_progr_mode(pblk, WRITE); + flags = pblk_set_progr_mode(pblk, PBLK_WRITE); lba_list = emeta_to_lbas(pblk, line->emeta->buf); - } else if (dir == READ) { + } else if (dir == PBLK_READ) { bio_op = REQ_OP_READ; cmd_op = NVM_OP_PREAD; flags = pblk_set_read_mode(pblk, PBLK_READ_SEQUENTIAL); @@ -765,7 +765,7 @@ static int pblk_line_submit_smeta_io(struct pblk *pblk, struct pblk_line *line, rqd.ppa_list[i] = addr_to_gen_ppa(pblk, paddr, line->id); - if (dir == WRITE) { + if (dir == PBLK_WRITE) { __le64 addr_empty = cpu_to_le64(ADDR_EMPTY); meta_list[i].lba = lba_list[paddr] = addr_empty; @@ -791,7 +791,7 @@ static int pblk_line_submit_smeta_io(struct pblk *pblk, struct pblk_line *line, atomic_dec(&pblk->inflight_io); if (rqd.error) { - if (dir == WRITE) + if (dir == PBLK_WRITE) pblk_log_write_err(pblk, &rqd); else pblk_log_read_err(pblk, &rqd); @@ -807,14 +807,14 @@ int pblk_line_read_smeta(struct pblk *pblk, struct pblk_line *line) { u64 bpaddr = pblk_line_smeta_start(pblk, line); - return pblk_line_submit_smeta_io(pblk, line, bpaddr, READ); + return pblk_line_submit_smeta_io(pblk, line, bpaddr, PBLK_READ); } int pblk_line_read_emeta(struct pblk *pblk, struct pblk_line *line, void *emeta_buf) { return pblk_line_submit_emeta_io(pblk, line, emeta_buf, - line->emeta_ssec, READ); + line->emeta_ssec, PBLK_READ); } static void pblk_setup_e_rq(struct pblk *pblk, struct nvm_rq *rqd, @@ -823,7 +823,7 @@ static void pblk_setup_e_rq(struct pblk *pblk, struct nvm_rq *rqd, rqd->opcode = NVM_OP_ERASE; rqd->ppa_addr = ppa; rqd->nr_ppas = 1; - rqd->flags = pblk_set_progr_mode(pblk, ERASE); + rqd->flags = pblk_set_progr_mode(pblk, PBLK_ERASE); rqd->bio = NULL; } @@ -1045,7 +1045,7 @@ static int pblk_line_init_bb(struct pblk *pblk, struct pblk_line *line, 
line->smeta_ssec = off; line->cur_sec = off + lm->smeta_sec; - if (init && pblk_line_submit_smeta_io(pblk, line, off, WRITE)) { + if (init && pblk_line_submit_smeta_io(pblk, line, off, PBLK_WRITE)) { pr_debug("pblk: line smeta I/O failed. Retry\n"); return 1; } diff --git a/drivers/lightnvm/pblk-read.c b/drivers/lightnvm/pblk-read.c index e7141b1aaded..4b1722fbe5a0 100644 --- a/drivers/lightnvm/pblk-read.c +++ b/drivers/lightnvm/pblk-read.c @@ -142,7 +142,7 @@ static void pblk_end_io_read(struct nvm_rq *rqd) atomic_long_sub(rqd->nr_ppas, &pblk->inflight_reads); #endif - pblk_free_rqd(pblk, rqd, READ); + pblk_free_rqd(pblk, rqd, PBLK_READ); atomic_dec(&pblk->inflight_io); } @@ -307,7 +307,7 @@ int pblk_submit_read(struct pblk *pblk, struct bio *bio) bitmap_zero(&read_bitmap, nr_secs); - rqd = pblk_alloc_rqd(pblk, READ); + rqd = pblk_alloc_rqd(pblk, PBLK_READ); rqd->opcode = NVM_OP_PREAD; rqd->bio = bio; @@ -382,7 +382,7 @@ int pblk_submit_read(struct pblk *pblk, struct bio *bio) return NVM_IO_OK; fail_rqd_free: - pblk_free_rqd(pblk, rqd, READ); + pblk_free_rqd(pblk, rqd, PBLK_READ); return ret; } diff --git a/drivers/lightnvm/pblk-recovery.c b/drivers/lightnvm/pblk-recovery.c index e59270e60b58..19f2fb1a9e4b 100644 --- a/drivers/lightnvm/pblk-recovery.c +++ b/drivers/lightnvm/pblk-recovery.c @@ -67,7 +67,7 @@ void pblk_submit_rec(struct work_struct *work) err: bio_put(bio); - pblk_free_rqd(pblk, rqd, WRITE); + pblk_free_rqd(pblk, rqd, PBLK_WRITE); } int pblk_recov_setup_rq(struct pblk *pblk, struct pblk_c_ctx *c_ctx, @@ -80,7 +80,7 @@ int pblk_recov_setup_rq(struct pblk *pblk, struct pblk_c_ctx *c_ctx, struct pblk_c_ctx *rec_ctx; int nr_entries = c_ctx->nr_valid + c_ctx->nr_padded; - rec_rqd = pblk_alloc_rqd(pblk, WRITE); + rec_rqd = pblk_alloc_rqd(pblk, PBLK_WRITE); rec_ctx = nvm_rq_to_pdu(rec_rqd); /* Copy completion bitmap, but exclude the first X completed entries */ @@ -334,7 +334,7 @@ static void pblk_end_io_recov(struct nvm_rq *rqd) pblk_up_page(pblk, 
rqd->ppa_list, rqd->nr_ppas); nvm_dev_dma_free(dev->parent, rqd->meta_list, rqd->dma_meta_list); - pblk_free_rqd(pblk, rqd, WRITE); + pblk_free_rqd(pblk, rqd, PBLK_WRITE_INT); atomic_dec(&pblk->inflight_io); kref_put(&pad_rq->ref, pblk_recov_complete); @@ -404,11 +404,11 @@ next_pad_rq: bio->bi_iter.bi_sector = 0; /* internal bio */ bio_set_op_attrs(bio, REQ_OP_WRITE, 0); - rqd = pblk_alloc_rqd(pblk, WRITE); + rqd = pblk_alloc_rqd(pblk, PBLK_WRITE_INT); rqd->bio = bio; rqd->opcode = NVM_OP_PWRITE; - rqd->flags = pblk_set_progr_mode(pblk, WRITE); + rqd->flags = pblk_set_progr_mode(pblk, PBLK_WRITE); rqd->meta_list = meta_list; rqd->nr_ppas = rq_ppas; rqd->ppa_list = ppa_list; @@ -782,7 +782,7 @@ static int pblk_recov_l2p_from_oob(struct pblk *pblk, struct pblk_line *line) goto free_meta_list; } - rqd = pblk_alloc_rqd(pblk, READ); + rqd = pblk_alloc_rqd(pblk, PBLK_READ); p.ppa_list = ppa_list; p.meta_list = meta_list; diff --git a/drivers/lightnvm/pblk-write.c b/drivers/lightnvm/pblk-write.c index 140a26edd1d3..c1b8b83e149d 100644 --- a/drivers/lightnvm/pblk-write.c +++ b/drivers/lightnvm/pblk-write.c @@ -46,7 +46,7 @@ static unsigned long pblk_end_w_bio(struct pblk *pblk, struct nvm_rq *rqd, nvm_dev_dma_free(dev->parent, rqd->meta_list, rqd->dma_meta_list); bio_put(rqd->bio); - pblk_free_rqd(pblk, rqd, WRITE); + pblk_free_rqd(pblk, rqd, PBLK_WRITE); return ret; } @@ -195,7 +195,7 @@ static void pblk_end_io_write_meta(struct nvm_rq *rqd) GFP_ATOMIC, pblk->close_wq); nvm_dev_dma_free(dev->parent, rqd->meta_list, rqd->dma_meta_list); - pblk_free_rqd(pblk, rqd, READ); + pblk_free_rqd(pblk, rqd, PBLK_WRITE_INT); atomic_dec(&pblk->inflight_io); } @@ -209,7 +209,7 @@ static int pblk_alloc_w_rq(struct pblk *pblk, struct nvm_rq *rqd, /* Setup write request */ rqd->opcode = NVM_OP_PWRITE; rqd->nr_ppas = nr_secs; - rqd->flags = pblk_set_progr_mode(pblk, WRITE); + rqd->flags = pblk_set_progr_mode(pblk, PBLK_WRITE); rqd->private = pblk; rqd->end_io = end_io; @@ -275,7 +275,7 @@ 
int pblk_setup_w_rec_rq(struct pblk *pblk, struct nvm_rq *rqd, pblk_map_rq(pblk, rqd, c_ctx->sentry, lun_bitmap, c_ctx->nr_valid, 0); rqd->ppa_status = (u64)0; - rqd->flags = pblk_set_progr_mode(pblk, WRITE); + rqd->flags = pblk_set_progr_mode(pblk, PBLK_WRITE); return ret; } @@ -366,7 +366,7 @@ int pblk_submit_meta_io(struct pblk *pblk, struct pblk_line *meta_line) int i, j; int ret; - rqd = pblk_alloc_rqd(pblk, READ); + rqd = pblk_alloc_rqd(pblk, PBLK_WRITE_INT); m_ctx = nvm_rq_to_pdu(rqd); m_ctx->private = meta_line; @@ -424,7 +424,7 @@ fail_rollback: fail_free_bio: bio_put(bio); fail_free_rqd: - pblk_free_rqd(pblk, rqd, READ); + pblk_free_rqd(pblk, rqd, PBLK_WRITE_INT); return ret; } @@ -548,7 +548,7 @@ static int pblk_submit_write(struct pblk *pblk) bio->bi_iter.bi_sector = 0; /* internal bio */ bio_set_op_attrs(bio, REQ_OP_WRITE, 0); - rqd = pblk_alloc_rqd(pblk, WRITE); + rqd = pblk_alloc_rqd(pblk, PBLK_WRITE); rqd->bio = bio; if (pblk_rb_read_to_bio(&pblk->rwb, rqd, pos, secs_to_sync, @@ -570,7 +570,7 @@ fail_free_bio: pblk_free_write_rqd(pblk, rqd); fail_put_bio: bio_put(bio); - pblk_free_rqd(pblk, rqd, WRITE); + pblk_free_rqd(pblk, rqd, PBLK_WRITE); return 1; } diff --git a/drivers/lightnvm/pblk.h b/drivers/lightnvm/pblk.h index 9f162057d497..d01e003d3d74 100644 --- a/drivers/lightnvm/pblk.h +++ b/drivers/lightnvm/pblk.h @@ -55,11 +55,16 @@ for ((i) = 0, rlun = &(pblk)->luns[0]; \ (i) < (pblk)->nr_luns; (i)++, rlun = &(pblk)->luns[(i)]) -#define ERASE 2 /* READ = 0, WRITE = 1 */ - /* Static pool sizes */ #define PBLK_GEN_WS_POOL_SIZE (2) +enum { + PBLK_READ = READ, + PBLK_WRITE = WRITE,/* Write from write buffer */ + PBLK_WRITE_INT, /* Internal write - no write buffer */ + PBLK_ERASE, +}; + enum { /* IO Types */ PBLK_IOTYPE_USER = 1 << 0, @@ -1132,7 +1137,7 @@ static inline int pblk_set_progr_mode(struct pblk *pblk, int type) flags = geo->plane_mode >> 1; - if (type == WRITE) + if (type == PBLK_WRITE) flags |= NVM_IO_SCRAMBLE_ENABLE; return flags; -- cgit 
v1.2.3 From 67bf26a3220e3bd403a62a9289aa1d065d3db82c Mon Sep 17 00:00:00 2001 From: Javier González Date: Fri, 13 Oct 2017 14:46:20 +0200 Subject: lightnvm: pblk: refactor rqd alloc/free MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Refactor the rqd allocation and free functions so that all I/O types can use these helper functions. Signed-off-by: Javier González Signed-off-by: Matias Bjørling Signed-off-by: Jens Axboe --- drivers/lightnvm/pblk-core.c | 40 ++++++++++++++++++++++++++++++---------- drivers/lightnvm/pblk-read.c | 2 -- drivers/lightnvm/pblk-recovery.c | 2 -- drivers/lightnvm/pblk-write.c | 7 ------- drivers/lightnvm/pblk.h | 4 ++-- 5 files changed, 32 insertions(+), 23 deletions(-) (limited to 'drivers/lightnvm') diff --git a/drivers/lightnvm/pblk-core.c b/drivers/lightnvm/pblk-core.c index a492964abea8..0de3875211b1 100644 --- a/drivers/lightnvm/pblk-core.c +++ b/drivers/lightnvm/pblk-core.c @@ -146,18 +146,26 @@ static void pblk_invalidate_range(struct pblk *pblk, sector_t slba, spin_unlock(&pblk->trans_lock); } -struct nvm_rq *pblk_alloc_rqd(struct pblk *pblk, int rw) +/* Caller must guarantee that the request is a valid type */ +struct nvm_rq *pblk_alloc_rqd(struct pblk *pblk, int type) { mempool_t *pool; struct nvm_rq *rqd; int rq_size; - if (rw == PBLK_WRITE) { + switch (type) { + case PBLK_WRITE: + case PBLK_WRITE_INT: pool = pblk->w_rq_pool; rq_size = pblk_w_rq_size; - } else { + break; + case PBLK_READ: pool = pblk->r_rq_pool; rq_size = pblk_g_rq_size; + break; + default: + pool = pblk->e_rq_pool; + rq_size = pblk_g_rq_size; } rqd = mempool_alloc(pool, GFP_KERNEL); @@ -166,15 +174,30 @@ struct nvm_rq *pblk_alloc_rqd(struct pblk *pblk, int rw) return rqd; } -void pblk_free_rqd(struct pblk *pblk, struct nvm_rq *rqd, int rw) +/* Typically used on completion path. 
Cannot guarantee request consistency */ +void pblk_free_rqd(struct pblk *pblk, struct nvm_rq *rqd, int type) { + struct nvm_tgt_dev *dev = pblk->dev; mempool_t *pool; - if (rw == PBLK_WRITE) + switch (type) { + case PBLK_WRITE: + kfree(((struct pblk_c_ctx *)nvm_rq_to_pdu(rqd))->lun_bitmap); + case PBLK_WRITE_INT: pool = pblk->w_rq_pool; - else + break; + case PBLK_READ: pool = pblk->r_rq_pool; + break; + case PBLK_ERASE: + pool = pblk->e_rq_pool; + break; + default: + pr_err("pblk: trying to free unknown rqd type\n"); + return; + } + nvm_dev_dma_free(dev->parent, rqd->meta_list, rqd->dma_meta_list); mempool_free(rqd, pool); } @@ -1470,8 +1493,7 @@ int pblk_blk_erase_async(struct pblk *pblk, struct ppa_addr ppa) struct nvm_rq *rqd; int err; - rqd = mempool_alloc(pblk->e_rq_pool, GFP_KERNEL); - memset(rqd, 0, pblk_g_rq_size); + rqd = pblk_alloc_rqd(pblk, PBLK_ERASE); pblk_setup_e_rq(pblk, rqd, ppa); @@ -1739,8 +1761,6 @@ void pblk_up_rq(struct pblk *pblk, struct ppa_addr *ppa_list, int nr_ppas, rlun = &pblk->luns[bit]; up(&rlun->wr_sem); } - - kfree(lun_bitmap); } void pblk_update_map(struct pblk *pblk, sector_t lba, struct ppa_addr ppa) diff --git a/drivers/lightnvm/pblk-read.c b/drivers/lightnvm/pblk-read.c index 4b1722fbe5a0..d7c90c303540 100644 --- a/drivers/lightnvm/pblk-read.c +++ b/drivers/lightnvm/pblk-read.c @@ -124,8 +124,6 @@ static void pblk_end_io_read(struct nvm_rq *rqd) WARN_ONCE(bio->bi_status, "pblk: corrupted read error\n"); #endif - nvm_dev_dma_free(dev->parent, rqd->meta_list, rqd->dma_meta_list); - bio_put(bio); if (r_ctx->private) { struct bio *orig_bio = r_ctx->private; diff --git a/drivers/lightnvm/pblk-recovery.c b/drivers/lightnvm/pblk-recovery.c index 19f2fb1a9e4b..686bc17f080f 100644 --- a/drivers/lightnvm/pblk-recovery.c +++ b/drivers/lightnvm/pblk-recovery.c @@ -329,11 +329,9 @@ static void pblk_end_io_recov(struct nvm_rq *rqd) { struct pblk_pad_rq *pad_rq = rqd->private; struct pblk *pblk = pad_rq->pblk; - struct nvm_tgt_dev *dev = 
pblk->dev; pblk_up_page(pblk, rqd->ppa_list, rqd->nr_ppas); - nvm_dev_dma_free(dev->parent, rqd->meta_list, rqd->dma_meta_list); pblk_free_rqd(pblk, rqd, PBLK_WRITE_INT); atomic_dec(&pblk->inflight_io); diff --git a/drivers/lightnvm/pblk-write.c b/drivers/lightnvm/pblk-write.c index c1b8b83e149d..f2e846fe9242 100644 --- a/drivers/lightnvm/pblk-write.c +++ b/drivers/lightnvm/pblk-write.c @@ -20,7 +20,6 @@ static unsigned long pblk_end_w_bio(struct pblk *pblk, struct nvm_rq *rqd, struct pblk_c_ctx *c_ctx) { - struct nvm_tgt_dev *dev = pblk->dev; struct bio *original_bio; unsigned long ret; int i; @@ -43,8 +42,6 @@ static unsigned long pblk_end_w_bio(struct pblk *pblk, struct nvm_rq *rqd, ret = pblk_rb_sync_advance(&pblk->rwb, c_ctx->nr_valid); - nvm_dev_dma_free(dev->parent, rqd->meta_list, rqd->dma_meta_list); - bio_put(rqd->bio); pblk_free_rqd(pblk, rqd, PBLK_WRITE); @@ -176,7 +173,6 @@ static void pblk_end_io_write(struct nvm_rq *rqd) static void pblk_end_io_write_meta(struct nvm_rq *rqd) { struct pblk *pblk = rqd->private; - struct nvm_tgt_dev *dev = pblk->dev; struct pblk_g_ctx *m_ctx = nvm_rq_to_pdu(rqd); struct pblk_line *line = m_ctx->private; struct pblk_emeta *emeta = line->emeta; @@ -194,7 +190,6 @@ static void pblk_end_io_write_meta(struct nvm_rq *rqd) pblk_gen_run_ws(pblk, line, NULL, pblk_line_close_ws, GFP_ATOMIC, pblk->close_wq); - nvm_dev_dma_free(dev->parent, rqd->meta_list, rqd->dma_meta_list); pblk_free_rqd(pblk, rqd, PBLK_WRITE_INT); atomic_dec(&pblk->inflight_io); @@ -419,8 +414,6 @@ fail_rollback: pblk_dealloc_page(pblk, meta_line, rq_ppas); list_add(&meta_line->list, &meta_line->list); spin_unlock(&l_mg->close_lock); - - nvm_dev_dma_free(dev->parent, rqd->meta_list, rqd->dma_meta_list); fail_free_bio: bio_put(bio); fail_free_rqd: diff --git a/drivers/lightnvm/pblk.h b/drivers/lightnvm/pblk.h index d01e003d3d74..12a20f800c26 100644 --- a/drivers/lightnvm/pblk.h +++ b/drivers/lightnvm/pblk.h @@ -699,11 +699,11 @@ ssize_t pblk_rb_sysfs(struct 
pblk_rb *rb, char *buf); /* * pblk core */ -struct nvm_rq *pblk_alloc_rqd(struct pblk *pblk, int rw); +struct nvm_rq *pblk_alloc_rqd(struct pblk *pblk, int type); +void pblk_free_rqd(struct pblk *pblk, struct nvm_rq *rqd, int type); void pblk_set_sec_per_write(struct pblk *pblk, int sec_per_write); int pblk_setup_w_rec_rq(struct pblk *pblk, struct nvm_rq *rqd, struct pblk_c_ctx *c_ctx); -void pblk_free_rqd(struct pblk *pblk, struct nvm_rq *rqd, int rw); void pblk_wait_for_meta(struct pblk *pblk); struct ppa_addr pblk_get_lba_map(struct pblk *pblk, sector_t lba); void pblk_discard(struct pblk *pblk, struct bio *bio); -- cgit v1.2.3 From 26532ee52b77185b095d29b54c83386f737a74ba Mon Sep 17 00:00:00 2001 From: Javier González Date: Fri, 13 Oct 2017 14:46:21 +0200 Subject: lightnvm: pblk: use rqd->end_io for completion MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit For consistency with the rest of pblk, use rqd->end_io to point to the function taking care of ending the request on the completion path. 
Signed-off-by: Javier González Signed-off-by: Matias Bjørling Signed-off-by: Jens Axboe --- drivers/lightnvm/pblk-core.c | 7 ------- drivers/lightnvm/pblk-read.c | 5 ++--- 2 files changed, 2 insertions(+), 10 deletions(-) (limited to 'drivers/lightnvm') diff --git a/drivers/lightnvm/pblk-core.c b/drivers/lightnvm/pblk-core.c index 0de3875211b1..08d166ac4f3c 100644 --- a/drivers/lightnvm/pblk-core.c +++ b/drivers/lightnvm/pblk-core.c @@ -261,13 +261,6 @@ void pblk_write_should_kick(struct pblk *pblk) pblk_write_kick(pblk); } -void pblk_end_bio_sync(struct bio *bio) -{ - struct completion *waiting = bio->bi_private; - - complete(waiting); -} - void pblk_end_io_sync(struct nvm_rq *rqd) { struct completion *waiting = rqd->private; diff --git a/drivers/lightnvm/pblk-read.c b/drivers/lightnvm/pblk-read.c index d7c90c303540..0299fc08291d 100644 --- a/drivers/lightnvm/pblk-read.c +++ b/drivers/lightnvm/pblk-read.c @@ -170,13 +170,12 @@ static int pblk_fill_partial_read_bio(struct pblk *pblk, struct nvm_rq *rqd, new_bio->bi_iter.bi_sector = 0; /* internal bio */ bio_set_op_attrs(new_bio, REQ_OP_READ, 0); - new_bio->bi_private = &wait; - new_bio->bi_end_io = pblk_end_bio_sync; rqd->bio = new_bio; rqd->nr_ppas = nr_holes; rqd->flags = pblk_set_read_mode(pblk, PBLK_READ_RANDOM); - rqd->end_io = NULL; + rqd->end_io = pblk_end_io_sync; + rqd->private = &wait; if (unlikely(nr_secs > 1 && nr_holes == 1)) { ppa_ptr = rqd->ppa_list; -- cgit v1.2.3 From a4809fee4e774fdf3296cc69c22ce6e6acef36b2 Mon Sep 17 00:00:00 2001 From: Javier González Date: Fri, 13 Oct 2017 14:46:22 +0200 Subject: lightnvm: pblk: check lba sanity on read path MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit As part of pblk's recovery scheme, we store the lba mapped to each physical sector on the device's out-of-band (OOB) area. 
On the read path, we can use this information to validate that the data being delivered to the upper layers corresponds to the lba being requested. The cost of this check is an extra copy on the DMA region on the device and an extra comparison in the host, given that (i) the OOB area is being read together with the data in the media, and (ii) the DMA region allocated for the ppa list can be reused for the metadata stored on the OOB area. Signed-off-by: Javier González Signed-off-by: Matias Bjørling Signed-off-by: Jens Axboe --- drivers/lightnvm/pblk-read.c | 51 +++++++++++++++++++++++++++++++++++++++++--- drivers/lightnvm/pblk.h | 4 +++- 2 files changed, 51 insertions(+), 4 deletions(-) (limited to 'drivers/lightnvm') diff --git a/drivers/lightnvm/pblk-read.c b/drivers/lightnvm/pblk-read.c index 0299fc08291d..a465d9980df4 100644 --- a/drivers/lightnvm/pblk-read.c +++ b/drivers/lightnvm/pblk-read.c @@ -41,6 +41,7 @@ static int pblk_read_from_cache(struct pblk *pblk, struct bio *bio, static void pblk_read_ppalist_rq(struct pblk *pblk, struct nvm_rq *rqd, sector_t blba, unsigned long *read_bitmap) { + struct pblk_sec_meta *meta_list = rqd->meta_list; struct bio *bio = rqd->bio; struct ppa_addr ppas[PBLK_MAX_REQ_ADDRS]; int nr_secs = rqd->nr_ppas; @@ -56,6 +57,7 @@ static void pblk_read_ppalist_rq(struct pblk *pblk, struct nvm_rq *rqd, retry: if (pblk_ppa_empty(p)) { WARN_ON(test_and_set_bit(i, read_bitmap)); + meta_list[i].lba = cpu_to_le64(ADDR_EMPTY); if (unlikely(!advanced_bio)) { bio_advance(bio, (i) * PBLK_EXPOSED_PAGE_SIZE); @@ -75,6 +77,7 @@ retry: goto retry; } WARN_ON(test_and_set_bit(i, read_bitmap)); + meta_list[i].lba = cpu_to_le64(lba); advanced_bio = true; #ifdef CONFIG_NVM_DEBUG atomic_long_inc(&pblk->cache_reads); @@ -110,10 +113,26 @@ static int pblk_submit_read_io(struct pblk *pblk, struct nvm_rq *rqd) return NVM_IO_OK; } +static void pblk_read_check(struct pblk *pblk, struct nvm_rq *rqd, + sector_t blba) +{ + struct pblk_sec_meta *meta_list = 
rqd->meta_list; + int nr_lbas = rqd->nr_ppas; + int i; + + for (i = 0; i < nr_lbas; i++) { + u64 lba = le64_to_cpu(meta_list[i].lba); + + if (lba == ADDR_EMPTY) + continue; + + WARN(lba != blba + i, "pblk: corrupted read LBA\n"); + } +} + static void pblk_end_io_read(struct nvm_rq *rqd) { struct pblk *pblk = rqd->private; - struct nvm_tgt_dev *dev = pblk->dev; struct pblk_g_ctx *r_ctx = nvm_rq_to_pdu(rqd); struct bio *bio = rqd->bio; @@ -124,6 +143,8 @@ static void pblk_end_io_read(struct nvm_rq *rqd) WARN_ONCE(bio->bi_status, "pblk: corrupted read error\n"); #endif + pblk_read_check(pblk, rqd, r_ctx->lba); + bio_put(bio); if (r_ctx->private) { struct bio *orig_bio = r_ctx->private; @@ -149,15 +170,21 @@ static int pblk_fill_partial_read_bio(struct pblk *pblk, struct nvm_rq *rqd, unsigned long *read_bitmap) { struct bio *new_bio, *bio = rqd->bio; + struct pblk_sec_meta *meta_list = rqd->meta_list; struct bio_vec src_bv, dst_bv; void *ppa_ptr = NULL; void *src_p, *dst_p; dma_addr_t dma_ppa_list = 0; + __le64 *lba_list_mem, *lba_list_media; int nr_secs = rqd->nr_ppas; int nr_holes = nr_secs - bitmap_weight(read_bitmap, nr_secs); int i, ret, hole; DECLARE_COMPLETION_ONSTACK(wait); + /* Re-use allocated memory for intermediate lbas */ + lba_list_mem = (((void *)rqd->ppa_list) + pblk_dma_ppa_size); + lba_list_media = (((void *)rqd->ppa_list) + 2 * pblk_dma_ppa_size); + new_bio = bio_alloc(GFP_KERNEL, nr_holes); if (pblk_bio_add_pages(pblk, new_bio, GFP_KERNEL, nr_holes)) @@ -168,6 +195,9 @@ static int pblk_fill_partial_read_bio(struct pblk *pblk, struct nvm_rq *rqd, goto err; } + for (i = 0; i < nr_secs; i++) + lba_list_mem[i] = meta_list[i].lba; + new_bio->bi_iter.bi_sector = 0; /* internal bio */ bio_set_op_attrs(new_bio, REQ_OP_READ, 0); @@ -207,10 +237,17 @@ static int pblk_fill_partial_read_bio(struct pblk *pblk, struct nvm_rq *rqd, rqd->dma_ppa_list = dma_ppa_list; } + for (i = 0; i < nr_secs; i++) { + lba_list_media[i] = meta_list[i].lba; + meta_list[i].lba = 
lba_list_mem[i]; + } + /* Fill the holes in the original bio */ i = 0; hole = find_first_zero_bit(read_bitmap, nr_secs); do { + meta_list[hole].lba = lba_list_media[i]; + src_bv = new_bio->bi_io_vec[i++]; dst_bv = bio->bi_io_vec[bio_init_idx + hole]; @@ -251,6 +288,7 @@ err: static void pblk_read_rq(struct pblk *pblk, struct nvm_rq *rqd, sector_t lba, unsigned long *read_bitmap) { + struct pblk_sec_meta *meta_list = rqd->meta_list; struct bio *bio = rqd->bio; struct ppa_addr ppa; @@ -263,6 +301,7 @@ static void pblk_read_rq(struct pblk *pblk, struct nvm_rq *rqd, retry: if (pblk_ppa_empty(ppa)) { WARN_ON(test_and_set_bit(0, read_bitmap)); + meta_list[0].lba = cpu_to_le64(ADDR_EMPTY); return; } @@ -274,6 +313,9 @@ retry: pblk_lookup_l2p_seq(pblk, &ppa, lba, 1); goto retry; } + + meta_list[0].lba = cpu_to_le64(lba); + WARN_ON(test_and_set_bit(0, read_bitmap)); #ifdef CONFIG_NVM_DEBUG atomic_long_inc(&pblk->cache_reads); @@ -290,9 +332,10 @@ int pblk_submit_read(struct pblk *pblk, struct bio *bio) struct nvm_tgt_dev *dev = pblk->dev; sector_t blba = pblk_get_lba(bio); unsigned int nr_secs = pblk_get_secs(bio); + struct pblk_g_ctx *r_ctx; struct nvm_rq *rqd; - unsigned long read_bitmap; /* Max 64 ppas per request */ unsigned int bio_init_idx; + unsigned long read_bitmap; /* Max 64 ppas per request */ int ret = NVM_IO_ERR; /* logic error: lba out-of-bounds. Ignore read request */ @@ -312,6 +355,9 @@ int pblk_submit_read(struct pblk *pblk, struct bio *bio) rqd->private = pblk; rqd->end_io = pblk_end_io_read; + r_ctx = nvm_rq_to_pdu(rqd); + r_ctx->lba = blba; + /* Save the index for this bio's start. This is needed in case * we need to fill a partial read. 
*/ @@ -344,7 +390,6 @@ int pblk_submit_read(struct pblk *pblk, struct bio *bio) /* All sectors are to be read from the device */ if (bitmap_empty(&read_bitmap, rqd->nr_ppas)) { struct bio *int_bio = NULL; - struct pblk_g_ctx *r_ctx = nvm_rq_to_pdu(rqd); /* Clone read bio to deal with read errors internally */ int_bio = bio_clone_fast(bio, GFP_KERNEL, pblk_bio_set); diff --git a/drivers/lightnvm/pblk.h b/drivers/lightnvm/pblk.h index 12a20f800c26..4a51e6d4d036 100644 --- a/drivers/lightnvm/pblk.h +++ b/drivers/lightnvm/pblk.h @@ -99,6 +99,7 @@ enum { }; #define pblk_dma_meta_size (sizeof(struct pblk_sec_meta) * PBLK_MAX_REQ_ADDRS) +#define pblk_dma_ppa_size (sizeof(u64) * PBLK_MAX_REQ_ADDRS) /* write buffer completion context */ struct pblk_c_ctx { @@ -110,9 +111,10 @@ struct pblk_c_ctx { unsigned int nr_padded; }; -/* generic context */ +/* read context */ struct pblk_g_ctx { void *private; + u64 lba; }; /* Pad context */ -- cgit v1.2.3 From 7bd4d370db6090004a06deb526f0f01fa99a3f9f Mon Sep 17 00:00:00 2001 From: Javier González Date: Fri, 13 Oct 2017 14:46:23 +0200 Subject: lightnvm: pblk: guarantee line integrity on reads MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit When a line is recycled during garbage collection, reads can still be issued to the line. If the line is freed in the middle of this process, data corruption might occur. This patch guarantees that lines are not freed in the middle of reads that target them (lines). Specifically, we use the existing line reference to decide when a line is eligible for being freed after the recycle process. 
Signed-off-by: Javier González Signed-off-by: Matias Bjørling Signed-off-by: Jens Axboe --- drivers/lightnvm/pblk-core.c | 56 ++++++++++++++++++++++++++++++---- drivers/lightnvm/pblk-init.c | 14 +++++++-- drivers/lightnvm/pblk-read.c | 71 +++++++++++++++++++++++++++++++++----------- drivers/lightnvm/pblk.h | 2 ++ 4 files changed, 118 insertions(+), 25 deletions(-) (limited to 'drivers/lightnvm') diff --git a/drivers/lightnvm/pblk-core.c b/drivers/lightnvm/pblk-core.c index 08d166ac4f3c..0a41fb998d55 100644 --- a/drivers/lightnvm/pblk-core.c +++ b/drivers/lightnvm/pblk-core.c @@ -1460,10 +1460,8 @@ void pblk_line_free(struct pblk *pblk, struct pblk_line *line) line->emeta = NULL; } -void pblk_line_put(struct kref *ref) +static void __pblk_line_put(struct pblk *pblk, struct pblk_line *line) { - struct pblk_line *line = container_of(ref, struct pblk_line, ref); - struct pblk *pblk = line->pblk; struct pblk_line_mgmt *l_mg = &pblk->l_mg; spin_lock(&line->lock); @@ -1481,6 +1479,43 @@ void pblk_line_put(struct kref *ref) pblk_rl_free_lines_inc(&pblk->rl, line); } +static void pblk_line_put_ws(struct work_struct *work) +{ + struct pblk_line_ws *line_put_ws = container_of(work, + struct pblk_line_ws, ws); + struct pblk *pblk = line_put_ws->pblk; + struct pblk_line *line = line_put_ws->line; + + __pblk_line_put(pblk, line); + mempool_free(line_put_ws, pblk->gen_ws_pool); +} + +void pblk_line_put(struct kref *ref) +{ + struct pblk_line *line = container_of(ref, struct pblk_line, ref); + struct pblk *pblk = line->pblk; + + __pblk_line_put(pblk, line); +} + +void pblk_line_put_wq(struct kref *ref) +{ + struct pblk_line *line = container_of(ref, struct pblk_line, ref); + struct pblk *pblk = line->pblk; + struct pblk_line_ws *line_put_ws; + + line_put_ws = mempool_alloc(pblk->gen_ws_pool, GFP_ATOMIC); + if (!line_put_ws) + return; + + line_put_ws->pblk = pblk; + line_put_ws->line = line; + line_put_ws->priv = NULL; + + INIT_WORK(&line_put_ws->ws, pblk_line_put_ws); + 
queue_work(pblk->r_end_wq, &line_put_ws->ws); +} + int pblk_blk_erase_async(struct pblk *pblk, struct ppa_addr ppa) { struct nvm_rq *rqd; @@ -1878,8 +1913,19 @@ void pblk_lookup_l2p_seq(struct pblk *pblk, struct ppa_addr *ppas, int i; spin_lock(&pblk->trans_lock); - for (i = 0; i < nr_secs; i++) - ppas[i] = pblk_trans_map_get(pblk, blba + i); + for (i = 0; i < nr_secs; i++) { + struct ppa_addr ppa; + + ppa = ppas[i] = pblk_trans_map_get(pblk, blba + i); + + /* If the L2P entry maps to a line, the reference is valid */ + if (!pblk_ppa_empty(ppa) && !pblk_addr_in_cache(ppa)) { + int line_id = pblk_dev_ppa_to_line(ppa); + struct pblk_line *line = &pblk->lines[line_id]; + + kref_get(&line->ref); + } + } spin_unlock(&pblk->trans_lock); } diff --git a/drivers/lightnvm/pblk-init.c b/drivers/lightnvm/pblk-init.c index 4d719782f65b..34527646c01b 100644 --- a/drivers/lightnvm/pblk-init.c +++ b/drivers/lightnvm/pblk-init.c @@ -271,15 +271,22 @@ static int pblk_core_init(struct pblk *pblk) if (!pblk->bb_wq) goto free_close_wq; - if (pblk_set_ppaf(pblk)) + pblk->r_end_wq = alloc_workqueue("pblk-read-end-wq", + WQ_MEM_RECLAIM | WQ_UNBOUND, 0); + if (!pblk->r_end_wq) goto free_bb_wq; + if (pblk_set_ppaf(pblk)) + goto free_r_end_wq; + if (pblk_rwb_init(pblk)) - goto free_bb_wq; + goto free_r_end_wq; INIT_LIST_HEAD(&pblk->compl_list); return 0; +free_r_end_wq: + destroy_workqueue(pblk->r_end_wq); free_bb_wq: destroy_workqueue(pblk->bb_wq); free_close_wq: @@ -304,6 +311,9 @@ static void pblk_core_free(struct pblk *pblk) if (pblk->close_wq) destroy_workqueue(pblk->close_wq); + if (pblk->r_end_wq) + destroy_workqueue(pblk->r_end_wq); + if (pblk->bb_wq) destroy_workqueue(pblk->bb_wq); diff --git a/drivers/lightnvm/pblk-read.c b/drivers/lightnvm/pblk-read.c index a465d9980df4..402f8eff6a2e 100644 --- a/drivers/lightnvm/pblk-read.c +++ b/drivers/lightnvm/pblk-read.c @@ -130,9 +130,34 @@ static void pblk_read_check(struct pblk *pblk, struct nvm_rq *rqd, } } -static void 
pblk_end_io_read(struct nvm_rq *rqd) +static void pblk_read_put_rqd_kref(struct pblk *pblk, struct nvm_rq *rqd) +{ + struct ppa_addr *ppa_list; + int i; + + ppa_list = (rqd->nr_ppas > 1) ? rqd->ppa_list : &rqd->ppa_addr; + + for (i = 0; i < rqd->nr_ppas; i++) { + struct ppa_addr ppa = ppa_list[i]; + struct pblk_line *line; + + line = &pblk->lines[pblk_dev_ppa_to_line(ppa)]; + kref_put(&line->ref, pblk_line_put_wq); + } +} + +static void pblk_end_user_read(struct bio *bio) +{ +#ifdef CONFIG_NVM_DEBUG + WARN_ONCE(bio->bi_status, "pblk: corrupted read bio\n"); +#endif + bio_endio(bio); + bio_put(bio); +} + +static void __pblk_end_io_read(struct pblk *pblk, struct nvm_rq *rqd, + bool put_line) { - struct pblk *pblk = rqd->private; struct pblk_g_ctx *r_ctx = nvm_rq_to_pdu(rqd); struct bio *bio = rqd->bio; @@ -146,15 +171,11 @@ static void pblk_end_io_read(struct nvm_rq *rqd) pblk_read_check(pblk, rqd, r_ctx->lba); bio_put(bio); - if (r_ctx->private) { - struct bio *orig_bio = r_ctx->private; + if (r_ctx->private) + pblk_end_user_read((struct bio *)r_ctx->private); -#ifdef CONFIG_NVM_DEBUG - WARN_ONCE(orig_bio->bi_status, "pblk: corrupted read bio\n"); -#endif - bio_endio(orig_bio); - bio_put(orig_bio); - } + if (put_line) + pblk_read_put_rqd_kref(pblk, rqd); #ifdef CONFIG_NVM_DEBUG atomic_long_add(rqd->nr_ppas, &pblk->sync_reads); @@ -165,6 +186,13 @@ static void pblk_end_io_read(struct nvm_rq *rqd) atomic_dec(&pblk->inflight_io); } +static void pblk_end_io_read(struct nvm_rq *rqd) +{ + struct pblk *pblk = rqd->private; + + __pblk_end_io_read(pblk, rqd, true); +} + static int pblk_fill_partial_read_bio(struct pblk *pblk, struct nvm_rq *rqd, unsigned int bio_init_idx, unsigned long *read_bitmap) @@ -233,8 +261,12 @@ static int pblk_fill_partial_read_bio(struct pblk *pblk, struct nvm_rq *rqd, } if (unlikely(nr_secs > 1 && nr_holes == 1)) { + struct ppa_addr ppa; + + ppa = rqd->ppa_addr; rqd->ppa_list = ppa_ptr; rqd->dma_ppa_list = dma_ppa_list; + rqd->ppa_list[0] = ppa; } 
for (i = 0; i < nr_secs; i++) { @@ -246,6 +278,11 @@ static int pblk_fill_partial_read_bio(struct pblk *pblk, struct nvm_rq *rqd, i = 0; hole = find_first_zero_bit(read_bitmap, nr_secs); do { + int line_id = pblk_dev_ppa_to_line(rqd->ppa_list[i]); + struct pblk_line *line = &pblk->lines[line_id]; + + kref_put(&line->ref, pblk_line_put); + meta_list[hole].lba = lba_list_media[i]; src_bv = new_bio->bi_io_vec[i++]; @@ -269,19 +306,17 @@ static int pblk_fill_partial_read_bio(struct pblk *pblk, struct nvm_rq *rqd, bio_put(new_bio); /* Complete the original bio and associated request */ + bio_endio(bio); rqd->bio = bio; rqd->nr_ppas = nr_secs; - rqd->private = pblk; - bio_endio(bio); - pblk_end_io_read(rqd); + __pblk_end_io_read(pblk, rqd, false); return NVM_IO_OK; err: /* Free allocated pages in new bio */ pblk_bio_free_pages(pblk, bio, 0, new_bio->bi_vcnt); - rqd->private = pblk; - pblk_end_io_read(rqd); + __pblk_end_io_read(pblk, rqd, false); return NVM_IO_ERR; } @@ -314,11 +349,11 @@ retry: goto retry; } + WARN_ON(test_and_set_bit(0, read_bitmap)); meta_list[0].lba = cpu_to_le64(lba); - WARN_ON(test_and_set_bit(0, read_bitmap)); #ifdef CONFIG_NVM_DEBUG - atomic_long_inc(&pblk->cache_reads); + atomic_long_inc(&pblk->cache_reads); #endif } else { rqd->ppa_addr = ppa; @@ -383,7 +418,7 @@ int pblk_submit_read(struct pblk *pblk, struct bio *bio) if (bitmap_full(&read_bitmap, nr_secs)) { bio_endio(bio); atomic_inc(&pblk->inflight_io); - pblk_end_io_read(rqd); + __pblk_end_io_read(pblk, rqd, false); return NVM_IO_OK; } diff --git a/drivers/lightnvm/pblk.h b/drivers/lightnvm/pblk.h index 4a51e6d4d036..e4704373398b 100644 --- a/drivers/lightnvm/pblk.h +++ b/drivers/lightnvm/pblk.h @@ -636,6 +636,7 @@ struct pblk { struct workqueue_struct *close_wq; struct workqueue_struct *bb_wq; + struct workqueue_struct *r_end_wq; struct timer_list wtimer; @@ -741,6 +742,7 @@ int pblk_line_read_emeta(struct pblk *pblk, struct pblk_line *line, void *emeta_buf); int 
pblk_blk_erase_async(struct pblk *pblk, struct ppa_addr erase_ppa); void pblk_line_put(struct kref *ref); +void pblk_line_put_wq(struct kref *ref); struct list_head *pblk_line_gc_list(struct pblk *pblk, struct pblk_line *line); u64 pblk_lookup_page(struct pblk *pblk, struct pblk_line *line); void pblk_dealloc_page(struct pblk *pblk, struct pblk_line *line, int nr_secs); -- cgit v1.2.3 From 0f9248cf1e22333b2a0458540aafb1ad3b2b3337 Mon Sep 17 00:00:00 2001 From: Javier González Date: Fri, 13 Oct 2017 14:46:24 +0200 Subject: lightnvm: pblk: remove redundant check on read path MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit A partial read I/O in pblk is an I/O where some sectors reside in the write buffer in main memory and some are persisted on the device. Such an I/O must at least contain 2 lbas, therefore checking for the case where a single lba is mapped is not necessary. Signed-off-by: Javier González Signed-off-by: Matias Bjørling Signed-off-by: Jens Axboe --- drivers/lightnvm/pblk-read.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'drivers/lightnvm') diff --git a/drivers/lightnvm/pblk-read.c b/drivers/lightnvm/pblk-read.c index 402f8eff6a2e..71c58503f1a4 100644 --- a/drivers/lightnvm/pblk-read.c +++ b/drivers/lightnvm/pblk-read.c @@ -235,7 +235,7 @@ static int pblk_fill_partial_read_bio(struct pblk *pblk, struct nvm_rq *rqd, rqd->end_io = pblk_end_io_sync; rqd->private = &wait; - if (unlikely(nr_secs > 1 && nr_holes == 1)) { + if (unlikely(nr_holes == 1)) { ppa_ptr = rqd->ppa_list; dma_ppa_list = rqd->dma_ppa_list; rqd->ppa_addr = rqd->ppa_list[0]; @@ -260,7 +260,7 @@ static int pblk_fill_partial_read_bio(struct pblk *pblk, struct nvm_rq *rqd, #endif } - if (unlikely(nr_secs > 1 && nr_holes == 1)) { + if (unlikely(nr_holes == 1)) { struct ppa_addr ppa; ppa = rqd->ppa_addr; -- cgit v1.2.3 From 1e82123da6a4c6019ef03bcd47e4b3dc18dd136e Mon Sep 17 00:00:00 2001 From: Javier González Date: Fri, 13 Oct 
2017 14:46:25 +0200 Subject: lightnvm: pblk: remove I/O dependency on write path MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit pblk schedules user I/O, metadata I/O and erases on the write path in order to minimize collisions at the media level. Until now, there has been a dependency between user and metadata I/Os that could lead to a deadlock as both take the per-LUN semaphore to schedule submission. This patch removes this dependency and guarantees forward progress at a per I/O granularity. Signed-off-by: Javier González Signed-off-by: Matias Bjørling Signed-off-by: Jens Axboe --- drivers/lightnvm/pblk-write.c | 145 +++++++++++++++++++----------------------- 1 file changed, 65 insertions(+), 80 deletions(-) (limited to 'drivers/lightnvm') diff --git a/drivers/lightnvm/pblk-write.c b/drivers/lightnvm/pblk-write.c index f2e846fe9242..6c1cafafef53 100644 --- a/drivers/lightnvm/pblk-write.c +++ b/drivers/lightnvm/pblk-write.c @@ -220,15 +220,16 @@ static int pblk_alloc_w_rq(struct pblk *pblk, struct nvm_rq *rqd, } static int pblk_setup_w_rq(struct pblk *pblk, struct nvm_rq *rqd, - struct pblk_c_ctx *c_ctx, struct ppa_addr *erase_ppa) + struct ppa_addr *erase_ppa) { struct pblk_line_meta *lm = &pblk->lm; struct pblk_line *e_line = pblk_line_get_erase(pblk); + struct pblk_c_ctx *c_ctx = nvm_rq_to_pdu(rqd); unsigned int valid = c_ctx->nr_valid; unsigned int padded = c_ctx->nr_padded; unsigned int nr_secs = valid + padded; unsigned long *lun_bitmap; - int ret = 0; + int ret; lun_bitmap = kzalloc(lm->lun_bitmap_len, GFP_KERNEL); if (!lun_bitmap) @@ -294,55 +295,6 @@ static int pblk_calc_secs_to_sync(struct pblk *pblk, unsigned int secs_avail, return secs_to_sync; } -static inline int pblk_valid_meta_ppa(struct pblk *pblk, - struct pblk_line *meta_line, - struct ppa_addr *ppa_list, int nr_ppas) -{ - struct nvm_tgt_dev *dev = pblk->dev; - struct nvm_geo *geo = &dev->geo; - struct pblk_line *data_line; - struct ppa_addr ppa, 
ppa_opt; - u64 paddr; - int i; - - data_line = &pblk->lines[pblk_dev_ppa_to_line(ppa_list[0])]; - paddr = pblk_lookup_page(pblk, meta_line); - ppa = addr_to_gen_ppa(pblk, paddr, 0); - - if (test_bit(pblk_ppa_to_pos(geo, ppa), data_line->blk_bitmap)) - return 1; - - /* Schedule a metadata I/O that is half the distance from the data I/O - * with regards to the number of LUNs forming the pblk instance. This - * balances LUN conflicts across every I/O. - * - * When the LUN configuration changes (e.g., due to GC), this distance - * can align, which would result on a LUN deadlock. In this case, modify - * the distance to not be optimal, but allow metadata I/Os to succeed. - */ - ppa_opt = addr_to_gen_ppa(pblk, paddr + data_line->meta_distance, 0); - if (unlikely(ppa_opt.ppa == ppa.ppa)) { - data_line->meta_distance--; - return 0; - } - - for (i = 0; i < nr_ppas; i += pblk->min_write_pgs) - if (ppa_list[i].g.ch == ppa_opt.g.ch && - ppa_list[i].g.lun == ppa_opt.g.lun) - return 1; - - if (test_bit(pblk_ppa_to_pos(geo, ppa_opt), data_line->blk_bitmap)) { - for (i = 0; i < nr_ppas; i += pblk->min_write_pgs) - if (ppa_list[i].g.ch == ppa.g.ch && - ppa_list[i].g.lun == ppa.g.lun) - return 0; - - return 1; - } - - return 0; -} - int pblk_submit_meta_io(struct pblk *pblk, struct pblk_line *meta_line) { struct nvm_tgt_dev *dev = pblk->dev; @@ -421,8 +373,44 @@ fail_free_rqd: return ret; } -static int pblk_sched_meta_io(struct pblk *pblk, struct ppa_addr *prev_list, - int prev_n) +static inline bool pblk_valid_meta_ppa(struct pblk *pblk, + struct pblk_line *meta_line, + struct nvm_rq *data_rqd) +{ + struct nvm_tgt_dev *dev = pblk->dev; + struct nvm_geo *geo = &dev->geo; + struct pblk_c_ctx *data_c_ctx = nvm_rq_to_pdu(data_rqd); + struct pblk_line *data_line = pblk_line_get_data(pblk); + struct ppa_addr ppa, ppa_opt; + u64 paddr; + int pos_opt; + + /* Schedule a metadata I/O that is half the distance from the data I/O + * with regards to the number of LUNs forming the pblk instance. 
This + * balances LUN conflicts across every I/O. + * + * When the LUN configuration changes (e.g., due to GC), this distance + * can align, which would result on metadata and data I/Os colliding. In + * this case, modify the distance to not be optimal, but move the + * optimal in the right direction. + */ + paddr = pblk_lookup_page(pblk, meta_line); + ppa = addr_to_gen_ppa(pblk, paddr, 0); + ppa_opt = addr_to_gen_ppa(pblk, paddr + data_line->meta_distance, 0); + pos_opt = pblk_ppa_to_pos(geo, ppa_opt); + + if (test_bit(pos_opt, data_c_ctx->lun_bitmap) || + test_bit(pos_opt, data_line->blk_bitmap)) + return true; + + if (unlikely(pblk_ppa_comp(ppa_opt, ppa))) + data_line->meta_distance--; + + return false; +} + +static struct pblk_line *pblk_should_submit_meta_io(struct pblk *pblk, + struct nvm_rq *data_rqd) { struct pblk_line_meta *lm = &pblk->lm; struct pblk_line_mgmt *l_mg = &pblk->l_mg; @@ -432,57 +420,45 @@ static int pblk_sched_meta_io(struct pblk *pblk, struct ppa_addr *prev_list, retry: if (list_empty(&l_mg->emeta_list)) { spin_unlock(&l_mg->close_lock); - return 0; + return NULL; } meta_line = list_first_entry(&l_mg->emeta_list, struct pblk_line, list); if (meta_line->emeta->mem >= lm->emeta_len[0]) goto retry; spin_unlock(&l_mg->close_lock); - if (!pblk_valid_meta_ppa(pblk, meta_line, prev_list, prev_n)) - return 0; + if (!pblk_valid_meta_ppa(pblk, meta_line, data_rqd)) + return NULL; - return pblk_submit_meta_io(pblk, meta_line); + return meta_line; } static int pblk_submit_io_set(struct pblk *pblk, struct nvm_rq *rqd) { - struct pblk_c_ctx *c_ctx = nvm_rq_to_pdu(rqd); struct ppa_addr erase_ppa; + struct pblk_line *meta_line; int err; ppa_set_empty(&erase_ppa); /* Assign lbas to ppas and populate request structure */ - err = pblk_setup_w_rq(pblk, rqd, c_ctx, &erase_ppa); + err = pblk_setup_w_rq(pblk, rqd, &erase_ppa); if (err) { pr_err("pblk: could not setup write request: %d\n", err); return NVM_IO_ERR; } - if (likely(ppa_empty(erase_ppa))) { - /* 
Submit metadata write for previous data line */ - err = pblk_sched_meta_io(pblk, rqd->ppa_list, rqd->nr_ppas); - if (err) { - pr_err("pblk: metadata I/O submission failed: %d", err); - return NVM_IO_ERR; - } + meta_line = pblk_should_submit_meta_io(pblk, rqd); - /* Submit data write for current data line */ - err = pblk_submit_io(pblk, rqd); - if (err) { - pr_err("pblk: data I/O submission failed: %d\n", err); - return NVM_IO_ERR; - } - } else { - /* Submit data write for current data line */ - err = pblk_submit_io(pblk, rqd); - if (err) { - pr_err("pblk: data I/O submission failed: %d\n", err); - return NVM_IO_ERR; - } + /* Submit data write for current data line */ + err = pblk_submit_io(pblk, rqd); + if (err) { + pr_err("pblk: data I/O submission failed: %d\n", err); + return NVM_IO_ERR; + } - /* Submit available erase for next data line */ + if (!ppa_empty(erase_ppa)) { + /* Submit erase for next data line */ if (pblk_blk_erase_async(pblk, erase_ppa)) { struct pblk_line *e_line = pblk_line_get_erase(pblk); struct nvm_tgt_dev *dev = pblk->dev; @@ -495,6 +471,15 @@ static int pblk_submit_io_set(struct pblk *pblk, struct nvm_rq *rqd) } } + if (meta_line) { + /* Submit metadata write for previous data line */ + err = pblk_submit_meta_io(pblk, meta_line); + if (err) { + pr_err("pblk: metadata I/O submission failed: %d", err); + return NVM_IO_ERR; + } + } + return NVM_IO_OK; } -- cgit v1.2.3 From 21d2287119e843929c29fb1adbd271bde1fac7ae Mon Sep 17 00:00:00 2001 From: Javier González Date: Fri, 13 Oct 2017 14:46:26 +0200 Subject: lightnvm: pblk: enable 1 LUN configuration MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Metadata I/Os are scheduled to minimize their impact on user data I/Os. When there are enough LUNs instantiated (i.e., enough bandwidth), it is easy to interleave metadata and data one after the other so that metadata I/Os are the ones being blocked and not vice-versa. 
We do this by calculating the distance between the I/Os in terms of the LUNs that are not in use, and selecting a free LUN that satisfies the simple heuristic that metadata is scheduled behind. The per-LUN semaphores guarantee consistency. This works fine on >1 LUN configuration. However, when a single LUN is instantiated, this design leads to a deadlock, where metadata waits to be scheduled on a free LUN. This patch implements the 1 LUN case by simply scheduling the metadata I/O after the data I/O. In the process, we refactor the way a line is replaced to ensure that metadata writes are submitted after data writes in order to guarantee block sequentiality. Note that, since there is only one LUN, both I/Os will block each other by design. However, such configuration only pursues tight read latencies, not write bandwidth. Signed-off-by: Javier González Signed-off-by: Matias Bjørling Signed-off-by: Jens Axboe --- drivers/lightnvm/pblk-core.c | 17 ++++++++++------- drivers/lightnvm/pblk-init.c | 8 ++++++-- drivers/lightnvm/pblk-map.c | 21 ++++++++++++--------- drivers/lightnvm/pblk.h | 2 +- 4 files changed, 29 insertions(+), 19 deletions(-) (limited to 'drivers/lightnvm') diff --git a/drivers/lightnvm/pblk-core.c b/drivers/lightnvm/pblk-core.c index 0a41fb998d55..e38e91897246 100644 --- a/drivers/lightnvm/pblk-core.c +++ b/drivers/lightnvm/pblk-core.c @@ -1372,17 +1372,17 @@ void pblk_pipeline_stop(struct pblk *pblk) spin_unlock(&l_mg->free_lock); } -void pblk_line_replace_data(struct pblk *pblk) +struct pblk_line *pblk_line_replace_data(struct pblk *pblk) { struct pblk_line_mgmt *l_mg = &pblk->l_mg; - struct pblk_line *cur, *new; + struct pblk_line *cur, *new = NULL; unsigned int left_seblks; int is_next = 0; cur = l_mg->data_line; new = l_mg->data_next; if (!new) - return; + goto out; l_mg->data_line = new; spin_lock(&l_mg->free_lock); @@ -1390,7 +1390,7 @@ void pblk_line_replace_data(struct pblk *pblk) l_mg->data_line = NULL; l_mg->data_next = NULL; 
spin_unlock(&l_mg->free_lock); - return; + goto out; } pblk_line_setup_metadata(new, l_mg, &pblk->lm); @@ -1402,7 +1402,7 @@ retry_erase: /* If line is not fully erased, erase it */ if (atomic_read(&new->left_eblks)) { if (pblk_line_erase(pblk, new)) - return; + goto out; } else { io_schedule(); } @@ -1413,7 +1413,7 @@ retry_setup: if (!pblk_line_init_metadata(pblk, new, cur)) { new = pblk_line_retry(pblk, new); if (!new) - return; + goto out; goto retry_setup; } @@ -1421,7 +1421,7 @@ retry_setup: if (!pblk_line_init_bb(pblk, new, 1)) { new = pblk_line_retry(pblk, new); if (!new) - return; + goto out; goto retry_setup; } @@ -1445,6 +1445,9 @@ retry_setup: if (is_next) pblk_rl_free_lines_dec(&pblk->rl, l_mg->data_next); + +out: + return new; } void pblk_line_free(struct pblk *pblk, struct pblk_line *line) diff --git a/drivers/lightnvm/pblk-init.c b/drivers/lightnvm/pblk-init.c index 34527646c01b..3c3749186053 100644 --- a/drivers/lightnvm/pblk-init.c +++ b/drivers/lightnvm/pblk-init.c @@ -711,8 +711,12 @@ add_emeta_page: } lm->emeta_bb = geo->nr_luns - i; - lm->min_blk_line = 1 + DIV_ROUND_UP(lm->smeta_sec + lm->emeta_sec[0], - geo->sec_per_blk); + + lm->min_blk_line = 1; + if (geo->nr_luns > 1) + lm->min_blk_line += DIV_ROUND_UP(lm->smeta_sec + + lm->emeta_sec[0], geo->sec_per_blk); + if (lm->min_blk_line > lm->blk_per_line) { pr_err("pblk: config. not supported. Min. 
LUN in line:%d\n", lm->blk_per_line); diff --git a/drivers/lightnvm/pblk-map.c b/drivers/lightnvm/pblk-map.c index fddb924f6dde..3bc4c94f9cf2 100644 --- a/drivers/lightnvm/pblk-map.c +++ b/drivers/lightnvm/pblk-map.c @@ -25,13 +25,23 @@ static void pblk_map_page_data(struct pblk *pblk, unsigned int sentry, unsigned int valid_secs) { struct pblk_line *line = pblk_line_get_data(pblk); - struct pblk_emeta *emeta = line->emeta; + struct pblk_emeta *emeta; struct pblk_w_ctx *w_ctx; - __le64 *lba_list = emeta_to_lbas(pblk, emeta->buf); + __le64 *lba_list; u64 paddr; int nr_secs = pblk->min_write_pgs; int i; + if (pblk_line_is_full(line)) { + struct pblk_line *prev_line = line; + + line = pblk_line_replace_data(pblk); + pblk_line_close_meta(pblk, prev_line); + } + + emeta = line->emeta; + lba_list = emeta_to_lbas(pblk, emeta->buf); + paddr = pblk_alloc_page(pblk, line, nr_secs); for (i = 0; i < nr_secs; i++, paddr++) { @@ -60,13 +70,6 @@ static void pblk_map_page_data(struct pblk *pblk, unsigned int sentry, } } - if (pblk_line_is_full(line)) { - struct pblk_line *prev_line = line; - - pblk_line_replace_data(pblk); - pblk_line_close_meta(pblk, prev_line); - } - pblk_down_rq(pblk, ppa_list, nr_secs, lun_bitmap); } diff --git a/drivers/lightnvm/pblk.h b/drivers/lightnvm/pblk.h index e4704373398b..191b1ec0627b 100644 --- a/drivers/lightnvm/pblk.h +++ b/drivers/lightnvm/pblk.h @@ -719,7 +719,7 @@ struct bio *pblk_bio_map_addr(struct pblk *pblk, void *data, int alloc_type, gfp_t gfp_mask); struct pblk_line *pblk_line_get(struct pblk *pblk); struct pblk_line *pblk_line_get_first_data(struct pblk *pblk); -void pblk_line_replace_data(struct pblk *pblk); +struct pblk_line *pblk_line_replace_data(struct pblk *pblk); int pblk_line_recov_alloc(struct pblk *pblk, struct pblk_line *line); void pblk_line_recov_close(struct pblk *pblk, struct pblk_line *line); struct pblk_line *pblk_line_get_data(struct pblk *pblk); -- cgit v1.2.3 From e6b754c252bacebdfbe3c57e790431ab8f445d1f Mon Sep 17 
00:00:00 2001 From: Javier González Date: Fri, 13 Oct 2017 14:46:27 +0200 Subject: lightnvm: pblk: ensure right bad block calculation MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Make sure that the variable controlling block threshold for allocating extra metadata sectors in case of a line with bad blocks does not get a negative value. Otherwise, the line will be marked as corrupted and wasted. Signed-off-by: Javier González Signed-off-by: Matias Bjørling Signed-off-by: Jens Axboe --- drivers/lightnvm/pblk-init.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'drivers/lightnvm') diff --git a/drivers/lightnvm/pblk-init.c b/drivers/lightnvm/pblk-init.c index 3c3749186053..c7239c41ba40 100644 --- a/drivers/lightnvm/pblk-init.c +++ b/drivers/lightnvm/pblk-init.c @@ -710,7 +710,7 @@ add_emeta_page: goto add_emeta_page; } - lm->emeta_bb = geo->nr_luns - i; + lm->emeta_bb = geo->nr_luns > i ? geo->nr_luns - i : 0; lm->min_blk_line = 1; if (geo->nr_luns > 1) -- cgit v1.2.3 From 27b978725d895e704aab44b99242a0514485d798 Mon Sep 17 00:00:00 2001 From: Rakesh Pandit Date: Fri, 13 Oct 2017 14:46:28 +0200 Subject: lightnvm: pblk: fix changing GC group list for a line MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit pblk_line_gc_list seems to have had a bug since the introduction of pblk in getting GC list for a line. In b20ba1bc7 while redesigning the GC algorithm, the naming for the GC thresholds was altered, but the values for high_thrs and mid_thrs were not. The result is that when moving to the GC lists, the mid threshold is never evaluated. 
Fixes: a4bd217b4("lightnvm: physical block device (pblk) target") Signed-off-by: Rakesh Pandit Signed-off-by: Matias Bjørling Signed-off-by: Jens Axboe --- drivers/lightnvm/pblk-init.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'drivers/lightnvm') diff --git a/drivers/lightnvm/pblk-init.c b/drivers/lightnvm/pblk-init.c index c7239c41ba40..56ece7dfac0e 100644 --- a/drivers/lightnvm/pblk-init.c +++ b/drivers/lightnvm/pblk-init.c @@ -678,8 +678,8 @@ static int pblk_lines_init(struct pblk *pblk) lm->blk_bitmap_len = BITS_TO_LONGS(geo->nr_luns) * sizeof(long); lm->sec_bitmap_len = BITS_TO_LONGS(lm->sec_per_line) * sizeof(long); lm->lun_bitmap_len = BITS_TO_LONGS(geo->nr_luns) * sizeof(long); - lm->high_thrs = lm->sec_per_line / 2; - lm->mid_thrs = lm->sec_per_line / 4; + lm->mid_thrs = lm->sec_per_line / 2; + lm->high_thrs = lm->sec_per_line / 4; lm->meta_distance = (geo->nr_luns / 2) * pblk->min_write_pgs; /* Calculate necessary pages for smeta. See comment over struct -- cgit v1.2.3 From e480689bd1cc35f6ed3fa628bc8d913177b0726a Mon Sep 17 00:00:00 2001 From: Rakesh Pandit Date: Fri, 13 Oct 2017 14:46:29 +0200 Subject: lightnvm: pblk: remove useless line MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Signed-off-by: Rakesh Pandit Signed-off-by: Matias Bjørling Signed-off-by: Jens Axboe --- drivers/lightnvm/pblk-core.c | 1 - 1 file changed, 1 deletion(-) (limited to 'drivers/lightnvm') diff --git a/drivers/lightnvm/pblk-core.c b/drivers/lightnvm/pblk-core.c index e38e91897246..43866ad87586 100644 --- a/drivers/lightnvm/pblk-core.c +++ b/drivers/lightnvm/pblk-core.c @@ -1071,7 +1071,6 @@ static int pblk_line_init_bb(struct pblk *pblk, struct pblk_line *line, /* Mark emeta metadata sectors as bad sectors. 
We need to consider bad * blocks to make sure that there are enough sectors to store emeta */ - bit = lm->sec_per_line; off = lm->sec_per_line - lm->emeta_sec[0]; bitmap_set(line->invalid_bitmap, off, lm->emeta_sec[0]); while (nr_bb) { -- cgit v1.2.3 From ef56b9ce562753cacf518f081a4ff3227efdab25 Mon Sep 17 00:00:00 2001 From: Rakesh Pandit Date: Fri, 13 Oct 2017 14:46:30 +0200 Subject: lightnvm: remove unused argument from nvm_set_tgt_bb_tbl MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit vblk isn't being used anyway and if we ever have a usecase we can introduce this again. This makes the logic easier and removes unnecessary checks. Signed-off-by: Rakesh Pandit Signed-off-by: Matias Bjørling Signed-off-by: Jens Axboe --- drivers/lightnvm/core.c | 29 ++++++++++++----------------- include/linux/lightnvm.h | 2 +- 2 files changed, 13 insertions(+), 18 deletions(-) (limited to 'drivers/lightnvm') diff --git a/drivers/lightnvm/core.c b/drivers/lightnvm/core.c index 798964f511cd..231c92899431 100644 --- a/drivers/lightnvm/core.c +++ b/drivers/lightnvm/core.c @@ -633,7 +633,7 @@ int nvm_set_tgt_bb_tbl(struct nvm_tgt_dev *tgt_dev, struct ppa_addr *ppas, memset(&rqd, 0, sizeof(struct nvm_rq)); - nvm_set_rqd_ppalist(tgt_dev, &rqd, ppas, nr_ppas, 1); + nvm_set_rqd_ppalist(tgt_dev, &rqd, ppas, nr_ppas); nvm_rq_tgt_to_dev(tgt_dev, &rqd); ret = dev->ops->set_bb_tbl(dev, &rqd.ppa_addr, rqd.nr_ppas, type); @@ -697,7 +697,7 @@ int nvm_erase_sync(struct nvm_tgt_dev *tgt_dev, struct ppa_addr *ppas, rqd.private = &wait; rqd.flags = geo->plane_mode >> 1; - ret = nvm_set_rqd_ppalist(tgt_dev, &rqd, ppas, nr_ppas, 1); + ret = nvm_set_rqd_ppalist(tgt_dev, &rqd, ppas, nr_ppas); if (ret) return ret; @@ -793,14 +793,14 @@ void nvm_put_area(struct nvm_tgt_dev *tgt_dev, sector_t begin) EXPORT_SYMBOL(nvm_put_area); int nvm_set_rqd_ppalist(struct nvm_tgt_dev *tgt_dev, struct nvm_rq *rqd, - const struct ppa_addr *ppas, int nr_ppas, int vblk) + const struct 
ppa_addr *ppas, int nr_ppas) { struct nvm_dev *dev = tgt_dev->parent; struct nvm_geo *geo = &tgt_dev->geo; int i, plane_cnt, pl_idx; struct ppa_addr ppa; - if ((!vblk || geo->plane_mode == NVM_PLANE_SINGLE) && nr_ppas == 1) { + if (geo->plane_mode == NVM_PLANE_SINGLE && nr_ppas == 1) { rqd->nr_ppas = nr_ppas; rqd->ppa_addr = ppas[0]; @@ -814,19 +814,14 @@ int nvm_set_rqd_ppalist(struct nvm_tgt_dev *tgt_dev, struct nvm_rq *rqd, return -ENOMEM; } - if (!vblk) { - for (i = 0; i < nr_ppas; i++) - rqd->ppa_list[i] = ppas[i]; - } else { - plane_cnt = geo->plane_mode; - rqd->nr_ppas *= plane_cnt; - - for (i = 0; i < nr_ppas; i++) { - for (pl_idx = 0; pl_idx < plane_cnt; pl_idx++) { - ppa = ppas[i]; - ppa.g.pl = pl_idx; - rqd->ppa_list[(pl_idx * nr_ppas) + i] = ppa; - } + plane_cnt = geo->plane_mode; + rqd->nr_ppas *= plane_cnt; + + for (i = 0; i < nr_ppas; i++) { + for (pl_idx = 0; pl_idx < plane_cnt; pl_idx++) { + ppa = ppas[i]; + ppa.g.pl = pl_idx; + rqd->ppa_list[(pl_idx * nr_ppas) + i] = ppa; } } diff --git a/include/linux/lightnvm.h b/include/linux/lightnvm.h index 7b80ac911d26..32ec35b5e18b 100644 --- a/include/linux/lightnvm.h +++ b/include/linux/lightnvm.h @@ -481,7 +481,7 @@ extern int nvm_max_phys_sects(struct nvm_tgt_dev *); extern int nvm_submit_io(struct nvm_tgt_dev *, struct nvm_rq *); extern int nvm_erase_sync(struct nvm_tgt_dev *, struct ppa_addr *, int); extern int nvm_set_rqd_ppalist(struct nvm_tgt_dev *, struct nvm_rq *, - const struct ppa_addr *, int, int); + const struct ppa_addr *, int); extern void nvm_free_rqd_ppalist(struct nvm_tgt_dev *, struct nvm_rq *); extern int nvm_get_l2p_tbl(struct nvm_tgt_dev *, u64, u32, nvm_l2p_update_fn *, void *); -- cgit v1.2.3 From eb6f168f97438bf1cac8b9b1301c662eace9e39f Mon Sep 17 00:00:00 2001 From: Rakesh Pandit Date: Fri, 13 Oct 2017 14:46:31 +0200 Subject: lightnvm: remove stale extern and unused exported symbols MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Not all 
exported symbols are being used outside core and there were some stale entries in lightnvm.h Signed-off-by: Rakesh Pandit Signed-off-by: Matias Bjørling Signed-off-by: Jens Axboe --- drivers/lightnvm/core.c | 129 +++++++++++++++++++++++------------------------ include/linux/lightnvm.h | 7 --- 2 files changed, 64 insertions(+), 72 deletions(-) (limited to 'drivers/lightnvm') diff --git a/drivers/lightnvm/core.c b/drivers/lightnvm/core.c index 231c92899431..0e5f77234c79 100644 --- a/drivers/lightnvm/core.c +++ b/drivers/lightnvm/core.c @@ -226,6 +226,24 @@ static const struct block_device_operations nvm_fops = { .owner = THIS_MODULE, }; +static struct nvm_tgt_type *nvm_find_target_type(const char *name, int lock) +{ + struct nvm_tgt_type *tmp, *tt = NULL; + + if (lock) + down_write(&nvm_tgtt_lock); + + list_for_each_entry(tmp, &nvm_tgt_types, list) + if (!strcmp(name, tmp->name)) { + tt = tmp; + break; + } + + if (lock) + up_write(&nvm_tgtt_lock); + return tt; +} + static int nvm_create_tgt(struct nvm_dev *dev, struct nvm_ioctl_create *create) { struct nvm_ioctl_create_simple *s = &create->conf.s; @@ -549,25 +567,6 @@ void nvm_part_to_tgt(struct nvm_dev *dev, sector_t *entries, } EXPORT_SYMBOL(nvm_part_to_tgt); -struct nvm_tgt_type *nvm_find_target_type(const char *name, int lock) -{ - struct nvm_tgt_type *tmp, *tt = NULL; - - if (lock) - down_write(&nvm_tgtt_lock); - - list_for_each_entry(tmp, &nvm_tgt_types, list) - if (!strcmp(name, tmp->name)) { - tt = tmp; - break; - } - - if (lock) - up_write(&nvm_tgtt_lock); - return tt; -} -EXPORT_SYMBOL(nvm_find_target_type); - int nvm_register_tgt_type(struct nvm_tgt_type *tt) { int ret = 0; @@ -619,6 +618,52 @@ static struct nvm_dev *nvm_find_nvm_dev(const char *name) return NULL; } +static int nvm_set_rqd_ppalist(struct nvm_tgt_dev *tgt_dev, struct nvm_rq *rqd, + const struct ppa_addr *ppas, int nr_ppas) +{ + struct nvm_dev *dev = tgt_dev->parent; + struct nvm_geo *geo = &tgt_dev->geo; + int i, plane_cnt, pl_idx; + struct 
ppa_addr ppa; + + if (geo->plane_mode == NVM_PLANE_SINGLE && nr_ppas == 1) { + rqd->nr_ppas = nr_ppas; + rqd->ppa_addr = ppas[0]; + + return 0; + } + + rqd->nr_ppas = nr_ppas; + rqd->ppa_list = nvm_dev_dma_alloc(dev, GFP_KERNEL, &rqd->dma_ppa_list); + if (!rqd->ppa_list) { + pr_err("nvm: failed to allocate dma memory\n"); + return -ENOMEM; + } + + plane_cnt = geo->plane_mode; + rqd->nr_ppas *= plane_cnt; + + for (i = 0; i < nr_ppas; i++) { + for (pl_idx = 0; pl_idx < plane_cnt; pl_idx++) { + ppa = ppas[i]; + ppa.g.pl = pl_idx; + rqd->ppa_list[(pl_idx * nr_ppas) + i] = ppa; + } + } + + return 0; +} + +static void nvm_free_rqd_ppalist(struct nvm_tgt_dev *tgt_dev, + struct nvm_rq *rqd) +{ + if (!rqd->ppa_list) + return; + + nvm_dev_dma_free(tgt_dev->parent, rqd->ppa_list, rqd->dma_ppa_list); +} + + int nvm_set_tgt_bb_tbl(struct nvm_tgt_dev *tgt_dev, struct ppa_addr *ppas, int nr_ppas, int type) { @@ -792,52 +837,6 @@ void nvm_put_area(struct nvm_tgt_dev *tgt_dev, sector_t begin) } EXPORT_SYMBOL(nvm_put_area); -int nvm_set_rqd_ppalist(struct nvm_tgt_dev *tgt_dev, struct nvm_rq *rqd, - const struct ppa_addr *ppas, int nr_ppas) -{ - struct nvm_dev *dev = tgt_dev->parent; - struct nvm_geo *geo = &tgt_dev->geo; - int i, plane_cnt, pl_idx; - struct ppa_addr ppa; - - if (geo->plane_mode == NVM_PLANE_SINGLE && nr_ppas == 1) { - rqd->nr_ppas = nr_ppas; - rqd->ppa_addr = ppas[0]; - - return 0; - } - - rqd->nr_ppas = nr_ppas; - rqd->ppa_list = nvm_dev_dma_alloc(dev, GFP_KERNEL, &rqd->dma_ppa_list); - if (!rqd->ppa_list) { - pr_err("nvm: failed to allocate dma memory\n"); - return -ENOMEM; - } - - plane_cnt = geo->plane_mode; - rqd->nr_ppas *= plane_cnt; - - for (i = 0; i < nr_ppas; i++) { - for (pl_idx = 0; pl_idx < plane_cnt; pl_idx++) { - ppa = ppas[i]; - ppa.g.pl = pl_idx; - rqd->ppa_list[(pl_idx * nr_ppas) + i] = ppa; - } - } - - return 0; -} -EXPORT_SYMBOL(nvm_set_rqd_ppalist); - -void nvm_free_rqd_ppalist(struct nvm_tgt_dev *tgt_dev, struct nvm_rq *rqd) -{ - if 
(!rqd->ppa_list) - return; - - nvm_dev_dma_free(tgt_dev->parent, rqd->ppa_list, rqd->dma_ppa_list); -} -EXPORT_SYMBOL(nvm_free_rqd_ppalist); - void nvm_end_io(struct nvm_rq *rqd) { struct nvm_tgt_dev *tgt_dev = rqd->dev; diff --git a/include/linux/lightnvm.h b/include/linux/lightnvm.h index 32ec35b5e18b..4f0e4a0fd204 100644 --- a/include/linux/lightnvm.h +++ b/include/linux/lightnvm.h @@ -463,8 +463,6 @@ struct nvm_tgt_type { struct module *owner; }; -extern struct nvm_tgt_type *nvm_find_target_type(const char *, int); - extern int nvm_register_tgt_type(struct nvm_tgt_type *); extern void nvm_unregister_tgt_type(struct nvm_tgt_type *); @@ -480,9 +478,6 @@ extern int nvm_set_tgt_bb_tbl(struct nvm_tgt_dev *, struct ppa_addr *, extern int nvm_max_phys_sects(struct nvm_tgt_dev *); extern int nvm_submit_io(struct nvm_tgt_dev *, struct nvm_rq *); extern int nvm_erase_sync(struct nvm_tgt_dev *, struct ppa_addr *, int); -extern int nvm_set_rqd_ppalist(struct nvm_tgt_dev *, struct nvm_rq *, - const struct ppa_addr *, int); -extern void nvm_free_rqd_ppalist(struct nvm_tgt_dev *, struct nvm_rq *); extern int nvm_get_l2p_tbl(struct nvm_tgt_dev *, u64, u32, nvm_l2p_update_fn *, void *); extern int nvm_get_area(struct nvm_tgt_dev *, sector_t *, sector_t); @@ -491,8 +486,6 @@ extern void nvm_end_io(struct nvm_rq *); extern int nvm_bb_tbl_fold(struct nvm_dev *, u8 *, int); extern int nvm_get_tgt_bb_tbl(struct nvm_tgt_dev *, struct ppa_addr, u8 *); -extern int nvm_dev_factory(struct nvm_dev *, int flags); - extern void nvm_part_to_tgt(struct nvm_dev *, sector_t *, int); #else /* CONFIG_NVM */ -- cgit v1.2.3 From 05ed3447698203219319ec9d1c46303aff5932a2 Mon Sep 17 00:00:00 2001 From: Rakesh Pandit Date: Fri, 13 Oct 2017 14:46:32 +0200 Subject: lightnvm: pblk: reduce arguments in __pblk_rb_update_l2p MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit We already pass the structure pointer so no need to pass the member. 
Signed-off-by: Rakesh Pandit Signed-off-by: Matias Bjørling Signed-off-by: Jens Axboe --- drivers/lightnvm/pblk-rb.c | 11 +++++------ 1 file changed, 5 insertions(+), 6 deletions(-) (limited to 'drivers/lightnvm') diff --git a/drivers/lightnvm/pblk-rb.c b/drivers/lightnvm/pblk-rb.c index 1173e2380137..b8f78e401482 100644 --- a/drivers/lightnvm/pblk-rb.c +++ b/drivers/lightnvm/pblk-rb.c @@ -201,8 +201,7 @@ unsigned int pblk_rb_read_commit(struct pblk_rb *rb, unsigned int nr_entries) return subm; } -static int __pblk_rb_update_l2p(struct pblk_rb *rb, unsigned int *l2p_upd, - unsigned int to_update) +static int __pblk_rb_update_l2p(struct pblk_rb *rb, unsigned int to_update) { struct pblk *pblk = container_of(rb, struct pblk, rwb); struct pblk_line *line; @@ -213,7 +212,7 @@ static int __pblk_rb_update_l2p(struct pblk_rb *rb, unsigned int *l2p_upd, int flags; for (i = 0; i < to_update; i++) { - entry = &rb->entries[*l2p_upd]; + entry = &rb->entries[rb->l2p_update]; w_ctx = &entry->w_ctx; flags = READ_ONCE(entry->w_ctx.flags); @@ -230,7 +229,7 @@ static int __pblk_rb_update_l2p(struct pblk_rb *rb, unsigned int *l2p_upd, line = &pblk->lines[pblk_tgt_ppa_to_line(w_ctx->ppa)]; kref_put(&line->ref, pblk_line_put); clean_wctx(w_ctx); - *l2p_upd = (*l2p_upd + 1) & (rb->nr_entries - 1); + rb->l2p_update = (rb->l2p_update + 1) & (rb->nr_entries - 1); } pblk_rl_out(&pblk->rl, user_io, gc_io); @@ -258,7 +257,7 @@ static int pblk_rb_update_l2p(struct pblk_rb *rb, unsigned int nr_entries, count = nr_entries - space; /* l2p_update used exclusively under rb->w_lock */ - ret = __pblk_rb_update_l2p(rb, &rb->l2p_update, count); + ret = __pblk_rb_update_l2p(rb, count); out: return ret; @@ -280,7 +279,7 @@ void pblk_rb_sync_l2p(struct pblk_rb *rb) sync = smp_load_acquire(&rb->sync); to_update = pblk_rb_ring_count(sync, rb->l2p_update, rb->nr_entries); - __pblk_rb_update_l2p(rb, &rb->l2p_update, to_update); + __pblk_rb_update_l2p(rb, to_update); spin_unlock(&rb->w_lock); } -- cgit v1.2.3 
From 22a4e061ea11cc754785a12b3ba5f3e135bc0c63 Mon Sep 17 00:00:00 2001 From: Rakesh Pandit Date: Fri, 13 Oct 2017 14:46:33 +0200 Subject: lightnvm: pblk: fix releases of kmem cache in error path MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit If pblk_core_init fails lets destroy all global caches. Signed-off-by: Rakesh Pandit Signed-off-by: Matias Bjørling Signed-off-by: Jens Axboe --- drivers/lightnvm/pblk-init.c | 17 ++++++++++++----- 1 file changed, 12 insertions(+), 5 deletions(-) (limited to 'drivers/lightnvm') diff --git a/drivers/lightnvm/pblk-init.c b/drivers/lightnvm/pblk-init.c index 56ece7dfac0e..2e599738372d 100644 --- a/drivers/lightnvm/pblk-init.c +++ b/drivers/lightnvm/pblk-init.c @@ -220,6 +220,14 @@ static int pblk_init_global_caches(struct pblk *pblk) return 0; } +static void pblk_free_global_caches(struct pblk *pblk) +{ + kmem_cache_destroy(pblk_ws_cache); + kmem_cache_destroy(pblk_rec_cache); + kmem_cache_destroy(pblk_g_rq_cache); + kmem_cache_destroy(pblk_w_rq_cache); +} + static int pblk_core_init(struct pblk *pblk) { struct nvm_tgt_dev *dev = pblk->dev; @@ -235,7 +243,7 @@ static int pblk_core_init(struct pblk *pblk) pblk->page_bio_pool = mempool_create_page_pool(nvm_max_phys_sects(dev), 0); if (!pblk->page_bio_pool) - return -ENOMEM; + goto free_global_caches; pblk->gen_ws_pool = mempool_create_slab_pool(PBLK_GEN_WS_POOL_SIZE, pblk_ws_cache); @@ -303,6 +311,8 @@ free_gen_ws_pool: mempool_destroy(pblk->gen_ws_pool); free_page_bio_pool: mempool_destroy(pblk->page_bio_pool); +free_global_caches: + pblk_free_global_caches(pblk); return -ENOMEM; } @@ -324,10 +334,7 @@ static void pblk_core_free(struct pblk *pblk) mempool_destroy(pblk->e_rq_pool); mempool_destroy(pblk->w_rq_pool); - kmem_cache_destroy(pblk_ws_cache); - kmem_cache_destroy(pblk_rec_cache); - kmem_cache_destroy(pblk_g_rq_cache); - kmem_cache_destroy(pblk_w_rq_cache); + pblk_free_global_caches(pblk); } static void pblk_luns_free(struct pblk 
*pblk) -- cgit v1.2.3 From 3e3a5b8ebd5d3b1d68facc58b0674a2564653222 Mon Sep 17 00:00:00 2001 From: Hans Holmberg Date: Fri, 13 Oct 2017 14:46:34 +0200 Subject: lightnvm: pblk: prevent gc kicks when gc is not operational MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit GC can be kicked after it has been shut down when closing the last line during exit, resulting in accesses to freed structures. Make sure that GC is not triggered while it is not operational. Also make sure that GC won't be re-activated during exit when running on another processor by using del_timer_sync. Signed-off-by: Hans Holmberg Signed-off-by: Matias Bjørling Signed-off-by: Jens Axboe --- drivers/lightnvm/pblk-gc.c | 9 +++++---- drivers/lightnvm/pblk-init.c | 1 + 2 files changed, 6 insertions(+), 4 deletions(-) (limited to 'drivers/lightnvm') diff --git a/drivers/lightnvm/pblk-gc.c b/drivers/lightnvm/pblk-gc.c index 7b103bce58bf..81efac18ff57 100644 --- a/drivers/lightnvm/pblk-gc.c +++ b/drivers/lightnvm/pblk-gc.c @@ -478,10 +478,10 @@ void pblk_gc_should_start(struct pblk *pblk) { struct pblk_gc *gc = &pblk->gc; - if (gc->gc_enabled && !gc->gc_active) + if (gc->gc_enabled && !gc->gc_active) { pblk_gc_start(pblk); - - pblk_gc_kick(pblk); + pblk_gc_kick(pblk); + } } /* @@ -620,7 +620,8 @@ void pblk_gc_exit(struct pblk *pblk) flush_workqueue(gc->gc_reader_wq); flush_workqueue(gc->gc_line_reader_wq); - del_timer(&gc->gc_timer); + gc->gc_enabled = 0; + del_timer_sync(&gc->gc_timer); pblk_gc_stop(pblk, 1); if (gc->gc_ts) diff --git a/drivers/lightnvm/pblk-init.c b/drivers/lightnvm/pblk-init.c index 2e599738372d..27eb430958ff 100644 --- a/drivers/lightnvm/pblk-init.c +++ b/drivers/lightnvm/pblk-init.c @@ -931,6 +931,7 @@ static void *pblk_init(struct nvm_tgt_dev *dev, struct gendisk *tdisk, pblk->dev = dev; pblk->disk = tdisk; pblk->state = PBLK_STATE_RUNNING; + pblk->gc.gc_enabled = 0; spin_lock_init(&pblk->trans_lock); spin_lock_init(&pblk->lock); -- cgit
v1.2.3 From 92957091e93931c91fccb7cce456312edeeea36c Mon Sep 17 00:00:00 2001 From: Hans Holmberg Date: Fri, 13 Oct 2017 14:46:35 +0200 Subject: lightnvm: pblk: recover partially written lines correctly MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit When recovering partially written lines, the valid sector count must be decreased by the number of padded sectors in the line. Update line recovery to take all ADDR_EMPTY(padded) sectors into account. Signed-off-by: Hans Holmberg Signed-off-by: Matias Bjørling Signed-off-by: Jens Axboe --- drivers/lightnvm/pblk-recovery.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) (limited to 'drivers/lightnvm') diff --git a/drivers/lightnvm/pblk-recovery.c b/drivers/lightnvm/pblk-recovery.c index 686bc17f080f..a080cf888982 100644 --- a/drivers/lightnvm/pblk-recovery.c +++ b/drivers/lightnvm/pblk-recovery.c @@ -133,8 +133,8 @@ static int pblk_recov_l2p_from_emeta(struct pblk *pblk, struct pblk_line *line) struct pblk_emeta *emeta = line->emeta; struct line_emeta *emeta_buf = emeta->buf; __le64 *lba_list; - int data_start; - int nr_data_lbas, nr_valid_lbas, nr_lbas = 0; + int data_start, data_end; + int nr_valid_lbas, nr_lbas = 0; int i; lba_list = pblk_recov_get_lba_list(pblk, emeta_buf); @@ -142,10 +142,10 @@ static int pblk_recov_l2p_from_emeta(struct pblk *pblk, struct pblk_line *line) return 1; data_start = pblk_line_smeta_start(pblk, line) + lm->smeta_sec; - nr_data_lbas = lm->sec_per_line - lm->emeta_sec[0]; + data_end = lm->sec_per_line - lm->emeta_sec[0]; nr_valid_lbas = le64_to_cpu(emeta_buf->nr_valid_lbas); - for (i = data_start; i < nr_data_lbas && nr_lbas < nr_valid_lbas; i++) { + for (i = data_start; i < data_end; i++) { struct ppa_addr ppa; int pos; -- cgit v1.2.3 From 37ce33d5756f4ba8bdd45371a1918ceeeba5b158 Mon Sep 17 00:00:00 2001 From: Hans Holmberg Date: Fri, 13 Oct 2017 14:46:36 +0200 Subject: lightnvm: pblk: free full lines during recovery MIME-Version: 
1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit When rebuilding the L2P table, any full lines (lines without any valid sectors) will be identified. If these lines are not freed, we risk not being able to allocate the first data line. This patch refactors the part of GC that frees empty lines into a separate function and adds a call to this after the L2P table has been rebuilt. Signed-off-by: Hans Holmberg Signed-off-by: Matias Bjørling Signed-off-by: Jens Axboe --- drivers/lightnvm/pblk-gc.c | 32 ++++++++++++++++++++------------ drivers/lightnvm/pblk-init.c | 3 +++ drivers/lightnvm/pblk.h | 1 + 3 files changed, 24 insertions(+), 12 deletions(-) (limited to 'drivers/lightnvm') diff --git a/drivers/lightnvm/pblk-gc.c b/drivers/lightnvm/pblk-gc.c index 81efac18ff57..374089fe4326 100644 --- a/drivers/lightnvm/pblk-gc.c +++ b/drivers/lightnvm/pblk-gc.c @@ -330,26 +330,16 @@ static bool pblk_gc_should_run(struct pblk_gc *gc, struct pblk_rl *rl) return ((gc->gc_active) && (nr_blocks_need > nr_blocks_free)); } -/* - * Lines with no valid sectors will be returned to the free list immediately. If - * GC is activated - either because the free block count is under the determined - * threshold, or because it is being forced from user space - only lines with a - * high count of invalid sectors will be recycled. 
- */ -static void pblk_gc_run(struct pblk *pblk) +void pblk_gc_free_full_lines(struct pblk *pblk) { struct pblk_line_mgmt *l_mg = &pblk->l_mg; - struct pblk_gc *gc = &pblk->gc; struct pblk_line *line; - struct list_head *group_list; - bool run_gc; - int inflight_gc, gc_group = 0, prev_group = 0; do { spin_lock(&l_mg->gc_lock); if (list_empty(&l_mg->gc_full_list)) { spin_unlock(&l_mg->gc_lock); - break; + return; } line = list_first_entry(&l_mg->gc_full_list, @@ -365,6 +355,24 @@ static void pblk_gc_run(struct pblk *pblk) kref_put(&line->ref, pblk_line_put); } while (1); +} + +/* + * Lines with no valid sectors will be returned to the free list immediately. If + * GC is activated - either because the free block count is under the determined + * threshold, or because it is being forced from user space - only lines with a + * high count of invalid sectors will be recycled. + */ +static void pblk_gc_run(struct pblk *pblk) +{ + struct pblk_line_mgmt *l_mg = &pblk->l_mg; + struct pblk_gc *gc = &pblk->gc; + struct pblk_line *line; + struct list_head *group_list; + bool run_gc; + int inflight_gc, gc_group = 0, prev_group = 0; + + pblk_gc_free_full_lines(pblk); run_gc = pblk_gc_should_run(&pblk->gc, &pblk->rl); if (!run_gc || (atomic_read(&gc->inflight_gc) >= PBLK_GC_L_QD)) diff --git a/drivers/lightnvm/pblk-init.c b/drivers/lightnvm/pblk-init.c index 27eb430958ff..f08fa2083fbc 100644 --- a/drivers/lightnvm/pblk-init.c +++ b/drivers/lightnvm/pblk-init.c @@ -508,6 +508,9 @@ static int pblk_lines_configure(struct pblk *pblk, int flags) } } + /* Free full lines directly as GC has not been started yet */ + pblk_gc_free_full_lines(pblk); + if (!line) { /* Configure next line for user data */ line = pblk_line_get_first_data(pblk); diff --git a/drivers/lightnvm/pblk.h b/drivers/lightnvm/pblk.h index 191b1ec0627b..21438d1550a2 100644 --- a/drivers/lightnvm/pblk.h +++ b/drivers/lightnvm/pblk.h @@ -831,6 +831,7 @@ void pblk_gc_exit(struct pblk *pblk); void pblk_gc_should_start(struct 
pblk *pblk); void pblk_gc_should_stop(struct pblk *pblk); void pblk_gc_kick(struct pblk *pblk); +void pblk_gc_free_full_lines(struct pblk *pblk); void pblk_gc_sysfs_state_show(struct pblk *pblk, int *gc_enabled, int *gc_active); int pblk_gc_sysfs_force(struct pblk *pblk, int force); -- cgit v1.2.3 From 03661b5f756c92b9924869334a2afa19753c4fe7 Mon Sep 17 00:00:00 2001 From: Hans Holmberg Date: Fri, 13 Oct 2017 14:46:37 +0200 Subject: lightnvm: pblk: start gc if needed during init MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Start GC if needed, directly after init, as we might need to garbage collect in order to make room for user writes. Create a helper function that allows to kick GC without exposing the internals of the GC/rate-limiter interaction. Signed-off-by: Hans Holmberg Signed-off-by: Matias Bjørling Signed-off-by: Jens Axboe --- drivers/lightnvm/pblk-gc.c | 5 +++++ drivers/lightnvm/pblk-init.c | 4 ++++ drivers/lightnvm/pblk-rl.c | 2 +- drivers/lightnvm/pblk.h | 2 ++ 4 files changed, 12 insertions(+), 1 deletion(-) (limited to 'drivers/lightnvm') diff --git a/drivers/lightnvm/pblk-gc.c b/drivers/lightnvm/pblk-gc.c index 374089fe4326..4bac9e1531f5 100644 --- a/drivers/lightnvm/pblk-gc.c +++ b/drivers/lightnvm/pblk-gc.c @@ -510,6 +510,11 @@ void pblk_gc_should_stop(struct pblk *pblk) pblk_gc_stop(pblk, 0); } +void pblk_gc_should_kick(struct pblk *pblk) +{ + pblk_rl_update_rates(&pblk->rl); +} + void pblk_gc_sysfs_state_show(struct pblk *pblk, int *gc_enabled, int *gc_active) { diff --git a/drivers/lightnvm/pblk-init.c b/drivers/lightnvm/pblk-init.c index f08fa2083fbc..ad9f014a086b 100644 --- a/drivers/lightnvm/pblk-init.c +++ b/drivers/lightnvm/pblk-init.c @@ -1025,6 +1025,10 @@ static void *pblk_init(struct nvm_tgt_dev *dev, struct gendisk *tdisk, pblk->rwb.nr_entries); wake_up_process(pblk->writer_ts); + + /* Check if we need to start GC */ + pblk_gc_should_kick(pblk); + return pblk; fail_stop_writer: diff --git 
a/drivers/lightnvm/pblk-rl.c b/drivers/lightnvm/pblk-rl.c index 0896439a91b0..739f855d4216 100644 --- a/drivers/lightnvm/pblk-rl.c +++ b/drivers/lightnvm/pblk-rl.c @@ -96,7 +96,7 @@ unsigned long pblk_rl_nr_free_blks(struct pblk_rl *rl) * * Only the total number of free blocks is used to configure the rate limiter. */ -static void pblk_rl_update_rates(struct pblk_rl *rl) +void pblk_rl_update_rates(struct pblk_rl *rl) { struct pblk *pblk = container_of(rl, struct pblk, rl); unsigned long free_blocks = pblk_rl_nr_free_blks(rl); diff --git a/drivers/lightnvm/pblk.h b/drivers/lightnvm/pblk.h index 21438d1550a2..29ba7ec32b20 100644 --- a/drivers/lightnvm/pblk.h +++ b/drivers/lightnvm/pblk.h @@ -830,6 +830,7 @@ int pblk_gc_init(struct pblk *pblk); void pblk_gc_exit(struct pblk *pblk); void pblk_gc_should_start(struct pblk *pblk); void pblk_gc_should_stop(struct pblk *pblk); +void pblk_gc_should_kick(struct pblk *pblk); void pblk_gc_kick(struct pblk *pblk); void pblk_gc_free_full_lines(struct pblk *pblk); void pblk_gc_sysfs_state_show(struct pblk *pblk, int *gc_enabled, @@ -841,6 +842,7 @@ int pblk_gc_sysfs_force(struct pblk *pblk, int force); */ void pblk_rl_init(struct pblk_rl *rl, int budget); void pblk_rl_free(struct pblk_rl *rl); +void pblk_rl_update_rates(struct pblk_rl *rl); int pblk_rl_high_thrs(struct pblk_rl *rl); int pblk_rl_low_thrs(struct pblk_rl *rl); unsigned long pblk_rl_nr_free_blks(struct pblk_rl *rl); -- cgit v1.2.3 From 75610cd974aba4fadc9a8500d5470e8f28a3626f Mon Sep 17 00:00:00 2001 From: Hans Holmberg Date: Fri, 13 Oct 2017 14:46:38 +0200 Subject: lightnvm: pblk: consider bad sectors in emeta during recovery MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit When recovering lines we need to consider that bad blocks in a line affect the emeta area size. Previously it was assumed that the emeta area would grow by the number of sectors per page * number of bad blocks in the line. 
This assumption is not correct - the number of "extra" pages that are consumed could be both smaller (depending on emeta size) and bigger (depending on the placement of the bad blocks). Fix this by calculating the emeta start by iterating backwards through the line, skipping ppas that map to bad blocks. Also fix the data types used for ppa indices/counts in pblk_recov_l2p_from_emeta - we should use u64. Signed-off-by: Hans Holmberg Signed-off-by: Matias Bjørling Signed-off-by: Jens Axboe --- drivers/lightnvm/pblk-recovery.c | 44 +++++++++++++++++++++++++++------------- 1 file changed, 30 insertions(+), 14 deletions(-) (limited to 'drivers/lightnvm') diff --git a/drivers/lightnvm/pblk-recovery.c b/drivers/lightnvm/pblk-recovery.c index a080cf888982..9772a947ca4f 100644 --- a/drivers/lightnvm/pblk-recovery.c +++ b/drivers/lightnvm/pblk-recovery.c @@ -133,16 +133,16 @@ static int pblk_recov_l2p_from_emeta(struct pblk *pblk, struct pblk_line *line) struct pblk_emeta *emeta = line->emeta; struct line_emeta *emeta_buf = emeta->buf; __le64 *lba_list; - int data_start, data_end; - int nr_valid_lbas, nr_lbas = 0; - int i; + u64 data_start, data_end; + u64 nr_valid_lbas, nr_lbas = 0; + u64 i; lba_list = pblk_recov_get_lba_list(pblk, emeta_buf); if (!lba_list) return 1; data_start = pblk_line_smeta_start(pblk, line) + lm->smeta_sec; - data_end = lm->sec_per_line - lm->emeta_sec[0]; + data_end = line->emeta_ssec; nr_valid_lbas = le64_to_cpu(emeta_buf->nr_valid_lbas); for (i = data_start; i < data_end; i++) { @@ -172,8 +172,8 @@ static int pblk_recov_l2p_from_emeta(struct pblk *pblk, struct pblk_line *line) } if (nr_valid_lbas != nr_lbas) - pr_err("pblk: line %d - inconsistent lba list(%llu/%d)\n", - line->id, emeta_buf->nr_valid_lbas, nr_lbas); + pr_err("pblk: line %d - inconsistent lba list(%llu/%llu)\n", + line->id, nr_valid_lbas, nr_lbas); line->left_msecs = 0; @@ -827,10 +827,32 @@ static void pblk_recov_line_add_ordered(struct list_head *head, __list_add(&line->list, 
t->list.prev, &t->list); } -struct pblk_line *pblk_recov_l2p(struct pblk *pblk) +static u64 pblk_line_emeta_start(struct pblk *pblk, struct pblk_line *line) { struct nvm_tgt_dev *dev = pblk->dev; struct nvm_geo *geo = &dev->geo; + struct pblk_line_meta *lm = &pblk->lm; + unsigned int emeta_secs; + u64 emeta_start; + struct ppa_addr ppa; + int pos; + + emeta_secs = lm->emeta_sec[0]; + emeta_start = lm->sec_per_line; + + while (emeta_secs) { + emeta_start--; + ppa = addr_to_pblk_ppa(pblk, emeta_start, line->id); + pos = pblk_ppa_to_pos(geo, ppa); + if (!test_bit(pos, line->blk_bitmap)) + emeta_secs--; + } + + return emeta_start; +} + +struct pblk_line *pblk_recov_l2p(struct pblk *pblk) +{ struct pblk_line_meta *lm = &pblk->lm; struct pblk_line_mgmt *l_mg = &pblk->l_mg; struct pblk_line *line, *tline, *data_line = NULL; @@ -930,15 +952,9 @@ struct pblk_line *pblk_recov_l2p(struct pblk *pblk) /* Verify closed blocks and recover this portion of L2P table*/ list_for_each_entry_safe(line, tline, &recov_list, list) { - int off, nr_bb; - recovered_lines++; - /* Calculate where emeta starts based on the line bb */ - off = lm->sec_per_line - lm->emeta_sec[0]; - nr_bb = bitmap_weight(line->blk_bitmap, lm->blk_per_line); - off -= nr_bb * geo->sec_per_pl; - line->emeta_ssec = off; + line->emeta_ssec = pblk_line_emeta_start(pblk, line); line->emeta = emeta; memset(line->emeta->buf, 0, lm->emeta_len[0]); -- cgit v1.2.3 From 1edebacf8b736774d2f160512aec721f47e1f5ac Mon Sep 17 00:00:00 2001 From: Hans Holmberg Date: Fri, 13 Oct 2017 14:46:39 +0200 Subject: lightnvm: pblk: shut down gc gracefully during exit MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Shut down the GC workqueues and tasks in the right order. 
Signed-off-by: Hans Holmberg Signed-off-by: Matias Bjørling Signed-off-by: Jens Axboe --- drivers/lightnvm/pblk-gc.c | 20 ++++++++++++-------- 1 file changed, 12 insertions(+), 8 deletions(-) (limited to 'drivers/lightnvm') diff --git a/drivers/lightnvm/pblk-gc.c b/drivers/lightnvm/pblk-gc.c index 4bac9e1531f5..e00e5a0743e9 100644 --- a/drivers/lightnvm/pblk-gc.c +++ b/drivers/lightnvm/pblk-gc.c @@ -422,10 +422,15 @@ void pblk_gc_kick(struct pblk *pblk) { struct pblk_gc *gc = &pblk->gc; - wake_up_process(gc->gc_ts); pblk_gc_writer_kick(gc); pblk_gc_reader_kick(gc); - mod_timer(&gc->gc_timer, jiffies + msecs_to_jiffies(GC_TIME_MSECS)); + + /* If we're shutting down GC, let's not start it up again */ + if (gc->gc_enabled) { + wake_up_process(gc->gc_ts); + mod_timer(&gc->gc_timer, + jiffies + msecs_to_jiffies(GC_TIME_MSECS)); + } } static void pblk_gc_timer(unsigned long data) @@ -630,9 +635,6 @@ void pblk_gc_exit(struct pblk *pblk) { struct pblk_gc *gc = &pblk->gc; - flush_workqueue(gc->gc_reader_wq); - flush_workqueue(gc->gc_line_reader_wq); - gc->gc_enabled = 0; del_timer_sync(&gc->gc_timer); pblk_gc_stop(pblk, 1); @@ -640,15 +642,17 @@ void pblk_gc_exit(struct pblk *pblk) if (gc->gc_ts) kthread_stop(gc->gc_ts); + if (gc->gc_reader_ts) + kthread_stop(gc->gc_reader_ts); + + flush_workqueue(gc->gc_reader_wq); if (gc->gc_reader_wq) destroy_workqueue(gc->gc_reader_wq); + flush_workqueue(gc->gc_line_reader_wq); if (gc->gc_line_reader_wq) destroy_workqueue(gc->gc_line_reader_wq); if (gc->gc_writer_ts) kthread_stop(gc->gc_writer_ts); - - if (gc->gc_reader_ts) - kthread_stop(gc->gc_reader_ts); } -- cgit v1.2.3 From c55861926a78bf129e06bd3372b34225f4968757 Mon Sep 17 00:00:00 2001 From: Hans Holmberg Date: Fri, 13 Oct 2017 14:46:40 +0200 Subject: lightnvm: pblk: add l2p crc debug printouts MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Print the CRC of the logical-to-physical mapping during exit and after recovering the L2P table 
to facilitate detection of meta data corruption/recovery issues. The CRC printed after recovery should match the CRC printed during the previous exit - if it doesn't this indicates that either the meta data written to the disk is corrupt or recovery failed. Signed-off-by: Hans Holmberg Signed-off-by: Matias Bjørling Signed-off-by: Jens Axboe --- drivers/lightnvm/pblk-init.c | 39 ++++++++++++++++++++++++++++++++++----- 1 file changed, 34 insertions(+), 5 deletions(-) (limited to 'drivers/lightnvm') diff --git a/drivers/lightnvm/pblk-init.c b/drivers/lightnvm/pblk-init.c index ad9f014a086b..52c85f4f672d 100644 --- a/drivers/lightnvm/pblk-init.c +++ b/drivers/lightnvm/pblk-init.c @@ -76,6 +76,28 @@ static blk_qc_t pblk_make_rq(struct request_queue *q, struct bio *bio) return BLK_QC_T_NONE; } +static size_t pblk_trans_map_size(struct pblk *pblk) +{ + int entry_size = 8; + + if (pblk->ppaf_bitsize < 32) + entry_size = 4; + + return entry_size * pblk->rl.nr_secs; +} + +#ifdef CONFIG_NVM_DEBUG +static u32 pblk_l2p_crc(struct pblk *pblk) +{ + size_t map_size; + u32 crc = ~(u32)0; + + map_size = pblk_trans_map_size(pblk); + crc = crc32_le(crc, pblk->trans_map, map_size); + return crc; +} +#endif + static void pblk_l2p_free(struct pblk *pblk) { vfree(pblk->trans_map); @@ -85,12 +107,10 @@ static int pblk_l2p_init(struct pblk *pblk) { sector_t i; struct ppa_addr ppa; - int entry_size = 8; - - if (pblk->ppaf_bitsize < 32) - entry_size = 4; + size_t map_size; - pblk->trans_map = vmalloc(entry_size * pblk->rl.nr_secs); + map_size = pblk_trans_map_size(pblk); + pblk->trans_map = vmalloc(map_size); if (!pblk->trans_map) return -ENOMEM; @@ -508,6 +528,10 @@ static int pblk_lines_configure(struct pblk *pblk, int flags) } } +#ifdef CONFIG_NVM_DEBUG + pr_info("pblk init: L2P CRC: %x\n", pblk_l2p_crc(pblk)); +#endif + /* Free full lines directly as GC has not been started yet */ pblk_gc_free_full_lines(pblk); @@ -901,6 +925,11 @@ static void pblk_exit(void *private) 
down_write(&pblk_lock); pblk_gc_exit(pblk); pblk_tear_down(pblk); + +#ifdef CONFIG_NVM_DEBUG + pr_info("pblk exit: L2P CRC: %x\n", pblk_l2p_crc(pblk)); +#endif + pblk_free(pblk); up_write(&pblk_lock); } -- cgit v1.2.3 From d6b992f7ab6279884238d4e2babf100c0879b3d6 Mon Sep 17 00:00:00 2001 From: Hans Holmberg Date: Fri, 13 Oct 2017 14:46:41 +0200 Subject: lightnvm: pblk: gc all lines in the pipeline before exit MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Finish garbage collect of the lines that are in the gc pipeline before exiting. Ensure that all lines already in the pipeline go through, from read to write. Do this by keeping track of how many lines are in the pipeline and waiting for that number to reach zero before exiting the gc reader task. Since we're adding a new gc line counter, change the name of inflight_gc to read_inflight_gc to make the distinction clear. Signed-off-by: Hans Holmberg Signed-off-by: Matias Bjørling Signed-off-by: Jens Axboe --- drivers/lightnvm/pblk-core.c | 3 +++ drivers/lightnvm/pblk-gc.c | 31 ++++++++++++++++++++++++------- drivers/lightnvm/pblk-sysfs.c | 2 +- drivers/lightnvm/pblk.h | 5 ++++- 4 files changed, 32 insertions(+), 9 deletions(-) (limited to 'drivers/lightnvm') diff --git a/drivers/lightnvm/pblk-core.c b/drivers/lightnvm/pblk-core.c index 43866ad87586..1cd27e38fc46 100644 --- a/drivers/lightnvm/pblk-core.c +++ b/drivers/lightnvm/pblk-core.c @@ -1465,6 +1465,7 @@ void pblk_line_free(struct pblk *pblk, struct pblk_line *line) static void __pblk_line_put(struct pblk *pblk, struct pblk_line *line) { struct pblk_line_mgmt *l_mg = &pblk->l_mg; + struct pblk_gc *gc = &pblk->gc; spin_lock(&line->lock); WARN_ON(line->state != PBLK_LINESTATE_GC); @@ -1473,6 +1474,8 @@ static void __pblk_line_put(struct pblk *pblk, struct pblk_line *line) pblk_line_free(pblk, line); spin_unlock(&line->lock); + atomic_dec(&gc->pipeline_gc); + spin_lock(&l_mg->free_lock); list_add_tail(&line->list,
&l_mg->free_list); l_mg->nr_free_lines++; diff --git a/drivers/lightnvm/pblk-gc.c b/drivers/lightnvm/pblk-gc.c index e00e5a0743e9..e6fae1959e25 100644 --- a/drivers/lightnvm/pblk-gc.c +++ b/drivers/lightnvm/pblk-gc.c @@ -234,7 +234,7 @@ out: kfree(invalid_bitmap); kref_put(&line->ref, pblk_line_put); - atomic_dec(&gc->inflight_gc); + atomic_dec(&gc->read_inflight_gc); return; @@ -249,7 +249,7 @@ fail_free_ws: pblk_put_line_back(pblk, line); kref_put(&line->ref, pblk_line_put); - atomic_dec(&gc->inflight_gc); + atomic_dec(&gc->read_inflight_gc); pr_err("pblk: Failed to GC line %d\n", line->id); } @@ -268,6 +268,7 @@ static int pblk_gc_line(struct pblk *pblk, struct pblk_line *line) line_ws->pblk = pblk; line_ws->line = line; + atomic_inc(&gc->pipeline_gc); INIT_WORK(&line_ws->ws, pblk_gc_line_prepare_ws); queue_work(gc->gc_reader_wq, &line_ws->ws); @@ -333,6 +334,7 @@ static bool pblk_gc_should_run(struct pblk_gc *gc, struct pblk_rl *rl) void pblk_gc_free_full_lines(struct pblk *pblk) { struct pblk_line_mgmt *l_mg = &pblk->l_mg; + struct pblk_gc *gc = &pblk->gc; struct pblk_line *line; do { @@ -353,6 +355,7 @@ void pblk_gc_free_full_lines(struct pblk *pblk) list_del(&line->list); spin_unlock(&l_mg->gc_lock); + atomic_inc(&gc->pipeline_gc); kref_put(&line->ref, pblk_line_put); } while (1); } @@ -370,12 +373,12 @@ static void pblk_gc_run(struct pblk *pblk) struct pblk_line *line; struct list_head *group_list; bool run_gc; - int inflight_gc, gc_group = 0, prev_group = 0; + int read_inflight_gc, gc_group = 0, prev_group = 0; pblk_gc_free_full_lines(pblk); run_gc = pblk_gc_should_run(&pblk->gc, &pblk->rl); - if (!run_gc || (atomic_read(&gc->inflight_gc) >= PBLK_GC_L_QD)) + if (!run_gc || (atomic_read(&gc->read_inflight_gc) >= PBLK_GC_L_QD)) return; next_gc_group: @@ -402,14 +405,14 @@ next_gc_group: list_add_tail(&line->list, &gc->r_list); spin_unlock(&gc->r_lock); - inflight_gc = atomic_inc_return(&gc->inflight_gc); + read_inflight_gc = 
atomic_inc_return(&gc->read_inflight_gc); pblk_gc_reader_kick(gc); prev_group = 1; /* No need to queue up more GC lines than we can handle */ run_gc = pblk_gc_should_run(&pblk->gc, &pblk->rl); - if (!run_gc || inflight_gc >= PBLK_GC_L_QD) + if (!run_gc || read_inflight_gc >= PBLK_GC_L_QD) break; } while (1); @@ -470,6 +473,7 @@ static int pblk_gc_writer_ts(void *data) static int pblk_gc_reader_ts(void *data) { struct pblk *pblk = data; + struct pblk_gc *gc = &pblk->gc; while (!kthread_should_stop()) { if (!pblk_gc_read(pblk)) @@ -478,6 +482,18 @@ static int pblk_gc_reader_ts(void *data) io_schedule(); } +#ifdef CONFIG_NVM_DEBUG + pr_info("pblk: flushing gc pipeline, %d lines left\n", + atomic_read(&gc->pipeline_gc)); +#endif + + do { + if (!atomic_read(&gc->pipeline_gc)) + break; + + schedule(); + } while (1); + return 0; } @@ -586,7 +602,8 @@ int pblk_gc_init(struct pblk *pblk) gc->gc_forced = 0; gc->gc_enabled = 1; gc->w_entries = 0; - atomic_set(&gc->inflight_gc, 0); + atomic_set(&gc->read_inflight_gc, 0); + atomic_set(&gc->pipeline_gc, 0); /* Workqueue that reads valid sectors from a line and submit them to the * GC writer to be recycled. 
diff --git a/drivers/lightnvm/pblk-sysfs.c b/drivers/lightnvm/pblk-sysfs.c index 95fb434e2f01..cd49e8875d4e 100644 --- a/drivers/lightnvm/pblk-sysfs.c +++ b/drivers/lightnvm/pblk-sysfs.c @@ -253,7 +253,7 @@ static ssize_t pblk_sysfs_lines(struct pblk *pblk, char *page) sz += snprintf(page + sz, PAGE_SIZE - sz, "GC: full:%d, high:%d, mid:%d, low:%d, empty:%d, queue:%d\n", gc_full, gc_high, gc_mid, gc_low, gc_empty, - atomic_read(&pblk->gc.inflight_gc)); + atomic_read(&pblk->gc.read_inflight_gc)); sz += snprintf(page + sz, PAGE_SIZE - sz, "data (%d) cur:%d, left:%d, vsc:%d, s:%d, map:%d/%d (%d)\n", diff --git a/drivers/lightnvm/pblk.h b/drivers/lightnvm/pblk.h index 29ba7ec32b20..c6f8841973a0 100644 --- a/drivers/lightnvm/pblk.h +++ b/drivers/lightnvm/pblk.h @@ -238,7 +238,10 @@ struct pblk_gc { struct timer_list gc_timer; struct semaphore gc_sem; - atomic_t inflight_gc; + atomic_t read_inflight_gc; /* Number of lines with inflight GC reads */ + atomic_t pipeline_gc; /* Number of lines in the GC pipeline - + * started reads to finished writes + */ int w_entries; struct list_head w_list; -- cgit v1.2.3 From 03e868eb8adb28e34f6e695667d230786bfdb653 Mon Sep 17 00:00:00 2001 From: Hans Holmberg Date: Fri, 13 Oct 2017 14:46:42 +0200 Subject: lightnvm: pblk: correct valid lba count calculation MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit During garbage collect, lbas being written can end up being invalidated. Make sure that this is reflected in the valid lba count. 
Signed-off-by: Hans Holmberg Signed-off-by: Matias Bjørling Signed-off-by: Jens Axboe --- drivers/lightnvm/pblk-map.c | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) (limited to 'drivers/lightnvm') diff --git a/drivers/lightnvm/pblk-map.c b/drivers/lightnvm/pblk-map.c index 3bc4c94f9cf2..6f3ecde2140f 100644 --- a/drivers/lightnvm/pblk-map.c +++ b/drivers/lightnvm/pblk-map.c @@ -45,6 +45,8 @@ static void pblk_map_page_data(struct pblk *pblk, unsigned int sentry, paddr = pblk_alloc_page(pblk, line, nr_secs); for (i = 0; i < nr_secs; i++, paddr++) { + __le64 addr_empty = cpu_to_le64(ADDR_EMPTY); + /* ppa to be sent to the device */ ppa_list[i] = addr_to_gen_ppa(pblk, paddr, line->id); @@ -61,10 +63,9 @@ static void pblk_map_page_data(struct pblk *pblk, unsigned int sentry, w_ctx->ppa = ppa_list[i]; meta_list[i].lba = cpu_to_le64(w_ctx->lba); lba_list[paddr] = cpu_to_le64(w_ctx->lba); - line->nr_valid_lbas++; + if (lba_list[paddr] != addr_empty) + line->nr_valid_lbas++; } else { - __le64 addr_empty = cpu_to_le64(ADDR_EMPTY); - lba_list[paddr] = meta_list[i].lba = addr_empty; __pblk_map_invalidate(pblk, line, paddr); } -- cgit v1.2.3 From 28bd109411eaa4c541f2e240d1285c154de4dfb7 Mon Sep 17 00:00:00 2001 From: Hans Holmberg Date: Fri, 13 Oct 2017 14:46:43 +0200 Subject: lightnvm: pblk: remove spinlock when freeing line metadata MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Lockdep complains about being in atomic context while freeing line metadata - and rightly so as we take a spinlock and end up calling vfree that might sleep (in pblk_mfree). There is no need for holding the line manager free_lock while freeing line metadata as the pipeline has stopped, so remove the lock. Fixes: 588726d3ec68 ("lightnvm: pblk: fail gracefully on irrec.
error") Signed-off-by: Hans Holmberg Signed-off-by: Matias Bjørling Signed-off-by: Jens Axboe --- drivers/lightnvm/pblk-init.c | 2 -- 1 file changed, 2 deletions(-) (limited to 'drivers/lightnvm') diff --git a/drivers/lightnvm/pblk-init.c b/drivers/lightnvm/pblk-init.c index 52c85f4f672d..f62112ba5482 100644 --- a/drivers/lightnvm/pblk-init.c +++ b/drivers/lightnvm/pblk-init.c @@ -393,13 +393,11 @@ static void pblk_line_meta_free(struct pblk *pblk) kfree(l_mg->bb_aux); kfree(l_mg->vsc_list); - spin_lock(&l_mg->free_lock); for (i = 0; i < PBLK_DATA_LINES; i++) { kfree(l_mg->sline_meta[i]); pblk_mfree(l_mg->eline_meta[i]->buf, l_mg->emeta_alloc_type); kfree(l_mg->eline_meta[i]); } - spin_unlock(&l_mg->free_lock); kfree(pblk->lines); } -- cgit v1.2.3 From 8bd400204bd500bb2aea7b551f7c33bad2455340 Mon Sep 17 00:00:00 2001 From: Javier González Date: Fri, 13 Oct 2017 14:46:44 +0200 Subject: lightnvm: pblk: cleanup unused and static functions MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Clean up unused and static functions across the whole codebase.
Signed-off-by: Javier González Signed-off-by: Matias Bjørling Signed-off-by: Jens Axboe --- drivers/lightnvm/pblk-core.c | 133 ++++++++++++++++++++----------------------- drivers/lightnvm/pblk-gc.c | 40 ++++++------- drivers/lightnvm/pblk-rl.c | 10 ---- drivers/lightnvm/pblk.h | 14 ++--- 4 files changed, 86 insertions(+), 111 deletions(-) (limited to 'drivers/lightnvm') diff --git a/drivers/lightnvm/pblk-core.c b/drivers/lightnvm/pblk-core.c index 1cd27e38fc46..4199119a0754 100644 --- a/drivers/lightnvm/pblk-core.c +++ b/drivers/lightnvm/pblk-core.c @@ -18,6 +18,31 @@ #include "pblk.h" +static void pblk_line_mark_bb(struct work_struct *work) +{ + struct pblk_line_ws *line_ws = container_of(work, struct pblk_line_ws, + ws); + struct pblk *pblk = line_ws->pblk; + struct nvm_tgt_dev *dev = pblk->dev; + struct ppa_addr *ppa = line_ws->priv; + int ret; + + ret = nvm_set_tgt_bb_tbl(dev, ppa, 1, NVM_BLK_T_GRWN_BAD); + if (ret) { + struct pblk_line *line; + int pos; + + line = &pblk->lines[pblk_dev_ppa_to_line(*ppa)]; + pos = pblk_dev_ppa_to_pos(&dev->geo, *ppa); + + pr_err("pblk: failed to mark bb, line:%d, pos:%d\n", + line->id, pos); + } + + kfree(ppa); + mempool_free(line_ws, pblk->gen_ws_pool); +} + static void pblk_mark_bb(struct pblk *pblk, struct pblk_line *line, struct ppa_addr *ppa) { @@ -268,7 +293,7 @@ void pblk_end_io_sync(struct nvm_rq *rqd) complete(waiting); } -void pblk_wait_for_meta(struct pblk *pblk) +static void pblk_wait_for_meta(struct pblk *pblk) { do { if (!atomic_read(&pblk->inflight_io)) @@ -345,17 +370,6 @@ void pblk_discard(struct pblk *pblk, struct bio *bio) pblk_invalidate_range(pblk, slba, nr_secs); } -struct ppa_addr pblk_get_lba_map(struct pblk *pblk, sector_t lba) -{ - struct ppa_addr ppa; - - spin_lock(&pblk->trans_lock); - ppa = pblk_trans_map_get(pblk, lba); - spin_unlock(&pblk->trans_lock); - - return ppa; -} - void pblk_log_write_err(struct pblk *pblk, struct nvm_rq *rqd) { atomic_long_inc(&pblk->write_failed); @@ -1338,6 +1352,41 @@ 
static void pblk_stop_writes(struct pblk *pblk, struct pblk_line *line) pblk->state = PBLK_STATE_STOPPING; } +static void pblk_line_close_meta_sync(struct pblk *pblk) +{ + struct pblk_line_mgmt *l_mg = &pblk->l_mg; + struct pblk_line_meta *lm = &pblk->lm; + struct pblk_line *line, *tline; + LIST_HEAD(list); + + spin_lock(&l_mg->close_lock); + if (list_empty(&l_mg->emeta_list)) { + spin_unlock(&l_mg->close_lock); + return; + } + + list_cut_position(&list, &l_mg->emeta_list, l_mg->emeta_list.prev); + spin_unlock(&l_mg->close_lock); + + list_for_each_entry_safe(line, tline, &list, list) { + struct pblk_emeta *emeta = line->emeta; + + while (emeta->mem < lm->emeta_len[0]) { + int ret; + + ret = pblk_submit_meta_io(pblk, line); + if (ret) { + pr_err("pblk: sync meta line %d failed (%d)\n", + line->id, ret); + return; + } + } + } + + pblk_wait_for_meta(pblk); + flush_workqueue(pblk->close_wq); +} + void pblk_pipeline_stop(struct pblk *pblk) { struct pblk_line_mgmt *l_mg = &pblk->l_mg; @@ -1565,41 +1614,6 @@ int pblk_line_is_full(struct pblk_line *line) return (line->left_msecs == 0); } -void pblk_line_close_meta_sync(struct pblk *pblk) -{ - struct pblk_line_mgmt *l_mg = &pblk->l_mg; - struct pblk_line_meta *lm = &pblk->lm; - struct pblk_line *line, *tline; - LIST_HEAD(list); - - spin_lock(&l_mg->close_lock); - if (list_empty(&l_mg->emeta_list)) { - spin_unlock(&l_mg->close_lock); - return; - } - - list_cut_position(&list, &l_mg->emeta_list, l_mg->emeta_list.prev); - spin_unlock(&l_mg->close_lock); - - list_for_each_entry_safe(line, tline, &list, list) { - struct pblk_emeta *emeta = line->emeta; - - while (emeta->mem < lm->emeta_len[0]) { - int ret; - - ret = pblk_submit_meta_io(pblk, line); - if (ret) { - pr_err("pblk: sync meta line %d failed (%d)\n", - line->id, ret); - return; - } - } - } - - pblk_wait_for_meta(pblk); - flush_workqueue(pblk->close_wq); -} - static void pblk_line_should_sync_meta(struct pblk *pblk) { if (pblk_rl_is_limit(&pblk->rl)) @@ -1673,31 +1687,6 
@@ void pblk_line_close_ws(struct work_struct *work) mempool_free(line_ws, pblk->gen_ws_pool); } -void pblk_line_mark_bb(struct work_struct *work) -{ - struct pblk_line_ws *line_ws = container_of(work, struct pblk_line_ws, - ws); - struct pblk *pblk = line_ws->pblk; - struct nvm_tgt_dev *dev = pblk->dev; - struct ppa_addr *ppa = line_ws->priv; - int ret; - - ret = nvm_set_tgt_bb_tbl(dev, ppa, 1, NVM_BLK_T_GRWN_BAD); - if (ret) { - struct pblk_line *line; - int pos; - - line = &pblk->lines[pblk_dev_ppa_to_line(*ppa)]; - pos = pblk_dev_ppa_to_pos(&dev->geo, *ppa); - - pr_err("pblk: failed to mark bb, line:%d, pos:%d\n", - line->id, pos); - } - - kfree(ppa); - mempool_free(line_ws, pblk->gen_ws_pool); -} - void pblk_gen_run_ws(struct pblk *pblk, struct pblk_line *line, void *priv, void (*work)(struct work_struct *), gfp_t gfp_mask, struct workqueue_struct *wq) diff --git a/drivers/lightnvm/pblk-gc.c b/drivers/lightnvm/pblk-gc.c index e6fae1959e25..b8323e34d1bc 100644 --- a/drivers/lightnvm/pblk-gc.c +++ b/drivers/lightnvm/pblk-gc.c @@ -275,6 +275,26 @@ static int pblk_gc_line(struct pblk *pblk, struct pblk_line *line) return 0; } +static void pblk_gc_reader_kick(struct pblk_gc *gc) +{ + wake_up_process(gc->gc_reader_ts); +} + +static void pblk_gc_kick(struct pblk *pblk) +{ + struct pblk_gc *gc = &pblk->gc; + + pblk_gc_writer_kick(gc); + pblk_gc_reader_kick(gc); + + /* If we're shutting down GC, let's not start it up again */ + if (gc->gc_enabled) { + wake_up_process(gc->gc_ts); + mod_timer(&gc->gc_timer, + jiffies + msecs_to_jiffies(GC_TIME_MSECS)); + } +} + static int pblk_gc_read(struct pblk *pblk) { struct pblk_gc *gc = &pblk->gc; @@ -298,11 +318,6 @@ static int pblk_gc_read(struct pblk *pblk) return 0; } -static void pblk_gc_reader_kick(struct pblk_gc *gc) -{ - wake_up_process(gc->gc_reader_ts); -} - static struct pblk_line *pblk_gc_get_victim_line(struct pblk *pblk, struct list_head *group_list) { @@ -421,21 +436,6 @@ next_gc_group: goto next_gc_group; } -void 
pblk_gc_kick(struct pblk *pblk) -{ - struct pblk_gc *gc = &pblk->gc; - - pblk_gc_writer_kick(gc); - pblk_gc_reader_kick(gc); - - /* If we're shutting down GC, let's not start it up again */ - if (gc->gc_enabled) { - wake_up_process(gc->gc_ts); - mod_timer(&gc->gc_timer, - jiffies + msecs_to_jiffies(GC_TIME_MSECS)); - } -} - static void pblk_gc_timer(unsigned long data) { struct pblk *pblk = (struct pblk *)data; diff --git a/drivers/lightnvm/pblk-rl.c b/drivers/lightnvm/pblk-rl.c index 739f855d4216..abae31fd434e 100644 --- a/drivers/lightnvm/pblk-rl.c +++ b/drivers/lightnvm/pblk-rl.c @@ -153,16 +153,6 @@ int pblk_rl_high_thrs(struct pblk_rl *rl) return rl->high; } -int pblk_rl_low_thrs(struct pblk_rl *rl) -{ - return rl->low; -} - -int pblk_rl_sysfs_rate_show(struct pblk_rl *rl) -{ - return rl->rb_user_max; -} - int pblk_rl_max_io(struct pblk_rl *rl) { return rl->rb_max_io; diff --git a/drivers/lightnvm/pblk.h b/drivers/lightnvm/pblk.h index c6f8841973a0..6c9ea9a93704 100644 --- a/drivers/lightnvm/pblk.h +++ b/drivers/lightnvm/pblk.h @@ -710,8 +710,6 @@ void pblk_free_rqd(struct pblk *pblk, struct nvm_rq *rqd, int type); void pblk_set_sec_per_write(struct pblk *pblk, int sec_per_write); int pblk_setup_w_rec_rq(struct pblk *pblk, struct nvm_rq *rqd, struct pblk_c_ctx *c_ctx); -void pblk_wait_for_meta(struct pblk *pblk); -struct ppa_addr pblk_get_lba_map(struct pblk *pblk, sector_t lba); void pblk_discard(struct pblk *pblk, struct bio *bio); void pblk_log_write_err(struct pblk *pblk, struct nvm_rq *rqd); void pblk_log_read_err(struct pblk *pblk, struct nvm_rq *rqd); @@ -732,10 +730,8 @@ int pblk_line_is_full(struct pblk_line *line); void pblk_line_free(struct pblk *pblk, struct pblk_line *line); void pblk_line_close_meta(struct pblk *pblk, struct pblk_line *line); void pblk_line_close(struct pblk *pblk, struct pblk_line *line); -void pblk_line_close_meta_sync(struct pblk *pblk); void pblk_line_close_ws(struct work_struct *work); void pblk_pipeline_stop(struct pblk 
*pblk); -void pblk_line_mark_bb(struct work_struct *work); void pblk_gen_run_ws(struct pblk *pblk, struct pblk_line *line, void *priv, void (*work)(struct work_struct *), gfp_t gfp_mask, struct workqueue_struct *wq); @@ -759,7 +755,6 @@ void pblk_down_rq(struct pblk *pblk, struct ppa_addr *ppa_list, int nr_ppas, void pblk_down_page(struct pblk *pblk, struct ppa_addr *ppa_list, int nr_ppas); void pblk_up_rq(struct pblk *pblk, struct ppa_addr *ppa_list, int nr_ppas, unsigned long *lun_bitmap); -void pblk_end_bio_sync(struct bio *bio); void pblk_end_io_sync(struct nvm_rq *rqd); int pblk_bio_add_pages(struct pblk *pblk, struct bio *bio, gfp_t flags, int nr_pages); @@ -834,7 +829,6 @@ void pblk_gc_exit(struct pblk *pblk); void pblk_gc_should_start(struct pblk *pblk); void pblk_gc_should_stop(struct pblk *pblk); void pblk_gc_should_kick(struct pblk *pblk); -void pblk_gc_kick(struct pblk *pblk); void pblk_gc_free_full_lines(struct pblk *pblk); void pblk_gc_sysfs_state_show(struct pblk *pblk, int *gc_enabled, int *gc_active); @@ -847,7 +841,6 @@ void pblk_rl_init(struct pblk_rl *rl, int budget); void pblk_rl_free(struct pblk_rl *rl); void pblk_rl_update_rates(struct pblk_rl *rl); int pblk_rl_high_thrs(struct pblk_rl *rl); -int pblk_rl_low_thrs(struct pblk_rl *rl); unsigned long pblk_rl_nr_free_blks(struct pblk_rl *rl); int pblk_rl_user_may_insert(struct pblk_rl *rl, int nr_entries); void pblk_rl_inserted(struct pblk_rl *rl, int nr_entries); @@ -855,11 +848,9 @@ void pblk_rl_user_in(struct pblk_rl *rl, int nr_entries); int pblk_rl_gc_may_insert(struct pblk_rl *rl, int nr_entries); void pblk_rl_gc_in(struct pblk_rl *rl, int nr_entries); void pblk_rl_out(struct pblk_rl *rl, int nr_user, int nr_gc); -int pblk_rl_sysfs_rate_show(struct pblk_rl *rl); int pblk_rl_max_io(struct pblk_rl *rl); void pblk_rl_free_lines_inc(struct pblk_rl *rl, struct pblk_line *line); void pblk_rl_free_lines_dec(struct pblk_rl *rl, struct pblk_line *line); -void pblk_rl_set_space_limit(struct pblk_rl 
*rl, int entries_left); int pblk_rl_is_limit(struct pblk_rl *rl); /* @@ -868,6 +859,11 @@ int pblk_rl_is_limit(struct pblk_rl *rl); int pblk_sysfs_init(struct gendisk *tdisk); void pblk_sysfs_exit(struct gendisk *tdisk); +static inline void test(size_t a) +{ + a += 1; +} + static inline void *pblk_malloc(size_t size, int type, gfp_t flags) { if (type == PBLK_KMALLOC_META) -- cgit v1.2.3 From 8da10cce7c7f7f9f5edc77271cf6e0c45b762004 Mon Sep 17 00:00:00 2001 From: Javier González Date: Fri, 13 Oct 2017 14:46:45 +0200 Subject: lightnvm: pblk: avoid being reported as hung on rated GC MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The amount of GC I/O on the write buffer is managed by the rate-limiter, which is calculated as a function of the number of available free blocks. When reaching the stable point, we risk having scheduled more I/Os for GC than are allowed on the write buffer. This would result in the GC semaphore balancing the outstanding read GC I/Os being reported as "hung", though the behavior is normal. Solve this by allowing to schedule when we detect that the read GC path is not moving forward. Signed-off-by: Javier González Signed-off-by: Matias Bjørling Signed-off-by: Jens Axboe --- drivers/lightnvm/pblk-gc.c | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) (limited to 'drivers/lightnvm') diff --git a/drivers/lightnvm/pblk-gc.c b/drivers/lightnvm/pblk-gc.c index b8323e34d1bc..00d5698d64a9 100644 --- a/drivers/lightnvm/pblk-gc.c +++ b/drivers/lightnvm/pblk-gc.c @@ -218,7 +218,13 @@ next_rq: gc_rq_ws->line = line; gc_rq_ws->priv = gc_rq; - down(&gc->gc_sem); + /* The write GC path can be much slower than the read GC one due to + * the budget imposed by the rate-limiter. Balance in case that we get + * back pressure from the write GC path.
+ */ + while (down_timeout(&gc->gc_sem, msecs_to_jiffies(30000))) + io_schedule(); + kref_get(&line->ref); INIT_WORK(&gc_rq_ws->ws, pblk_gc_line_ws); -- cgit v1.2.3 From 1a94b2d484677dc559c96251dd0e7c7b8811c378 Mon Sep 17 00:00:00 2001 From: Javier González Date: Fri, 13 Oct 2017 14:46:47 +0200 Subject: lightnvm: implement generic path for sync I/O MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Implement a generic path for sending sync I/O on LightNVM. This allows reusing the standard synchronous path through blk_execute_rq(), instead of implementing a wait_for_completion on the target side (e.g., pblk). Signed-off-by: Javier González Signed-off-by: Matias Bjørling Signed-off-by: Jens Axboe --- drivers/lightnvm/core.c | 25 +++++++++----- drivers/lightnvm/pblk-core.c | 74 ++++++++++++---------------------------- drivers/lightnvm/pblk-read.c | 21 ++---------- drivers/lightnvm/pblk-recovery.c | 31 ++--------------- drivers/lightnvm/pblk.h | 42 +++++++++++++++++++++-- drivers/nvme/host/lightnvm.c | 70 +++++++++++++++++++++++++++++-------- include/linux/lightnvm.h | 3 ++ 7 files changed, 143 insertions(+), 123 deletions(-) (limited to 'drivers/lightnvm') diff --git a/drivers/lightnvm/core.c b/drivers/lightnvm/core.c index 0e5f77234c79..fe21f4dd33e9 100644 --- a/drivers/lightnvm/core.c +++ b/drivers/lightnvm/core.c @@ -720,12 +720,25 @@ int nvm_submit_io(struct nvm_tgt_dev *tgt_dev, struct nvm_rq *rqd) } EXPORT_SYMBOL(nvm_submit_io); -static void nvm_end_io_sync(struct nvm_rq *rqd) +int nvm_submit_io_sync(struct nvm_tgt_dev *tgt_dev, struct nvm_rq *rqd) { - struct completion *waiting = rqd->private; + struct nvm_dev *dev = tgt_dev->parent; + int ret; + + if (!dev->ops->submit_io_sync) + return -ENODEV; + + nvm_rq_tgt_to_dev(tgt_dev, rqd); - complete(waiting); + rqd->dev = tgt_dev; + + /* In case of error, fail with right address format */ + ret = dev->ops->submit_io_sync(dev, rqd); + nvm_rq_dev_to_tgt(tgt_dev, rqd); + + return
ret; } +EXPORT_SYMBOL(nvm_submit_io_sync); int nvm_erase_sync(struct nvm_tgt_dev *tgt_dev, struct ppa_addr *ppas, int nr_ppas) @@ -733,25 +746,21 @@ int nvm_erase_sync(struct nvm_tgt_dev *tgt_dev, struct ppa_addr *ppas, struct nvm_geo *geo = &tgt_dev->geo; struct nvm_rq rqd; int ret; - DECLARE_COMPLETION_ONSTACK(wait); memset(&rqd, 0, sizeof(struct nvm_rq)); rqd.opcode = NVM_OP_ERASE; - rqd.end_io = nvm_end_io_sync; - rqd.private = &wait; rqd.flags = geo->plane_mode >> 1; ret = nvm_set_rqd_ppalist(tgt_dev, &rqd, ppas, nr_ppas); if (ret) return ret; - ret = nvm_submit_io(tgt_dev, &rqd); + ret = nvm_submit_io_sync(tgt_dev, &rqd); if (ret) { pr_err("rrpr: erase I/O submission failed: %d\n", ret); goto free_ppa_list; } - wait_for_completion_io(&wait); free_ppa_list: nvm_free_rqd_ppalist(tgt_dev, &rqd); diff --git a/drivers/lightnvm/pblk-core.c b/drivers/lightnvm/pblk-core.c index 4199119a0754..ce90213a42fa 100644 --- a/drivers/lightnvm/pblk-core.c +++ b/drivers/lightnvm/pblk-core.c @@ -412,39 +412,33 @@ int pblk_submit_io(struct pblk *pblk, struct nvm_rq *rqd) struct nvm_tgt_dev *dev = pblk->dev; #ifdef CONFIG_NVM_DEBUG - struct ppa_addr *ppa_list; + int ret; - ppa_list = (rqd->nr_ppas > 1) ? 
rqd->ppa_list : &rqd->ppa_addr; - if (pblk_boundary_ppa_checks(dev, ppa_list, rqd->nr_ppas)) { - WARN_ON(1); - return -EINVAL; - } + ret = pblk_check_io(pblk, rqd); + if (ret) + return ret; +#endif - if (rqd->opcode == NVM_OP_PWRITE) { - struct pblk_line *line; - struct ppa_addr ppa; - int i; + atomic_inc(&pblk->inflight_io); - for (i = 0; i < rqd->nr_ppas; i++) { - ppa = ppa_list[i]; - line = &pblk->lines[pblk_dev_ppa_to_line(ppa)]; + return nvm_submit_io(dev, rqd); +} - spin_lock(&line->lock); - if (line->state != PBLK_LINESTATE_OPEN) { - pr_err("pblk: bad ppa: line:%d,state:%d\n", - line->id, line->state); - WARN_ON(1); - spin_unlock(&line->lock); - return -EINVAL; - } - spin_unlock(&line->lock); - } - } +int pblk_submit_io_sync(struct pblk *pblk, struct nvm_rq *rqd) +{ + struct nvm_tgt_dev *dev = pblk->dev; + +#ifdef CONFIG_NVM_DEBUG + int ret; + + ret = pblk_check_io(pblk, rqd); + if (ret) + return ret; #endif atomic_inc(&pblk->inflight_io); - return nvm_submit_io(dev, rqd); + return nvm_submit_io_sync(dev, rqd); } static void pblk_bio_map_addr_endio(struct bio *bio) @@ -597,7 +591,6 @@ static int pblk_line_submit_emeta_io(struct pblk *pblk, struct pblk_line *line, int cmd_op, bio_op; int i, j; int ret; - DECLARE_COMPLETION_ONSTACK(wait); if (dir == PBLK_WRITE) { bio_op = REQ_OP_WRITE; @@ -639,8 +632,6 @@ next_rq: rqd.dma_ppa_list = dma_ppa_list; rqd.opcode = cmd_op; rqd.nr_ppas = rq_ppas; - rqd.end_io = pblk_end_io_sync; - rqd.private = &wait; if (dir == PBLK_WRITE) { struct pblk_sec_meta *meta_list = rqd.meta_list; @@ -694,19 +685,14 @@ next_rq: } } - ret = pblk_submit_io(pblk, &rqd); + ret = pblk_submit_io_sync(pblk, &rqd); if (ret) { pr_err("pblk: emeta I/O submission failed: %d\n", ret); bio_put(bio); goto free_rqd_dma; } - if (!wait_for_completion_io_timeout(&wait, - msecs_to_jiffies(PBLK_COMMAND_TIMEOUT_MS))) { - pr_err("pblk: emeta I/O timed out\n"); - } atomic_dec(&pblk->inflight_io); - reinit_completion(&wait); if (rqd.error) { if (dir == PBLK_WRITE) 
@@ -750,7 +736,6 @@ static int pblk_line_submit_smeta_io(struct pblk *pblk, struct pblk_line *line, int i, ret; int cmd_op, bio_op; int flags; - DECLARE_COMPLETION_ONSTACK(wait); if (dir == PBLK_WRITE) { bio_op = REQ_OP_WRITE; @@ -787,8 +772,6 @@ static int pblk_line_submit_smeta_io(struct pblk *pblk, struct pblk_line *line, rqd.opcode = cmd_op; rqd.flags = flags; rqd.nr_ppas = lm->smeta_sec; - rqd.end_io = pblk_end_io_sync; - rqd.private = &wait; for (i = 0; i < lm->smeta_sec; i++, paddr++) { struct pblk_sec_meta *meta_list = rqd.meta_list; @@ -807,17 +790,13 @@ static int pblk_line_submit_smeta_io(struct pblk *pblk, struct pblk_line *line, * the write thread is the only one sending write and erase commands, * there is no need to take the LUN semaphore. */ - ret = pblk_submit_io(pblk, &rqd); + ret = pblk_submit_io_sync(pblk, &rqd); if (ret) { pr_err("pblk: smeta I/O submission failed: %d\n", ret); bio_put(bio); goto free_ppa_list; } - if (!wait_for_completion_io_timeout(&wait, - msecs_to_jiffies(PBLK_COMMAND_TIMEOUT_MS))) { - pr_err("pblk: smeta I/O timed out\n"); - } atomic_dec(&pblk->inflight_io); if (rqd.error) { @@ -861,19 +840,15 @@ static int pblk_blk_erase_sync(struct pblk *pblk, struct ppa_addr ppa) { struct nvm_rq rqd; int ret = 0; - DECLARE_COMPLETION_ONSTACK(wait); memset(&rqd, 0, sizeof(struct nvm_rq)); pblk_setup_e_rq(pblk, &rqd, ppa); - rqd.end_io = pblk_end_io_sync; - rqd.private = &wait; - /* The write thread schedules erases so that it minimizes disturbances * with writes. Thus, there is no need to take the LUN semaphore. 
*/ - ret = pblk_submit_io(pblk, &rqd); + ret = pblk_submit_io_sync(pblk, &rqd); if (ret) { struct nvm_tgt_dev *dev = pblk->dev; struct nvm_geo *geo = &dev->geo; @@ -886,11 +861,6 @@ static int pblk_blk_erase_sync(struct pblk *pblk, struct ppa_addr ppa) goto out; } - if (!wait_for_completion_io_timeout(&wait, - msecs_to_jiffies(PBLK_COMMAND_TIMEOUT_MS))) { - pr_err("pblk: sync erase timed out\n"); - } - out: rqd.private = pblk; __pblk_end_io_erase(pblk, &rqd); diff --git a/drivers/lightnvm/pblk-read.c b/drivers/lightnvm/pblk-read.c index 71c58503f1a4..ca79d8fb3e60 100644 --- a/drivers/lightnvm/pblk-read.c +++ b/drivers/lightnvm/pblk-read.c @@ -207,7 +207,6 @@ static int pblk_fill_partial_read_bio(struct pblk *pblk, struct nvm_rq *rqd, int nr_secs = rqd->nr_ppas; int nr_holes = nr_secs - bitmap_weight(read_bitmap, nr_secs); int i, ret, hole; - DECLARE_COMPLETION_ONSTACK(wait); /* Re-use allocated memory for intermediate lbas */ lba_list_mem = (((void *)rqd->ppa_list) + pblk_dma_ppa_size); @@ -232,8 +231,6 @@ static int pblk_fill_partial_read_bio(struct pblk *pblk, struct nvm_rq *rqd, rqd->bio = new_bio; rqd->nr_ppas = nr_holes; rqd->flags = pblk_set_read_mode(pblk, PBLK_READ_RANDOM); - rqd->end_io = pblk_end_io_sync; - rqd->private = &wait; if (unlikely(nr_holes == 1)) { ppa_ptr = rqd->ppa_list; @@ -241,18 +238,13 @@ static int pblk_fill_partial_read_bio(struct pblk *pblk, struct nvm_rq *rqd, rqd->ppa_addr = rqd->ppa_list[0]; } - ret = pblk_submit_read_io(pblk, rqd); + ret = pblk_submit_io_sync(pblk, rqd); if (ret) { bio_put(rqd->bio); - pr_err("pblk: read IO submission failed\n"); + pr_err("pblk: sync read IO submission failed\n"); goto err; } - if (!wait_for_completion_io_timeout(&wait, - msecs_to_jiffies(PBLK_COMMAND_TIMEOUT_MS))) { - pr_err("pblk: partial read I/O timed out\n"); - } - if (rqd->error) { atomic_long_inc(&pblk->read_failed); #ifdef CONFIG_NVM_DEBUG @@ -537,7 +529,6 @@ int pblk_submit_read_gc(struct pblk *pblk, struct pblk_gc_rq *gc_rq) struct nvm_rq 
rqd; int data_len; int ret = NVM_IO_OK; - DECLARE_COMPLETION_ONSTACK(wait); memset(&rqd, 0, sizeof(struct nvm_rq)); @@ -577,22 +568,16 @@ int pblk_submit_read_gc(struct pblk *pblk, struct pblk_gc_rq *gc_rq) bio_set_op_attrs(bio, REQ_OP_READ, 0); rqd.opcode = NVM_OP_PREAD; - rqd.end_io = pblk_end_io_sync; - rqd.private = &wait; rqd.nr_ppas = gc_rq->secs_to_gc; rqd.flags = pblk_set_read_mode(pblk, PBLK_READ_RANDOM); rqd.bio = bio; - if (pblk_submit_read_io(pblk, &rqd)) { + if (pblk_submit_io_sync(pblk, &rqd)) { ret = -EIO; pr_err("pblk: GC read request failed\n"); goto err_free_bio; } - if (!wait_for_completion_io_timeout(&wait, - msecs_to_jiffies(PBLK_COMMAND_TIMEOUT_MS))) { - pr_err("pblk: GC read I/O timed out\n"); - } atomic_dec(&pblk->inflight_io); if (rqd.error) { diff --git a/drivers/lightnvm/pblk-recovery.c b/drivers/lightnvm/pblk-recovery.c index 9772a947ca4f..eadb3eb5d4dc 100644 --- a/drivers/lightnvm/pblk-recovery.c +++ b/drivers/lightnvm/pblk-recovery.c @@ -216,7 +216,6 @@ static int pblk_recov_read_oob(struct pblk *pblk, struct pblk_line *line, int rq_ppas, rq_len; int i, j; int ret = 0; - DECLARE_COMPLETION_ONSTACK(wait); ppa_list = p.ppa_list; meta_list = p.meta_list; @@ -253,8 +252,6 @@ next_read_rq: rqd->ppa_list = ppa_list; rqd->dma_ppa_list = dma_ppa_list; rqd->dma_meta_list = dma_meta_list; - rqd->end_io = pblk_end_io_sync; - rqd->private = &wait; if (pblk_io_aligned(pblk, rq_ppas)) rqd->flags = pblk_set_read_mode(pblk, PBLK_READ_SEQUENTIAL); @@ -280,19 +277,13 @@ next_read_rq: } /* If read fails, more padding is needed */ - ret = pblk_submit_io(pblk, rqd); + ret = pblk_submit_io_sync(pblk, rqd); if (ret) { pr_err("pblk: I/O submission failed: %d\n", ret); return ret; } - if (!wait_for_completion_io_timeout(&wait, - msecs_to_jiffies(PBLK_COMMAND_TIMEOUT_MS))) { - pr_err("pblk: L2P recovery read timed out\n"); - return -EINTR; - } atomic_dec(&pblk->inflight_io); - reinit_completion(&wait); /* At this point, the read should not fail. 
If it does, it is a problem * we cannot recover from here. Need FTL log. @@ -504,7 +495,6 @@ static int pblk_recov_scan_all_oob(struct pblk *pblk, struct pblk_line *line, int ret = 0; int rec_round; int left_ppas = pblk_calc_sec_in_line(pblk, line) - line->cur_sec; - DECLARE_COMPLETION_ONSTACK(wait); ppa_list = p.ppa_list; meta_list = p.meta_list; @@ -539,8 +529,6 @@ next_rq: rqd->ppa_list = ppa_list; rqd->dma_ppa_list = dma_ppa_list; rqd->dma_meta_list = dma_meta_list; - rqd->end_io = pblk_end_io_sync; - rqd->private = &wait; if (pblk_io_aligned(pblk, rq_ppas)) rqd->flags = pblk_set_read_mode(pblk, PBLK_READ_SEQUENTIAL); @@ -566,18 +554,13 @@ next_rq: addr_to_gen_ppa(pblk, w_ptr, line->id); } - ret = pblk_submit_io(pblk, rqd); + ret = pblk_submit_io_sync(pblk, rqd); if (ret) { pr_err("pblk: I/O submission failed: %d\n", ret); return ret; } - if (!wait_for_completion_io_timeout(&wait, - msecs_to_jiffies(PBLK_COMMAND_TIMEOUT_MS))) { - pr_err("pblk: L2P recovery read timed out\n"); - } atomic_dec(&pblk->inflight_io); - reinit_completion(&wait); /* This should not happen since the read failed during normal recovery, * but the media works funny sometimes... 
@@ -645,7 +628,6 @@ static int pblk_recov_scan_oob(struct pblk *pblk, struct pblk_line *line, int i, j; int ret = 0; int left_ppas = pblk_calc_sec_in_line(pblk, line); - DECLARE_COMPLETION_ONSTACK(wait); ppa_list = p.ppa_list; meta_list = p.meta_list; @@ -678,8 +660,6 @@ next_rq: rqd->ppa_list = ppa_list; rqd->dma_ppa_list = dma_ppa_list; rqd->dma_meta_list = dma_meta_list; - rqd->end_io = pblk_end_io_sync; - rqd->private = &wait; if (pblk_io_aligned(pblk, rq_ppas)) rqd->flags = pblk_set_read_mode(pblk, PBLK_READ_SEQUENTIAL); @@ -705,19 +685,14 @@ next_rq: addr_to_gen_ppa(pblk, paddr, line->id); } - ret = pblk_submit_io(pblk, rqd); + ret = pblk_submit_io_sync(pblk, rqd); if (ret) { pr_err("pblk: I/O submission failed: %d\n", ret); bio_put(bio); return ret; } - if (!wait_for_completion_io_timeout(&wait, - msecs_to_jiffies(PBLK_COMMAND_TIMEOUT_MS))) { - pr_err("pblk: L2P recovery read timed out\n"); - } atomic_dec(&pblk->inflight_io); - reinit_completion(&wait); /* Reached the end of the written line */ if (rqd->error) { diff --git a/drivers/lightnvm/pblk.h b/drivers/lightnvm/pblk.h index 6c9ea9a93704..6b64288de6f7 100644 --- a/drivers/lightnvm/pblk.h +++ b/drivers/lightnvm/pblk.h @@ -714,6 +714,7 @@ void pblk_discard(struct pblk *pblk, struct bio *bio); void pblk_log_write_err(struct pblk *pblk, struct nvm_rq *rqd); void pblk_log_read_err(struct pblk *pblk, struct nvm_rq *rqd); int pblk_submit_io(struct pblk *pblk, struct nvm_rq *rqd); +int pblk_submit_io_sync(struct pblk *pblk, struct nvm_rq *rqd); int pblk_submit_meta_io(struct pblk *pblk, struct pblk_line *meta_line); struct bio *pblk_bio_map_addr(struct pblk *pblk, void *data, unsigned int nr_secs, unsigned int len, @@ -1203,7 +1204,6 @@ static inline void pblk_print_failed_rqd(struct pblk *pblk, struct nvm_rq *rqd, pr_err("error:%d, ppa_status:%llx\n", error, rqd->ppa_status); } -#endif static inline int pblk_boundary_ppa_checks(struct nvm_tgt_dev *tgt_dev, struct ppa_addr *ppas, int nr_ppas) @@ -1224,14 
+1224,50 @@ static inline int pblk_boundary_ppa_checks(struct nvm_tgt_dev *tgt_dev, ppa->g.sec < geo->sec_per_pg) continue; -#ifdef CONFIG_NVM_DEBUG print_ppa(ppa, "boundary", i); -#endif + return 1; } return 0; } +static inline int pblk_check_io(struct pblk *pblk, struct nvm_rq *rqd) +{ + struct nvm_tgt_dev *dev = pblk->dev; + struct ppa_addr *ppa_list; + + ppa_list = (rqd->nr_ppas > 1) ? rqd->ppa_list : &rqd->ppa_addr; + + if (pblk_boundary_ppa_checks(dev, ppa_list, rqd->nr_ppas)) { + WARN_ON(1); + return -EINVAL; + } + + if (rqd->opcode == NVM_OP_PWRITE) { + struct pblk_line *line; + struct ppa_addr ppa; + int i; + + for (i = 0; i < rqd->nr_ppas; i++) { + ppa = ppa_list[i]; + line = &pblk->lines[pblk_dev_ppa_to_line(ppa)]; + + spin_lock(&line->lock); + if (line->state != PBLK_LINESTATE_OPEN) { + pr_err("pblk: bad ppa: line:%d,state:%d\n", + line->id, line->state); + WARN_ON(1); + spin_unlock(&line->lock); + return -EINVAL; + } + spin_unlock(&line->lock); + } + } + + return 0; +} +#endif + static inline int pblk_boundary_paddr_checks(struct pblk *pblk, u64 paddr) { struct pblk_line_meta *lm = &pblk->lm; diff --git a/drivers/nvme/host/lightnvm.c b/drivers/nvme/host/lightnvm.c index 6017153c2439..8fc949c5b49b 100644 --- a/drivers/nvme/host/lightnvm.c +++ b/drivers/nvme/host/lightnvm.c @@ -492,34 +492,47 @@ static void nvme_nvm_end_io(struct request *rq, blk_status_t status) blk_mq_free_request(rq); } -static int nvme_nvm_submit_io(struct nvm_dev *dev, struct nvm_rq *rqd) +static struct request *nvme_nvm_alloc_request(struct request_queue *q, + struct nvm_rq *rqd, + struct nvme_nvm_command *cmd) { - struct request_queue *q = dev->q; struct nvme_ns *ns = q->queuedata; struct request *rq; - struct bio *bio = rqd->bio; - struct nvme_nvm_command *cmd; - - cmd = kzalloc(sizeof(struct nvme_nvm_command), GFP_KERNEL); - if (!cmd) - return -ENOMEM; nvme_nvm_rqtocmd(rqd, ns, cmd); rq = nvme_alloc_request(q, (struct nvme_command *)cmd, 0, NVME_QID_ANY); - if (IS_ERR(rq)) { - 
kfree(cmd); - return PTR_ERR(rq); - } + if (IS_ERR(rq)) + return rq; + rq->cmd_flags &= ~REQ_FAILFAST_DRIVER; - if (bio) { - blk_init_request_from_bio(rq, bio); + if (rqd->bio) { + blk_init_request_from_bio(rq, rqd->bio); } else { rq->ioprio = IOPRIO_PRIO_VALUE(IOPRIO_CLASS_BE, IOPRIO_NORM); rq->__data_len = 0; } + return rq; +} + +static int nvme_nvm_submit_io(struct nvm_dev *dev, struct nvm_rq *rqd) +{ + struct request_queue *q = dev->q; + struct nvme_nvm_command *cmd; + struct request *rq; + + cmd = kzalloc(sizeof(struct nvme_nvm_command), GFP_KERNEL); + if (!cmd) + return -ENOMEM; + + rq = nvme_nvm_alloc_request(q, rqd, cmd); + if (IS_ERR(rq)) { + kfree(cmd); + return PTR_ERR(rq); + } + rq->end_io_data = rqd; blk_execute_rq_nowait(q, NULL, rq, 0, nvme_nvm_end_io); @@ -527,6 +540,34 @@ static int nvme_nvm_submit_io(struct nvm_dev *dev, struct nvm_rq *rqd) return 0; } +static int nvme_nvm_submit_io_sync(struct nvm_dev *dev, struct nvm_rq *rqd) +{ + struct request_queue *q = dev->q; + struct request *rq; + struct nvme_nvm_command cmd; + int ret = 0; + + memset(&cmd, 0, sizeof(struct nvme_nvm_command)); + + rq = nvme_nvm_alloc_request(q, rqd, &cmd); + if (IS_ERR(rq)) + return PTR_ERR(rq); + + /* I/Os can fail and the error is signaled through rqd. Callers must + * handle the error accordingly. 
+ */ + blk_execute_rq(q, NULL, rq, 0); + if (nvme_req(rq)->flags & NVME_REQ_CANCELLED) + ret = -EINTR; + + rqd->ppa_status = le64_to_cpu(nvme_req(rq)->result.u64); + rqd->error = nvme_req(rq)->status; + + blk_mq_free_request(rq); + + return ret; +} + static void *nvme_nvm_create_dma_pool(struct nvm_dev *nvmdev, char *name) { struct nvme_ns *ns = nvmdev->q->queuedata; @@ -562,6 +603,7 @@ static struct nvm_dev_ops nvme_nvm_dev_ops = { .set_bb_tbl = nvme_nvm_set_bb_tbl, .submit_io = nvme_nvm_submit_io, + .submit_io_sync = nvme_nvm_submit_io_sync, .create_dma_pool = nvme_nvm_create_dma_pool, .destroy_dma_pool = nvme_nvm_destroy_dma_pool, diff --git a/include/linux/lightnvm.h b/include/linux/lightnvm.h index 4f0e4a0fd204..b7f111ff4d3b 100644 --- a/include/linux/lightnvm.h +++ b/include/linux/lightnvm.h @@ -56,6 +56,7 @@ typedef int (nvm_get_l2p_tbl_fn)(struct nvm_dev *, u64, u32, typedef int (nvm_op_bb_tbl_fn)(struct nvm_dev *, struct ppa_addr, u8 *); typedef int (nvm_op_set_bb_fn)(struct nvm_dev *, struct ppa_addr *, int, int); typedef int (nvm_submit_io_fn)(struct nvm_dev *, struct nvm_rq *); +typedef int (nvm_submit_io_sync_fn)(struct nvm_dev *, struct nvm_rq *); typedef void *(nvm_create_dma_pool_fn)(struct nvm_dev *, char *); typedef void (nvm_destroy_dma_pool_fn)(void *); typedef void *(nvm_dev_dma_alloc_fn)(struct nvm_dev *, void *, gfp_t, @@ -69,6 +70,7 @@ struct nvm_dev_ops { nvm_op_set_bb_fn *set_bb_tbl; nvm_submit_io_fn *submit_io; + nvm_submit_io_sync_fn *submit_io_sync; nvm_create_dma_pool_fn *create_dma_pool; nvm_destroy_dma_pool_fn *destroy_dma_pool; @@ -477,6 +479,7 @@ extern int nvm_set_tgt_bb_tbl(struct nvm_tgt_dev *, struct ppa_addr *, int, int); extern int nvm_max_phys_sects(struct nvm_tgt_dev *); extern int nvm_submit_io(struct nvm_tgt_dev *, struct nvm_rq *); +extern int nvm_submit_io_sync(struct nvm_tgt_dev *, struct nvm_rq *); extern int nvm_erase_sync(struct nvm_tgt_dev *, struct ppa_addr *, int); extern int nvm_get_l2p_tbl(struct nvm_tgt_dev *, 
u64, u32, nvm_l2p_update_fn *, void *); -- cgit v1.2.3 From cdd094fd0ad750c94ccaa5b2ee84fd8264f6f433 Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Fri, 13 Oct 2017 09:36:06 -0600 Subject: Revert "lightnvm: prevent bd removal if busy" Christoph correctly points out that this issue is no different for other block devices, and poking at cross layer internals is not the right way to solve it. This reverts commit bb6aa6f08268bbce4e0185b18cab9e04505d6695. Signed-off-by: Jens Axboe --- drivers/lightnvm/core.c | 14 -------------- 1 file changed, 14 deletions(-) (limited to 'drivers/lightnvm') diff --git a/drivers/lightnvm/core.c b/drivers/lightnvm/core.c index fe21f4dd33e9..83249b43dd06 100644 --- a/drivers/lightnvm/core.c +++ b/drivers/lightnvm/core.c @@ -390,7 +390,6 @@ static void __nvm_remove_target(struct nvm_target *t) static int nvm_remove_tgt(struct nvm_dev *dev, struct nvm_ioctl_remove *remove) { struct nvm_target *t; - struct block_device *bdev; mutex_lock(&dev->mlock); t = nvm_find_target(dev, remove->tgtname); @@ -398,19 +397,6 @@ static int nvm_remove_tgt(struct nvm_dev *dev, struct nvm_ioctl_remove *remove) mutex_unlock(&dev->mlock); return 1; } - bdev = bdget_disk(t->disk, 0); - if (!bdev) { - pr_err("nvm: removal failed, allocating bd failed\n"); - mutex_unlock(&dev->mlock); - return -ENOMEM; - } - if (bdev->bd_super || bdev->bd_part_count) { - pr_err("nvm: removal failed, block device busy\n"); - bdput(bdev); - mutex_unlock(&dev->mlock); - return -EBUSY; - } - bdput(bdev); __nvm_remove_target(t); mutex_unlock(&dev->mlock); -- cgit v1.2.3 From 75bc5f06617fe1bc9a79ba9e3baccdcae3743404 Mon Sep 17 00:00:00 2001 From: Javier González Date: Tue, 24 Oct 2017 15:56:13 +0200 Subject: lightnvm: pblk: remove leftover testing function MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit A previous patch inadvertently left an unused test function in the header, kill it. 
Fixes: 8bd400204bd5 ("lightnvm: pblk: cleanup unused and static functions") Signed-off-by: Javier González Signed-off-by: Jens Axboe --- drivers/lightnvm/pblk.h | 5 ----- 1 file changed, 5 deletions(-) (limited to 'drivers/lightnvm') diff --git a/drivers/lightnvm/pblk.h b/drivers/lightnvm/pblk.h index 6b64288de6f7..90961033a79f 100644 --- a/drivers/lightnvm/pblk.h +++ b/drivers/lightnvm/pblk.h @@ -860,11 +860,6 @@ int pblk_rl_is_limit(struct pblk_rl *rl); int pblk_sysfs_init(struct gendisk *tdisk); void pblk_sysfs_exit(struct gendisk *tdisk); -static inline void test(size_t a) -{ - a += 1; -} - static inline void *pblk_malloc(size_t size, int type, gfp_t flags) { if (type == PBLK_KMALLOC_META) -- cgit v1.2.3