diff options
Diffstat (limited to 'drivers')
106 files changed, 2379 insertions, 2623 deletions
diff --git a/drivers/ata/libata-eh.c b/drivers/ata/libata-eh.c index 01306c018398..938ed513b070 100644 --- a/drivers/ata/libata-eh.c +++ b/drivers/ata/libata-eh.c @@ -919,8 +919,6 @@ static void ata_eh_set_pending(struct ata_port *ap, int fastdrain) void ata_qc_schedule_eh(struct ata_queued_cmd *qc) { struct ata_port *ap = qc->ap; - struct request_queue *q = qc->scsicmd->device->request_queue; - unsigned long flags; WARN_ON(!ap->ops->error_handler); @@ -932,9 +930,7 @@ void ata_qc_schedule_eh(struct ata_queued_cmd *qc) * Note that ATA_QCFLAG_FAILED is unconditionally set after * this function completes. */ - spin_lock_irqsave(q->queue_lock, flags); blk_abort_request(qc->scsicmd->request); - spin_unlock_irqrestore(q->queue_lock, flags); } /** diff --git a/drivers/block/aoe/aoe.h b/drivers/block/aoe/aoe.h index 7ca76ed2e71a..84d0fcebd6af 100644 --- a/drivers/block/aoe/aoe.h +++ b/drivers/block/aoe/aoe.h @@ -100,6 +100,10 @@ enum { MAX_TAINT = 1000, /* cap on aoetgt taint */ }; +struct aoe_req { + unsigned long nr_bios; +}; + struct buf { ulong nframesout; struct bio *bio; diff --git a/drivers/block/aoe/aoeblk.c b/drivers/block/aoe/aoeblk.c index ed26b7287256..e2c6aae2d636 100644 --- a/drivers/block/aoe/aoeblk.c +++ b/drivers/block/aoe/aoeblk.c @@ -387,6 +387,7 @@ aoeblk_gdalloc(void *vp) set = &d->tag_set; set->ops = &aoeblk_mq_ops; + set->cmd_size = sizeof(struct aoe_req); set->nr_hw_queues = 1; set->queue_depth = 128; set->numa_node = NUMA_NO_NODE; diff --git a/drivers/block/aoe/aoecmd.c b/drivers/block/aoe/aoecmd.c index bb2fba651bd2..3cf9bc5d8d95 100644 --- a/drivers/block/aoe/aoecmd.c +++ b/drivers/block/aoe/aoecmd.c @@ -822,17 +822,6 @@ out: spin_unlock_irqrestore(&d->lock, flags); } -static unsigned long -rqbiocnt(struct request *r) -{ - struct bio *bio; - unsigned long n = 0; - - __rq_for_each_bio(bio, r) - n++; - return n; -} - static void bufinit(struct buf *buf, struct request *rq, struct bio *bio) { @@ -847,6 +836,7 @@ nextbuf(struct aoedev *d) { struct request *rq; struct request_queue *q; + struct aoe_req *req; struct buf *buf; struct bio *bio; @@ -865,7 +855,11 @@ nextbuf(struct aoedev *d) blk_mq_start_request(rq); d->ip.rq = rq; d->ip.nxbio = rq->bio; - rq->special = (void *) rqbiocnt(rq); + + req = blk_mq_rq_to_pdu(rq); + req->nr_bios = 0; + __rq_for_each_bio(bio, rq) + req->nr_bios++; } buf = mempool_alloc(d->bufpool, GFP_ATOMIC); if (buf == NULL) { @@ -1069,16 +1063,13 @@ aoe_end_request(struct aoedev *d, struct request *rq, int fastfail) static void aoe_end_buf(struct aoedev *d, struct buf *buf) { - struct request *rq; - unsigned long n; + struct request *rq = buf->rq; + struct aoe_req *req = blk_mq_rq_to_pdu(rq); if (buf == d->ip.buf) d->ip.buf = NULL; - rq = buf->rq; mempool_free(buf, d->bufpool); - n = (unsigned long) rq->special; - rq->special = (void *) --n; - if (n == 0) + if (--req->nr_bios == 0) aoe_end_request(d, rq, 0); } diff --git a/drivers/block/aoe/aoedev.c b/drivers/block/aoe/aoedev.c index 9063f8efbd3b..5b49f1b33ebe 100644 --- a/drivers/block/aoe/aoedev.c +++ b/drivers/block/aoe/aoedev.c @@ -160,21 +160,22 @@ static void aoe_failip(struct aoedev *d) { struct request *rq; + struct aoe_req *req; struct bio *bio; - unsigned long n; aoe_failbuf(d, d->ip.buf); - rq = d->ip.rq; if (rq == NULL) return; + + req = blk_mq_rq_to_pdu(rq); while ((bio = d->ip.nxbio)) { bio->bi_status = BLK_STS_IOERR; d->ip.nxbio = bio->bi_next; - n = (unsigned long) rq->special; - rq->special = (void *) --n; + req->nr_bios--; } - if ((unsigned long) rq->special == 0) + + if (!req->nr_bios) aoe_end_request(d, rq, 0); } diff --git a/drivers/block/ataflop.c b/drivers/block/ataflop.c index f88b4c26d422..b0dbbdfeb33e 100644 --- a/drivers/block/ataflop.c +++ b/drivers/block/ataflop.c @@ -1471,6 +1471,15 @@ static void setup_req_params( int drive ) ReqTrack, ReqSector, (unsigned long)ReqData )); } +static void ataflop_commit_rqs(struct blk_mq_hw_ctx *hctx) +{ + spin_lock_irq(&ataflop_lock); + atari_disable_irq(IRQ_MFP_FDC); + finish_fdc(); + atari_enable_irq(IRQ_MFP_FDC); + spin_unlock_irq(&ataflop_lock); +} + static blk_status_t ataflop_queue_rq(struct blk_mq_hw_ctx *hctx, const struct blk_mq_queue_data *bd) { @@ -1947,6 +1956,7 @@ static const struct block_device_operations floppy_fops = { static const struct blk_mq_ops ataflop_mq_ops = { .queue_rq = ataflop_queue_rq, + .commit_rqs = ataflop_commit_rqs, }; static struct kobject *floppy_find(dev_t dev, int *part, void *data) @@ -1982,6 +1992,7 @@ static int __init atari_floppy_init (void) &ataflop_mq_ops, 2, BLK_MQ_F_SHOULD_MERGE); if (IS_ERR(unit[i].disk->queue)) { + put_disk(unit[i].disk); ret = PTR_ERR(unit[i].disk->queue); unit[i].disk->queue = NULL; goto err; @@ -2033,18 +2044,13 @@ static int __init atari_floppy_init (void) return 0; err: - do { + while (--i >= 0) { struct gendisk *disk = unit[i].disk; - if (disk) { - if (disk->queue) { - blk_cleanup_queue(disk->queue); - disk->queue = NULL; - } - blk_mq_free_tag_set(&unit[i].tag_set); - put_disk(unit[i].disk); - } - } while (i--); + blk_cleanup_queue(disk->queue); + blk_mq_free_tag_set(&unit[i].tag_set); + put_disk(unit[i].disk); + } unregister_blkdev(FLOPPY_MAJOR, "fd"); return ret; diff --git a/drivers/block/drbd/drbd_main.c b/drivers/block/drbd/drbd_main.c index fa8204214ac0..f973a2a845c8 100644 --- a/drivers/block/drbd/drbd_main.c +++ b/drivers/block/drbd/drbd_main.c @@ -2792,7 +2792,7 @@ enum drbd_ret_code drbd_create_device(struct drbd_config_context *adm_ctx, unsig drbd_init_set_defaults(device); - q = blk_alloc_queue_node(GFP_KERNEL, NUMA_NO_NODE, &resource->req_lock); + q = blk_alloc_queue_node(GFP_KERNEL, NUMA_NO_NODE); if (!q) goto out_no_q; device->rq_queue = q; diff --git a/drivers/block/floppy.c b/drivers/block/floppy.c index fb23578e9a41..6f2856c6d0f2 100644 --- a/drivers/block/floppy.c +++ b/drivers/block/floppy.c @@ -2231,7 +2231,6 @@ static void request_done(int uptodate) { struct request *req = current_req; struct request_queue *q; - unsigned long flags; int block; char msg[sizeof("request done ") + sizeof(int) * 3]; @@ -2254,10 +2253,7 @@ static void request_done(int uptodate) if (block > _floppy->sect) DRS->maxtrack = 1; - /* unlock chained buffers */ - spin_lock_irqsave(q->queue_lock, flags); floppy_end_request(req, 0); - spin_unlock_irqrestore(q->queue_lock, flags); } else { if (rq_data_dir(req) == WRITE) { /* record write error information */ @@ -2269,9 +2265,7 @@ static void request_done(int uptodate) DRWE->last_error_sector = blk_rq_pos(req); DRWE->last_error_generation = DRS->generation; } - spin_lock_irqsave(q->queue_lock, flags); floppy_end_request(req, BLK_STS_IOERR); - spin_unlock_irqrestore(q->queue_lock, flags); } } diff --git a/drivers/block/loop.c b/drivers/block/loop.c index cb0cc8685076..0770004616de 100644 --- a/drivers/block/loop.c +++ b/drivers/block/loop.c @@ -77,13 +77,14 @@ #include <linux/falloc.h> #include <linux/uio.h> #include <linux/ioprio.h> +#include <linux/blk-cgroup.h> #include "loop.h" #include <linux/uaccess.h> static DEFINE_IDR(loop_index_idr); -static DEFINE_MUTEX(loop_index_mutex); +static DEFINE_MUTEX(loop_ctl_mutex); static int max_part; static int part_shift; @@ -630,18 +631,7 @@ static void loop_reread_partitions(struct loop_device *lo, { int rc; - /* - * bd_mutex has been held already in release path, so don't - * acquire it if this function is called in such case. - * - * If the reread partition isn't from release path, lo_refcnt - * must be at least one and it can only become zero when the - * current holder is released. - */ - if (!atomic_read(&lo->lo_refcnt)) - rc = __blkdev_reread_part(bdev); - else - rc = blkdev_reread_part(bdev); + rc = blkdev_reread_part(bdev); if (rc) pr_warn("%s: partition scan of loop%d (%s) failed (rc=%d)\n", __func__, lo->lo_number, lo->lo_file_name, rc); @@ -688,26 +678,30 @@ static int loop_validate_file(struct file *file, struct block_device *bdev) static int loop_change_fd(struct loop_device *lo, struct block_device *bdev, unsigned int arg) { - struct file *file, *old_file; + struct file *file = NULL, *old_file; int error; + bool partscan; + error = mutex_lock_killable(&loop_ctl_mutex); + if (error) + return error; error = -ENXIO; if (lo->lo_state != Lo_bound) - goto out; + goto out_err; /* the loop device has to be read-only */ error = -EINVAL; if (!(lo->lo_flags & LO_FLAGS_READ_ONLY)) - goto out; + goto out_err; error = -EBADF; file = fget(arg); if (!file) - goto out; + goto out_err; error = loop_validate_file(file, bdev); if (error) - goto out_putf; + goto out_err; old_file = lo->lo_backing_file; @@ -715,7 +709,7 @@ static int loop_change_fd(struct loop_device *lo, struct block_device *bdev, /* size of the new backing store needs to be the same */ if (get_loop_size(lo, file) != get_loop_size(lo, old_file)) - goto out_putf; + goto out_err; /* and ... switch */ blk_mq_freeze_queue(lo->lo_queue); @@ -726,15 +720,22 @@ static int loop_change_fd(struct loop_device *lo, struct block_device *bdev, lo->old_gfp_mask & ~(__GFP_IO|__GFP_FS)); loop_update_dio(lo); blk_mq_unfreeze_queue(lo->lo_queue); - + partscan = lo->lo_flags & LO_FLAGS_PARTSCAN; + mutex_unlock(&loop_ctl_mutex); + /* + * We must drop file reference outside of loop_ctl_mutex as dropping + * the file ref can take bd_mutex which creates circular locking + * dependency. + */ fput(old_file); - if (lo->lo_flags & LO_FLAGS_PARTSCAN) + if (partscan) loop_reread_partitions(lo, bdev); return 0; - out_putf: - fput(file); - out: +out_err: + mutex_unlock(&loop_ctl_mutex); + if (file) + fput(file); return error; } @@ -909,6 +910,7 @@ static int loop_set_fd(struct loop_device *lo, fmode_t mode, int lo_flags = 0; int error; loff_t size; + bool partscan; /* This is safe, since we have a reference from open(). */ __module_get(THIS_MODULE); @@ -918,13 +920,17 @@ static int loop_set_fd(struct loop_device *lo, fmode_t mode, if (!file) goto out; + error = mutex_lock_killable(&loop_ctl_mutex); + if (error) + goto out_putf; + error = -EBUSY; if (lo->lo_state != Lo_unbound) - goto out_putf; + goto out_unlock; error = loop_validate_file(file, bdev); if (error) - goto out_putf; + goto out_unlock; mapping = file->f_mapping; inode = mapping->host; @@ -936,10 +942,10 @@ static int loop_set_fd(struct loop_device *lo, fmode_t mode, error = -EFBIG; size = get_loop_size(lo, file); if ((loff_t)(sector_t)size != size) - goto out_putf; + goto out_unlock; error = loop_prepare_queue(lo); if (error) - goto out_putf; + goto out_unlock; error = 0; @@ -971,18 +977,22 @@ static int loop_set_fd(struct loop_device *lo, fmode_t mode, lo->lo_state = Lo_bound; if (part_shift) lo->lo_flags |= LO_FLAGS_PARTSCAN; - if (lo->lo_flags & LO_FLAGS_PARTSCAN) - loop_reread_partitions(lo, bdev); + partscan = lo->lo_flags & LO_FLAGS_PARTSCAN; /* Grab the block_device to prevent its destruction after we - * put /dev/loopXX inode. Later in loop_clr_fd() we bdput(bdev). + * put /dev/loopXX inode. Later in __loop_clr_fd() we bdput(bdev). */ bdgrab(bdev); + mutex_unlock(&loop_ctl_mutex); + if (partscan) + loop_reread_partitions(lo, bdev); return 0; - out_putf: +out_unlock: + mutex_unlock(&loop_ctl_mutex); +out_putf: fput(file); - out: +out: /* This is safe: open() is still holding a reference. */ module_put(THIS_MODULE); return error; @@ -1025,39 +1035,31 @@ loop_init_xfer(struct loop_device *lo, struct loop_func_table *xfer, return err; } -static int loop_clr_fd(struct loop_device *lo) +static int __loop_clr_fd(struct loop_device *lo, bool release) { - struct file *filp = lo->lo_backing_file; + struct file *filp = NULL; gfp_t gfp = lo->old_gfp_mask; struct block_device *bdev = lo->lo_device; + int err = 0; + bool partscan = false; + int lo_number; - if (lo->lo_state != Lo_bound) - return -ENXIO; - - /* - * If we've explicitly asked to tear down the loop device, - * and it has an elevated reference count, set it for auto-teardown when - * the last reference goes away. This stops $!~#$@ udev from - * preventing teardown because it decided that it needs to run blkid on - * the loopback device whenever they appear. xfstests is notorious for - * failing tests because blkid via udev races with a losetup - * <dev>/do something like mkfs/losetup -d <dev> causing the losetup -d - * command to fail with EBUSY. - */ - if (atomic_read(&lo->lo_refcnt) > 1) { - lo->lo_flags |= LO_FLAGS_AUTOCLEAR; - mutex_unlock(&lo->lo_ctl_mutex); - return 0; + mutex_lock(&loop_ctl_mutex); + if (WARN_ON_ONCE(lo->lo_state != Lo_rundown)) { + err = -ENXIO; + goto out_unlock; } - if (filp == NULL) - return -EINVAL; + filp = lo->lo_backing_file; + if (filp == NULL) { + err = -EINVAL; + goto out_unlock; + } /* freeze request queue during the transition */ blk_mq_freeze_queue(lo->lo_queue); spin_lock_irq(&lo->lo_lock); - lo->lo_state = Lo_rundown; lo->lo_backing_file = NULL; spin_unlock_irq(&lo->lo_lock); @@ -1093,21 +1095,73 @@ static int loop_clr_fd(struct loop_device *lo) module_put(THIS_MODULE); blk_mq_unfreeze_queue(lo->lo_queue); - if (lo->lo_flags & LO_FLAGS_PARTSCAN && bdev) - loop_reread_partitions(lo, bdev); + partscan = lo->lo_flags & LO_FLAGS_PARTSCAN && bdev; + lo_number = lo->lo_number; lo->lo_flags = 0; if (!part_shift) lo->lo_disk->flags |= GENHD_FL_NO_PART_SCAN; loop_unprepare_queue(lo); - mutex_unlock(&lo->lo_ctl_mutex); +out_unlock: + mutex_unlock(&loop_ctl_mutex); + if (partscan) { + /* + * bd_mutex has been held already in release path, so don't + * acquire it if this function is called in such case. + * + * If the reread partition isn't from release path, lo_refcnt + * must be at least one and it can only become zero when the + * current holder is released. + */ + if (release) + err = __blkdev_reread_part(bdev); + else + err = blkdev_reread_part(bdev); + pr_warn("%s: partition scan of loop%d failed (rc=%d)\n", + __func__, lo_number, err); + /* Device is gone, no point in returning error */ + err = 0; + } /* - * Need not hold lo_ctl_mutex to fput backing file. - * Calling fput holding lo_ctl_mutex triggers a circular + * Need not hold loop_ctl_mutex to fput backing file. + * Calling fput holding loop_ctl_mutex triggers a circular * lock dependency possibility warning as fput can take - * bd_mutex which is usually taken before lo_ctl_mutex. + * bd_mutex which is usually taken before loop_ctl_mutex. */ - fput(filp); - return 0; + if (filp) + fput(filp); + return err; +} + +static int loop_clr_fd(struct loop_device *lo) +{ + int err; + + err = mutex_lock_killable(&loop_ctl_mutex); + if (err) + return err; + if (lo->lo_state != Lo_bound) { + mutex_unlock(&loop_ctl_mutex); + return -ENXIO; + } + /* + * If we've explicitly asked to tear down the loop device, + * and it has an elevated reference count, set it for auto-teardown when + * the last reference goes away. This stops $!~#$@ udev from + * preventing teardown because it decided that it needs to run blkid on + * the loopback device whenever they appear. xfstests is notorious for + * failing tests because blkid via udev races with a losetup + * <dev>/do something like mkfs/losetup -d <dev> causing the losetup -d + * command to fail with EBUSY. + */ + if (atomic_read(&lo->lo_refcnt) > 1) { + lo->lo_flags |= LO_FLAGS_AUTOCLEAR; + mutex_unlock(&loop_ctl_mutex); + return 0; + } + lo->lo_state = Lo_rundown; + mutex_unlock(&loop_ctl_mutex); + + return __loop_clr_fd(lo, false); } static int @@ -1116,47 +1170,58 @@ loop_set_status(struct loop_device *lo, const struct loop_info64 *info) int err; struct loop_func_table *xfer; kuid_t uid = current_uid(); + struct block_device *bdev; + bool partscan = false; + err = mutex_lock_killable(&loop_ctl_mutex); + if (err) + return err; if (lo->lo_encrypt_key_size && !uid_eq(lo->lo_key_owner, uid) && - !capable(CAP_SYS_ADMIN)) - return -EPERM; - if (lo->lo_state != Lo_bound) - return -ENXIO; - if ((unsigned int) info->lo_encrypt_key_size > LO_KEY_SIZE) - return -EINVAL; + !capable(CAP_SYS_ADMIN)) { + err = -EPERM; + goto out_unlock; + } + if (lo->lo_state != Lo_bound) { + err = -ENXIO; + goto out_unlock; + } + if ((unsigned int) info->lo_encrypt_key_size > LO_KEY_SIZE) { + err = -EINVAL; + goto out_unlock; + } /* I/O need to be drained during transfer transition */ blk_mq_freeze_queue(lo->lo_queue); err = loop_release_xfer(lo); if (err) - goto exit; + goto out_unfreeze; if (info->lo_encrypt_type) { unsigned int type = info->lo_encrypt_type; if (type >= MAX_LO_CRYPT) { err = -EINVAL; - goto exit; + goto out_unfreeze; } xfer = xfer_funcs[type]; if (xfer == NULL) { err = -EINVAL; - goto exit; + goto out_unfreeze; } } else xfer = NULL; err = loop_init_xfer(lo, xfer, info); if (err) - goto exit; + goto out_unfreeze; if (lo->lo_offset != info->lo_offset || lo->lo_sizelimit != info->lo_sizelimit) { if (figure_loop_size(lo, info->lo_offset, info->lo_sizelimit)) { err = -EFBIG; - goto exit; + goto out_unfreeze; } } @@ -1188,15 +1253,20 @@ loop_set_status(struct loop_device *lo, const struct loop_info64 *info) /* update dio if lo_offset or transfer is changed */ __loop_update_dio(lo, lo->use_dio); - exit: +out_unfreeze: blk_mq_unfreeze_queue(lo->lo_queue); if (!err && (info->lo_flags & LO_FLAGS_PARTSCAN) && !(lo->lo_flags & LO_FLAGS_PARTSCAN)) { lo->lo_flags |= LO_FLAGS_PARTSCAN; lo->lo_disk->flags &= ~GENHD_FL_NO_PART_SCAN; - loop_reread_partitions(lo, lo->lo_device); + bdev = lo->lo_device; + partscan = true; } +out_unlock: + mutex_unlock(&loop_ctl_mutex); + if (partscan) + loop_reread_partitions(lo, bdev); return err; } @@ -1204,12 +1274,15 @@ loop_set_status(struct loop_device *lo, const struct loop_info64 *info) static int loop_get_status(struct loop_device *lo, struct loop_info64 *info) { - struct file *file; + struct path path; struct kstat stat; int ret; + ret = mutex_lock_killable(&loop_ctl_mutex); + if (ret) + return ret; if (lo->lo_state != Lo_bound) { - mutex_unlock(&lo->lo_ctl_mutex); + mutex_unlock(&loop_ctl_mutex); return -ENXIO; } @@ -1228,17 +1301,17 @@ loop_get_status(struct loop_device *lo, struct loop_info64 *info) lo->lo_encrypt_key_size); } - /* Drop lo_ctl_mutex while we call into the filesystem. */ - file = get_file(lo->lo_backing_file); - mutex_unlock(&lo->lo_ctl_mutex); - ret = vfs_getattr(&file->f_path, &stat, STATX_INO, - AT_STATX_SYNC_AS_STAT); + /* Drop loop_ctl_mutex while we call into the filesystem. */ + path = lo->lo_backing_file->f_path; + path_get(&path); + mutex_unlock(&loop_ctl_mutex); + ret = vfs_getattr(&path, &stat, STATX_INO, AT_STATX_SYNC_AS_STAT); if (!ret) { info->lo_device = huge_encode_dev(stat.dev); info->lo_inode = stat.ino; info->lo_rdevice = huge_encode_dev(stat.rdev); } - fput(file); + path_put(&path); return ret; } @@ -1322,10 +1395,8 @@ loop_get_status_old(struct loop_device *lo, struct loop_info __user *arg) { struct loop_info64 info64; int err; - if (!arg) { - mutex_unlock(&lo->lo_ctl_mutex); + if (!arg) return -EINVAL; - } err = loop_get_status(lo, &info64); if (!err) err = loop_info64_to_old(&info64, &info); @@ -1340,10 +1411,8 @@ loop_get_status64(struct loop_device *lo, struct loop_info64 __user *arg) { struct loop_info64 info64; int err; - if (!arg) { - mutex_unlock(&lo->lo_ctl_mutex); + if (!arg) return -EINVAL; - } err = loop_get_status(lo, &info64); if (!err && copy_to_user(arg, &info64, sizeof(info64))) err = -EFAULT; @@ -1393,70 +1462,73 @@ static int loop_set_block_size(struct loop_device *lo, unsigned long arg) return 0; } +static int lo_simple_ioctl(struct loop_device *lo, unsigned int cmd, + unsigned long arg) +{ + int err; + + err = mutex_lock_killable(&loop_ctl_mutex); + if (err) + return err; + switch (cmd) { + case LOOP_SET_CAPACITY: + err = loop_set_capacity(lo); + break; + case LOOP_SET_DIRECT_IO: + err = loop_set_dio(lo, arg); + break; + case LOOP_SET_BLOCK_SIZE: + err = loop_set_block_size(lo, arg); + break; + default: + err = lo->ioctl ? lo->ioctl(lo, cmd, arg) : -EINVAL; + } + mutex_unlock(&loop_ctl_mutex); + return err; +} + static int lo_ioctl(struct block_device *bdev, fmode_t mode, unsigned int cmd, unsigned long arg) { struct loop_device *lo = bdev->bd_disk->private_data; int err; - err = mutex_lock_killable_nested(&lo->lo_ctl_mutex, 1); - if (err) - goto out_unlocked; - switch (cmd) { case LOOP_SET_FD: - err = loop_set_fd(lo, mode, bdev, arg); - break; + return loop_set_fd(lo, mode, bdev, arg); case LOOP_CHANGE_FD: - err = loop_change_fd(lo, bdev, arg); - break; + return loop_change_fd(lo, bdev, arg); case LOOP_CLR_FD: - /* loop_clr_fd would have unlocked lo_ctl_mutex on success */ - err = loop_clr_fd(lo); - if (!err) - goto out_unlocked; - break; + return loop_clr_fd(lo); case LOOP_SET_STATUS: err = -EPERM; - if ((mode & FMODE_WRITE) || capable(CAP_SYS_ADMIN)) + if ((mode & FMODE_WRITE) || capable(CAP_SYS_ADMIN)) { err = loop_set_status_old(lo, (struct loop_info __user *)arg); + } break; case LOOP_GET_STATUS: - err = loop_get_status_old(lo, (struct loop_info __user *) arg); - /* loop_get_status() unlocks lo_ctl_mutex */ - goto out_unlocked; + return loop_get_status_old(lo, (struct loop_info __user *) arg); case LOOP_SET_STATUS64: err = -EPERM; - if ((mode & FMODE_WRITE) || capable(CAP_SYS_ADMIN)) + if ((mode & FMODE_WRITE) || capable(CAP_SYS_ADMIN)) { err = loop_set_status64(lo, (struct loop_info64 __user *) arg); + } break; case LOOP_GET_STATUS64: - err = loop_get_status64(lo, (struct loop_info64 __user *) arg); - /* loop_get_status() unlocks lo_ctl_mutex */ - goto out_unlocked; + return loop_get_status64(lo, (struct loop_info64 __user *) arg); case LOOP_SET_CAPACITY: - err = -EPERM; - if ((mode & FMODE_WRITE) || capable(CAP_SYS_ADMIN)) - err = loop_set_capacity(lo); - break; case LOOP_SET_DIRECT_IO: - err = -EPERM; - if ((mode & FMODE_WRITE) || capable(CAP_SYS_ADMIN)) - err = loop_set_dio(lo, arg); - break; case LOOP_SET_BLOCK_SIZE: - err = -EPERM; - if ((mode & FMODE_WRITE) || capable(CAP_SYS_ADMIN)) - err = loop_set_block_size(lo, arg); - break; + if (!(mode & FMODE_WRITE) && !capable(CAP_SYS_ADMIN)) + return -EPERM; + /* Fall through */ default: - err = lo->ioctl ? lo->ioctl(lo, cmd, arg) : -EINVAL; + err = lo_simple_ioctl(lo, cmd, arg); + break; } - mutex_unlock(&lo->lo_ctl_mutex); -out_unlocked: return err; } @@ -1570,10 +1642,8 @@ loop_get_status_compat(struct loop_device *lo, struct loop_info64 info64; int err; - if (!arg) { - mutex_unlock(&lo->lo_ctl_mutex); + if (!arg) return -EINVAL; - } err = loop_get_status(lo, &info64); if (!err) err = loop_info64_to_compat(&info64, arg); @@ -1588,20 +1658,12 @@ static int lo_compat_ioctl(struct block_device *bdev, fmode_t mode, switch(cmd) { case LOOP_SET_STATUS: - err = mutex_lock_killable(&lo->lo_ctl_mutex); - if (!err) { - err = loop_set_status_compat(lo, - (const struct compat_loop_info __user *)arg); - mutex_unlock(&lo->lo_ctl_mutex); - } + err = loop_set_status_compat(lo, + (const struct compat_loop_info __user *)arg); break; case LOOP_GET_STATUS: - err = mutex_lock_killable(&lo->lo_ctl_mutex); - if (!err) { - err = loop_get_status_compat(lo, - (struct compat_loop_info __user *)arg); - /* loop_get_status() unlocks lo_ctl_mutex */ - } + err = loop_get_status_compat(lo, + (struct compat_loop_info __user *)arg); break; case LOOP_SET_CAPACITY: case LOOP_CLR_FD: @@ -1625,9 +1687,11 @@ static int lo_compat_ioctl(struct block_device *bdev, fmode_t mode, static int lo_open(struct block_device *bdev, fmode_t mode) { struct loop_device *lo; - int err = 0; + int err; - mutex_lock(&loop_index_mutex); + err = mutex_lock_killable(&loop_ctl_mutex); + if (err) + return err; lo = bdev->bd_disk->private_data; if (!lo) { err = -ENXIO; @@ -1636,26 +1700,30 @@ static int lo_open(struct block_device *bdev, fmode_t mode) atomic_inc(&lo->lo_refcnt); out: - mutex_unlock(&loop_index_mutex); + mutex_unlock(&loop_ctl_mutex); return err; } -static void __lo_release(struct loop_device *lo) +static void lo_release(struct gendisk *disk, fmode_t mode) { - int err; + struct loop_device *lo; + mutex_lock(&loop_ctl_mutex); + lo = disk->private_data; if (atomic_dec_return(&lo->lo_refcnt)) - return; + goto out_unlock; - mutex_lock(&lo->lo_ctl_mutex); if (lo->lo_flags & LO_FLAGS_AUTOCLEAR) { + if (lo->lo_state != Lo_bound) + goto out_unlock; + lo->lo_state = Lo_rundown; + mutex_unlock(&loop_ctl_mutex); /* * In autoclear mode, stop the loop thread * and remove configuration after last close. */ - err = loop_clr_fd(lo); - if (!err) - return; + __loop_clr_fd(lo, true); + return; } else if (lo->lo_state == Lo_bound) { /* * Otherwise keep thread (if running) and config, @@ -1665,14 +1733,8 @@ static void __lo_release(struct loop_device *lo) blk_mq_unfreeze_queue(lo->lo_queue); } - mutex_unlock(&lo->lo_ctl_mutex); -} - -static void lo_release(struct gendisk *disk, fmode_t mode) -{ - mutex_lock(&loop_index_mutex); - __lo_release(disk->private_data); - mutex_unlock(&loop_index_mutex); +out_unlock: + mutex_unlock(&loop_ctl_mutex); } static const struct block_device_operations lo_fops = { @@ -1711,10 +1773,10 @@ static int unregister_transfer_cb(int id, void *ptr, void *data) struct loop_device *lo = ptr; struct loop_func_table *xfer = data; - mutex_lock(&lo->lo_ctl_mutex); + mutex_lock(&loop_ctl_mutex); if (lo->lo_encryption == xfer) loop_release_xfer(lo); - mutex_unlock(&lo->lo_ctl_mutex); + mutex_unlock(&loop_ctl_mutex); return 0; } @@ -1759,8 +1821,8 @@ static blk_status_t loop_queue_rq(struct blk_mq_hw_ctx *hctx, /* always use the first bio's css */ #ifdef CONFIG_BLK_CGROUP - if (cmd->use_aio && rq->bio && rq->bio->bi_css) { - cmd->css = rq->bio->bi_css; + if (cmd->use_aio && rq->bio && rq->bio->bi_blkg) { + cmd->css = &bio_blkcg(rq->bio)->css; css_get(cmd->css); } else #endif @@ -1895,7 +1957,6 @@ static int loop_add(struct loop_device **l, int i) if (!part_shift) disk->flags |= GENHD_FL_NO_PART_SCAN; disk->flags |= GENHD_FL_EXT_DEVT; - mutex_init(&lo->lo_ctl_mutex); atomic_set(&lo->lo_refcnt, 0); lo->lo_number = i; spin_lock_init(&lo->lo_lock); @@ -1974,7 +2035,7 @@ static struct kobject *loop_probe(dev_t dev, int *part, void *data) struct kobject *kobj; int err; - mutex_lock(&loop_index_mutex); + mutex_lock(&loop_ctl_mutex); err = loop_lookup(&lo, MINOR(dev) >> part_shift); if (err < 0) err = loop_add(&lo, MINOR(dev) >> part_shift); @@ -1982,7 +2043,7 @@ static struct kobject *loop_probe(dev_t dev, int *part, void *data) kobj = NULL; else kobj = get_disk_and_module(lo->lo_disk); - mutex_unlock(&loop_index_mutex); + mutex_unlock(&loop_ctl_mutex); *part = 0; return kobj; @@ -1992,9 +2053,13 @@ static long loop_control_ioctl(struct file *file, unsigned int cmd, unsigned long parm) { struct loop_device *lo; - int ret = -ENOSYS; + int ret; - mutex_lock(&loop_index_mutex); + ret = mutex_lock_killable(&loop_ctl_mutex); + if (ret) + return ret; + + ret = -ENOSYS; switch (cmd) { case LOOP_CTL_ADD: ret = loop_lookup(&lo, parm); @@ -2008,21 +2073,15 @@ static long loop_control_ioctl(struct file *file, unsigned int cmd, ret = loop_lookup(&lo, parm); if (ret < 0) break; - ret = mutex_lock_killable(&lo->lo_ctl_mutex); - if (ret) - break; if (lo->lo_state != Lo_unbound) { ret = -EBUSY; - mutex_unlock(&lo->lo_ctl_mutex); break; } if (atomic_read(&lo->lo_refcnt) > 0) { ret = -EBUSY; - mutex_unlock(&lo->lo_ctl_mutex); break; } lo->lo_disk->private_data = NULL; - mutex_unlock(&lo->lo_ctl_mutex); idr_remove(&loop_index_idr, lo->lo_number); loop_remove(lo); break; @@ -2032,7 +2091,7 @@ static long loop_control_ioctl(struct file *file, unsigned int cmd, break; ret = loop_add(&lo, -1); } - mutex_unlock(&loop_index_mutex); + mutex_unlock(&loop_ctl_mutex); return ret; } @@ -2116,10 +2175,10 @@ static int __init loop_init(void) THIS_MODULE, loop_probe, NULL, NULL); /* pre-create number of devices given by config or max_loop */ - mutex_lock(&loop_index_mutex); + mutex_lock(&loop_ctl_mutex); for (i = 0; i < nr; i++) loop_add(&lo, i); - mutex_unlock(&loop_index_mutex); + mutex_unlock(&loop_ctl_mutex); printk(KERN_INFO "loop: module loaded\n"); return 0; diff --git a/drivers/block/loop.h b/drivers/block/loop.h index 4d42c7af7de7..af75a5ee4094 100644 --- a/drivers/block/loop.h +++ b/drivers/block/loop.h @@ -54,7 +54,6 @@ struct loop_device { spinlock_t lo_lock; int lo_state; - struct mutex lo_ctl_mutex; struct kthread_worker worker; struct task_struct *worker_task; bool use_dio; diff --git a/drivers/block/mtip32xx/mtip32xx.c b/drivers/block/mtip32xx/mtip32xx.c index a7daa8acbab3..2b0ac9d01e51 100644 --- a/drivers/block/mtip32xx/mtip32xx.c +++ b/drivers/block/mtip32xx/mtip32xx.c @@ -168,41 +168,6 @@ static bool mtip_check_surprise_removal(struct pci_dev *pdev) return false; /* device present */ } -/* we have to use runtime tag to setup command header */ -static void mtip_init_cmd_header(struct request *rq) -{ - struct driver_data *dd = rq->q->queuedata; - struct mtip_cmd *cmd = blk_mq_rq_to_pdu(rq); - - /* Point the command headers at the command tables. */ - cmd->command_header = dd->port->command_list + - (sizeof(struct mtip_cmd_hdr) * rq->tag); - cmd->command_header_dma = dd->port->command_list_dma + - (sizeof(struct mtip_cmd_hdr) * rq->tag); - - if (test_bit(MTIP_PF_HOST_CAP_64, &dd->port->flags)) - cmd->command_header->ctbau = __force_bit2int cpu_to_le32((cmd->command_dma >> 16) >> 16); - - cmd->command_header->ctba = __force_bit2int cpu_to_le32(cmd->command_dma & 0xFFFFFFFF); -} - -static struct mtip_cmd *mtip_get_int_command(struct driver_data *dd) -{ - struct request *rq; - - if (mtip_check_surprise_removal(dd->pdev)) - return NULL; - - rq = blk_mq_alloc_request(dd->queue, REQ_OP_DRV_IN, BLK_MQ_REQ_RESERVED); - if (IS_ERR(rq)) - return NULL; - - /* Internal cmd isn't submitted via .queue_rq */ - mtip_init_cmd_header(rq); - - return blk_mq_rq_to_pdu(rq); -} - static struct mtip_cmd *mtip_cmd_from_tag(struct driver_data *dd, unsigned int tag) { @@ -1023,13 +988,14 @@ static int mtip_exec_internal_command(struct mtip_port *port, return -EFAULT; } - int_cmd = mtip_get_int_command(dd); - if (!int_cmd) { + if (mtip_check_surprise_removal(dd->pdev)) + return -EFAULT; + + rq = blk_mq_alloc_request(dd->queue, REQ_OP_DRV_IN, BLK_MQ_REQ_RESERVED); + if (IS_ERR(rq)) { dbg_printk(MTIP_DRV_NAME "Unable to allocate tag for PIO cmd\n"); return -EFAULT; } - rq = blk_mq_rq_from_pdu(int_cmd); - rq->special = &icmd; set_bit(MTIP_PF_IC_ACTIVE_BIT, &port->flags); @@ -1050,6 +1016,8 @@ static int mtip_exec_internal_command(struct mtip_port *port, } /* Copy the command to the command table */ + int_cmd = blk_mq_rq_to_pdu(rq); + int_cmd->icmd = &icmd; memcpy(int_cmd->command, fis, fis_len*4); rq->timeout = timeout; @@ -1423,23 +1391,19 @@ static int mtip_get_smart_attr(struct mtip_port *port, unsigned int id, * @dd pointer to driver_data structure * @lba starting lba * @len # of 512b sectors to trim - * - * return value - * -ENOMEM Out of dma memory - * -EINVAL Invalid parameters passed in, trim not supported - * -EIO Error submitting trim request to hw */ -static int mtip_send_trim(struct driver_data *dd, unsigned int lba, - unsigned int len) +static blk_status_t mtip_send_trim(struct driver_data *dd, unsigned int lba, + unsigned int len) { - int i, rv = 0; u64 tlba, tlen, sect_left; struct mtip_trim_entry *buf; dma_addr_t dma_addr; struct host_to_dev_fis fis; + blk_status_t ret = BLK_STS_OK; + int i; if (!len || dd->trim_supp == false) - return -EINVAL; + return BLK_STS_IOERR; /* Trim request too big */ WARN_ON(len > (MTIP_MAX_TRIM_ENTRY_LEN * MTIP_MAX_TRIM_ENTRIES)); @@ -1454,7 +1418,7 @@ static int mtip_send_trim(struct driver_data *dd, unsigned int lba, buf = dmam_alloc_coherent(&dd->pdev->dev, ATA_SECT_SIZE, &dma_addr, GFP_KERNEL); if (!buf) - return -ENOMEM; + return BLK_STS_RESOURCE; memset(buf, 0, ATA_SECT_SIZE); for (i = 0, sect_left = len, tlba = lba; @@ -1463,8 +1427,8 @@ static int mtip_send_trim(struct driver_data *dd, unsigned int lba, tlen = (sect_left >= MTIP_MAX_TRIM_ENTRY_LEN ? MTIP_MAX_TRIM_ENTRY_LEN : sect_left); - buf[i].lba = __force_bit2int cpu_to_le32(tlba); - buf[i].range = __force_bit2int cpu_to_le16(tlen); + buf[i].lba = cpu_to_le32(tlba); + buf[i].range = cpu_to_le16(tlen); tlba += tlen; sect_left -= tlen; } @@ -1486,10 +1450,10 @@ static int mtip_send_trim(struct driver_data *dd, unsigned int lba, ATA_SECT_SIZE, 0, MTIP_TRIM_TIMEOUT_MS) < 0) - rv = -EIO; + ret = BLK_STS_IOERR; dmam_free_coherent(&dd->pdev->dev, ATA_SECT_SIZE, buf, dma_addr); - return rv; + return ret; } /* @@ -1585,23 +1549,20 @@ static inline void fill_command_sg(struct driver_data *dd, int n; unsigned int dma_len; struct mtip_cmd_sg *command_sg; - struct scatterlist *sg = command->sg; + struct scatterlist *sg; command_sg = command->command + AHCI_CMD_TBL_HDR_SZ; - for (n = 0; n < nents; n++) { + for_each_sg(command->sg, sg, nents, n) { dma_len = sg_dma_len(sg); if (dma_len > 0x400000) dev_err(&dd->pdev->dev, "DMA segment length truncated\n"); - command_sg->info = __force_bit2int - cpu_to_le32((dma_len-1) & 0x3FFFFF); - command_sg->dba = __force_bit2int - cpu_to_le32(sg_dma_address(sg)); - command_sg->dba_upper = __force_bit2int + command_sg->info = cpu_to_le32((dma_len-1) & 0x3FFFFF); + command_sg->dba = cpu_to_le32(sg_dma_address(sg)); + command_sg->dba_upper = cpu_to_le32((sg_dma_address(sg) >> 16) >> 16); command_sg++; - sg++; } } @@ -2171,7 +2132,6 @@ static int mtip_hw_ioctl(struct driver_data *dd, unsigned int cmd, * @dd Pointer to the driver data structure. * @start First sector to read. * @nsect Number of sectors to read. - * @nents Number of entries in scatter list for the read command. * @tag The tag of this read command. * @callback Pointer to the function that should be called * when the read completes. @@ -2183,16 +2143,20 @@ static int mtip_hw_ioctl(struct driver_data *dd, unsigned int cmd, * None */ static void mtip_hw_submit_io(struct driver_data *dd, struct request *rq, - struct mtip_cmd *command, int nents, + struct mtip_cmd *command, struct blk_mq_hw_ctx *hctx) { + struct mtip_cmd_hdr *hdr = + dd->port->command_list + sizeof(struct mtip_cmd_hdr) * rq->tag; struct host_to_dev_fis *fis; struct mtip_port *port = dd->port; int dma_dir = rq_data_dir(rq) == READ ? DMA_FROM_DEVICE : DMA_TO_DEVICE; u64 start = blk_rq_pos(rq); unsigned int nsect = blk_rq_sectors(rq); + unsigned int nents; /* Map the scatter list for DMA access */ + nents = blk_rq_map_sg(hctx->queue, rq, command->sg); nents = dma_map_sg(&dd->pdev->dev, command->sg, nents, dma_dir); prefetch(&port->flags); @@ -2233,10 +2197,11 @@ static void mtip_hw_submit_io(struct driver_data *dd, struct request *rq, fis->device |= 1 << 7; /* Populate the command header */ - command->command_header->opts = - __force_bit2int cpu_to_le32( - (nents << 16) | 5 | AHCI_CMD_PREFETCH); - command->command_header->byte_count = 0; + hdr->ctba = cpu_to_le32(command->command_dma & 0xFFFFFFFF); + if (test_bit(MTIP_PF_HOST_CAP_64, &dd->port->flags)) + hdr->ctbau = cpu_to_le32((command->command_dma >> 16) >> 16); + hdr->opts = cpu_to_le32((nents << 16) | 5 | AHCI_CMD_PREFETCH); + hdr->byte_count = 0; command->direction = dma_dir; @@ -2720,7 +2685,7 @@ static void mtip_softirq_done_fn(struct request *rq) blk_mq_end_request(rq, cmd->status); } -static void mtip_abort_cmd(struct request *req, void *data, bool reserved) +static bool mtip_abort_cmd(struct request *req, void *data, bool reserved) { struct mtip_cmd *cmd = blk_mq_rq_to_pdu(req); struct driver_data *dd = data; @@ -2730,14 +2695,16 @@ static void mtip_abort_cmd(struct request *req, void *data, bool reserved) clear_bit(req->tag, dd->port->cmds_to_issue); cmd->status = BLK_STS_IOERR; mtip_softirq_done_fn(req); + return true; } -static void mtip_queue_cmd(struct request *req, void *data, bool reserved) +static bool mtip_queue_cmd(struct request *req, void *data, bool reserved) { struct driver_data *dd = data; set_bit(req->tag, dd->port->cmds_to_issue); blk_abort_request(req); + return true; } /* @@ -2803,10 +2770,7 @@ restart_eh: blk_mq_quiesce_queue(dd->queue); - spin_lock(dd->queue->queue_lock); - blk_mq_tagset_busy_iter(&dd->tags, - mtip_queue_cmd, dd); - spin_unlock(dd->queue->queue_lock); + blk_mq_tagset_busy_iter(&dd->tags, mtip_queue_cmd, dd); set_bit(MTIP_PF_ISSUE_CMDS_BIT, &dd->port->flags); @@ -3531,58 +3495,24 @@ static inline bool is_se_active(struct driver_data *dd) return false; } -/* - * Block layer make request function. - * - * This function is called by the kernel to process a BIO for - * the P320 device. - * - * @queue Pointer to the request queue. Unused other than to obtain - * the driver data structure. - * @rq Pointer to the request. - * - */ -static int mtip_submit_request(struct blk_mq_hw_ctx *hctx, struct request *rq) +static inline bool is_stopped(struct driver_data *dd, struct request *rq) { - struct driver_data *dd = hctx->queue->queuedata; - struct mtip_cmd *cmd = blk_mq_rq_to_pdu(rq); - unsigned int nents; - - if (is_se_active(dd)) - return -ENODATA; - - if (unlikely(dd->dd_flag & MTIP_DDF_STOP_IO)) { - if (unlikely(test_bit(MTIP_DDF_REMOVE_PENDING_BIT, - &dd->dd_flag))) { - return -ENXIO; - } - if (unlikely(test_bit(MTIP_DDF_OVER_TEMP_BIT, &dd->dd_flag))) { - return -ENODATA; - } - if (unlikely(test_bit(MTIP_DDF_WRITE_PROTECT_BIT, - &dd->dd_flag) && - rq_data_dir(rq))) { - return -ENODATA; - } - if (unlikely(test_bit(MTIP_DDF_SEC_LOCK_BIT, &dd->dd_flag) || - test_bit(MTIP_DDF_REBUILD_FAILED_BIT, &dd->dd_flag))) - return -ENODATA; - } - - if (req_op(rq) == REQ_OP_DISCARD) { - int err; - - err = mtip_send_trim(dd, blk_rq_pos(rq), blk_rq_sectors(rq)); - blk_mq_end_request(rq, err ? BLK_STS_IOERR : BLK_STS_OK); - return 0; - } + if (likely(!(dd->dd_flag & MTIP_DDF_STOP_IO))) + return false; - /* Create the scatter list for this request. */ - nents = blk_rq_map_sg(hctx->queue, rq, cmd->sg); + if (test_bit(MTIP_DDF_REMOVE_PENDING_BIT, &dd->dd_flag)) + return true; + if (test_bit(MTIP_DDF_OVER_TEMP_BIT, &dd->dd_flag)) + return true; + if (test_bit(MTIP_DDF_WRITE_PROTECT_BIT, &dd->dd_flag) && + rq_data_dir(rq)) + return true; + if (test_bit(MTIP_DDF_SEC_LOCK_BIT, &dd->dd_flag)) + return true; + if (test_bit(MTIP_DDF_REBUILD_FAILED_BIT, &dd->dd_flag)) + return true; - /* Issue the read/write. */ - mtip_hw_submit_io(dd, rq, cmd, nents, hctx); - return 0; + return false; } static bool mtip_check_unal_depth(struct blk_mq_hw_ctx *hctx, @@ -3613,32 +3543,33 @@ static blk_status_t mtip_issue_reserved_cmd(struct blk_mq_hw_ctx *hctx, struct request *rq) { struct driver_data *dd = hctx->queue->queuedata; - struct mtip_int_cmd *icmd = rq->special; struct mtip_cmd *cmd = blk_mq_rq_to_pdu(rq); + struct mtip_int_cmd *icmd = cmd->icmd; + struct mtip_cmd_hdr *hdr = + dd->port->command_list + sizeof(struct mtip_cmd_hdr) * rq->tag; struct mtip_cmd_sg *command_sg; if (mtip_commands_active(dd->port)) return BLK_STS_RESOURCE; + hdr->ctba = cpu_to_le32(cmd->command_dma & 0xFFFFFFFF); + if (test_bit(MTIP_PF_HOST_CAP_64, &dd->port->flags)) + hdr->ctbau = cpu_to_le32((cmd->command_dma >> 16) >> 16); /* Populate the SG list */ - cmd->command_header->opts = - __force_bit2int cpu_to_le32(icmd->opts | icmd->fis_len); + hdr->opts = cpu_to_le32(icmd->opts | icmd->fis_len); if (icmd->buf_len) { command_sg = cmd->command + AHCI_CMD_TBL_HDR_SZ; - command_sg->info = - __force_bit2int cpu_to_le32((icmd->buf_len-1) & 0x3FFFFF); - command_sg->dba = - __force_bit2int cpu_to_le32(icmd->buffer & 0xFFFFFFFF); + command_sg->info = cpu_to_le32((icmd->buf_len-1) & 0x3FFFFF); + command_sg->dba = cpu_to_le32(icmd->buffer & 0xFFFFFFFF); command_sg->dba_upper = - __force_bit2int cpu_to_le32((icmd->buffer >> 16) >> 16); + cpu_to_le32((icmd->buffer >> 16) >> 16); - cmd->command_header->opts |= - __force_bit2int cpu_to_le32((1 << 16)); + hdr->opts |= cpu_to_le32((1 << 16)); } /* Populate the command header */ - cmd->command_header->byte_count = 0; + hdr->byte_count = 0; blk_mq_start_request(rq); mtip_issue_non_ncq_command(dd->port, rq->tag); @@ -3648,10 +3579,9 @@ static blk_status_t mtip_issue_reserved_cmd(struct blk_mq_hw_ctx *hctx, static blk_status_t mtip_queue_rq(struct blk_mq_hw_ctx *hctx, const struct blk_mq_queue_data *bd) { + struct driver_data *dd = hctx->queue->queuedata; struct request *rq = bd->rq; - int ret; - - mtip_init_cmd_header(rq); + struct mtip_cmd *cmd = blk_mq_rq_to_pdu(rq); if (blk_rq_is_passthrough(rq)) return mtip_issue_reserved_cmd(hctx, rq); @@ -3659,12 +3589,15 @@ static blk_status_t mtip_queue_rq(struct blk_mq_hw_ctx *hctx, if (unlikely(mtip_check_unal_depth(hctx, rq))) return BLK_STS_RESOURCE; + if (is_se_active(dd) || is_stopped(dd, rq)) + return BLK_STS_IOERR; + blk_mq_start_request(rq); - ret = mtip_submit_request(hctx, rq); - if (likely(!ret)) - return BLK_STS_OK; - return BLK_STS_IOERR; + if (req_op(rq) == REQ_OP_DISCARD) + return mtip_send_trim(dd, blk_rq_pos(rq), blk_rq_sectors(rq)); + mtip_hw_submit_io(dd, rq, cmd, hctx); + return BLK_STS_OK; } static void mtip_free_cmd(struct blk_mq_tag_set *set, struct request *rq, @@ -3920,12 +3853,13 @@ protocol_init_error: return rv; } -static void mtip_no_dev_cleanup(struct request *rq, void *data, bool reserv) +static bool mtip_no_dev_cleanup(struct request *rq, void *data, bool reserv) { struct mtip_cmd *cmd = blk_mq_rq_to_pdu(rq); cmd->status = BLK_STS_IOERR; blk_mq_complete_request(rq); + return true; } /* diff --git a/drivers/block/mtip32xx/mtip32xx.h b/drivers/block/mtip32xx/mtip32xx.h index e20e55dab443..c33f8c3d9fb4 100644 --- a/drivers/block/mtip32xx/mtip32xx.h +++ b/drivers/block/mtip32xx/mtip32xx.h @@ -126,8 +126,6 @@ #define MTIP_DFS_MAX_BUF_SIZE 1024 -#define __force_bit2int (unsigned int __force) - enum { /* below are bit numbers in 'flags' defined in mtip_port */ MTIP_PF_IC_ACTIVE_BIT = 0, /* pio/ioctl */ @@ -174,10 +172,10 @@ enum { struct smart_attr { u8 attr_id; - u16 flags; + __le16 flags; u8 cur; u8 worst; - u32 data; + __le32 data; u8 res[3]; } __packed; @@ -200,9 +198,9 @@ struct mtip_work { #define MTIP_MAX_TRIM_ENTRY_LEN 0xfff8 struct mtip_trim_entry { - u32 lba; /* starting lba of region */ - u16 rsvd; /* unused */ - u16 range; /* # of 512b blocks to trim */ + __le32 lba; /* starting lba of region */ + __le16 rsvd; /* unused */ + __le16 range; /* # of 512b blocks to trim */ } __packed; struct mtip_trim { @@ -278,24 +276,24 @@ struct mtip_cmd_hdr { * - Bit 5 Unused in this implementation. * - Bits 4:0 Length of the command FIS in DWords (DWord = 4 bytes). */ - unsigned int opts; + __le32 opts; /* This field is unsed when using NCQ. */ union { - unsigned int byte_count; - unsigned int status; + __le32 byte_count; + __le32 status; }; /* * Lower 32 bits of the command table address associated with this * header. The command table addresses must be 128 byte aligned. */ - unsigned int ctba; + __le32 ctba; /* * If 64 bit addressing is used this field is the upper 32 bits * of the command table address associated with this command. */ - unsigned int ctbau; + __le32 ctbau; /* Reserved and unused. */ - unsigned int res[4]; + u32 res[4]; }; /* Command scatter gather structure (PRD). */ @@ -305,31 +303,28 @@ struct mtip_cmd_sg { * address must be 8 byte aligned signified by bits 2:0 being * set to 0. */ - unsigned int dba; + __le32 dba; /* * When 64 bit addressing is used this field is the upper * 32 bits of the data buffer address. */ - unsigned int dba_upper; + __le32 dba_upper; /* Unused. */ - unsigned int reserved; + __le32 reserved; /* * Bit 31: interrupt when this data block has been transferred. * Bits 30..22: reserved * Bits 21..0: byte count (minus 1). For P320 the byte count must be * 8 byte aligned signified by bits 2:0 being set to 1. */ - unsigned int info; + __le32 info; }; struct mtip_port; +struct mtip_int_cmd; + /* Structure used to describe a command. */ struct mtip_cmd { - - struct mtip_cmd_hdr *command_header; /* ptr to command header entry */ - - dma_addr_t command_header_dma; /* corresponding physical address */ - void *command; /* ptr to command table entry */ dma_addr_t command_dma; /* corresponding physical address */ @@ -338,7 +333,10 @@ struct mtip_cmd { int unaligned; /* command is unaligned on 4k boundary */ - struct scatterlist sg[MTIP_MAX_SG]; /* Scatter list entries */ + union { + struct scatterlist sg[MTIP_MAX_SG]; /* Scatter list entries */ + struct mtip_int_cmd *icmd; + }; int retries; /* The number of retries left for this command. */ diff --git a/drivers/block/nbd.c b/drivers/block/nbd.c index 4d4d6129ff66..08696f5f00bb 100644 --- a/drivers/block/nbd.c +++ b/drivers/block/nbd.c @@ -734,12 +734,13 @@ static void recv_work(struct work_struct *work) kfree(args); } -static void nbd_clear_req(struct request *req, void *data, bool reserved) +static bool nbd_clear_req(struct request *req, void *data, bool reserved) { struct nbd_cmd *cmd = blk_mq_rq_to_pdu(req); cmd->status = BLK_STS_IOERR; blk_mq_complete_request(req); + return true; } static void nbd_clear_que(struct nbd_device *nbd) diff --git a/drivers/block/null_blk.h b/drivers/block/null_blk.h index 7685df43f1ef..b3df2793e7cd 100644 --- a/drivers/block/null_blk.h +++ b/drivers/block/null_blk.h @@ -49,6 +49,7 @@ struct nullb_device { unsigned long completion_nsec; /* time in ns to complete a request */ unsigned long cache_size; /* disk cache size in MB */ unsigned long zone_size; /* zone size in MB if device is zoned */ + unsigned int zone_nr_conv; /* number of conventional zones */ unsigned int submit_queues; /* number of submission queues */ unsigned int home_node; /* home node for the device */ unsigned int queue_mode; /* block interface */ diff --git a/drivers/block/null_blk_main.c b/drivers/block/null_blk_main.c index 09339203dfba..62c9654b9ce8 100644 --- a/drivers/block/null_blk_main.c +++ b/drivers/block/null_blk_main.c @@ -188,6 +188,10 @@ static unsigned long g_zone_size = 256; module_param_named(zone_size, g_zone_size, ulong, S_IRUGO); MODULE_PARM_DESC(zone_size, "Zone size in MB when block device is zoned. Must be power-of-two: Default: 256"); +static unsigned int g_zone_nr_conv; +module_param_named(zone_nr_conv, g_zone_nr_conv, uint, 0444); +MODULE_PARM_DESC(zone_nr_conv, "Number of conventional zones when block device is zoned. Default: 0"); + static struct nullb_device *null_alloc_dev(void); static void null_free_dev(struct nullb_device *dev); static void null_del_dev(struct nullb *nullb); @@ -293,6 +297,7 @@ NULLB_DEVICE_ATTR(mbps, uint); NULLB_DEVICE_ATTR(cache_size, ulong); NULLB_DEVICE_ATTR(zoned, bool); NULLB_DEVICE_ATTR(zone_size, ulong); +NULLB_DEVICE_ATTR(zone_nr_conv, uint); static ssize_t nullb_device_power_show(struct config_item *item, char *page) { @@ -407,6 +412,7 @@ static struct configfs_attribute *nullb_device_attrs[] = { &nullb_device_attr_badblocks, &nullb_device_attr_zoned, &nullb_device_attr_zone_size, + &nullb_device_attr_zone_nr_conv, NULL, }; @@ -520,6 +526,7 @@ static struct nullb_device *null_alloc_dev(void) dev->use_per_node_hctx = g_use_per_node_hctx; dev->zoned = g_zoned; dev->zone_size = g_zone_size; + dev->zone_nr_conv = g_zone_nr_conv; return dev; } @@ -635,14 +642,9 @@ static void null_cmd_end_timer(struct nullb_cmd *cmd) hrtimer_start(&cmd->timer, kt, HRTIMER_MODE_REL); } -static void null_softirq_done_fn(struct request *rq) +static void null_complete_rq(struct request *rq) { - struct nullb *nullb = rq->q->queuedata; - - if (nullb->dev->queue_mode == NULL_Q_MQ) - end_cmd(blk_mq_rq_to_pdu(rq)); - else - end_cmd(rq->special); + end_cmd(blk_mq_rq_to_pdu(rq)); } static struct nullb_page *null_alloc_page(gfp_t gfp_flags) @@ -1350,7 +1352,7 @@ static blk_status_t null_queue_rq(struct blk_mq_hw_ctx *hctx, static const struct blk_mq_ops null_mq_ops = { .queue_rq = null_queue_rq, - .complete = null_softirq_done_fn, + .complete = null_complete_rq, .timeout = null_timeout_rq, }; @@ -1657,8 +1659,7 @@ static int null_add_dev(struct nullb_device *dev) } null_init_queues(nullb); } else if (dev->queue_mode == NULL_Q_BIO) { - nullb->q = blk_alloc_queue_node(GFP_KERNEL, dev->home_node, - NULL); + nullb->q = blk_alloc_queue_node(GFP_KERNEL, dev->home_node); if (!nullb->q) { rv = -ENOMEM; goto out_cleanup_queues; diff --git a/drivers/block/null_blk_zoned.c b/drivers/block/null_blk_zoned.c index c0b0e4a3fa8f..5d1c261a2cfd 100644 --- a/drivers/block/null_blk_zoned.c +++ b/drivers/block/null_blk_zoned.c @@ -29,7 +29,25 @@ int null_zone_init(struct nullb_device *dev) if (!dev->zones) return -ENOMEM; - for (i = 0; i < dev->nr_zones; i++) { + if (dev->zone_nr_conv >= dev->nr_zones) { + dev->zone_nr_conv = dev->nr_zones - 1; + pr_info("null_blk: changed the number of conventional zones to %u", + dev->zone_nr_conv); + } + + for (i = 0; i < dev->zone_nr_conv; i++) { + struct blk_zone *zone = &dev->zones[i]; + + zone->start = sector; + zone->len = dev->zone_size_sects; + zone->wp = zone->start + zone->len; + zone->type = BLK_ZONE_TYPE_CONVENTIONAL; + zone->cond = BLK_ZONE_COND_NOT_WP; + + sector += dev->zone_size_sects; + } + + for (i = dev->zone_nr_conv; i < dev->nr_zones; i++) { struct blk_zone *zone = &dev->zones[i]; zone->start = zone->wp = sector; @@ -98,6 +116,8 @@ void null_zone_write(struct nullb_cmd *cmd, sector_t sector, if (zone->wp == zone->start + zone->len) zone->cond = BLK_ZONE_COND_FULL; break; + case BLK_ZONE_COND_NOT_WP: + break; default: /* Invalid zone condition */ cmd->error = BLK_STS_IOERR; @@ -111,6 +131,11 @@ void null_zone_reset(struct nullb_cmd *cmd, sector_t sector) unsigned int zno = null_zone_no(dev, sector); struct blk_zone *zone = &dev->zones[zno]; + if (zone->type == BLK_ZONE_TYPE_CONVENTIONAL) { + cmd->error = BLK_STS_IOERR; + return; + } + zone->cond = BLK_ZONE_COND_EMPTY; zone->wp = zone->start; } diff --git a/drivers/block/paride/pd.c b/drivers/block/paride/pd.c index ae4971e5d9a8..0ff9b12d0e35 100644 --- a/drivers/block/paride/pd.c +++ b/drivers/block/paride/pd.c @@ -242,6 +242,11 @@ struct pd_unit { static struct pd_unit pd[PD_UNITS]; +struct pd_req { + /* for REQ_OP_DRV_IN: */ + enum action (*func)(struct pd_unit *disk); +}; + static char pd_scratch[512]; /* scratch block buffer */ static char *pd_errs[17] = { "ERR", "INDEX", "ECC", "DRQ", "SEEK", "WRERR", @@ -502,8 +507,9 @@ static enum action do_pd_io_start(void) static enum action pd_special(void) { - enum action (*func)(struct pd_unit *) = pd_req->special; - return func(pd_current); + struct pd_req *req = blk_mq_rq_to_pdu(pd_req); + + return req->func(pd_current); } static int pd_next_buf(void) @@ -767,12 +773,14 @@ static int pd_special_command(struct pd_unit *disk, enum action (*func)(struct pd_unit *disk)) { struct request *rq; + struct pd_req *req; rq = blk_get_request(disk->gd->queue, REQ_OP_DRV_IN, 0); if (IS_ERR(rq)) return PTR_ERR(rq); + req = blk_mq_rq_to_pdu(rq); - rq->special = func; + req->func = func; blk_execute_rq(disk->gd->queue, disk->gd, rq, 0); blk_put_request(rq); return 0; @@ -892,9 +900,21 @@ static void pd_probe_drive(struct pd_unit *disk) disk->gd = p; p->private_data = disk; - p->queue = blk_mq_init_sq_queue(&disk->tag_set, &pd_mq_ops, 2, - BLK_MQ_F_SHOULD_MERGE | BLK_MQ_F_BLOCKING); + memset(&disk->tag_set, 0, sizeof(disk->tag_set)); + disk->tag_set.ops = &pd_mq_ops; + disk->tag_set.cmd_size = sizeof(struct pd_req); + disk->tag_set.nr_hw_queues = 1; + disk->tag_set.nr_maps = 1; + disk->tag_set.queue_depth = 2; + disk->tag_set.numa_node = NUMA_NO_NODE; + disk->tag_set.flags = BLK_MQ_F_SHOULD_MERGE | BLK_MQ_F_BLOCKING; + + if (blk_mq_alloc_tag_set(&disk->tag_set)) + return; + + p->queue = blk_mq_init_queue(&disk->tag_set); if (IS_ERR(p->queue)) { + blk_mq_free_tag_set(&disk->tag_set); p->queue = NULL; return; } diff --git a/drivers/block/pktcdvd.c b/drivers/block/pktcdvd.c index 9381f4e3b221..f5a71023f76c 100644 --- a/drivers/block/pktcdvd.c +++ b/drivers/block/pktcdvd.c @@ -2203,9 +2203,7 @@ static int pkt_open_dev(struct pktcdvd_device *pd, fmode_t write) * Some CDRW drives can not handle writes larger than one packet, * even if the size is a multiple of the packet size. */ - spin_lock_irq(q->queue_lock); blk_queue_max_hw_sectors(q, pd->settings.size); - spin_unlock_irq(q->queue_lock); set_bit(PACKET_WRITABLE, &pd->flags); } else { pkt_set_speed(pd, MAX_SPEED, MAX_SPEED); diff --git a/drivers/block/skd_main.c b/drivers/block/skd_main.c index 2459dcc04b1c..a10d5736d8f7 100644 --- a/drivers/block/skd_main.c +++ b/drivers/block/skd_main.c @@ -181,6 +181,7 @@ struct skd_request_context { struct fit_completion_entry_v1 completion; struct fit_comp_error_info err_info; + int retries; blk_status_t status; }; @@ -382,11 +383,12 @@ static void skd_log_skreq(struct skd_device *skdev, * READ/WRITE REQUESTS ***************************************************************************** */ -static void skd_inc_in_flight(struct request *rq, void *data, bool reserved) +static bool skd_inc_in_flight(struct request *rq, void *data, bool reserved) { int *count = data; count++; + return true; } static int skd_in_flight(struct skd_device *skdev) @@ -494,6 +496,11 @@ static blk_status_t skd_mq_queue_rq(struct blk_mq_hw_ctx *hctx, if (unlikely(skdev->state != SKD_DRVR_STATE_ONLINE)) return skd_fail_all(q) ? BLK_STS_IOERR : BLK_STS_RESOURCE; + if (!(req->rq_flags & RQF_DONTPREP)) { + skreq->retries = 0; + req->rq_flags |= RQF_DONTPREP; + } + blk_mq_start_request(req); WARN_ONCE(tag >= skd_max_queue_depth, "%#x > %#x (nr_requests = %lu)\n", @@ -1425,7 +1432,7 @@ static void skd_resolve_req_exception(struct skd_device *skdev, break; case SKD_CHECK_STATUS_REQUEUE_REQUEST: - if ((unsigned long) ++req->special < SKD_MAX_RETRIES) { + if (++skreq->retries < SKD_MAX_RETRIES) { skd_log_skreq(skdev, skreq, "retry"); blk_mq_requeue_request(req, true); break; @@ -1887,13 +1894,13 @@ static void skd_isr_fwstate(struct skd_device *skdev) skd_skdev_state_to_str(skdev->state), skdev->state); } -static void skd_recover_request(struct request *req, void *data, bool reserved) +static bool skd_recover_request(struct request *req, void *data, bool reserved) { struct skd_device *const skdev = data; struct skd_request_context *skreq = blk_mq_rq_to_pdu(req); if (skreq->state != SKD_REQ_STATE_BUSY) - return; + return true; skd_log_skreq(skdev, skreq, "recover"); @@ -1904,6 +1911,7 @@ static void skd_recover_request(struct request *req, void *data, bool reserved) skreq->state = SKD_REQ_STATE_IDLE; skreq->status = BLK_STS_IOERR; blk_mq_complete_request(req); + return true; } static void skd_recover_requests(struct skd_device *skdev) diff --git a/drivers/block/sunvdc.c b/drivers/block/sunvdc.c index b54fa6726303..9c0553dd13e7 100644 --- a/drivers/block/sunvdc.c +++ b/drivers/block/sunvdc.c @@ -6,7 +6,7 @@ #include <linux/module.h> #include <linux/kernel.h> #include <linux/types.h> -#include <linux/blkdev.h> +#include <linux/blk-mq.h> #include <linux/hdreg.h> #include <linux/genhd.h> #include <linux/cdrom.h> @@ -45,6 +45,8 @@ MODULE_VERSION(DRV_MODULE_VERSION); #define WAITING_FOR_GEN_CMD 0x04 #define WAITING_FOR_ANY -1 +#define VDC_MAX_RETRIES 10 + static struct workqueue_struct *sunvdc_wq; struct vdc_req_entry { @@ -66,9 +68,10 @@ struct vdc_port { u64 max_xfer_size; u32 vdisk_block_size; + u32 drain; u64 ldc_timeout; - struct timer_list ldc_reset_timer; + struct delayed_work ldc_reset_timer_work; struct work_struct ldc_reset_work; /* The server fills these in for us in the disk attribute @@ -80,12 +83,14 @@ struct vdc_port { u8 vdisk_mtype; u32 vdisk_phys_blksz; + struct blk_mq_tag_set tag_set; + char disk_name[32]; }; static void vdc_ldc_reset(struct vdc_port *port); static void vdc_ldc_reset_work(struct work_struct *work); -static void vdc_ldc_reset_timer(struct timer_list *t); +static void vdc_ldc_reset_timer_work(struct work_struct *work); static inline struct vdc_port *to_vdc_port(struct vio_driver_state *vio) { @@ -175,11 +180,8 @@ static void vdc_blk_queue_start(struct vdc_port *port) * handshake completes, so check for initial handshake before we've * allocated a disk. */ - if (port->disk && blk_queue_stopped(port->disk->queue) && - vdc_tx_dring_avail(dr) * 100 / VDC_TX_RING_SIZE >= 50) { - blk_start_queue(port->disk->queue); - } - + if (port->disk && vdc_tx_dring_avail(dr) * 100 / VDC_TX_RING_SIZE >= 50) + blk_mq_start_hw_queues(port->disk->queue); } static void vdc_finish(struct vio_driver_state *vio, int err, int waiting_for) @@ -197,7 +199,7 @@ static void vdc_handshake_complete(struct vio_driver_state *vio) { struct vdc_port *port = to_vdc_port(vio); - del_timer(&port->ldc_reset_timer); + cancel_delayed_work(&port->ldc_reset_timer_work); vdc_finish(vio, 0, WAITING_FOR_LINK_UP); vdc_blk_queue_start(port); } @@ -320,7 +322,7 @@ static void vdc_end_one(struct vdc_port *port, struct vio_dring_state *dr, rqe->req = NULL; - __blk_end_request(req, (desc->status ? BLK_STS_IOERR : 0), desc->size); + blk_mq_end_request(req, desc->status ? BLK_STS_IOERR : 0); vdc_blk_queue_start(port); } @@ -431,6 +433,7 @@ static int __vdc_tx_trigger(struct vdc_port *port) .end_idx = dr->prod, }; int err, delay; + int retries = 0; hdr.seq = dr->snd_nxt; delay = 1; @@ -443,6 +446,8 @@ static int __vdc_tx_trigger(struct vdc_port *port) udelay(delay); if ((delay <<= 1) > 128) delay = 128; + if (retries++ > VDC_MAX_RETRIES) + break; } while (err == -EAGAIN); if (err == -ENOTCONN) @@ -525,29 +530,40 @@ static int __send_request(struct request *req) return err; } -static void do_vdc_request(struct request_queue *rq) +static blk_status_t vdc_queue_rq(struct blk_mq_hw_ctx *hctx, + const struct blk_mq_queue_data *bd) { - struct request *req; + struct vdc_port *port = hctx->queue->queuedata; + struct vio_dring_state *dr; + unsigned long flags; - while ((req = blk_peek_request(rq)) != NULL) { - struct vdc_port *port; - struct vio_dring_state *dr; + dr = &port->vio.drings[VIO_DRIVER_TX_RING]; - port = req->rq_disk->private_data; - dr = &port->vio.drings[VIO_DRIVER_TX_RING]; - if (unlikely(vdc_tx_dring_avail(dr) < 1)) - goto wait; + blk_mq_start_request(bd->rq); - blk_start_request(req); + spin_lock_irqsave(&port->vio.lock, flags); - if (__send_request(req) < 0) { - blk_requeue_request(rq, req); -wait: - /* Avoid pointless unplugs. */ - blk_stop_queue(rq); - break; - } + /* + * Doing drain, just end the request in error + */ + if (unlikely(port->drain)) { + spin_unlock_irqrestore(&port->vio.lock, flags); + return BLK_STS_IOERR; + } + + if (unlikely(vdc_tx_dring_avail(dr) < 1)) { + spin_unlock_irqrestore(&port->vio.lock, flags); + blk_mq_stop_hw_queue(hctx); + return BLK_STS_DEV_RESOURCE; + } + + if (__send_request(bd->rq) < 0) { + spin_unlock_irqrestore(&port->vio.lock, flags); + return BLK_STS_IOERR; } + + spin_unlock_irqrestore(&port->vio.lock, flags); + return BLK_STS_OK; } static int generic_request(struct vdc_port *port, u8 op, void *buf, int len) @@ -759,6 +775,31 @@ static void vdc_port_down(struct vdc_port *port) vio_ldc_free(&port->vio); } +static const struct blk_mq_ops vdc_mq_ops = { + .queue_rq = vdc_queue_rq, +}; + +static void cleanup_queue(struct request_queue *q) +{ + struct vdc_port *port = q->queuedata; + + blk_cleanup_queue(q); + blk_mq_free_tag_set(&port->tag_set); +} + +static struct request_queue *init_queue(struct vdc_port *port) +{ + struct request_queue *q; + + q = blk_mq_init_sq_queue(&port->tag_set, &vdc_mq_ops, VDC_TX_RING_SIZE, + BLK_MQ_F_SHOULD_MERGE); + if (IS_ERR(q)) + return q; + + q->queuedata = port; + return q; +} + static int probe_disk(struct vdc_port *port) { struct request_queue *q; @@ -796,17 +837,17 @@ static int probe_disk(struct vdc_port *port) (u64)geom.num_sec); } - q = blk_init_queue(do_vdc_request, &port->vio.lock); - if (!q) { + q = init_queue(port); + if (IS_ERR(q)) { printk(KERN_ERR PFX "%s: Could not allocate queue.\n", port->vio.name); - return -ENOMEM; + return PTR_ERR(q); } g = alloc_disk(1 << PARTITION_SHIFT); if (!g) { printk(KERN_ERR PFX "%s: Could not allocate gendisk.\n", port->vio.name); - blk_cleanup_queue(q); + cleanup_queue(q); return -ENOMEM; } @@ -981,7 +1022,7 @@ static int vdc_port_probe(struct vio_dev *vdev, const struct vio_device_id *id) */ ldc_timeout = mdesc_get_property(hp, vdev->mp, "vdc-timeout", NULL); port->ldc_timeout = ldc_timeout ? *ldc_timeout : 0; - timer_setup(&port->ldc_reset_timer, vdc_ldc_reset_timer, 0); + INIT_DELAYED_WORK(&port->ldc_reset_timer_work, vdc_ldc_reset_timer_work); INIT_WORK(&port->ldc_reset_work, vdc_ldc_reset_work); err = vio_driver_init(&port->vio, vdev, VDEV_DISK, @@ -1034,18 +1075,14 @@ static int vdc_port_remove(struct vio_dev *vdev) struct vdc_port *port = dev_get_drvdata(&vdev->dev); if (port) { - unsigned long flags; - - spin_lock_irqsave(&port->vio.lock, flags); - blk_stop_queue(port->disk->queue); - spin_unlock_irqrestore(&port->vio.lock, flags); + blk_mq_stop_hw_queues(port->disk->queue); flush_work(&port->ldc_reset_work); - del_timer_sync(&port->ldc_reset_timer); + cancel_delayed_work_sync(&port->ldc_reset_timer_work); del_timer_sync(&port->vio.timer); del_gendisk(port->disk); - blk_cleanup_queue(port->disk->queue); + cleanup_queue(port->disk->queue); put_disk(port->disk); port->disk = NULL; @@ -1080,32 +1117,46 @@ static void vdc_requeue_inflight(struct vdc_port *port) } rqe->req = NULL; - blk_requeue_request(port->disk->queue, req); + blk_mq_requeue_request(req, false); } } static void vdc_queue_drain(struct vdc_port *port) { - struct request *req; + struct request_queue *q = port->disk->queue; + + /* + * Mark the queue as draining, then freeze/quiesce to ensure + * that all existing requests are seen in ->queue_rq() and killed + */ + port->drain = 1; + spin_unlock_irq(&port->vio.lock); - while ((req = blk_fetch_request(port->disk->queue)) != NULL) - __blk_end_request_all(req, BLK_STS_IOERR); + blk_mq_freeze_queue(q); + blk_mq_quiesce_queue(q); + + spin_lock_irq(&port->vio.lock); + port->drain = 0; + blk_mq_unquiesce_queue(q); + blk_mq_unfreeze_queue(q); } -static void vdc_ldc_reset_timer(struct timer_list *t) +static void vdc_ldc_reset_timer_work(struct work_struct *work) { - struct vdc_port *port = from_timer(port, t, ldc_reset_timer); - struct vio_driver_state *vio = &port->vio; - unsigned long flags; + struct vdc_port *port; + struct vio_driver_state *vio; - spin_lock_irqsave(&vio->lock, flags); + port = container_of(work, struct vdc_port, ldc_reset_timer_work.work); + vio = &port->vio; + + spin_lock_irq(&vio->lock); if (!(port->vio.hs_state & VIO_HS_COMPLETE)) { pr_warn(PFX "%s ldc down %llu seconds, draining queue\n", port->disk_name, port->ldc_timeout); vdc_queue_drain(port); vdc_blk_queue_start(port); } - spin_unlock_irqrestore(&vio->lock, flags); + spin_unlock_irq(&vio->lock); } static void vdc_ldc_reset_work(struct work_struct *work) @@ -1129,7 +1180,7 @@ static void vdc_ldc_reset(struct vdc_port *port) assert_spin_locked(&port->vio.lock); pr_warn(PFX "%s ldc link reset\n", port->disk_name); - blk_stop_queue(port->disk->queue); + blk_mq_stop_hw_queues(port->disk->queue); vdc_requeue_inflight(port); vdc_port_down(port); @@ -1146,7 +1197,7 @@ static void vdc_ldc_reset(struct vdc_port *port) } if (port->ldc_timeout) - mod_timer(&port->ldc_reset_timer, + mod_delayed_work(system_wq, &port->ldc_reset_timer_work, round_jiffies(jiffies + HZ * port->ldc_timeout)); mod_timer(&port->vio.timer, round_jiffies(jiffies + HZ)); return; diff --git a/drivers/block/sx8.c b/drivers/block/sx8.c index 064b8c5c7a32..4478eb7efee0 100644 --- a/drivers/block/sx8.c +++ b/drivers/block/sx8.c @@ -243,7 +243,6 @@ struct carm_port { unsigned int port_no; struct gendisk *disk; struct carm_host *host; - struct blk_mq_tag_set tag_set; /* attached device characteristics */ u64 capacity; @@ -254,13 +253,10 @@ struct carm_port { }; struct carm_request { - unsigned int tag; int n_elem; unsigned int msg_type; unsigned int msg_subtype; unsigned int msg_bucket; - struct request *rq; - struct carm_port *port; struct scatterlist sg[CARM_MAX_REQ_SG]; }; @@ -291,9 +287,6 @@ struct carm_host { unsigned int wait_q_cons; struct request_queue *wait_q[CARM_MAX_WAIT_Q]; - unsigned int n_msgs; - u64 msg_alloc; - struct carm_request req[CARM_MAX_REQ]; void *msg_base; dma_addr_t msg_dma; @@ -478,10 +471,10 @@ static inline dma_addr_t carm_ref_msg_dma(struct carm_host *host, } static int carm_send_msg(struct carm_host *host, - struct carm_request *crq) + struct carm_request *crq, unsigned tag) { void __iomem *mmio = host->mmio; - u32 msg = (u32) carm_ref_msg_dma(host, crq->tag); + u32 msg = (u32) carm_ref_msg_dma(host, tag); u32 cm_bucket = crq->msg_bucket; u32 tmp; int rc = 0; @@ -506,99 +499,24 @@ static int carm_send_msg(struct carm_host *host, return rc; } -static struct carm_request *carm_get_request(struct carm_host *host) -{ - unsigned int i; - - /* obey global hardware limit on S/G entries */ - if (host->hw_sg_used >= (CARM_MAX_HOST_SG - CARM_MAX_REQ_SG)) - return NULL; - - for (i = 0; i < max_queue; i++) - if ((host->msg_alloc & (1ULL << i)) == 0) { - struct carm_request *crq = &host->req[i]; - crq->port = NULL; - crq->n_elem = 0; - - host->msg_alloc |= (1ULL << i); - host->n_msgs++; - - assert(host->n_msgs <= CARM_MAX_REQ); - sg_init_table(crq->sg, CARM_MAX_REQ_SG); - return crq; - } - - DPRINTK("no request available, returning NULL\n"); - return NULL; -} - -static int carm_put_request(struct carm_host *host, struct carm_request *crq) -{ - assert(crq->tag < max_queue); - - if (unlikely((host->msg_alloc & (1ULL << crq->tag)) == 0)) - return -EINVAL; /* tried to clear a tag that was not active */ - - assert(host->hw_sg_used >= crq->n_elem); - - host->msg_alloc &= ~(1ULL << crq->tag); - host->hw_sg_used -= crq->n_elem; - host->n_msgs--; - - return 0; -} - -static struct carm_request *carm_get_special(struct carm_host *host) -{ - unsigned long flags; - struct carm_request *crq = NULL; - struct request *rq; - int tries = 5000; - - while (tries-- > 0) { - spin_lock_irqsave(&host->lock, flags); - crq = carm_get_request(host); - spin_unlock_irqrestore(&host->lock, flags); - - if (crq) - break; - msleep(10); - } - - if (!crq) - return NULL; - - rq = blk_get_request(host->oob_q, REQ_OP_DRV_OUT, 0); - if (IS_ERR(rq)) { - spin_lock_irqsave(&host->lock, flags); - carm_put_request(host, crq); - spin_unlock_irqrestore(&host->lock, flags); - return NULL; - } - - crq->rq = rq; - return crq; -} - static int carm_array_info (struct carm_host *host, unsigned int array_idx) { struct carm_msg_ioctl *ioc; - unsigned int idx; u32 msg_data; dma_addr_t msg_dma; struct carm_request *crq; + struct request *rq; int rc; - crq = carm_get_special(host); - if (!crq) { + rq = blk_mq_alloc_request(host->oob_q, REQ_OP_DRV_OUT, 0); + if (IS_ERR(rq)) { rc = -ENOMEM; goto err_out; } + crq = blk_mq_rq_to_pdu(rq); - idx = crq->tag; - - ioc = carm_ref_msg(host, idx); - msg_dma = carm_ref_msg_dma(host, idx); + ioc = carm_ref_msg(host, rq->tag); + msg_dma = carm_ref_msg_dma(host, rq->tag); msg_data = (u32) (msg_dma + sizeof(struct carm_array_info)); crq->msg_type = CARM_MSG_ARRAY; @@ -612,7 +530,7 @@ static int carm_array_info (struct carm_host *host, unsigned int array_idx) ioc->type = CARM_MSG_ARRAY; ioc->subtype = CARM_ARRAY_INFO; ioc->array_id = (u8) array_idx; - ioc->handle = cpu_to_le32(TAG_ENCODE(idx)); + ioc->handle = cpu_to_le32(TAG_ENCODE(rq->tag)); ioc->data_addr = cpu_to_le32(msg_data); spin_lock_irq(&host->lock); @@ -620,9 +538,8 @@ static int carm_array_info (struct carm_host *host, unsigned int array_idx) host->state == HST_DEV_SCAN); spin_unlock_irq(&host->lock); - DPRINTK("blk_execute_rq_nowait, tag == %u\n", idx); - crq->rq->special = crq; - blk_execute_rq_nowait(host->oob_q, NULL, crq->rq, true, NULL); + DPRINTK("blk_execute_rq_nowait, tag == %u\n", rq->tag); + blk_execute_rq_nowait(host->oob_q, NULL, rq, true, NULL); return 0; @@ -637,21 +554,21 @@ typedef unsigned int (*carm_sspc_t)(struct carm_host *, unsigned int, void *); static int carm_send_special (struct carm_host *host, carm_sspc_t func) { + struct request *rq; struct carm_request *crq; struct carm_msg_ioctl *ioc; void *mem; - unsigned int idx, msg_size; + unsigned int msg_size; int rc; - crq = carm_get_special(host); - if (!crq) + rq = blk_mq_alloc_request(host->oob_q, REQ_OP_DRV_OUT, 0); + if (IS_ERR(rq)) return -ENOMEM; + crq = blk_mq_rq_to_pdu(rq); - idx = crq->tag; + mem = carm_ref_msg(host, rq->tag); - mem = carm_ref_msg(host, idx); - - msg_size = func(host, idx, mem); + msg_size = func(host, rq->tag, mem); ioc = mem; crq->msg_type = ioc->type; @@ -660,9 +577,8 @@ static int carm_send_special (struct carm_host *host, carm_sspc_t func) BUG_ON(rc < 0); crq->msg_bucket = (u32) rc; - DPRINTK("blk_execute_rq_nowait, tag == %u\n", idx); - crq->rq->special = crq; - blk_execute_rq_nowait(host->oob_q, NULL, crq->rq, true, NULL); + DPRINTK("blk_execute_rq_nowait, tag == %u\n", rq->tag); + blk_execute_rq_nowait(host->oob_q, NULL, rq, true, NULL); return 0; } @@ -744,19 +660,6 @@ static unsigned int carm_fill_get_fw_ver(struct carm_host *host, sizeof(struct carm_fw_ver); } -static inline void carm_end_request_queued(struct carm_host *host, - struct carm_request *crq, - blk_status_t error) -{ - struct request *req = crq->rq; - int rc; - - blk_mq_end_request(req, error); - - rc = carm_put_request(host, crq); - assert(rc == 0); -} - static inline void carm_push_q (struct carm_host *host, struct request_queue *q) { unsigned int idx = host->wait_q_prod % CARM_MAX_WAIT_Q; @@ -791,101 +694,50 @@ static inline void carm_round_robin(struct carm_host *host) } } -static inline void carm_end_rq(struct carm_host *host, struct carm_request *crq, - blk_status_t error) -{ - carm_end_request_queued(host, crq, error); - if (max_queue == 1) - carm_round_robin(host); - else if ((host->n_msgs <= CARM_MSG_LOW_WATER) && - (host->hw_sg_used <= CARM_SG_LOW_WATER)) { - carm_round_robin(host); - } -} - -static blk_status_t carm_oob_queue_rq(struct blk_mq_hw_ctx *hctx, - const struct blk_mq_queue_data *bd) +static inline enum dma_data_direction carm_rq_dir(struct request *rq) { - struct request_queue *q = hctx->queue; - struct carm_host *host = q->queuedata; - struct carm_request *crq; - int rc; - - blk_mq_start_request(bd->rq); - - spin_lock_irq(&host->lock); - - crq = bd->rq->special; - assert(crq != NULL); - assert(crq->rq == bd->rq); - - crq->n_elem = 0; - - DPRINTK("send req\n"); - rc = carm_send_msg(host, crq); - if (rc) { - carm_push_q(host, q); - spin_unlock_irq(&host->lock); - return BLK_STS_DEV_RESOURCE; - } - - spin_unlock_irq(&host->lock); - return BLK_STS_OK; + return op_is_write(req_op(rq)) ? DMA_TO_DEVICE : DMA_FROM_DEVICE; } static blk_status_t carm_queue_rq(struct blk_mq_hw_ctx *hctx, const struct blk_mq_queue_data *bd) { struct request_queue *q = hctx->queue; + struct request *rq = bd->rq; struct carm_port *port = q->queuedata; struct carm_host *host = port->host; + struct carm_request *crq = blk_mq_rq_to_pdu(rq); struct carm_msg_rw *msg; - struct carm_request *crq; - struct request *rq = bd->rq; struct scatterlist *sg; - int writing = 0, pci_dir, i, n_elem, rc; - u32 tmp; + int i, n_elem = 0, rc; unsigned int msg_size; + u32 tmp; + + crq->n_elem = 0; + sg_init_table(crq->sg, CARM_MAX_REQ_SG); blk_mq_start_request(rq); spin_lock_irq(&host->lock); - - crq = carm_get_request(host); - if (!crq) { - carm_push_q(host, q); - spin_unlock_irq(&host->lock); - return BLK_STS_DEV_RESOURCE; - } - crq->rq = rq; - - if (rq_data_dir(rq) == WRITE) { - writing = 1; - pci_dir = DMA_TO_DEVICE; - } else { - pci_dir = DMA_FROM_DEVICE; - } + if (req_op(rq) == REQ_OP_DRV_OUT) + goto send_msg; /* get scatterlist from block layer */ sg = &crq->sg[0]; n_elem = blk_rq_map_sg(q, rq, sg); - if (n_elem <= 0) { - /* request with no s/g entries? */ - carm_end_rq(host, crq, BLK_STS_IOERR); - spin_unlock_irq(&host->lock); - return BLK_STS_IOERR; - } + if (n_elem <= 0) + goto out_ioerr; /* map scatterlist to PCI bus addresses */ - n_elem = dma_map_sg(&host->pdev->dev, sg, n_elem, pci_dir); - if (n_elem <= 0) { - /* request with no s/g entries? */ - carm_end_rq(host, crq, BLK_STS_IOERR); - spin_unlock_irq(&host->lock); - return BLK_STS_IOERR; - } + n_elem = dma_map_sg(&host->pdev->dev, sg, n_elem, carm_rq_dir(rq)); + if (n_elem <= 0) + goto out_ioerr; + + /* obey global hardware limit on S/G entries */ + if (host->hw_sg_used >= CARM_MAX_HOST_SG - n_elem) + goto out_resource; + crq->n_elem = n_elem; - crq->port = port; host->hw_sg_used += n_elem; /* @@ -893,9 +745,9 @@ static blk_status_t carm_queue_rq(struct blk_mq_hw_ctx *hctx, */ VPRINTK("build msg\n"); - msg = (struct carm_msg_rw *) carm_ref_msg(host, crq->tag); + msg = (struct carm_msg_rw *) carm_ref_msg(host, rq->tag); - if (writing) { + if (rq_data_dir(rq) == WRITE) { msg->type = CARM_MSG_WRITE; crq->msg_type = CARM_MSG_WRITE; } else { @@ -906,7 +758,7 @@ static blk_status_t carm_queue_rq(struct blk_mq_hw_ctx *hctx, msg->id = port->port_no; msg->sg_count = n_elem; msg->sg_type = SGT_32BIT; - msg->handle = cpu_to_le32(TAG_ENCODE(crq->tag)); + msg->handle = cpu_to_le32(TAG_ENCODE(rq->tag)); msg->lba = cpu_to_le32(blk_rq_pos(rq) & 0xffffffff); tmp = (blk_rq_pos(rq) >> 16) >> 16; msg->lba_high = cpu_to_le16( (u16) tmp ); @@ -923,22 +775,28 @@ static blk_status_t carm_queue_rq(struct blk_mq_hw_ctx *hctx, rc = carm_lookup_bucket(msg_size); BUG_ON(rc < 0); crq->msg_bucket = (u32) rc; - +send_msg: /* * queue read/write message to hardware */ - - VPRINTK("send msg, tag == %u\n", crq->tag); - rc = carm_send_msg(host, crq); + VPRINTK("send msg, tag == %u\n", rq->tag); + rc = carm_send_msg(host, crq, rq->tag); if (rc) { - carm_put_request(host, crq); - carm_push_q(host, q); - spin_unlock_irq(&host->lock); - return BLK_STS_DEV_RESOURCE; + host->hw_sg_used -= n_elem; + goto out_resource; } spin_unlock_irq(&host->lock); return BLK_STS_OK; +out_resource: + dma_unmap_sg(&host->pdev->dev, &crq->sg[0], n_elem, carm_rq_dir(rq)); + carm_push_q(host, q); + spin_unlock_irq(&host->lock); + return BLK_STS_DEV_RESOURCE; +out_ioerr: + carm_round_robin(host); + spin_unlock_irq(&host->lock); + return BLK_STS_IOERR; } static void carm_handle_array_info(struct carm_host *host, @@ -954,8 +812,6 @@ static void carm_handle_array_info(struct carm_host *host, DPRINTK("ENTER\n"); - carm_end_rq(host, crq, error); - if (error) goto out; if (le32_to_cpu(desc->array_status) & ARRAY_NO_EXIST) @@ -1011,8 +867,6 @@ static void carm_handle_scan_chan(struct carm_host *host, DPRINTK("ENTER\n"); - carm_end_rq(host, crq, error); - if (error) { new_state = HST_ERROR; goto out; @@ -1040,8 +894,6 @@ static void carm_handle_generic(struct carm_host *host, { DPRINTK("ENTER\n"); - carm_end_rq(host, crq, error); - assert(host->state == cur_state); if (error) host->state = HST_ERROR; @@ -1050,28 +902,12 @@ static void carm_handle_generic(struct carm_host *host, schedule_work(&host->fsm_task); } -static inline void carm_handle_rw(struct carm_host *host, - struct carm_request *crq, blk_status_t error) -{ - int pci_dir; - - VPRINTK("ENTER\n"); - - if (rq_data_dir(crq->rq) == WRITE) - pci_dir = DMA_TO_DEVICE; - else - pci_dir = DMA_FROM_DEVICE; - - dma_unmap_sg(&host->pdev->dev, &crq->sg[0], crq->n_elem, pci_dir); - - carm_end_rq(host, crq, error); -} - static inline void carm_handle_resp(struct carm_host *host, __le32 ret_handle_le, u32 status) { u32 handle = le32_to_cpu(ret_handle_le); unsigned int msg_idx; + struct request *rq; struct carm_request *crq; blk_status_t error = (status == RMSG_OK) ? 0 : BLK_STS_IOERR; u8 *mem; @@ -1087,13 +923,15 @@ static inline void carm_handle_resp(struct carm_host *host, msg_idx = TAG_DECODE(handle); VPRINTK("tag == %u\n", msg_idx); - crq = &host->req[msg_idx]; + rq = blk_mq_tag_to_rq(host->tag_set.tags[0], msg_idx); + crq = blk_mq_rq_to_pdu(rq); /* fast path */ if (likely(crq->msg_type == CARM_MSG_READ || crq->msg_type == CARM_MSG_WRITE)) { - carm_handle_rw(host, crq, error); - return; + dma_unmap_sg(&host->pdev->dev, &crq->sg[0], crq->n_elem, + carm_rq_dir(rq)); + goto done; } mem = carm_ref_msg(host, msg_idx); @@ -1103,7 +941,7 @@ static inline void carm_handle_resp(struct carm_host *host, switch (crq->msg_subtype) { case CARM_IOC_SCAN_CHAN: carm_handle_scan_chan(host, crq, mem, error); - break; + goto done; default: /* unknown / invalid response */ goto err_out; @@ -1116,11 +954,11 @@ static inline void carm_handle_resp(struct carm_host *host, case MISC_ALLOC_MEM: carm_handle_generic(host, crq, error, HST_ALLOC_BUF, HST_SYNC_TIME); - break; + goto done; case MISC_SET_TIME: carm_handle_generic(host, crq, error, HST_SYNC_TIME, HST_GET_FW_VER); - break; + goto done; case MISC_GET_FW_VER: { struct carm_fw_ver *ver = (struct carm_fw_ver *) (mem + sizeof(struct carm_msg_get_fw_ver)); @@ -1130,7 +968,7 @@ static inline void carm_handle_resp(struct carm_host *host, } carm_handle_generic(host, crq, error, HST_GET_FW_VER, HST_PORT_SCAN); - break; + goto done; } default: /* unknown / invalid response */ @@ -1161,7 +999,13 @@ static inline void carm_handle_resp(struct carm_host *host, err_out: printk(KERN_WARNING DRV_NAME "(%s): BUG: unhandled message type %d/%d\n", pci_name(host->pdev), crq->msg_type, crq->msg_subtype); - carm_end_rq(host, crq, BLK_STS_IOERR); + error = BLK_STS_IOERR; +done: + host->hw_sg_used -= crq->n_elem; + blk_mq_end_request(blk_mq_rq_from_pdu(crq), error); + + if (host->hw_sg_used <= CARM_SG_LOW_WATER) + carm_round_robin(host); } static inline void carm_handle_responses(struct carm_host *host) @@ -1491,78 +1335,56 @@ static int carm_init_host(struct carm_host *host) return 0; } -static const struct blk_mq_ops carm_oob_mq_ops = { - .queue_rq = carm_oob_queue_rq, -}; - static const struct blk_mq_ops carm_mq_ops = { .queue_rq = carm_queue_rq, }; -static int carm_init_disks(struct carm_host *host) +static int carm_init_disk(struct carm_host *host, unsigned int port_no) { - unsigned int i; - int rc = 0; + struct carm_port *port = &host->port[port_no]; + struct gendisk *disk; + struct request_queue *q; - for (i = 0; i < CARM_MAX_PORTS; i++) { - struct gendisk *disk; - struct request_queue *q; - struct carm_port *port; + port->host = host; + port->port_no = port_no; - port = &host->port[i]; - port->host = host; - port->port_no = i; + disk = alloc_disk(CARM_MINORS_PER_MAJOR); + if (!disk) + return -ENOMEM; - disk = alloc_disk(CARM_MINORS_PER_MAJOR); - if (!disk) { - rc = -ENOMEM; - break; - } + port->disk = disk; + sprintf(disk->disk_name, DRV_NAME "/%u", + (unsigned int)host->id * CARM_MAX_PORTS + port_no); + disk->major = host->major; + disk->first_minor = port_no * CARM_MINORS_PER_MAJOR; + disk->fops = &carm_bd_ops; + disk->private_data = port; - port->disk = disk; - sprintf(disk->disk_name, DRV_NAME "/%u", - (unsigned int) (host->id * CARM_MAX_PORTS) + i); - disk->major = host->major; - disk->first_minor = i * CARM_MINORS_PER_MAJOR; - disk->fops = &carm_bd_ops; - disk->private_data = port; - - q = blk_mq_init_sq_queue(&port->tag_set, &carm_mq_ops, - max_queue, BLK_MQ_F_SHOULD_MERGE); - if (IS_ERR(q)) { - rc = PTR_ERR(q); - break; - } - disk->queue = q; - blk_queue_max_segments(q, CARM_MAX_REQ_SG); - blk_queue_segment_boundary(q, CARM_SG_BOUNDARY); + q = blk_mq_init_queue(&host->tag_set); + if (IS_ERR(q)) + return PTR_ERR(q); - q->queuedata = port; - } + blk_queue_max_segments(q, CARM_MAX_REQ_SG); + blk_queue_segment_boundary(q, CARM_SG_BOUNDARY); - return rc; + q->queuedata = port; + disk->queue = q; + return 0; } -static void carm_free_disks(struct carm_host *host) +static void carm_free_disk(struct carm_host *host, unsigned int port_no) { - unsigned int i; - - for (i = 0; i < CARM_MAX_PORTS; i++) { - struct carm_port *port = &host->port[i]; - struct gendisk *disk = port->disk; + struct carm_port *port = &host->port[port_no]; + struct gendisk *disk = port->disk; - if (disk) { - struct request_queue *q = disk->queue; + if (!disk) + return; - if (disk->flags & GENHD_FL_UP) - del_gendisk(disk); - if (q) { - blk_mq_free_tag_set(&port->tag_set); - blk_cleanup_queue(q); - } - put_disk(disk); - } - } + if (disk->flags & GENHD_FL_UP) + del_gendisk(disk); + if (disk->queue) + blk_cleanup_queue(disk->queue); + put_disk(disk); } static int carm_init_shm(struct carm_host *host) @@ -1618,9 +1440,6 @@ static int carm_init_one (struct pci_dev *pdev, const struct pci_device_id *ent) INIT_WORK(&host->fsm_task, carm_fsm_task); init_completion(&host->probe_comp); - for (i = 0; i < ARRAY_SIZE(host->req); i++) - host->req[i].tag = i; - host->mmio = ioremap(pci_resource_start(pdev, 0), pci_resource_len(pdev, 0)); if (!host->mmio) { @@ -1637,14 +1456,26 @@ static int carm_init_one (struct pci_dev *pdev, const struct pci_device_id *ent) goto err_out_iounmap; } - q = blk_mq_init_sq_queue(&host->tag_set, &carm_oob_mq_ops, 1, - BLK_MQ_F_NO_SCHED); + memset(&host->tag_set, 0, sizeof(host->tag_set)); + host->tag_set.ops = &carm_mq_ops; + host->tag_set.cmd_size = sizeof(struct carm_request); + host->tag_set.nr_hw_queues = 1; + host->tag_set.nr_maps = 1; + host->tag_set.queue_depth = max_queue; + host->tag_set.numa_node = NUMA_NO_NODE; + host->tag_set.flags = BLK_MQ_F_SHOULD_MERGE; + + rc = blk_mq_alloc_tag_set(&host->tag_set); + if (rc) + goto err_out_dma_free; + + q = blk_mq_init_queue(&host->tag_set); if (IS_ERR(q)) { - printk(KERN_ERR DRV_NAME "(%s): OOB queue alloc failure\n", - pci_name(pdev)); rc = PTR_ERR(q); + blk_mq_free_tag_set(&host->tag_set); goto err_out_dma_free; } + host->oob_q = q; q->queuedata = host; @@ -1667,9 +1498,11 @@ static int carm_init_one (struct pci_dev *pdev, const struct pci_device_id *ent) if (host->flags & FL_DYN_MAJOR) host->major = rc; - rc = carm_init_disks(host); - if (rc) - goto err_out_blkdev_disks; + for (i = 0; i < CARM_MAX_PORTS; i++) { + rc = carm_init_disk(host, i); + if (rc) + goto err_out_blkdev_disks; + } pci_set_master(pdev); @@ -1699,7 +1532,8 @@ static int carm_init_one (struct pci_dev *pdev, const struct pci_device_id *ent) err_out_free_irq: free_irq(pdev->irq, host); err_out_blkdev_disks: - carm_free_disks(host); + for (i = 0; i < CARM_MAX_PORTS; i++) + carm_free_disk(host, i); unregister_blkdev(host->major, host->name); err_out_free_majors: if (host->major == 160) @@ -1724,6 +1558,7 @@ err_out: static void carm_remove_one (struct pci_dev *pdev) { struct carm_host *host = pci_get_drvdata(pdev); + unsigned int i; if (!host) { printk(KERN_ERR PFX "BUG: no host data for PCI(%s)\n", @@ -1732,7 +1567,8 @@ static void carm_remove_one (struct pci_dev *pdev) } free_irq(pdev->irq, host); - carm_free_disks(host); + for (i = 0; i < CARM_MAX_PORTS; i++) + carm_free_disk(host, i); unregister_blkdev(host->major, host->name); if (host->major == 160) clear_bit(0, &carm_major_alloc); diff --git a/drivers/block/umem.c b/drivers/block/umem.c index be3e3ab79950..aa035cf8a51d 100644 --- a/drivers/block/umem.c +++ b/drivers/block/umem.c @@ -888,8 +888,7 @@ static int mm_pci_probe(struct pci_dev *dev, const struct pci_device_id *id) card->biotail = &card->bio; spin_lock_init(&card->lock); - card->queue = blk_alloc_queue_node(GFP_KERNEL, NUMA_NO_NODE, - &card->lock); + card->queue = blk_alloc_queue_node(GFP_KERNEL, NUMA_NO_NODE); if (!card->queue) goto failed_alloc; diff --git a/drivers/block/virtio_blk.c b/drivers/block/virtio_blk.c index 086c6bb12baa..912c4265e592 100644 --- a/drivers/block/virtio_blk.c +++ b/drivers/block/virtio_blk.c @@ -214,6 +214,20 @@ static void virtblk_done(struct virtqueue *vq) spin_unlock_irqrestore(&vblk->vqs[qid].lock, flags); } +static void virtio_commit_rqs(struct blk_mq_hw_ctx *hctx) +{ + struct virtio_blk *vblk = hctx->queue->queuedata; + struct virtio_blk_vq *vq = &vblk->vqs[hctx->queue_num]; + bool kick; + + spin_lock_irq(&vq->lock); + kick = virtqueue_kick_prepare(vq->vq); + spin_unlock_irq(&vq->lock); + + if (kick) + virtqueue_notify(vq->vq); +} + static blk_status_t virtio_queue_rq(struct blk_mq_hw_ctx *hctx, const struct blk_mq_queue_data *bd) { @@ -624,7 +638,7 @@ static int virtblk_map_queues(struct blk_mq_tag_set *set) { struct virtio_blk *vblk = set->driver_data; - return blk_mq_virtio_map_queues(set, vblk->vdev, 0); + return blk_mq_virtio_map_queues(&set->map[0], vblk->vdev, 0); } #ifdef CONFIG_VIRTIO_BLK_SCSI @@ -638,6 +652,7 @@ static void virtblk_initialize_rq(struct request *req) static const struct blk_mq_ops virtio_mq_ops = { .queue_rq = virtio_queue_rq, + .commit_rqs = virtio_commit_rqs, .complete = virtblk_request_done, .init_request = virtblk_init_request, #ifdef CONFIG_VIRTIO_BLK_SCSI diff --git a/drivers/ide/ide-atapi.c b/drivers/ide/ide-atapi.c index 8b2b72b93885..da58020a144e 100644 --- a/drivers/ide/ide-atapi.c +++ b/drivers/ide/ide-atapi.c @@ -94,7 +94,7 @@ int ide_queue_pc_tail(ide_drive_t *drive, struct gendisk *disk, rq = blk_get_request(drive->queue, REQ_OP_DRV_IN, 0); ide_req(rq)->type = ATA_PRIV_MISC; - rq->special = (char *)pc; + ide_req(rq)->special = pc; if (buf && bufflen) { error = blk_rq_map_kern(drive->queue, rq, buf, bufflen, @@ -172,8 +172,8 @@ EXPORT_SYMBOL_GPL(ide_create_request_sense_cmd); void ide_prep_sense(ide_drive_t *drive, struct request *rq) { struct request_sense *sense = &drive->sense_data; - struct request *sense_rq = drive->sense_rq; - struct scsi_request *req = scsi_req(sense_rq); + struct request *sense_rq; + struct scsi_request *req; unsigned int cmd_len, sense_len; int err; @@ -196,9 +196,16 @@ void ide_prep_sense(ide_drive_t *drive, struct request *rq) if (ata_sense_request(rq) || drive->sense_rq_armed) return; + sense_rq = drive->sense_rq; + if (!sense_rq) { + sense_rq = blk_mq_alloc_request(drive->queue, REQ_OP_DRV_IN, + BLK_MQ_REQ_RESERVED | BLK_MQ_REQ_NOWAIT); + drive->sense_rq = sense_rq; + } + req = scsi_req(sense_rq); + memset(sense, 0, sizeof(*sense)); - blk_rq_init(rq->q, sense_rq); scsi_req_init(req); err = blk_rq_map_kern(drive->queue, sense_rq, sense, sense_len, @@ -207,6 +214,8 @@ void ide_prep_sense(ide_drive_t *drive, struct request *rq) if (printk_ratelimit()) printk(KERN_WARNING PFX "%s: failed to map sense " "buffer\n", drive->name); + blk_mq_free_request(sense_rq); + drive->sense_rq = NULL; return; } @@ -226,6 +235,8 @@ EXPORT_SYMBOL_GPL(ide_prep_sense); int ide_queue_sense_rq(ide_drive_t *drive, void *special) { + struct request *sense_rq = drive->sense_rq; + /* deferred failure from ide_prep_sense() */ if (!drive->sense_rq_armed) { printk(KERN_WARNING PFX "%s: error queuing a sense request\n", @@ -233,12 +244,12 @@ int ide_queue_sense_rq(ide_drive_t *drive, void *special) return -ENOMEM; } - drive->sense_rq->special = special; + ide_req(sense_rq)->special = special; drive->sense_rq_armed = false; drive->hwif->rq = NULL; - elv_add_request(drive->queue, drive->sense_rq, ELEVATOR_INSERT_FRONT); + ide_insert_request_head(drive, sense_rq); return 0; } EXPORT_SYMBOL_GPL(ide_queue_sense_rq); @@ -270,10 +281,8 @@ void ide_retry_pc(ide_drive_t *drive) */ drive->hwif->rq = NULL; ide_requeue_and_plug(drive, failed_rq); - if (ide_queue_sense_rq(drive, pc)) { - blk_start_request(failed_rq); + if (ide_queue_sense_rq(drive, pc)) ide_complete_rq(drive, BLK_STS_IOERR, blk_rq_bytes(failed_rq)); - } } EXPORT_SYMBOL_GPL(ide_retry_pc); diff --git a/drivers/ide/ide-cd.c b/drivers/ide/ide-cd.c index f9b59d41813f..1f03884a6808 100644 --- a/drivers/ide/ide-cd.c +++ b/drivers/ide/ide-cd.c @@ -211,12 +211,12 @@ static void cdrom_analyze_sense_data(ide_drive_t *drive, static void ide_cd_complete_failed_rq(ide_drive_t *drive, struct request *rq) { /* - * For ATA_PRIV_SENSE, "rq->special" points to the original + * For ATA_PRIV_SENSE, "ide_req(rq)->special" points to the original * failed request. Also, the sense data should be read * directly from rq which might be different from the original * sense buffer if it got copied during mapping. */ - struct request *failed = (struct request *)rq->special; + struct request *failed = ide_req(rq)->special; void *sense = bio_data(rq->bio); if (failed) { @@ -258,11 +258,22 @@ static int ide_cd_breathe(ide_drive_t *drive, struct request *rq) /* * take a breather */ - blk_delay_queue(drive->queue, 1); + blk_mq_requeue_request(rq, false); + blk_mq_delay_kick_requeue_list(drive->queue, 1); return 1; } } +static void ide_cd_free_sense(ide_drive_t *drive) +{ + if (!drive->sense_rq) + return; + + blk_mq_free_request(drive->sense_rq); + drive->sense_rq = NULL; + drive->sense_rq_armed = false; +} + /** * Returns: * 0: if the request should be continued. @@ -516,6 +527,82 @@ static bool ide_cd_error_cmd(ide_drive_t *drive, struct ide_cmd *cmd) return false; } +/* standard prep_rq that builds 10 byte cmds */ +static bool ide_cdrom_prep_fs(struct request_queue *q, struct request *rq) +{ + int hard_sect = queue_logical_block_size(q); + long block = (long)blk_rq_pos(rq) / (hard_sect >> 9); + unsigned long blocks = blk_rq_sectors(rq) / (hard_sect >> 9); + struct scsi_request *req = scsi_req(rq); + + if (rq_data_dir(rq) == READ) + req->cmd[0] = GPCMD_READ_10; + else + req->cmd[0] = GPCMD_WRITE_10; + + /* + * fill in lba + */ + req->cmd[2] = (block >> 24) & 0xff; + req->cmd[3] = (block >> 16) & 0xff; + req->cmd[4] = (block >> 8) & 0xff; + req->cmd[5] = block & 0xff; + + /* + * and transfer length + */ + req->cmd[7] = (blocks >> 8) & 0xff; + req->cmd[8] = blocks & 0xff; + req->cmd_len = 10; + return true; +} + +/* + * Most of the SCSI commands are supported directly by ATAPI devices. + * This transform handles the few exceptions. + */ +static bool ide_cdrom_prep_pc(struct request *rq) +{ + u8 *c = scsi_req(rq)->cmd; + + /* transform 6-byte read/write commands to the 10-byte version */ + if (c[0] == READ_6 || c[0] == WRITE_6) { + c[8] = c[4]; + c[5] = c[3]; + c[4] = c[2]; + c[3] = c[1] & 0x1f; + c[2] = 0; + c[1] &= 0xe0; + c[0] += (READ_10 - READ_6); + scsi_req(rq)->cmd_len = 10; + return true; + } + + /* + * it's silly to pretend we understand 6-byte sense commands, just + * reject with ILLEGAL_REQUEST and the caller should take the + * appropriate action + */ + if (c[0] == MODE_SENSE || c[0] == MODE_SELECT) { + scsi_req(rq)->result = ILLEGAL_REQUEST; + return false; + } + + return true; +} + +static bool ide_cdrom_prep_rq(ide_drive_t *drive, struct request *rq) +{ + if (!blk_rq_is_passthrough(rq)) { + scsi_req_init(scsi_req(rq)); + + return ide_cdrom_prep_fs(drive->queue, rq); + } else if (blk_rq_is_scsi(rq)) + return ide_cdrom_prep_pc(rq); + + return true; +} + static ide_startstop_t cdrom_newpc_intr(ide_drive_t *drive) { ide_hwif_t *hwif = drive->hwif; @@ -675,7 +762,7 @@ static ide_startstop_t cdrom_newpc_intr(ide_drive_t *drive) out_end: if (blk_rq_is_scsi(rq) && rc == 0) { scsi_req(rq)->resid_len = 0; - blk_end_request_all(rq, BLK_STS_OK); + blk_mq_end_request(rq, BLK_STS_OK); hwif->rq = NULL; } else { if (sense && uptodate) @@ -705,6 +792,8 @@ out_end: if (sense && rc == 2) ide_error(drive, "request sense failure", stat); } + + ide_cd_free_sense(drive); return ide_stopped; } @@ -729,7 +818,7 @@ static ide_startstop_t cdrom_start_rw(ide_drive_t *drive, struct request *rq) * We may be retrying this request after an error. Fix up any * weirdness which might be present in the request packet. */ - q->prep_rq_fn(q, rq); + ide_cdrom_prep_rq(drive, rq); } /* fs requests *must* be hardware frame aligned */ @@ -1323,82 +1412,6 @@ static int ide_cdrom_probe_capabilities(ide_drive_t *drive) return nslots; } -/* standard prep_rq_fn that builds 10 byte cmds */ -static int ide_cdrom_prep_fs(struct request_queue *q, struct request *rq) -{ - int hard_sect = queue_logical_block_size(q); - long block = (long)blk_rq_pos(rq) / (hard_sect >> 9); - unsigned long blocks = blk_rq_sectors(rq) / (hard_sect >> 9); - struct scsi_request *req = scsi_req(rq); - - q->initialize_rq_fn(rq); - - if (rq_data_dir(rq) == READ) - req->cmd[0] = GPCMD_READ_10; - else - req->cmd[0] = GPCMD_WRITE_10; - - /* - * fill in lba - */ - req->cmd[2] = (block >> 24) & 0xff; - req->cmd[3] = (block >> 16) & 0xff; - req->cmd[4] = (block >> 8) & 0xff; - req->cmd[5] = block & 0xff; - - /* - * and transfer length - */ - req->cmd[7] = (blocks >> 8) & 0xff; - req->cmd[8] = blocks & 0xff; - req->cmd_len = 10; - return BLKPREP_OK; -} - -/* - * Most of the SCSI commands are supported directly by ATAPI devices. - * This transform handles the few exceptions. - */ -static int ide_cdrom_prep_pc(struct request *rq) -{ - u8 *c = scsi_req(rq)->cmd; - - /* transform 6-byte read/write commands to the 10-byte version */ - if (c[0] == READ_6 || c[0] == WRITE_6) { - c[8] = c[4]; - c[5] = c[3]; - c[4] = c[2]; - c[3] = c[1] & 0x1f; - c[2] = 0; - c[1] &= 0xe0; - c[0] += (READ_10 - READ_6); - scsi_req(rq)->cmd_len = 10; - return BLKPREP_OK; - } - - /* - * it's silly to pretend we understand 6-byte sense commands, just - * reject with ILLEGAL_REQUEST and the caller should take the - * appropriate action - */ - if (c[0] == MODE_SENSE || c[0] == MODE_SELECT) { - scsi_req(rq)->result = ILLEGAL_REQUEST; - return BLKPREP_KILL; - } - - return BLKPREP_OK; -} - -static int ide_cdrom_prep_fn(struct request_queue *q, struct request *rq) -{ - if (!blk_rq_is_passthrough(rq)) - return ide_cdrom_prep_fs(q, rq); - else if (blk_rq_is_scsi(rq)) - return ide_cdrom_prep_pc(rq); - - return 0; -} - struct cd_list_entry { const char *id_model; const char *id_firmware; @@ -1508,7 +1521,7 @@ static int ide_cdrom_setup(ide_drive_t *drive) ide_debug_log(IDE_DBG_PROBE, "enter"); - blk_queue_prep_rq(q, ide_cdrom_prep_fn); + drive->prep_rq = ide_cdrom_prep_rq; blk_queue_dma_alignment(q, 31); blk_queue_update_dma_pad(q, 15); @@ -1569,7 +1582,7 @@ static void ide_cd_release(struct device *dev) if (devinfo->handle == drive) unregister_cdrom(devinfo); drive->driver_data = NULL; - blk_queue_prep_rq(drive->queue, NULL); + drive->prep_rq = NULL; g->private_data = NULL; put_disk(g); kfree(info); diff --git a/drivers/ide/ide-devsets.c b/drivers/ide/ide-devsets.c index f4f8afdf8bbe..f2f93ed40356 100644 --- a/drivers/ide/ide-devsets.c +++ b/drivers/ide/ide-devsets.c @@ -171,7 +171,7 @@ int ide_devset_execute(ide_drive_t *drive, const struct ide_devset *setting, scsi_req(rq)->cmd_len = 5; scsi_req(rq)->cmd[0] = REQ_DEVSET_EXEC; *(int *)&scsi_req(rq)->cmd[1] = arg; - rq->special = setting->set; + ide_req(rq)->special = setting->set; blk_execute_rq(q, NULL, rq, 0); ret = scsi_req(rq)->result; @@ -182,7 +182,7 @@ int ide_devset_execute(ide_drive_t *drive, const struct ide_devset *setting, ide_startstop_t ide_do_devset(ide_drive_t *drive, struct request *rq) { - int err, (*setfunc)(ide_drive_t *, int) = rq->special; + int err, (*setfunc)(ide_drive_t *, int) = ide_req(rq)->special; err = setfunc(drive, *(int *)&scsi_req(rq)->cmd[1]); if (err) diff --git a/drivers/ide/ide-disk.c b/drivers/ide/ide-disk.c index e3b4e659082d..197912af5c2f 100644 --- a/drivers/ide/ide-disk.c +++ b/drivers/ide/ide-disk.c @@ -427,16 +427,15 @@ static void ide_disk_unlock_native_capacity(ide_drive_t *drive) drive->dev_flags |= IDE_DFLAG_NOHPA; /* disable HPA on resume */ } -static int idedisk_prep_fn(struct request_queue *q, struct request *rq) +static bool idedisk_prep_rq(ide_drive_t *drive, struct request *rq) { - ide_drive_t *drive = q->queuedata; struct ide_cmd *cmd; if (req_op(rq) != REQ_OP_FLUSH) - return BLKPREP_OK; + return true; - if (rq->special) { - cmd = rq->special; + if (ide_req(rq)->special) { + cmd = ide_req(rq)->special; memset(cmd, 0, sizeof(*cmd)); } else { cmd = kzalloc(sizeof(*cmd), GFP_ATOMIC); @@ -456,10 +455,10 @@ static int idedisk_prep_fn(struct request_queue *q, struct request *rq) rq->cmd_flags &= ~REQ_OP_MASK; rq->cmd_flags |= REQ_OP_DRV_OUT; ide_req(rq)->type = ATA_PRIV_TASKFILE; - rq->special = cmd; + ide_req(rq)->special = cmd; cmd->rq = rq; - return BLKPREP_OK; + return true; } ide_devset_get(multcount, mult_count); @@ -548,7 +547,7 @@ static void update_flush(ide_drive_t *drive) if (barrier) { wc = true; - blk_queue_prep_rq(drive->queue, idedisk_prep_fn); + drive->prep_rq = idedisk_prep_rq; } } diff --git a/drivers/ide/ide-eh.c b/drivers/ide/ide-eh.c index 47d5f3379748..e1323e058454 100644 --- a/drivers/ide/ide-eh.c +++ b/drivers/ide/ide-eh.c @@ -125,7 +125,7 @@ ide_startstop_t ide_error(ide_drive_t *drive, const char *msg, u8 stat) /* retry only "normal" I/O: */ if (blk_rq_is_passthrough(rq)) { if (ata_taskfile_request(rq)) { - struct ide_cmd *cmd = rq->special; + struct ide_cmd *cmd = ide_req(rq)->special; if (cmd) ide_complete_cmd(drive, cmd, stat, err); diff --git a/drivers/ide/ide-floppy.c b/drivers/ide/ide-floppy.c index a8df300f949c..780d33ccc5d8 100644 --- a/drivers/ide/ide-floppy.c +++ b/drivers/ide/ide-floppy.c @@ -276,7 +276,7 @@ static ide_startstop_t ide_floppy_do_request(ide_drive_t *drive, switch (ide_req(rq)->type) { case ATA_PRIV_MISC: case ATA_PRIV_SENSE: - pc = (struct ide_atapi_pc *)rq->special; + pc = (struct ide_atapi_pc *)ide_req(rq)->special; break; default: BUG(); diff --git a/drivers/ide/ide-io.c b/drivers/ide/ide-io.c index 0d93e0cfbeaf..8445b484ae69 100644 --- a/drivers/ide/ide-io.c +++ b/drivers/ide/ide-io.c @@ -67,7 +67,15 @@ int ide_end_rq(ide_drive_t *drive, struct request *rq, blk_status_t error, ide_dma_on(drive); } - return blk_end_request(rq, error, nr_bytes); + if (!blk_update_request(rq, error, nr_bytes)) { + if (rq == drive->sense_rq) + drive->sense_rq = NULL; + + __blk_mq_end_request(rq, error); + return 0; + } + + return 1; } EXPORT_SYMBOL_GPL(ide_end_rq); @@ -103,7 +111,7 @@ void ide_complete_cmd(ide_drive_t *drive, struct ide_cmd *cmd, u8 stat, u8 err) } if (rq && ata_taskfile_request(rq)) { - struct ide_cmd *orig_cmd = rq->special; + struct ide_cmd *orig_cmd = ide_req(rq)->special; if (cmd->tf_flags & IDE_TFLAG_DYN) kfree(orig_cmd); @@ -253,7 +261,7 @@ EXPORT_SYMBOL_GPL(ide_init_sg_cmd); static ide_startstop_t execute_drive_cmd (ide_drive_t *drive, struct request *rq) { - struct ide_cmd *cmd = rq->special; + struct ide_cmd *cmd = ide_req(rq)->special; if (cmd) { if (cmd->protocol == ATA_PROT_PIO) { @@ -307,8 +315,6 @@ static ide_startstop_t start_request (ide_drive_t *drive, struct request *rq) { ide_startstop_t startstop; - BUG_ON(!(rq->rq_flags & RQF_STARTED)); - #ifdef DEBUG printk("%s: start_request: current=0x%08lx\n", drive->hwif->name, (unsigned long) rq); @@ -320,6 +326,9 @@ static ide_startstop_t start_request (ide_drive_t *drive, struct request *rq) goto kill_rq; } + if (drive->prep_rq && !drive->prep_rq(drive, rq)) + return ide_stopped; + if (ata_pm_request(rq)) ide_check_pm_state(drive, rq); @@ -343,7 +352,7 @@ static ide_startstop_t start_request (ide_drive_t *drive, struct request *rq) if (ata_taskfile_request(rq)) return execute_drive_cmd(drive, rq); else if (ata_pm_request(rq)) { - struct ide_pm_state *pm = rq->special; + struct ide_pm_state *pm = ide_req(rq)->special; #ifdef DEBUG_PM printk("%s: start_power_step(step: %d)\n", drive->name, pm->pm_step); @@ -430,44 +439,42 @@ static inline void ide_unlock_host(struct ide_host *host) } } -static void __ide_requeue_and_plug(struct request_queue *q, struct request *rq) -{ - if (rq) - blk_requeue_request(q, rq); - if (rq || blk_peek_request(q)) { - /* Use 3ms as that was the old plug delay */ - blk_delay_queue(q, 3); - } -} - void ide_requeue_and_plug(ide_drive_t *drive, struct request *rq) { struct request_queue *q = drive->queue; - unsigned long flags; - spin_lock_irqsave(q->queue_lock, flags); - __ide_requeue_and_plug(q, rq); - spin_unlock_irqrestore(q->queue_lock, flags); + /* Use 3ms as that was the old plug delay */ + if (rq) { + blk_mq_requeue_request(rq, false); + blk_mq_delay_kick_requeue_list(q, 3); + } else + blk_mq_delay_run_hw_queue(q->queue_hw_ctx[0], 3); } /* * Issue a new request to a device. */ -void do_ide_request(struct request_queue *q) +blk_status_t ide_queue_rq(struct blk_mq_hw_ctx *hctx, + const struct blk_mq_queue_data *bd) { - ide_drive_t *drive = q->queuedata; + ide_drive_t *drive = hctx->queue->queuedata; ide_hwif_t *hwif = drive->hwif; struct ide_host *host = hwif->host; - struct request *rq = NULL; + struct request *rq = bd->rq; ide_startstop_t startstop; - spin_unlock_irq(q->queue_lock); + if (!blk_rq_is_passthrough(rq) && !(rq->rq_flags & RQF_DONTPREP)) { + rq->rq_flags |= RQF_DONTPREP; + ide_req(rq)->special = NULL; + } /* HLD do_request() callback might sleep, make sure it's okay */ might_sleep(); if (ide_lock_host(host, hwif)) - goto plug_device_2; + return BLK_STS_DEV_RESOURCE; + + blk_mq_start_request(rq); spin_lock_irq(&hwif->lock); @@ -503,21 +510,16 @@ repeat: hwif->cur_dev = drive; drive->dev_flags &= ~(IDE_DFLAG_SLEEPING | IDE_DFLAG_PARKED); - spin_unlock_irq(&hwif->lock); - spin_lock_irq(q->queue_lock); /* * we know that the queue isn't empty, but this can happen - * if the q->prep_rq_fn() decides to kill a request + * if ->prep_rq() decides to kill a request */ - if (!rq) - rq = blk_fetch_request(drive->queue); - - spin_unlock_irq(q->queue_lock); - spin_lock_irq(&hwif->lock); - if (!rq) { - ide_unlock_port(hwif); - goto out; + rq = bd->rq; + if (!rq) { + ide_unlock_port(hwif); + goto out; + } } /* @@ -551,23 +553,24 @@ repeat: if (startstop == ide_stopped) { rq = hwif->rq; hwif->rq = NULL; - goto repeat; + if (rq) + goto repeat; + ide_unlock_port(hwif); + goto out; } - } else - goto plug_device; + } else { +plug_device: + spin_unlock_irq(&hwif->lock); + ide_unlock_host(host); + ide_requeue_and_plug(drive, rq); + return BLK_STS_OK; + } + out: spin_unlock_irq(&hwif->lock); if (rq == NULL) ide_unlock_host(host); - spin_lock_irq(q->queue_lock); - return; - -plug_device: - spin_unlock_irq(&hwif->lock); - ide_unlock_host(host); -plug_device_2: - spin_lock_irq(q->queue_lock); - __ide_requeue_and_plug(q, rq); + return BLK_STS_OK; } static int drive_is_ready(ide_drive_t *drive) @@ -887,3 +890,16 @@ void ide_pad_transfer(ide_drive_t *drive, int write, int len) } } EXPORT_SYMBOL_GPL(ide_pad_transfer); + +void ide_insert_request_head(ide_drive_t *drive, struct request *rq) +{ + ide_hwif_t *hwif = drive->hwif; + unsigned long flags; + + spin_lock_irqsave(&hwif->lock, flags); + list_add_tail(&rq->queuelist, &drive->rq_list); + spin_unlock_irqrestore(&hwif->lock, flags); + + kblockd_schedule_work(&drive->rq_work); +} +EXPORT_SYMBOL_GPL(ide_insert_request_head); diff --git a/drivers/ide/ide-park.c b/drivers/ide/ide-park.c index 622f0edb3945..102aa3bc3e7f 100644 --- a/drivers/ide/ide-park.c +++ b/drivers/ide/ide-park.c @@ -27,7 +27,7 @@ static void issue_park_cmd(ide_drive_t *drive, unsigned long timeout) spin_unlock_irq(&hwif->lock); if (start_queue) - blk_run_queue(q); + blk_mq_run_hw_queues(q, true); return; } spin_unlock_irq(&hwif->lock); @@ -36,7 +36,7 @@ static void issue_park_cmd(ide_drive_t *drive, unsigned long timeout) scsi_req(rq)->cmd[0] = REQ_PARK_HEADS; scsi_req(rq)->cmd_len = 1; ide_req(rq)->type = ATA_PRIV_MISC; - rq->special = &timeout; + ide_req(rq)->special = &timeout; blk_execute_rq(q, NULL, rq, 1); rc = scsi_req(rq)->result ? -EIO : 0; blk_put_request(rq); @@ -54,7 +54,7 @@ static void issue_park_cmd(ide_drive_t *drive, unsigned long timeout) scsi_req(rq)->cmd[0] = REQ_UNPARK_HEADS; scsi_req(rq)->cmd_len = 1; ide_req(rq)->type = ATA_PRIV_MISC; - elv_add_request(q, rq, ELEVATOR_INSERT_FRONT); + ide_insert_request_head(drive, rq); out: return; @@ -67,7 +67,7 @@ ide_startstop_t ide_do_park_unpark(ide_drive_t *drive, struct request *rq) memset(&cmd, 0, sizeof(cmd)); if (scsi_req(rq)->cmd[0] == REQ_PARK_HEADS) { - drive->sleep = *(unsigned long *)rq->special; + drive->sleep = *(unsigned long *)ide_req(rq)->special; drive->dev_flags |= IDE_DFLAG_SLEEPING; tf->command = ATA_CMD_IDLEIMMEDIATE; tf->feature = 0x44; diff --git a/drivers/ide/ide-pm.c b/drivers/ide/ide-pm.c index 59217aa1d1fb..192e6c65d34e 100644 --- a/drivers/ide/ide-pm.c +++ b/drivers/ide/ide-pm.c @@ -21,7 +21,7 @@ int generic_ide_suspend(struct device *dev, pm_message_t mesg) memset(&rqpm, 0, sizeof(rqpm)); rq = blk_get_request(drive->queue, REQ_OP_DRV_IN, 0); ide_req(rq)->type = ATA_PRIV_PM_SUSPEND; - rq->special = &rqpm; + ide_req(rq)->special = &rqpm; rqpm.pm_step = IDE_PM_START_SUSPEND; if (mesg.event == PM_EVENT_PRETHAW) mesg.event = PM_EVENT_FREEZE; @@ -40,32 +40,17 @@ int generic_ide_suspend(struct device *dev, pm_message_t mesg) return ret; } -static void ide_end_sync_rq(struct request *rq, blk_status_t error) -{ - complete(rq->end_io_data); -} - static int ide_pm_execute_rq(struct request *rq) { struct request_queue *q = rq->q; - DECLARE_COMPLETION_ONSTACK(wait); - rq->end_io_data = &wait; - rq->end_io = ide_end_sync_rq; - - spin_lock_irq(q->queue_lock); if (unlikely(blk_queue_dying(q))) { rq->rq_flags |= RQF_QUIET; scsi_req(rq)->result = -ENXIO; - __blk_end_request_all(rq, BLK_STS_OK); - spin_unlock_irq(q->queue_lock); + blk_mq_end_request(rq, BLK_STS_OK); return -ENXIO; } - __elv_add_request(q, rq, ELEVATOR_INSERT_FRONT); - __blk_run_queue_uncond(q); - spin_unlock_irq(q->queue_lock); - - wait_for_completion_io(&wait); + blk_execute_rq(q, NULL, rq, true); return scsi_req(rq)->result ? -EIO : 0; } @@ -79,6 +64,8 @@ int generic_ide_resume(struct device *dev) struct ide_pm_state rqpm; int err; + blk_mq_start_stopped_hw_queues(drive->queue, true); + if (ide_port_acpi(hwif)) { /* call ACPI _PS0 / _STM only once */ if ((drive->dn & 1) == 0 || pair == NULL) { @@ -92,7 +79,7 @@ int generic_ide_resume(struct device *dev) memset(&rqpm, 0, sizeof(rqpm)); rq = blk_get_request(drive->queue, REQ_OP_DRV_IN, BLK_MQ_REQ_PREEMPT); ide_req(rq)->type = ATA_PRIV_PM_RESUME; - rq->special = &rqpm; + ide_req(rq)->special = &rqpm; rqpm.pm_step = IDE_PM_START_RESUME; rqpm.pm_state = PM_EVENT_ON; @@ -111,7 +98,7 @@ int generic_ide_resume(struct device *dev) void ide_complete_power_step(ide_drive_t *drive, struct request *rq) { - struct ide_pm_state *pm = rq->special; + struct ide_pm_state *pm = ide_req(rq)->special; #ifdef DEBUG_PM printk(KERN_INFO "%s: complete_power_step(step: %d)\n", @@ -141,7 +128,7 @@ void ide_complete_power_step(ide_drive_t *drive, struct request *rq) ide_startstop_t ide_start_power_step(ide_drive_t *drive, struct request *rq) { - struct ide_pm_state *pm = rq->special; + struct ide_pm_state *pm = ide_req(rq)->special; struct ide_cmd cmd = { }; switch (pm->pm_step) { @@ -213,8 +200,7 @@ out_do_tf: void ide_complete_pm_rq(ide_drive_t *drive, struct request *rq) { struct request_queue *q = drive->queue; - struct ide_pm_state *pm = rq->special; - unsigned long flags; + struct ide_pm_state *pm = ide_req(rq)->special; ide_complete_power_step(drive, rq); if (pm->pm_step != IDE_PM_COMPLETED) @@ -224,22 +210,19 @@ void ide_complete_pm_rq(ide_drive_t *drive, struct request *rq) printk("%s: completing PM request, %s\n", drive->name, (ide_req(rq)->type == ATA_PRIV_PM_SUSPEND) ? "suspend" : "resume"); #endif - spin_lock_irqsave(q->queue_lock, flags); if (ide_req(rq)->type == ATA_PRIV_PM_SUSPEND) - blk_stop_queue(q); + blk_mq_stop_hw_queues(q); else drive->dev_flags &= ~IDE_DFLAG_BLOCKED; - spin_unlock_irqrestore(q->queue_lock, flags); drive->hwif->rq = NULL; - if (blk_end_request(rq, BLK_STS_OK, 0)) - BUG(); + blk_mq_end_request(rq, BLK_STS_OK); } void ide_check_pm_state(ide_drive_t *drive, struct request *rq) { - struct ide_pm_state *pm = rq->special; + struct ide_pm_state *pm = ide_req(rq)->special; if (blk_rq_is_private(rq) && ide_req(rq)->type == ATA_PRIV_PM_SUSPEND && @@ -260,7 +243,6 @@ void ide_check_pm_state(ide_drive_t *drive, struct request *rq) ide_hwif_t *hwif = drive->hwif; const struct ide_tp_ops *tp_ops = hwif->tp_ops; struct request_queue *q = drive->queue; - unsigned long flags; int rc; #ifdef DEBUG_PM printk("%s: Wakeup request inited, waiting for !BSY...\n", drive->name); @@ -274,8 +256,6 @@ void ide_check_pm_state(ide_drive_t *drive, struct request *rq) if (rc) printk(KERN_WARNING "%s: drive not ready on wakeup\n", drive->name); - spin_lock_irqsave(q->queue_lock, flags); - blk_start_queue(q); - spin_unlock_irqrestore(q->queue_lock, flags); + blk_mq_start_hw_queues(q); } } diff --git a/drivers/ide/ide-probe.c b/drivers/ide/ide-probe.c index 3b75a7b7a284..63627be0811a 100644 --- a/drivers/ide/ide-probe.c +++ b/drivers/ide/ide-probe.c @@ -746,10 +746,16 @@ static void ide_initialize_rq(struct request *rq) { struct ide_request *req = blk_mq_rq_to_pdu(rq); + req->special = NULL; scsi_req_init(&req->sreq); req->sreq.sense = req->sense; } +static const struct blk_mq_ops ide_mq_ops = { + .queue_rq = ide_queue_rq, + .initialize_rq_fn = ide_initialize_rq, +}; + /* * init request queue */ @@ -759,6 +765,7 @@ static int ide_init_queue(ide_drive_t *drive) ide_hwif_t *hwif = drive->hwif; int max_sectors = 256; int max_sg_entries = PRD_ENTRIES; + struct blk_mq_tag_set *set; /* * Our default set up assumes the normal IDE case, @@ -767,19 +774,26 @@ static int ide_init_queue(ide_drive_t *drive) * limits and LBA48 we could raise it but as yet * do not. */ - q = blk_alloc_queue_node(GFP_KERNEL, hwif_to_node(hwif), NULL); - if (!q) + + set = &drive->tag_set; + set->ops = &ide_mq_ops; + set->nr_hw_queues = 1; + set->queue_depth = 32; + set->reserved_tags = 1; + set->cmd_size = sizeof(struct ide_request); + set->numa_node = hwif_to_node(hwif); + set->flags = BLK_MQ_F_SHOULD_MERGE | BLK_MQ_F_BLOCKING; + if (blk_mq_alloc_tag_set(set)) return 1; - q->request_fn = do_ide_request; - q->initialize_rq_fn = ide_initialize_rq; - q->cmd_size = sizeof(struct ide_request); - blk_queue_flag_set(QUEUE_FLAG_SCSI_PASSTHROUGH, q); - if (blk_init_allocated_queue(q) < 0) { - blk_cleanup_queue(q); + q = blk_mq_init_queue(set); + if (IS_ERR(q)) { + blk_mq_free_tag_set(set); return 1; } + blk_queue_flag_set(QUEUE_FLAG_SCSI_PASSTHROUGH, q); + q->queuedata = drive; blk_queue_segment_boundary(q, 0xffff); @@ -965,8 +979,12 @@ static void drive_release_dev (struct device *dev) ide_proc_unregister_device(drive); + if (drive->sense_rq) + blk_mq_free_request(drive->sense_rq); + blk_cleanup_queue(drive->queue); drive->queue = NULL; + blk_mq_free_tag_set(&drive->tag_set); drive->dev_flags &= ~IDE_DFLAG_PRESENT; @@ -1133,6 +1151,28 @@ static void ide_port_cable_detect(ide_hwif_t *hwif) } } +/* + * Deferred request list insertion handler + */ +static void drive_rq_insert_work(struct work_struct *work) +{ + ide_drive_t *drive = container_of(work, ide_drive_t, rq_work); + ide_hwif_t *hwif = drive->hwif; + struct request *rq; + LIST_HEAD(list); + + spin_lock_irq(&hwif->lock); + if (!list_empty(&drive->rq_list)) + list_splice_init(&drive->rq_list, &list); + spin_unlock_irq(&hwif->lock); + + while (!list_empty(&list)) { + rq = list_first_entry(&list, struct request, queuelist); + list_del_init(&rq->queuelist); + blk_execute_rq_nowait(drive->queue, rq->rq_disk, rq, true, NULL); + } +} + static const u8 ide_hwif_to_major[] = { IDE0_MAJOR, IDE1_MAJOR, IDE2_MAJOR, IDE3_MAJOR, IDE4_MAJOR, IDE5_MAJOR, IDE6_MAJOR, IDE7_MAJOR, IDE8_MAJOR, IDE9_MAJOR }; @@ -1145,12 +1185,10 @@ static void ide_port_init_devices_data(ide_hwif_t *hwif) ide_port_for_each_dev(i, drive, hwif) { u8 j = (hwif->index * MAX_DRIVES) + i; u16 *saved_id = drive->id; - struct request *saved_sense_rq = drive->sense_rq; memset(drive, 0, sizeof(*drive)); memset(saved_id, 0, SECTOR_SIZE); drive->id = saved_id; - drive->sense_rq = saved_sense_rq; drive->media = ide_disk; drive->select = (i << 4) | ATA_DEVICE_OBS; @@ -1166,6 +1204,9 @@ static void ide_port_init_devices_data(ide_hwif_t *hwif) INIT_LIST_HEAD(&drive->list); init_completion(&drive->gendev_rel_comp); + + INIT_WORK(&drive->rq_work, drive_rq_insert_work); + INIT_LIST_HEAD(&drive->rq_list); } } @@ -1255,7 +1296,6 @@ static void ide_port_free_devices(ide_hwif_t *hwif) int i; ide_port_for_each_dev(i, drive, hwif) { - kfree(drive->sense_rq); kfree(drive->id); kfree(drive); } @@ -1283,17 +1323,10 @@ static int ide_port_alloc_devices(ide_hwif_t *hwif, int node) if (drive->id == NULL) goto out_free_drive; - drive->sense_rq = kmalloc(sizeof(struct request) + - sizeof(struct ide_request), GFP_KERNEL); - if (!drive->sense_rq) - goto out_free_id; - hwif->devices[i] = drive; } return 0; -out_free_id: - kfree(drive->id); out_free_drive: kfree(drive); out_nomem: diff --git a/drivers/ide/ide-tape.c b/drivers/ide/ide-tape.c index 34c1165226a4..db1a65f4b490 100644 --- a/drivers/ide/ide-tape.c +++ b/drivers/ide/ide-tape.c @@ -639,7 +639,7 @@ static ide_startstop_t idetape_do_request(ide_drive_t *drive, goto out; } if (req->cmd[13] & REQ_IDETAPE_PC1) { - pc = (struct ide_atapi_pc *)rq->special; + pc = (struct ide_atapi_pc *)ide_req(rq)->special; req->cmd[13] &= ~(REQ_IDETAPE_PC1); req->cmd[13] |= REQ_IDETAPE_PC2; goto out; diff --git a/drivers/ide/ide-taskfile.c b/drivers/ide/ide-taskfile.c index c21d5c50ae3a..17b2e379e872 100644 --- a/drivers/ide/ide-taskfile.c +++ b/drivers/ide/ide-taskfile.c @@ -440,7 +440,7 @@ int ide_raw_taskfile(ide_drive_t *drive, struct ide_cmd *cmd, u8 *buf, goto put_req; } - rq->special = cmd; + ide_req(rq)->special = cmd; cmd->rq = rq; blk_execute_rq(drive->queue, NULL, rq, 0); diff --git a/drivers/lightnvm/core.c b/drivers/lightnvm/core.c index efb976a863d2..60ab11fcc81c 100644 --- a/drivers/lightnvm/core.c +++ b/drivers/lightnvm/core.c @@ -389,7 +389,7 @@ static int nvm_create_tgt(struct nvm_dev *dev, struct nvm_ioctl_create *create) goto err_dev; } - tqueue = blk_alloc_queue_node(GFP_KERNEL, dev->q->node, NULL); + tqueue = blk_alloc_queue_node(GFP_KERNEL, dev->q->node); if (!tqueue) { ret = -ENOMEM; goto err_disk; diff --git a/drivers/md/dm-rq.c b/drivers/md/dm-rq.c index 7cd36e4d1310..1f1fe9a618ea 100644 --- a/drivers/md/dm-rq.c +++ b/drivers/md/dm-rq.c @@ -43,7 +43,7 @@ static unsigned dm_get_blk_mq_queue_depth(void) int dm_request_based(struct mapped_device *md) { - return queue_is_rq_based(md->queue); + return queue_is_mq(md->queue); } void dm_start_queue(struct request_queue *q) diff --git a/drivers/md/dm-table.c b/drivers/md/dm-table.c index 9038c302d5c2..844f7d0f2ef8 100644 --- a/drivers/md/dm-table.c +++ b/drivers/md/dm-table.c @@ -919,12 +919,12 @@ static int device_is_rq_based(struct dm_target *ti, struct dm_dev *dev, struct request_queue *q = bdev_get_queue(dev->bdev); struct verify_rq_based_data *v = data; - if (q->mq_ops) + if (queue_is_mq(q)) v->mq_count++; else v->sq_count++; - return queue_is_rq_based(q); + return queue_is_mq(q); } static int dm_table_determine_type(struct dm_table *t) diff --git a/drivers/md/dm.c b/drivers/md/dm.c index c510179a7f84..ab72d79775ee 100644 --- a/drivers/md/dm.c +++ b/drivers/md/dm.c @@ -1417,10 +1417,21 @@ static int __send_empty_flush(struct clone_info *ci) unsigned target_nr = 0; struct dm_target *ti; + /* + * Empty flush uses a statically initialized bio, &md->flush_bio, as + * the base for cloning. However, blkg association requires that a + * bdev is associated with a gendisk, which doesn't happen until the + * bdev is opened. So, blkg association is done at issue time of the + * flush rather than when the device is created in alloc_dev(). + */ + bio_set_dev(ci->bio, ci->io->md->bdev); + BUG_ON(bio_has_data(ci->bio)); while ((ti = dm_table_get_target(ci->map, target_nr++))) __send_duplicate_bios(ci, ti, ti->num_flush_bios, NULL); + bio_disassociate_blkg(ci->bio); + return 0; } @@ -1896,7 +1907,7 @@ static struct mapped_device *alloc_dev(int minor) INIT_LIST_HEAD(&md->table_devices); spin_lock_init(&md->uevent_lock); - md->queue = blk_alloc_queue_node(GFP_KERNEL, numa_node_id, NULL); + md->queue = blk_alloc_queue_node(GFP_KERNEL, numa_node_id); if (!md->queue) goto bad; md->queue->queuedata = md; @@ -1939,7 +1950,6 @@ static struct mapped_device *alloc_dev(int minor) goto bad; bio_init(&md->flush_bio, NULL, 0); - bio_set_dev(&md->flush_bio, md->bdev); md->flush_bio.bi_opf = REQ_OP_WRITE | REQ_PREFLUSH | REQ_SYNC; dm_stats_init(&md->stats); diff --git a/drivers/md/raid0.c b/drivers/md/raid0.c index ac1cffd2a09b..f3fb5bb8c82a 100644 --- a/drivers/md/raid0.c +++ b/drivers/md/raid0.c @@ -542,7 +542,7 @@ static void raid0_handle_discard(struct mddev *mddev, struct bio *bio) !discard_bio) continue; bio_chain(discard_bio, bio); - bio_clone_blkcg_association(discard_bio, bio); + bio_clone_blkg_association(discard_bio, bio); if (mddev->gendisk) trace_block_bio_remap(bdev_get_queue(rdev->bdev), discard_bio, disk_devt(mddev->gendisk), diff --git a/drivers/memstick/core/ms_block.c b/drivers/memstick/core/ms_block.c index 8a02f11076f9..82daccc9ea62 100644 --- a/drivers/memstick/core/ms_block.c +++ b/drivers/memstick/core/ms_block.c @@ -15,7 +15,7 @@ #define pr_fmt(fmt) DRIVER_NAME ": " fmt #include <linux/module.h> -#include <linux/blkdev.h> +#include <linux/blk-mq.h> #include <linux/memstick.h> #include <linux/idr.h> #include <linux/hdreg.h> @@ -1873,69 +1873,65 @@ static void msb_io_work(struct work_struct *work) struct msb_data *msb = container_of(work, struct msb_data, io_work); int page, error, len; sector_t lba; - unsigned long flags; struct scatterlist *sg = msb->prealloc_sg; + struct request *req; dbg_verbose("IO: work started"); while (1) { - spin_lock_irqsave(&msb->q_lock, flags); + spin_lock_irq(&msb->q_lock); if (msb->need_flush_cache) { msb->need_flush_cache = false; - spin_unlock_irqrestore(&msb->q_lock, flags); + spin_unlock_irq(&msb->q_lock); msb_cache_flush(msb); continue; } - if (!msb->req) { - msb->req = blk_fetch_request(msb->queue); - if (!msb->req) { - dbg_verbose("IO: no more requests exiting"); - spin_unlock_irqrestore(&msb->q_lock, flags); - return; - } + req = msb->req; + if (!req) { + dbg_verbose("IO: no more requests exiting"); + spin_unlock_irq(&msb->q_lock); + return; } - spin_unlock_irqrestore(&msb->q_lock, flags); - - /* If card was removed meanwhile */ - if (!msb->req) - return; + spin_unlock_irq(&msb->q_lock); /* process the request */ dbg_verbose("IO: processing new request"); - blk_rq_map_sg(msb->queue, msb->req, sg); + blk_rq_map_sg(msb->queue, req, sg); - lba = blk_rq_pos(msb->req); + lba = blk_rq_pos(req); sector_div(lba, msb->page_size / 512); page = sector_div(lba, msb->pages_in_block); if (rq_data_dir(msb->req) == READ) error = msb_do_read_request(msb, lba, page, sg, - blk_rq_bytes(msb->req), &len); + blk_rq_bytes(req), &len); else error = msb_do_write_request(msb, lba, page, sg, - blk_rq_bytes(msb->req), &len); - - spin_lock_irqsave(&msb->q_lock, flags); + blk_rq_bytes(req), &len); - if (len) - if (!__blk_end_request(msb->req, BLK_STS_OK, len)) - msb->req = NULL; + if (len && !blk_update_request(req, BLK_STS_OK, len)) { + __blk_mq_end_request(req, BLK_STS_OK); + spin_lock_irq(&msb->q_lock); + msb->req = NULL; + spin_unlock_irq(&msb->q_lock); + } if (error && msb->req) { blk_status_t ret = errno_to_blk_status(error); + dbg_verbose("IO: ending one sector of the request with error"); - if (!__blk_end_request(msb->req, ret, msb->page_size)) - msb->req = NULL; + blk_mq_end_request(req, ret); + spin_lock_irq(&msb->q_lock); + msb->req = NULL; + spin_unlock_irq(&msb->q_lock); } if (msb->req) dbg_verbose("IO: request still pending"); - - spin_unlock_irqrestore(&msb->q_lock, flags); } } @@ -2002,29 +1998,40 @@ static int msb_bd_getgeo(struct block_device *bdev, return 0; } -static void msb_submit_req(struct request_queue *q) +static blk_status_t msb_queue_rq(struct blk_mq_hw_ctx *hctx, + const struct blk_mq_queue_data *bd) { - struct memstick_dev *card = q->queuedata; + struct memstick_dev *card = hctx->queue->queuedata; struct msb_data *msb = memstick_get_drvdata(card); - struct request *req = NULL; + struct request *req = bd->rq; dbg_verbose("Submit request"); + spin_lock_irq(&msb->q_lock); + if (msb->card_dead) { dbg("Refusing requests on removed card"); WARN_ON(!msb->io_queue_stopped); - while ((req = blk_fetch_request(q)) != NULL) - __blk_end_request_all(req, BLK_STS_IOERR); - return; + spin_unlock_irq(&msb->q_lock); + blk_mq_start_request(req); + return BLK_STS_IOERR; } - if (msb->req) - return; + if (msb->req) { + spin_unlock_irq(&msb->q_lock); + return BLK_STS_DEV_RESOURCE; + } + + blk_mq_start_request(req); + msb->req = req; if (!msb->io_queue_stopped) queue_work(msb->io_queue, &msb->io_work); + + spin_unlock_irq(&msb->q_lock); + return BLK_STS_OK; } static int msb_check_card(struct memstick_dev *card) @@ -2040,21 +2047,20 @@ static void msb_stop(struct memstick_dev *card) dbg("Stopping all msblock IO"); + blk_mq_stop_hw_queues(msb->queue); spin_lock_irqsave(&msb->q_lock, flags); - blk_stop_queue(msb->queue); msb->io_queue_stopped = true; spin_unlock_irqrestore(&msb->q_lock, flags); del_timer_sync(&msb->cache_flush_timer); flush_workqueue(msb->io_queue); + spin_lock_irqsave(&msb->q_lock, flags); if (msb->req) { - spin_lock_irqsave(&msb->q_lock, flags); - blk_requeue_request(msb->queue, msb->req); + blk_mq_requeue_request(msb->req, false); msb->req = NULL; - spin_unlock_irqrestore(&msb->q_lock, flags); } - + spin_unlock_irqrestore(&msb->q_lock, flags); } static void msb_start(struct memstick_dev *card) @@ -2077,9 +2083,7 @@ static void msb_start(struct memstick_dev *card) msb->need_flush_cache = true; msb->io_queue_stopped = false; - spin_lock_irqsave(&msb->q_lock, flags); - blk_start_queue(msb->queue); - spin_unlock_irqrestore(&msb->q_lock, flags); + blk_mq_start_hw_queues(msb->queue); queue_work(msb->io_queue, &msb->io_work); @@ -2092,6 +2096,10 @@ static const struct block_device_operations msb_bdops = { .owner = THIS_MODULE }; +static const struct blk_mq_ops msb_mq_ops = { + .queue_rq = msb_queue_rq, +}; + /* Registers the block device */ static int msb_init_disk(struct memstick_dev *card) { @@ -2112,9 +2120,11 @@ static int msb_init_disk(struct memstick_dev *card) goto out_release_id; } - msb->queue = blk_init_queue(msb_submit_req, &msb->q_lock); - if (!msb->queue) { - rc = -ENOMEM; + msb->queue = blk_mq_init_sq_queue(&msb->tag_set, &msb_mq_ops, 2, + BLK_MQ_F_SHOULD_MERGE); + if (IS_ERR(msb->queue)) { + rc = PTR_ERR(msb->queue); + msb->queue = NULL; goto out_put_disk; } @@ -2202,12 +2212,13 @@ static void msb_remove(struct memstick_dev *card) /* Take care of unhandled + new requests from now on */ spin_lock_irqsave(&msb->q_lock, flags); msb->card_dead = true; - blk_start_queue(msb->queue); spin_unlock_irqrestore(&msb->q_lock, flags); + blk_mq_start_hw_queues(msb->queue); /* Remove the disk */ del_gendisk(msb->disk); blk_cleanup_queue(msb->queue); + blk_mq_free_tag_set(&msb->tag_set); msb->queue = NULL; mutex_lock(&msb_disk_lock); diff --git a/drivers/memstick/core/ms_block.h b/drivers/memstick/core/ms_block.h index 53962c3b21df..9ba84e0ced63 100644 --- a/drivers/memstick/core/ms_block.h +++ b/drivers/memstick/core/ms_block.h @@ -152,6 +152,7 @@ struct msb_data { struct gendisk *disk; struct request_queue *queue; spinlock_t q_lock; + struct blk_mq_tag_set tag_set; struct hd_geometry geometry; struct attribute_group attr_group; struct request *req; diff --git a/drivers/memstick/core/mspro_block.c b/drivers/memstick/core/mspro_block.c index 0cd30dcb6801..aba50ec98b4d 100644 --- a/drivers/memstick/core/mspro_block.c +++ b/drivers/memstick/core/mspro_block.c @@ -12,7 +12,7 @@ * */ -#include <linux/blkdev.h> +#include <linux/blk-mq.h> #include <linux/idr.h> #include <linux/hdreg.h> #include <linux/kthread.h> @@ -142,6 +142,7 @@ struct mspro_block_data { struct gendisk *disk; struct request_queue *queue; struct request *block_req; + struct blk_mq_tag_set tag_set; spinlock_t q_lock; unsigned short page_size; @@ -152,7 +153,6 @@ struct mspro_block_data { unsigned char system; unsigned char read_only:1, eject:1, - has_request:1, data_dir:1, active:1; unsigned char transfer_cmd; @@ -694,13 +694,12 @@ static void h_mspro_block_setup_cmd(struct memstick_dev *card, u64 offset, /*** Data transfer ***/ -static int mspro_block_issue_req(struct memstick_dev *card, int chunk) +static int mspro_block_issue_req(struct memstick_dev *card, bool chunk) { struct mspro_block_data *msb = memstick_get_drvdata(card); u64 t_off; unsigned int count; -try_again: while (chunk) { msb->current_page = 0; msb->current_seg = 0; @@ -709,9 +708,17 @@ try_again: msb->req_sg); if (!msb->seg_count) { - chunk = __blk_end_request_cur(msb->block_req, - BLK_STS_RESOURCE); - continue; + unsigned int bytes = blk_rq_cur_bytes(msb->block_req); + + chunk = blk_update_request(msb->block_req, + BLK_STS_RESOURCE, + bytes); + if (chunk) + continue; + __blk_mq_end_request(msb->block_req, + BLK_STS_RESOURCE); + msb->block_req = NULL; + break; } t_off = blk_rq_pos(msb->block_req); @@ -729,30 +736,22 @@ try_again: return 0; } - dev_dbg(&card->dev, "blk_fetch\n"); - msb->block_req = blk_fetch_request(msb->queue); - if (!msb->block_req) { - dev_dbg(&card->dev, "issue end\n"); - return -EAGAIN; - } - - dev_dbg(&card->dev, "trying again\n"); - chunk = 1; - goto try_again; + return 1; } static int mspro_block_complete_req(struct memstick_dev *card, int error) { struct mspro_block_data *msb = memstick_get_drvdata(card); - int chunk, cnt; + int cnt; + bool chunk; unsigned int t_len = 0; unsigned long flags; spin_lock_irqsave(&msb->q_lock, flags); - dev_dbg(&card->dev, "complete %d, %d\n", msb->has_request ? 1 : 0, + dev_dbg(&card->dev, "complete %d, %d\n", msb->block_req ? 1 : 0, error); - if (msb->has_request) { + if (msb->block_req) { /* Nothing to do - not really an error */ if (error == -EAGAIN) error = 0; @@ -777,15 +776,17 @@ static int mspro_block_complete_req(struct memstick_dev *card, int error) if (error && !t_len) t_len = blk_rq_cur_bytes(msb->block_req); - chunk = __blk_end_request(msb->block_req, + chunk = blk_update_request(msb->block_req, errno_to_blk_status(error), t_len); - - error = mspro_block_issue_req(card, chunk); - - if (!error) - goto out; - else - msb->has_request = 0; + if (chunk) { + error = mspro_block_issue_req(card, chunk); + if (!error) + goto out; + } else { + __blk_mq_end_request(msb->block_req, + errno_to_blk_status(error)); + msb->block_req = NULL; + } } else { if (!error) error = -EAGAIN; @@ -806,8 +807,8 @@ static void mspro_block_stop(struct memstick_dev *card) while (1) { spin_lock_irqsave(&msb->q_lock, flags); - if (!msb->has_request) { - blk_stop_queue(msb->queue); + if (!msb->block_req) { + blk_mq_stop_hw_queues(msb->queue); rc = 1; } spin_unlock_irqrestore(&msb->q_lock, flags); @@ -822,32 +823,37 @@ static void mspro_block_stop(struct memstick_dev *card) static void mspro_block_start(struct memstick_dev *card) { struct mspro_block_data *msb = memstick_get_drvdata(card); - unsigned long flags; - spin_lock_irqsave(&msb->q_lock, flags); - blk_start_queue(msb->queue); - spin_unlock_irqrestore(&msb->q_lock, flags); + blk_mq_start_hw_queues(msb->queue); } -static void mspro_block_submit_req(struct request_queue *q) +static blk_status_t mspro_queue_rq(struct blk_mq_hw_ctx *hctx, + const struct blk_mq_queue_data *bd) { - struct memstick_dev *card = q->queuedata; + struct memstick_dev *card = hctx->queue->queuedata; struct mspro_block_data *msb = memstick_get_drvdata(card); - struct request *req = NULL; - if (msb->has_request) - return; + spin_lock_irq(&msb->q_lock); - if (msb->eject) { - while ((req = blk_fetch_request(q)) != NULL) - __blk_end_request_all(req, BLK_STS_IOERR); + if (msb->block_req) { + spin_unlock_irq(&msb->q_lock); + return BLK_STS_DEV_RESOURCE; + } - return; + if (msb->eject) { + spin_unlock_irq(&msb->q_lock); + blk_mq_start_request(bd->rq); + return BLK_STS_IOERR; } - msb->has_request = 1; - if (mspro_block_issue_req(card, 0)) - msb->has_request = 0; + msb->block_req = bd->rq; + blk_mq_start_request(bd->rq); + + if (mspro_block_issue_req(card, true)) + msb->block_req = NULL; + + spin_unlock_irq(&msb->q_lock); + return BLK_STS_OK; } /*** Initialization ***/ @@ -1167,6 +1173,10 @@ static int mspro_block_init_card(struct memstick_dev *card) } +static const struct blk_mq_ops mspro_mq_ops = { + .queue_rq = mspro_queue_rq, +}; + static int mspro_block_init_disk(struct memstick_dev *card) { struct mspro_block_data *msb = memstick_get_drvdata(card); @@ -1206,9 +1216,11 @@ static int mspro_block_init_disk(struct memstick_dev *card) goto out_release_id; } - msb->queue = blk_init_queue(mspro_block_submit_req, &msb->q_lock); - if (!msb->queue) { - rc = -ENOMEM; + msb->queue = blk_mq_init_sq_queue(&msb->tag_set, &mspro_mq_ops, 2, + BLK_MQ_F_SHOULD_MERGE); + if (IS_ERR(msb->queue)) { + rc = PTR_ERR(msb->queue); + msb->queue = NULL; goto out_put_disk; } @@ -1318,13 +1330,14 @@ static void mspro_block_remove(struct memstick_dev *card) spin_lock_irqsave(&msb->q_lock, flags); msb->eject = 1; - blk_start_queue(msb->queue); spin_unlock_irqrestore(&msb->q_lock, flags); + blk_mq_start_hw_queues(msb->queue); del_gendisk(msb->disk); dev_dbg(&card->dev, "mspro block remove\n"); blk_cleanup_queue(msb->queue); + blk_mq_free_tag_set(&msb->tag_set); msb->queue = NULL; sysfs_remove_group(&card->dev.kobj, &msb->attr_group); @@ -1344,8 +1357,9 @@ static int mspro_block_suspend(struct memstick_dev *card, pm_message_t state) struct mspro_block_data *msb = memstick_get_drvdata(card); unsigned long flags; + blk_mq_stop_hw_queues(msb->queue); + spin_lock_irqsave(&msb->q_lock, flags); - blk_stop_queue(msb->queue); msb->active = 0; spin_unlock_irqrestore(&msb->q_lock, flags); @@ -1355,7 +1369,6 @@ static int mspro_block_suspend(struct memstick_dev *card, pm_message_t state) static int mspro_block_resume(struct memstick_dev *card) { struct mspro_block_data *msb = memstick_get_drvdata(card); - unsigned long flags; int rc = 0; #ifdef CONFIG_MEMSTICK_UNSAFE_RESUME @@ -1401,9 +1414,7 @@ out_unlock: #endif /* CONFIG_MEMSTICK_UNSAFE_RESUME */ - spin_lock_irqsave(&msb->q_lock, flags); - blk_start_queue(msb->queue); - spin_unlock_irqrestore(&msb->q_lock, flags); + blk_mq_start_hw_queues(msb->queue); return rc; } diff --git a/drivers/mmc/core/block.c b/drivers/mmc/core/block.c index c35b5b08bb33..2c329a3e3fdb 100644 --- a/drivers/mmc/core/block.c +++ b/drivers/mmc/core/block.c @@ -100,7 +100,6 @@ static DEFINE_IDA(mmc_rpmb_ida); * There is one mmc_blk_data per slot. */ struct mmc_blk_data { - spinlock_t lock; struct device *parent; struct gendisk *disk; struct mmc_queue queue; @@ -1483,7 +1482,7 @@ static void mmc_blk_cqe_complete_rq(struct mmc_queue *mq, struct request *req) blk_mq_end_request(req, BLK_STS_OK); } - spin_lock_irqsave(q->queue_lock, flags); + spin_lock_irqsave(&mq->lock, flags); mq->in_flight[mmc_issue_type(mq, req)] -= 1; @@ -1491,7 +1490,7 @@ static void mmc_blk_cqe_complete_rq(struct mmc_queue *mq, struct request *req) mmc_cqe_check_busy(mq); - spin_unlock_irqrestore(q->queue_lock, flags); + spin_unlock_irqrestore(&mq->lock, flags); if (!mq->cqe_busy) blk_mq_run_hw_queues(q, true); @@ -1988,17 +1987,16 @@ static void mmc_blk_mq_poll_completion(struct mmc_queue *mq, static void mmc_blk_mq_dec_in_flight(struct mmc_queue *mq, struct request *req) { - struct request_queue *q = req->q; unsigned long flags; bool put_card; - spin_lock_irqsave(q->queue_lock, flags); + spin_lock_irqsave(&mq->lock, flags); mq->in_flight[mmc_issue_type(mq, req)] -= 1; put_card = (mmc_tot_in_flight(mq) == 0); - spin_unlock_irqrestore(q->queue_lock, flags); + spin_unlock_irqrestore(&mq->lock, flags); if (put_card) mmc_put_card(mq->card, &mq->ctx); @@ -2094,11 +2092,11 @@ static void mmc_blk_mq_req_done(struct mmc_request *mrq) * request does not need to wait (although it does need to * complete complete_req first). */ - spin_lock_irqsave(q->queue_lock, flags); + spin_lock_irqsave(&mq->lock, flags); mq->complete_req = req; mq->rw_wait = false; waiting = mq->waiting; - spin_unlock_irqrestore(q->queue_lock, flags); + spin_unlock_irqrestore(&mq->lock, flags); /* * If 'waiting' then the waiting task will complete this @@ -2117,10 +2115,10 @@ static void mmc_blk_mq_req_done(struct mmc_request *mrq) /* Take the recovery path for errors or urgent background operations */ if (mmc_blk_rq_error(&mqrq->brq) || mmc_blk_urgent_bkops_needed(mq, mqrq)) { - spin_lock_irqsave(q->queue_lock, flags); + spin_lock_irqsave(&mq->lock, flags); mq->recovery_needed = true; mq->recovery_req = req; - spin_unlock_irqrestore(q->queue_lock, flags); + spin_unlock_irqrestore(&mq->lock, flags); wake_up(&mq->wait); schedule_work(&mq->recovery_work); return; @@ -2136,7 +2134,6 @@ static void mmc_blk_mq_req_done(struct mmc_request *mrq) static bool mmc_blk_rw_wait_cond(struct mmc_queue *mq, int *err) { - struct request_queue *q = mq->queue; unsigned long flags; bool done; @@ -2144,7 +2141,7 @@ static bool mmc_blk_rw_wait_cond(struct mmc_queue *mq, int *err) * Wait while there is another request in progress, but not if recovery * is needed. Also indicate whether there is a request waiting to start. */ - spin_lock_irqsave(q->queue_lock, flags); + spin_lock_irqsave(&mq->lock, flags); if (mq->recovery_needed) { *err = -EBUSY; done = true; @@ -2152,7 +2149,7 @@ static bool mmc_blk_rw_wait_cond(struct mmc_queue *mq, int *err) done = !mq->rw_wait; } mq->waiting = !done; - spin_unlock_irqrestore(q->queue_lock, flags); + spin_unlock_irqrestore(&mq->lock, flags); return done; } @@ -2329,12 +2326,11 @@ static struct mmc_blk_data *mmc_blk_alloc_req(struct mmc_card *card, goto err_kfree; } - spin_lock_init(&md->lock); INIT_LIST_HEAD(&md->part); INIT_LIST_HEAD(&md->rpmbs); md->usage = 1; - ret = mmc_init_queue(&md->queue, card, &md->lock, subname); + ret = mmc_init_queue(&md->queue, card); if (ret) goto err_putdisk; diff --git a/drivers/mmc/core/queue.c b/drivers/mmc/core/queue.c index 6edffeed9953..35cc138b096d 100644 --- a/drivers/mmc/core/queue.c +++ b/drivers/mmc/core/queue.c @@ -89,9 +89,9 @@ void mmc_cqe_recovery_notifier(struct mmc_request *mrq) struct mmc_queue *mq = q->queuedata; unsigned long flags; - spin_lock_irqsave(q->queue_lock, flags); + spin_lock_irqsave(&mq->lock, flags); __mmc_cqe_recovery_notifier(mq); - spin_unlock_irqrestore(q->queue_lock, flags); + spin_unlock_irqrestore(&mq->lock, flags); } static enum blk_eh_timer_return mmc_cqe_timed_out(struct request *req) @@ -128,14 +128,14 @@ static enum blk_eh_timer_return mmc_mq_timed_out(struct request *req, unsigned long flags; int ret; - spin_lock_irqsave(q->queue_lock, flags); + spin_lock_irqsave(&mq->lock, flags); if (mq->recovery_needed || !mq->use_cqe) ret = BLK_EH_RESET_TIMER; else ret = mmc_cqe_timed_out(req); - spin_unlock_irqrestore(q->queue_lock, flags); + spin_unlock_irqrestore(&mq->lock, flags); return ret; } @@ -157,9 +157,9 @@ static void mmc_mq_recovery_handler(struct work_struct *work) mq->in_recovery = false; - spin_lock_irq(q->queue_lock); + spin_lock_irq(&mq->lock); mq->recovery_needed = false; - spin_unlock_irq(q->queue_lock); + spin_unlock_irq(&mq->lock); mmc_put_card(mq->card, &mq->ctx); @@ -258,10 +258,10 @@ static blk_status_t mmc_mq_queue_rq(struct blk_mq_hw_ctx *hctx, issue_type = mmc_issue_type(mq, req); - spin_lock_irq(q->queue_lock); + spin_lock_irq(&mq->lock); if (mq->recovery_needed || mq->busy) { - spin_unlock_irq(q->queue_lock); + spin_unlock_irq(&mq->lock); return BLK_STS_RESOURCE; } @@ -269,7 +269,7 @@ static blk_status_t mmc_mq_queue_rq(struct blk_mq_hw_ctx *hctx, case MMC_ISSUE_DCMD: if (mmc_cqe_dcmd_busy(mq)) { mq->cqe_busy |= MMC_CQE_DCMD_BUSY; - spin_unlock_irq(q->queue_lock); + spin_unlock_irq(&mq->lock); return BLK_STS_RESOURCE; } break; @@ -294,7 +294,7 @@ static blk_status_t mmc_mq_queue_rq(struct blk_mq_hw_ctx *hctx, get_card = (mmc_tot_in_flight(mq) == 1); cqe_retune_ok = (mmc_cqe_qcnt(mq) == 1); - spin_unlock_irq(q->queue_lock); + spin_unlock_irq(&mq->lock); if (!(req->rq_flags & RQF_DONTPREP)) { req_to_mmc_queue_req(req)->retries = 0; @@ -328,12 +328,12 @@ static blk_status_t mmc_mq_queue_rq(struct blk_mq_hw_ctx *hctx, if (issued != MMC_REQ_STARTED) { bool put_card = false; - spin_lock_irq(q->queue_lock); + spin_lock_irq(&mq->lock); mq->in_flight[issue_type] -= 1; if (mmc_tot_in_flight(mq) == 0) put_card = true; mq->busy = false; - spin_unlock_irq(q->queue_lock); + spin_unlock_irq(&mq->lock); if (put_card) mmc_put_card(card, &mq->ctx); } else { @@ -378,14 +378,37 @@ static void mmc_setup_queue(struct mmc_queue *mq, struct mmc_card *card) init_waitqueue_head(&mq->wait); } -static int mmc_mq_init_queue(struct mmc_queue *mq, int q_depth, - const struct blk_mq_ops *mq_ops, spinlock_t *lock) +/* Set queue depth to get a reasonable value for q->nr_requests */ +#define MMC_QUEUE_DEPTH 64 + +/** + * mmc_init_queue - initialise a queue structure. + * @mq: mmc queue + * @card: mmc card to attach this queue + * + * Initialise a MMC card request queue. + */ +int mmc_init_queue(struct mmc_queue *mq, struct mmc_card *card) { + struct mmc_host *host = card->host; int ret; + mq->card = card; + mq->use_cqe = host->cqe_enabled; + + spin_lock_init(&mq->lock); + memset(&mq->tag_set, 0, sizeof(mq->tag_set)); - mq->tag_set.ops = mq_ops; - mq->tag_set.queue_depth = q_depth; + mq->tag_set.ops = &mmc_mq_ops; + /* + * The queue depth for CQE must match the hardware because the request + * tag is used to index the hardware queue. + */ + if (mq->use_cqe) + mq->tag_set.queue_depth = + min_t(int, card->ext_csd.cmdq_depth, host->cqe_qdepth); + else + mq->tag_set.queue_depth = MMC_QUEUE_DEPTH; mq->tag_set.numa_node = NUMA_NO_NODE; mq->tag_set.flags = BLK_MQ_F_SHOULD_MERGE | BLK_MQ_F_SG_MERGE | BLK_MQ_F_BLOCKING; @@ -403,68 +426,17 @@ static int mmc_mq_init_queue(struct mmc_queue *mq, int q_depth, goto free_tag_set; } - mq->queue->queue_lock = lock; mq->queue->queuedata = mq; + blk_queue_rq_timeout(mq->queue, 60 * HZ); + mmc_setup_queue(mq, card); return 0; free_tag_set: blk_mq_free_tag_set(&mq->tag_set); - return ret; } -/* Set queue depth to get a reasonable value for q->nr_requests */ -#define MMC_QUEUE_DEPTH 64 - -static int mmc_mq_init(struct mmc_queue *mq, struct mmc_card *card, - spinlock_t *lock) -{ - struct mmc_host *host = card->host; - int q_depth; - int ret; - - /* - * The queue depth for CQE must match the hardware because the request - * tag is used to index the hardware queue. - */ - if (mq->use_cqe) - q_depth = min_t(int, card->ext_csd.cmdq_depth, host->cqe_qdepth); - else - q_depth = MMC_QUEUE_DEPTH; - - ret = mmc_mq_init_queue(mq, q_depth, &mmc_mq_ops, lock); - if (ret) - return ret; - - blk_queue_rq_timeout(mq->queue, 60 * HZ); - - mmc_setup_queue(mq, card); - - return 0; -} - -/** - * mmc_init_queue - initialise a queue structure. - * @mq: mmc queue - * @card: mmc card to attach this queue - * @lock: queue lock - * @subname: partition subname - * - * Initialise a MMC card request queue. - */ -int mmc_init_queue(struct mmc_queue *mq, struct mmc_card *card, - spinlock_t *lock, const char *subname) -{ - struct mmc_host *host = card->host; - - mq->card = card; - - mq->use_cqe = host->cqe_enabled; - - return mmc_mq_init(mq, card, lock); -} - void mmc_queue_suspend(struct mmc_queue *mq) { blk_mq_quiesce_queue(mq->queue); diff --git a/drivers/mmc/core/queue.h b/drivers/mmc/core/queue.h index 9bf3c9245075..fd11491ced9f 100644 --- a/drivers/mmc/core/queue.h +++ b/drivers/mmc/core/queue.h @@ -77,6 +77,7 @@ struct mmc_queue { struct blk_mq_tag_set tag_set; struct mmc_blk_data *blkdata; struct request_queue *queue; + spinlock_t lock; int in_flight[MMC_ISSUE_MAX]; unsigned int cqe_busy; #define MMC_CQE_DCMD_BUSY BIT(0) @@ -95,8 +96,7 @@ struct mmc_queue { struct work_struct complete_work; }; -extern int mmc_init_queue(struct mmc_queue *, struct mmc_card *, spinlock_t *, - const char *); +extern int mmc_init_queue(struct mmc_queue *, struct mmc_card *); extern void mmc_cleanup_queue(struct mmc_queue *); extern void mmc_queue_suspend(struct mmc_queue *); extern void mmc_queue_resume(struct mmc_queue *); diff --git a/drivers/nvdimm/pmem.c b/drivers/nvdimm/pmem.c index 0e39e3d1846f..f7019294740c 100644 --- a/drivers/nvdimm/pmem.c +++ b/drivers/nvdimm/pmem.c @@ -393,7 +393,7 @@ static int pmem_attach_disk(struct device *dev, return -EBUSY; } - q = blk_alloc_queue_node(GFP_KERNEL, dev_to_node(dev), NULL); + q = blk_alloc_queue_node(GFP_KERNEL, dev_to_node(dev)); if (!q) return -ENOMEM; diff --git a/drivers/nvme/host/core.c b/drivers/nvme/host/core.c index 962012135b62..1310753a01e5 100644 --- a/drivers/nvme/host/core.c +++ b/drivers/nvme/host/core.c @@ -97,7 +97,6 @@ static dev_t nvme_chr_devt; static struct class *nvme_class; static struct class *nvme_subsys_class; -static void nvme_ns_remove(struct nvme_ns *ns); static int nvme_revalidate_disk(struct gendisk *disk); static void nvme_put_subsystem(struct nvme_subsystem *subsys); static void nvme_remove_invalid_namespaces(struct nvme_ctrl *ctrl, @@ -245,12 +244,31 @@ static inline bool nvme_req_needs_retry(struct request *req) return true; } +static void nvme_retry_req(struct request *req) +{ + struct nvme_ns *ns = req->q->queuedata; + unsigned long delay = 0; + u16 crd; + + /* The mask and shift result must be <= 3 */ + crd = (nvme_req(req)->status & NVME_SC_CRD) >> 11; + if (ns && crd) + delay = ns->ctrl->crdt[crd - 1] * 100; + + nvme_req(req)->retries++; + blk_mq_requeue_request(req, false); + blk_mq_delay_kick_requeue_list(req->q, delay); +} + void nvme_complete_rq(struct request *req) { blk_status_t status = nvme_error_status(req); trace_nvme_complete_rq(req); + if (nvme_req(req)->ctrl->kas) + nvme_req(req)->ctrl->comp_seen = true; + if (unlikely(status != BLK_STS_OK && nvme_req_needs_retry(req))) { if ((req->cmd_flags & REQ_NVME_MPATH) && blk_path_error(status)) { @@ -259,8 +277,7 @@ void nvme_complete_rq(struct request *req) } if (!blk_queue_dying(req->q)) { - nvme_req(req)->retries++; - blk_mq_requeue_request(req, true); + nvme_retry_req(req); return; } } @@ -268,14 +285,14 @@ void nvme_complete_rq(struct request *req) } EXPORT_SYMBOL_GPL(nvme_complete_rq); -void nvme_cancel_request(struct request *req, void *data, bool reserved) +bool nvme_cancel_request(struct request *req, void *data, bool reserved) { dev_dbg_ratelimited(((struct nvme_ctrl *) data)->device, "Cancelling I/O %d", req->tag); nvme_req(req)->status = NVME_SC_ABORT_REQ; blk_mq_complete_request(req); - + return true; } EXPORT_SYMBOL_GPL(nvme_cancel_request); @@ -536,7 +553,6 @@ static void nvme_assign_write_stream(struct nvme_ctrl *ctrl, static inline void nvme_setup_flush(struct nvme_ns *ns, struct nvme_command *cmnd) { - memset(cmnd, 0, sizeof(*cmnd)); cmnd->common.opcode = nvme_cmd_flush; cmnd->common.nsid = cpu_to_le32(ns->head->ns_id); } @@ -569,7 +585,6 @@ static blk_status_t nvme_setup_discard(struct nvme_ns *ns, struct request *req, return BLK_STS_IOERR; } - memset(cmnd, 0, sizeof(*cmnd)); cmnd->dsm.opcode = nvme_cmd_dsm; cmnd->dsm.nsid = cpu_to_le32(ns->head->ns_id); cmnd->dsm.nr = cpu_to_le32(segments - 1); @@ -598,7 +613,6 @@ static inline blk_status_t nvme_setup_rw(struct nvme_ns *ns, if (req->cmd_flags & REQ_RAHEAD) dsmgmt |= NVME_RW_DSM_FREQ_PREFETCH; - memset(cmnd, 0, sizeof(*cmnd)); cmnd->rw.opcode = (rq_data_dir(req) ? nvme_cmd_write : nvme_cmd_read); cmnd->rw.nsid = cpu_to_le32(ns->head->ns_id); cmnd->rw.slba = cpu_to_le64(nvme_block_nr(ns, blk_rq_pos(req))); @@ -663,6 +677,7 @@ blk_status_t nvme_setup_cmd(struct nvme_ns *ns, struct request *req, nvme_clear_nvme_request(req); + memset(cmd, 0, sizeof(*cmd)); switch (req_op(req)) { case REQ_OP_DRV_IN: case REQ_OP_DRV_OUT: @@ -843,6 +858,7 @@ static void nvme_keep_alive_end_io(struct request *rq, blk_status_t status) return; } + ctrl->comp_seen = false; spin_lock_irqsave(&ctrl->lock, flags); if (ctrl->state == NVME_CTRL_LIVE || ctrl->state == NVME_CTRL_CONNECTING) @@ -873,6 +889,15 @@ static void nvme_keep_alive_work(struct work_struct *work) { struct nvme_ctrl *ctrl = container_of(to_delayed_work(work), struct nvme_ctrl, ka_work); + bool comp_seen = ctrl->comp_seen; + + if ((ctrl->ctratt & NVME_CTRL_ATTR_TBKAS) && comp_seen) { + dev_dbg(ctrl->device, + "reschedule traffic based keep-alive timer\n"); + ctrl->comp_seen = false; + schedule_delayed_work(&ctrl->ka_work, ctrl->kato * HZ); + return; + } if (nvme_keep_alive(ctrl)) { /* allocation failure, reset the controller */ @@ -1881,6 +1906,26 @@ static int nvme_configure_timestamp(struct nvme_ctrl *ctrl) return ret; } +static int nvme_configure_acre(struct nvme_ctrl *ctrl) +{ + struct nvme_feat_host_behavior *host; + int ret; + + /* Don't bother enabling the feature if retry delay is not reported */ + if (!ctrl->crdt[0]) + return 0; + + host = kzalloc(sizeof(*host), GFP_KERNEL); + if (!host) + return 0; + + host->acre = NVME_ENABLE_ACRE; + ret = nvme_set_features(ctrl, NVME_FEAT_HOST_BEHAVIOR, 0, + host, sizeof(*host), NULL); + kfree(host); + return ret; +} + static int nvme_configure_apst(struct nvme_ctrl *ctrl) { /* @@ -2402,6 +2447,10 @@ int nvme_init_identify(struct nvme_ctrl *ctrl) ctrl->quirks &= ~NVME_QUIRK_NO_DEEPEST_PS; } + ctrl->crdt[0] = le16_to_cpu(id->crdt1); + ctrl->crdt[1] = le16_to_cpu(id->crdt2); + ctrl->crdt[2] = le16_to_cpu(id->crdt3); + ctrl->oacs = le16_to_cpu(id->oacs); ctrl->oncs = le16_to_cpup(&id->oncs); ctrl->oaes = le32_to_cpu(id->oaes); @@ -2419,6 +2468,7 @@ int nvme_init_identify(struct nvme_ctrl *ctrl) ctrl->sgls = le32_to_cpu(id->sgls); ctrl->kas = le16_to_cpu(id->kas); ctrl->max_namespaces = le32_to_cpu(id->mnan); + ctrl->ctratt = le32_to_cpu(id->ctratt); if (id->rtd3e) { /* us -> s */ @@ -2501,6 +2551,10 @@ int nvme_init_identify(struct nvme_ctrl *ctrl) if (ret < 0) return ret; + ret = nvme_configure_acre(ctrl); + if (ret < 0) + return ret; + ctrl->identified = true; return 0; @@ -2776,6 +2830,7 @@ static ssize_t field##_show(struct device *dev, \ static DEVICE_ATTR(field, S_IRUGO, field##_show, NULL); nvme_show_int_function(cntlid); +nvme_show_int_function(numa_node); static ssize_t nvme_sysfs_delete(struct device *dev, struct device_attribute *attr, const char *buf, @@ -2855,6 +2910,7 @@ static struct attribute *nvme_dev_attrs[] = { &dev_attr_subsysnqn.attr, &dev_attr_address.attr, &dev_attr_state.attr, + &dev_attr_numa_node.attr, NULL }; @@ -3065,7 +3121,7 @@ static void nvme_alloc_ns(struct nvme_ctrl *ctrl, unsigned nsid) struct gendisk *disk; struct nvme_id_ns *id; char disk_name[DISK_NAME_LEN]; - int node = dev_to_node(ctrl->dev), flags = GENHD_FL_EXT_DEVT; + int node = ctrl->numa_node, flags = GENHD_FL_EXT_DEVT; ns = kzalloc_node(sizeof(*ns), GFP_KERNEL, node); if (!ns) diff --git a/drivers/nvme/host/fabrics.c b/drivers/nvme/host/fabrics.c index bd0969db6225..10074ac7731b 100644 --- a/drivers/nvme/host/fabrics.c +++ b/drivers/nvme/host/fabrics.c @@ -392,6 +392,9 @@ int nvmf_connect_admin_queue(struct nvme_ctrl *ctrl) cmd.connect.kato = ctrl->opts->discovery_nqn ? 0 : cpu_to_le32((ctrl->kato + NVME_KATO_GRACE) * 1000); + if (ctrl->opts->disable_sqflow) + cmd.connect.cattr |= NVME_CONNECT_DISABLE_SQFLOW; + data = kzalloc(sizeof(*data), GFP_KERNEL); if (!data) return -ENOMEM; @@ -451,6 +454,9 @@ int nvmf_connect_io_queue(struct nvme_ctrl *ctrl, u16 qid) cmd.connect.qid = cpu_to_le16(qid); cmd.connect.sqsize = cpu_to_le16(ctrl->sqsize); + if (ctrl->opts->disable_sqflow) + cmd.connect.cattr |= NVME_CONNECT_DISABLE_SQFLOW; + data = kzalloc(sizeof(*data), GFP_KERNEL); if (!data) return -ENOMEM; @@ -607,6 +613,7 @@ static const match_table_t opt_tokens = { { NVMF_OPT_HOST_TRADDR, "host_traddr=%s" }, { NVMF_OPT_HOST_ID, "hostid=%s" }, { NVMF_OPT_DUP_CONNECT, "duplicate_connect" }, + { NVMF_OPT_DISABLE_SQFLOW, "disable_sqflow" }, { NVMF_OPT_ERR, NULL } }; @@ -817,6 +824,9 @@ static int nvmf_parse_options(struct nvmf_ctrl_options *opts, case NVMF_OPT_DUP_CONNECT: opts->duplicate_connect = true; break; + case NVMF_OPT_DISABLE_SQFLOW: + opts->disable_sqflow = true; + break; default: pr_warn("unknown parameter or missing value '%s' in ctrl creation request\n", p); @@ -933,7 +943,8 @@ EXPORT_SYMBOL_GPL(nvmf_free_options); #define NVMF_REQUIRED_OPTS (NVMF_OPT_TRANSPORT | NVMF_OPT_NQN) #define NVMF_ALLOWED_OPTS (NVMF_OPT_QUEUE_SIZE | NVMF_OPT_NR_IO_QUEUES | \ NVMF_OPT_KATO | NVMF_OPT_HOSTNQN | \ - NVMF_OPT_HOST_ID | NVMF_OPT_DUP_CONNECT) + NVMF_OPT_HOST_ID | NVMF_OPT_DUP_CONNECT |\ + NVMF_OPT_DISABLE_SQFLOW) static struct nvme_ctrl * nvmf_create_ctrl(struct device *dev, const char *buf, size_t count) diff --git a/drivers/nvme/host/fabrics.h b/drivers/nvme/host/fabrics.h index 6ea6275f332a..ecd9a006a091 100644 --- a/drivers/nvme/host/fabrics.h +++ b/drivers/nvme/host/fabrics.h @@ -58,6 +58,7 @@ enum { NVMF_OPT_CTRL_LOSS_TMO = 1 << 11, NVMF_OPT_HOST_ID = 1 << 12, NVMF_OPT_DUP_CONNECT = 1 << 13, + NVMF_OPT_DISABLE_SQFLOW = 1 << 14, }; /** @@ -101,6 +102,7 @@ struct nvmf_ctrl_options { unsigned int kato; struct nvmf_host *host; int max_reconnects; + bool disable_sqflow; }; /* diff --git a/drivers/nvme/host/fc.c b/drivers/nvme/host/fc.c index feb86b59170e..b79e41938513 100644 --- a/drivers/nvme/host/fc.c +++ b/drivers/nvme/host/fc.c @@ -2326,38 +2326,6 @@ nvme_fc_queue_rq(struct blk_mq_hw_ctx *hctx, return nvme_fc_start_fcp_op(ctrl, queue, op, data_len, io_dir); } -static struct blk_mq_tags * -nvme_fc_tagset(struct nvme_fc_queue *queue) -{ - if (queue->qnum == 0) - return queue->ctrl->admin_tag_set.tags[queue->qnum]; - - return queue->ctrl->tag_set.tags[queue->qnum - 1]; -} - -static int -nvme_fc_poll(struct blk_mq_hw_ctx *hctx, unsigned int tag) - -{ - struct nvme_fc_queue *queue = hctx->driver_data; - struct nvme_fc_ctrl *ctrl = queue->ctrl; - struct request *req; - struct nvme_fc_fcp_op *op; - - req = blk_mq_tag_to_rq(nvme_fc_tagset(queue), tag); - if (!req) - return 0; - - op = blk_mq_rq_to_pdu(req); - - if ((atomic_read(&op->state) == FCPOP_STATE_ACTIVE) && - (ctrl->lport->ops->poll_queue)) - ctrl->lport->ops->poll_queue(&ctrl->lport->localport, - queue->lldd_handle); - - return ((atomic_read(&op->state) != FCPOP_STATE_ACTIVE)); -} - static void nvme_fc_submit_async_event(struct nvme_ctrl *arg) { @@ -2410,7 +2378,7 @@ nvme_fc_complete_rq(struct request *rq) * status. The done path will return the io request back to the block * layer with an error status. */ -static void +static bool nvme_fc_terminate_exchange(struct request *req, void *data, bool reserved) { struct nvme_ctrl *nctrl = data; @@ -2418,6 +2386,7 @@ nvme_fc_terminate_exchange(struct request *req, void *data, bool reserved) struct nvme_fc_fcp_op *op = blk_mq_rq_to_pdu(req); __nvme_fc_abort_op(ctrl, op); + return true; } @@ -2427,7 +2396,6 @@ static const struct blk_mq_ops nvme_fc_mq_ops = { .init_request = nvme_fc_init_request, .exit_request = nvme_fc_exit_request, .init_hctx = nvme_fc_init_hctx, - .poll = nvme_fc_poll, .timeout = nvme_fc_timeout, }; @@ -2457,7 +2425,7 @@ nvme_fc_create_io_queues(struct nvme_fc_ctrl *ctrl) ctrl->tag_set.ops = &nvme_fc_mq_ops; ctrl->tag_set.queue_depth = ctrl->ctrl.opts->queue_size; ctrl->tag_set.reserved_tags = 1; /* fabric connect */ - ctrl->tag_set.numa_node = NUMA_NO_NODE; + ctrl->tag_set.numa_node = ctrl->ctrl.numa_node; ctrl->tag_set.flags = BLK_MQ_F_SHOULD_MERGE; ctrl->tag_set.cmd_size = struct_size((struct nvme_fcp_op_w_sgl *)NULL, priv, @@ -3050,6 +3018,7 @@ nvme_fc_init_ctrl(struct device *dev, struct nvmf_ctrl_options *opts, ctrl->ctrl.opts = opts; ctrl->ctrl.nr_reconnects = 0; + ctrl->ctrl.numa_node = dev_to_node(lport->dev); INIT_LIST_HEAD(&ctrl->ctrl_list); ctrl->lport = lport; ctrl->rport = rport; @@ -3090,7 +3059,7 @@ nvme_fc_init_ctrl(struct device *dev, struct nvmf_ctrl_options *opts, ctrl->admin_tag_set.ops = &nvme_fc_admin_mq_ops; ctrl->admin_tag_set.queue_depth = NVME_AQ_MQ_TAG_DEPTH; ctrl->admin_tag_set.reserved_tags = 2; /* fabric connect + Keep-Alive */ - ctrl->admin_tag_set.numa_node = NUMA_NO_NODE; + ctrl->admin_tag_set.numa_node = ctrl->ctrl.numa_node; ctrl->admin_tag_set.cmd_size = struct_size((struct nvme_fcp_op_w_sgl *)NULL, priv, ctrl->lport->ops->fcprqst_priv_sz); diff --git a/drivers/nvme/host/multipath.c b/drivers/nvme/host/multipath.c index 9901afd804ce..183ec17ba067 100644 --- a/drivers/nvme/host/multipath.c +++ b/drivers/nvme/host/multipath.c @@ -141,7 +141,7 @@ static struct nvme_ns *__nvme_find_path(struct nvme_ns_head *head, int node) test_bit(NVME_NS_ANA_PENDING, &ns->flags)) continue; - distance = node_distance(node, dev_to_node(ns->ctrl->dev)); + distance = node_distance(node, ns->ctrl->numa_node); switch (ns->ana_state) { case NVME_ANA_OPTIMIZED: @@ -220,21 +220,6 @@ static blk_qc_t nvme_ns_head_make_request(struct request_queue *q, return ret; } -static bool nvme_ns_head_poll(struct request_queue *q, blk_qc_t qc) -{ - struct nvme_ns_head *head = q->queuedata; - struct nvme_ns *ns; - bool found = false; - int srcu_idx; - - srcu_idx = srcu_read_lock(&head->srcu); - ns = srcu_dereference(head->current_path[numa_node_id()], &head->srcu); - if (likely(ns && nvme_path_is_optimized(ns))) - found = ns->queue->poll_fn(q, qc); - srcu_read_unlock(&head->srcu, srcu_idx); - return found; -} - static void nvme_requeue_work(struct work_struct *work) { struct nvme_ns_head *head = @@ -276,12 +261,11 @@ int nvme_mpath_alloc_disk(struct nvme_ctrl *ctrl, struct nvme_ns_head *head) if (!(ctrl->subsys->cmic & (1 << 1)) || !multipath) return 0; - q = blk_alloc_queue_node(GFP_KERNEL, NUMA_NO_NODE, NULL); + q = blk_alloc_queue_node(GFP_KERNEL, ctrl->numa_node); if (!q) goto out; q->queuedata = head; blk_queue_make_request(q, nvme_ns_head_make_request); - q->poll_fn = nvme_ns_head_poll; blk_queue_flag_set(QUEUE_FLAG_NONROT, q); /* set to a default value for 512 until disk is validated */ blk_queue_logical_block_size(q, 512); diff --git a/drivers/nvme/host/nvme.h b/drivers/nvme/host/nvme.h index 081cbdcce880..8e0ec365ce8d 100644 --- a/drivers/nvme/host/nvme.h +++ b/drivers/nvme/host/nvme.h @@ -145,6 +145,7 @@ enum nvme_ctrl_state { }; struct nvme_ctrl { + bool comp_seen; enum nvme_ctrl_state state; bool identified; spinlock_t lock; @@ -153,6 +154,7 @@ struct nvme_ctrl { struct request_queue *connect_q; struct device *dev; int instance; + int numa_node; struct blk_mq_tag_set *tagset; struct blk_mq_tag_set *admin_tagset; struct list_head namespaces; @@ -179,6 +181,7 @@ struct nvme_ctrl { u32 page_size; u32 max_hw_sectors; u32 max_segments; + u16 crdt[3]; u16 oncs; u16 oacs; u16 nssa; @@ -193,6 +196,7 @@ struct nvme_ctrl { u8 apsta; u32 oaes; u32 aen_result; + u32 ctratt; unsigned int shutdown_timeout; unsigned int kato; bool subsystem; @@ -364,15 +368,6 @@ static inline void nvme_fault_inject_fini(struct nvme_ns *ns) {} static inline void nvme_should_fail(struct request *req) {} #endif -static inline bool nvme_ctrl_ready(struct nvme_ctrl *ctrl) -{ - u32 val = 0; - - if (ctrl->ops->reg_read32(ctrl, NVME_REG_CSTS, &val)) - return false; - return val & NVME_CSTS_RDY; -} - static inline int nvme_reset_subsystem(struct nvme_ctrl *ctrl) { if (!ctrl->subsystem) @@ -408,7 +403,7 @@ static inline void nvme_put_ctrl(struct nvme_ctrl *ctrl) } void nvme_complete_rq(struct request *req); -void nvme_cancel_request(struct request *req, void *data, bool reserved); +bool nvme_cancel_request(struct request *req, void *data, bool reserved); bool nvme_change_ctrl_state(struct nvme_ctrl *ctrl, enum nvme_ctrl_state new_state); int nvme_disable_ctrl(struct nvme_ctrl *ctrl, u64 cap); diff --git a/drivers/nvme/host/pci.c b/drivers/nvme/host/pci.c index c33bb201b884..7732c4979a4e 100644 --- a/drivers/nvme/host/pci.c +++ b/drivers/nvme/host/pci.c @@ -74,6 +74,22 @@ static int io_queue_depth = 1024; module_param_cb(io_queue_depth, &io_queue_depth_ops, &io_queue_depth, 0644); MODULE_PARM_DESC(io_queue_depth, "set io queue depth, should >= 2"); +static int queue_count_set(const char *val, const struct kernel_param *kp); +static const struct kernel_param_ops queue_count_ops = { + .set = queue_count_set, + .get = param_get_int, +}; + +static int write_queues; +module_param_cb(write_queues, &queue_count_ops, &write_queues, 0644); +MODULE_PARM_DESC(write_queues, + "Number of queues to use for writes. If not set, reads and writes " + "will share a queue set."); + +static int poll_queues = 0; +module_param_cb(poll_queues, &queue_count_ops, &poll_queues, 0644); +MODULE_PARM_DESC(poll_queues, "Number of queues to use for polled IO."); + struct nvme_dev; struct nvme_queue; @@ -92,6 +108,7 @@ struct nvme_dev { struct dma_pool *prp_small_pool; unsigned online_queues; unsigned max_qid; + unsigned io_queues[HCTX_MAX_TYPES]; unsigned int num_vecs; int q_depth; u32 db_stride; @@ -105,7 +122,6 @@ struct nvme_dev { u32 cmbsz; u32 cmbloc; struct nvme_ctrl ctrl; - struct completion ioq_wait; mempool_t *iod_mempool; @@ -134,6 +150,17 @@ static int io_queue_depth_set(const char *val, const struct kernel_param *kp) return param_set_int(val, kp); } +static int queue_count_set(const char *val, const struct kernel_param *kp) +{ + int n = 0, ret; + + ret = kstrtoint(val, 10, &n); + if (n > num_possible_cpus()) + n = num_possible_cpus(); + + return param_set_int(val, kp); +} + static inline unsigned int sq_idx(unsigned int qid, u32 stride) { return qid * 2 * stride; @@ -158,8 +185,8 @@ struct nvme_queue { struct nvme_dev *dev; spinlock_t sq_lock; struct nvme_command *sq_cmds; - bool sq_cmds_is_io; - spinlock_t cq_lock ____cacheline_aligned_in_smp; + /* only used for poll queues: */ + spinlock_t cq_poll_lock ____cacheline_aligned_in_smp; volatile struct nvme_completion *cqes; struct blk_mq_tags **tags; dma_addr_t sq_dma_addr; @@ -168,14 +195,20 @@ struct nvme_queue { u16 q_depth; s16 cq_vector; u16 sq_tail; + u16 last_sq_tail; u16 cq_head; u16 last_cq_head; u16 qid; u8 cq_phase; + unsigned long flags; +#define NVMEQ_ENABLED 0 +#define NVMEQ_SQ_CMB 1 +#define NVMEQ_DELETE_ERROR 2 u32 *dbbuf_sq_db; u32 *dbbuf_cq_db; u32 *dbbuf_sq_ei; u32 *dbbuf_cq_ei; + struct completion delete_done; }; /* @@ -218,9 +251,20 @@ static inline void _nvme_check_size(void) BUILD_BUG_ON(sizeof(struct nvme_dbbuf) != 64); } +static unsigned int max_io_queues(void) +{ + return num_possible_cpus() + write_queues + poll_queues; +} + +static unsigned int max_queue_count(void) +{ + /* IO queues + admin queue */ + return 1 + max_io_queues(); +} + static inline unsigned int nvme_dbbuf_size(u32 stride) { - return ((num_possible_cpus() + 1) * 8 * stride); + return (max_queue_count() * 8 * stride); } static int nvme_dbbuf_dma_alloc(struct nvme_dev *dev) @@ -431,30 +475,94 @@ static int nvme_init_request(struct blk_mq_tag_set *set, struct request *req, return 0; } +static int queue_irq_offset(struct nvme_dev *dev) +{ + /* if we have more than 1 vec, admin queue offsets us by 1 */ + if (dev->num_vecs > 1) + return 1; + + return 0; +} + static int nvme_pci_map_queues(struct blk_mq_tag_set *set) { struct nvme_dev *dev = set->driver_data; + int i, qoff, offset; + + offset = queue_irq_offset(dev); + for (i = 0, qoff = 0; i < set->nr_maps; i++) { + struct blk_mq_queue_map *map = &set->map[i]; + + map->nr_queues = dev->io_queues[i]; + if (!map->nr_queues) { + BUG_ON(i == HCTX_TYPE_DEFAULT); + + /* shared set, resuse read set parameters */ + map->nr_queues = dev->io_queues[HCTX_TYPE_DEFAULT]; + qoff = 0; + offset = queue_irq_offset(dev); + } + + /* + * The poll queue(s) doesn't have an IRQ (and hence IRQ + * affinity), so use the regular blk-mq cpu mapping + */ + map->queue_offset = qoff; + if (i != HCTX_TYPE_POLL) + blk_mq_pci_map_queues(map, to_pci_dev(dev->dev), offset); + else + blk_mq_map_queues(map); + qoff += map->nr_queues; + offset += map->nr_queues; + } - return blk_mq_pci_map_queues(set, to_pci_dev(dev->dev), - dev->num_vecs > 1 ? 1 /* admin queue */ : 0); + return 0; +} + +/* + * Write sq tail if we are asked to, or if the next command would wrap. + */ +static inline void nvme_write_sq_db(struct nvme_queue *nvmeq, bool write_sq) +{ + if (!write_sq) { + u16 next_tail = nvmeq->sq_tail + 1; + + if (next_tail == nvmeq->q_depth) + next_tail = 0; + if (next_tail != nvmeq->last_sq_tail) + return; + } + + if (nvme_dbbuf_update_and_check_event(nvmeq->sq_tail, + nvmeq->dbbuf_sq_db, nvmeq->dbbuf_sq_ei)) + writel(nvmeq->sq_tail, nvmeq->q_db); + nvmeq->last_sq_tail = nvmeq->sq_tail; } /** * nvme_submit_cmd() - Copy a command into a queue and ring the doorbell * @nvmeq: The queue to use * @cmd: The command to send + * @write_sq: whether to write to the SQ doorbell */ -static void nvme_submit_cmd(struct nvme_queue *nvmeq, struct nvme_command *cmd) +static void nvme_submit_cmd(struct nvme_queue *nvmeq, struct nvme_command *cmd, + bool write_sq) { spin_lock(&nvmeq->sq_lock); - memcpy(&nvmeq->sq_cmds[nvmeq->sq_tail], cmd, sizeof(*cmd)); - if (++nvmeq->sq_tail == nvmeq->q_depth) nvmeq->sq_tail = 0; - if (nvme_dbbuf_update_and_check_event(nvmeq->sq_tail, - nvmeq->dbbuf_sq_db, nvmeq->dbbuf_sq_ei)) - writel(nvmeq->sq_tail, nvmeq->q_db); + nvme_write_sq_db(nvmeq, write_sq); + spin_unlock(&nvmeq->sq_lock); +} + +static void nvme_commit_rqs(struct blk_mq_hw_ctx *hctx) +{ + struct nvme_queue *nvmeq = hctx->driver_data; + + spin_lock(&nvmeq->sq_lock); + if (nvmeq->sq_tail != nvmeq->last_sq_tail) + nvme_write_sq_db(nvmeq, true); spin_unlock(&nvmeq->sq_lock); } @@ -822,7 +930,7 @@ static blk_status_t nvme_queue_rq(struct blk_mq_hw_ctx *hctx, * We should not need to do this, but we're still using this to * ensure we can drain requests on a dying queue. */ - if (unlikely(nvmeq->cq_vector < 0)) + if (unlikely(!test_bit(NVMEQ_ENABLED, &nvmeq->flags))) return BLK_STS_IOERR; ret = nvme_setup_cmd(ns, req, &cmnd); @@ -840,7 +948,7 @@ static blk_status_t nvme_queue_rq(struct blk_mq_hw_ctx *hctx, } blk_mq_start_request(req); - nvme_submit_cmd(nvmeq, &cmnd); + nvme_submit_cmd(nvmeq, &cmnd, bd->last); return BLK_STS_OK; out_cleanup_iod: nvme_free_iod(dev, req); @@ -919,15 +1027,15 @@ static inline void nvme_update_cq_head(struct nvme_queue *nvmeq) } } -static inline bool nvme_process_cq(struct nvme_queue *nvmeq, u16 *start, - u16 *end, int tag) +static inline int nvme_process_cq(struct nvme_queue *nvmeq, u16 *start, + u16 *end, unsigned int tag) { - bool found = false; + int found = 0; *start = nvmeq->cq_head; - while (!found && nvme_cqe_pending(nvmeq)) { - if (nvmeq->cqes[nvmeq->cq_head].command_id == tag) - found = true; + while (nvme_cqe_pending(nvmeq)) { + if (tag == -1U || nvmeq->cqes[nvmeq->cq_head].command_id == tag) + found++; nvme_update_cq_head(nvmeq); } *end = nvmeq->cq_head; @@ -943,12 +1051,16 @@ static irqreturn_t nvme_irq(int irq, void *data) irqreturn_t ret = IRQ_NONE; u16 start, end; - spin_lock(&nvmeq->cq_lock); + /* + * The rmb/wmb pair ensures we see all updates from a previous run of + * the irq handler, even if that was on another CPU. + */ + rmb(); if (nvmeq->cq_head != nvmeq->last_cq_head) ret = IRQ_HANDLED; nvme_process_cq(nvmeq, &start, &end, -1); nvmeq->last_cq_head = nvmeq->cq_head; - spin_unlock(&nvmeq->cq_lock); + wmb(); if (start != end) { nvme_complete_cqes(nvmeq, start, end); @@ -966,27 +1078,50 @@ static irqreturn_t nvme_irq_check(int irq, void *data) return IRQ_NONE; } -static int __nvme_poll(struct nvme_queue *nvmeq, unsigned int tag) +/* + * Poll for completions any queue, including those not dedicated to polling. + * Can be called from any context. + */ +static int nvme_poll_irqdisable(struct nvme_queue *nvmeq, unsigned int tag) { + struct pci_dev *pdev = to_pci_dev(nvmeq->dev->dev); u16 start, end; - bool found; + int found; - if (!nvme_cqe_pending(nvmeq)) - return 0; - - spin_lock_irq(&nvmeq->cq_lock); + /* + * For a poll queue we need to protect against the polling thread + * using the CQ lock. For normal interrupt driven threads we have + * to disable the interrupt to avoid racing with it. + */ + if (nvmeq->cq_vector == -1) + spin_lock(&nvmeq->cq_poll_lock); + else + disable_irq(pci_irq_vector(pdev, nvmeq->cq_vector)); found = nvme_process_cq(nvmeq, &start, &end, tag); - spin_unlock_irq(&nvmeq->cq_lock); + if (nvmeq->cq_vector == -1) + spin_unlock(&nvmeq->cq_poll_lock); + else + enable_irq(pci_irq_vector(pdev, nvmeq->cq_vector)); nvme_complete_cqes(nvmeq, start, end); return found; } -static int nvme_poll(struct blk_mq_hw_ctx *hctx, unsigned int tag) +static int nvme_poll(struct blk_mq_hw_ctx *hctx) { struct nvme_queue *nvmeq = hctx->driver_data; + u16 start, end; + bool found; + + if (!nvme_cqe_pending(nvmeq)) + return 0; - return __nvme_poll(nvmeq, tag); + spin_lock(&nvmeq->cq_poll_lock); + found = nvme_process_cq(nvmeq, &start, &end, -1); + spin_unlock(&nvmeq->cq_poll_lock); + + nvme_complete_cqes(nvmeq, start, end); + return found; } static void nvme_pci_submit_async_event(struct nvme_ctrl *ctrl) @@ -998,7 +1133,7 @@ static void nvme_pci_submit_async_event(struct nvme_ctrl *ctrl) memset(&c, 0, sizeof(c)); c.common.opcode = nvme_admin_async_event; c.common.command_id = NVME_AQ_BLK_MQ_DEPTH; - nvme_submit_cmd(nvmeq, &c); + nvme_submit_cmd(nvmeq, &c, true); } static int adapter_delete_queue(struct nvme_dev *dev, u8 opcode, u16 id) @@ -1016,7 +1151,10 @@ static int adapter_alloc_cq(struct nvme_dev *dev, u16 qid, struct nvme_queue *nvmeq, s16 vector) { struct nvme_command c; - int flags = NVME_QUEUE_PHYS_CONTIG | NVME_CQ_IRQ_ENABLED; + int flags = NVME_QUEUE_PHYS_CONTIG; + + if (vector != -1) + flags |= NVME_CQ_IRQ_ENABLED; /* * Note: we (ab)use the fact that the prp fields survive if no data @@ -1028,7 +1166,10 @@ static int adapter_alloc_cq(struct nvme_dev *dev, u16 qid, c.create_cq.cqid = cpu_to_le16(qid); c.create_cq.qsize = cpu_to_le16(nvmeq->q_depth - 1); c.create_cq.cq_flags = cpu_to_le16(flags); - c.create_cq.irq_vector = cpu_to_le16(vector); + if (vector != -1) + c.create_cq.irq_vector = cpu_to_le16(vector); + else + c.create_cq.irq_vector = 0; return nvme_submit_sync_cmd(dev->ctrl.admin_q, &c, NULL, 0); } @@ -1157,7 +1298,7 @@ static enum blk_eh_timer_return nvme_timeout(struct request *req, bool reserved) /* * Did we miss an interrupt? */ - if (__nvme_poll(nvmeq, req->tag)) { + if (nvme_poll_irqdisable(nvmeq, req->tag)) { dev_warn(dev->ctrl.device, "I/O %d QID %d timeout, completion polled\n", req->tag, nvmeq->qid); @@ -1237,17 +1378,15 @@ static void nvme_free_queue(struct nvme_queue *nvmeq) { dma_free_coherent(nvmeq->q_dmadev, CQ_SIZE(nvmeq->q_depth), (void *)nvmeq->cqes, nvmeq->cq_dma_addr); + if (!nvmeq->sq_cmds) + return; - if (nvmeq->sq_cmds) { - if (nvmeq->sq_cmds_is_io) - pci_free_p2pmem(to_pci_dev(nvmeq->q_dmadev), - nvmeq->sq_cmds, - SQ_SIZE(nvmeq->q_depth)); - else - dma_free_coherent(nvmeq->q_dmadev, - SQ_SIZE(nvmeq->q_depth), - nvmeq->sq_cmds, - nvmeq->sq_dma_addr); + if (test_and_clear_bit(NVMEQ_SQ_CMB, &nvmeq->flags)) { + pci_free_p2pmem(to_pci_dev(nvmeq->q_dmadev), + nvmeq->sq_cmds, SQ_SIZE(nvmeq->q_depth)); + } else { + dma_free_coherent(nvmeq->q_dmadev, SQ_SIZE(nvmeq->q_depth), + nvmeq->sq_cmds, nvmeq->sq_dma_addr); } } @@ -1267,47 +1406,32 @@ static void nvme_free_queues(struct nvme_dev *dev, int lowest) */ static int nvme_suspend_queue(struct nvme_queue *nvmeq) { - int vector; - - spin_lock_irq(&nvmeq->cq_lock); - if (nvmeq->cq_vector == -1) { - spin_unlock_irq(&nvmeq->cq_lock); + if (!test_and_clear_bit(NVMEQ_ENABLED, &nvmeq->flags)) return 1; - } - vector = nvmeq->cq_vector; - nvmeq->dev->online_queues--; - nvmeq->cq_vector = -1; - spin_unlock_irq(&nvmeq->cq_lock); - /* - * Ensure that nvme_queue_rq() sees it ->cq_vector == -1 without - * having to grab the lock. - */ + /* ensure that nvme_queue_rq() sees NVMEQ_ENABLED cleared */ mb(); + nvmeq->dev->online_queues--; if (!nvmeq->qid && nvmeq->dev->ctrl.admin_q) blk_mq_quiesce_queue(nvmeq->dev->ctrl.admin_q); - - pci_free_irq(to_pci_dev(nvmeq->dev->dev), vector, nvmeq); - + if (nvmeq->cq_vector == -1) + return 0; + pci_free_irq(to_pci_dev(nvmeq->dev->dev), nvmeq->cq_vector, nvmeq); + nvmeq->cq_vector = -1; return 0; } static void nvme_disable_admin_queue(struct nvme_dev *dev, bool shutdown) { struct nvme_queue *nvmeq = &dev->queues[0]; - u16 start, end; if (shutdown) nvme_shutdown_ctrl(&dev->ctrl); else nvme_disable_ctrl(&dev->ctrl, dev->ctrl.cap); - spin_lock_irq(&nvmeq->cq_lock); - nvme_process_cq(nvmeq, &start, &end, -1); - spin_unlock_irq(&nvmeq->cq_lock); - - nvme_complete_cqes(nvmeq, start, end); + nvme_poll_irqdisable(nvmeq, -1); } static int nvme_cmb_qdepth(struct nvme_dev *dev, int nr_io_queues, @@ -1343,15 +1467,14 @@ static int nvme_alloc_sq_cmds(struct nvme_dev *dev, struct nvme_queue *nvmeq, nvmeq->sq_cmds = pci_alloc_p2pmem(pdev, SQ_SIZE(depth)); nvmeq->sq_dma_addr = pci_p2pmem_virt_to_bus(pdev, nvmeq->sq_cmds); - nvmeq->sq_cmds_is_io = true; - } - - if (!nvmeq->sq_cmds) { - nvmeq->sq_cmds = dma_alloc_coherent(dev->dev, SQ_SIZE(depth), - &nvmeq->sq_dma_addr, GFP_KERNEL); - nvmeq->sq_cmds_is_io = false; + if (nvmeq->sq_dma_addr) { + set_bit(NVMEQ_SQ_CMB, &nvmeq->flags); + return 0; + } } + nvmeq->sq_cmds = dma_alloc_coherent(dev->dev, SQ_SIZE(depth), + &nvmeq->sq_dma_addr, GFP_KERNEL); if (!nvmeq->sq_cmds) return -ENOMEM; return 0; @@ -1375,7 +1498,7 @@ static int nvme_alloc_queue(struct nvme_dev *dev, int qid, int depth) nvmeq->q_dmadev = dev->dev; nvmeq->dev = dev; spin_lock_init(&nvmeq->sq_lock); - spin_lock_init(&nvmeq->cq_lock); + spin_lock_init(&nvmeq->cq_poll_lock); nvmeq->cq_head = 0; nvmeq->cq_phase = 1; nvmeq->q_db = &dev->dbs[qid * 2 * dev->db_stride]; @@ -1411,28 +1534,34 @@ static void nvme_init_queue(struct nvme_queue *nvmeq, u16 qid) { struct nvme_dev *dev = nvmeq->dev; - spin_lock_irq(&nvmeq->cq_lock); nvmeq->sq_tail = 0; + nvmeq->last_sq_tail = 0; nvmeq->cq_head = 0; nvmeq->cq_phase = 1; nvmeq->q_db = &dev->dbs[qid * 2 * dev->db_stride]; memset((void *)nvmeq->cqes, 0, CQ_SIZE(nvmeq->q_depth)); nvme_dbbuf_init(dev, nvmeq, qid); dev->online_queues++; - spin_unlock_irq(&nvmeq->cq_lock); + wmb(); /* ensure the first interrupt sees the initialization */ } -static int nvme_create_queue(struct nvme_queue *nvmeq, int qid) +static int nvme_create_queue(struct nvme_queue *nvmeq, int qid, bool polled) { struct nvme_dev *dev = nvmeq->dev; int result; s16 vector; + clear_bit(NVMEQ_DELETE_ERROR, &nvmeq->flags); + /* * A queue's vector matches the queue identifier unless the controller * has only one vector available. */ - vector = dev->num_vecs == 1 ? 0 : qid; + if (!polled) + vector = dev->num_vecs == 1 ? 0 : qid; + else + vector = -1; + result = adapter_alloc_cq(dev, qid, nvmeq, vector); if (result) return result; @@ -1443,17 +1572,16 @@ static int nvme_create_queue(struct nvme_queue *nvmeq, int qid) else if (result) goto release_cq; - /* - * Set cq_vector after alloc cq/sq, otherwise nvme_suspend_queue will - * invoke free_irq for it and cause a 'Trying to free already-free IRQ - * xxx' warning if the create CQ/SQ command times out. - */ nvmeq->cq_vector = vector; nvme_init_queue(nvmeq, qid); - result = queue_request_irq(nvmeq); - if (result < 0) - goto release_sq; + if (vector != -1) { + result = queue_request_irq(nvmeq); + if (result < 0) + goto release_sq; + } + + set_bit(NVMEQ_ENABLED, &nvmeq->flags); return result; release_sq: @@ -1477,6 +1605,7 @@ static const struct blk_mq_ops nvme_mq_admin_ops = { static const struct blk_mq_ops nvme_mq_ops = { .queue_rq = nvme_queue_rq, .complete = nvme_pci_complete_rq, + .commit_rqs = nvme_commit_rqs, .init_hctx = nvme_init_hctx, .init_request = nvme_init_request, .map_queues = nvme_pci_map_queues, @@ -1602,12 +1731,13 @@ static int nvme_pci_configure_admin_queue(struct nvme_dev *dev) return result; } + set_bit(NVMEQ_ENABLED, &nvmeq->flags); return result; } static int nvme_create_io_queues(struct nvme_dev *dev) { - unsigned i, max; + unsigned i, max, rw_queues; int ret = 0; for (i = dev->ctrl.queue_count; i <= dev->max_qid; i++) { @@ -1618,8 +1748,17 @@ static int nvme_create_io_queues(struct nvme_dev *dev) } max = min(dev->max_qid, dev->ctrl.queue_count - 1); + if (max != 1 && dev->io_queues[HCTX_TYPE_POLL]) { + rw_queues = dev->io_queues[HCTX_TYPE_DEFAULT] + + dev->io_queues[HCTX_TYPE_READ]; + } else { + rw_queues = max; + } + for (i = dev->online_queues; i <= max; i++) { - ret = nvme_create_queue(&dev->queues[i], i); + bool polled = i > rw_queues; + + ret = nvme_create_queue(&dev->queues[i], i, polled); if (ret) break; } @@ -1891,6 +2030,116 @@ static int nvme_setup_host_mem(struct nvme_dev *dev) return ret; } +static void nvme_calc_io_queues(struct nvme_dev *dev, unsigned int nr_io_queues) +{ + unsigned int this_w_queues = write_queues; + unsigned int this_p_queues = poll_queues; + + /* + * Setup read/write queue split + */ + if (nr_io_queues == 1) { + dev->io_queues[HCTX_TYPE_DEFAULT] = 1; + dev->io_queues[HCTX_TYPE_READ] = 0; + dev->io_queues[HCTX_TYPE_POLL] = 0; + return; + } + + /* + * Configure number of poll queues, if set + */ + if (this_p_queues) { + /* + * We need at least one queue left. With just one queue, we'll + * have a single shared read/write set. + */ + if (this_p_queues >= nr_io_queues) { + this_w_queues = 0; + this_p_queues = nr_io_queues - 1; + } + + dev->io_queues[HCTX_TYPE_POLL] = this_p_queues; + nr_io_queues -= this_p_queues; + } else + dev->io_queues[HCTX_TYPE_POLL] = 0; + + /* + * If 'write_queues' is set, ensure it leaves room for at least + * one read queue + */ + if (this_w_queues >= nr_io_queues) + this_w_queues = nr_io_queues - 1; + + /* + * If 'write_queues' is set to zero, reads and writes will share + * a queue set. + */ + if (!this_w_queues) { + dev->io_queues[HCTX_TYPE_DEFAULT] = nr_io_queues; + dev->io_queues[HCTX_TYPE_READ] = 0; + } else { + dev->io_queues[HCTX_TYPE_DEFAULT] = this_w_queues; + dev->io_queues[HCTX_TYPE_READ] = nr_io_queues - this_w_queues; + } +} + +static int nvme_setup_irqs(struct nvme_dev *dev, int nr_io_queues) +{ + struct pci_dev *pdev = to_pci_dev(dev->dev); + int irq_sets[2]; + struct irq_affinity affd = { + .pre_vectors = 1, + .nr_sets = ARRAY_SIZE(irq_sets), + .sets = irq_sets, + }; + int result = 0; + + /* + * For irq sets, we have to ask for minvec == maxvec. This passes + * any reduction back to us, so we can adjust our queue counts and + * IRQ vector needs. + */ + do { + nvme_calc_io_queues(dev, nr_io_queues); + irq_sets[0] = dev->io_queues[HCTX_TYPE_DEFAULT]; + irq_sets[1] = dev->io_queues[HCTX_TYPE_READ]; + if (!irq_sets[1]) + affd.nr_sets = 1; + + /* + * If we got a failure and we're down to asking for just + * 1 + 1 queues, just ask for a single vector. We'll share + * that between the single IO queue and the admin queue. + */ + if (!(result < 0 && nr_io_queues == 1)) + nr_io_queues = irq_sets[0] + irq_sets[1] + 1; + + result = pci_alloc_irq_vectors_affinity(pdev, nr_io_queues, + nr_io_queues, + PCI_IRQ_ALL_TYPES | PCI_IRQ_AFFINITY, &affd); + + /* + * Need to reduce our vec counts. If we get ENOSPC, the + * platform should support mulitple vecs, we just need + * to decrease our ask. If we get EINVAL, the platform + * likely does not. Back down to ask for just one vector. + */ + if (result == -ENOSPC) { + nr_io_queues--; + if (!nr_io_queues) + return result; + continue; + } else if (result == -EINVAL) { + nr_io_queues = 1; + continue; + } else if (result <= 0) + return -EIO; + break; + } while (1); + + return result; +} + static int nvme_setup_io_queues(struct nvme_dev *dev) { struct nvme_queue *adminq = &dev->queues[0]; @@ -1898,17 +2147,15 @@ static int nvme_setup_io_queues(struct nvme_dev *dev) int result, nr_io_queues; unsigned long size; - struct irq_affinity affd = { - .pre_vectors = 1 - }; - - nr_io_queues = num_possible_cpus(); + nr_io_queues = max_io_queues(); result = nvme_set_queue_count(&dev->ctrl, &nr_io_queues); if (result < 0) return result; if (nr_io_queues == 0) return 0; + + clear_bit(NVMEQ_ENABLED, &adminq->flags); if (dev->cmb_use_sqes) { result = nvme_cmb_qdepth(dev, nr_io_queues, @@ -1937,12 +2184,19 @@ static int nvme_setup_io_queues(struct nvme_dev *dev) * setting up the full range we need. */ pci_free_irq_vectors(pdev); - result = pci_alloc_irq_vectors_affinity(pdev, 1, nr_io_queues + 1, - PCI_IRQ_ALL_TYPES | PCI_IRQ_AFFINITY, &affd); + + result = nvme_setup_irqs(dev, nr_io_queues); if (result <= 0) return -EIO; + dev->num_vecs = result; - dev->max_qid = max(result - 1, 1); + result = max(result - 1, 1); + dev->max_qid = result + dev->io_queues[HCTX_TYPE_POLL]; + + dev_info(dev->ctrl.device, "%d/%d/%d default/read/poll queues\n", + dev->io_queues[HCTX_TYPE_DEFAULT], + dev->io_queues[HCTX_TYPE_READ], + dev->io_queues[HCTX_TYPE_POLL]); /* * Should investigate if there's a performance win from allocating @@ -1956,6 +2210,7 @@ static int nvme_setup_io_queues(struct nvme_dev *dev) adminq->cq_vector = -1; return result; } + set_bit(NVMEQ_ENABLED, &adminq->flags); return nvme_create_io_queues(dev); } @@ -1964,23 +2219,15 @@ static void nvme_del_queue_end(struct request *req, blk_status_t error) struct nvme_queue *nvmeq = req->end_io_data; blk_mq_free_request(req); - complete(&nvmeq->dev->ioq_wait); + complete(&nvmeq->delete_done); } static void nvme_del_cq_end(struct request *req, blk_status_t error) { struct nvme_queue *nvmeq = req->end_io_data; - u16 start, end; - - if (!error) { - unsigned long flags; - spin_lock_irqsave(&nvmeq->cq_lock, flags); - nvme_process_cq(nvmeq, &start, &end, -1); - spin_unlock_irqrestore(&nvmeq->cq_lock, flags); - - nvme_complete_cqes(nvmeq, start, end); - } + if (error) + set_bit(NVMEQ_DELETE_ERROR, &nvmeq->flags); nvme_del_queue_end(req, error); } @@ -2002,37 +2249,44 @@ static int nvme_delete_queue(struct nvme_queue *nvmeq, u8 opcode) req->timeout = ADMIN_TIMEOUT; req->end_io_data = nvmeq; + init_completion(&nvmeq->delete_done); blk_execute_rq_nowait(q, NULL, req, false, opcode == nvme_admin_delete_cq ? nvme_del_cq_end : nvme_del_queue_end); return 0; } -static void nvme_disable_io_queues(struct nvme_dev *dev) +static bool nvme_disable_io_queues(struct nvme_dev *dev, u8 opcode) { - int pass, queues = dev->online_queues - 1; + int nr_queues = dev->online_queues - 1, sent = 0; unsigned long timeout; - u8 opcode = nvme_admin_delete_sq; - - for (pass = 0; pass < 2; pass++) { - int sent = 0, i = queues; - reinit_completion(&dev->ioq_wait); retry: - timeout = ADMIN_TIMEOUT; - for (; i > 0; i--, sent++) - if (nvme_delete_queue(&dev->queues[i], opcode)) - break; - - while (sent--) { - timeout = wait_for_completion_io_timeout(&dev->ioq_wait, timeout); - if (timeout == 0) - return; - if (i) - goto retry; - } - opcode = nvme_admin_delete_cq; + timeout = ADMIN_TIMEOUT; + while (nr_queues > 0) { + if (nvme_delete_queue(&dev->queues[nr_queues], opcode)) + break; + nr_queues--; + sent++; } + while (sent) { + struct nvme_queue *nvmeq = &dev->queues[nr_queues + sent]; + + timeout = wait_for_completion_io_timeout(&nvmeq->delete_done, + timeout); + if (timeout == 0) + return false; + + /* handle any remaining CQEs */ + if (opcode == nvme_admin_delete_cq && + !test_bit(NVMEQ_DELETE_ERROR, &nvmeq->flags)) + nvme_poll_irqdisable(nvmeq, -1); + + sent--; + if (nr_queues) + goto retry; + } + return true; } /* @@ -2045,6 +2299,7 @@ static int nvme_dev_add(struct nvme_dev *dev) if (!dev->ctrl.tagset) { dev->tagset.ops = &nvme_mq_ops; dev->tagset.nr_hw_queues = dev->online_queues - 1; + dev->tagset.nr_maps = HCTX_MAX_TYPES; dev->tagset.timeout = NVME_IO_TIMEOUT; dev->tagset.numa_node = dev_to_node(dev->dev); dev->tagset.queue_depth = @@ -2187,7 +2442,8 @@ static void nvme_dev_disable(struct nvme_dev *dev, bool shutdown) nvme_stop_queues(&dev->ctrl); if (!dead && dev->ctrl.queue_count > 0) { - nvme_disable_io_queues(dev); + if (nvme_disable_io_queues(dev, nvme_admin_delete_sq)) + nvme_disable_io_queues(dev, nvme_admin_delete_cq); nvme_disable_admin_queue(dev, shutdown); } for (i = dev->ctrl.queue_count - 1; i >= 0; i--) @@ -2491,8 +2747,8 @@ static int nvme_probe(struct pci_dev *pdev, const struct pci_device_id *id) if (!dev) return -ENOMEM; - dev->queues = kcalloc_node(num_possible_cpus() + 1, - sizeof(struct nvme_queue), GFP_KERNEL, node); + dev->queues = kcalloc_node(max_queue_count(), sizeof(struct nvme_queue), + GFP_KERNEL, node); if (!dev->queues) goto free; @@ -2506,7 +2762,6 @@ static int nvme_probe(struct pci_dev *pdev, const struct pci_device_id *id) INIT_WORK(&dev->ctrl.reset_work, nvme_reset_work); INIT_WORK(&dev->remove_work, nvme_remove_dead_ctrl_work); mutex_init(&dev->shutdown_lock); - init_completion(&dev->ioq_wait); result = nvme_setup_prp_pools(dev); if (result) diff --git a/drivers/nvme/host/rdma.c b/drivers/nvme/host/rdma.c index ab6ec7295bf9..f2db848f6985 100644 --- a/drivers/nvme/host/rdma.c +++ b/drivers/nvme/host/rdma.c @@ -694,7 +694,7 @@ static struct blk_mq_tag_set *nvme_rdma_alloc_tagset(struct nvme_ctrl *nctrl, set->ops = &nvme_rdma_admin_mq_ops; set->queue_depth = NVME_AQ_MQ_TAG_DEPTH; set->reserved_tags = 2; /* connect + keep-alive */ - set->numa_node = NUMA_NO_NODE; + set->numa_node = nctrl->numa_node; set->cmd_size = sizeof(struct nvme_rdma_request) + SG_CHUNK_SIZE * sizeof(struct scatterlist); set->driver_data = ctrl; @@ -707,7 +707,7 @@ static struct blk_mq_tag_set *nvme_rdma_alloc_tagset(struct nvme_ctrl *nctrl, set->ops = &nvme_rdma_mq_ops; set->queue_depth = nctrl->sqsize + 1; set->reserved_tags = 1; /* fabric connect */ - set->numa_node = NUMA_NO_NODE; + set->numa_node = nctrl->numa_node; set->flags = BLK_MQ_F_SHOULD_MERGE; set->cmd_size = sizeof(struct nvme_rdma_request) + SG_CHUNK_SIZE * sizeof(struct scatterlist); @@ -763,6 +763,7 @@ static int nvme_rdma_configure_admin_queue(struct nvme_rdma_ctrl *ctrl, return error; ctrl->device = ctrl->queues[0].device; + ctrl->ctrl.numa_node = dev_to_node(ctrl->device->dev->dma_device); ctrl->max_fr_pages = nvme_rdma_get_max_fr_pages(ctrl->device->dev); @@ -1411,12 +1412,11 @@ static void nvme_rdma_submit_async_event(struct nvme_ctrl *arg) WARN_ON_ONCE(ret); } -static int nvme_rdma_process_nvme_rsp(struct nvme_rdma_queue *queue, - struct nvme_completion *cqe, struct ib_wc *wc, int tag) +static void nvme_rdma_process_nvme_rsp(struct nvme_rdma_queue *queue, + struct nvme_completion *cqe, struct ib_wc *wc) { struct request *rq; struct nvme_rdma_request *req; - int ret = 0; rq = blk_mq_tag_to_rq(nvme_rdma_tagset(queue), cqe->command_id); if (!rq) { @@ -1424,7 +1424,7 @@ static int nvme_rdma_process_nvme_rsp(struct nvme_rdma_queue *queue, "tag 0x%x on QP %#x not found\n", cqe->command_id, queue->qp->qp_num); nvme_rdma_error_recovery(queue->ctrl); - return ret; + return; } req = blk_mq_rq_to_pdu(rq); @@ -1439,6 +1439,8 @@ static int nvme_rdma_process_nvme_rsp(struct nvme_rdma_queue *queue, nvme_rdma_error_recovery(queue->ctrl); } } else if (req->mr) { + int ret; + ret = nvme_rdma_inv_rkey(queue, req); if (unlikely(ret < 0)) { dev_err(queue->ctrl->ctrl.device, @@ -1447,19 +1449,14 @@ static int nvme_rdma_process_nvme_rsp(struct nvme_rdma_queue *queue, nvme_rdma_error_recovery(queue->ctrl); } /* the local invalidation completion will end the request */ - return 0; + return; } - if (refcount_dec_and_test(&req->ref)) { - if (rq->tag == tag) - ret = 1; + if (refcount_dec_and_test(&req->ref)) nvme_end_request(rq, req->status, req->result); - } - - return ret; } -static int __nvme_rdma_recv_done(struct ib_cq *cq, struct ib_wc *wc, int tag) +static void nvme_rdma_recv_done(struct ib_cq *cq, struct ib_wc *wc) { struct nvme_rdma_qe *qe = container_of(wc->wr_cqe, struct nvme_rdma_qe, cqe); @@ -1467,11 +1464,10 @@ static int __nvme_rdma_recv_done(struct ib_cq *cq, struct ib_wc *wc, int tag) struct ib_device *ibdev = queue->device->dev; struct nvme_completion *cqe = qe->data; const size_t len = sizeof(struct nvme_completion); - int ret = 0; if (unlikely(wc->status != IB_WC_SUCCESS)) { nvme_rdma_wr_error(cq, wc, "RECV"); - return 0; + return; } ib_dma_sync_single_for_cpu(ibdev, qe->dma, len, DMA_FROM_DEVICE); @@ -1486,16 +1482,10 @@ static int __nvme_rdma_recv_done(struct ib_cq *cq, struct ib_wc *wc, int tag) nvme_complete_async_event(&queue->ctrl->ctrl, cqe->status, &cqe->result); else - ret = nvme_rdma_process_nvme_rsp(queue, cqe, wc, tag); + nvme_rdma_process_nvme_rsp(queue, cqe, wc); ib_dma_sync_single_for_device(ibdev, qe->dma, len, DMA_FROM_DEVICE); nvme_rdma_post_recv(queue, qe); - return ret; -} - -static void nvme_rdma_recv_done(struct ib_cq *cq, struct ib_wc *wc) -{ - __nvme_rdma_recv_done(cq, wc, -1); } static int nvme_rdma_conn_established(struct nvme_rdma_queue *queue) @@ -1749,27 +1739,6 @@ err: return BLK_STS_IOERR; } -static int nvme_rdma_poll(struct blk_mq_hw_ctx *hctx, unsigned int tag) -{ - struct nvme_rdma_queue *queue = hctx->driver_data; - struct ib_cq *cq = queue->ib_cq; - struct ib_wc wc; - int found = 0; - - while (ib_poll_cq(cq, 1, &wc) > 0) { - struct ib_cqe *cqe = wc.wr_cqe; - - if (cqe) { - if (cqe->done == nvme_rdma_recv_done) - found |= __nvme_rdma_recv_done(cq, &wc, tag); - else - cqe->done(cq, &wc); - } - } - - return found; -} - static void nvme_rdma_complete_rq(struct request *rq) { struct nvme_rdma_request *req = blk_mq_rq_to_pdu(rq); @@ -1791,7 +1760,6 @@ static const struct blk_mq_ops nvme_rdma_mq_ops = { .init_request = nvme_rdma_init_request, .exit_request = nvme_rdma_exit_request, .init_hctx = nvme_rdma_init_hctx, - .poll = nvme_rdma_poll, .timeout = nvme_rdma_timeout, .map_queues = nvme_rdma_map_queues, }; diff --git a/drivers/nvme/target/admin-cmd.c b/drivers/nvme/target/admin-cmd.c index 1179f6314323..753515fc8028 100644 --- a/drivers/nvme/target/admin-cmd.c +++ b/drivers/nvme/target/admin-cmd.c @@ -19,19 +19,6 @@ #include <asm/unaligned.h> #include "nvmet.h" -/* - * This helper allows us to clear the AEN based on the RAE bit, - * Please use this helper when processing the log pages which are - * associated with the AEN. - */ -static inline void nvmet_clear_aen(struct nvmet_req *req, u32 aen_bit) -{ - int rae = le32_to_cpu(req->cmd->common.cdw10[0]) & 1 << 15; - - if (!rae) - clear_bit(aen_bit, &req->sq->ctrl->aen_masked); -} - u32 nvmet_get_log_page_len(struct nvme_command *cmd) { u32 len = le16_to_cpu(cmd->get_log_page.numdu); @@ -189,7 +176,7 @@ static void nvmet_execute_get_log_changed_ns(struct nvmet_req *req) if (!status) status = nvmet_zero_sgl(req, len, req->data_len - len); ctrl->nr_changed_ns = 0; - nvmet_clear_aen(req, NVME_AEN_CFG_NS_ATTR); + nvmet_clear_aen_bit(req, NVME_AEN_BIT_NS_ATTR); mutex_unlock(&ctrl->lock); out: nvmet_req_complete(req, status); @@ -252,7 +239,7 @@ static void nvmet_execute_get_log_page_ana(struct nvmet_req *req) hdr.chgcnt = cpu_to_le64(nvmet_ana_chgcnt); hdr.ngrps = cpu_to_le16(ngrps); - nvmet_clear_aen(req, NVME_AEN_CFG_ANA_CHANGE); + nvmet_clear_aen_bit(req, NVME_AEN_BIT_ANA_CHANGE); up_read(&nvmet_ana_sem); kfree(desc); @@ -304,7 +291,8 @@ static void nvmet_execute_identify_ctrl(struct nvmet_req *req) /* XXX: figure out what to do about RTD3R/RTD3 */ id->oaes = cpu_to_le32(NVMET_AEN_CFG_OPTIONAL); - id->ctratt = cpu_to_le32(1 << 0); + id->ctratt = cpu_to_le32(NVME_CTRL_ATTR_HID_128_BIT | + NVME_CTRL_ATTR_TBKAS); id->oacs = 0; @@ -599,11 +587,34 @@ static u16 nvmet_set_feat_write_protect(struct nvmet_req *req) return status; } +u16 nvmet_set_feat_kato(struct nvmet_req *req) +{ + u32 val32 = le32_to_cpu(req->cmd->common.cdw10[1]); + + req->sq->ctrl->kato = DIV_ROUND_UP(val32, 1000); + + nvmet_set_result(req, req->sq->ctrl->kato); + + return 0; +} + +u16 nvmet_set_feat_async_event(struct nvmet_req *req, u32 mask) +{ + u32 val32 = le32_to_cpu(req->cmd->common.cdw10[1]); + + if (val32 & ~mask) + return NVME_SC_INVALID_FIELD | NVME_SC_DNR; + + WRITE_ONCE(req->sq->ctrl->aen_enabled, val32); + nvmet_set_result(req, val32); + + return 0; +} + static void nvmet_execute_set_features(struct nvmet_req *req) { struct nvmet_subsys *subsys = req->sq->ctrl->subsys; u32 cdw10 = le32_to_cpu(req->cmd->common.cdw10[0]); - u32 val32; u16 status = 0; switch (cdw10 & 0xff) { @@ -612,19 +623,10 @@ static void nvmet_execute_set_features(struct nvmet_req *req) (subsys->max_qid - 1) | ((subsys->max_qid - 1) << 16)); break; case NVME_FEAT_KATO: - val32 = le32_to_cpu(req->cmd->common.cdw10[1]); - req->sq->ctrl->kato = DIV_ROUND_UP(val32, 1000); - nvmet_set_result(req, req->sq->ctrl->kato); + status = nvmet_set_feat_kato(req); break; case NVME_FEAT_ASYNC_EVENT: - val32 = le32_to_cpu(req->cmd->common.cdw10[1]); - if (val32 & ~NVMET_AEN_CFG_ALL) { - status = NVME_SC_INVALID_FIELD | NVME_SC_DNR; - break; - } - - WRITE_ONCE(req->sq->ctrl->aen_enabled, val32); - nvmet_set_result(req, val32); + status = nvmet_set_feat_async_event(req, NVMET_AEN_CFG_ALL); break; case NVME_FEAT_HOST_ID: status = NVME_SC_CMD_SEQ_ERROR | NVME_SC_DNR; @@ -660,6 +662,16 @@ static u16 nvmet_get_feat_write_protect(struct nvmet_req *req) return 0; } +void nvmet_get_feat_kato(struct nvmet_req *req) +{ + nvmet_set_result(req, req->sq->ctrl->kato * 1000); +} + +void nvmet_get_feat_async_event(struct nvmet_req *req) +{ + nvmet_set_result(req, READ_ONCE(req->sq->ctrl->aen_enabled)); +} + static void nvmet_execute_get_features(struct nvmet_req *req) { struct nvmet_subsys *subsys = req->sq->ctrl->subsys; @@ -689,7 +701,7 @@ static void nvmet_execute_get_features(struct nvmet_req *req) break; #endif case NVME_FEAT_ASYNC_EVENT: - nvmet_set_result(req, READ_ONCE(req->sq->ctrl->aen_enabled)); + nvmet_get_feat_async_event(req); break; case NVME_FEAT_VOLATILE_WC: nvmet_set_result(req, 1); @@ -699,7 +711,7 @@ static void nvmet_execute_get_features(struct nvmet_req *req) (subsys->max_qid-1) | ((subsys->max_qid-1) << 16)); break; case NVME_FEAT_KATO: - nvmet_set_result(req, req->sq->ctrl->kato * 1000); + nvmet_get_feat_kato(req); break; case NVME_FEAT_HOST_ID: /* need 128-bit host identifier flag */ @@ -722,7 +734,7 @@ static void nvmet_execute_get_features(struct nvmet_req *req) nvmet_req_complete(req, status); } -static void nvmet_execute_async_event(struct nvmet_req *req) +void nvmet_execute_async_event(struct nvmet_req *req) { struct nvmet_ctrl *ctrl = req->sq->ctrl; @@ -738,7 +750,7 @@ static void nvmet_execute_async_event(struct nvmet_req *req) schedule_work(&ctrl->async_event_work); } -static void nvmet_execute_keep_alive(struct nvmet_req *req) +void nvmet_execute_keep_alive(struct nvmet_req *req) { struct nvmet_ctrl *ctrl = req->sq->ctrl; diff --git a/drivers/nvme/target/configfs.c b/drivers/nvme/target/configfs.c index d895579b6c5d..db2cb64be7ba 100644 --- a/drivers/nvme/target/configfs.c +++ b/drivers/nvme/target/configfs.c @@ -25,6 +25,9 @@ static const struct config_item_type nvmet_host_type; static const struct config_item_type nvmet_subsys_type; +static LIST_HEAD(nvmet_ports_list); +struct list_head *nvmet_ports = &nvmet_ports_list; + static const struct nvmet_transport_name { u8 type; const char *name; @@ -150,7 +153,8 @@ CONFIGFS_ATTR(nvmet_, addr_traddr); static ssize_t nvmet_addr_treq_show(struct config_item *item, char *page) { - switch (to_nvmet_port(item)->disc_addr.treq) { + switch (to_nvmet_port(item)->disc_addr.treq & + NVME_TREQ_SECURE_CHANNEL_MASK) { case NVMF_TREQ_NOT_SPECIFIED: return sprintf(page, "not specified\n"); case NVMF_TREQ_REQUIRED: @@ -166,6 +170,7 @@ static ssize_t nvmet_addr_treq_store(struct config_item *item, const char *page, size_t count) { struct nvmet_port *port = to_nvmet_port(item); + u8 treq = port->disc_addr.treq & ~NVME_TREQ_SECURE_CHANNEL_MASK; if (port->enabled) { pr_err("Cannot modify address while enabled\n"); @@ -174,15 +179,16 @@ static ssize_t nvmet_addr_treq_store(struct config_item *item, } if (sysfs_streq(page, "not specified")) { - port->disc_addr.treq = NVMF_TREQ_NOT_SPECIFIED; + treq |= NVMF_TREQ_NOT_SPECIFIED; } else if (sysfs_streq(page, "required")) { - port->disc_addr.treq = NVMF_TREQ_REQUIRED; + treq |= NVMF_TREQ_REQUIRED; } else if (sysfs_streq(page, "not required")) { - port->disc_addr.treq = NVMF_TREQ_NOT_REQUIRED; + treq |= NVMF_TREQ_NOT_REQUIRED; } else { pr_err("Invalid value '%s' for treq\n", page); return -EINVAL; } + port->disc_addr.treq = treq; return count; } @@ -646,7 +652,8 @@ static int nvmet_port_subsys_allow_link(struct config_item *parent, } list_add_tail(&link->entry, &port->subsystems); - nvmet_genctr++; + nvmet_port_disc_changed(port, subsys); + up_write(&nvmet_config_sem); return 0; @@ -673,7 +680,8 @@ static void nvmet_port_subsys_drop_link(struct config_item *parent, found: list_del(&p->entry); - nvmet_genctr++; + nvmet_port_disc_changed(port, subsys); + if (list_empty(&port->subsystems)) nvmet_disable_port(port); up_write(&nvmet_config_sem); @@ -722,7 +730,8 @@ static int nvmet_allowed_hosts_allow_link(struct config_item *parent, goto out_free_link; } list_add_tail(&link->entry, &subsys->hosts); - nvmet_genctr++; + nvmet_subsys_disc_changed(subsys, host); + up_write(&nvmet_config_sem); return 0; out_free_link: @@ -748,7 +757,8 @@ static void nvmet_allowed_hosts_drop_link(struct config_item *parent, found: list_del(&p->entry); - nvmet_genctr++; + nvmet_subsys_disc_changed(subsys, host); + up_write(&nvmet_config_sem); kfree(p); } @@ -787,7 +797,11 @@ static ssize_t nvmet_subsys_attr_allow_any_host_store(struct config_item *item, goto out_unlock; } - subsys->allow_any_host = allow_any_host; + if (subsys->allow_any_host != allow_any_host) { + subsys->allow_any_host = allow_any_host; + nvmet_subsys_disc_changed(subsys, NULL); + } + out_unlock: up_write(&nvmet_config_sem); return ret ? ret : count; @@ -936,7 +950,7 @@ static ssize_t nvmet_referral_enable_store(struct config_item *item, if (enable) nvmet_referral_enable(parent, port); else - nvmet_referral_disable(port); + nvmet_referral_disable(parent, port); return count; inval: @@ -962,9 +976,10 @@ static struct configfs_attribute *nvmet_referral_attrs[] = { static void nvmet_referral_release(struct config_item *item) { + struct nvmet_port *parent = to_nvmet_port(item->ci_parent->ci_parent); struct nvmet_port *port = to_nvmet_port(item); - nvmet_referral_disable(port); + nvmet_referral_disable(parent, port); kfree(port); } @@ -1137,6 +1152,8 @@ static void nvmet_port_release(struct config_item *item) { struct nvmet_port *port = to_nvmet_port(item); + list_del(&port->global_entry); + kfree(port->ana_state); kfree(port); } @@ -1189,12 +1206,15 @@ static struct config_group *nvmet_ports_make(struct config_group *group, port->ana_state[i] = NVME_ANA_INACCESSIBLE; } + list_add(&port->global_entry, &nvmet_ports_list); + INIT_LIST_HEAD(&port->entry); INIT_LIST_HEAD(&port->subsystems); INIT_LIST_HEAD(&port->referrals); port->inline_data_size = -1; /* < 0 == let the transport choose */ port->disc_addr.portid = cpu_to_le16(portid); + port->disc_addr.treq = NVMF_TREQ_DISABLE_SQFLOW; config_group_init_type_name(&port->group, name, &nvmet_port_type); config_group_init_type_name(&port->subsys_group, diff --git a/drivers/nvme/target/core.c b/drivers/nvme/target/core.c index a5f9bbce863f..e468100b9211 100644 --- a/drivers/nvme/target/core.c +++ b/drivers/nvme/target/core.c @@ -130,7 +130,7 @@ static void nvmet_async_event_work(struct work_struct *work) } } -static void nvmet_add_async_event(struct nvmet_ctrl *ctrl, u8 event_type, +void nvmet_add_async_event(struct nvmet_ctrl *ctrl, u8 event_type, u8 event_info, u8 log_page) { struct nvmet_async_event *aen; @@ -150,13 +150,6 @@ static void nvmet_add_async_event(struct nvmet_ctrl *ctrl, u8 event_type, schedule_work(&ctrl->async_event_work); } -static bool nvmet_aen_disabled(struct nvmet_ctrl *ctrl, u32 aen) -{ - if (!(READ_ONCE(ctrl->aen_enabled) & aen)) - return true; - return test_and_set_bit(aen, &ctrl->aen_masked); -} - static void nvmet_add_to_changed_ns_log(struct nvmet_ctrl *ctrl, __le32 nsid) { u32 i; @@ -187,7 +180,7 @@ void nvmet_ns_changed(struct nvmet_subsys *subsys, u32 nsid) list_for_each_entry(ctrl, &subsys->ctrls, subsys_entry) { nvmet_add_to_changed_ns_log(ctrl, cpu_to_le32(nsid)); - if (nvmet_aen_disabled(ctrl, NVME_AEN_CFG_NS_ATTR)) + if (nvmet_aen_bit_disabled(ctrl, NVME_AEN_BIT_NS_ATTR)) continue; nvmet_add_async_event(ctrl, NVME_AER_TYPE_NOTICE, NVME_AER_NOTICE_NS_CHANGED, @@ -204,7 +197,7 @@ void nvmet_send_ana_event(struct nvmet_subsys *subsys, list_for_each_entry(ctrl, &subsys->ctrls, subsys_entry) { if (port && ctrl->port != port) continue; - if (nvmet_aen_disabled(ctrl, NVME_AEN_CFG_ANA_CHANGE)) + if (nvmet_aen_bit_disabled(ctrl, NVME_AEN_BIT_ANA_CHANGE)) continue; nvmet_add_async_event(ctrl, NVME_AER_TYPE_NOTICE, NVME_AER_NOTICE_ANA, NVME_LOG_ANA); @@ -299,6 +292,15 @@ static void nvmet_keep_alive_timer(struct work_struct *work) { struct nvmet_ctrl *ctrl = container_of(to_delayed_work(work), struct nvmet_ctrl, ka_work); + bool cmd_seen = ctrl->cmd_seen; + + ctrl->cmd_seen = false; + if (cmd_seen) { + pr_debug("ctrl %d reschedule traffic based keep-alive timer\n", + ctrl->cntlid); + schedule_delayed_work(&ctrl->ka_work, ctrl->kato * HZ); + return; + } pr_err("ctrl %d keep-alive timer (%d seconds) expired!\n", ctrl->cntlid, ctrl->kato); @@ -595,26 +597,28 @@ struct nvmet_ns *nvmet_ns_alloc(struct nvmet_subsys *subsys, u32 nsid) return ns; } -static void __nvmet_req_complete(struct nvmet_req *req, u16 status) +static void nvmet_update_sq_head(struct nvmet_req *req) { - u32 old_sqhd, new_sqhd; - u16 sqhd; - - if (status) - nvmet_set_status(req, status); - if (req->sq->size) { + u32 old_sqhd, new_sqhd; + do { old_sqhd = req->sq->sqhd; new_sqhd = (old_sqhd + 1) % req->sq->size; } while (cmpxchg(&req->sq->sqhd, old_sqhd, new_sqhd) != old_sqhd); } - sqhd = req->sq->sqhd & 0x0000FFFF; - req->rsp->sq_head = cpu_to_le16(sqhd); + req->rsp->sq_head = cpu_to_le16(req->sq->sqhd & 0x0000FFFF); +} + +static void __nvmet_req_complete(struct nvmet_req *req, u16 status) +{ + if (!req->sq->sqhd_disabled) + nvmet_update_sq_head(req); req->rsp->sq_id = cpu_to_le16(req->sq->qid); req->rsp->command_id = req->cmd->common.command_id; - + if (unlikely(status)) + nvmet_set_status(req, status); if (req->ns) nvmet_put_namespace(req->ns); req->ops->queue_response(req); @@ -763,6 +767,7 @@ bool nvmet_req_init(struct nvmet_req *req, struct nvmet_cq *cq, req->sg_cnt = 0; req->transfer_len = 0; req->rsp->status = 0; + req->rsp->sq_head = 0; req->ns = NULL; /* no support for fused commands yet */ @@ -801,6 +806,9 @@ bool nvmet_req_init(struct nvmet_req *req, struct nvmet_cq *cq, goto fail; } + if (sq->ctrl) + sq->ctrl->cmd_seen = true; + return true; fail: @@ -1027,14 +1035,18 @@ u16 nvmet_check_ctrl_status(struct nvmet_req *req, struct nvme_command *cmd) return 0; } -static bool __nvmet_host_allowed(struct nvmet_subsys *subsys, - const char *hostnqn) +bool nvmet_host_allowed(struct nvmet_subsys *subsys, const char *hostnqn) { struct nvmet_host_link *p; + lockdep_assert_held(&nvmet_config_sem); + if (subsys->allow_any_host) return true; + if (subsys->type == NVME_NQN_DISC) /* allow all access to disc subsys */ + return true; + list_for_each_entry(p, &subsys->hosts, entry) { if (!strcmp(nvmet_host_name(p->host), hostnqn)) return true; @@ -1043,30 +1055,6 @@ static bool __nvmet_host_allowed(struct nvmet_subsys *subsys, return false; } -static bool nvmet_host_discovery_allowed(struct nvmet_req *req, - const char *hostnqn) -{ - struct nvmet_subsys_link *s; - - list_for_each_entry(s, &req->port->subsystems, entry) { - if (__nvmet_host_allowed(s->subsys, hostnqn)) - return true; - } - - return false; -} - -bool nvmet_host_allowed(struct nvmet_req *req, struct nvmet_subsys *subsys, - const char *hostnqn) -{ - lockdep_assert_held(&nvmet_config_sem); - - if (subsys->type == NVME_NQN_DISC) - return nvmet_host_discovery_allowed(req, hostnqn); - else - return __nvmet_host_allowed(subsys, hostnqn); -} - /* * Note: ctrl->subsys->lock should be held when calling this function */ @@ -1117,7 +1105,7 @@ u16 nvmet_alloc_ctrl(const char *subsysnqn, const char *hostnqn, status = NVME_SC_CONNECT_INVALID_PARAM | NVME_SC_DNR; down_read(&nvmet_config_sem); - if (!nvmet_host_allowed(req, subsys, hostnqn)) { + if (!nvmet_host_allowed(subsys, hostnqn)) { pr_info("connect by host %s for subsystem %s not allowed\n", hostnqn, subsysnqn); req->rsp->result.u32 = IPO_IATTR_CONNECT_DATA(hostnqn); @@ -1175,31 +1163,17 @@ u16 nvmet_alloc_ctrl(const char *subsysnqn, const char *hostnqn, ctrl->cntlid = ret; ctrl->ops = req->ops; - if (ctrl->subsys->type == NVME_NQN_DISC) { - /* Don't accept keep-alive timeout for discovery controllers */ - if (kato) { - status = NVME_SC_INVALID_FIELD | NVME_SC_DNR; - goto out_remove_ida; - } - /* - * Discovery controllers use some arbitrary high value in order - * to cleanup stale discovery sessions - * - * From the latest base diff RC: - * "The Keep Alive command is not supported by - * Discovery controllers. A transport may specify a - * fixed Discovery controller activity timeout value - * (e.g., 2 minutes). If no commands are received - * by a Discovery controller within that time - * period, the controller may perform the - * actions for Keep Alive Timer expiration". - */ - ctrl->kato = NVMET_DISC_KATO; - } else { - /* keep-alive timeout in seconds */ - ctrl->kato = DIV_ROUND_UP(kato, 1000); - } + /* + * Discovery controllers may use some arbitrary high value + * in order to cleanup stale discovery sessions + */ + if ((ctrl->subsys->type == NVME_NQN_DISC) && !kato) + kato = NVMET_DISC_KATO_MS; + + /* keep-alive timeout in seconds */ + ctrl->kato = DIV_ROUND_UP(kato, 1000); + nvmet_start_keep_alive_timer(ctrl); mutex_lock(&subsys->lock); @@ -1210,8 +1184,6 @@ u16 nvmet_alloc_ctrl(const char *subsysnqn, const char *hostnqn, *ctrlp = ctrl; return 0; -out_remove_ida: - ida_simple_remove(&cntlid_ida, ctrl->cntlid); out_free_sqs: kfree(ctrl->sqs); out_free_cqs: diff --git a/drivers/nvme/target/discovery.c b/drivers/nvme/target/discovery.c index bc0aa0bf1543..4d8757ae8210 100644 --- a/drivers/nvme/target/discovery.c +++ b/drivers/nvme/target/discovery.c @@ -18,7 +18,65 @@ struct nvmet_subsys *nvmet_disc_subsys; -u64 nvmet_genctr; +static u64 nvmet_genctr; + +static void __nvmet_disc_changed(struct nvmet_port *port, + struct nvmet_ctrl *ctrl) +{ + if (ctrl->port != port) + return; + + if (nvmet_aen_bit_disabled(ctrl, NVME_AEN_BIT_DISC_CHANGE)) + return; + + nvmet_add_async_event(ctrl, NVME_AER_TYPE_NOTICE, + NVME_AER_NOTICE_DISC_CHANGED, NVME_LOG_DISC); +} + +void nvmet_port_disc_changed(struct nvmet_port *port, + struct nvmet_subsys *subsys) +{ + struct nvmet_ctrl *ctrl; + + nvmet_genctr++; + + list_for_each_entry(ctrl, &nvmet_disc_subsys->ctrls, subsys_entry) { + if (subsys && !nvmet_host_allowed(subsys, ctrl->hostnqn)) + continue; + + __nvmet_disc_changed(port, ctrl); + } +} + +static void __nvmet_subsys_disc_changed(struct nvmet_port *port, + struct nvmet_subsys *subsys, + struct nvmet_host *host) +{ + struct nvmet_ctrl *ctrl; + + list_for_each_entry(ctrl, &nvmet_disc_subsys->ctrls, subsys_entry) { + if (host && strcmp(nvmet_host_name(host), ctrl->hostnqn)) + continue; + + __nvmet_disc_changed(port, ctrl); + } +} + +void nvmet_subsys_disc_changed(struct nvmet_subsys *subsys, + struct nvmet_host *host) +{ + struct nvmet_port *port; + struct nvmet_subsys_link *s; + + nvmet_genctr++; + + list_for_each_entry(port, nvmet_ports, global_entry) + list_for_each_entry(s, &port->subsystems, entry) { + if (s->subsys != subsys) + continue; + __nvmet_subsys_disc_changed(port, subsys, host); + } +} void nvmet_referral_enable(struct nvmet_port *parent, struct nvmet_port *port) { @@ -26,18 +84,18 @@ void nvmet_referral_enable(struct nvmet_port *parent, struct nvmet_port *port) if (list_empty(&port->entry)) { list_add_tail(&port->entry, &parent->referrals); port->enabled = true; - nvmet_genctr++; + nvmet_port_disc_changed(parent, NULL); } up_write(&nvmet_config_sem); } -void nvmet_referral_disable(struct nvmet_port *port) +void nvmet_referral_disable(struct nvmet_port *parent, struct nvmet_port *port) { down_write(&nvmet_config_sem); if (!list_empty(&port->entry)) { port->enabled = false; list_del_init(&port->entry); - nvmet_genctr++; + nvmet_port_disc_changed(parent, NULL); } up_write(&nvmet_config_sem); } @@ -107,7 +165,7 @@ static void nvmet_execute_get_disc_log_page(struct nvmet_req *req) down_read(&nvmet_config_sem); list_for_each_entry(p, &req->port->subsystems, entry) { - if (!nvmet_host_allowed(req, p->subsys, ctrl->hostnqn)) + if (!nvmet_host_allowed(p->subsys, ctrl->hostnqn)) continue; if (residual_len >= entry_size) { char traddr[NVMF_TRADDR_SIZE]; @@ -136,6 +194,8 @@ static void nvmet_execute_get_disc_log_page(struct nvmet_req *req) hdr->numrec = cpu_to_le64(numrec); hdr->recfmt = cpu_to_le16(0); + nvmet_clear_aen_bit(req, NVME_AEN_BIT_DISC_CHANGE); + up_read(&nvmet_config_sem); status = nvmet_copy_to_sgl(req, 0, hdr, data_len); @@ -174,6 +234,8 @@ static void nvmet_execute_identify_disc_ctrl(struct nvmet_req *req) if (req->port->inline_data_size) id->sgls |= cpu_to_le32(1 << 20); + id->oaes = cpu_to_le32(NVMET_DISC_AEN_CFG_OPTIONAL); + strlcpy(id->subnqn, ctrl->subsys->subsysnqn, sizeof(id->subnqn)); status = nvmet_copy_to_sgl(req, 0, id, sizeof(*id)); @@ -183,6 +245,47 @@ out: nvmet_req_complete(req, status); } +static void nvmet_execute_disc_set_features(struct nvmet_req *req) +{ + u32 cdw10 = le32_to_cpu(req->cmd->common.cdw10[0]); + u16 stat; + + switch (cdw10 & 0xff) { + case NVME_FEAT_KATO: + stat = nvmet_set_feat_kato(req); + break; + case NVME_FEAT_ASYNC_EVENT: + stat = nvmet_set_feat_async_event(req, + NVMET_DISC_AEN_CFG_OPTIONAL); + break; + default: + stat = NVME_SC_INVALID_FIELD | NVME_SC_DNR; + break; + } + + nvmet_req_complete(req, stat); +} + +static void nvmet_execute_disc_get_features(struct nvmet_req *req) +{ + u32 cdw10 = le32_to_cpu(req->cmd->common.cdw10[0]); + u16 stat = 0; + + switch (cdw10 & 0xff) { + case NVME_FEAT_KATO: + nvmet_get_feat_kato(req); + break; + case NVME_FEAT_ASYNC_EVENT: + nvmet_get_feat_async_event(req); + break; + default: + stat = NVME_SC_INVALID_FIELD | NVME_SC_DNR; + break; + } + + nvmet_req_complete(req, stat); +} + u16 nvmet_parse_discovery_cmd(struct nvmet_req *req) { struct nvme_command *cmd = req->cmd; @@ -194,6 +297,22 @@ u16 nvmet_parse_discovery_cmd(struct nvmet_req *req) } switch (cmd->common.opcode) { + case nvme_admin_set_features: + req->execute = nvmet_execute_disc_set_features; + req->data_len = 0; + return 0; + case nvme_admin_get_features: + req->execute = nvmet_execute_disc_get_features; + req->data_len = 0; + return 0; + case nvme_admin_async_event: + req->execute = nvmet_execute_async_event; + req->data_len = 0; + return 0; + case nvme_admin_keep_alive: + req->execute = nvmet_execute_keep_alive; + req->data_len = 0; + return 0; case nvme_admin_get_log_page: req->data_len = nvmet_get_log_page_len(cmd); diff --git a/drivers/nvme/target/fabrics-cmd.c b/drivers/nvme/target/fabrics-cmd.c index d84ae004cb85..328ae46d8344 100644 --- a/drivers/nvme/target/fabrics-cmd.c +++ b/drivers/nvme/target/fabrics-cmd.c @@ -115,6 +115,12 @@ static u16 nvmet_install_queue(struct nvmet_ctrl *ctrl, struct nvmet_req *req) /* note: convert queue size from 0's-based value to 1's-based value */ nvmet_cq_setup(ctrl, req->cq, qid, sqsize + 1); nvmet_sq_setup(ctrl, req->sq, qid, sqsize + 1); + + if (c->cattr & NVME_CONNECT_DISABLE_SQFLOW) { + req->sq->sqhd_disabled = true; + req->rsp->sq_head = cpu_to_le16(0xffff); + } + return 0; } diff --git a/drivers/nvme/target/fc.c b/drivers/nvme/target/fc.c index 409081a03b24..f98f5c5bea26 100644 --- a/drivers/nvme/target/fc.c +++ b/drivers/nvme/target/fc.c @@ -86,8 +86,6 @@ struct nvmet_fc_fcp_iod { spinlock_t flock; struct nvmet_req req; - struct work_struct work; - struct work_struct done_work; struct work_struct defer_work; struct nvmet_fc_tgtport *tgtport; @@ -134,7 +132,6 @@ struct nvmet_fc_tgt_queue { u16 sqsize; u16 ersp_ratio; __le16 sqhd; - int cpu; atomic_t connected; atomic_t sqtail; atomic_t zrspcnt; @@ -232,8 +229,6 @@ static LIST_HEAD(nvmet_fc_portentry_list); static void nvmet_fc_handle_ls_rqst_work(struct work_struct *work); -static void nvmet_fc_handle_fcp_rqst_work(struct work_struct *work); -static void nvmet_fc_fcp_rqst_op_done_work(struct work_struct *work); static void nvmet_fc_fcp_rqst_op_defer_work(struct work_struct *work); static void nvmet_fc_tgt_a_put(struct nvmet_fc_tgt_assoc *assoc); static int nvmet_fc_tgt_a_get(struct nvmet_fc_tgt_assoc *assoc); @@ -438,8 +433,6 @@ nvmet_fc_prep_fcp_iodlist(struct nvmet_fc_tgtport *tgtport, int i; for (i = 0; i < queue->sqsize; fod++, i++) { - INIT_WORK(&fod->work, nvmet_fc_handle_fcp_rqst_work); - INIT_WORK(&fod->done_work, nvmet_fc_fcp_rqst_op_done_work); INIT_WORK(&fod->defer_work, nvmet_fc_fcp_rqst_op_defer_work); fod->tgtport = tgtport; fod->queue = queue; @@ -517,10 +510,7 @@ nvmet_fc_queue_fcp_req(struct nvmet_fc_tgtport *tgtport, fcpreq->hwqid = queue->qid ? ((queue->qid - 1) % tgtport->ops->max_hw_queues) : 0; - if (tgtport->ops->target_features & NVMET_FCTGTFEAT_CMD_IN_ISR) - queue_work_on(queue->cpu, queue->work_q, &fod->work); - else - nvmet_fc_handle_fcp_rqst(tgtport, fod); + nvmet_fc_handle_fcp_rqst(tgtport, fod); } static void @@ -599,30 +589,6 @@ nvmet_fc_free_fcp_iod(struct nvmet_fc_tgt_queue *queue, queue_work(queue->work_q, &fod->defer_work); } -static int -nvmet_fc_queue_to_cpu(struct nvmet_fc_tgtport *tgtport, int qid) -{ - int cpu, idx, cnt; - - if (tgtport->ops->max_hw_queues == 1) - return WORK_CPU_UNBOUND; - - /* Simple cpu selection based on qid modulo active cpu count */ - idx = !qid ? 0 : (qid - 1) % num_active_cpus(); - - /* find the n'th active cpu */ - for (cpu = 0, cnt = 0; ; ) { - if (cpu_active(cpu)) { - if (cnt == idx) - break; - cnt++; - } - cpu = (cpu + 1) % num_possible_cpus(); - } - - return cpu; -} - static struct nvmet_fc_tgt_queue * nvmet_fc_alloc_target_queue(struct nvmet_fc_tgt_assoc *assoc, u16 qid, u16 sqsize) @@ -653,7 +619,6 @@ nvmet_fc_alloc_target_queue(struct nvmet_fc_tgt_assoc *assoc, queue->qid = qid; queue->sqsize = sqsize; queue->assoc = assoc; - queue->cpu = nvmet_fc_queue_to_cpu(assoc->tgtport, qid); INIT_LIST_HEAD(&queue->fod_list); INIT_LIST_HEAD(&queue->avail_defer_list); INIT_LIST_HEAD(&queue->pending_cmd_list); @@ -2146,25 +2111,11 @@ nvmet_fc_fod_op_done(struct nvmet_fc_fcp_iod *fod) } static void -nvmet_fc_fcp_rqst_op_done_work(struct work_struct *work) -{ - struct nvmet_fc_fcp_iod *fod = - container_of(work, struct nvmet_fc_fcp_iod, done_work); - - nvmet_fc_fod_op_done(fod); -} - -static void nvmet_fc_xmt_fcp_op_done(struct nvmefc_tgt_fcp_req *fcpreq) { struct nvmet_fc_fcp_iod *fod = fcpreq->nvmet_fc_private; - struct nvmet_fc_tgt_queue *queue = fod->queue; - if (fod->tgtport->ops->target_features & NVMET_FCTGTFEAT_OPDONE_IN_ISR) - /* context switch so completion is not in ISR context */ - queue_work_on(queue->cpu, queue->work_q, &fod->done_work); - else - nvmet_fc_fod_op_done(fod); + nvmet_fc_fod_op_done(fod); } /* @@ -2332,19 +2283,6 @@ transport_error: nvmet_fc_abort_op(tgtport, fod); } -/* - * Actual processing routine for received FC-NVME LS Requests from the LLD - */ -static void -nvmet_fc_handle_fcp_rqst_work(struct work_struct *work) -{ - struct nvmet_fc_fcp_iod *fod = - container_of(work, struct nvmet_fc_fcp_iod, work); - struct nvmet_fc_tgtport *tgtport = fod->tgtport; - - nvmet_fc_handle_fcp_rqst(tgtport, fod); -} - /** * nvmet_fc_rcv_fcp_req - transport entry point called by an LLDD * upon the reception of a NVME FCP CMD IU. diff --git a/drivers/nvme/target/io-cmd-bdev.c b/drivers/nvme/target/io-cmd-bdev.c index c1ec3475a140..c1cb2ed5531c 100644 --- a/drivers/nvme/target/io-cmd-bdev.c +++ b/drivers/nvme/target/io-cmd-bdev.c @@ -115,8 +115,6 @@ static void nvmet_bdev_execute_rw(struct nvmet_req *req) } cookie = submit_bio(bio); - - blk_poll(bdev_get_queue(req->ns->bdev), cookie); } static void nvmet_bdev_execute_flush(struct nvmet_req *req) diff --git a/drivers/nvme/target/io-cmd-file.c b/drivers/nvme/target/io-cmd-file.c index 01feebec29ea..12eaa8ddc248 100644 --- a/drivers/nvme/target/io-cmd-file.c +++ b/drivers/nvme/target/io-cmd-file.c @@ -83,17 +83,16 @@ static void nvmet_file_init_bvec(struct bio_vec *bv, struct sg_page_iter *iter) } static ssize_t nvmet_file_submit_bvec(struct nvmet_req *req, loff_t pos, - unsigned long nr_segs, size_t count) + unsigned long nr_segs, size_t count, int ki_flags) { struct kiocb *iocb = &req->f.iocb; ssize_t (*call_iter)(struct kiocb *iocb, struct iov_iter *iter); struct iov_iter iter; - int ki_flags = 0, rw; - ssize_t ret; + int rw; if (req->cmd->rw.opcode == nvme_cmd_write) { if (req->cmd->rw.control & cpu_to_le16(NVME_RW_FUA)) - ki_flags = IOCB_DSYNC; + ki_flags |= IOCB_DSYNC; call_iter = req->ns->file->f_op->write_iter; rw = WRITE; } else { @@ -107,12 +106,7 @@ static ssize_t nvmet_file_submit_bvec(struct nvmet_req *req, loff_t pos, iocb->ki_filp = req->ns->file; iocb->ki_flags = ki_flags | iocb_flags(req->ns->file); - ret = call_iter(iocb, &iter); - - if (ret != -EIOCBQUEUED && iocb->ki_complete) - iocb->ki_complete(iocb, ret, 0); - - return ret; + return call_iter(iocb, &iter); } static void nvmet_file_io_done(struct kiocb *iocb, long ret, long ret2) @@ -130,7 +124,7 @@ static void nvmet_file_io_done(struct kiocb *iocb, long ret, long ret2) NVME_SC_INTERNAL | NVME_SC_DNR : 0); } -static void nvmet_file_execute_rw(struct nvmet_req *req) +static bool nvmet_file_execute_io(struct nvmet_req *req, int ki_flags) { ssize_t nr_bvec = DIV_ROUND_UP(req->data_len, PAGE_SIZE); struct sg_page_iter sg_pg_iter; @@ -140,30 +134,14 @@ static void nvmet_file_execute_rw(struct nvmet_req *req) ssize_t ret = 0; loff_t pos; - if (!req->sg_cnt || !nr_bvec) { - nvmet_req_complete(req, 0); - return; - } + + if (req->f.mpool_alloc && nr_bvec > NVMET_MAX_MPOOL_BVEC) + is_sync = true; pos = le64_to_cpu(req->cmd->rw.slba) << req->ns->blksize_shift; if (unlikely(pos + req->data_len > req->ns->size)) { nvmet_req_complete(req, NVME_SC_LBA_RANGE | NVME_SC_DNR); - return; - } - - if (nr_bvec > NVMET_MAX_INLINE_BIOVEC) - req->f.bvec = kmalloc_array(nr_bvec, sizeof(struct bio_vec), - GFP_KERNEL); - else - req->f.bvec = req->inline_bvec; - - req->f.mpool_alloc = false; - if (unlikely(!req->f.bvec)) { - /* fallback under memory pressure */ - req->f.bvec = mempool_alloc(req->ns->bvec_pool, GFP_KERNEL); - req->f.mpool_alloc = true; - if (nr_bvec > NVMET_MAX_MPOOL_BVEC) - is_sync = true; + return true; } memset(&req->f.iocb, 0, sizeof(struct kiocb)); @@ -177,9 +155,10 @@ static void nvmet_file_execute_rw(struct nvmet_req *req) if (unlikely(is_sync) && (nr_bvec - 1 == 0 || bv_cnt == NVMET_MAX_MPOOL_BVEC)) { - ret = nvmet_file_submit_bvec(req, pos, bv_cnt, len); + ret = nvmet_file_submit_bvec(req, pos, bv_cnt, len, 0); if (ret < 0) - goto out; + goto complete; + pos += len; bv_cnt = 0; len = 0; @@ -187,30 +166,92 @@ static void nvmet_file_execute_rw(struct nvmet_req *req) nr_bvec--; } - if (WARN_ON_ONCE(total_len != req->data_len)) + if (WARN_ON_ONCE(total_len != req->data_len)) { ret = -EIO; -out: - if (unlikely(is_sync || ret)) { - nvmet_file_io_done(&req->f.iocb, ret < 0 ? ret : total_len, 0); - return; + goto complete; + } + + if (unlikely(is_sync)) { + ret = total_len; + goto complete; } - req->f.iocb.ki_complete = nvmet_file_io_done; - nvmet_file_submit_bvec(req, pos, bv_cnt, total_len); + + /* + * A NULL ki_complete ask for synchronous execution, which we want + * for the IOCB_NOWAIT case. + */ + if (!(ki_flags & IOCB_NOWAIT)) + req->f.iocb.ki_complete = nvmet_file_io_done; + + ret = nvmet_file_submit_bvec(req, pos, bv_cnt, total_len, ki_flags); + + switch (ret) { + case -EIOCBQUEUED: + return true; + case -EAGAIN: + if (WARN_ON_ONCE(!(ki_flags & IOCB_NOWAIT))) + goto complete; + return false; + case -EOPNOTSUPP: + /* + * For file systems returning error -EOPNOTSUPP, handle + * IOCB_NOWAIT error case separately and retry without + * IOCB_NOWAIT. + */ + if ((ki_flags & IOCB_NOWAIT)) + return false; + break; + } + +complete: + nvmet_file_io_done(&req->f.iocb, ret, 0); + return true; } static void nvmet_file_buffered_io_work(struct work_struct *w) { struct nvmet_req *req = container_of(w, struct nvmet_req, f.work); - nvmet_file_execute_rw(req); + nvmet_file_execute_io(req, 0); } -static void nvmet_file_execute_rw_buffered_io(struct nvmet_req *req) +static void nvmet_file_submit_buffered_io(struct nvmet_req *req) { INIT_WORK(&req->f.work, nvmet_file_buffered_io_work); queue_work(buffered_io_wq, &req->f.work); } +static void nvmet_file_execute_rw(struct nvmet_req *req) +{ + ssize_t nr_bvec = DIV_ROUND_UP(req->data_len, PAGE_SIZE); + + if (!req->sg_cnt || !nr_bvec) { + nvmet_req_complete(req, 0); + return; + } + + if (nr_bvec > NVMET_MAX_INLINE_BIOVEC) + req->f.bvec = kmalloc_array(nr_bvec, sizeof(struct bio_vec), + GFP_KERNEL); + else + req->f.bvec = req->inline_bvec; + + if (unlikely(!req->f.bvec)) { + /* fallback under memory pressure */ + req->f.bvec = mempool_alloc(req->ns->bvec_pool, GFP_KERNEL); + req->f.mpool_alloc = true; + } else + req->f.mpool_alloc = false; + + if (req->ns->buffered_io) { + if (likely(!req->f.mpool_alloc) && + nvmet_file_execute_io(req, IOCB_NOWAIT)) + return; + nvmet_file_submit_buffered_io(req); + } else + nvmet_file_execute_io(req, 0); +} + u16 nvmet_file_flush(struct nvmet_req *req) { if (vfs_fsync(req->ns->file, 1) < 0) @@ -320,10 +361,7 @@ u16 nvmet_file_parse_io_cmd(struct nvmet_req *req) switch (cmd->common.opcode) { case nvme_cmd_read: case nvme_cmd_write: - if (req->ns->buffered_io) - req->execute = nvmet_file_execute_rw_buffered_io; - else - req->execute = nvmet_file_execute_rw; + req->execute = nvmet_file_execute_rw; req->data_len = nvmet_rw_len(req); return 0; case nvme_cmd_flush: diff --git a/drivers/nvme/target/nvmet.h b/drivers/nvme/target/nvmet.h index c2b4d9ee6391..7d8b7a7d572a 100644 --- a/drivers/nvme/target/nvmet.h +++ b/drivers/nvme/target/nvmet.h @@ -36,6 +36,8 @@ */ #define NVMET_AEN_CFG_OPTIONAL \ (NVME_AEN_CFG_NS_ATTR | NVME_AEN_CFG_ANA_CHANGE) +#define NVMET_DISC_AEN_CFG_OPTIONAL \ + (NVME_AEN_CFG_DISC_CHANGE) /* * Plus mandatory SMART AENs (we'll never send them, but allow enabling them): @@ -104,6 +106,7 @@ struct nvmet_sq { u16 qid; u16 size; u32 sqhd; + bool sqhd_disabled; struct completion free_done; struct completion confirm_done; }; @@ -137,6 +140,7 @@ struct nvmet_port { struct list_head subsystems; struct config_group referrals_group; struct list_head referrals; + struct list_head global_entry; struct config_group ana_groups_group; struct nvmet_ana_group ana_default_group; enum nvme_ana_state *ana_state; @@ -163,6 +167,8 @@ struct nvmet_ctrl { struct nvmet_cq **cqs; struct nvmet_sq **sqs; + bool cmd_seen; + struct mutex lock; u64 cap; u32 cc; @@ -194,8 +200,8 @@ struct nvmet_ctrl { char subsysnqn[NVMF_NQN_FIELD_LEN]; char hostnqn[NVMF_NQN_FIELD_LEN]; - struct device *p2p_client; - struct radix_tree_root p2p_ns_map; + struct device *p2p_client; + struct radix_tree_root p2p_ns_map; }; struct nvmet_subsys { @@ -308,8 +314,8 @@ struct nvmet_req { void (*execute)(struct nvmet_req *req); const struct nvmet_fabrics_ops *ops; - struct pci_dev *p2p_dev; - struct device *p2p_client; + struct pci_dev *p2p_dev; + struct device *p2p_client; }; extern struct workqueue_struct *buffered_io_wq; @@ -340,6 +346,27 @@ struct nvmet_async_event { u8 log_page; }; +static inline void nvmet_clear_aen_bit(struct nvmet_req *req, u32 bn) +{ + int rae = le32_to_cpu(req->cmd->common.cdw10[0]) & 1 << 15; + + if (!rae) + clear_bit(bn, &req->sq->ctrl->aen_masked); +} + +static inline bool nvmet_aen_bit_disabled(struct nvmet_ctrl *ctrl, u32 bn) +{ + if (!(READ_ONCE(ctrl->aen_enabled) & (1 << bn))) + return true; + return test_and_set_bit(bn, &ctrl->aen_masked); +} + +void nvmet_get_feat_kato(struct nvmet_req *req); +void nvmet_get_feat_async_event(struct nvmet_req *req); +u16 nvmet_set_feat_kato(struct nvmet_req *req); +u16 nvmet_set_feat_async_event(struct nvmet_req *req, u32 mask); +void nvmet_execute_async_event(struct nvmet_req *req); + u16 nvmet_parse_connect_cmd(struct nvmet_req *req); u16 nvmet_bdev_parse_io_cmd(struct nvmet_req *req); u16 nvmet_file_parse_io_cmd(struct nvmet_req *req); @@ -355,6 +382,8 @@ void nvmet_req_complete(struct nvmet_req *req, u16 status); int nvmet_req_alloc_sgl(struct nvmet_req *req); void nvmet_req_free_sgl(struct nvmet_req *req); +void nvmet_execute_keep_alive(struct nvmet_req *req); + void nvmet_cq_setup(struct nvmet_ctrl *ctrl, struct nvmet_cq *cq, u16 qid, u16 size); void nvmet_sq_setup(struct nvmet_ctrl *ctrl, struct nvmet_sq *sq, u16 qid, @@ -395,7 +424,7 @@ int nvmet_enable_port(struct nvmet_port *port); void nvmet_disable_port(struct nvmet_port *port); void nvmet_referral_enable(struct nvmet_port *parent, struct nvmet_port *port); -void nvmet_referral_disable(struct nvmet_port *port); +void nvmet_referral_disable(struct nvmet_port *parent, struct nvmet_port *port); u16 nvmet_copy_to_sgl(struct nvmet_req *req, off_t off, const void *buf, size_t len); @@ -405,6 +434,14 @@ u16 nvmet_zero_sgl(struct nvmet_req *req, off_t off, size_t len); u32 nvmet_get_log_page_len(struct nvme_command *cmd); +extern struct list_head *nvmet_ports; +void nvmet_port_disc_changed(struct nvmet_port *port, + struct nvmet_subsys *subsys); +void nvmet_subsys_disc_changed(struct nvmet_subsys *subsys, + struct nvmet_host *host); +void nvmet_add_async_event(struct nvmet_ctrl *ctrl, u8 event_type, + u8 event_info, u8 log_page); + #define NVMET_QUEUE_SIZE 1024 #define NVMET_NR_QUEUES 128 #define NVMET_MAX_CMD NVMET_QUEUE_SIZE @@ -425,7 +462,7 @@ u32 nvmet_get_log_page_len(struct nvme_command *cmd); #define NVMET_DEFAULT_ANA_GRPID 1 #define NVMET_KAS 10 -#define NVMET_DISC_KATO 120 +#define NVMET_DISC_KATO_MS 120000 int __init nvmet_init_configfs(void); void __exit nvmet_exit_configfs(void); @@ -434,15 +471,13 @@ int __init nvmet_init_discovery(void); void nvmet_exit_discovery(void); extern struct nvmet_subsys *nvmet_disc_subsys; -extern u64 nvmet_genctr; extern struct rw_semaphore nvmet_config_sem; extern u32 nvmet_ana_group_enabled[NVMET_MAX_ANAGRPS + 1]; extern u64 nvmet_ana_chgcnt; extern struct rw_semaphore nvmet_ana_sem; -bool nvmet_host_allowed(struct nvmet_req *req, struct nvmet_subsys *subsys, - const char *hostnqn); +bool nvmet_host_allowed(struct nvmet_subsys *subsys, const char *hostnqn); int nvmet_bdev_ns_enable(struct nvmet_ns *ns); int nvmet_file_ns_enable(struct nvmet_ns *ns); diff --git a/drivers/nvme/target/rdma.c b/drivers/nvme/target/rdma.c index 583086dd9cb9..fb84caddd94b 100644 --- a/drivers/nvme/target/rdma.c +++ b/drivers/nvme/target/rdma.c @@ -196,7 +196,7 @@ nvmet_rdma_put_rsp(struct nvmet_rdma_rsp *rsp) { unsigned long flags; - if (rsp->allocated) { + if (unlikely(rsp->allocated)) { kfree(rsp); return; } diff --git a/drivers/pci/msi.c b/drivers/pci/msi.c index af24ed50a245..265ed3e4c920 100644 --- a/drivers/pci/msi.c +++ b/drivers/pci/msi.c @@ -1036,6 +1036,13 @@ static int __pci_enable_msi_range(struct pci_dev *dev, int minvec, int maxvec, if (maxvec < minvec) return -ERANGE; + /* + * If the caller is passing in sets, we can't support a range of + * vectors. The caller needs to handle that. + */ + if (affd && affd->nr_sets && minvec != maxvec) + return -EINVAL; + if (WARN_ON_ONCE(dev->msi_enabled)) return -EINVAL; @@ -1087,6 +1094,13 @@ static int __pci_enable_msix_range(struct pci_dev *dev, if (maxvec < minvec) return -ERANGE; + /* + * If the caller is passing in sets, we can't support a range of + * supported vectors. The caller needs to handle that. + */ + if (affd && affd->nr_sets && minvec != maxvec) + return -EINVAL; + if (WARN_ON_ONCE(dev->msix_enabled)) return -EINVAL; diff --git a/drivers/s390/block/dasd_ioctl.c b/drivers/s390/block/dasd_ioctl.c index 2016e0ed5865..8e26001dc11c 100644 --- a/drivers/s390/block/dasd_ioctl.c +++ b/drivers/s390/block/dasd_ioctl.c @@ -412,6 +412,7 @@ static int dasd_ioctl_information(struct dasd_block *block, struct ccw_dev_id dev_id; struct dasd_device *base; struct ccw_device *cdev; + struct list_head *l; unsigned long flags; int rc; @@ -462,23 +463,10 @@ static int dasd_ioctl_information(struct dasd_block *block, memcpy(dasd_info->type, base->discipline->name, 4); - if (block->request_queue->request_fn) { - struct list_head *l; -#ifdef DASD_EXTENDED_PROFILING - { - struct list_head *l; - spin_lock_irqsave(&block->lock, flags); - list_for_each(l, &block->request_queue->queue_head) - dasd_info->req_queue_len++; - spin_unlock_irqrestore(&block->lock, flags); - } -#endif /* DASD_EXTENDED_PROFILING */ - spin_lock_irqsave(get_ccwdev_lock(base->cdev), flags); - list_for_each(l, &base->ccw_queue) - dasd_info->chanq_len++; - spin_unlock_irqrestore(get_ccwdev_lock(base->cdev), - flags); - } + spin_lock_irqsave(&block->queue_lock, flags); + list_for_each(l, &base->ccw_queue) + dasd_info->chanq_len++; + spin_unlock_irqrestore(&block->queue_lock, flags); rc = 0; if (copy_to_user(argp, dasd_info, diff --git a/drivers/scsi/Kconfig b/drivers/scsi/Kconfig index 640cd1b31a18..f38882f6f37d 100644 --- a/drivers/scsi/Kconfig +++ b/drivers/scsi/Kconfig @@ -50,18 +50,6 @@ config SCSI_NETLINK default n depends on NET -config SCSI_MQ_DEFAULT - bool "SCSI: use blk-mq I/O path by default" - default y - depends on SCSI - ---help--- - This option enables the blk-mq based I/O path for SCSI devices by - default. With this option the scsi_mod.use_blk_mq module/boot - option defaults to Y, without it to N, but it can still be - overridden either way. - - If unsure say Y. - config SCSI_PROC_FS bool "legacy /proc/scsi/ support" depends on SCSI && PROC_FS diff --git a/drivers/scsi/bnx2i/bnx2i_hwi.c b/drivers/scsi/bnx2i/bnx2i_hwi.c index e9e669a6c2bc..6bad2689edd4 100644 --- a/drivers/scsi/bnx2i/bnx2i_hwi.c +++ b/drivers/scsi/bnx2i/bnx2i_hwi.c @@ -1906,7 +1906,6 @@ static int bnx2i_queue_scsi_cmd_resp(struct iscsi_session *session, struct iscsi_task *task; struct scsi_cmnd *sc; int rc = 0; - int cpu; spin_lock(&session->back_lock); task = iscsi_itt_to_task(bnx2i_conn->cls_conn->dd_data, @@ -1917,14 +1916,9 @@ static int bnx2i_queue_scsi_cmd_resp(struct iscsi_session *session, } sc = task->sc; - if (!blk_rq_cpu_valid(sc->request)) - cpu = smp_processor_id(); - else - cpu = sc->request->cpu; - spin_unlock(&session->back_lock); - p = &per_cpu(bnx2i_percpu, cpu); + p = &per_cpu(bnx2i_percpu, blk_mq_rq_cpu(sc->request)); spin_lock(&p->p_work_lock); if (unlikely(!p->iothread)) { rc = -EINVAL; diff --git a/drivers/scsi/csiostor/csio_scsi.c b/drivers/scsi/csiostor/csio_scsi.c index 8c15b7acb4b7..a95debbea0e4 100644 --- a/drivers/scsi/csiostor/csio_scsi.c +++ b/drivers/scsi/csiostor/csio_scsi.c @@ -1780,16 +1780,10 @@ csio_queuecommand(struct Scsi_Host *host, struct scsi_cmnd *cmnd) int nsge = 0; int rv = SCSI_MLQUEUE_HOST_BUSY, nr; int retval; - int cpu; struct csio_scsi_qset *sqset; struct fc_rport *rport = starget_to_rport(scsi_target(cmnd->device)); - if (!blk_rq_cpu_valid(cmnd->request)) - cpu = smp_processor_id(); - else - cpu = cmnd->request->cpu; - - sqset = &hw->sqset[ln->portid][cpu]; + sqset = &hw->sqset[ln->portid][blk_mq_rq_cpu(cmnd->request)]; nr = fc_remote_port_chkready(rport); if (nr) { diff --git a/drivers/scsi/cxlflash/main.c b/drivers/scsi/cxlflash/main.c index 6637116529aa..abdc9eac4173 100644 --- a/drivers/scsi/cxlflash/main.c +++ b/drivers/scsi/cxlflash/main.c @@ -3088,12 +3088,6 @@ static ssize_t hwq_mode_store(struct device *dev, return -EINVAL; } - if ((mode == HWQ_MODE_TAG) && !shost_use_blk_mq(shost)) { - dev_info(cfgdev, "SCSI-MQ is not enabled, use a different " - "HWQ steering mode.\n"); - return -EINVAL; - } - afu->hwq_mode = mode; return count; diff --git a/drivers/scsi/device_handler/scsi_dh_alua.c b/drivers/scsi/device_handler/scsi_dh_alua.c index 12dc7100bb4c..d7ac498ba35a 100644 --- a/drivers/scsi/device_handler/scsi_dh_alua.c +++ b/drivers/scsi/device_handler/scsi_dh_alua.c @@ -1071,28 +1071,29 @@ static void alua_check(struct scsi_device *sdev, bool force) * Fail I/O to all paths not in state * active/optimized or active/non-optimized. */ -static int alua_prep_fn(struct scsi_device *sdev, struct request *req) +static blk_status_t alua_prep_fn(struct scsi_device *sdev, struct request *req) { struct alua_dh_data *h = sdev->handler_data; struct alua_port_group *pg; unsigned char state = SCSI_ACCESS_STATE_OPTIMAL; - int ret = BLKPREP_OK; rcu_read_lock(); pg = rcu_dereference(h->pg); if (pg) state = pg->state; rcu_read_unlock(); - if (state == SCSI_ACCESS_STATE_TRANSITIONING) - ret = BLKPREP_DEFER; - else if (state != SCSI_ACCESS_STATE_OPTIMAL && - state != SCSI_ACCESS_STATE_ACTIVE && - state != SCSI_ACCESS_STATE_LBA) { - ret = BLKPREP_KILL; + + switch (state) { + case SCSI_ACCESS_STATE_OPTIMAL: + case SCSI_ACCESS_STATE_ACTIVE: + case SCSI_ACCESS_STATE_LBA: + return BLK_STS_OK; + case SCSI_ACCESS_STATE_TRANSITIONING: + return BLK_STS_RESOURCE; + default: req->rq_flags |= RQF_QUIET; + return BLK_STS_IOERR; } - return ret; - } static void alua_rescan(struct scsi_device *sdev) diff --git a/drivers/scsi/device_handler/scsi_dh_emc.c b/drivers/scsi/device_handler/scsi_dh_emc.c index 95c47909a58f..bea8e13febb6 100644 --- a/drivers/scsi/device_handler/scsi_dh_emc.c +++ b/drivers/scsi/device_handler/scsi_dh_emc.c @@ -341,17 +341,17 @@ static int clariion_check_sense(struct scsi_device *sdev, return SCSI_RETURN_NOT_HANDLED; } -static int clariion_prep_fn(struct scsi_device *sdev, struct request *req) +static blk_status_t clariion_prep_fn(struct scsi_device *sdev, + struct request *req) { struct clariion_dh_data *h = sdev->handler_data; - int ret = BLKPREP_OK; if (h->lun_state != CLARIION_LUN_OWNED) { - ret = BLKPREP_KILL; req->rq_flags |= RQF_QUIET; + return BLK_STS_IOERR; } - return ret; + return BLK_STS_OK; } static int clariion_std_inquiry(struct scsi_device *sdev, diff --git a/drivers/scsi/device_handler/scsi_dh_hp_sw.c b/drivers/scsi/device_handler/scsi_dh_hp_sw.c index e65a0ebb4b54..80129b033855 100644 --- a/drivers/scsi/device_handler/scsi_dh_hp_sw.c +++ b/drivers/scsi/device_handler/scsi_dh_hp_sw.c @@ -172,17 +172,16 @@ retry: return rc; } -static int hp_sw_prep_fn(struct scsi_device *sdev, struct request *req) +static blk_status_t hp_sw_prep_fn(struct scsi_device *sdev, struct request *req) { struct hp_sw_dh_data *h = sdev->handler_data; - int ret = BLKPREP_OK; if (h->path_state != HP_SW_PATH_ACTIVE) { - ret = BLKPREP_KILL; req->rq_flags |= RQF_QUIET; + return BLK_STS_IOERR; } - return ret; + return BLK_STS_OK; } /* diff --git a/drivers/scsi/device_handler/scsi_dh_rdac.c b/drivers/scsi/device_handler/scsi_dh_rdac.c index d27fabae8ddd..65f1fe343c64 100644 --- a/drivers/scsi/device_handler/scsi_dh_rdac.c +++ b/drivers/scsi/device_handler/scsi_dh_rdac.c @@ -642,17 +642,16 @@ done: return 0; } -static int rdac_prep_fn(struct scsi_device *sdev, struct request *req) +static blk_status_t rdac_prep_fn(struct scsi_device *sdev, struct request *req) { struct rdac_dh_data *h = sdev->handler_data; - int ret = BLKPREP_OK; if (h->state != RDAC_STATE_ACTIVE) { - ret = BLKPREP_KILL; req->rq_flags |= RQF_QUIET; + return BLK_STS_IOERR; } - return ret; + return BLK_STS_OK; } static int rdac_check_sense(struct scsi_device *sdev, diff --git a/drivers/scsi/fnic/fnic_scsi.c b/drivers/scsi/fnic/fnic_scsi.c index 96acfcecd540..cafbcfb85bfa 100644 --- a/drivers/scsi/fnic/fnic_scsi.c +++ b/drivers/scsi/fnic/fnic_scsi.c @@ -2274,7 +2274,7 @@ fnic_scsi_host_start_tag(struct fnic *fnic, struct scsi_cmnd *sc) return SCSI_NO_TAG; sc->tag = sc->request->tag = dummy->tag; - sc->request->special = sc; + sc->host_scribble = (unsigned char *)dummy; return dummy->tag; } @@ -2286,7 +2286,7 @@ fnic_scsi_host_start_tag(struct fnic *fnic, struct scsi_cmnd *sc) static inline void fnic_scsi_host_end_tag(struct fnic *fnic, struct scsi_cmnd *sc) { - struct request *dummy = sc->request->special; + struct request *dummy = (struct request *)sc->host_scribble; blk_mq_free_request(dummy); } diff --git a/drivers/scsi/hosts.c b/drivers/scsi/hosts.c index ea4b0bb0c1cd..cc71136ba300 100644 --- a/drivers/scsi/hosts.c +++ b/drivers/scsi/hosts.c @@ -222,18 +222,9 @@ int scsi_add_host_with_dma(struct Scsi_Host *shost, struct device *dev, if (error) goto fail; - if (shost_use_blk_mq(shost)) { - error = scsi_mq_setup_tags(shost); - if (error) - goto fail; - } else { - shost->bqt = blk_init_tags(shost->can_queue, - shost->hostt->tag_alloc_policy); - if (!shost->bqt) { - error = -ENOMEM; - goto fail; - } - } + error = scsi_mq_setup_tags(shost); + if (error) + goto fail; if (!shost->shost_gendev.parent) shost->shost_gendev.parent = dev ? dev : &platform_bus; @@ -309,8 +300,7 @@ int scsi_add_host_with_dma(struct Scsi_Host *shost, struct device *dev, pm_runtime_disable(&shost->shost_gendev); pm_runtime_set_suspended(&shost->shost_gendev); pm_runtime_put_noidle(&shost->shost_gendev); - if (shost_use_blk_mq(shost)) - scsi_mq_destroy_tags(shost); + scsi_mq_destroy_tags(shost); fail: return error; } @@ -344,13 +334,8 @@ static void scsi_host_dev_release(struct device *dev) kfree(dev_name(&shost->shost_dev)); } - if (shost_use_blk_mq(shost)) { - if (shost->tag_set.tags) - scsi_mq_destroy_tags(shost); - } else { - if (shost->bqt) - blk_free_tags(shost->bqt); - } + if (shost->tag_set.tags) + scsi_mq_destroy_tags(shost); kfree(shost->shost_data); @@ -472,8 +457,6 @@ struct Scsi_Host *scsi_host_alloc(struct scsi_host_template *sht, int privsize) else shost->dma_boundary = 0xffffffff; - shost->use_blk_mq = scsi_use_blk_mq || shost->hostt->force_blk_mq; - device_initialize(&shost->shost_gendev); dev_set_name(&shost->shost_gendev, "host%d", shost->host_no); shost->shost_gendev.bus = &scsi_bus_type; diff --git a/drivers/scsi/libsas/sas_ata.c b/drivers/scsi/libsas/sas_ata.c index 4f6cdf53e913..c90b278cc28c 100644 --- a/drivers/scsi/libsas/sas_ata.c +++ b/drivers/scsi/libsas/sas_ata.c @@ -601,12 +601,7 @@ void sas_ata_task_abort(struct sas_task *task) /* Bounce SCSI-initiated commands to the SCSI EH */ if (qc->scsicmd) { - struct request_queue *q = qc->scsicmd->device->request_queue; - unsigned long flags; - - spin_lock_irqsave(q->queue_lock, flags); blk_abort_request(qc->scsicmd->request); - spin_unlock_irqrestore(q->queue_lock, flags); return; } diff --git a/drivers/scsi/libsas/sas_scsi_host.c b/drivers/scsi/libsas/sas_scsi_host.c index 33229348dcb6..af085432c5fe 100644 --- a/drivers/scsi/libsas/sas_scsi_host.c +++ b/drivers/scsi/libsas/sas_scsi_host.c @@ -930,16 +930,10 @@ void sas_task_abort(struct sas_task *task) return; } - if (dev_is_sata(task->dev)) { + if (dev_is_sata(task->dev)) sas_ata_task_abort(task); - } else { - struct request_queue *q = sc->device->request_queue; - unsigned long flags; - - spin_lock_irqsave(q->queue_lock, flags); + else blk_abort_request(sc->request); - spin_unlock_irqrestore(q->queue_lock, flags); - } } void sas_target_destroy(struct scsi_target *starget) diff --git a/drivers/scsi/lpfc/lpfc_scsi.c b/drivers/scsi/lpfc/lpfc_scsi.c index 4fa6703a9ec9..baed2b891efb 100644 --- a/drivers/scsi/lpfc/lpfc_scsi.c +++ b/drivers/scsi/lpfc/lpfc_scsi.c @@ -3914,7 +3914,7 @@ int lpfc_sli4_scmd_to_wqidx_distr(struct lpfc_hba *phba, uint32_t tag; uint16_t hwq; - if (cmnd && shost_use_blk_mq(cmnd->device->host)) { + if (cmnd) { tag = blk_mq_unique_tag(cmnd->request); hwq = blk_mq_unique_tag_to_hwq(tag); diff --git a/drivers/scsi/osd/osd_initiator.c b/drivers/scsi/osd/osd_initiator.c index e19fa883376f..60cf7c5eb880 100644 --- a/drivers/scsi/osd/osd_initiator.c +++ b/drivers/scsi/osd/osd_initiator.c @@ -506,11 +506,11 @@ static void osd_request_async_done(struct request *req, blk_status_t error) _set_error_resid(or, req, error); if (req->next_rq) { - __blk_put_request(req->q, req->next_rq); + blk_put_request(req->next_rq); req->next_rq = NULL; } - __blk_put_request(req->q, req); + blk_put_request(req); or->request = NULL; or->in.req = NULL; or->out.req = NULL; diff --git a/drivers/scsi/osst.c b/drivers/scsi/osst.c index 7a1a1edde35d..664c1238a87f 100644 --- a/drivers/scsi/osst.c +++ b/drivers/scsi/osst.c @@ -341,7 +341,7 @@ static void osst_end_async(struct request *req, blk_status_t status) blk_rq_unmap_user(SRpnt->bio); } - __blk_put_request(req->q, req); + blk_put_request(req); } /* osst_request memory management */ diff --git a/drivers/scsi/qedi/qedi_main.c b/drivers/scsi/qedi/qedi_main.c index 105b0e4d7818..311eb22068e1 100644 --- a/drivers/scsi/qedi/qedi_main.c +++ b/drivers/scsi/qedi/qedi_main.c @@ -644,8 +644,7 @@ static struct qedi_ctx *qedi_host_alloc(struct pci_dev *pdev) qedi->max_active_conns = ISCSI_MAX_SESS_PER_HBA; qedi->max_sqes = QEDI_SQ_SIZE; - if (shost_use_blk_mq(shost)) - shost->nr_hw_queues = MIN_NUM_CPUS_MSIX(qedi); + shost->nr_hw_queues = MIN_NUM_CPUS_MSIX(qedi); pci_set_drvdata(pdev, qedi); diff --git a/drivers/scsi/qla2xxx/qla_nvme.c b/drivers/scsi/qla2xxx/qla_nvme.c index 7e78e7eff783..fccc733145fc 100644 --- a/drivers/scsi/qla2xxx/qla_nvme.c +++ b/drivers/scsi/qla2xxx/qla_nvme.c @@ -272,17 +272,6 @@ static void qla_nvme_fcp_abort(struct nvme_fc_local_port *lport, schedule_work(&priv->abort_work); } -static void qla_nvme_poll(struct nvme_fc_local_port *lport, void *hw_queue_handle) -{ - struct qla_qpair *qpair = hw_queue_handle; - unsigned long flags; - struct scsi_qla_host *vha = lport->private; - - spin_lock_irqsave(&qpair->qp_lock, flags); - qla24xx_process_response_queue(vha, qpair->rsp); - spin_unlock_irqrestore(&qpair->qp_lock, flags); -} - static inline int qla2x00_start_nvme_mq(srb_t *sp) { unsigned long flags; @@ -578,7 +567,6 @@ static struct nvme_fc_port_template qla_nvme_fc_transport = { .ls_abort = qla_nvme_ls_abort, .fcp_io = qla_nvme_post_cmd, .fcp_abort = qla_nvme_fcp_abort, - .poll_queue = qla_nvme_poll, .max_hw_queues = 8, .max_sgl_segments = 128, .max_dif_sgl_segments = 64, diff --git a/drivers/scsi/qla2xxx/qla_os.c b/drivers/scsi/qla2xxx/qla_os.c index b658b9a5eb1e..b67c43d2c62f 100644 --- a/drivers/scsi/qla2xxx/qla_os.c +++ b/drivers/scsi/qla2xxx/qla_os.c @@ -857,13 +857,9 @@ qla2xxx_queuecommand(struct Scsi_Host *host, struct scsi_cmnd *cmd) } if (ha->mqenable) { - if (shost_use_blk_mq(vha->host)) { - tag = blk_mq_unique_tag(cmd->request); - hwq = blk_mq_unique_tag_to_hwq(tag); - qpair = ha->queue_pair_map[hwq]; - } else if (vha->vp_idx && vha->qpair) { - qpair = vha->qpair; - } + tag = blk_mq_unique_tag(cmd->request); + hwq = blk_mq_unique_tag_to_hwq(tag); + qpair = ha->queue_pair_map[hwq]; if (qpair) return qla2xxx_mqueuecommand(host, cmd, qpair); @@ -1464,7 +1460,7 @@ __qla2xxx_eh_generic_reset(char *name, enum nexus_wait_type type, goto eh_reset_failed; } err = 2; - if (do_reset(fcport, cmd->device->lun, cmd->request->cpu + 1) + if (do_reset(fcport, cmd->device->lun, blk_mq_rq_cpu(cmd->request) + 1) != QLA_SUCCESS) { ql_log(ql_log_warn, vha, 0x800c, "do_reset failed for cmd=%p.\n", cmd); @@ -3159,7 +3155,7 @@ qla2x00_probe_one(struct pci_dev *pdev, const struct pci_device_id *id) goto probe_failed; } - if (ha->mqenable && shost_use_blk_mq(host)) { + if (ha->mqenable) { /* number of hardware queues supported by blk/scsi-mq*/ host->nr_hw_queues = ha->max_qpairs; @@ -3271,25 +3267,17 @@ qla2x00_probe_one(struct pci_dev *pdev, const struct pci_device_id *id) base_vha->mgmt_svr_loop_id, host->sg_tablesize); if (ha->mqenable) { - bool mq = false; bool startit = false; - if (QLA_TGT_MODE_ENABLED()) { - mq = true; + if (QLA_TGT_MODE_ENABLED()) startit = false; - } - if ((ql2x_ini_mode == QLA2XXX_INI_MODE_ENABLED) && - shost_use_blk_mq(host)) { - mq = true; + if (ql2x_ini_mode == QLA2XXX_INI_MODE_ENABLED) startit = true; - } - if (mq) { - /* Create start of day qpairs for Block MQ */ - for (i = 0; i < ha->max_qpairs; i++) - qla2xxx_create_qpair(base_vha, 5, 0, startit); - } + /* Create start of day qpairs for Block MQ */ + for (i = 0; i < ha->max_qpairs; i++) + qla2xxx_create_qpair(base_vha, 5, 0, startit); } if (ha->flags.running_gold_fw) @@ -6952,11 +6940,12 @@ static int qla2xxx_map_queues(struct Scsi_Host *shost) { int rc; scsi_qla_host_t *vha = (scsi_qla_host_t *)shost->hostdata; + struct blk_mq_queue_map *qmap = &shost->tag_set.map[0]; if (USER_CTRL_IRQ(vha->hw)) - rc = blk_mq_map_queues(&shost->tag_set); + rc = blk_mq_map_queues(qmap); else - rc = blk_mq_pci_map_queues(&shost->tag_set, vha->hw->pdev, 0); + rc = blk_mq_pci_map_queues(qmap, vha->hw->pdev, 0); return rc; } diff --git a/drivers/scsi/scsi.c b/drivers/scsi/scsi.c index fc1356d101b0..7675ff0ca2ea 100644 --- a/drivers/scsi/scsi.c +++ b/drivers/scsi/scsi.c @@ -780,11 +780,8 @@ MODULE_LICENSE("GPL"); module_param(scsi_logging_level, int, S_IRUGO|S_IWUSR); MODULE_PARM_DESC(scsi_logging_level, "a bit mask of logging levels"); -#ifdef CONFIG_SCSI_MQ_DEFAULT +/* This should go away in the future, it doesn't do anything anymore */ bool scsi_use_blk_mq = true; -#else -bool scsi_use_blk_mq = false; -#endif module_param_named(use_blk_mq, scsi_use_blk_mq, bool, S_IWUSR | S_IRUGO); static int __init init_scsi(void) diff --git a/drivers/scsi/scsi_debug.c b/drivers/scsi/scsi_debug.c index 60bcc6df97a9..4740f1e9dd17 100644 --- a/drivers/scsi/scsi_debug.c +++ b/drivers/scsi/scsi_debug.c @@ -5881,8 +5881,7 @@ static int sdebug_driver_probe(struct device *dev) } /* Decide whether to tell scsi subsystem that we want mq */ /* Following should give the same answer for each host */ - if (shost_use_blk_mq(hpnt)) - hpnt->nr_hw_queues = submit_queues; + hpnt->nr_hw_queues = submit_queues; sdbg_host->shost = hpnt; *((struct sdebug_host_info **)hpnt->hostdata) = sdbg_host; diff --git a/drivers/scsi/scsi_error.c b/drivers/scsi/scsi_error.c index c736d61b1648..16eef068e9e9 100644 --- a/drivers/scsi/scsi_error.c +++ b/drivers/scsi/scsi_error.c @@ -297,19 +297,19 @@ enum blk_eh_timer_return scsi_times_out(struct request *req) if (rtn == BLK_EH_DONE) { /* - * For blk-mq, we must set the request state to complete now - * before sending the request to the scsi error handler. This - * will prevent a use-after-free in the event the LLD manages - * to complete the request before the error handler finishes - * processing this timed out request. + * Set the command to complete first in order to prevent a real + * completion from releasing the command while error handling + * is using it. If the command was already completed, then the + * lower level driver beat the timeout handler, and it is safe + * to return without escalating error recovery. * - * If the request was already completed, then the LLD beat the - * time out handler from transferring the request to the scsi - * error handler. In that case we can return immediately as no - * further action is required. + * If timeout handling lost the race to a real completion, the + * block layer may ignore that due to a fake timeout injection, + * so return RESET_TIMER to allow error handling another shot + * at this command. */ - if (req->q->mq_ops && !blk_mq_mark_complete(req)) - return rtn; + if (test_and_set_bit(SCMD_STATE_COMPLETE, &scmd->state)) + return BLK_EH_RESET_TIMER; if (scsi_abort_command(scmd) != SUCCESS) { set_host_byte(scmd, DID_TIME_OUT); scsi_eh_scmd_add(scmd); @@ -1932,7 +1932,7 @@ maybe_retry: static void eh_lock_door_done(struct request *req, blk_status_t status) { - __blk_put_request(req->q, req); + blk_put_request(req); } /** diff --git a/drivers/scsi/scsi_lib.c b/drivers/scsi/scsi_lib.c index fa6e0c3b3aa6..0dbf25512778 100644 --- a/drivers/scsi/scsi_lib.c +++ b/drivers/scsi/scsi_lib.c @@ -168,8 +168,6 @@ static void scsi_mq_requeue_cmd(struct scsi_cmnd *cmd) static void __scsi_queue_insert(struct scsi_cmnd *cmd, int reason, bool unbusy) { struct scsi_device *device = cmd->device; - struct request_queue *q = device->request_queue; - unsigned long flags; SCSI_LOG_MLQUEUE(1, scmd_printk(KERN_INFO, cmd, "Inserting command %p into mlqueue\n", cmd)); @@ -190,26 +188,20 @@ static void __scsi_queue_insert(struct scsi_cmnd *cmd, int reason, bool unbusy) * before blk_cleanup_queue() finishes. */ cmd->result = 0; - if (q->mq_ops) { - /* - * Before a SCSI command is dispatched, - * get_device(&sdev->sdev_gendev) is called and the host, - * target and device busy counters are increased. Since - * requeuing a request causes these actions to be repeated and - * since scsi_device_unbusy() has already been called, - * put_device(&device->sdev_gendev) must still be called. Call - * put_device() after blk_mq_requeue_request() to avoid that - * removal of the SCSI device can start before requeueing has - * happened. - */ - blk_mq_requeue_request(cmd->request, true); - put_device(&device->sdev_gendev); - return; - } - spin_lock_irqsave(q->queue_lock, flags); - blk_requeue_request(q, cmd->request); - kblockd_schedule_work(&device->requeue_work); - spin_unlock_irqrestore(q->queue_lock, flags); + + /* + * Before a SCSI command is dispatched, + * get_device(&sdev->sdev_gendev) is called and the host, + * target and device busy counters are increased. Since + * requeuing a request causes these actions to be repeated and + * since scsi_device_unbusy() has already been called, + * put_device(&device->sdev_gendev) must still be called. Call + * put_device() after blk_mq_requeue_request() to avoid that + * removal of the SCSI device can start before requeueing has + * happened. + */ + blk_mq_requeue_request(cmd->request, true); + put_device(&device->sdev_gendev); } /* @@ -370,10 +362,7 @@ void scsi_device_unbusy(struct scsi_device *sdev) static void scsi_kick_queue(struct request_queue *q) { - if (q->mq_ops) - blk_mq_run_hw_queues(q, false); - else - blk_run_queue(q); + blk_mq_run_hw_queues(q, false); } /* @@ -534,10 +523,7 @@ static void scsi_run_queue(struct request_queue *q) if (!list_empty(&sdev->host->starved_list)) scsi_starved_list_run(sdev->host); - if (q->mq_ops) - blk_mq_run_hw_queues(q, false); - else - blk_run_queue(q); + blk_mq_run_hw_queues(q, false); } void scsi_requeue_run_queue(struct work_struct *work) @@ -550,42 +536,6 @@ void scsi_requeue_run_queue(struct work_struct *work) scsi_run_queue(q); } -/* - * Function: scsi_requeue_command() - * - * Purpose: Handle post-processing of completed commands. - * - * Arguments: q - queue to operate on - * cmd - command that may need to be requeued. - * - * Returns: Nothing - * - * Notes: After command completion, there may be blocks left - * over which weren't finished by the previous command - * this can be for a number of reasons - the main one is - * I/O errors in the middle of the request, in which case - * we need to request the blocks that come after the bad - * sector. - * Notes: Upon return, cmd is a stale pointer. - */ -static void scsi_requeue_command(struct request_queue *q, struct scsi_cmnd *cmd) -{ - struct scsi_device *sdev = cmd->device; - struct request *req = cmd->request; - unsigned long flags; - - spin_lock_irqsave(q->queue_lock, flags); - blk_unprep_request(req); - req->special = NULL; - scsi_put_command(cmd); - blk_requeue_request(q, req); - spin_unlock_irqrestore(q->queue_lock, flags); - - scsi_run_queue(q); - - put_device(&sdev->sdev_gendev); -} - void scsi_run_host_queues(struct Scsi_Host *shost) { struct scsi_device *sdev; @@ -626,42 +576,6 @@ static void scsi_mq_uninit_cmd(struct scsi_cmnd *cmd) scsi_del_cmd_from_list(cmd); } -/* - * Function: scsi_release_buffers() - * - * Purpose: Free resources allocate for a scsi_command. - * - * Arguments: cmd - command that we are bailing. - * - * Lock status: Assumed that no lock is held upon entry. - * - * Returns: Nothing - * - * Notes: In the event that an upper level driver rejects a - * command, we must release resources allocated during - * the __init_io() function. Primarily this would involve - * the scatter-gather table. - */ -static void scsi_release_buffers(struct scsi_cmnd *cmd) -{ - if (cmd->sdb.table.nents) - sg_free_table_chained(&cmd->sdb.table, false); - - memset(&cmd->sdb, 0, sizeof(cmd->sdb)); - - if (scsi_prot_sg_count(cmd)) - sg_free_table_chained(&cmd->prot_sdb->table, false); -} - -static void scsi_release_bidi_buffers(struct scsi_cmnd *cmd) -{ - struct scsi_data_buffer *bidi_sdb = cmd->request->next_rq->special; - - sg_free_table_chained(&bidi_sdb->table, false); - kmem_cache_free(scsi_sdb_cache, bidi_sdb); - cmd->request->next_rq->special = NULL; -} - /* Returns false when no more bytes to process, true if there are more */ static bool scsi_end_request(struct request *req, blk_status_t error, unsigned int bytes, unsigned int bidi_bytes) @@ -687,46 +601,30 @@ static bool scsi_end_request(struct request *req, blk_status_t error, destroy_rcu_head(&cmd->rcu); } - if (req->mq_ctx) { - /* - * In the MQ case the command gets freed by __blk_mq_end_request, - * so we have to do all cleanup that depends on it earlier. - * - * We also can't kick the queues from irq context, so we - * will have to defer it to a workqueue. - */ - scsi_mq_uninit_cmd(cmd); - - /* - * queue is still alive, so grab the ref for preventing it - * from being cleaned up during running queue. - */ - percpu_ref_get(&q->q_usage_counter); - - __blk_mq_end_request(req, error); - - if (scsi_target(sdev)->single_lun || - !list_empty(&sdev->host->starved_list)) - kblockd_schedule_work(&sdev->requeue_work); - else - blk_mq_run_hw_queues(q, true); - - percpu_ref_put(&q->q_usage_counter); - } else { - unsigned long flags; + /* + * In the MQ case the command gets freed by __blk_mq_end_request, + * so we have to do all cleanup that depends on it earlier. + * + * We also can't kick the queues from irq context, so we + * will have to defer it to a workqueue. + */ + scsi_mq_uninit_cmd(cmd); - if (bidi_bytes) - scsi_release_bidi_buffers(cmd); - scsi_release_buffers(cmd); - scsi_put_command(cmd); + /* + * queue is still alive, so grab the ref for preventing it + * from being cleaned up during running queue. + */ + percpu_ref_get(&q->q_usage_counter); - spin_lock_irqsave(q->queue_lock, flags); - blk_finish_request(req, error); - spin_unlock_irqrestore(q->queue_lock, flags); + __blk_mq_end_request(req, error); - scsi_run_queue(q); - } + if (scsi_target(sdev)->single_lun || + !list_empty(&sdev->host->starved_list)) + kblockd_schedule_work(&sdev->requeue_work); + else + blk_mq_run_hw_queues(q, true); + percpu_ref_put(&q->q_usage_counter); put_device(&sdev->sdev_gendev); return false; } @@ -774,13 +672,7 @@ static void scsi_io_completion_reprep(struct scsi_cmnd *cmd, struct request_queue *q) { /* A new command will be prepared and issued. */ - if (q->mq_ops) { - scsi_mq_requeue_cmd(cmd); - } else { - /* Unprep request and put it back at head of the queue. */ - scsi_release_buffers(cmd); - scsi_requeue_command(q, cmd); - } + scsi_mq_requeue_cmd(cmd); } /* Helper for scsi_io_completion() when special action required. */ @@ -1120,7 +1012,8 @@ void scsi_io_completion(struct scsi_cmnd *cmd, unsigned int good_bytes) scsi_io_completion_action(cmd, result); } -static int scsi_init_sgtable(struct request *req, struct scsi_data_buffer *sdb) +static blk_status_t scsi_init_sgtable(struct request *req, + struct scsi_data_buffer *sdb) { int count; @@ -1129,7 +1022,7 @@ static int scsi_init_sgtable(struct request *req, struct scsi_data_buffer *sdb) */ if (unlikely(sg_alloc_table_chained(&sdb->table, blk_rq_nr_phys_segments(req), sdb->table.sgl))) - return BLKPREP_DEFER; + return BLK_STS_RESOURCE; /* * Next, walk the list, and fill in the addresses and sizes of @@ -1139,7 +1032,7 @@ static int scsi_init_sgtable(struct request *req, struct scsi_data_buffer *sdb) BUG_ON(count > sdb->table.nents); sdb->table.nents = count; sdb->length = blk_rq_payload_bytes(req); - return BLKPREP_OK; + return BLK_STS_OK; } /* @@ -1149,62 +1042,48 @@ static int scsi_init_sgtable(struct request *req, struct scsi_data_buffer *sdb) * * Arguments: cmd - Command descriptor we wish to initialize * - * Returns: 0 on success - * BLKPREP_DEFER if the failure is retryable - * BLKPREP_KILL if the failure is fatal + * Returns: BLK_STS_OK on success + * BLK_STS_RESOURCE if the failure is retryable + * BLK_STS_IOERR if the failure is fatal */ -int scsi_init_io(struct scsi_cmnd *cmd) +blk_status_t scsi_init_io(struct scsi_cmnd *cmd) { - struct scsi_device *sdev = cmd->device; struct request *rq = cmd->request; - bool is_mq = (rq->mq_ctx != NULL); - int error = BLKPREP_KILL; + blk_status_t ret; if (WARN_ON_ONCE(!blk_rq_nr_phys_segments(rq))) - goto err_exit; + return BLK_STS_IOERR; - error = scsi_init_sgtable(rq, &cmd->sdb); - if (error) - goto err_exit; + ret = scsi_init_sgtable(rq, &cmd->sdb); + if (ret) + return ret; if (blk_bidi_rq(rq)) { - if (!rq->q->mq_ops) { - struct scsi_data_buffer *bidi_sdb = - kmem_cache_zalloc(scsi_sdb_cache, GFP_ATOMIC); - if (!bidi_sdb) { - error = BLKPREP_DEFER; - goto err_exit; - } - - rq->next_rq->special = bidi_sdb; - } - - error = scsi_init_sgtable(rq->next_rq, rq->next_rq->special); - if (error) - goto err_exit; + ret = scsi_init_sgtable(rq->next_rq, rq->next_rq->special); + if (ret) + goto out_free_sgtables; } if (blk_integrity_rq(rq)) { struct scsi_data_buffer *prot_sdb = cmd->prot_sdb; int ivecs, count; - if (prot_sdb == NULL) { + if (WARN_ON_ONCE(!prot_sdb)) { /* * This can happen if someone (e.g. multipath) * queues a command to a device on an adapter * that does not support DIX. */ - WARN_ON_ONCE(1); - error = BLKPREP_KILL; - goto err_exit; + ret = BLK_STS_IOERR; + goto out_free_sgtables; } ivecs = blk_rq_count_integrity_sg(rq->q, rq->bio); if (sg_alloc_table_chained(&prot_sdb->table, ivecs, prot_sdb->table.sgl)) { - error = BLKPREP_DEFER; - goto err_exit; + ret = BLK_STS_RESOURCE; + goto out_free_sgtables; } count = blk_rq_map_integrity_sg(rq->q, rq->bio, @@ -1216,17 +1095,10 @@ int scsi_init_io(struct scsi_cmnd *cmd) cmd->prot_sdb->table.nents = count; } - return BLKPREP_OK; -err_exit: - if (is_mq) { - scsi_mq_free_sgtables(cmd); - } else { - scsi_release_buffers(cmd); - cmd->request->special = NULL; - scsi_put_command(cmd); - put_device(&sdev->sdev_gendev); - } - return error; + return BLK_STS_OK; +out_free_sgtables: + scsi_mq_free_sgtables(cmd); + return ret; } EXPORT_SYMBOL(scsi_init_io); @@ -1312,7 +1184,8 @@ void scsi_init_command(struct scsi_device *dev, struct scsi_cmnd *cmd) scsi_add_cmd_to_list(cmd); } -static int scsi_setup_scsi_cmnd(struct scsi_device *sdev, struct request *req) +static blk_status_t scsi_setup_scsi_cmnd(struct scsi_device *sdev, + struct request *req) { struct scsi_cmnd *cmd = blk_mq_rq_to_pdu(req); @@ -1323,8 +1196,8 @@ static int scsi_setup_scsi_cmnd(struct scsi_device *sdev, struct request *req) * submit a request without an attached bio. */ if (req->bio) { - int ret = scsi_init_io(cmd); - if (unlikely(ret)) + blk_status_t ret = scsi_init_io(cmd); + if (unlikely(ret != BLK_STS_OK)) return ret; } else { BUG_ON(blk_rq_bytes(req)); @@ -1336,20 +1209,21 @@ static int scsi_setup_scsi_cmnd(struct scsi_device *sdev, struct request *req) cmd->cmnd = scsi_req(req)->cmd; cmd->transfersize = blk_rq_bytes(req); cmd->allowed = scsi_req(req)->retries; - return BLKPREP_OK; + return BLK_STS_OK; } /* * Setup a normal block command. These are simple request from filesystems * that still need to be translated to SCSI CDBs from the ULD. */ -static int scsi_setup_fs_cmnd(struct scsi_device *sdev, struct request *req) +static blk_status_t scsi_setup_fs_cmnd(struct scsi_device *sdev, + struct request *req) { struct scsi_cmnd *cmd = blk_mq_rq_to_pdu(req); if (unlikely(sdev->handler && sdev->handler->prep_fn)) { - int ret = sdev->handler->prep_fn(sdev, req); - if (ret != BLKPREP_OK) + blk_status_t ret = sdev->handler->prep_fn(sdev, req); + if (ret != BLK_STS_OK) return ret; } @@ -1358,7 +1232,8 @@ static int scsi_setup_fs_cmnd(struct scsi_device *sdev, struct request *req) return scsi_cmd_to_driver(cmd)->init_command(cmd); } -static int scsi_setup_cmnd(struct scsi_device *sdev, struct request *req) +static blk_status_t scsi_setup_cmnd(struct scsi_device *sdev, + struct request *req) { struct scsi_cmnd *cmd = blk_mq_rq_to_pdu(req); @@ -1375,129 +1250,48 @@ static int scsi_setup_cmnd(struct scsi_device *sdev, struct request *req) return scsi_setup_fs_cmnd(sdev, req); } -static int +static blk_status_t scsi_prep_state_check(struct scsi_device *sdev, struct request *req) { - int ret = BLKPREP_OK; - - /* - * If the device is not in running state we will reject some - * or all commands. - */ - if (unlikely(sdev->sdev_state != SDEV_RUNNING)) { - switch (sdev->sdev_state) { - case SDEV_OFFLINE: - case SDEV_TRANSPORT_OFFLINE: - /* - * If the device is offline we refuse to process any - * commands. The device must be brought online - * before trying any recovery commands. - */ - sdev_printk(KERN_ERR, sdev, - "rejecting I/O to offline device\n"); - ret = BLKPREP_KILL; - break; - case SDEV_DEL: - /* - * If the device is fully deleted, we refuse to - * process any commands as well. - */ - sdev_printk(KERN_ERR, sdev, - "rejecting I/O to dead device\n"); - ret = BLKPREP_KILL; - break; - case SDEV_BLOCK: - case SDEV_CREATED_BLOCK: - ret = BLKPREP_DEFER; - break; - case SDEV_QUIESCE: - /* - * If the devices is blocked we defer normal commands. - */ - if (req && !(req->rq_flags & RQF_PREEMPT)) - ret = BLKPREP_DEFER; - break; - default: - /* - * For any other not fully online state we only allow - * special commands. In particular any user initiated - * command is not allowed. - */ - if (req && !(req->rq_flags & RQF_PREEMPT)) - ret = BLKPREP_KILL; - break; - } - } - return ret; -} - -static int -scsi_prep_return(struct request_queue *q, struct request *req, int ret) -{ - struct scsi_device *sdev = q->queuedata; - - switch (ret) { - case BLKPREP_KILL: - case BLKPREP_INVALID: - scsi_req(req)->result = DID_NO_CONNECT << 16; - /* release the command and kill it */ - if (req->special) { - struct scsi_cmnd *cmd = req->special; - scsi_release_buffers(cmd); - scsi_put_command(cmd); - put_device(&sdev->sdev_gendev); - req->special = NULL; - } - break; - case BLKPREP_DEFER: + switch (sdev->sdev_state) { + case SDEV_OFFLINE: + case SDEV_TRANSPORT_OFFLINE: /* - * If we defer, the blk_peek_request() returns NULL, but the - * queue must be restarted, so we schedule a callback to happen - * shortly. + * If the device is offline we refuse to process any + * commands. The device must be brought online + * before trying any recovery commands. */ - if (atomic_read(&sdev->device_busy) == 0) - blk_delay_queue(q, SCSI_QUEUE_DELAY); - break; + sdev_printk(KERN_ERR, sdev, + "rejecting I/O to offline device\n"); + return BLK_STS_IOERR; + case SDEV_DEL: + /* + * If the device is fully deleted, we refuse to + * process any commands as well. + */ + sdev_printk(KERN_ERR, sdev, + "rejecting I/O to dead device\n"); + return BLK_STS_IOERR; + case SDEV_BLOCK: + case SDEV_CREATED_BLOCK: + return BLK_STS_RESOURCE; + case SDEV_QUIESCE: + /* + * If the devices is blocked we defer normal commands. + */ + if (req && !(req->rq_flags & RQF_PREEMPT)) + return BLK_STS_RESOURCE; + return BLK_STS_OK; default: - req->rq_flags |= RQF_DONTPREP; - } - - return ret; -} - -static int scsi_prep_fn(struct request_queue *q, struct request *req) -{ - struct scsi_device *sdev = q->queuedata; - struct scsi_cmnd *cmd = blk_mq_rq_to_pdu(req); - int ret; - - ret = scsi_prep_state_check(sdev, req); - if (ret != BLKPREP_OK) - goto out; - - if (!req->special) { - /* Bail if we can't get a reference to the device */ - if (unlikely(!get_device(&sdev->sdev_gendev))) { - ret = BLKPREP_DEFER; - goto out; - } - - scsi_init_command(sdev, cmd); - req->special = cmd; + /* + * For any other not fully online state we only allow + * special commands. In particular any user initiated + * command is not allowed. + */ + if (req && !(req->rq_flags & RQF_PREEMPT)) + return BLK_STS_IOERR; + return BLK_STS_OK; } - - cmd->tag = req->tag; - cmd->request = req; - cmd->prot_op = SCSI_PROT_NORMAL; - - ret = scsi_setup_cmnd(sdev, req); -out: - return scsi_prep_return(q, req, ret); -} - -static void scsi_unprep_fn(struct request_queue *q, struct request *req) -{ - scsi_uninit_cmd(blk_mq_rq_to_pdu(req)); } /* @@ -1519,14 +1313,8 @@ static inline int scsi_dev_queue_ready(struct request_queue *q, /* * unblock after device_blocked iterates to zero */ - if (atomic_dec_return(&sdev->device_blocked) > 0) { - /* - * For the MQ case we take care of this in the caller. - */ - if (!q->mq_ops) - blk_delay_queue(q, SCSI_QUEUE_DELAY); + if (atomic_dec_return(&sdev->device_blocked) > 0) goto out_dec; - } SCSI_LOG_MLQUEUE(3, sdev_printk(KERN_INFO, sdev, "unblocking device at zero depth\n")); } @@ -1661,13 +1449,13 @@ out_dec: * needs to return 'not busy'. Otherwise, request stacking drivers * may hold requests forever. */ -static int scsi_lld_busy(struct request_queue *q) +static bool scsi_mq_lld_busy(struct request_queue *q) { struct scsi_device *sdev = q->queuedata; struct Scsi_Host *shost; if (blk_queue_dying(q)) - return 0; + return false; shost = sdev->host; @@ -1678,43 +1466,9 @@ static int scsi_lld_busy(struct request_queue *q) * in SCSI layer. */ if (scsi_host_in_recovery(shost) || scsi_device_is_busy(sdev)) - return 1; - - return 0; -} - -/* - * Kill a request for a dead device - */ -static void scsi_kill_request(struct request *req, struct request_queue *q) -{ - struct scsi_cmnd *cmd = blk_mq_rq_to_pdu(req); - struct scsi_device *sdev; - struct scsi_target *starget; - struct Scsi_Host *shost; - - blk_start_request(req); - - scmd_printk(KERN_INFO, cmd, "killing request\n"); - - sdev = cmd->device; - starget = scsi_target(sdev); - shost = sdev->host; - scsi_init_cmd_errh(cmd); - cmd->result = DID_NO_CONNECT << 16; - atomic_inc(&cmd->device->iorequest_cnt); - - /* - * SCSI request completion path will do scsi_device_unbusy(), - * bump busy counts. To bump the counters, we need to dance - * with the locks as normal issue path does. - */ - atomic_inc(&sdev->device_busy); - atomic_inc(&shost->host_busy); - if (starget->can_queue > 0) - atomic_inc(&starget->target_busy); + return true; - blk_complete_request(req); + return false; } static void scsi_softirq_done(struct request *rq) @@ -1837,170 +1591,6 @@ static int scsi_dispatch_cmd(struct scsi_cmnd *cmd) return 0; } -/** - * scsi_done - Invoke completion on finished SCSI command. - * @cmd: The SCSI Command for which a low-level device driver (LLDD) gives - * ownership back to SCSI Core -- i.e. the LLDD has finished with it. - * - * Description: This function is the mid-level's (SCSI Core) interrupt routine, - * which regains ownership of the SCSI command (de facto) from a LLDD, and - * calls blk_complete_request() for further processing. - * - * This function is interrupt context safe. - */ -static void scsi_done(struct scsi_cmnd *cmd) -{ - trace_scsi_dispatch_cmd_done(cmd); - blk_complete_request(cmd->request); -} - -/* - * Function: scsi_request_fn() - * - * Purpose: Main strategy routine for SCSI. - * - * Arguments: q - Pointer to actual queue. - * - * Returns: Nothing - * - * Lock status: request queue lock assumed to be held when called. - * - * Note: See sd_zbc.c sd_zbc_write_lock_zone() for write order - * protection for ZBC disks. - */ -static void scsi_request_fn(struct request_queue *q) - __releases(q->queue_lock) - __acquires(q->queue_lock) -{ - struct scsi_device *sdev = q->queuedata; - struct Scsi_Host *shost; - struct scsi_cmnd *cmd; - struct request *req; - - /* - * To start with, we keep looping until the queue is empty, or until - * the host is no longer able to accept any more requests. - */ - shost = sdev->host; - for (;;) { - int rtn; - /* - * get next queueable request. We do this early to make sure - * that the request is fully prepared even if we cannot - * accept it. - */ - req = blk_peek_request(q); - if (!req) - break; - - if (unlikely(!scsi_device_online(sdev))) { - sdev_printk(KERN_ERR, sdev, - "rejecting I/O to offline device\n"); - scsi_kill_request(req, q); - continue; - } - - if (!scsi_dev_queue_ready(q, sdev)) - break; - - /* - * Remove the request from the request list. - */ - if (!(blk_queue_tagged(q) && !blk_queue_start_tag(q, req))) - blk_start_request(req); - - spin_unlock_irq(q->queue_lock); - cmd = blk_mq_rq_to_pdu(req); - if (cmd != req->special) { - printk(KERN_CRIT "impossible request in %s.\n" - "please mail a stack trace to " - "linux-scsi@vger.kernel.org\n", - __func__); - blk_dump_rq_flags(req, "foo"); - BUG(); - } - - /* - * We hit this when the driver is using a host wide - * tag map. For device level tag maps the queue_depth check - * in the device ready fn would prevent us from trying - * to allocate a tag. Since the map is a shared host resource - * we add the dev to the starved list so it eventually gets - * a run when a tag is freed. - */ - if (blk_queue_tagged(q) && !(req->rq_flags & RQF_QUEUED)) { - spin_lock_irq(shost->host_lock); - if (list_empty(&sdev->starved_entry)) - list_add_tail(&sdev->starved_entry, - &shost->starved_list); - spin_unlock_irq(shost->host_lock); - goto not_ready; - } - - if (!scsi_target_queue_ready(shost, sdev)) - goto not_ready; - - if (!scsi_host_queue_ready(q, shost, sdev)) - goto host_not_ready; - - if (sdev->simple_tags) - cmd->flags |= SCMD_TAGGED; - else - cmd->flags &= ~SCMD_TAGGED; - - /* - * Finally, initialize any error handling parameters, and set up - * the timers for timeouts. - */ - scsi_init_cmd_errh(cmd); - - /* - * Dispatch the command to the low-level driver. - */ - cmd->scsi_done = scsi_done; - rtn = scsi_dispatch_cmd(cmd); - if (rtn) { - scsi_queue_insert(cmd, rtn); - spin_lock_irq(q->queue_lock); - goto out_delay; - } - spin_lock_irq(q->queue_lock); - } - - return; - - host_not_ready: - if (scsi_target(sdev)->can_queue > 0) - atomic_dec(&scsi_target(sdev)->target_busy); - not_ready: - /* - * lock q, handle tag, requeue req, and decrement device_busy. We - * must return with queue_lock held. - * - * Decrementing device_busy without checking it is OK, as all such - * cases (host limits or settings) should run the queue at some - * later time. - */ - spin_lock_irq(q->queue_lock); - blk_requeue_request(q, req); - atomic_dec(&sdev->device_busy); -out_delay: - if (!atomic_read(&sdev->device_busy) && !scsi_device_blocked(sdev)) - blk_delay_queue(q, SCSI_QUEUE_DELAY); -} - -static inline blk_status_t prep_to_mq(int ret) -{ - switch (ret) { - case BLKPREP_OK: - return BLK_STS_OK; - case BLKPREP_DEFER: - return BLK_STS_RESOURCE; - default: - return BLK_STS_IOERR; - } -} - /* Size in bytes of the sg-list stored in the scsi-mq command-private data. */ static unsigned int scsi_mq_sgl_size(struct Scsi_Host *shost) { @@ -2008,7 +1598,7 @@ static unsigned int scsi_mq_sgl_size(struct Scsi_Host *shost) sizeof(struct scatterlist); } -static int scsi_mq_prep_fn(struct request *req) +static blk_status_t scsi_mq_prep_fn(struct request *req) { struct scsi_cmnd *cmd = blk_mq_rq_to_pdu(req); struct scsi_device *sdev = req->q->queuedata; @@ -2052,8 +1642,18 @@ static int scsi_mq_prep_fn(struct request *req) static void scsi_mq_done(struct scsi_cmnd *cmd) { + if (unlikely(test_and_set_bit(SCMD_STATE_COMPLETE, &cmd->state))) + return; trace_scsi_dispatch_cmd_done(cmd); - blk_mq_complete_request(cmd->request); + + /* + * If the block layer didn't complete the request due to a timeout + * injection, scsi must clear its internal completed state so that the + * timeout handler will see it needs to escalate its own error + * recovery. + */ + if (unlikely(!blk_mq_complete_request(cmd->request))) + clear_bit(SCMD_STATE_COMPLETE, &cmd->state); } static void scsi_mq_put_budget(struct blk_mq_hw_ctx *hctx) @@ -2096,9 +1696,15 @@ static blk_status_t scsi_queue_rq(struct blk_mq_hw_ctx *hctx, blk_status_t ret; int reason; - ret = prep_to_mq(scsi_prep_state_check(sdev, req)); - if (ret != BLK_STS_OK) - goto out_put_budget; + /* + * If the device is not in running state we will reject some or all + * commands. + */ + if (unlikely(sdev->sdev_state != SDEV_RUNNING)) { + ret = scsi_prep_state_check(sdev, req); + if (ret != BLK_STS_OK) + goto out_put_budget; + } ret = BLK_STS_RESOURCE; if (!scsi_target_queue_ready(shost, sdev)) @@ -2106,8 +1712,9 @@ static blk_status_t scsi_queue_rq(struct blk_mq_hw_ctx *hctx, if (!scsi_host_queue_ready(q, shost, sdev)) goto out_dec_target_busy; + clear_bit(SCMD_STATE_COMPLETE, &cmd->state); if (!(req->rq_flags & RQF_DONTPREP)) { - ret = prep_to_mq(scsi_mq_prep_fn(req)); + ret = scsi_mq_prep_fn(req); if (ret != BLK_STS_OK) goto out_dec_host_busy; req->rq_flags |= RQF_DONTPREP; @@ -2208,7 +1815,7 @@ static int scsi_map_queues(struct blk_mq_tag_set *set) if (shost->hostt->map_queues) return shost->hostt->map_queues(shost); - return blk_mq_map_queues(set); + return blk_mq_map_queues(&set->map[0]); } void __scsi_init_queue(struct Scsi_Host *shost, struct request_queue *q) @@ -2251,77 +1858,6 @@ void __scsi_init_queue(struct Scsi_Host *shost, struct request_queue *q) } EXPORT_SYMBOL_GPL(__scsi_init_queue); -static int scsi_old_init_rq(struct request_queue *q, struct request *rq, - gfp_t gfp) -{ - struct Scsi_Host *shost = q->rq_alloc_data; - const bool unchecked_isa_dma = shost->unchecked_isa_dma; - struct scsi_cmnd *cmd = blk_mq_rq_to_pdu(rq); - - memset(cmd, 0, sizeof(*cmd)); - - if (unchecked_isa_dma) - cmd->flags |= SCMD_UNCHECKED_ISA_DMA; - cmd->sense_buffer = scsi_alloc_sense_buffer(unchecked_isa_dma, gfp, - NUMA_NO_NODE); - if (!cmd->sense_buffer) - goto fail; - cmd->req.sense = cmd->sense_buffer; - - if (scsi_host_get_prot(shost) >= SHOST_DIX_TYPE0_PROTECTION) { - cmd->prot_sdb = kmem_cache_zalloc(scsi_sdb_cache, gfp); - if (!cmd->prot_sdb) - goto fail_free_sense; - } - - return 0; - -fail_free_sense: - scsi_free_sense_buffer(unchecked_isa_dma, cmd->sense_buffer); -fail: - return -ENOMEM; -} - -static void scsi_old_exit_rq(struct request_queue *q, struct request *rq) -{ - struct scsi_cmnd *cmd = blk_mq_rq_to_pdu(rq); - - if (cmd->prot_sdb) - kmem_cache_free(scsi_sdb_cache, cmd->prot_sdb); - scsi_free_sense_buffer(cmd->flags & SCMD_UNCHECKED_ISA_DMA, - cmd->sense_buffer); -} - -struct request_queue *scsi_old_alloc_queue(struct scsi_device *sdev) -{ - struct Scsi_Host *shost = sdev->host; - struct request_queue *q; - - q = blk_alloc_queue_node(GFP_KERNEL, NUMA_NO_NODE, NULL); - if (!q) - return NULL; - q->cmd_size = sizeof(struct scsi_cmnd) + shost->hostt->cmd_size; - q->rq_alloc_data = shost; - q->request_fn = scsi_request_fn; - q->init_rq_fn = scsi_old_init_rq; - q->exit_rq_fn = scsi_old_exit_rq; - q->initialize_rq_fn = scsi_initialize_rq; - - if (blk_init_allocated_queue(q) < 0) { - blk_cleanup_queue(q); - return NULL; - } - - __scsi_init_queue(shost, q); - blk_queue_flag_set(QUEUE_FLAG_SCSI_PASSTHROUGH, q); - blk_queue_prep_rq(q, scsi_prep_fn); - blk_queue_unprep_rq(q, scsi_unprep_fn); - blk_queue_softirq_done(q, scsi_softirq_done); - blk_queue_rq_timed_out(q, scsi_times_out); - blk_queue_lld_busy(q, scsi_lld_busy); - return q; -} - static const struct blk_mq_ops scsi_mq_ops = { .get_budget = scsi_mq_get_budget, .put_budget = scsi_mq_put_budget, @@ -2334,6 +1870,7 @@ static const struct blk_mq_ops scsi_mq_ops = { .init_request = scsi_mq_init_request, .exit_request = scsi_mq_exit_request, .initialize_rq_fn = scsi_initialize_rq, + .busy = scsi_mq_lld_busy, .map_queues = scsi_map_queues, }; @@ -2388,10 +1925,7 @@ struct scsi_device *scsi_device_from_queue(struct request_queue *q) { struct scsi_device *sdev = NULL; - if (q->mq_ops) { - if (q->mq_ops == &scsi_mq_ops) - sdev = q->queuedata; - } else if (q->request_fn == scsi_request_fn) + if (q->mq_ops == &scsi_mq_ops) sdev = q->queuedata; if (!sdev || !get_device(&sdev->sdev_gendev)) sdev = NULL; @@ -2995,39 +2529,6 @@ void sdev_evt_send_simple(struct scsi_device *sdev, EXPORT_SYMBOL_GPL(sdev_evt_send_simple); /** - * scsi_request_fn_active() - number of kernel threads inside scsi_request_fn() - * @sdev: SCSI device to count the number of scsi_request_fn() callers for. - */ -static int scsi_request_fn_active(struct scsi_device *sdev) -{ - struct request_queue *q = sdev->request_queue; - int request_fn_active; - - WARN_ON_ONCE(sdev->host->use_blk_mq); - - spin_lock_irq(q->queue_lock); - request_fn_active = q->request_fn_active; - spin_unlock_irq(q->queue_lock); - - return request_fn_active; -} - -/** - * scsi_wait_for_queuecommand() - wait for ongoing queuecommand() calls - * @sdev: SCSI device pointer. - * - * Wait until the ongoing shost->hostt->queuecommand() calls that are - * invoked from scsi_request_fn() have finished. - */ -static void scsi_wait_for_queuecommand(struct scsi_device *sdev) -{ - WARN_ON_ONCE(sdev->host->use_blk_mq); - - while (scsi_request_fn_active(sdev)) - msleep(20); -} - -/** * scsi_device_quiesce - Block user issued commands. * @sdev: scsi device to quiesce. * @@ -3150,7 +2651,6 @@ EXPORT_SYMBOL(scsi_target_resume); int scsi_internal_device_block_nowait(struct scsi_device *sdev) { struct request_queue *q = sdev->request_queue; - unsigned long flags; int err = 0; err = scsi_device_set_state(sdev, SDEV_BLOCK); @@ -3166,14 +2666,7 @@ int scsi_internal_device_block_nowait(struct scsi_device *sdev) * block layer from calling the midlayer with this device's * request queue. */ - if (q->mq_ops) { - blk_mq_quiesce_queue_nowait(q); - } else { - spin_lock_irqsave(q->queue_lock, flags); - blk_stop_queue(q); - spin_unlock_irqrestore(q->queue_lock, flags); - } - + blk_mq_quiesce_queue_nowait(q); return 0; } EXPORT_SYMBOL_GPL(scsi_internal_device_block_nowait); @@ -3204,12 +2697,8 @@ static int scsi_internal_device_block(struct scsi_device *sdev) mutex_lock(&sdev->state_mutex); err = scsi_internal_device_block_nowait(sdev); - if (err == 0) { - if (q->mq_ops) - blk_mq_quiesce_queue(q); - else - scsi_wait_for_queuecommand(sdev); - } + if (err == 0) + blk_mq_quiesce_queue(q); mutex_unlock(&sdev->state_mutex); return err; @@ -3218,15 +2707,8 @@ static int scsi_internal_device_block(struct scsi_device *sdev) void scsi_start_queue(struct scsi_device *sdev) { struct request_queue *q = sdev->request_queue; - unsigned long flags; - if (q->mq_ops) { - blk_mq_unquiesce_queue(q); - } else { - spin_lock_irqsave(q->queue_lock, flags); - blk_start_queue(q); - spin_unlock_irqrestore(q->queue_lock, flags); - } + blk_mq_unquiesce_queue(q); } /** diff --git a/drivers/scsi/scsi_priv.h b/drivers/scsi/scsi_priv.h index 99f1db5e467e..5f21547b2ad2 100644 --- a/drivers/scsi/scsi_priv.h +++ b/drivers/scsi/scsi_priv.h @@ -92,7 +92,6 @@ extern void scsi_queue_insert(struct scsi_cmnd *cmd, int reason); extern void scsi_io_completion(struct scsi_cmnd *, unsigned int); extern void scsi_run_host_queues(struct Scsi_Host *shost); extern void scsi_requeue_run_queue(struct work_struct *work); -extern struct request_queue *scsi_old_alloc_queue(struct scsi_device *sdev); extern struct request_queue *scsi_mq_alloc_queue(struct scsi_device *sdev); extern void scsi_start_queue(struct scsi_device *sdev); extern int scsi_mq_setup_tags(struct Scsi_Host *shost); diff --git a/drivers/scsi/scsi_scan.c b/drivers/scsi/scsi_scan.c index 78ca63dfba4a..dd0d516f65e2 100644 --- a/drivers/scsi/scsi_scan.c +++ b/drivers/scsi/scsi_scan.c @@ -266,10 +266,7 @@ static struct scsi_device *scsi_alloc_sdev(struct scsi_target *starget, */ sdev->borken = 1; - if (shost_use_blk_mq(shost)) - sdev->request_queue = scsi_mq_alloc_queue(sdev); - else - sdev->request_queue = scsi_old_alloc_queue(sdev); + sdev->request_queue = scsi_mq_alloc_queue(sdev); if (!sdev->request_queue) { /* release fn is set up in scsi_sysfs_device_initialise, so * have to free and put manually here */ @@ -280,11 +277,6 @@ static struct scsi_device *scsi_alloc_sdev(struct scsi_target *starget, WARN_ON_ONCE(!blk_get_queue(sdev->request_queue)); sdev->request_queue->queuedata = sdev; - if (!shost_use_blk_mq(sdev->host)) { - blk_queue_init_tags(sdev->request_queue, - sdev->host->cmd_per_lun, shost->bqt, - shost->hostt->tag_alloc_policy); - } scsi_change_queue_depth(sdev, sdev->host->cmd_per_lun ? sdev->host->cmd_per_lun : 1); diff --git a/drivers/scsi/scsi_sysfs.c b/drivers/scsi/scsi_sysfs.c index 3aee9464a7bf..6a9040faed00 100644 --- a/drivers/scsi/scsi_sysfs.c +++ b/drivers/scsi/scsi_sysfs.c @@ -367,7 +367,6 @@ store_shost_eh_deadline(struct device *dev, struct device_attribute *attr, static DEVICE_ATTR(eh_deadline, S_IRUGO | S_IWUSR, show_shost_eh_deadline, store_shost_eh_deadline); -shost_rd_attr(use_blk_mq, "%d\n"); shost_rd_attr(unique_id, "%u\n"); shost_rd_attr(cmd_per_lun, "%hd\n"); shost_rd_attr(can_queue, "%hd\n"); @@ -386,6 +385,13 @@ show_host_busy(struct device *dev, struct device_attribute *attr, char *buf) } static DEVICE_ATTR(host_busy, S_IRUGO, show_host_busy, NULL); +static ssize_t +show_use_blk_mq(struct device *dev, struct device_attribute *attr, char *buf) +{ + return sprintf(buf, "1\n"); +} +static DEVICE_ATTR(use_blk_mq, S_IRUGO, show_use_blk_mq, NULL); + static struct attribute *scsi_sysfs_shost_attrs[] = { &dev_attr_use_blk_mq.attr, &dev_attr_unique_id.attr, diff --git a/drivers/scsi/scsi_transport_fc.c b/drivers/scsi/scsi_transport_fc.c index 381668fa135d..d7035270d274 100644 --- a/drivers/scsi/scsi_transport_fc.c +++ b/drivers/scsi/scsi_transport_fc.c @@ -3592,7 +3592,7 @@ fc_bsg_job_timeout(struct request *req) /* the blk_end_sync_io() doesn't check the error */ if (inflight) - __blk_complete_request(req); + blk_mq_end_request(req, BLK_STS_IOERR); return BLK_EH_DONE; } @@ -3684,14 +3684,9 @@ static void fc_bsg_goose_queue(struct fc_rport *rport) { struct request_queue *q = rport->rqst_q; - unsigned long flags; - - if (!q) - return; - spin_lock_irqsave(q->queue_lock, flags); - blk_run_queue_async(q); - spin_unlock_irqrestore(q->queue_lock, flags); + if (q) + blk_mq_run_hw_queues(q, true); } /** @@ -3759,6 +3754,37 @@ static int fc_bsg_dispatch(struct bsg_job *job) return fc_bsg_host_dispatch(shost, job); } +static blk_status_t fc_bsg_rport_prep(struct fc_rport *rport) +{ + if (rport->port_state == FC_PORTSTATE_BLOCKED && + !(rport->flags & FC_RPORT_FAST_FAIL_TIMEDOUT)) + return BLK_STS_RESOURCE; + + if (rport->port_state != FC_PORTSTATE_ONLINE) + return BLK_STS_IOERR; + + return BLK_STS_OK; +} + + +static int fc_bsg_dispatch_prep(struct bsg_job *job) +{ + struct fc_rport *rport = fc_bsg_to_rport(job); + blk_status_t ret; + + ret = fc_bsg_rport_prep(rport); + switch (ret) { + case BLK_STS_OK: + break; + case BLK_STS_RESOURCE: + return -EAGAIN; + default: + return -EIO; + } + + return fc_bsg_dispatch(job); +} + /** * fc_bsg_hostadd - Create and add the bsg hooks so we can receive requests * @shost: shost for fc_host @@ -3780,7 +3806,8 @@ fc_bsg_hostadd(struct Scsi_Host *shost, struct fc_host_attrs *fc_host) snprintf(bsg_name, sizeof(bsg_name), "fc_host%d", shost->host_no); - q = bsg_setup_queue(dev, bsg_name, fc_bsg_dispatch, i->f->dd_bsg_size); + q = bsg_setup_queue(dev, bsg_name, fc_bsg_dispatch, fc_bsg_job_timeout, + i->f->dd_bsg_size); if (IS_ERR(q)) { dev_err(dev, "fc_host%d: bsg interface failed to initialize - setup queue\n", @@ -3788,26 +3815,11 @@ fc_bsg_hostadd(struct Scsi_Host *shost, struct fc_host_attrs *fc_host) return PTR_ERR(q); } __scsi_init_queue(shost, q); - blk_queue_rq_timed_out(q, fc_bsg_job_timeout); blk_queue_rq_timeout(q, FC_DEFAULT_BSG_TIMEOUT); fc_host->rqst_q = q; return 0; } -static int fc_bsg_rport_prep(struct request_queue *q, struct request *req) -{ - struct fc_rport *rport = dev_to_rport(q->queuedata); - - if (rport->port_state == FC_PORTSTATE_BLOCKED && - !(rport->flags & FC_RPORT_FAST_FAIL_TIMEDOUT)) - return BLKPREP_DEFER; - - if (rport->port_state != FC_PORTSTATE_ONLINE) - return BLKPREP_KILL; - - return BLKPREP_OK; -} - /** * fc_bsg_rportadd - Create and add the bsg hooks so we can receive requests * @shost: shost that rport is attached to @@ -3825,15 +3837,13 @@ fc_bsg_rportadd(struct Scsi_Host *shost, struct fc_rport *rport) if (!i->f->bsg_request) return -ENOTSUPP; - q = bsg_setup_queue(dev, dev_name(dev), fc_bsg_dispatch, - i->f->dd_bsg_size); + q = bsg_setup_queue(dev, dev_name(dev), fc_bsg_dispatch_prep, + fc_bsg_job_timeout, i->f->dd_bsg_size); if (IS_ERR(q)) { dev_err(dev, "failed to setup bsg queue\n"); return PTR_ERR(q); } __scsi_init_queue(shost, q); - blk_queue_prep_rq(q, fc_bsg_rport_prep); - blk_queue_rq_timed_out(q, fc_bsg_job_timeout); blk_queue_rq_timeout(q, BLK_DEFAULT_SG_TIMEOUT); rport->rqst_q = q; return 0; @@ -3852,10 +3862,7 @@ fc_bsg_rportadd(struct Scsi_Host *shost, struct fc_rport *rport) static void fc_bsg_remove(struct request_queue *q) { - if (q) { - bsg_unregister_queue(q); - blk_cleanup_queue(q); - } + bsg_remove_queue(q); } diff --git a/drivers/scsi/scsi_transport_iscsi.c b/drivers/scsi/scsi_transport_iscsi.c index 6fd2fe210fc3..ff123023e5a5 100644 --- a/drivers/scsi/scsi_transport_iscsi.c +++ b/drivers/scsi/scsi_transport_iscsi.c @@ -1542,7 +1542,7 @@ iscsi_bsg_host_add(struct Scsi_Host *shost, struct iscsi_cls_host *ihost) return -ENOTSUPP; snprintf(bsg_name, sizeof(bsg_name), "iscsi_host%d", shost->host_no); - q = bsg_setup_queue(dev, bsg_name, iscsi_bsg_host_dispatch, 0); + q = bsg_setup_queue(dev, bsg_name, iscsi_bsg_host_dispatch, NULL, 0); if (IS_ERR(q)) { shost_printk(KERN_ERR, shost, "bsg interface failed to " "initialize - no request queue\n"); @@ -1576,10 +1576,7 @@ static int iscsi_remove_host(struct transport_container *tc, struct Scsi_Host *shost = dev_to_shost(dev); struct iscsi_cls_host *ihost = shost->shost_data; - if (ihost->bsg_q) { - bsg_unregister_queue(ihost->bsg_q); - blk_cleanup_queue(ihost->bsg_q); - } + bsg_remove_queue(ihost->bsg_q); return 0; } diff --git a/drivers/scsi/scsi_transport_sas.c b/drivers/scsi/scsi_transport_sas.c index 0a165b2b3e81..692b46937e52 100644 --- a/drivers/scsi/scsi_transport_sas.c +++ b/drivers/scsi/scsi_transport_sas.c @@ -198,7 +198,7 @@ static int sas_bsg_initialize(struct Scsi_Host *shost, struct sas_rphy *rphy) if (rphy) { q = bsg_setup_queue(&rphy->dev, dev_name(&rphy->dev), - sas_smp_dispatch, 0); + sas_smp_dispatch, NULL, 0); if (IS_ERR(q)) return PTR_ERR(q); rphy->q = q; @@ -207,7 +207,7 @@ static int sas_bsg_initialize(struct Scsi_Host *shost, struct sas_rphy *rphy) snprintf(name, sizeof(name), "sas_host%d", shost->host_no); q = bsg_setup_queue(&shost->shost_gendev, name, - sas_smp_dispatch, 0); + sas_smp_dispatch, NULL, 0); if (IS_ERR(q)) return PTR_ERR(q); to_sas_host_attrs(shost)->q = q; @@ -246,11 +246,7 @@ static int sas_host_remove(struct transport_container *tc, struct device *dev, struct Scsi_Host *shost = dev_to_shost(dev); struct request_queue *q = to_sas_host_attrs(shost)->q; - if (q) { - bsg_unregister_queue(q); - blk_cleanup_queue(q); - } - + bsg_remove_queue(q); return 0; } diff --git a/drivers/scsi/sd.c b/drivers/scsi/sd.c index 3bb2b3351e35..4a6ed2fc8c71 100644 --- a/drivers/scsi/sd.c +++ b/drivers/scsi/sd.c @@ -114,7 +114,7 @@ static int sd_suspend_system(struct device *); static int sd_suspend_runtime(struct device *); static int sd_resume(struct device *); static void sd_rescan(struct device *); -static int sd_init_command(struct scsi_cmnd *SCpnt); +static blk_status_t sd_init_command(struct scsi_cmnd *SCpnt); static void sd_uninit_command(struct scsi_cmnd *SCpnt); static int sd_done(struct scsi_cmnd *); static void sd_eh_reset(struct scsi_cmnd *); @@ -750,7 +750,7 @@ static void sd_config_discard(struct scsi_disk *sdkp, unsigned int mode) blk_queue_flag_set(QUEUE_FLAG_DISCARD, q); } -static int sd_setup_unmap_cmnd(struct scsi_cmnd *cmd) +static blk_status_t sd_setup_unmap_cmnd(struct scsi_cmnd *cmd) { struct scsi_device *sdp = cmd->device; struct request *rq = cmd->request; @@ -761,7 +761,7 @@ static int sd_setup_unmap_cmnd(struct scsi_cmnd *cmd) rq->special_vec.bv_page = alloc_page(GFP_ATOMIC | __GFP_ZERO); if (!rq->special_vec.bv_page) - return BLKPREP_DEFER; + return BLK_STS_RESOURCE; rq->special_vec.bv_offset = 0; rq->special_vec.bv_len = data_len; rq->rq_flags |= RQF_SPECIAL_PAYLOAD; @@ -784,7 +784,8 @@ static int sd_setup_unmap_cmnd(struct scsi_cmnd *cmd) return scsi_init_io(cmd); } -static int sd_setup_write_same16_cmnd(struct scsi_cmnd *cmd, bool unmap) +static blk_status_t sd_setup_write_same16_cmnd(struct scsi_cmnd *cmd, + bool unmap) { struct scsi_device *sdp = cmd->device; struct request *rq = cmd->request; @@ -794,7 +795,7 @@ static int sd_setup_write_same16_cmnd(struct scsi_cmnd *cmd, bool unmap) rq->special_vec.bv_page = alloc_page(GFP_ATOMIC | __GFP_ZERO); if (!rq->special_vec.bv_page) - return BLKPREP_DEFER; + return BLK_STS_RESOURCE; rq->special_vec.bv_offset = 0; rq->special_vec.bv_len = data_len; rq->rq_flags |= RQF_SPECIAL_PAYLOAD; @@ -814,7 +815,8 @@ static int sd_setup_write_same16_cmnd(struct scsi_cmnd *cmd, bool unmap) return scsi_init_io(cmd); } -static int sd_setup_write_same10_cmnd(struct scsi_cmnd *cmd, bool unmap) +static blk_status_t sd_setup_write_same10_cmnd(struct scsi_cmnd *cmd, + bool unmap) { struct scsi_device *sdp = cmd->device; struct request *rq = cmd->request; @@ -824,7 +826,7 @@ static int sd_setup_write_same10_cmnd(struct scsi_cmnd *cmd, bool unmap) rq->special_vec.bv_page = alloc_page(GFP_ATOMIC | __GFP_ZERO); if (!rq->special_vec.bv_page) - return BLKPREP_DEFER; + return BLK_STS_RESOURCE; rq->special_vec.bv_offset = 0; rq->special_vec.bv_len = data_len; rq->rq_flags |= RQF_SPECIAL_PAYLOAD; @@ -844,7 +846,7 @@ static int sd_setup_write_same10_cmnd(struct scsi_cmnd *cmd, bool unmap) return scsi_init_io(cmd); } -static int sd_setup_write_zeroes_cmnd(struct scsi_cmnd *cmd) +static blk_status_t sd_setup_write_zeroes_cmnd(struct scsi_cmnd *cmd) { struct request *rq = cmd->request; struct scsi_device *sdp = cmd->device; @@ -862,7 +864,7 @@ static int sd_setup_write_zeroes_cmnd(struct scsi_cmnd *cmd) } if (sdp->no_write_same) - return BLKPREP_INVALID; + return BLK_STS_TARGET; if (sdkp->ws16 || sector > 0xffffffff || nr_sectors > 0xffff) return sd_setup_write_same16_cmnd(cmd, false); @@ -939,7 +941,7 @@ out: * Will set up either WRITE SAME(10) or WRITE SAME(16) depending on * the preference indicated by the target device. **/ -static int sd_setup_write_same_cmnd(struct scsi_cmnd *cmd) +static blk_status_t sd_setup_write_same_cmnd(struct scsi_cmnd *cmd) { struct request *rq = cmd->request; struct scsi_device *sdp = cmd->device; @@ -948,10 +950,10 @@ static int sd_setup_write_same_cmnd(struct scsi_cmnd *cmd) sector_t sector = blk_rq_pos(rq); unsigned int nr_sectors = blk_rq_sectors(rq); unsigned int nr_bytes = blk_rq_bytes(rq); - int ret; + blk_status_t ret; if (sdkp->device->no_write_same) - return BLKPREP_INVALID; + return BLK_STS_TARGET; BUG_ON(bio_offset(bio) || bio_iovec(bio).bv_len != sdp->sector_size); @@ -992,7 +994,7 @@ static int sd_setup_write_same_cmnd(struct scsi_cmnd *cmd) return ret; } -static int sd_setup_flush_cmnd(struct scsi_cmnd *cmd) +static blk_status_t sd_setup_flush_cmnd(struct scsi_cmnd *cmd) { struct request *rq = cmd->request; @@ -1005,10 +1007,10 @@ static int sd_setup_flush_cmnd(struct scsi_cmnd *cmd) cmd->allowed = SD_MAX_RETRIES; rq->timeout = rq->q->rq_timeout * SD_FLUSH_TIMEOUT_MULTIPLIER; - return BLKPREP_OK; + return BLK_STS_OK; } -static int sd_setup_read_write_cmnd(struct scsi_cmnd *SCpnt) +static blk_status_t sd_setup_read_write_cmnd(struct scsi_cmnd *SCpnt) { struct request *rq = SCpnt->request; struct scsi_device *sdp = SCpnt->device; @@ -1018,18 +1020,14 @@ static int sd_setup_read_write_cmnd(struct scsi_cmnd *SCpnt) sector_t threshold; unsigned int this_count = blk_rq_sectors(rq); unsigned int dif, dix; - int ret; unsigned char protect; + blk_status_t ret; ret = scsi_init_io(SCpnt); - if (ret != BLKPREP_OK) + if (ret != BLK_STS_OK) return ret; WARN_ON_ONCE(SCpnt != rq->special); - /* from here on until we're complete, any goto out - * is used for a killable error condition */ - ret = BLKPREP_KILL; - SCSI_LOG_HLQUEUE(1, scmd_printk(KERN_INFO, SCpnt, "%s: block=%llu, count=%d\n", @@ -1042,7 +1040,7 @@ static int sd_setup_read_write_cmnd(struct scsi_cmnd *SCpnt) blk_rq_sectors(rq))); SCSI_LOG_HLQUEUE(2, scmd_printk(KERN_INFO, SCpnt, "Retry with 0x%p\n", SCpnt)); - goto out; + return BLK_STS_IOERR; } if (sdp->changed) { @@ -1051,7 +1049,7 @@ static int sd_setup_read_write_cmnd(struct scsi_cmnd *SCpnt) * the changed bit has been reset */ /* printk("SCSI disk has been changed or is not present. Prohibiting further I/O.\n"); */ - goto out; + return BLK_STS_IOERR; } /* @@ -1089,31 +1087,28 @@ static int sd_setup_read_write_cmnd(struct scsi_cmnd *SCpnt) if ((block & 1) || (blk_rq_sectors(rq) & 1)) { scmd_printk(KERN_ERR, SCpnt, "Bad block number requested\n"); - goto out; - } else { - block = block >> 1; - this_count = this_count >> 1; + return BLK_STS_IOERR; } + block = block >> 1; + this_count = this_count >> 1; } if (sdp->sector_size == 2048) { if ((block & 3) || (blk_rq_sectors(rq) & 3)) { scmd_printk(KERN_ERR, SCpnt, "Bad block number requested\n"); - goto out; - } else { - block = block >> 2; - this_count = this_count >> 2; + return BLK_STS_IOERR; } + block = block >> 2; + this_count = this_count >> 2; } if (sdp->sector_size == 4096) { if ((block & 7) || (blk_rq_sectors(rq) & 7)) { scmd_printk(KERN_ERR, SCpnt, "Bad block number requested\n"); - goto out; - } else { - block = block >> 3; - this_count = this_count >> 3; + return BLK_STS_IOERR; } + block = block >> 3; + this_count = this_count >> 3; } if (rq_data_dir(rq) == WRITE) { SCpnt->cmnd[0] = WRITE_6; @@ -1125,7 +1120,7 @@ static int sd_setup_read_write_cmnd(struct scsi_cmnd *SCpnt) SCpnt->cmnd[0] = READ_6; } else { scmd_printk(KERN_ERR, SCpnt, "Unknown command %d\n", req_op(rq)); - goto out; + return BLK_STS_IOERR; } SCSI_LOG_HLQUEUE(2, scmd_printk(KERN_INFO, SCpnt, @@ -1145,10 +1140,8 @@ static int sd_setup_read_write_cmnd(struct scsi_cmnd *SCpnt) if (protect && sdkp->protection_type == T10_PI_TYPE2_PROTECTION) { SCpnt->cmnd = mempool_alloc(sd_cdb_pool, GFP_ATOMIC); - if (unlikely(SCpnt->cmnd == NULL)) { - ret = BLKPREP_DEFER; - goto out; - } + if (unlikely(!SCpnt->cmnd)) + return BLK_STS_RESOURCE; SCpnt->cmd_len = SD_EXT_CDB_SIZE; memset(SCpnt->cmnd, 0, SCpnt->cmd_len); @@ -1216,7 +1209,7 @@ static int sd_setup_read_write_cmnd(struct scsi_cmnd *SCpnt) */ scmd_printk(KERN_ERR, SCpnt, "FUA write on READ/WRITE(6) drive\n"); - goto out; + return BLK_STS_IOERR; } SCpnt->cmnd[1] |= (unsigned char) ((block >> 16) & 0x1f); @@ -1240,12 +1233,10 @@ static int sd_setup_read_write_cmnd(struct scsi_cmnd *SCpnt) * This indicates that the command is ready from our end to be * queued. */ - ret = BLKPREP_OK; - out: - return ret; + return BLK_STS_OK; } -static int sd_init_command(struct scsi_cmnd *cmd) +static blk_status_t sd_init_command(struct scsi_cmnd *cmd) { struct request *rq = cmd->request; @@ -1261,7 +1252,7 @@ static int sd_init_command(struct scsi_cmnd *cmd) case SD_LBP_ZERO: return sd_setup_write_same10_cmnd(cmd, false); default: - return BLKPREP_INVALID; + return BLK_STS_TARGET; } case REQ_OP_WRITE_ZEROES: return sd_setup_write_zeroes_cmnd(cmd); @@ -1276,7 +1267,7 @@ static int sd_init_command(struct scsi_cmnd *cmd) return sd_zbc_setup_reset_cmnd(cmd); default: WARN_ON_ONCE(1); - return BLKPREP_KILL; + return BLK_STS_NOTSUPP; } } diff --git a/drivers/scsi/sd.h b/drivers/scsi/sd.h index 1d63f3a23ffb..7f43e6839bce 100644 --- a/drivers/scsi/sd.h +++ b/drivers/scsi/sd.h @@ -271,7 +271,7 @@ static inline int sd_is_zoned(struct scsi_disk *sdkp) extern int sd_zbc_read_zones(struct scsi_disk *sdkp, unsigned char *buffer); extern void sd_zbc_print_zones(struct scsi_disk *sdkp); -extern int sd_zbc_setup_reset_cmnd(struct scsi_cmnd *cmd); +extern blk_status_t sd_zbc_setup_reset_cmnd(struct scsi_cmnd *cmd); extern void sd_zbc_complete(struct scsi_cmnd *cmd, unsigned int good_bytes, struct scsi_sense_hdr *sshdr); extern int sd_zbc_report_zones(struct gendisk *disk, sector_t sector, @@ -288,9 +288,9 @@ static inline int sd_zbc_read_zones(struct scsi_disk *sdkp, static inline void sd_zbc_print_zones(struct scsi_disk *sdkp) {} -static inline int sd_zbc_setup_reset_cmnd(struct scsi_cmnd *cmd) +static inline blk_status_t sd_zbc_setup_reset_cmnd(struct scsi_cmnd *cmd) { - return BLKPREP_INVALID; + return BLK_STS_TARGET; } static inline void sd_zbc_complete(struct scsi_cmnd *cmd, diff --git a/drivers/scsi/sd_zbc.c b/drivers/scsi/sd_zbc.c index e06c48c866e4..83365b29a4d8 100644 --- a/drivers/scsi/sd_zbc.c +++ b/drivers/scsi/sd_zbc.c @@ -185,7 +185,7 @@ static inline sector_t sd_zbc_zone_sectors(struct scsi_disk *sdkp) * * Called from sd_init_command() for a REQ_OP_ZONE_RESET request. */ -int sd_zbc_setup_reset_cmnd(struct scsi_cmnd *cmd) +blk_status_t sd_zbc_setup_reset_cmnd(struct scsi_cmnd *cmd) { struct request *rq = cmd->request; struct scsi_disk *sdkp = scsi_disk(rq->rq_disk); @@ -194,14 +194,14 @@ int sd_zbc_setup_reset_cmnd(struct scsi_cmnd *cmd) if (!sd_is_zoned(sdkp)) /* Not a zoned device */ - return BLKPREP_KILL; + return BLK_STS_IOERR; if (sdkp->device->changed) - return BLKPREP_KILL; + return BLK_STS_IOERR; if (sector & (sd_zbc_zone_sectors(sdkp) - 1)) /* Unaligned request */ - return BLKPREP_KILL; + return BLK_STS_IOERR; cmd->cmd_len = 16; memset(cmd->cmnd, 0, cmd->cmd_len); @@ -214,7 +214,7 @@ int sd_zbc_setup_reset_cmnd(struct scsi_cmnd *cmd) cmd->transfersize = 0; cmd->allowed = 0; - return BLKPREP_OK; + return BLK_STS_OK; } /** diff --git a/drivers/scsi/sg.c b/drivers/scsi/sg.c index c6ad00703c5b..4e27460ec926 100644 --- a/drivers/scsi/sg.c +++ b/drivers/scsi/sg.c @@ -1390,7 +1390,7 @@ sg_rq_end_io(struct request *rq, blk_status_t status) */ srp->rq = NULL; scsi_req_free_cmd(scsi_req(rq)); - __blk_put_request(rq->q, rq); + blk_put_request(rq); write_lock_irqsave(&sfp->rq_list_lock, iflags); if (unlikely(srp->orphan)) { diff --git a/drivers/scsi/smartpqi/smartpqi_init.c b/drivers/scsi/smartpqi/smartpqi_init.c index a25a07a0b7f0..bac084260d80 100644 --- a/drivers/scsi/smartpqi/smartpqi_init.c +++ b/drivers/scsi/smartpqi/smartpqi_init.c @@ -5319,7 +5319,8 @@ static int pqi_map_queues(struct Scsi_Host *shost) { struct pqi_ctrl_info *ctrl_info = shost_to_hba(shost); - return blk_mq_pci_map_queues(&shost->tag_set, ctrl_info->pci_dev, 0); + return blk_mq_pci_map_queues(&shost->tag_set.map[0], + ctrl_info->pci_dev, 0); } static int pqi_getpciinfo_ioctl(struct pqi_ctrl_info *ctrl_info, diff --git a/drivers/scsi/sr.c b/drivers/scsi/sr.c index 54dd70ae9731..38ddbbfe5f3c 100644 --- a/drivers/scsi/sr.c +++ b/drivers/scsi/sr.c @@ -80,7 +80,7 @@ MODULE_ALIAS_SCSI_DEVICE(TYPE_WORM); static DEFINE_MUTEX(sr_mutex); static int sr_probe(struct device *); static int sr_remove(struct device *); -static int sr_init_command(struct scsi_cmnd *SCpnt); +static blk_status_t sr_init_command(struct scsi_cmnd *SCpnt); static int sr_done(struct scsi_cmnd *); static int sr_runtime_suspend(struct device *dev); @@ -384,22 +384,22 @@ static int sr_done(struct scsi_cmnd *SCpnt) return good_bytes; } -static int sr_init_command(struct scsi_cmnd *SCpnt) +static blk_status_t sr_init_command(struct scsi_cmnd *SCpnt) { int block = 0, this_count, s_size; struct scsi_cd *cd; struct request *rq = SCpnt->request; - int ret; + blk_status_t ret; ret = scsi_init_io(SCpnt); - if (ret != BLKPREP_OK) + if (ret != BLK_STS_OK) goto out; WARN_ON_ONCE(SCpnt != rq->special); cd = scsi_cd(rq->rq_disk); /* from here on until we're complete, any goto out * is used for a killable error condition */ - ret = BLKPREP_KILL; + ret = BLK_STS_IOERR; SCSI_LOG_HLQUEUE(1, scmd_printk(KERN_INFO, SCpnt, "Doing sr request, block = %d\n", block)); @@ -516,7 +516,7 @@ static int sr_init_command(struct scsi_cmnd *SCpnt) * This indicates that the command is ready from our end to be * queued. */ - ret = BLKPREP_OK; + ret = BLK_STS_OK; out: return ret; } diff --git a/drivers/scsi/st.c b/drivers/scsi/st.c index 307df2fa39a3..7ff22d3f03e3 100644 --- a/drivers/scsi/st.c +++ b/drivers/scsi/st.c @@ -530,7 +530,7 @@ static void st_scsi_execute_end(struct request *req, blk_status_t status) complete(SRpnt->waiting); blk_rq_unmap_user(tmp); - __blk_put_request(req->q, req); + blk_put_request(req); } static int st_scsi_execute(struct st_request *SRpnt, const unsigned char *cmd, diff --git a/drivers/scsi/ufs/ufs_bsg.c b/drivers/scsi/ufs/ufs_bsg.c index e5f8e54bf644..775bb4e5e36e 100644 --- a/drivers/scsi/ufs/ufs_bsg.c +++ b/drivers/scsi/ufs/ufs_bsg.c @@ -157,7 +157,7 @@ void ufs_bsg_remove(struct ufs_hba *hba) if (!hba->bsg_queue) return; - bsg_unregister_queue(hba->bsg_queue); + bsg_remove_queue(hba->bsg_queue); device_del(bsg_dev); put_device(bsg_dev); @@ -193,7 +193,7 @@ int ufs_bsg_probe(struct ufs_hba *hba) if (ret) goto out; - q = bsg_setup_queue(bsg_dev, dev_name(bsg_dev), ufs_bsg_request, 0); + q = bsg_setup_queue(bsg_dev, dev_name(bsg_dev), ufs_bsg_request, NULL, 0); if (IS_ERR(q)) { ret = PTR_ERR(q); goto out; diff --git a/drivers/scsi/virtio_scsi.c b/drivers/scsi/virtio_scsi.c index 1c72db94270e..c3c95b314286 100644 --- a/drivers/scsi/virtio_scsi.c +++ b/drivers/scsi/virtio_scsi.c @@ -719,8 +719,9 @@ static void virtscsi_target_destroy(struct scsi_target *starget) static int virtscsi_map_queues(struct Scsi_Host *shost) { struct virtio_scsi *vscsi = shost_priv(shost); + struct blk_mq_queue_map *qmap = &shost->tag_set.map[0]; - return blk_mq_virtio_map_queues(&shost->tag_set, vscsi->vdev, 2); + return blk_mq_virtio_map_queues(qmap, vscsi->vdev, 2); } /* diff --git a/drivers/target/iscsi/iscsi_target_util.c b/drivers/target/iscsi/iscsi_target_util.c index 36b742932c72..86987da86dd6 100644 --- a/drivers/target/iscsi/iscsi_target_util.c +++ b/drivers/target/iscsi/iscsi_target_util.c @@ -150,24 +150,26 @@ void iscsit_free_r2ts_from_list(struct iscsi_cmd *cmd) static int iscsit_wait_for_tag(struct se_session *se_sess, int state, int *cpup) { int tag = -1; - DEFINE_WAIT(wait); + DEFINE_SBQ_WAIT(wait); struct sbq_wait_state *ws; + struct sbitmap_queue *sbq; if (state == TASK_RUNNING) return tag; - ws = &se_sess->sess_tag_pool.ws[0]; + sbq = &se_sess->sess_tag_pool; + ws = &sbq->ws[0]; for (;;) { - prepare_to_wait_exclusive(&ws->wait, &wait, state); + sbitmap_prepare_to_wait(sbq, ws, &wait, state); if (signal_pending_state(state, current)) break; - tag = sbitmap_queue_get(&se_sess->sess_tag_pool, cpup); + tag = sbitmap_queue_get(sbq, cpup); if (tag >= 0) break; schedule(); } - finish_wait(&ws->wait, &wait); + sbitmap_finish_wait(sbq, ws, &wait); return tag; } diff --git a/drivers/target/target_core_pscsi.c b/drivers/target/target_core_pscsi.c index 47d76c862014..c062d363dce3 100644 --- a/drivers/target/target_core_pscsi.c +++ b/drivers/target/target_core_pscsi.c @@ -1094,7 +1094,7 @@ static void pscsi_req_done(struct request *req, blk_status_t status) break; } - __blk_put_request(req->q, req); + blk_put_request(req); kfree(pt); } |