diff options
author | Linus Torvalds <torvalds@linux-foundation.org> | 2015-11-04 20:37:27 -0800 |
---|---|---|
committer | Linus Torvalds <torvalds@linux-foundation.org> | 2015-11-04 20:37:27 -0800 |
commit | a9aa31cdc2a7be4a70b0ea24a451dfeb00ce0024 (patch) | |
tree | 10838dc96d953fdd77c29e01f0663b76ce0be764 | |
parent | d9734e0d1ccf87e828ad172c58a96dff97cfc0ba (diff) | |
parent | b3975e94f5688691f487ea00126dabe8f5bee3af (diff) | |
download | linux-a9aa31cdc2a7be4a70b0ea24a451dfeb00ce0024.tar.bz2 |
Merge branch 'for-4.4/drivers' of git://git.kernel.dk/linux-block
Pull block driver updates from Jens Axboe:
"Here are the block driver changes for 4.4. This pull request
contains:
- NVMe:
- Refactoring and moving of code to prepare for proper target
support. From Christoph and Jay.
- 32-bit nvme warning fix from Arnd.
- Error initialization fix from me.
- Proper namespace removal and reference counting support from
Keith.
- Device resume fix on IO failure, also from Keith.
- Dependency fix from Keith, now that nvme is no longer under the
drivers/block umbrella.
- Target location and maintainers update from Jay.
- From Ming Lei, the long awaited DIO/AIO support for loop.
- Enable BD-RE writeable opens, from Georgios"
* 'for-4.4/drivers' of git://git.kernel.dk/linux-block: (24 commits)
Update target repo for nvme patch contributions
NVMe: initialize error to '0'
nvme: use an integer value to Linux errno values
nvme: fix 32-bit build warning
NVMe: Add explicit block config dependency
nvme: include <linux/types.h> in <linux/nvme.h>
nvme: move to a new drivers/nvme/host directory
nvme.h: add missing nvme_id_ctrl endianess annotations
nvme: move hardware structures out of the uapi version of nvme.h
nvme: add a local nvme.h header
nvme: properly handle partially initialized queues in nvme_create_io_queues
nvme: merge nvme_dev_start, nvme_dev_resume and nvme_async_probe
nvme: factor reset code into a common helper
nvme: merge nvme_dev_reset into nvme_reset_failed_dev
nvme: delete dev from dev_list in nvme_reset
NVMe: Simplify device resume on io queue failure
NVMe: Namespace removal simplifications
NVMe: Reference count open namespaces
cdrom: Random writing support for BD-RE media
block: loop: support DIO & AIO
...
-rw-r--r-- | MAINTAINERS | 8 | ||||
-rw-r--r-- | drivers/Kconfig | 2 | ||||
-rw-r--r-- | drivers/Makefile | 1 | ||||
-rw-r--r-- | drivers/block/Kconfig | 11 | ||||
-rw-r--r-- | drivers/block/Makefile | 2 | ||||
-rw-r--r-- | drivers/block/loop.c | 274 | ||||
-rw-r--r-- | drivers/block/loop.h | 13 | ||||
-rw-r--r-- | drivers/cdrom/cdrom.c | 1 | ||||
-rw-r--r-- | drivers/nvme/Kconfig | 1 | ||||
-rw-r--r-- | drivers/nvme/Makefile | 2 | ||||
-rw-r--r-- | drivers/nvme/host/Kconfig | 10 | ||||
-rw-r--r-- | drivers/nvme/host/Makefile | 4 | ||||
-rw-r--r-- | drivers/nvme/host/nvme.h | 133 | ||||
-rw-r--r-- | drivers/nvme/host/pci.c (renamed from drivers/block/nvme-core.c) | 211 | ||||
-rw-r--r-- | drivers/nvme/host/scsi.c (renamed from drivers/block/nvme-scsi.c) | 2 | ||||
-rw-r--r-- | include/linux/nvme.h | 624 | ||||
-rw-r--r-- | include/uapi/linux/loop.h | 2 | ||||
-rw-r--r-- | include/uapi/linux/nvme.h | 589 | ||||
-rw-r--r-- | include/uapi/linux/nvme_ioctl.h | 65 |
19 files changed, 1056 insertions, 899 deletions
diff --git a/MAINTAINERS b/MAINTAINERS index caddb49b207d..249f96923642 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -7486,11 +7486,13 @@ F: drivers/video/fbdev/riva/ F: drivers/video/fbdev/nvidia/ NVM EXPRESS DRIVER -M: Matthew Wilcox <willy@linux.intel.com> +M: Keith Busch <keith.busch@intel.com> +M: Jens Axboe <axboe@fb.com> L: linux-nvme@lists.infradead.org -T: git git://git.infradead.org/users/willy/linux-nvme.git +T: git git://git.kernel.org/pub/scm/linux/kernel/git/axboe/linux-block.git +W: https://kernel.googlesource.com/pub/scm/linux/kernel/git/axboe/linux-block/ S: Supported -F: drivers/block/nvme* +F: drivers/nvme/host/ F: include/linux/nvme.h NVMEM FRAMEWORK diff --git a/drivers/Kconfig b/drivers/Kconfig index 46b4a8e0f859..e69ec82ac80a 100644 --- a/drivers/Kconfig +++ b/drivers/Kconfig @@ -18,6 +18,8 @@ source "drivers/pnp/Kconfig" source "drivers/block/Kconfig" +source "drivers/nvme/Kconfig" + # misc before ide - BLK_DEV_SGIIOC4 depends on SGI_IOC4 source "drivers/misc/Kconfig" diff --git a/drivers/Makefile b/drivers/Makefile index b250b36b54f2..42f9dd5f07c8 100644 --- a/drivers/Makefile +++ b/drivers/Makefile @@ -70,6 +70,7 @@ obj-$(CONFIG_NUBUS) += nubus/ obj-y += macintosh/ obj-$(CONFIG_IDE) += ide/ obj-$(CONFIG_SCSI) += scsi/ +obj-y += nvme/ obj-$(CONFIG_ATA) += ata/ obj-$(CONFIG_TARGET_CORE) += target/ obj-$(CONFIG_MTD) += mtd/ diff --git a/drivers/block/Kconfig b/drivers/block/Kconfig index 1b8094d4d7af..29819e719afa 100644 --- a/drivers/block/Kconfig +++ b/drivers/block/Kconfig @@ -310,17 +310,6 @@ config BLK_DEV_NBD If unsure, say N. -config BLK_DEV_NVME - tristate "NVM Express block device" - depends on PCI - ---help--- - The NVM Express driver is for solid state drives directly - connected to the PCI or PCI Express bus. If you know you - don't have one of these, it is safe to answer N. - - To compile this driver as a module, choose M here: the - module will be called nvme. 
- config BLK_DEV_SKD tristate "STEC S1120 Block Driver" depends on PCI diff --git a/drivers/block/Makefile b/drivers/block/Makefile index 02b688d1438d..671329023ec2 100644 --- a/drivers/block/Makefile +++ b/drivers/block/Makefile @@ -22,7 +22,6 @@ obj-$(CONFIG_XILINX_SYSACE) += xsysace.o obj-$(CONFIG_CDROM_PKTCDVD) += pktcdvd.o obj-$(CONFIG_MG_DISK) += mg_disk.o obj-$(CONFIG_SUNVDC) += sunvdc.o -obj-$(CONFIG_BLK_DEV_NVME) += nvme.o obj-$(CONFIG_BLK_DEV_SKD) += skd.o obj-$(CONFIG_BLK_DEV_OSD) += osdblk.o @@ -44,6 +43,5 @@ obj-$(CONFIG_BLK_DEV_RSXX) += rsxx/ obj-$(CONFIG_BLK_DEV_NULL_BLK) += null_blk.o obj-$(CONFIG_ZRAM) += zram/ -nvme-y := nvme-core.o nvme-scsi.o skd-y := skd_main.o swim_mod-y := swim.o swim_asm.o diff --git a/drivers/block/loop.c b/drivers/block/loop.c index 674f800a3b57..423f4ca7d712 100644 --- a/drivers/block/loop.c +++ b/drivers/block/loop.c @@ -164,6 +164,62 @@ static loff_t get_loop_size(struct loop_device *lo, struct file *file) return get_size(lo->lo_offset, lo->lo_sizelimit, file); } +static void __loop_update_dio(struct loop_device *lo, bool dio) +{ + struct file *file = lo->lo_backing_file; + struct address_space *mapping = file->f_mapping; + struct inode *inode = mapping->host; + unsigned short sb_bsize = 0; + unsigned dio_align = 0; + bool use_dio; + + if (inode->i_sb->s_bdev) { + sb_bsize = bdev_logical_block_size(inode->i_sb->s_bdev); + dio_align = sb_bsize - 1; + } + + /* + * We support direct I/O only if lo_offset is aligned with the + * logical I/O size of backing device, and the logical block + * size of loop is bigger than the backing device's and the loop + * needn't transform transfer. 
+ * + * TODO: the above condition may be loosed in the future, and + * direct I/O may be switched runtime at that time because most + * of requests in sane appplications should be PAGE_SIZE algined + */ + if (dio) { + if (queue_logical_block_size(lo->lo_queue) >= sb_bsize && + !(lo->lo_offset & dio_align) && + mapping->a_ops->direct_IO && + !lo->transfer) + use_dio = true; + else + use_dio = false; + } else { + use_dio = false; + } + + if (lo->use_dio == use_dio) + return; + + /* flush dirty pages before changing direct IO */ + vfs_fsync(file, 0); + + /* + * The flag of LO_FLAGS_DIRECT_IO is handled similarly with + * LO_FLAGS_READ_ONLY, both are set from kernel, and losetup + * will get updated by ioctl(LOOP_GET_STATUS) + */ + blk_mq_freeze_queue(lo->lo_queue); + lo->use_dio = use_dio; + if (use_dio) + lo->lo_flags |= LO_FLAGS_DIRECT_IO; + else + lo->lo_flags &= ~LO_FLAGS_DIRECT_IO; + blk_mq_unfreeze_queue(lo->lo_queue); +} + static int figure_loop_size(struct loop_device *lo, loff_t offset, loff_t sizelimit) { @@ -389,6 +445,89 @@ static int lo_req_flush(struct loop_device *lo, struct request *rq) return ret; } +static inline void handle_partial_read(struct loop_cmd *cmd, long bytes) +{ + if (bytes < 0 || (cmd->rq->cmd_flags & REQ_WRITE)) + return; + + if (unlikely(bytes < blk_rq_bytes(cmd->rq))) { + struct bio *bio = cmd->rq->bio; + + bio_advance(bio, bytes); + zero_fill_bio(bio); + } +} + +static void lo_rw_aio_complete(struct kiocb *iocb, long ret, long ret2) +{ + struct loop_cmd *cmd = container_of(iocb, struct loop_cmd, iocb); + struct request *rq = cmd->rq; + + handle_partial_read(cmd, ret); + + if (ret > 0) + ret = 0; + else if (ret < 0) + ret = -EIO; + + blk_mq_complete_request(rq, ret); +} + +static int lo_rw_aio(struct loop_device *lo, struct loop_cmd *cmd, + loff_t pos, bool rw) +{ + struct iov_iter iter; + struct bio_vec *bvec; + struct bio *bio = cmd->rq->bio; + struct file *file = lo->lo_backing_file; + int ret; + + /* nomerge for loop request queue 
*/ + WARN_ON(cmd->rq->bio != cmd->rq->biotail); + + bvec = __bvec_iter_bvec(bio->bi_io_vec, bio->bi_iter); + iov_iter_bvec(&iter, ITER_BVEC | rw, bvec, + bio_segments(bio), blk_rq_bytes(cmd->rq)); + + cmd->iocb.ki_pos = pos; + cmd->iocb.ki_filp = file; + cmd->iocb.ki_complete = lo_rw_aio_complete; + cmd->iocb.ki_flags = IOCB_DIRECT; + + if (rw == WRITE) + ret = file->f_op->write_iter(&cmd->iocb, &iter); + else + ret = file->f_op->read_iter(&cmd->iocb, &iter); + + if (ret != -EIOCBQUEUED) + cmd->iocb.ki_complete(&cmd->iocb, ret, 0); + return 0; +} + + +static inline int lo_rw_simple(struct loop_device *lo, + struct request *rq, loff_t pos, bool rw) +{ + struct loop_cmd *cmd = blk_mq_rq_to_pdu(rq); + + if (cmd->use_aio) + return lo_rw_aio(lo, cmd, pos, rw); + + /* + * lo_write_simple and lo_read_simple should have been covered + * by io submit style function like lo_rw_aio(), one blocker + * is that lo_read_simple() need to call flush_dcache_page after + * the page is written from kernel, and it isn't easy to handle + * this in io submit style function which submits all segments + * of the req at one time. And direct read IO doesn't need to + * run flush_dcache_page(). 
+ */ + if (rw == WRITE) + return lo_write_simple(lo, rq, pos); + else + return lo_read_simple(lo, rq, pos); +} + static int do_req_filebacked(struct loop_device *lo, struct request *rq) { loff_t pos; @@ -404,13 +543,13 @@ static int do_req_filebacked(struct loop_device *lo, struct request *rq) else if (lo->transfer) ret = lo_write_transfer(lo, rq, pos); else - ret = lo_write_simple(lo, rq, pos); + ret = lo_rw_simple(lo, rq, pos, WRITE); } else { if (lo->transfer) ret = lo_read_transfer(lo, rq, pos); else - ret = lo_read_simple(lo, rq, pos); + ret = lo_rw_simple(lo, rq, pos, READ); } return ret; @@ -421,6 +560,12 @@ struct switch_request { struct completion wait; }; +static inline void loop_update_dio(struct loop_device *lo) +{ + __loop_update_dio(lo, io_is_direct(lo->lo_backing_file) | + lo->use_dio); +} + /* * Do the actual switch; called from the BIO completion routine */ @@ -441,6 +586,7 @@ static void do_loop_switch(struct loop_device *lo, struct switch_request *p) mapping->host->i_bdev->bd_block_size : PAGE_SIZE; lo->old_gfp_mask = mapping_gfp_mask(mapping); mapping_set_gfp_mask(mapping, lo->old_gfp_mask & ~(__GFP_IO|__GFP_FS)); + loop_update_dio(lo); } /* @@ -627,11 +773,19 @@ static ssize_t loop_attr_partscan_show(struct loop_device *lo, char *buf) return sprintf(buf, "%s\n", partscan ? "1" : "0"); } +static ssize_t loop_attr_dio_show(struct loop_device *lo, char *buf) +{ + int dio = (lo->lo_flags & LO_FLAGS_DIRECT_IO); + + return sprintf(buf, "%s\n", dio ? 
"1" : "0"); +} + LOOP_ATTR_RO(backing_file); LOOP_ATTR_RO(offset); LOOP_ATTR_RO(sizelimit); LOOP_ATTR_RO(autoclear); LOOP_ATTR_RO(partscan); +LOOP_ATTR_RO(dio); static struct attribute *loop_attrs[] = { &loop_attr_backing_file.attr, @@ -639,6 +793,7 @@ static struct attribute *loop_attrs[] = { &loop_attr_sizelimit.attr, &loop_attr_autoclear.attr, &loop_attr_partscan.attr, + &loop_attr_dio.attr, NULL, }; @@ -688,6 +843,23 @@ static void loop_config_discard(struct loop_device *lo) queue_flag_set_unlocked(QUEUE_FLAG_DISCARD, q); } +static void loop_unprepare_queue(struct loop_device *lo) +{ + flush_kthread_worker(&lo->worker); + kthread_stop(lo->worker_task); +} + +static int loop_prepare_queue(struct loop_device *lo) +{ + init_kthread_worker(&lo->worker); + lo->worker_task = kthread_run(kthread_worker_fn, + &lo->worker, "loop%d", lo->lo_number); + if (IS_ERR(lo->worker_task)) + return -ENOMEM; + set_user_nice(lo->worker_task, MIN_NICE); + return 0; +} + static int loop_set_fd(struct loop_device *lo, fmode_t mode, struct block_device *bdev, unsigned int arg) { @@ -745,17 +917,15 @@ static int loop_set_fd(struct loop_device *lo, fmode_t mode, size = get_loop_size(lo, file); if ((loff_t)(sector_t)size != size) goto out_putf; - error = -ENOMEM; - lo->wq = alloc_workqueue("kloopd%d", - WQ_MEM_RECLAIM | WQ_HIGHPRI | WQ_UNBOUND, 16, - lo->lo_number); - if (!lo->wq) + error = loop_prepare_queue(lo); + if (error) goto out_putf; error = 0; set_device_ro(bdev, (lo_flags & LO_FLAGS_READ_ONLY) != 0); + lo->use_dio = false; lo->lo_blocksize = lo_blocksize; lo->lo_device = bdev; lo->lo_flags = lo_flags; @@ -769,6 +939,7 @@ static int loop_set_fd(struct loop_device *lo, fmode_t mode, if (!(lo_flags & LO_FLAGS_READ_ONLY) && file->f_op->fsync) blk_queue_flush(lo->lo_queue, REQ_FLUSH); + loop_update_dio(lo); set_capacity(lo->lo_disk, size); bd_set_size(bdev, size << 9); loop_sysfs_init(lo); @@ -903,8 +1074,7 @@ static int loop_clr_fd(struct loop_device *lo) lo->lo_flags = 0; if 
(!part_shift) lo->lo_disk->flags |= GENHD_FL_NO_PART_SCAN; - destroy_workqueue(lo->wq); - lo->wq = NULL; + loop_unprepare_queue(lo); mutex_unlock(&lo->lo_ctl_mutex); /* * Need not hold lo_ctl_mutex to fput backing file. @@ -988,6 +1158,9 @@ loop_set_status(struct loop_device *lo, const struct loop_info64 *info) lo->lo_key_owner = uid; } + /* update dio if lo_offset or transfer is changed */ + __loop_update_dio(lo, lo->use_dio); + return 0; } @@ -1138,6 +1311,20 @@ static int loop_set_capacity(struct loop_device *lo, struct block_device *bdev) return figure_loop_size(lo, lo->lo_offset, lo->lo_sizelimit); } +static int loop_set_dio(struct loop_device *lo, unsigned long arg) +{ + int error = -ENXIO; + if (lo->lo_state != Lo_bound) + goto out; + + __loop_update_dio(lo, !!arg); + if (lo->use_dio == !!arg) + return 0; + error = -EINVAL; + out: + return error; +} + static int lo_ioctl(struct block_device *bdev, fmode_t mode, unsigned int cmd, unsigned long arg) { @@ -1181,6 +1368,11 @@ static int lo_ioctl(struct block_device *bdev, fmode_t mode, if ((mode & FMODE_WRITE) || capable(CAP_SYS_ADMIN)) err = loop_set_capacity(lo, bdev); break; + case LOOP_SET_DIRECT_IO: + err = -EPERM; + if ((mode & FMODE_WRITE) || capable(CAP_SYS_ADMIN)) + err = loop_set_dio(lo, arg); + break; default: err = lo->ioctl ? 
lo->ioctl(lo, cmd, arg) : -EINVAL; } @@ -1461,23 +1653,13 @@ static int loop_queue_rq(struct blk_mq_hw_ctx *hctx, if (lo->lo_state != Lo_bound) return -EIO; - if (cmd->rq->cmd_flags & REQ_WRITE) { - struct loop_device *lo = cmd->rq->q->queuedata; - bool need_sched = true; - - spin_lock_irq(&lo->lo_lock); - if (lo->write_started) - need_sched = false; - else - lo->write_started = true; - list_add_tail(&cmd->list, &lo->write_cmd_head); - spin_unlock_irq(&lo->lo_lock); + if (lo->use_dio && !(cmd->rq->cmd_flags & (REQ_FLUSH | + REQ_DISCARD))) + cmd->use_aio = true; + else + cmd->use_aio = false; - if (need_sched) - queue_work(lo->wq, &lo->write_work); - } else { - queue_work(lo->wq, &cmd->read_work); - } + queue_kthread_work(&lo->worker, &cmd->work); return BLK_MQ_RQ_QUEUE_OK; } @@ -1495,38 +1677,15 @@ static void loop_handle_cmd(struct loop_cmd *cmd) ret = do_req_filebacked(lo, cmd->rq); failed: - blk_mq_complete_request(cmd->rq, ret ? -EIO : 0); + /* complete non-aio request */ + if (!cmd->use_aio || ret) + blk_mq_complete_request(cmd->rq, ret ? 
-EIO : 0); } -static void loop_queue_write_work(struct work_struct *work) -{ - struct loop_device *lo = - container_of(work, struct loop_device, write_work); - LIST_HEAD(cmd_list); - - spin_lock_irq(&lo->lo_lock); - repeat: - list_splice_init(&lo->write_cmd_head, &cmd_list); - spin_unlock_irq(&lo->lo_lock); - - while (!list_empty(&cmd_list)) { - struct loop_cmd *cmd = list_first_entry(&cmd_list, - struct loop_cmd, list); - list_del_init(&cmd->list); - loop_handle_cmd(cmd); - } - - spin_lock_irq(&lo->lo_lock); - if (!list_empty(&lo->write_cmd_head)) - goto repeat; - lo->write_started = false; - spin_unlock_irq(&lo->lo_lock); -} - -static void loop_queue_read_work(struct work_struct *work) +static void loop_queue_work(struct kthread_work *work) { struct loop_cmd *cmd = - container_of(work, struct loop_cmd, read_work); + container_of(work, struct loop_cmd, work); loop_handle_cmd(cmd); } @@ -1538,7 +1697,7 @@ static int loop_init_request(void *data, struct request *rq, struct loop_cmd *cmd = blk_mq_rq_to_pdu(rq); cmd->rq = rq; - INIT_WORK(&cmd->read_work, loop_queue_read_work); + init_kthread_work(&cmd->work, loop_queue_work); return 0; } @@ -1594,8 +1753,11 @@ static int loop_add(struct loop_device **l, int i) } lo->lo_queue->queuedata = lo; - INIT_LIST_HEAD(&lo->write_cmd_head); - INIT_WORK(&lo->write_work, loop_queue_write_work); + /* + * It doesn't make sense to enable merge because the I/O + * submitted to backing file is handled page by page. 
+ */ + queue_flag_set_unlocked(QUEUE_FLAG_NOMERGES, lo->lo_queue); disk = lo->lo_disk = alloc_disk(1 << part_shift); if (!disk) diff --git a/drivers/block/loop.h b/drivers/block/loop.h index 25e8997ed246..fb2237c73e61 100644 --- a/drivers/block/loop.h +++ b/drivers/block/loop.h @@ -14,7 +14,7 @@ #include <linux/blk-mq.h> #include <linux/spinlock.h> #include <linux/mutex.h> -#include <linux/workqueue.h> +#include <linux/kthread.h> #include <uapi/linux/loop.h> /* Possible states of device */ @@ -54,12 +54,11 @@ struct loop_device { gfp_t old_gfp_mask; spinlock_t lo_lock; - struct workqueue_struct *wq; - struct list_head write_cmd_head; - struct work_struct write_work; - bool write_started; int lo_state; struct mutex lo_ctl_mutex; + struct kthread_worker worker; + struct task_struct *worker_task; + bool use_dio; struct request_queue *lo_queue; struct blk_mq_tag_set tag_set; @@ -67,9 +66,11 @@ struct loop_device { }; struct loop_cmd { - struct work_struct read_work; + struct kthread_work work; struct request *rq; struct list_head list; + bool use_aio; /* use AIO interface to handle I/O */ + struct kiocb iocb; }; /* Support for loadable transfer modules */ diff --git a/drivers/cdrom/cdrom.c b/drivers/cdrom/cdrom.c index 5d28a45d2960..c206ccda899b 100644 --- a/drivers/cdrom/cdrom.c +++ b/drivers/cdrom/cdrom.c @@ -885,6 +885,7 @@ static int cdrom_is_dvd_rw(struct cdrom_device_info *cdi) switch (cdi->mmc3_profile) { case 0x12: /* DVD-RAM */ case 0x1A: /* DVD+RW */ + case 0x43: /* BD-RE */ return 0; default: return 1; diff --git a/drivers/nvme/Kconfig b/drivers/nvme/Kconfig new file mode 100644 index 000000000000..a39d9431eaec --- /dev/null +++ b/drivers/nvme/Kconfig @@ -0,0 +1 @@ +source "drivers/nvme/host/Kconfig" diff --git a/drivers/nvme/Makefile b/drivers/nvme/Makefile new file mode 100644 index 000000000000..9421e829d2a9 --- /dev/null +++ b/drivers/nvme/Makefile @@ -0,0 +1,2 @@ + +obj-y += host/ diff --git a/drivers/nvme/host/Kconfig b/drivers/nvme/host/Kconfig new 
file mode 100644 index 000000000000..002a94abdbc4 --- /dev/null +++ b/drivers/nvme/host/Kconfig @@ -0,0 +1,10 @@ +config BLK_DEV_NVME + tristate "NVM Express block device" + depends on PCI && BLOCK + ---help--- + The NVM Express driver is for solid state drives directly + connected to the PCI or PCI Express bus. If you know you + don't have one of these, it is safe to answer N. + + To compile this driver as a module, choose M here: the + module will be called nvme. diff --git a/drivers/nvme/host/Makefile b/drivers/nvme/host/Makefile new file mode 100644 index 000000000000..cfb6679ec245 --- /dev/null +++ b/drivers/nvme/host/Makefile @@ -0,0 +1,4 @@ + +obj-$(CONFIG_BLK_DEV_NVME) += nvme.o + +nvme-y += pci.o scsi.o diff --git a/drivers/nvme/host/nvme.h b/drivers/nvme/host/nvme.h new file mode 100644 index 000000000000..c1f41bf3c0f2 --- /dev/null +++ b/drivers/nvme/host/nvme.h @@ -0,0 +1,133 @@ +/* + * Copyright (c) 2011-2014, Intel Corporation. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms and conditions of the GNU General Public License, + * version 2, as published by the Free Software Foundation. + * + * This program is distributed in the hope it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + */ + +#ifndef _NVME_H +#define _NVME_H + +#include <linux/nvme.h> +#include <linux/pci.h> +#include <linux/kref.h> +#include <linux/blk-mq.h> + +extern unsigned char nvme_io_timeout; +#define NVME_IO_TIMEOUT (nvme_io_timeout * HZ) + +/* + * Represents an NVM Express device. Each nvme_dev is a PCI function. 
+ */ +struct nvme_dev { + struct list_head node; + struct nvme_queue **queues; + struct request_queue *admin_q; + struct blk_mq_tag_set tagset; + struct blk_mq_tag_set admin_tagset; + u32 __iomem *dbs; + struct device *dev; + struct dma_pool *prp_page_pool; + struct dma_pool *prp_small_pool; + int instance; + unsigned queue_count; + unsigned online_queues; + unsigned max_qid; + int q_depth; + u32 db_stride; + u32 ctrl_config; + struct msix_entry *entry; + struct nvme_bar __iomem *bar; + struct list_head namespaces; + struct kref kref; + struct device *device; + struct work_struct reset_work; + struct work_struct probe_work; + struct work_struct scan_work; + char name[12]; + char serial[20]; + char model[40]; + char firmware_rev[8]; + bool subsystem; + u32 max_hw_sectors; + u32 stripe_size; + u32 page_size; + void __iomem *cmb; + dma_addr_t cmb_dma_addr; + u64 cmb_size; + u32 cmbsz; + u16 oncs; + u16 abort_limit; + u8 event_limit; + u8 vwc; +}; + +/* + * An NVM Express namespace is equivalent to a SCSI LUN + */ +struct nvme_ns { + struct list_head list; + + struct nvme_dev *dev; + struct request_queue *queue; + struct gendisk *disk; + struct kref kref; + + unsigned ns_id; + int lba_shift; + u16 ms; + bool ext; + u8 pi_type; + u64 mode_select_num_blocks; + u32 mode_select_block_len; +}; + +/* + * The nvme_iod describes the data in an I/O, including the list of PRP + * entries. You can't see it in this data structure because C doesn't let + * me express that. Use nvme_alloc_iod to ensure there's enough space + * allocated to store the PRP list. + */ +struct nvme_iod { + unsigned long private; /* For the use of the submitter of the I/O */ + int npages; /* In the PRP list. 
0 means small pool in use */ + int offset; /* Of PRP list */ + int nents; /* Used in scatterlist */ + int length; /* Of data, in bytes */ + dma_addr_t first_dma; + struct scatterlist meta_sg[1]; /* metadata requires single contiguous buffer */ + struct scatterlist sg[0]; +}; + +static inline u64 nvme_block_nr(struct nvme_ns *ns, sector_t sector) +{ + return (sector >> (ns->lba_shift - 9)); +} + +int nvme_submit_sync_cmd(struct request_queue *q, struct nvme_command *cmd, + void *buf, unsigned bufflen); +int __nvme_submit_sync_cmd(struct request_queue *q, struct nvme_command *cmd, + void *buffer, void __user *ubuffer, unsigned bufflen, + u32 *result, unsigned timeout); +int nvme_identify_ctrl(struct nvme_dev *dev, struct nvme_id_ctrl **id); +int nvme_identify_ns(struct nvme_dev *dev, unsigned nsid, + struct nvme_id_ns **id); +int nvme_get_log_page(struct nvme_dev *dev, struct nvme_smart_log **log); +int nvme_get_features(struct nvme_dev *dev, unsigned fid, unsigned nsid, + dma_addr_t dma_addr, u32 *result); +int nvme_set_features(struct nvme_dev *dev, unsigned fid, unsigned dword11, + dma_addr_t dma_addr, u32 *result); + +struct sg_io_hdr; + +int nvme_sg_io(struct nvme_ns *ns, struct sg_io_hdr __user *u_hdr); +int nvme_sg_io32(struct nvme_ns *ns, unsigned long arg); +int nvme_sg_get_version_num(int __user *ip); + +#endif /* _NVME_H */ diff --git a/drivers/block/nvme-core.c b/drivers/nvme/host/pci.c index ccc0c1f93daa..0a179ed9ddef 100644 --- a/drivers/block/nvme-core.c +++ b/drivers/nvme/host/pci.c @@ -12,7 +12,6 @@ * more details. 
*/ -#include <linux/nvme.h> #include <linux/bitops.h> #include <linux/blkdev.h> #include <linux/blk-mq.h> @@ -43,6 +42,9 @@ #include <scsi/sg.h> #include <asm-generic/io-64-nonatomic-lo-hi.h> +#include <uapi/linux/nvme_ioctl.h> +#include "nvme.h" + #define NVME_MINORS (1U << MINORBITS) #define NVME_Q_DEPTH 1024 #define NVME_AQ_DEPTH 256 @@ -84,9 +86,10 @@ static wait_queue_head_t nvme_kthread_wait; static struct class *nvme_class; -static void nvme_reset_failed_dev(struct work_struct *ws); +static int __nvme_reset(struct nvme_dev *dev); static int nvme_reset(struct nvme_dev *dev); static int nvme_process_cq(struct nvme_queue *nvmeq); +static void nvme_dead_ctrl(struct nvme_dev *dev); struct async_cmd_info { struct kthread_work work; @@ -1283,18 +1286,13 @@ static void nvme_abort_req(struct request *req) struct nvme_command cmd; if (!nvmeq->qid || cmd_rq->aborted) { - unsigned long flags; - - spin_lock_irqsave(&dev_list_lock, flags); - if (work_busy(&dev->reset_work)) - goto out; - list_del_init(&dev->node); - dev_warn(dev->dev, "I/O %d QID %d timeout, reset controller\n", - req->tag, nvmeq->qid); - dev->reset_workfn = nvme_reset_failed_dev; - queue_work(nvme_workq, &dev->reset_work); - out: - spin_unlock_irqrestore(&dev_list_lock, flags); + spin_lock(&dev_list_lock); + if (!__nvme_reset(dev)) { + dev_warn(dev->dev, + "I/O %d QID %d timeout, reset controller\n", + req->tag, nvmeq->qid); + } + spin_unlock(&dev_list_lock); return; } @@ -1949,6 +1947,20 @@ static int nvme_compat_ioctl(struct block_device *bdev, fmode_t mode, #define nvme_compat_ioctl NULL #endif +static void nvme_free_dev(struct kref *kref); +static void nvme_free_ns(struct kref *kref) +{ + struct nvme_ns *ns = container_of(kref, struct nvme_ns, kref); + + spin_lock(&dev_list_lock); + ns->disk->private_data = NULL; + spin_unlock(&dev_list_lock); + + kref_put(&ns->dev->kref, nvme_free_dev); + put_disk(ns->disk); + kfree(ns); +} + static int nvme_open(struct block_device *bdev, fmode_t mode) { int ret = 
0; @@ -1958,21 +1970,17 @@ static int nvme_open(struct block_device *bdev, fmode_t mode) ns = bdev->bd_disk->private_data; if (!ns) ret = -ENXIO; - else if (!kref_get_unless_zero(&ns->dev->kref)) + else if (!kref_get_unless_zero(&ns->kref)) ret = -ENXIO; spin_unlock(&dev_list_lock); return ret; } -static void nvme_free_dev(struct kref *kref); - static void nvme_release(struct gendisk *disk, fmode_t mode) { struct nvme_ns *ns = disk->private_data; - struct nvme_dev *dev = ns->dev; - - kref_put(&dev->kref, nvme_free_dev); + kref_put(&ns->kref, nvme_free_ns); } static int nvme_getgeo(struct block_device *bd, struct hd_geometry *geo) @@ -2079,14 +2087,11 @@ static int nvme_kthread(void *data) if ((dev->subsystem && (csts & NVME_CSTS_NSSRO)) || csts & NVME_CSTS_CFS) { - if (work_busy(&dev->reset_work)) - continue; - list_del_init(&dev->node); - dev_warn(dev->dev, - "Failed status: %x, reset controller\n", - readl(&dev->bar->csts)); - dev->reset_workfn = nvme_reset_failed_dev; - queue_work(nvme_workq, &dev->reset_work); + if (!__nvme_reset(dev)) { + dev_warn(dev->dev, + "Failed status: %x, reset controller\n", + readl(&dev->bar->csts)); + } continue; } for (i = 0; i < dev->queue_count; i++) { @@ -2132,6 +2137,7 @@ static void nvme_alloc_ns(struct nvme_dev *dev, unsigned nsid) if (!disk) goto out_free_queue; + kref_init(&ns->kref); ns->ns_id = nsid; ns->disk = disk; ns->lba_shift = 9; /* set to a default value for 512 until disk is validated */ @@ -2168,6 +2174,7 @@ static void nvme_alloc_ns(struct nvme_dev *dev, unsigned nsid) if (nvme_revalidate_disk(ns->disk)) goto out_free_disk; + kref_get(&dev->kref); add_disk(ns->disk); if (ns->ms) { struct block_device *bd = bdget_disk(ns->disk, 0); @@ -2190,6 +2197,13 @@ static void nvme_alloc_ns(struct nvme_dev *dev, unsigned nsid) kfree(ns); } +/* + * Create I/O queues. 
Failing to create an I/O queue is not an issue, + * we can continue with less than the desired amount of queues, and + * even a controller without I/O queues an still be used to issue + * admin commands. This might be useful to upgrade a buggy firmware + * for example. + */ static void nvme_create_io_queues(struct nvme_dev *dev) { unsigned i; @@ -2199,8 +2213,10 @@ static void nvme_create_io_queues(struct nvme_dev *dev) break; for (i = dev->online_queues; i <= dev->queue_count - 1; i++) - if (nvme_create_queue(dev->queues[i], i)) + if (nvme_create_queue(dev->queues[i], i)) { + nvme_free_queues(dev, i); break; + } } static int set_queue_count(struct nvme_dev *dev, int count) @@ -2363,18 +2379,6 @@ static int nvme_setup_io_queues(struct nvme_dev *dev) return result; } -static void nvme_free_namespace(struct nvme_ns *ns) -{ - list_del(&ns->list); - - spin_lock(&dev_list_lock); - ns->disk->private_data = NULL; - spin_unlock(&dev_list_lock); - - put_disk(ns->disk); - kfree(ns); -} - static int ns_cmp(void *priv, struct list_head *a, struct list_head *b) { struct nvme_ns *nsa = container_of(a, struct nvme_ns, list); @@ -2416,7 +2420,9 @@ static void nvme_ns_remove(struct nvme_ns *ns) if (kill || !blk_queue_dying(ns->queue)) { blk_mq_abort_requeue_list(ns->queue); blk_cleanup_queue(ns->queue); - } + } + list_del_init(&ns->list); + kref_put(&ns->kref, nvme_free_ns); } static void nvme_scan_namespaces(struct nvme_dev *dev, unsigned nn) @@ -2427,18 +2433,14 @@ static void nvme_scan_namespaces(struct nvme_dev *dev, unsigned nn) for (i = 1; i <= nn; i++) { ns = nvme_find_ns(dev, i); if (ns) { - if (revalidate_disk(ns->disk)) { + if (revalidate_disk(ns->disk)) nvme_ns_remove(ns); - nvme_free_namespace(ns); - } } else nvme_alloc_ns(dev, i); } list_for_each_entry_safe(ns, next, &dev->namespaces, list) { - if (ns->ns_id > nn) { + if (ns->ns_id > nn) nvme_ns_remove(ns); - nvme_free_namespace(ns); - } } list_sort(NULL, &dev->namespaces, ns_cmp); } @@ -2828,9 +2830,9 @@ static void 
nvme_dev_shutdown(struct nvme_dev *dev) static void nvme_dev_remove(struct nvme_dev *dev) { - struct nvme_ns *ns; + struct nvme_ns *ns, *next; - list_for_each_entry(ns, &dev->namespaces, list) + list_for_each_entry_safe(ns, next, &dev->namespaces, list) nvme_ns_remove(ns); } @@ -2886,21 +2888,12 @@ static void nvme_release_instance(struct nvme_dev *dev) spin_unlock(&dev_list_lock); } -static void nvme_free_namespaces(struct nvme_dev *dev) -{ - struct nvme_ns *ns, *next; - - list_for_each_entry_safe(ns, next, &dev->namespaces, list) - nvme_free_namespace(ns); -} - static void nvme_free_dev(struct kref *kref) { struct nvme_dev *dev = container_of(kref, struct nvme_dev, kref); put_device(dev->dev); put_device(dev->device); - nvme_free_namespaces(dev); nvme_release_instance(dev); if (dev->tagset.tags) blk_mq_free_tag_set(&dev->tagset); @@ -2974,14 +2967,15 @@ static const struct file_operations nvme_dev_fops = { .compat_ioctl = nvme_dev_ioctl, }; -static int nvme_dev_start(struct nvme_dev *dev) +static void nvme_probe_work(struct work_struct *work) { - int result; + struct nvme_dev *dev = container_of(work, struct nvme_dev, probe_work); bool start_thread = false; + int result; result = nvme_dev_map(dev); if (result) - return result; + goto out; result = nvme_configure_admin_queue(dev); if (result) @@ -3016,7 +3010,20 @@ static int nvme_dev_start(struct nvme_dev *dev) goto free_tags; dev->event_limit = 1; - return result; + + /* + * Keep the controller around but remove all namespaces if we don't have + * any working I/O queue. 
+ */ + if (dev->online_queues < 2) { + dev_warn(dev->dev, "IO queues not created\n"); + nvme_dev_remove(dev); + } else { + nvme_unfreeze_queues(dev); + nvme_dev_add(dev); + } + + return; free_tags: nvme_dev_remove_admin(dev); @@ -3028,7 +3035,9 @@ static int nvme_dev_start(struct nvme_dev *dev) nvme_dev_list_remove(dev); unmap: nvme_dev_unmap(dev); - return result; + out: + if (!work_busy(&dev->reset_work)) + nvme_dead_ctrl(dev); } static int nvme_remove_dead_ctrl(void *arg) @@ -3042,33 +3051,6 @@ static int nvme_remove_dead_ctrl(void *arg) return 0; } -static void nvme_remove_disks(struct work_struct *ws) -{ - struct nvme_dev *dev = container_of(ws, struct nvme_dev, reset_work); - - nvme_free_queues(dev, 1); - nvme_dev_remove(dev); -} - -static int nvme_dev_resume(struct nvme_dev *dev) -{ - int ret; - - ret = nvme_dev_start(dev); - if (ret) - return ret; - if (dev->online_queues < 2) { - spin_lock(&dev_list_lock); - dev->reset_workfn = nvme_remove_disks; - queue_work(nvme_workq, &dev->reset_work); - spin_unlock(&dev_list_lock); - } else { - nvme_unfreeze_queues(dev); - nvme_dev_add(dev); - } - return 0; -} - static void nvme_dead_ctrl(struct nvme_dev *dev) { dev_warn(dev->dev, "Device failed to resume\n"); @@ -3081,8 +3063,9 @@ static void nvme_dead_ctrl(struct nvme_dev *dev) } } -static void nvme_dev_reset(struct nvme_dev *dev) +static void nvme_reset_work(struct work_struct *ws) { + struct nvme_dev *dev = container_of(ws, struct nvme_dev, reset_work); bool in_probe = work_busy(&dev->probe_work); nvme_dev_shutdown(dev); @@ -3102,31 +3085,24 @@ static void nvme_dev_reset(struct nvme_dev *dev) schedule_work(&dev->probe_work); } -static void nvme_reset_failed_dev(struct work_struct *ws) -{ - struct nvme_dev *dev = container_of(ws, struct nvme_dev, reset_work); - nvme_dev_reset(dev); -} - -static void nvme_reset_workfn(struct work_struct *work) +static int __nvme_reset(struct nvme_dev *dev) { - struct nvme_dev *dev = container_of(work, struct nvme_dev, reset_work); - 
dev->reset_workfn(work); + if (work_pending(&dev->reset_work)) + return -EBUSY; + list_del_init(&dev->node); + queue_work(nvme_workq, &dev->reset_work); + return 0; } static int nvme_reset(struct nvme_dev *dev) { - int ret = -EBUSY; + int ret; if (!dev->admin_q || blk_queue_dying(dev->admin_q)) return -ENODEV; spin_lock(&dev_list_lock); - if (!work_pending(&dev->reset_work)) { - dev->reset_workfn = nvme_reset_failed_dev; - queue_work(nvme_workq, &dev->reset_work); - ret = 0; - } + ret = __nvme_reset(dev); spin_unlock(&dev_list_lock); if (!ret) { @@ -3153,7 +3129,6 @@ static ssize_t nvme_sysfs_reset(struct device *dev, } static DEVICE_ATTR(reset_controller, S_IWUSR, NULL, nvme_sysfs_reset); -static void nvme_async_probe(struct work_struct *work); static int nvme_probe(struct pci_dev *pdev, const struct pci_device_id *id) { int node, result = -ENOMEM; @@ -3176,8 +3151,7 @@ static int nvme_probe(struct pci_dev *pdev, const struct pci_device_id *id) goto free; INIT_LIST_HEAD(&dev->namespaces); - dev->reset_workfn = nvme_reset_failed_dev; - INIT_WORK(&dev->reset_work, nvme_reset_workfn); + INIT_WORK(&dev->reset_work, nvme_reset_work); dev->dev = get_device(&pdev->dev); pci_set_drvdata(pdev, dev); result = nvme_set_instance(dev); @@ -3205,7 +3179,7 @@ static int nvme_probe(struct pci_dev *pdev, const struct pci_device_id *id) INIT_LIST_HEAD(&dev->node); INIT_WORK(&dev->scan_work, nvme_dev_scan); - INIT_WORK(&dev->probe_work, nvme_async_probe); + INIT_WORK(&dev->probe_work, nvme_probe_work); schedule_work(&dev->probe_work); return 0; @@ -3225,14 +3199,6 @@ static int nvme_probe(struct pci_dev *pdev, const struct pci_device_id *id) return result; } -static void nvme_async_probe(struct work_struct *work) -{ - struct nvme_dev *dev = container_of(work, struct nvme_dev, probe_work); - - if (nvme_dev_resume(dev) && !work_busy(&dev->reset_work)) - nvme_dead_ctrl(dev); -} - static void nvme_reset_notify(struct pci_dev *pdev, bool prepare) { struct nvme_dev *dev = 
pci_get_drvdata(pdev); @@ -3240,7 +3206,7 @@ static void nvme_reset_notify(struct pci_dev *pdev, bool prepare) if (prepare) nvme_dev_shutdown(dev); else - nvme_dev_resume(dev); + schedule_work(&dev->probe_work); } static void nvme_shutdown(struct pci_dev *pdev) @@ -3294,10 +3260,7 @@ static int nvme_resume(struct device *dev) struct pci_dev *pdev = to_pci_dev(dev); struct nvme_dev *ndev = pci_get_drvdata(pdev); - if (nvme_dev_resume(ndev) && !work_busy(&ndev->reset_work)) { - ndev->reset_workfn = nvme_reset_failed_dev; - queue_work(nvme_workq, &ndev->reset_work); - } + schedule_work(&ndev->probe_work); return 0; } #endif diff --git a/drivers/block/nvme-scsi.c b/drivers/nvme/host/scsi.c index e5a63f06fb0f..c3d8d3887a31 100644 --- a/drivers/block/nvme-scsi.c +++ b/drivers/nvme/host/scsi.c @@ -17,7 +17,6 @@ * each command is translated. */ -#include <linux/nvme.h> #include <linux/bio.h> #include <linux/bitops.h> #include <linux/blkdev.h> @@ -45,6 +44,7 @@ #include <scsi/sg.h> #include <scsi/scsi.h> +#include "nvme.h" static int sg_version_num = 30534; /* 2 digits for each component */ diff --git a/include/linux/nvme.h b/include/linux/nvme.h index b5812c395351..3af5f454c04a 100644 --- a/include/linux/nvme.h +++ b/include/linux/nvme.h @@ -15,10 +15,7 @@ #ifndef _LINUX_NVME_H #define _LINUX_NVME_H -#include <uapi/linux/nvme.h> -#include <linux/pci.h> -#include <linux/kref.h> -#include <linux/blk-mq.h> +#include <linux/types.h> struct nvme_bar { __u64 cap; /* Controller Capabilities */ @@ -76,115 +73,528 @@ enum { NVME_CSTS_SHST_MASK = 3 << 2, }; -extern unsigned char nvme_io_timeout; -#define NVME_IO_TIMEOUT (nvme_io_timeout * HZ) +struct nvme_id_power_state { + __le16 max_power; /* centiwatts */ + __u8 rsvd2; + __u8 flags; + __le32 entry_lat; /* microseconds */ + __le32 exit_lat; /* microseconds */ + __u8 read_tput; + __u8 read_lat; + __u8 write_tput; + __u8 write_lat; + __le16 idle_power; + __u8 idle_scale; + __u8 rsvd19; + __le16 active_power; + __u8 
active_work_scale; + __u8 rsvd23[9]; +}; -/* - * Represents an NVM Express device. Each nvme_dev is a PCI function. - */ -struct nvme_dev { - struct list_head node; - struct nvme_queue **queues; - struct request_queue *admin_q; - struct blk_mq_tag_set tagset; - struct blk_mq_tag_set admin_tagset; - u32 __iomem *dbs; - struct device *dev; - struct dma_pool *prp_page_pool; - struct dma_pool *prp_small_pool; - int instance; - unsigned queue_count; - unsigned online_queues; - unsigned max_qid; - int q_depth; - u32 db_stride; - u32 ctrl_config; - struct msix_entry *entry; - struct nvme_bar __iomem *bar; - struct list_head namespaces; - struct kref kref; - struct device *device; - work_func_t reset_workfn; - struct work_struct reset_work; - struct work_struct probe_work; - struct work_struct scan_work; - char name[12]; - char serial[20]; - char model[40]; - char firmware_rev[8]; - bool subsystem; - u32 max_hw_sectors; - u32 stripe_size; - u32 page_size; - void __iomem *cmb; - dma_addr_t cmb_dma_addr; - u64 cmb_size; - u32 cmbsz; - u16 oncs; - u16 abort_limit; - u8 event_limit; - u8 vwc; +enum { + NVME_PS_FLAGS_MAX_POWER_SCALE = 1 << 0, + NVME_PS_FLAGS_NON_OP_STATE = 1 << 1, }; -/* - * An NVM Express namespace is equivalent to a SCSI LUN - */ -struct nvme_ns { - struct list_head list; +struct nvme_id_ctrl { + __le16 vid; + __le16 ssvid; + char sn[20]; + char mn[40]; + char fr[8]; + __u8 rab; + __u8 ieee[3]; + __u8 mic; + __u8 mdts; + __le16 cntlid; + __le32 ver; + __u8 rsvd84[172]; + __le16 oacs; + __u8 acl; + __u8 aerl; + __u8 frmw; + __u8 lpa; + __u8 elpe; + __u8 npss; + __u8 avscc; + __u8 apsta; + __le16 wctemp; + __le16 cctemp; + __u8 rsvd270[242]; + __u8 sqes; + __u8 cqes; + __u8 rsvd514[2]; + __le32 nn; + __le16 oncs; + __le16 fuses; + __u8 fna; + __u8 vwc; + __le16 awun; + __le16 awupf; + __u8 nvscc; + __u8 rsvd531; + __le16 acwu; + __u8 rsvd534[2]; + __le32 sgls; + __u8 rsvd540[1508]; + struct nvme_id_power_state psd[32]; + __u8 vs[1024]; +}; - struct nvme_dev 
*dev; - struct request_queue *queue; - struct gendisk *disk; +enum { + NVME_CTRL_ONCS_COMPARE = 1 << 0, + NVME_CTRL_ONCS_WRITE_UNCORRECTABLE = 1 << 1, + NVME_CTRL_ONCS_DSM = 1 << 2, + NVME_CTRL_VWC_PRESENT = 1 << 0, +}; - unsigned ns_id; - int lba_shift; - u16 ms; - bool ext; - u8 pi_type; - u64 mode_select_num_blocks; - u32 mode_select_block_len; +struct nvme_lbaf { + __le16 ms; + __u8 ds; + __u8 rp; }; -/* - * The nvme_iod describes the data in an I/O, including the list of PRP - * entries. You can't see it in this data structure because C doesn't let - * me express that. Use nvme_alloc_iod to ensure there's enough space - * allocated to store the PRP list. - */ -struct nvme_iod { - unsigned long private; /* For the use of the submitter of the I/O */ - int npages; /* In the PRP list. 0 means small pool in use */ - int offset; /* Of PRP list */ - int nents; /* Used in scatterlist */ - int length; /* Of data, in bytes */ - dma_addr_t first_dma; - struct scatterlist meta_sg[1]; /* metadata requires single contiguous buffer */ - struct scatterlist sg[0]; -}; - -static inline u64 nvme_block_nr(struct nvme_ns *ns, sector_t sector) -{ - return (sector >> (ns->lba_shift - 9)); -} - -int nvme_submit_sync_cmd(struct request_queue *q, struct nvme_command *cmd, - void *buf, unsigned bufflen); -int __nvme_submit_sync_cmd(struct request_queue *q, struct nvme_command *cmd, - void *buffer, void __user *ubuffer, unsigned bufflen, - u32 *result, unsigned timeout); -int nvme_identify_ctrl(struct nvme_dev *dev, struct nvme_id_ctrl **id); -int nvme_identify_ns(struct nvme_dev *dev, unsigned nsid, - struct nvme_id_ns **id); -int nvme_get_log_page(struct nvme_dev *dev, struct nvme_smart_log **log); -int nvme_get_features(struct nvme_dev *dev, unsigned fid, unsigned nsid, - dma_addr_t dma_addr, u32 *result); -int nvme_set_features(struct nvme_dev *dev, unsigned fid, unsigned dword11, - dma_addr_t dma_addr, u32 *result); - -struct sg_io_hdr; - -int nvme_sg_io(struct nvme_ns *ns, struct 
sg_io_hdr __user *u_hdr); -int nvme_sg_io32(struct nvme_ns *ns, unsigned long arg); -int nvme_sg_get_version_num(int __user *ip); +struct nvme_id_ns { + __le64 nsze; + __le64 ncap; + __le64 nuse; + __u8 nsfeat; + __u8 nlbaf; + __u8 flbas; + __u8 mc; + __u8 dpc; + __u8 dps; + __u8 nmic; + __u8 rescap; + __u8 fpi; + __u8 rsvd33; + __le16 nawun; + __le16 nawupf; + __le16 nacwu; + __le16 nabsn; + __le16 nabo; + __le16 nabspf; + __u16 rsvd46; + __le64 nvmcap[2]; + __u8 rsvd64[40]; + __u8 nguid[16]; + __u8 eui64[8]; + struct nvme_lbaf lbaf[16]; + __u8 rsvd192[192]; + __u8 vs[3712]; +}; + +enum { + NVME_NS_FEAT_THIN = 1 << 0, + NVME_NS_FLBAS_LBA_MASK = 0xf, + NVME_NS_FLBAS_META_EXT = 0x10, + NVME_LBAF_RP_BEST = 0, + NVME_LBAF_RP_BETTER = 1, + NVME_LBAF_RP_GOOD = 2, + NVME_LBAF_RP_DEGRADED = 3, + NVME_NS_DPC_PI_LAST = 1 << 4, + NVME_NS_DPC_PI_FIRST = 1 << 3, + NVME_NS_DPC_PI_TYPE3 = 1 << 2, + NVME_NS_DPC_PI_TYPE2 = 1 << 1, + NVME_NS_DPC_PI_TYPE1 = 1 << 0, + NVME_NS_DPS_PI_FIRST = 1 << 3, + NVME_NS_DPS_PI_MASK = 0x7, + NVME_NS_DPS_PI_TYPE1 = 1, + NVME_NS_DPS_PI_TYPE2 = 2, + NVME_NS_DPS_PI_TYPE3 = 3, +}; + +struct nvme_smart_log { + __u8 critical_warning; + __u8 temperature[2]; + __u8 avail_spare; + __u8 spare_thresh; + __u8 percent_used; + __u8 rsvd6[26]; + __u8 data_units_read[16]; + __u8 data_units_written[16]; + __u8 host_reads[16]; + __u8 host_writes[16]; + __u8 ctrl_busy_time[16]; + __u8 power_cycles[16]; + __u8 power_on_hours[16]; + __u8 unsafe_shutdowns[16]; + __u8 media_errors[16]; + __u8 num_err_log_entries[16]; + __le32 warning_temp_time; + __le32 critical_comp_time; + __le16 temp_sensor[8]; + __u8 rsvd216[296]; +}; + +enum { + NVME_SMART_CRIT_SPARE = 1 << 0, + NVME_SMART_CRIT_TEMPERATURE = 1 << 1, + NVME_SMART_CRIT_RELIABILITY = 1 << 2, + NVME_SMART_CRIT_MEDIA = 1 << 3, + NVME_SMART_CRIT_VOLATILE_MEMORY = 1 << 4, +}; + +enum { + NVME_AER_NOTICE_NS_CHANGED = 0x0002, +}; + +struct nvme_lba_range_type { + __u8 type; + __u8 attributes; + __u8 rsvd2[14]; + __u64 slba; 
+ __u64 nlb; + __u8 guid[16]; + __u8 rsvd48[16]; +}; + +enum { + NVME_LBART_TYPE_FS = 0x01, + NVME_LBART_TYPE_RAID = 0x02, + NVME_LBART_TYPE_CACHE = 0x03, + NVME_LBART_TYPE_SWAP = 0x04, + + NVME_LBART_ATTRIB_TEMP = 1 << 0, + NVME_LBART_ATTRIB_HIDE = 1 << 1, +}; + +struct nvme_reservation_status { + __le32 gen; + __u8 rtype; + __u8 regctl[2]; + __u8 resv5[2]; + __u8 ptpls; + __u8 resv10[13]; + struct { + __le16 cntlid; + __u8 rcsts; + __u8 resv3[5]; + __le64 hostid; + __le64 rkey; + } regctl_ds[]; +}; + +/* I/O commands */ + +enum nvme_opcode { + nvme_cmd_flush = 0x00, + nvme_cmd_write = 0x01, + nvme_cmd_read = 0x02, + nvme_cmd_write_uncor = 0x04, + nvme_cmd_compare = 0x05, + nvme_cmd_write_zeroes = 0x08, + nvme_cmd_dsm = 0x09, + nvme_cmd_resv_register = 0x0d, + nvme_cmd_resv_report = 0x0e, + nvme_cmd_resv_acquire = 0x11, + nvme_cmd_resv_release = 0x15, +}; + +struct nvme_common_command { + __u8 opcode; + __u8 flags; + __u16 command_id; + __le32 nsid; + __le32 cdw2[2]; + __le64 metadata; + __le64 prp1; + __le64 prp2; + __le32 cdw10[6]; +}; + +struct nvme_rw_command { + __u8 opcode; + __u8 flags; + __u16 command_id; + __le32 nsid; + __u64 rsvd2; + __le64 metadata; + __le64 prp1; + __le64 prp2; + __le64 slba; + __le16 length; + __le16 control; + __le32 dsmgmt; + __le32 reftag; + __le16 apptag; + __le16 appmask; +}; + +enum { + NVME_RW_LR = 1 << 15, + NVME_RW_FUA = 1 << 14, + NVME_RW_DSM_FREQ_UNSPEC = 0, + NVME_RW_DSM_FREQ_TYPICAL = 1, + NVME_RW_DSM_FREQ_RARE = 2, + NVME_RW_DSM_FREQ_READS = 3, + NVME_RW_DSM_FREQ_WRITES = 4, + NVME_RW_DSM_FREQ_RW = 5, + NVME_RW_DSM_FREQ_ONCE = 6, + NVME_RW_DSM_FREQ_PREFETCH = 7, + NVME_RW_DSM_FREQ_TEMP = 8, + NVME_RW_DSM_LATENCY_NONE = 0 << 4, + NVME_RW_DSM_LATENCY_IDLE = 1 << 4, + NVME_RW_DSM_LATENCY_NORM = 2 << 4, + NVME_RW_DSM_LATENCY_LOW = 3 << 4, + NVME_RW_DSM_SEQ_REQ = 1 << 6, + NVME_RW_DSM_COMPRESSED = 1 << 7, + NVME_RW_PRINFO_PRCHK_REF = 1 << 10, + NVME_RW_PRINFO_PRCHK_APP = 1 << 11, + NVME_RW_PRINFO_PRCHK_GUARD = 1 << 12, + 
NVME_RW_PRINFO_PRACT = 1 << 13, +}; + +struct nvme_dsm_cmd { + __u8 opcode; + __u8 flags; + __u16 command_id; + __le32 nsid; + __u64 rsvd2[2]; + __le64 prp1; + __le64 prp2; + __le32 nr; + __le32 attributes; + __u32 rsvd12[4]; +}; + +enum { + NVME_DSMGMT_IDR = 1 << 0, + NVME_DSMGMT_IDW = 1 << 1, + NVME_DSMGMT_AD = 1 << 2, +}; + +struct nvme_dsm_range { + __le32 cattr; + __le32 nlb; + __le64 slba; +}; + +/* Admin commands */ + +enum nvme_admin_opcode { + nvme_admin_delete_sq = 0x00, + nvme_admin_create_sq = 0x01, + nvme_admin_get_log_page = 0x02, + nvme_admin_delete_cq = 0x04, + nvme_admin_create_cq = 0x05, + nvme_admin_identify = 0x06, + nvme_admin_abort_cmd = 0x08, + nvme_admin_set_features = 0x09, + nvme_admin_get_features = 0x0a, + nvme_admin_async_event = 0x0c, + nvme_admin_activate_fw = 0x10, + nvme_admin_download_fw = 0x11, + nvme_admin_format_nvm = 0x80, + nvme_admin_security_send = 0x81, + nvme_admin_security_recv = 0x82, +}; + +enum { + NVME_QUEUE_PHYS_CONTIG = (1 << 0), + NVME_CQ_IRQ_ENABLED = (1 << 1), + NVME_SQ_PRIO_URGENT = (0 << 1), + NVME_SQ_PRIO_HIGH = (1 << 1), + NVME_SQ_PRIO_MEDIUM = (2 << 1), + NVME_SQ_PRIO_LOW = (3 << 1), + NVME_FEAT_ARBITRATION = 0x01, + NVME_FEAT_POWER_MGMT = 0x02, + NVME_FEAT_LBA_RANGE = 0x03, + NVME_FEAT_TEMP_THRESH = 0x04, + NVME_FEAT_ERR_RECOVERY = 0x05, + NVME_FEAT_VOLATILE_WC = 0x06, + NVME_FEAT_NUM_QUEUES = 0x07, + NVME_FEAT_IRQ_COALESCE = 0x08, + NVME_FEAT_IRQ_CONFIG = 0x09, + NVME_FEAT_WRITE_ATOMIC = 0x0a, + NVME_FEAT_ASYNC_EVENT = 0x0b, + NVME_FEAT_AUTO_PST = 0x0c, + NVME_FEAT_SW_PROGRESS = 0x80, + NVME_FEAT_HOST_ID = 0x81, + NVME_FEAT_RESV_MASK = 0x82, + NVME_FEAT_RESV_PERSIST = 0x83, + NVME_LOG_ERROR = 0x01, + NVME_LOG_SMART = 0x02, + NVME_LOG_FW_SLOT = 0x03, + NVME_LOG_RESERVATION = 0x80, + NVME_FWACT_REPL = (0 << 3), + NVME_FWACT_REPL_ACTV = (1 << 3), + NVME_FWACT_ACTV = (2 << 3), +}; + +struct nvme_identify { + __u8 opcode; + __u8 flags; + __u16 command_id; + __le32 nsid; + __u64 rsvd2[2]; + __le64 prp1; + __le64 
prp2; + __le32 cns; + __u32 rsvd11[5]; +}; + +struct nvme_features { + __u8 opcode; + __u8 flags; + __u16 command_id; + __le32 nsid; + __u64 rsvd2[2]; + __le64 prp1; + __le64 prp2; + __le32 fid; + __le32 dword11; + __u32 rsvd12[4]; +}; + +struct nvme_create_cq { + __u8 opcode; + __u8 flags; + __u16 command_id; + __u32 rsvd1[5]; + __le64 prp1; + __u64 rsvd8; + __le16 cqid; + __le16 qsize; + __le16 cq_flags; + __le16 irq_vector; + __u32 rsvd12[4]; +}; + +struct nvme_create_sq { + __u8 opcode; + __u8 flags; + __u16 command_id; + __u32 rsvd1[5]; + __le64 prp1; + __u64 rsvd8; + __le16 sqid; + __le16 qsize; + __le16 sq_flags; + __le16 cqid; + __u32 rsvd12[4]; +}; + +struct nvme_delete_queue { + __u8 opcode; + __u8 flags; + __u16 command_id; + __u32 rsvd1[9]; + __le16 qid; + __u16 rsvd10; + __u32 rsvd11[5]; +}; + +struct nvme_abort_cmd { + __u8 opcode; + __u8 flags; + __u16 command_id; + __u32 rsvd1[9]; + __le16 sqid; + __u16 cid; + __u32 rsvd11[5]; +}; + +struct nvme_download_firmware { + __u8 opcode; + __u8 flags; + __u16 command_id; + __u32 rsvd1[5]; + __le64 prp1; + __le64 prp2; + __le32 numd; + __le32 offset; + __u32 rsvd12[4]; +}; + +struct nvme_format_cmd { + __u8 opcode; + __u8 flags; + __u16 command_id; + __le32 nsid; + __u64 rsvd2[4]; + __le32 cdw10; + __u32 rsvd11[5]; +}; + +struct nvme_command { + union { + struct nvme_common_command common; + struct nvme_rw_command rw; + struct nvme_identify identify; + struct nvme_features features; + struct nvme_create_cq create_cq; + struct nvme_create_sq create_sq; + struct nvme_delete_queue delete_queue; + struct nvme_download_firmware dlfw; + struct nvme_format_cmd format; + struct nvme_dsm_cmd dsm; + struct nvme_abort_cmd abort; + }; +}; + +enum { + NVME_SC_SUCCESS = 0x0, + NVME_SC_INVALID_OPCODE = 0x1, + NVME_SC_INVALID_FIELD = 0x2, + NVME_SC_CMDID_CONFLICT = 0x3, + NVME_SC_DATA_XFER_ERROR = 0x4, + NVME_SC_POWER_LOSS = 0x5, + NVME_SC_INTERNAL = 0x6, + NVME_SC_ABORT_REQ = 0x7, + NVME_SC_ABORT_QUEUE = 0x8, + 
NVME_SC_FUSED_FAIL = 0x9, + NVME_SC_FUSED_MISSING = 0xa, + NVME_SC_INVALID_NS = 0xb, + NVME_SC_CMD_SEQ_ERROR = 0xc, + NVME_SC_SGL_INVALID_LAST = 0xd, + NVME_SC_SGL_INVALID_COUNT = 0xe, + NVME_SC_SGL_INVALID_DATA = 0xf, + NVME_SC_SGL_INVALID_METADATA = 0x10, + NVME_SC_SGL_INVALID_TYPE = 0x11, + NVME_SC_LBA_RANGE = 0x80, + NVME_SC_CAP_EXCEEDED = 0x81, + NVME_SC_NS_NOT_READY = 0x82, + NVME_SC_RESERVATION_CONFLICT = 0x83, + NVME_SC_CQ_INVALID = 0x100, + NVME_SC_QID_INVALID = 0x101, + NVME_SC_QUEUE_SIZE = 0x102, + NVME_SC_ABORT_LIMIT = 0x103, + NVME_SC_ABORT_MISSING = 0x104, + NVME_SC_ASYNC_LIMIT = 0x105, + NVME_SC_FIRMWARE_SLOT = 0x106, + NVME_SC_FIRMWARE_IMAGE = 0x107, + NVME_SC_INVALID_VECTOR = 0x108, + NVME_SC_INVALID_LOG_PAGE = 0x109, + NVME_SC_INVALID_FORMAT = 0x10a, + NVME_SC_FIRMWARE_NEEDS_RESET = 0x10b, + NVME_SC_INVALID_QUEUE = 0x10c, + NVME_SC_FEATURE_NOT_SAVEABLE = 0x10d, + NVME_SC_FEATURE_NOT_CHANGEABLE = 0x10e, + NVME_SC_FEATURE_NOT_PER_NS = 0x10f, + NVME_SC_FW_NEEDS_RESET_SUBSYS = 0x110, + NVME_SC_BAD_ATTRIBUTES = 0x180, + NVME_SC_INVALID_PI = 0x181, + NVME_SC_READ_ONLY = 0x182, + NVME_SC_WRITE_FAULT = 0x280, + NVME_SC_READ_ERROR = 0x281, + NVME_SC_GUARD_CHECK = 0x282, + NVME_SC_APPTAG_CHECK = 0x283, + NVME_SC_REFTAG_CHECK = 0x284, + NVME_SC_COMPARE_FAILED = 0x285, + NVME_SC_ACCESS_DENIED = 0x286, + NVME_SC_DNR = 0x4000, +}; + +struct nvme_completion { + __le32 result; /* Used by admin commands to return data */ + __u32 rsvd; + __le16 sq_head; /* how much of this queue may be reclaimed */ + __le16 sq_id; /* submission queue that generated this entry */ + __u16 command_id; /* of the command which completed */ + __le16 status; /* did the command fail, and if so, why? 
*/ +}; + +#define NVME_VS(major, minor) (((major) << 16) | ((minor) << 8)) #endif /* _LINUX_NVME_H */ diff --git a/include/uapi/linux/loop.h b/include/uapi/linux/loop.h index e0cecd2eabdc..c8125ec1f4f2 100644 --- a/include/uapi/linux/loop.h +++ b/include/uapi/linux/loop.h @@ -21,6 +21,7 @@ enum { LO_FLAGS_READ_ONLY = 1, LO_FLAGS_AUTOCLEAR = 4, LO_FLAGS_PARTSCAN = 8, + LO_FLAGS_DIRECT_IO = 16, }; #include <asm/posix_types.h> /* for __kernel_old_dev_t */ @@ -86,6 +87,7 @@ struct loop_info64 { #define LOOP_GET_STATUS64 0x4C05 #define LOOP_CHANGE_FD 0x4C06 #define LOOP_SET_CAPACITY 0x4C07 +#define LOOP_SET_DIRECT_IO 0x4C08 /* /dev/loop-control interface */ #define LOOP_CTL_ADD 0x4C80 diff --git a/include/uapi/linux/nvme.h b/include/uapi/linux/nvme.h deleted file mode 100644 index 8864194a4151..000000000000 --- a/include/uapi/linux/nvme.h +++ /dev/null @@ -1,589 +0,0 @@ -/* - * Definitions for the NVM Express interface - * Copyright (c) 2011-2014, Intel Corporation. - * - * This program is free software; you can redistribute it and/or modify it - * under the terms and conditions of the GNU General Public License, - * version 2, as published by the Free Software Foundation. - * - * This program is distributed in the hope it will be useful, but WITHOUT - * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or - * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for - * more details. 
- */ - -#ifndef _UAPI_LINUX_NVME_H -#define _UAPI_LINUX_NVME_H - -#include <linux/types.h> - -struct nvme_id_power_state { - __le16 max_power; /* centiwatts */ - __u8 rsvd2; - __u8 flags; - __le32 entry_lat; /* microseconds */ - __le32 exit_lat; /* microseconds */ - __u8 read_tput; - __u8 read_lat; - __u8 write_tput; - __u8 write_lat; - __le16 idle_power; - __u8 idle_scale; - __u8 rsvd19; - __le16 active_power; - __u8 active_work_scale; - __u8 rsvd23[9]; -}; - -enum { - NVME_PS_FLAGS_MAX_POWER_SCALE = 1 << 0, - NVME_PS_FLAGS_NON_OP_STATE = 1 << 1, -}; - -struct nvme_id_ctrl { - __le16 vid; - __le16 ssvid; - char sn[20]; - char mn[40]; - char fr[8]; - __u8 rab; - __u8 ieee[3]; - __u8 mic; - __u8 mdts; - __u16 cntlid; - __u32 ver; - __u8 rsvd84[172]; - __le16 oacs; - __u8 acl; - __u8 aerl; - __u8 frmw; - __u8 lpa; - __u8 elpe; - __u8 npss; - __u8 avscc; - __u8 apsta; - __le16 wctemp; - __le16 cctemp; - __u8 rsvd270[242]; - __u8 sqes; - __u8 cqes; - __u8 rsvd514[2]; - __le32 nn; - __le16 oncs; - __le16 fuses; - __u8 fna; - __u8 vwc; - __le16 awun; - __le16 awupf; - __u8 nvscc; - __u8 rsvd531; - __le16 acwu; - __u8 rsvd534[2]; - __le32 sgls; - __u8 rsvd540[1508]; - struct nvme_id_power_state psd[32]; - __u8 vs[1024]; -}; - -enum { - NVME_CTRL_ONCS_COMPARE = 1 << 0, - NVME_CTRL_ONCS_WRITE_UNCORRECTABLE = 1 << 1, - NVME_CTRL_ONCS_DSM = 1 << 2, - NVME_CTRL_VWC_PRESENT = 1 << 0, -}; - -struct nvme_lbaf { - __le16 ms; - __u8 ds; - __u8 rp; -}; - -struct nvme_id_ns { - __le64 nsze; - __le64 ncap; - __le64 nuse; - __u8 nsfeat; - __u8 nlbaf; - __u8 flbas; - __u8 mc; - __u8 dpc; - __u8 dps; - __u8 nmic; - __u8 rescap; - __u8 fpi; - __u8 rsvd33; - __le16 nawun; - __le16 nawupf; - __le16 nacwu; - __le16 nabsn; - __le16 nabo; - __le16 nabspf; - __u16 rsvd46; - __le64 nvmcap[2]; - __u8 rsvd64[40]; - __u8 nguid[16]; - __u8 eui64[8]; - struct nvme_lbaf lbaf[16]; - __u8 rsvd192[192]; - __u8 vs[3712]; -}; - -enum { - NVME_NS_FEAT_THIN = 1 << 0, - NVME_NS_FLBAS_LBA_MASK = 0xf, - 
NVME_NS_FLBAS_META_EXT = 0x10, - NVME_LBAF_RP_BEST = 0, - NVME_LBAF_RP_BETTER = 1, - NVME_LBAF_RP_GOOD = 2, - NVME_LBAF_RP_DEGRADED = 3, - NVME_NS_DPC_PI_LAST = 1 << 4, - NVME_NS_DPC_PI_FIRST = 1 << 3, - NVME_NS_DPC_PI_TYPE3 = 1 << 2, - NVME_NS_DPC_PI_TYPE2 = 1 << 1, - NVME_NS_DPC_PI_TYPE1 = 1 << 0, - NVME_NS_DPS_PI_FIRST = 1 << 3, - NVME_NS_DPS_PI_MASK = 0x7, - NVME_NS_DPS_PI_TYPE1 = 1, - NVME_NS_DPS_PI_TYPE2 = 2, - NVME_NS_DPS_PI_TYPE3 = 3, -}; - -struct nvme_smart_log { - __u8 critical_warning; - __u8 temperature[2]; - __u8 avail_spare; - __u8 spare_thresh; - __u8 percent_used; - __u8 rsvd6[26]; - __u8 data_units_read[16]; - __u8 data_units_written[16]; - __u8 host_reads[16]; - __u8 host_writes[16]; - __u8 ctrl_busy_time[16]; - __u8 power_cycles[16]; - __u8 power_on_hours[16]; - __u8 unsafe_shutdowns[16]; - __u8 media_errors[16]; - __u8 num_err_log_entries[16]; - __le32 warning_temp_time; - __le32 critical_comp_time; - __le16 temp_sensor[8]; - __u8 rsvd216[296]; -}; - -enum { - NVME_SMART_CRIT_SPARE = 1 << 0, - NVME_SMART_CRIT_TEMPERATURE = 1 << 1, - NVME_SMART_CRIT_RELIABILITY = 1 << 2, - NVME_SMART_CRIT_MEDIA = 1 << 3, - NVME_SMART_CRIT_VOLATILE_MEMORY = 1 << 4, -}; - -enum { - NVME_AER_NOTICE_NS_CHANGED = 0x0002, -}; - -struct nvme_lba_range_type { - __u8 type; - __u8 attributes; - __u8 rsvd2[14]; - __u64 slba; - __u64 nlb; - __u8 guid[16]; - __u8 rsvd48[16]; -}; - -enum { - NVME_LBART_TYPE_FS = 0x01, - NVME_LBART_TYPE_RAID = 0x02, - NVME_LBART_TYPE_CACHE = 0x03, - NVME_LBART_TYPE_SWAP = 0x04, - - NVME_LBART_ATTRIB_TEMP = 1 << 0, - NVME_LBART_ATTRIB_HIDE = 1 << 1, -}; - -struct nvme_reservation_status { - __le32 gen; - __u8 rtype; - __u8 regctl[2]; - __u8 resv5[2]; - __u8 ptpls; - __u8 resv10[13]; - struct { - __le16 cntlid; - __u8 rcsts; - __u8 resv3[5]; - __le64 hostid; - __le64 rkey; - } regctl_ds[]; -}; - -/* I/O commands */ - -enum nvme_opcode { - nvme_cmd_flush = 0x00, - nvme_cmd_write = 0x01, - nvme_cmd_read = 0x02, - nvme_cmd_write_uncor = 0x04, - 
nvme_cmd_compare = 0x05, - nvme_cmd_write_zeroes = 0x08, - nvme_cmd_dsm = 0x09, - nvme_cmd_resv_register = 0x0d, - nvme_cmd_resv_report = 0x0e, - nvme_cmd_resv_acquire = 0x11, - nvme_cmd_resv_release = 0x15, -}; - -struct nvme_common_command { - __u8 opcode; - __u8 flags; - __u16 command_id; - __le32 nsid; - __le32 cdw2[2]; - __le64 metadata; - __le64 prp1; - __le64 prp2; - __le32 cdw10[6]; -}; - -struct nvme_rw_command { - __u8 opcode; - __u8 flags; - __u16 command_id; - __le32 nsid; - __u64 rsvd2; - __le64 metadata; - __le64 prp1; - __le64 prp2; - __le64 slba; - __le16 length; - __le16 control; - __le32 dsmgmt; - __le32 reftag; - __le16 apptag; - __le16 appmask; -}; - -enum { - NVME_RW_LR = 1 << 15, - NVME_RW_FUA = 1 << 14, - NVME_RW_DSM_FREQ_UNSPEC = 0, - NVME_RW_DSM_FREQ_TYPICAL = 1, - NVME_RW_DSM_FREQ_RARE = 2, - NVME_RW_DSM_FREQ_READS = 3, - NVME_RW_DSM_FREQ_WRITES = 4, - NVME_RW_DSM_FREQ_RW = 5, - NVME_RW_DSM_FREQ_ONCE = 6, - NVME_RW_DSM_FREQ_PREFETCH = 7, - NVME_RW_DSM_FREQ_TEMP = 8, - NVME_RW_DSM_LATENCY_NONE = 0 << 4, - NVME_RW_DSM_LATENCY_IDLE = 1 << 4, - NVME_RW_DSM_LATENCY_NORM = 2 << 4, - NVME_RW_DSM_LATENCY_LOW = 3 << 4, - NVME_RW_DSM_SEQ_REQ = 1 << 6, - NVME_RW_DSM_COMPRESSED = 1 << 7, - NVME_RW_PRINFO_PRCHK_REF = 1 << 10, - NVME_RW_PRINFO_PRCHK_APP = 1 << 11, - NVME_RW_PRINFO_PRCHK_GUARD = 1 << 12, - NVME_RW_PRINFO_PRACT = 1 << 13, -}; - -struct nvme_dsm_cmd { - __u8 opcode; - __u8 flags; - __u16 command_id; - __le32 nsid; - __u64 rsvd2[2]; - __le64 prp1; - __le64 prp2; - __le32 nr; - __le32 attributes; - __u32 rsvd12[4]; -}; - -enum { - NVME_DSMGMT_IDR = 1 << 0, - NVME_DSMGMT_IDW = 1 << 1, - NVME_DSMGMT_AD = 1 << 2, -}; - -struct nvme_dsm_range { - __le32 cattr; - __le32 nlb; - __le64 slba; -}; - -/* Admin commands */ - -enum nvme_admin_opcode { - nvme_admin_delete_sq = 0x00, - nvme_admin_create_sq = 0x01, - nvme_admin_get_log_page = 0x02, - nvme_admin_delete_cq = 0x04, - nvme_admin_create_cq = 0x05, - nvme_admin_identify = 0x06, - 
nvme_admin_abort_cmd = 0x08, - nvme_admin_set_features = 0x09, - nvme_admin_get_features = 0x0a, - nvme_admin_async_event = 0x0c, - nvme_admin_activate_fw = 0x10, - nvme_admin_download_fw = 0x11, - nvme_admin_format_nvm = 0x80, - nvme_admin_security_send = 0x81, - nvme_admin_security_recv = 0x82, -}; - -enum { - NVME_QUEUE_PHYS_CONTIG = (1 << 0), - NVME_CQ_IRQ_ENABLED = (1 << 1), - NVME_SQ_PRIO_URGENT = (0 << 1), - NVME_SQ_PRIO_HIGH = (1 << 1), - NVME_SQ_PRIO_MEDIUM = (2 << 1), - NVME_SQ_PRIO_LOW = (3 << 1), - NVME_FEAT_ARBITRATION = 0x01, - NVME_FEAT_POWER_MGMT = 0x02, - NVME_FEAT_LBA_RANGE = 0x03, - NVME_FEAT_TEMP_THRESH = 0x04, - NVME_FEAT_ERR_RECOVERY = 0x05, - NVME_FEAT_VOLATILE_WC = 0x06, - NVME_FEAT_NUM_QUEUES = 0x07, - NVME_FEAT_IRQ_COALESCE = 0x08, - NVME_FEAT_IRQ_CONFIG = 0x09, - NVME_FEAT_WRITE_ATOMIC = 0x0a, - NVME_FEAT_ASYNC_EVENT = 0x0b, - NVME_FEAT_AUTO_PST = 0x0c, - NVME_FEAT_SW_PROGRESS = 0x80, - NVME_FEAT_HOST_ID = 0x81, - NVME_FEAT_RESV_MASK = 0x82, - NVME_FEAT_RESV_PERSIST = 0x83, - NVME_LOG_ERROR = 0x01, - NVME_LOG_SMART = 0x02, - NVME_LOG_FW_SLOT = 0x03, - NVME_LOG_RESERVATION = 0x80, - NVME_FWACT_REPL = (0 << 3), - NVME_FWACT_REPL_ACTV = (1 << 3), - NVME_FWACT_ACTV = (2 << 3), -}; - -struct nvme_identify { - __u8 opcode; - __u8 flags; - __u16 command_id; - __le32 nsid; - __u64 rsvd2[2]; - __le64 prp1; - __le64 prp2; - __le32 cns; - __u32 rsvd11[5]; -}; - -struct nvme_features { - __u8 opcode; - __u8 flags; - __u16 command_id; - __le32 nsid; - __u64 rsvd2[2]; - __le64 prp1; - __le64 prp2; - __le32 fid; - __le32 dword11; - __u32 rsvd12[4]; -}; - -struct nvme_create_cq { - __u8 opcode; - __u8 flags; - __u16 command_id; - __u32 rsvd1[5]; - __le64 prp1; - __u64 rsvd8; - __le16 cqid; - __le16 qsize; - __le16 cq_flags; - __le16 irq_vector; - __u32 rsvd12[4]; -}; - -struct nvme_create_sq { - __u8 opcode; - __u8 flags; - __u16 command_id; - __u32 rsvd1[5]; - __le64 prp1; - __u64 rsvd8; - __le16 sqid; - __le16 qsize; - __le16 sq_flags; - __le16 cqid; - 
__u32 rsvd12[4]; -}; - -struct nvme_delete_queue { - __u8 opcode; - __u8 flags; - __u16 command_id; - __u32 rsvd1[9]; - __le16 qid; - __u16 rsvd10; - __u32 rsvd11[5]; -}; - -struct nvme_abort_cmd { - __u8 opcode; - __u8 flags; - __u16 command_id; - __u32 rsvd1[9]; - __le16 sqid; - __u16 cid; - __u32 rsvd11[5]; -}; - -struct nvme_download_firmware { - __u8 opcode; - __u8 flags; - __u16 command_id; - __u32 rsvd1[5]; - __le64 prp1; - __le64 prp2; - __le32 numd; - __le32 offset; - __u32 rsvd12[4]; -}; - -struct nvme_format_cmd { - __u8 opcode; - __u8 flags; - __u16 command_id; - __le32 nsid; - __u64 rsvd2[4]; - __le32 cdw10; - __u32 rsvd11[5]; -}; - -struct nvme_command { - union { - struct nvme_common_command common; - struct nvme_rw_command rw; - struct nvme_identify identify; - struct nvme_features features; - struct nvme_create_cq create_cq; - struct nvme_create_sq create_sq; - struct nvme_delete_queue delete_queue; - struct nvme_download_firmware dlfw; - struct nvme_format_cmd format; - struct nvme_dsm_cmd dsm; - struct nvme_abort_cmd abort; - }; -}; - -enum { - NVME_SC_SUCCESS = 0x0, - NVME_SC_INVALID_OPCODE = 0x1, - NVME_SC_INVALID_FIELD = 0x2, - NVME_SC_CMDID_CONFLICT = 0x3, - NVME_SC_DATA_XFER_ERROR = 0x4, - NVME_SC_POWER_LOSS = 0x5, - NVME_SC_INTERNAL = 0x6, - NVME_SC_ABORT_REQ = 0x7, - NVME_SC_ABORT_QUEUE = 0x8, - NVME_SC_FUSED_FAIL = 0x9, - NVME_SC_FUSED_MISSING = 0xa, - NVME_SC_INVALID_NS = 0xb, - NVME_SC_CMD_SEQ_ERROR = 0xc, - NVME_SC_SGL_INVALID_LAST = 0xd, - NVME_SC_SGL_INVALID_COUNT = 0xe, - NVME_SC_SGL_INVALID_DATA = 0xf, - NVME_SC_SGL_INVALID_METADATA = 0x10, - NVME_SC_SGL_INVALID_TYPE = 0x11, - NVME_SC_LBA_RANGE = 0x80, - NVME_SC_CAP_EXCEEDED = 0x81, - NVME_SC_NS_NOT_READY = 0x82, - NVME_SC_RESERVATION_CONFLICT = 0x83, - NVME_SC_CQ_INVALID = 0x100, - NVME_SC_QID_INVALID = 0x101, - NVME_SC_QUEUE_SIZE = 0x102, - NVME_SC_ABORT_LIMIT = 0x103, - NVME_SC_ABORT_MISSING = 0x104, - NVME_SC_ASYNC_LIMIT = 0x105, - NVME_SC_FIRMWARE_SLOT = 0x106, - 
NVME_SC_FIRMWARE_IMAGE = 0x107, - NVME_SC_INVALID_VECTOR = 0x108, - NVME_SC_INVALID_LOG_PAGE = 0x109, - NVME_SC_INVALID_FORMAT = 0x10a, - NVME_SC_FIRMWARE_NEEDS_RESET = 0x10b, - NVME_SC_INVALID_QUEUE = 0x10c, - NVME_SC_FEATURE_NOT_SAVEABLE = 0x10d, - NVME_SC_FEATURE_NOT_CHANGEABLE = 0x10e, - NVME_SC_FEATURE_NOT_PER_NS = 0x10f, - NVME_SC_FW_NEEDS_RESET_SUBSYS = 0x110, - NVME_SC_BAD_ATTRIBUTES = 0x180, - NVME_SC_INVALID_PI = 0x181, - NVME_SC_READ_ONLY = 0x182, - NVME_SC_WRITE_FAULT = 0x280, - NVME_SC_READ_ERROR = 0x281, - NVME_SC_GUARD_CHECK = 0x282, - NVME_SC_APPTAG_CHECK = 0x283, - NVME_SC_REFTAG_CHECK = 0x284, - NVME_SC_COMPARE_FAILED = 0x285, - NVME_SC_ACCESS_DENIED = 0x286, - NVME_SC_DNR = 0x4000, -}; - -struct nvme_completion { - __le32 result; /* Used by admin commands to return data */ - __u32 rsvd; - __le16 sq_head; /* how much of this queue may be reclaimed */ - __le16 sq_id; /* submission queue that generated this entry */ - __u16 command_id; /* of the command which completed */ - __le16 status; /* did the command fail, and if so, why? 
*/ -}; - -struct nvme_user_io { - __u8 opcode; - __u8 flags; - __u16 control; - __u16 nblocks; - __u16 rsvd; - __u64 metadata; - __u64 addr; - __u64 slba; - __u32 dsmgmt; - __u32 reftag; - __u16 apptag; - __u16 appmask; -}; - -struct nvme_passthru_cmd { - __u8 opcode; - __u8 flags; - __u16 rsvd1; - __u32 nsid; - __u32 cdw2; - __u32 cdw3; - __u64 metadata; - __u64 addr; - __u32 metadata_len; - __u32 data_len; - __u32 cdw10; - __u32 cdw11; - __u32 cdw12; - __u32 cdw13; - __u32 cdw14; - __u32 cdw15; - __u32 timeout_ms; - __u32 result; -}; - -#define NVME_VS(major, minor) (((major) << 16) | ((minor) << 8)) - -#define nvme_admin_cmd nvme_passthru_cmd - -#define NVME_IOCTL_ID _IO('N', 0x40) -#define NVME_IOCTL_ADMIN_CMD _IOWR('N', 0x41, struct nvme_admin_cmd) -#define NVME_IOCTL_SUBMIT_IO _IOW('N', 0x42, struct nvme_user_io) -#define NVME_IOCTL_IO_CMD _IOWR('N', 0x43, struct nvme_passthru_cmd) -#define NVME_IOCTL_RESET _IO('N', 0x44) -#define NVME_IOCTL_SUBSYS_RESET _IO('N', 0x45) - -#endif /* _UAPI_LINUX_NVME_H */ diff --git a/include/uapi/linux/nvme_ioctl.h b/include/uapi/linux/nvme_ioctl.h new file mode 100644 index 000000000000..c4b2a3f90829 --- /dev/null +++ b/include/uapi/linux/nvme_ioctl.h @@ -0,0 +1,65 @@ +/* + * Definitions for the NVM Express ioctl interface + * Copyright (c) 2011-2014, Intel Corporation. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms and conditions of the GNU General Public License, + * version 2, as published by the Free Software Foundation. + * + * This program is distributed in the hope it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. 
+ */ + +#ifndef _UAPI_LINUX_NVME_IOCTL_H +#define _UAPI_LINUX_NVME_IOCTL_H + +#include <linux/types.h> + +struct nvme_user_io { + __u8 opcode; + __u8 flags; + __u16 control; + __u16 nblocks; + __u16 rsvd; + __u64 metadata; + __u64 addr; + __u64 slba; + __u32 dsmgmt; + __u32 reftag; + __u16 apptag; + __u16 appmask; +}; + +struct nvme_passthru_cmd { + __u8 opcode; + __u8 flags; + __u16 rsvd1; + __u32 nsid; + __u32 cdw2; + __u32 cdw3; + __u64 metadata; + __u64 addr; + __u32 metadata_len; + __u32 data_len; + __u32 cdw10; + __u32 cdw11; + __u32 cdw12; + __u32 cdw13; + __u32 cdw14; + __u32 cdw15; + __u32 timeout_ms; + __u32 result; +}; + +#define nvme_admin_cmd nvme_passthru_cmd + +#define NVME_IOCTL_ID _IO('N', 0x40) +#define NVME_IOCTL_ADMIN_CMD _IOWR('N', 0x41, struct nvme_admin_cmd) +#define NVME_IOCTL_SUBMIT_IO _IOW('N', 0x42, struct nvme_user_io) +#define NVME_IOCTL_IO_CMD _IOWR('N', 0x43, struct nvme_passthru_cmd) +#define NVME_IOCTL_RESET _IO('N', 0x44) +#define NVME_IOCTL_SUBSYS_RESET _IO('N', 0x45) + +#endif /* _UAPI_LINUX_NVME_IOCTL_H */ |