diff options
-rw-r--r-- | MAINTAINERS | 1 | ||||
-rw-r--r-- | drivers/block/nbd.c | 5 | ||||
-rw-r--r-- | drivers/nvme/host/Kconfig | 10 | ||||
-rw-r--r-- | drivers/nvme/host/Makefile | 1 | ||||
-rw-r--r-- | drivers/nvme/host/core.c | 18 | ||||
-rw-r--r-- | drivers/nvme/host/hwmon.c | 259 | ||||
-rw-r--r-- | drivers/nvme/host/nvme.h | 13 | ||||
-rw-r--r-- | drivers/nvme/host/pci.c | 3 | ||||
-rw-r--r-- | drivers/scsi/sd.c | 15 | ||||
-rw-r--r-- | drivers/scsi/sd.h | 8 | ||||
-rw-r--r-- | drivers/scsi/sd_zbc.c | 22 | ||||
-rw-r--r-- | include/linux/nvme.h | 6 |
12 files changed, 341 insertions, 20 deletions
diff --git a/MAINTAINERS b/MAINTAINERS index 5626b33909ae..bf8daba4afbb 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -11638,6 +11638,7 @@ F: drivers/nvme/target/fcloop.c NVM EXPRESS TARGET DRIVER M: Christoph Hellwig <hch@lst.de> M: Sagi Grimberg <sagi@grimberg.me> +M: Chaitanya Kulkarni <chaitanya.kulkarni@wdc.com> L: linux-nvme@lists.infradead.org T: git://git.infradead.org/nvme.git W: http://git.infradead.org/nvme.git diff --git a/drivers/block/nbd.c b/drivers/block/nbd.c index 19e75999bb15..57532465fb83 100644 --- a/drivers/block/nbd.c +++ b/drivers/block/nbd.c @@ -1032,14 +1032,15 @@ static int nbd_add_socket(struct nbd_device *nbd, unsigned long arg, sockfd_put(sock); return -ENOMEM; } + + config->socks = socks; + nsock = kzalloc(sizeof(struct nbd_sock), GFP_KERNEL); if (!nsock) { sockfd_put(sock); return -ENOMEM; } - config->socks = socks; - nsock->fallback_index = -1; nsock->dead = false; mutex_init(&nsock->tx_lock); diff --git a/drivers/nvme/host/Kconfig b/drivers/nvme/host/Kconfig index 2b36f052bfb9..c6439638a419 100644 --- a/drivers/nvme/host/Kconfig +++ b/drivers/nvme/host/Kconfig @@ -23,6 +23,16 @@ config NVME_MULTIPATH /dev/nvmeXnY device will show up for each NVMe namespaces, even if it is accessible through multiple controllers. +config NVME_HWMON + bool "NVMe hardware monitoring" + depends on (NVME_CORE=y && HWMON=y) || (NVME_CORE=m && HWMON) + help + This provides support for NVMe hardware monitoring. If enabled, + a hardware monitoring device will be created for each NVMe drive + in the system. + + If unsure, say N. + config NVME_FABRICS tristate diff --git a/drivers/nvme/host/Makefile b/drivers/nvme/host/Makefile index 8a4b671c5f0c..fc7b26be692d 100644 --- a/drivers/nvme/host/Makefile +++ b/drivers/nvme/host/Makefile @@ -14,6 +14,7 @@ nvme-core-$(CONFIG_TRACING) += trace.o nvme-core-$(CONFIG_NVME_MULTIPATH) += multipath.o nvme-core-$(CONFIG_NVM) += lightnvm.o nvme-core-$(CONFIG_FAULT_INJECTION_DEBUG_FS) += fault_inject.o +nvme-core-$(CONFIG_NVME_HWMON) += hwmon.o nvme-y += pci.o diff --git a/drivers/nvme/host/core.c b/drivers/nvme/host/core.c index c5e434c9af62..8e8527408db3 100644 --- a/drivers/nvme/host/core.c +++ b/drivers/nvme/host/core.c @@ -613,8 +613,14 @@ static blk_status_t nvme_setup_discard(struct nvme_ns *ns, struct request *req, struct nvme_dsm_range *range; struct bio *bio; - range = kmalloc_array(segments, sizeof(*range), - GFP_ATOMIC | __GFP_NOWARN); + /* + * Some devices do not consider the DSM 'Number of Ranges' field when + * determining how much data to DMA. Always allocate memory for maximum + * number of segments to prevent device reading beyond end of buffer. + */ + static const size_t alloc_size = sizeof(*range) * NVME_DSM_MAX_RANGES; + + range = kzalloc(alloc_size, GFP_ATOMIC | __GFP_NOWARN); if (!range) { /* * If we fail allocation our range, fallback to the controller @@ -654,7 +660,7 @@ static blk_status_t nvme_setup_discard(struct nvme_ns *ns, struct request *req, req->special_vec.bv_page = virt_to_page(range); req->special_vec.bv_offset = offset_in_page(range); - req->special_vec.bv_len = sizeof(*range) * segments; + req->special_vec.bv_len = alloc_size; req->rq_flags |= RQF_SPECIAL_PAYLOAD; return BLK_STS_OK; @@ -2798,6 +2804,9 @@ int nvme_init_identify(struct nvme_ctrl *ctrl) ctrl->oncs = le16_to_cpu(id->oncs); ctrl->mtfa = le16_to_cpu(id->mtfa); ctrl->oaes = le32_to_cpu(id->oaes); + ctrl->wctemp = le16_to_cpu(id->wctemp); + ctrl->cctemp = le16_to_cpu(id->cctemp); + atomic_set(&ctrl->abort_limit, id->acl + 1); ctrl->vwc = id->vwc; if (id->mdts) @@ -2897,6 +2906,9 @@ int nvme_init_identify(struct nvme_ctrl *ctrl) if (ret < 0) return ret; + if (!ctrl->identified) + nvme_hwmon_init(ctrl); + ctrl->identified = true; return 0; diff --git a/drivers/nvme/host/hwmon.c b/drivers/nvme/host/hwmon.c new file mode 100644 index 000000000000..a5af21f5d370 --- /dev/null +++ b/drivers/nvme/host/hwmon.c @@ -0,0 +1,259 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * NVM Express hardware monitoring support + * Copyright (c) 2019, Guenter Roeck + */ + +#include <linux/hwmon.h> +#include <asm/unaligned.h> + +#include "nvme.h" + +/* These macros should be moved to linux/temperature.h */ +#define MILLICELSIUS_TO_KELVIN(t) DIV_ROUND_CLOSEST((t) + 273150, 1000) +#define KELVIN_TO_MILLICELSIUS(t) ((t) * 1000L - 273150) + +struct nvme_hwmon_data { + struct nvme_ctrl *ctrl; + struct nvme_smart_log log; + struct mutex read_lock; +}; + +static int nvme_get_temp_thresh(struct nvme_ctrl *ctrl, int sensor, bool under, + long *temp) +{ + unsigned int threshold = sensor << NVME_TEMP_THRESH_SELECT_SHIFT; + u32 status; + int ret; + + if (under) + threshold |= NVME_TEMP_THRESH_TYPE_UNDER; + + ret = nvme_get_features(ctrl, NVME_FEAT_TEMP_THRESH, threshold, NULL, 0, + &status); + if (ret > 0) + return -EIO; + if (ret < 0) + return ret; + *temp = KELVIN_TO_MILLICELSIUS(status & NVME_TEMP_THRESH_MASK); + + return 0; +} + +static int nvme_set_temp_thresh(struct nvme_ctrl *ctrl, int sensor, bool under, + long temp) +{ + unsigned int threshold = sensor << NVME_TEMP_THRESH_SELECT_SHIFT; + int ret; + + temp = MILLICELSIUS_TO_KELVIN(temp); + threshold |= clamp_val(temp, 0, NVME_TEMP_THRESH_MASK); + + if (under) + threshold |= NVME_TEMP_THRESH_TYPE_UNDER; + + ret = nvme_set_features(ctrl, NVME_FEAT_TEMP_THRESH, threshold, NULL, 0, + NULL); + if (ret > 0) + return -EIO; + + return ret; +} + +static int nvme_hwmon_get_smart_log(struct nvme_hwmon_data *data) +{ + int ret; + + ret = nvme_get_log(data->ctrl, NVME_NSID_ALL, NVME_LOG_SMART, 0, + &data->log, sizeof(data->log), 0); + + return ret <= 0 ? ret : -EIO; +} + +static int nvme_hwmon_read(struct device *dev, enum hwmon_sensor_types type, + u32 attr, int channel, long *val) +{ + struct nvme_hwmon_data *data = dev_get_drvdata(dev); + struct nvme_smart_log *log = &data->log; + int temp; + int err; + + /* + * First handle attributes which don't require us to read + * the smart log. + */ + switch (attr) { + case hwmon_temp_max: + return nvme_get_temp_thresh(data->ctrl, channel, false, val); + case hwmon_temp_min: + return nvme_get_temp_thresh(data->ctrl, channel, true, val); + case hwmon_temp_crit: + *val = KELVIN_TO_MILLICELSIUS(data->ctrl->cctemp); + return 0; + default: + break; + } + + mutex_lock(&data->read_lock); + err = nvme_hwmon_get_smart_log(data); + if (err) + goto unlock; + + switch (attr) { + case hwmon_temp_input: + if (!channel) + temp = get_unaligned_le16(log->temperature); + else + temp = le16_to_cpu(log->temp_sensor[channel - 1]); + *val = KELVIN_TO_MILLICELSIUS(temp); + break; + case hwmon_temp_alarm: + *val = !!(log->critical_warning & NVME_SMART_CRIT_TEMPERATURE); + break; + default: + err = -EOPNOTSUPP; + break; + } +unlock: + mutex_unlock(&data->read_lock); + return err; +} + +static int nvme_hwmon_write(struct device *dev, enum hwmon_sensor_types type, + u32 attr, int channel, long val) +{ + struct nvme_hwmon_data *data = dev_get_drvdata(dev); + + switch (attr) { + case hwmon_temp_max: + return nvme_set_temp_thresh(data->ctrl, channel, false, val); + case hwmon_temp_min: + return nvme_set_temp_thresh(data->ctrl, channel, true, val); + default: + break; + } + + return -EOPNOTSUPP; +} + +static const char * const nvme_hwmon_sensor_names[] = { + "Composite", + "Sensor 1", + "Sensor 2", + "Sensor 3", + "Sensor 4", + "Sensor 5", + "Sensor 6", + "Sensor 7", + "Sensor 8", +}; + +static int nvme_hwmon_read_string(struct device *dev, + enum hwmon_sensor_types type, u32 attr, + int channel, const char **str) +{ + *str = nvme_hwmon_sensor_names[channel]; + return 0; +} + +static umode_t nvme_hwmon_is_visible(const void *_data, + enum hwmon_sensor_types type, + u32 attr, int channel) +{ + const struct nvme_hwmon_data *data = _data; + + switch (attr) { + case hwmon_temp_crit: + if (!channel && data->ctrl->cctemp) + return 0444; + break; + case hwmon_temp_max: + case hwmon_temp_min: + if ((!channel && data->ctrl->wctemp) || + (channel && data->log.temp_sensor[channel - 1])) { + if (data->ctrl->quirks & + NVME_QUIRK_NO_TEMP_THRESH_CHANGE) + return 0444; + return 0644; + } + break; + case hwmon_temp_alarm: + if (!channel) + return 0444; + break; + case hwmon_temp_input: + case hwmon_temp_label: + if (!channel || data->log.temp_sensor[channel - 1]) + return 0444; + break; + default: + break; + } + return 0; +} + +static const struct hwmon_channel_info *nvme_hwmon_info[] = { + HWMON_CHANNEL_INFO(chip, HWMON_C_REGISTER_TZ), + HWMON_CHANNEL_INFO(temp, + HWMON_T_INPUT | HWMON_T_MAX | HWMON_T_MIN | + HWMON_T_CRIT | HWMON_T_LABEL | HWMON_T_ALARM, + HWMON_T_INPUT | HWMON_T_MAX | HWMON_T_MIN | + HWMON_T_LABEL, + HWMON_T_INPUT | HWMON_T_MAX | HWMON_T_MIN | + HWMON_T_LABEL, + HWMON_T_INPUT | HWMON_T_MAX | HWMON_T_MIN | + HWMON_T_LABEL, + HWMON_T_INPUT | HWMON_T_MAX | HWMON_T_MIN | + HWMON_T_LABEL, + HWMON_T_INPUT | HWMON_T_MAX | HWMON_T_MIN | + HWMON_T_LABEL, + HWMON_T_INPUT | HWMON_T_MAX | HWMON_T_MIN | + HWMON_T_LABEL, + HWMON_T_INPUT | HWMON_T_MAX | HWMON_T_MIN | + HWMON_T_LABEL, + HWMON_T_INPUT | HWMON_T_MAX | HWMON_T_MIN | + HWMON_T_LABEL), + NULL +}; + +static const struct hwmon_ops nvme_hwmon_ops = { + .is_visible = nvme_hwmon_is_visible, + .read = nvme_hwmon_read, + .read_string = nvme_hwmon_read_string, + .write = nvme_hwmon_write, +}; + +static const struct hwmon_chip_info nvme_hwmon_chip_info = { + .ops = &nvme_hwmon_ops, + .info = nvme_hwmon_info, +}; + +void nvme_hwmon_init(struct nvme_ctrl *ctrl) +{ + struct device *dev = ctrl->dev; + struct nvme_hwmon_data *data; + struct device *hwmon; + int err; + + data = devm_kzalloc(dev, sizeof(*data), GFP_KERNEL); + if (!data) + return; + + data->ctrl = ctrl; + mutex_init(&data->read_lock); + + err = nvme_hwmon_get_smart_log(data); + if (err) { + dev_warn(dev, "Failed to read smart log (error %d)\n", err); + devm_kfree(dev, data); + return; + } + + hwmon = devm_hwmon_device_register_with_info(dev, "nvme", data, + &nvme_hwmon_chip_info, + NULL); + if (IS_ERR(hwmon)) { + dev_warn(dev, "Failed to instantiate hwmon device\n"); + devm_kfree(dev, data); + } +} diff --git a/drivers/nvme/host/nvme.h b/drivers/nvme/host/nvme.h index fdda273cd2f6..3b9cbe0668fa 100644 --- a/drivers/nvme/host/nvme.h +++ b/drivers/nvme/host/nvme.h @@ -115,6 +115,11 @@ enum nvme_quirks { * Prevent tag overlap between queues */ NVME_QUIRK_SHARED_TAGS = (1 << 13), + + /* + * Don't change the value of the temperature threshold feature + */ + NVME_QUIRK_NO_TEMP_THRESH_CHANGE = (1 << 14), }; /* @@ -231,6 +236,8 @@ struct nvme_ctrl { u16 kas; u8 npss; u8 apsta; + u16 wctemp; + u16 cctemp; u32 oaes; u32 aen_result; u32 ctratt; @@ -668,4 +675,10 @@ static inline struct nvme_ns *nvme_get_ns_from_dev(struct device *dev) return dev_to_disk(dev)->private_data; } +#ifdef CONFIG_NVME_HWMON +void nvme_hwmon_init(struct nvme_ctrl *ctrl); +#else +static inline void nvme_hwmon_init(struct nvme_ctrl *ctrl) { } +#endif + #endif /* _NVME_H */ diff --git a/drivers/nvme/host/pci.c b/drivers/nvme/host/pci.c index 3aab92bd2263..dcaad5831cee 100644 --- a/drivers/nvme/host/pci.c +++ b/drivers/nvme/host/pci.c @@ -3080,7 +3080,8 @@ static const struct pci_device_id nvme_id_table[] = { NVME_QUIRK_DEALLOCATE_ZEROES, }, { PCI_VDEVICE(INTEL, 0xf1a5), /* Intel 600P/P3100 */ .driver_data = NVME_QUIRK_NO_DEEPEST_PS | - NVME_QUIRK_MEDIUM_PRIO_SQ }, + NVME_QUIRK_MEDIUM_PRIO_SQ | + NVME_QUIRK_NO_TEMP_THRESH_CHANGE }, { PCI_VDEVICE(INTEL, 0xf1a6), /* Intel 760p/Pro 7600p */ .driver_data = NVME_QUIRK_IGNORE_DEV_SUBNQN, }, { PCI_VDEVICE(INTEL, 0x5845), /* Qemu emulated controller */ diff --git a/drivers/scsi/sd.c b/drivers/scsi/sd.c index ebb40160539f..470ee6dc3f7e 100644 --- a/drivers/scsi/sd.c +++ b/drivers/scsi/sd.c @@ -1291,9 +1291,17 @@ static blk_status_t sd_init_command(struct scsi_cmnd *cmd) case REQ_OP_WRITE: return sd_setup_read_write_cmnd(cmd); case REQ_OP_ZONE_RESET: - return sd_zbc_setup_reset_cmnd(cmd, false); + return sd_zbc_setup_zone_mgmt_cmnd(cmd, ZO_RESET_WRITE_POINTER, + false); case REQ_OP_ZONE_RESET_ALL: - return sd_zbc_setup_reset_cmnd(cmd, true); + return sd_zbc_setup_zone_mgmt_cmnd(cmd, ZO_RESET_WRITE_POINTER, + true); + case REQ_OP_ZONE_OPEN: + return sd_zbc_setup_zone_mgmt_cmnd(cmd, ZO_OPEN_ZONE, false); + case REQ_OP_ZONE_CLOSE: + return sd_zbc_setup_zone_mgmt_cmnd(cmd, ZO_CLOSE_ZONE, false); + case REQ_OP_ZONE_FINISH: + return sd_zbc_setup_zone_mgmt_cmnd(cmd, ZO_FINISH_ZONE, false); default: WARN_ON_ONCE(1); return BLK_STS_NOTSUPP; @@ -1961,6 +1969,9 @@ static int sd_done(struct scsi_cmnd *SCpnt) case REQ_OP_WRITE_SAME: case REQ_OP_ZONE_RESET: case REQ_OP_ZONE_RESET_ALL: + case REQ_OP_ZONE_OPEN: + case REQ_OP_ZONE_CLOSE: + case REQ_OP_ZONE_FINISH: if (!result) { good_bytes = blk_rq_bytes(req); scsi_set_resid(SCpnt, 0); diff --git a/drivers/scsi/sd.h b/drivers/scsi/sd.h index 1eab779f812b..bf2102a749bc 100644 --- a/drivers/scsi/sd.h +++ b/drivers/scsi/sd.h @@ -209,7 +209,8 @@ static inline int sd_is_zoned(struct scsi_disk *sdkp) extern int sd_zbc_read_zones(struct scsi_disk *sdkp, unsigned char *buffer); extern void sd_zbc_print_zones(struct scsi_disk *sdkp); -extern blk_status_t sd_zbc_setup_reset_cmnd(struct scsi_cmnd *cmd, bool all); +blk_status_t sd_zbc_setup_zone_mgmt_cmnd(struct scsi_cmnd *cmd, + unsigned char op, bool all); extern void sd_zbc_complete(struct scsi_cmnd *cmd, unsigned int good_bytes, struct scsi_sense_hdr *sshdr); extern int sd_zbc_report_zones(struct gendisk *disk, sector_t sector, @@ -225,8 +226,9 @@ static inline int sd_zbc_read_zones(struct scsi_disk *sdkp, static inline void sd_zbc_print_zones(struct scsi_disk *sdkp) {} -static inline blk_status_t sd_zbc_setup_reset_cmnd(struct scsi_cmnd *cmd, - bool all) +static inline blk_status_t sd_zbc_setup_zone_mgmt_cmnd(struct scsi_cmnd *cmd, + unsigned char op, + bool all) { return BLK_STS_TARGET; } diff --git a/drivers/scsi/sd_zbc.c b/drivers/scsi/sd_zbc.c index 1efc69e194f8..39f10ec0dfcf 100644 --- a/drivers/scsi/sd_zbc.c +++ b/drivers/scsi/sd_zbc.c @@ -207,13 +207,17 @@ static inline sector_t sd_zbc_zone_sectors(struct scsi_disk *sdkp) } /** - * sd_zbc_setup_reset_cmnd - Prepare a RESET WRITE POINTER scsi command. + * sd_zbc_setup_zone_mgmt_cmnd - Prepare a zone ZBC_OUT command. The operations + * can be RESET WRITE POINTER, OPEN, CLOSE or FINISH. * @cmd: the command to setup - * @all: Reset all zones control. + * @op: Operation to be performed + * @all: All zones control * - * Called from sd_init_command() for a REQ_OP_ZONE_RESET request. + * Called from sd_init_command() for REQ_OP_ZONE_RESET, REQ_OP_ZONE_RESET_ALL, + * REQ_OP_ZONE_OPEN, REQ_OP_ZONE_CLOSE or REQ_OP_ZONE_FINISH requests. */ -blk_status_t sd_zbc_setup_reset_cmnd(struct scsi_cmnd *cmd, bool all) +blk_status_t sd_zbc_setup_zone_mgmt_cmnd(struct scsi_cmnd *cmd, + unsigned char op, bool all) { struct request *rq = cmd->request; struct scsi_disk *sdkp = scsi_disk(rq->rq_disk); @@ -234,7 +238,7 @@ blk_status_t sd_zbc_setup_reset_cmnd(struct scsi_cmnd *cmd, bool all) cmd->cmd_len = 16; memset(cmd->cmnd, 0, cmd->cmd_len); cmd->cmnd[0] = ZBC_OUT; - cmd->cmnd[1] = ZO_RESET_WRITE_POINTER; + cmd->cmnd[1] = op; if (all) cmd->cmnd[14] = 0x1; else @@ -263,14 +267,14 @@ void sd_zbc_complete(struct scsi_cmnd *cmd, unsigned int good_bytes, int result = cmd->result; struct request *rq = cmd->request; - if (req_op(rq) == REQ_OP_ZONE_RESET && + if (op_is_zone_mgmt(req_op(rq)) && result && sshdr->sense_key == ILLEGAL_REQUEST && sshdr->asc == 0x24) { /* - * INVALID FIELD IN CDB error: reset of a conventional - * zone was attempted. Nothing to worry about, so be - * quiet about the error. + * INVALID FIELD IN CDB error: a zone management command was + * attempted on a conventional zone. Nothing to worry about, + * so be quiet about the error. */ rq->rq_flags |= RQF_QUIET; } diff --git a/include/linux/nvme.h b/include/linux/nvme.h index 3eca4f7d8510..3d5189f46cb1 100644 --- a/include/linux/nvme.h +++ b/include/linux/nvme.h @@ -804,6 +804,12 @@ struct nvme_write_zeroes_cmd { /* Features */ +enum { + NVME_TEMP_THRESH_MASK = 0xffff, + NVME_TEMP_THRESH_SELECT_SHIFT = 16, + NVME_TEMP_THRESH_TYPE_UNDER = 0x100000, +}; + struct nvme_feat_auto_pst { __le64 entries[32]; }; |