From e5d2f910cfeca852f6e2dc19dfa8dab264ce0cde Mon Sep 17 00:00:00 2001 From: Dexuan Cui Date: Thu, 22 Aug 2019 05:05:37 +0000 Subject: PCI: hv: Add a paravirtual backchannel in software Windows SR-IOV provides a backchannel mechanism in software for communication between a VF driver and a PF driver. These "configuration blocks" are similar in concept to PCI configuration space, but instead of doing reads and writes in 32-bit chunks through a very slow path, packets of up to 128 bytes can be sent or received asynchronously. Nearly every SR-IOV device contains just such a communications channel in hardware, so using this one in software is usually optional. Using the software channel, however, allows driver implementers to leverage software tools that fuzz the communications channel looking for vulnerabilities. The usage model for these packets puts the responsibility for reading or writing on the VF driver. The VF driver sends a read or a write packet, indicating which "block" is being referred to by number. If the PF driver wishes to initiate communication, it can "invalidate" one or more of the first 64 blocks. This invalidation is delivered via a callback supplied by the VF driver by this driver. No protocol is implied, except that supplied by the PF and VF drivers. Signed-off-by: Jake Oshins Signed-off-by: Dexuan Cui Cc: Haiyang Zhang Cc: K. Y. Srinivasan Cc: Stephen Hemminger Signed-off-by: Saeed Mahameed Signed-off-by: Haiyang Zhang Signed-off-by: David S. Miller --- drivers/pci/controller/pci-hyperv.c | 302 ++++++++++++++++++++++++++++++++++++ 1 file changed, 302 insertions(+) (limited to 'drivers/pci') diff --git a/drivers/pci/controller/pci-hyperv.c b/drivers/pci/controller/pci-hyperv.c index 40b625458afa..57adeca7bda9 100644 --- a/drivers/pci/controller/pci-hyperv.c +++ b/drivers/pci/controller/pci-hyperv.c @@ -365,6 +365,39 @@ struct pci_delete_interrupt { struct tran_int_desc int_desc; } __packed; +/* + * Note: the VM must pass a valid block id, wslot and bytes_requested. + */ +struct pci_read_block { + struct pci_message message_type; + u32 block_id; + union win_slot_encoding wslot; + u32 bytes_requested; +} __packed; + +struct pci_read_block_response { + struct vmpacket_descriptor hdr; + u32 status; + u8 bytes[HV_CONFIG_BLOCK_SIZE_MAX]; +} __packed; + +/* + * Note: the VM must pass a valid block id, wslot and byte_count. + */ +struct pci_write_block { + struct pci_message message_type; + u32 block_id; + union win_slot_encoding wslot; + u32 byte_count; + u8 bytes[HV_CONFIG_BLOCK_SIZE_MAX]; +} __packed; + +struct pci_dev_inval_block { + struct pci_incoming_message incoming; + union win_slot_encoding wslot; + u64 block_mask; +} __packed; + struct pci_dev_incoming { struct pci_incoming_message incoming; union win_slot_encoding wslot; @@ -499,6 +532,9 @@ struct hv_pci_dev { struct hv_pcibus_device *hbus; struct work_struct wrk; + void (*block_invalidate)(void *context, u64 block_mask); + void *invalidate_context; + /* * What would be observed if one wrote 0xFFFFFFFF to a BAR and then * read it back, for each of the BAR offsets within config space. @@ -817,6 +853,256 @@ static struct pci_ops hv_pcifront_ops = { .write = hv_pcifront_write_config, }; +/* + * Paravirtual backchannel + * + * Hyper-V SR-IOV provides a backchannel mechanism in software for + * communication between a VF driver and a PF driver. These + * "configuration blocks" are similar in concept to PCI configuration space, + * but instead of doing reads and writes in 32-bit chunks through a very slow + * path, packets of up to 128 bytes can be sent or received asynchronously. + * + * Nearly every SR-IOV device contains just such a communications channel in + * hardware, so using this one in software is usually optional. Using the + * software channel, however, allows driver implementers to leverage software + * tools that fuzz the communications channel looking for vulnerabilities. + * + * The usage model for these packets puts the responsibility for reading or + * writing on the VF driver. The VF driver sends a read or a write packet, + * indicating which "block" is being referred to by number. + * + * If the PF driver wishes to initiate communication, it can "invalidate" one or + * more of the first 64 blocks. This invalidation is delivered via a callback + * supplied by the VF driver by this driver. + * + * No protocol is implied, except that supplied by the PF and VF drivers. + */ + +struct hv_read_config_compl { + struct hv_pci_compl comp_pkt; + void *buf; + unsigned int len; + unsigned int bytes_returned; +}; + +/** + * hv_pci_read_config_compl() - Invoked when a response packet + * for a read config block operation arrives. + * @context: Identifies the read config operation + * @resp: The response packet itself + * @resp_packet_size: Size in bytes of the response packet + */ +static void hv_pci_read_config_compl(void *context, struct pci_response *resp, + int resp_packet_size) +{ + struct hv_read_config_compl *comp = context; + struct pci_read_block_response *read_resp = + (struct pci_read_block_response *)resp; + unsigned int data_len, hdr_len; + + hdr_len = offsetof(struct pci_read_block_response, bytes); + if (resp_packet_size < hdr_len) { + comp->comp_pkt.completion_status = -1; + goto out; + } + + data_len = resp_packet_size - hdr_len; + if (data_len > 0 && read_resp->status == 0) { + comp->bytes_returned = min(comp->len, data_len); + memcpy(comp->buf, read_resp->bytes, comp->bytes_returned); + } else { + comp->bytes_returned = 0; + } + + comp->comp_pkt.completion_status = read_resp->status; +out: + complete(&comp->comp_pkt.host_event); +} + +/** + * hv_read_config_block() - Sends a read config block request to + * the back-end driver running in the Hyper-V parent partition. + * @pdev: The PCI driver's representation for this device. + * @buf: Buffer into which the config block will be copied. + * @len: Size in bytes of buf. + * @block_id: Identifies the config block which has been requested. + * @bytes_returned: Size which came back from the back-end driver. + * + * Return: 0 on success, -errno on failure + */ +int hv_read_config_block(struct pci_dev *pdev, void *buf, unsigned int len, + unsigned int block_id, unsigned int *bytes_returned) +{ + struct hv_pcibus_device *hbus = + container_of(pdev->bus->sysdata, struct hv_pcibus_device, + sysdata); + struct { + struct pci_packet pkt; + char buf[sizeof(struct pci_read_block)]; + } pkt; + struct hv_read_config_compl comp_pkt; + struct pci_read_block *read_blk; + int ret; + + if (len == 0 || len > HV_CONFIG_BLOCK_SIZE_MAX) + return -EINVAL; + + init_completion(&comp_pkt.comp_pkt.host_event); + comp_pkt.buf = buf; + comp_pkt.len = len; + + memset(&pkt, 0, sizeof(pkt)); + pkt.pkt.completion_func = hv_pci_read_config_compl; + pkt.pkt.compl_ctxt = &comp_pkt; + read_blk = (struct pci_read_block *)&pkt.pkt.message; + read_blk->message_type.type = PCI_READ_BLOCK; + read_blk->wslot.slot = devfn_to_wslot(pdev->devfn); + read_blk->block_id = block_id; + read_blk->bytes_requested = len; + + ret = vmbus_sendpacket(hbus->hdev->channel, read_blk, + sizeof(*read_blk), (unsigned long)&pkt.pkt, + VM_PKT_DATA_INBAND, + VMBUS_DATA_PACKET_FLAG_COMPLETION_REQUESTED); + if (ret) + return ret; + + ret = wait_for_response(hbus->hdev, &comp_pkt.comp_pkt.host_event); + if (ret) + return ret; + + if (comp_pkt.comp_pkt.completion_status != 0 || + comp_pkt.bytes_returned == 0) { + dev_err(&hbus->hdev->device, + "Read Config Block failed: 0x%x, bytes_returned=%d\n", + comp_pkt.comp_pkt.completion_status, + comp_pkt.bytes_returned); + return -EIO; + } + + *bytes_returned = comp_pkt.bytes_returned; + return 0; +} +EXPORT_SYMBOL(hv_read_config_block); + +/** + * hv_pci_write_config_compl() - Invoked when a response packet for a write + * config block operation arrives. + * @context: Identifies the write config operation + * @resp: The response packet itself + * @resp_packet_size: Size in bytes of the response packet + */ +static void hv_pci_write_config_compl(void *context, struct pci_response *resp, + int resp_packet_size) +{ + struct hv_pci_compl *comp_pkt = context; + + comp_pkt->completion_status = resp->status; + complete(&comp_pkt->host_event); +} + +/** + * hv_write_config_block() - Sends a write config block request to the + * back-end driver running in the Hyper-V parent partition. + * @pdev: The PCI driver's representation for this device. + * @buf: Buffer from which the config block will be copied. + * @len: Size in bytes of buf. + * @block_id: Identifies the config block which is being written. + * + * Return: 0 on success, -errno on failure + */ +int hv_write_config_block(struct pci_dev *pdev, void *buf, unsigned int len, + unsigned int block_id) +{ + struct hv_pcibus_device *hbus = + container_of(pdev->bus->sysdata, struct hv_pcibus_device, + sysdata); + struct { + struct pci_packet pkt; + char buf[sizeof(struct pci_write_block)]; + u32 reserved; + } pkt; + struct hv_pci_compl comp_pkt; + struct pci_write_block *write_blk; + u32 pkt_size; + int ret; + + if (len == 0 || len > HV_CONFIG_BLOCK_SIZE_MAX) + return -EINVAL; + + init_completion(&comp_pkt.host_event); + + memset(&pkt, 0, sizeof(pkt)); + pkt.pkt.completion_func = hv_pci_write_config_compl; + pkt.pkt.compl_ctxt = &comp_pkt; + write_blk = (struct pci_write_block *)&pkt.pkt.message; + write_blk->message_type.type = PCI_WRITE_BLOCK; + write_blk->wslot.slot = devfn_to_wslot(pdev->devfn); + write_blk->block_id = block_id; + write_blk->byte_count = len; + memcpy(write_blk->bytes, buf, len); + pkt_size = offsetof(struct pci_write_block, bytes) + len; + /* + * This quirk is required on some hosts shipped around 2018, because + * these hosts don't check the pkt_size correctly (new hosts have been + * fixed since early 2019). The quirk is also safe on very old hosts + * and new hosts, because, on them, what really matters is the length + * specified in write_blk->byte_count. + */ + pkt_size += sizeof(pkt.reserved); + + ret = vmbus_sendpacket(hbus->hdev->channel, write_blk, pkt_size, + (unsigned long)&pkt.pkt, VM_PKT_DATA_INBAND, + VMBUS_DATA_PACKET_FLAG_COMPLETION_REQUESTED); + if (ret) + return ret; + + ret = wait_for_response(hbus->hdev, &comp_pkt.host_event); + if (ret) + return ret; + + if (comp_pkt.completion_status != 0) { + dev_err(&hbus->hdev->device, + "Write Config Block failed: 0x%x\n", + comp_pkt.completion_status); + return -EIO; + } + + return 0; +} +EXPORT_SYMBOL(hv_write_config_block); + +/** + * hv_register_block_invalidate() - Invoked when a config block invalidation + * arrives from the back-end driver. + * @pdev: The PCI driver's representation for this device. + * @context: Identifies the device. + * @block_invalidate: Identifies all of the blocks being invalidated. + * + * Return: 0 on success, -errno on failure + */ +int hv_register_block_invalidate(struct pci_dev *pdev, void *context, + void (*block_invalidate)(void *context, + u64 block_mask)) +{ + struct hv_pcibus_device *hbus = + container_of(pdev->bus->sysdata, struct hv_pcibus_device, + sysdata); + struct hv_pci_dev *hpdev; + + hpdev = get_pcichild_wslot(hbus, devfn_to_wslot(pdev->devfn)); + if (!hpdev) + return -ENODEV; + + hpdev->block_invalidate = block_invalidate; + hpdev->invalidate_context = context; + + put_pcichild(hpdev); + return 0; + +} +EXPORT_SYMBOL(hv_register_block_invalidate); + /* Interrupt management hooks */ static void hv_int_desc_free(struct hv_pci_dev *hpdev, struct tran_int_desc *int_desc) @@ -1968,6 +2254,7 @@ static void hv_pci_onchannelcallback(void *context) struct pci_response *response; struct pci_incoming_message *new_message; struct pci_bus_relations *bus_rel; + struct pci_dev_inval_block *inval; struct pci_dev_incoming *dev_message; struct hv_pci_dev *hpdev; @@ -2045,6 +2332,21 @@ static void hv_pci_onchannelcallback(void *context) } break; + case PCI_INVALIDATE_BLOCK: + + inval = (struct pci_dev_inval_block *)buffer; + hpdev = get_pcichild_wslot(hbus, + inval->wslot.slot); + if (hpdev) { + if (hpdev->block_invalidate) { + hpdev->block_invalidate( + hpdev->invalidate_context, + inval->block_mask); + } + put_pcichild(hpdev); + } + break; + default: dev_warn(&hbus->hdev->device, "Unimplemented protocol message %x\n", -- cgit v1.2.3 From 348dd93e40c112afda3cd07daa6f470e474d29dc Mon Sep 17 00:00:00 2001 From: Haiyang Zhang Date: Thu, 22 Aug 2019 05:05:41 +0000 Subject: PCI: hv: Add a Hyper-V PCI interface driver for software backchannel interface This interface driver is a helper driver allows other drivers to have a common interface with the Hyper-V PCI frontend driver. Signed-off-by: Haiyang Zhang Signed-off-by: Saeed Mahameed Signed-off-by: David S. Miller --- MAINTAINERS | 1 + drivers/pci/Kconfig | 1 + drivers/pci/controller/Kconfig | 7 ++++ drivers/pci/controller/Makefile | 1 + drivers/pci/controller/pci-hyperv-intf.c | 67 ++++++++++++++++++++++++++++++++ drivers/pci/controller/pci-hyperv.c | 12 ++++-- include/linux/hyperv.h | 30 ++++++++++---- 7 files changed, 108 insertions(+), 11 deletions(-) create mode 100644 drivers/pci/controller/pci-hyperv-intf.c (limited to 'drivers/pci') diff --git a/MAINTAINERS b/MAINTAINERS index a406947b369e..986085351d79 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -7469,6 +7469,7 @@ F: drivers/hid/hid-hyperv.c F: drivers/hv/ F: drivers/input/serio/hyperv-keyboard.c F: drivers/pci/controller/pci-hyperv.c +F: drivers/pci/controller/pci-hyperv-intf.c F: drivers/net/hyperv/ F: drivers/scsi/storvsc_drv.c F: drivers/uio/uio_hv_generic.c diff --git a/drivers/pci/Kconfig b/drivers/pci/Kconfig index 2ab92409210a..c313de96a357 100644 --- a/drivers/pci/Kconfig +++ b/drivers/pci/Kconfig @@ -182,6 +182,7 @@ config PCI_LABEL config PCI_HYPERV tristate "Hyper-V PCI Frontend" depends on X86 && HYPERV && PCI_MSI && PCI_MSI_IRQ_DOMAIN && X86_64 + select PCI_HYPERV_INTERFACE help The PCI device frontend driver allows the kernel to import arbitrary PCI devices from a PCI backend to support PCI driver domains. diff --git a/drivers/pci/controller/Kconfig b/drivers/pci/controller/Kconfig index fe9f9f13ce11..70e078238899 100644 --- a/drivers/pci/controller/Kconfig +++ b/drivers/pci/controller/Kconfig @@ -281,5 +281,12 @@ config VMD To compile this driver as a module, choose M here: the module will be called vmd. +config PCI_HYPERV_INTERFACE + tristate "Hyper-V PCI Interface" + depends on X86 && HYPERV && PCI_MSI && PCI_MSI_IRQ_DOMAIN && X86_64 + help + The Hyper-V PCI Interface is a helper driver allows other drivers to + have a common interface with the Hyper-V PCI frontend driver. + source "drivers/pci/controller/dwc/Kconfig" endmenu diff --git a/drivers/pci/controller/Makefile b/drivers/pci/controller/Makefile index d56a507495c5..a2a22c9d91af 100644 --- a/drivers/pci/controller/Makefile +++ b/drivers/pci/controller/Makefile @@ -4,6 +4,7 @@ obj-$(CONFIG_PCIE_CADENCE_HOST) += pcie-cadence-host.o obj-$(CONFIG_PCIE_CADENCE_EP) += pcie-cadence-ep.o obj-$(CONFIG_PCI_FTPCI100) += pci-ftpci100.o obj-$(CONFIG_PCI_HYPERV) += pci-hyperv.o +obj-$(CONFIG_PCI_HYPERV_INTERFACE) += pci-hyperv-intf.o obj-$(CONFIG_PCI_MVEBU) += pci-mvebu.o obj-$(CONFIG_PCI_AARDVARK) += pci-aardvark.o obj-$(CONFIG_PCI_TEGRA) += pci-tegra.o diff --git a/drivers/pci/controller/pci-hyperv-intf.c b/drivers/pci/controller/pci-hyperv-intf.c new file mode 100644 index 000000000000..cc96be450360 --- /dev/null +++ b/drivers/pci/controller/pci-hyperv-intf.c @@ -0,0 +1,67 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Copyright (c) Microsoft Corporation. + * + * Author: + * Haiyang Zhang + * + * This small module is a helper driver allows other drivers to + * have a common interface with the Hyper-V PCI frontend driver. + */ + +#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt + +#include +#include +#include + +struct hyperv_pci_block_ops hvpci_block_ops; +EXPORT_SYMBOL_GPL(hvpci_block_ops); + +int hyperv_read_cfg_blk(struct pci_dev *dev, void *buf, unsigned int buf_len, + unsigned int block_id, unsigned int *bytes_returned) +{ + if (!hvpci_block_ops.read_block) + return -EOPNOTSUPP; + + return hvpci_block_ops.read_block(dev, buf, buf_len, block_id, + bytes_returned); +} +EXPORT_SYMBOL_GPL(hyperv_read_cfg_blk); + +int hyperv_write_cfg_blk(struct pci_dev *dev, void *buf, unsigned int len, + unsigned int block_id) +{ + if (!hvpci_block_ops.write_block) + return -EOPNOTSUPP; + + return hvpci_block_ops.write_block(dev, buf, len, block_id); +} +EXPORT_SYMBOL_GPL(hyperv_write_cfg_blk); + +int hyperv_reg_block_invalidate(struct pci_dev *dev, void *context, + void (*block_invalidate)(void *context, + u64 block_mask)) +{ + if (!hvpci_block_ops.reg_blk_invalidate) + return -EOPNOTSUPP; + + return hvpci_block_ops.reg_blk_invalidate(dev, context, + block_invalidate); +} +EXPORT_SYMBOL_GPL(hyperv_reg_block_invalidate); + +static void __exit exit_hv_pci_intf(void) +{ +} + +static int __init init_hv_pci_intf(void) +{ + return 0; +} + +module_init(init_hv_pci_intf); +module_exit(exit_hv_pci_intf); + +MODULE_DESCRIPTION("Hyper-V PCI Interface"); +MODULE_LICENSE("GPL v2"); diff --git a/drivers/pci/controller/pci-hyperv.c b/drivers/pci/controller/pci-hyperv.c index 57adeca7bda9..9c93ac2215b7 100644 --- a/drivers/pci/controller/pci-hyperv.c +++ b/drivers/pci/controller/pci-hyperv.c @@ -983,7 +983,6 @@ int hv_read_config_block(struct pci_dev *pdev, void *buf, unsigned int len, *bytes_returned = comp_pkt.bytes_returned; return 0; } -EXPORT_SYMBOL(hv_read_config_block); /** * hv_pci_write_config_compl() - Invoked when a response packet for a write @@ -1070,7 +1069,6 @@ int hv_write_config_block(struct pci_dev *pdev, void *buf, unsigned int len, return 0; } -EXPORT_SYMBOL(hv_write_config_block); /** * hv_register_block_invalidate() - Invoked when a config block invalidation @@ -1101,7 +1099,6 @@ int hv_register_block_invalidate(struct pci_dev *pdev, void *context, return 0; } -EXPORT_SYMBOL(hv_register_block_invalidate); /* Interrupt management hooks */ static void hv_int_desc_free(struct hv_pci_dev *hpdev, @@ -3045,10 +3042,19 @@ static struct hv_driver hv_pci_drv = { static void __exit exit_hv_pci_drv(void) { vmbus_driver_unregister(&hv_pci_drv); + + hvpci_block_ops.read_block = NULL; + hvpci_block_ops.write_block = NULL; + hvpci_block_ops.reg_blk_invalidate = NULL; } static int __init init_hv_pci_drv(void) { + /* Initialize PCI block r/w interface */ + hvpci_block_ops.read_block = hv_read_config_block; + hvpci_block_ops.write_block = hv_write_config_block; + hvpci_block_ops.reg_blk_invalidate = hv_register_block_invalidate; + return vmbus_driver_register(&hv_pci_drv); } diff --git a/include/linux/hyperv.h b/include/linux/hyperv.h index 9d37f8cf1245..2afe6fdc1dda 100644 --- a/include/linux/hyperv.h +++ b/include/linux/hyperv.h @@ -1579,18 +1579,32 @@ hv_pkt_iter_next(struct vmbus_channel *channel, pkt = hv_pkt_iter_next(channel, pkt)) /* - * Functions for passing data between SR-IOV PF and VF drivers. The VF driver + * Interface for passing data between SR-IOV PF and VF drivers. The VF driver * sends requests to read and write blocks. Each block must be 128 bytes or * smaller. Optionally, the VF driver can register a callback function which * will be invoked when the host says that one or more of the first 64 block * IDs is "invalid" which means that the VF driver should reread them. */ #define HV_CONFIG_BLOCK_SIZE_MAX 128 -int hv_read_config_block(struct pci_dev *dev, void *buf, unsigned int buf_len, - unsigned int block_id, unsigned int *bytes_returned); -int hv_write_config_block(struct pci_dev *dev, void *buf, unsigned int len, - unsigned int block_id); -int hv_register_block_invalidate(struct pci_dev *dev, void *context, - void (*block_invalidate)(void *context, - u64 block_mask)); + +int hyperv_read_cfg_blk(struct pci_dev *dev, void *buf, unsigned int buf_len, + unsigned int block_id, unsigned int *bytes_returned); +int hyperv_write_cfg_blk(struct pci_dev *dev, void *buf, unsigned int len, + unsigned int block_id); +int hyperv_reg_block_invalidate(struct pci_dev *dev, void *context, + void (*block_invalidate)(void *context, + u64 block_mask)); + +struct hyperv_pci_block_ops { + int (*read_block)(struct pci_dev *dev, void *buf, unsigned int buf_len, + unsigned int block_id, unsigned int *bytes_returned); + int (*write_block)(struct pci_dev *dev, void *buf, unsigned int len, + unsigned int block_id); + int (*reg_blk_invalidate)(struct pci_dev *dev, void *context, + void (*block_invalidate)(void *context, + u64 block_mask)); +}; + +extern struct hyperv_pci_block_ops hvpci_block_ops; + #endif /* _HYPERV_H */ -- cgit v1.2.3