diff options
Diffstat (limited to 'drivers/dma/idxd')
-rw-r--r-- | drivers/dma/idxd/device.c | 222 | ||||
-rw-r--r-- | drivers/dma/idxd/dma.c | 40 | ||||
-rw-r--r-- | drivers/dma/idxd/idxd.h | 67 | ||||
-rw-r--r-- | drivers/dma/idxd/init.c | 196 | ||||
-rw-r--r-- | drivers/dma/idxd/irq.c | 239 | ||||
-rw-r--r-- | drivers/dma/idxd/registers.h | 15 | ||||
-rw-r--r-- | drivers/dma/idxd/submit.c | 69 | ||||
-rw-r--r-- | drivers/dma/idxd/sysfs.c | 215 |
8 files changed, 703 insertions, 360 deletions
diff --git a/drivers/dma/idxd/device.c b/drivers/dma/idxd/device.c index fab412349f7f..573ad8b86804 100644 --- a/drivers/dma/idxd/device.c +++ b/drivers/dma/idxd/device.c @@ -19,30 +19,6 @@ static void idxd_device_wqs_clear_state(struct idxd_device *idxd); static void idxd_wq_disable_cleanup(struct idxd_wq *wq); /* Interrupt control bits */ -void idxd_mask_msix_vector(struct idxd_device *idxd, int vec_id) -{ - struct irq_data *data = irq_get_irq_data(idxd->irq_entries[vec_id].vector); - - pci_msi_mask_irq(data); -} - -void idxd_mask_msix_vectors(struct idxd_device *idxd) -{ - struct pci_dev *pdev = idxd->pdev; - int msixcnt = pci_msix_vec_count(pdev); - int i; - - for (i = 0; i < msixcnt; i++) - idxd_mask_msix_vector(idxd, i); -} - -void idxd_unmask_msix_vector(struct idxd_device *idxd, int vec_id) -{ - struct irq_data *data = irq_get_irq_data(idxd->irq_entries[vec_id].vector); - - pci_msi_unmask_irq(data); -} - void idxd_unmask_error_interrupts(struct idxd_device *idxd) { union genctrl_reg genctrl; @@ -382,14 +358,24 @@ static void idxd_wq_disable_cleanup(struct idxd_wq *wq) lockdep_assert_held(&wq->wq_lock); memset(wq->wqcfg, 0, idxd->wqcfg_size); wq->type = IDXD_WQT_NONE; - wq->size = 0; - wq->group = NULL; wq->threshold = 0; wq->priority = 0; wq->ats_dis = 0; + wq->enqcmds_retries = IDXD_ENQCMDS_RETRIES; clear_bit(WQ_FLAG_DEDICATED, &wq->flags); clear_bit(WQ_FLAG_BLOCK_ON_FAULT, &wq->flags); memset(wq->name, 0, WQ_NAME_SIZE); + wq->max_xfer_bytes = WQ_DEFAULT_MAX_XFER; + wq->max_batch_size = WQ_DEFAULT_MAX_BATCH; +} + +static void idxd_wq_device_reset_cleanup(struct idxd_wq *wq) +{ + lockdep_assert_held(&wq->wq_lock); + + idxd_wq_disable_cleanup(wq); + wq->size = 0; + wq->group = NULL; } static void idxd_wq_ref_release(struct percpu_ref *ref) @@ -404,19 +390,31 @@ int idxd_wq_init_percpu_ref(struct idxd_wq *wq) int rc; memset(&wq->wq_active, 0, sizeof(wq->wq_active)); - rc = percpu_ref_init(&wq->wq_active, idxd_wq_ref_release, 0, GFP_KERNEL); + rc = percpu_ref_init(&wq->wq_active, idxd_wq_ref_release, + PERCPU_REF_ALLOW_REINIT, GFP_KERNEL); if (rc < 0) return rc; reinit_completion(&wq->wq_dead); + reinit_completion(&wq->wq_resurrect); return 0; } -void idxd_wq_quiesce(struct idxd_wq *wq) +void __idxd_wq_quiesce(struct idxd_wq *wq) { + lockdep_assert_held(&wq->wq_lock); + reinit_completion(&wq->wq_resurrect); percpu_ref_kill(&wq->wq_active); + complete_all(&wq->wq_resurrect); wait_for_completion(&wq->wq_dead); } +void idxd_wq_quiesce(struct idxd_wq *wq) +{ + mutex_lock(&wq->wq_lock); + __idxd_wq_quiesce(wq); + mutex_unlock(&wq->wq_lock); +} + /* Device control bits */ static inline bool idxd_is_enabled(struct idxd_device *idxd) { @@ -572,7 +570,6 @@ void idxd_device_reset(struct idxd_device *idxd) idxd_device_clear_state(idxd); idxd->state = IDXD_DEV_DISABLED; idxd_unmask_error_interrupts(idxd); - idxd_msix_perm_setup(idxd); spin_unlock(&idxd->dev_lock); } @@ -681,9 +678,9 @@ static void idxd_groups_clear_state(struct idxd_device *idxd) memset(&group->grpcfg, 0, sizeof(group->grpcfg)); group->num_engines = 0; group->num_wqs = 0; - group->use_token_limit = false; - group->tokens_allowed = 0; - group->tokens_reserved = 0; + group->use_rdbuf_limit = false; + group->rdbufs_allowed = 0; + group->rdbufs_reserved = 0; group->tc_a = -1; group->tc_b = -1; } @@ -699,6 +696,7 @@ static void idxd_device_wqs_clear_state(struct idxd_device *idxd) if (wq->state == IDXD_WQ_ENABLED) { idxd_wq_disable_cleanup(wq); + idxd_wq_device_reset_cleanup(wq); wq->state = IDXD_WQ_DISABLED; } } @@ -711,36 +709,6 @@ void idxd_device_clear_state(struct idxd_device *idxd) idxd_device_wqs_clear_state(idxd); } -void idxd_msix_perm_setup(struct idxd_device *idxd) -{ - union msix_perm mperm; - int i, msixcnt; - - msixcnt = pci_msix_vec_count(idxd->pdev); - if (msixcnt < 0) - return; - - mperm.bits = 0; - mperm.pasid = idxd->pasid; - mperm.pasid_en = device_pasid_enabled(idxd); - for (i = 1; i < msixcnt; i++) - iowrite32(mperm.bits, idxd->reg_base + idxd->msix_perm_offset + i * 8); -} - -void idxd_msix_perm_clear(struct idxd_device *idxd) -{ - union msix_perm mperm; - int i, msixcnt; - - msixcnt = pci_msix_vec_count(idxd->pdev); - if (msixcnt < 0) - return; - - mperm.bits = 0; - for (i = 1; i < msixcnt; i++) - iowrite32(mperm.bits, idxd->reg_base + idxd->msix_perm_offset + i * 8); -} - static void idxd_group_config_write(struct idxd_group *group) { struct idxd_device *idxd = group->idxd; @@ -780,10 +748,10 @@ static int idxd_groups_config_write(struct idxd_device *idxd) int i; struct device *dev = &idxd->pdev->dev; - /* Setup bandwidth token limit */ - if (idxd->hw.gen_cap.config_en && idxd->token_limit) { + /* Setup bandwidth rdbuf limit */ + if (idxd->hw.gen_cap.config_en && idxd->rdbuf_limit) { reg.bits = ioread32(idxd->reg_base + IDXD_GENCFG_OFFSET); - reg.token_limit = idxd->token_limit; + reg.rdbuf_limit = idxd->rdbuf_limit; iowrite32(reg.bits, idxd->reg_base + IDXD_GENCFG_OFFSET); } @@ -827,15 +795,12 @@ static int idxd_wq_config_write(struct idxd_wq *wq) wq->wqcfg->bits[i] = ioread32(idxd->reg_base + wq_offset); } + if (wq->size == 0 && wq->type != IDXD_WQT_NONE) + wq->size = WQ_DEFAULT_QUEUE_DEPTH; + /* byte 0-3 */ wq->wqcfg->wq_size = wq->size; - if (wq->size == 0) { - idxd->cmd_status = IDXD_SCMD_WQ_NO_SIZE; - dev_warn(dev, "Incorrect work queue size: 0\n"); - return -EINVAL; - } - /* bytes 4-7 */ wq->wqcfg->wq_thresh = wq->threshold; @@ -924,13 +889,12 @@ static void idxd_group_flags_setup(struct idxd_device *idxd) group->tc_b = group->grpcfg.flags.tc_b = 1; else group->grpcfg.flags.tc_b = group->tc_b; - group->grpcfg.flags.use_token_limit = group->use_token_limit; - group->grpcfg.flags.tokens_reserved = group->tokens_reserved; - if (group->tokens_allowed) - group->grpcfg.flags.tokens_allowed = - group->tokens_allowed; + group->grpcfg.flags.use_rdbuf_limit = group->use_rdbuf_limit; + group->grpcfg.flags.rdbufs_reserved = group->rdbufs_reserved; + if (group->rdbufs_allowed) + group->grpcfg.flags.rdbufs_allowed = group->rdbufs_allowed; else - group->grpcfg.flags.tokens_allowed = idxd->max_tokens; + group->grpcfg.flags.rdbufs_allowed = idxd->max_rdbufs; } } @@ -981,8 +945,6 @@ static int idxd_wqs_setup(struct idxd_device *idxd) if (!wq->group) continue; - if (!wq->size) - continue; if (wq_shared(wq) && !device_swq_supported(idxd)) { idxd->cmd_status = IDXD_SCMD_WQ_NO_SWQ_SUPPORT; @@ -1123,7 +1085,7 @@ int idxd_device_load_config(struct idxd_device *idxd) int i, rc; reg.bits = ioread32(idxd->reg_base + IDXD_GENCFG_OFFSET); - idxd->token_limit = reg.token_limit; + idxd->rdbuf_limit = reg.rdbuf_limit; for (i = 0; i < idxd->max_groups; i++) { struct idxd_group *group = idxd->groups[i]; @@ -1142,6 +1104,106 @@ int idxd_device_load_config(struct idxd_device *idxd) return 0; } +static void idxd_flush_pending_descs(struct idxd_irq_entry *ie) +{ + struct idxd_desc *desc, *itr; + struct llist_node *head; + LIST_HEAD(flist); + enum idxd_complete_type ctype; + + spin_lock(&ie->list_lock); + head = llist_del_all(&ie->pending_llist); + if (head) { + llist_for_each_entry_safe(desc, itr, head, llnode) + list_add_tail(&desc->list, &ie->work_list); + } + + list_for_each_entry_safe(desc, itr, &ie->work_list, list) + list_move_tail(&desc->list, &flist); + spin_unlock(&ie->list_lock); + + list_for_each_entry_safe(desc, itr, &flist, list) { + list_del(&desc->list); + ctype = desc->completion->status ? IDXD_COMPLETE_NORMAL : IDXD_COMPLETE_ABORT; + idxd_dma_complete_txd(desc, ctype, true); + } +} + +static void idxd_device_set_perm_entry(struct idxd_device *idxd, + struct idxd_irq_entry *ie) +{ + union msix_perm mperm; + + if (ie->pasid == INVALID_IOASID) + return; + + mperm.bits = 0; + mperm.pasid = ie->pasid; + mperm.pasid_en = 1; + iowrite32(mperm.bits, idxd->reg_base + idxd->msix_perm_offset + ie->id * 8); +} + +static void idxd_device_clear_perm_entry(struct idxd_device *idxd, + struct idxd_irq_entry *ie) +{ + iowrite32(0, idxd->reg_base + idxd->msix_perm_offset + ie->id * 8); +} + +void idxd_wq_free_irq(struct idxd_wq *wq) +{ + struct idxd_device *idxd = wq->idxd; + struct idxd_irq_entry *ie = &wq->ie; + + synchronize_irq(ie->vector); + free_irq(ie->vector, ie); + idxd_flush_pending_descs(ie); + if (idxd->request_int_handles) + idxd_device_release_int_handle(idxd, ie->int_handle, IDXD_IRQ_MSIX); + idxd_device_clear_perm_entry(idxd, ie); + ie->vector = -1; + ie->int_handle = INVALID_INT_HANDLE; + ie->pasid = INVALID_IOASID; +} + +int idxd_wq_request_irq(struct idxd_wq *wq) +{ + struct idxd_device *idxd = wq->idxd; + struct pci_dev *pdev = idxd->pdev; + struct device *dev = &pdev->dev; + struct idxd_irq_entry *ie; + int rc; + + ie = &wq->ie; + ie->vector = pci_irq_vector(pdev, ie->id); + ie->pasid = device_pasid_enabled(idxd) ? idxd->pasid : INVALID_IOASID; + idxd_device_set_perm_entry(idxd, ie); + + rc = request_threaded_irq(ie->vector, NULL, idxd_wq_thread, 0, "idxd-portal", ie); + if (rc < 0) { + dev_err(dev, "Failed to request irq %d.\n", ie->vector); + goto err_irq; + } + + if (idxd->request_int_handles) { + rc = idxd_device_request_int_handle(idxd, ie->id, &ie->int_handle, + IDXD_IRQ_MSIX); + if (rc < 0) + goto err_int_handle; + } else { + ie->int_handle = ie->id; + } + + return 0; + +err_int_handle: + ie->int_handle = INVALID_INT_HANDLE; + free_irq(ie->vector, ie); +err_irq: + idxd_device_clear_perm_entry(idxd, ie); + ie->pasid = INVALID_IOASID; + return rc; +} + int __drv_enable_wq(struct idxd_wq *wq) { struct idxd_device *idxd = wq->idxd; diff --git a/drivers/dma/idxd/dma.c b/drivers/dma/idxd/dma.c index c39e9483206a..bfff59617d04 100644 --- a/drivers/dma/idxd/dma.c +++ b/drivers/dma/idxd/dma.c @@ -21,20 +21,27 @@ static inline struct idxd_wq *to_idxd_wq(struct dma_chan *c) } void idxd_dma_complete_txd(struct idxd_desc *desc, - enum idxd_complete_type comp_type) + enum idxd_complete_type comp_type, + bool free_desc) { + struct idxd_device *idxd = desc->wq->idxd; struct dma_async_tx_descriptor *tx; struct dmaengine_result res; int complete = 1; - if (desc->completion->status == DSA_COMP_SUCCESS) + if (desc->completion->status == DSA_COMP_SUCCESS) { res.result = DMA_TRANS_NOERROR; - else if (desc->completion->status) + } else if (desc->completion->status) { + if (idxd->request_int_handles && comp_type != IDXD_COMPLETE_ABORT && + desc->completion->status == DSA_COMP_INT_HANDLE_INVAL && + idxd_queue_int_handle_resubmit(desc)) + return; res.result = DMA_TRANS_WRITE_FAILED; - else if (comp_type == IDXD_COMPLETE_ABORT) + } else if (comp_type == IDXD_COMPLETE_ABORT) { res.result = DMA_TRANS_ABORTED; - else + } else { complete = 0; + } tx = &desc->txd; if (complete && tx->cookie) { @@ -44,6 +51,9 @@ void idxd_dma_complete_txd(struct idxd_desc *desc, tx->callback = NULL; tx->callback_result = NULL; } + + if (free_desc) + idxd_free_desc(desc->wq, desc); } static void op_flag_setup(unsigned long flags, u32 *desc_flags) @@ -153,8 +163,10 @@ static dma_cookie_t idxd_dma_tx_submit(struct dma_async_tx_descriptor *tx) cookie = dma_cookie_assign(tx); rc = idxd_submit_desc(wq, desc); - if (rc < 0) + if (rc < 0) { + idxd_free_desc(wq, desc); return rc; + } return cookie; } @@ -277,6 +289,14 @@ static int idxd_dmaengine_drv_probe(struct idxd_dev *idxd_dev) mutex_lock(&wq->wq_lock); wq->type = IDXD_WQT_KERNEL; + + rc = idxd_wq_request_irq(wq); + if (rc < 0) { + idxd->cmd_status = IDXD_SCMD_WQ_IRQ_ERR; + dev_dbg(dev, "WQ %d irq setup failed: %d\n", wq->id, rc); + goto err_irq; + } + rc = __drv_enable_wq(wq); if (rc < 0) { dev_dbg(dev, "Enable wq %d failed: %d\n", wq->id, rc); @@ -310,13 +330,15 @@ static int idxd_dmaengine_drv_probe(struct idxd_dev *idxd_dev) return 0; err_dma: - idxd_wq_quiesce(wq); + __idxd_wq_quiesce(wq); percpu_ref_exit(&wq->wq_active); err_ref: idxd_wq_free_resources(wq); err_res_alloc: __drv_disable_wq(wq); err: + idxd_wq_free_irq(wq); +err_irq: wq->type = IDXD_WQT_NONE; mutex_unlock(&wq->wq_lock); return rc; @@ -327,11 +349,13 @@ static void idxd_dmaengine_drv_remove(struct idxd_dev *idxd_dev) struct idxd_wq *wq = idxd_dev_to_wq(idxd_dev); mutex_lock(&wq->wq_lock); - idxd_wq_quiesce(wq); + __idxd_wq_quiesce(wq); idxd_unregister_dma_channel(wq); idxd_wq_free_resources(wq); __drv_disable_wq(wq); percpu_ref_exit(&wq->wq_active); + idxd_wq_free_irq(wq); + wq->type = IDXD_WQT_NONE; mutex_unlock(&wq->wq_lock); } diff --git a/drivers/dma/idxd/idxd.h b/drivers/dma/idxd/idxd.h index 0cf8d3145870..da72eb15f610 100644 --- a/drivers/dma/idxd/idxd.h +++ b/drivers/dma/idxd/idxd.h @@ -10,6 +10,7 @@ #include <linux/cdev.h> #include <linux/idr.h> #include <linux/pci.h> +#include <linux/ioasid.h> #include <linux/perf_event.h> #include <uapi/linux/idxd.h> #include "registers.h" @@ -51,6 +52,9 @@ enum idxd_type { #define IDXD_NAME_SIZE 128 #define IDXD_PMU_EVENT_MAX 64 +#define IDXD_ENQCMDS_RETRIES 32 +#define IDXD_ENQCMDS_MAX_RETRIES 64 + struct idxd_device_driver { const char *name; enum idxd_dev_type *type; @@ -64,8 +68,8 @@ extern struct idxd_device_driver idxd_drv; extern struct idxd_device_driver idxd_dmaengine_drv; extern struct idxd_device_driver idxd_user_drv; +#define INVALID_INT_HANDLE -1 struct idxd_irq_entry { - struct idxd_device *idxd; int id; int vector; struct llist_head pending_llist; @@ -75,6 +79,8 @@ struct idxd_irq_entry { * and irq thread processing error descriptor. */ spinlock_t list_lock; + int int_handle; + ioasid_t pasid; }; struct idxd_group { @@ -84,9 +90,9 @@ struct idxd_group { int id; int num_engines; int num_wqs; - bool use_token_limit; - u8 tokens_allowed; - u8 tokens_reserved; + bool use_rdbuf_limit; + u8 rdbufs_allowed; + u8 rdbufs_reserved; int tc_a; int tc_b; }; @@ -145,6 +151,10 @@ struct idxd_cdev { #define WQ_NAME_SIZE 1024 #define WQ_TYPE_SIZE 10 +#define WQ_DEFAULT_QUEUE_DEPTH 16 +#define WQ_DEFAULT_MAX_XFER SZ_2M +#define WQ_DEFAULT_MAX_BATCH 32 + enum idxd_op_type { IDXD_OP_BLOCK = 0, IDXD_OP_NONBLOCK = 1, @@ -164,13 +174,16 @@ struct idxd_dma_chan { struct idxd_wq { void __iomem *portal; u32 portal_offset; + unsigned int enqcmds_retries; struct percpu_ref wq_active; struct completion wq_dead; + struct completion wq_resurrect; struct idxd_dev idxd_dev; struct idxd_cdev *idxd_cdev; struct wait_queue_head err_queue; struct idxd_device *idxd; int id; + struct idxd_irq_entry ie; enum idxd_wq_type type; struct idxd_group *group; int client_count; @@ -251,6 +264,7 @@ struct idxd_device { int id; int major; u32 cmd_status; + struct idxd_irq_entry ie; /* misc irq, msix 0 */ struct pci_dev *pdev; void __iomem *reg_base; @@ -266,6 +280,8 @@ struct idxd_device { unsigned int pasid; int num_groups; + int irq_cnt; + bool request_int_handles; u32 msix_perm_offset; u32 wqcfg_offset; @@ -276,24 +292,20 @@ struct idxd_device { u32 max_batch_size; int max_groups; int max_engines; - int max_tokens; + int max_rdbufs; int max_wqs; int max_wq_size; - int token_limit; - int nr_tokens; /* non-reserved tokens */ + int rdbuf_limit; + int nr_rdbufs; /* non-reserved read buffers */ unsigned int wqcfg_size; union sw_err_reg sw_err; wait_queue_head_t cmd_waitq; - int num_wq_irqs; - struct idxd_irq_entry *irq_entries; struct idxd_dma_dev *idxd_dma; struct workqueue_struct *wq; struct work_struct work; - int *int_handles; - struct idxd_pmu *idxd_pmu; }; @@ -380,6 +392,21 @@ static inline void idxd_dev_set_type(struct idxd_dev *idev, int type) idev->type = type; } +static inline struct idxd_irq_entry *idxd_get_ie(struct idxd_device *idxd, int idx) +{ + return (idx == 0) ? &idxd->ie : &idxd->wqs[idx - 1]->ie; +} + +static inline struct idxd_wq *ie_to_wq(struct idxd_irq_entry *ie) +{ + return container_of(ie, struct idxd_wq, ie); +} + +static inline struct idxd_device *ie_to_idxd(struct idxd_irq_entry *ie) +{ + return container_of(ie, struct idxd_device, ie); +} + extern struct bus_type dsa_bus_type; extern bool support_enqcmd; @@ -518,17 +545,13 @@ void idxd_unregister_devices(struct idxd_device *idxd); int idxd_register_driver(void); void idxd_unregister_driver(void); void idxd_wqs_quiesce(struct idxd_device *idxd); +bool idxd_queue_int_handle_resubmit(struct idxd_desc *desc); /* device interrupt control */ -void idxd_msix_perm_setup(struct idxd_device *idxd); -void idxd_msix_perm_clear(struct idxd_device *idxd); irqreturn_t idxd_misc_thread(int vec, void *data); irqreturn_t idxd_wq_thread(int irq, void *data); void idxd_mask_error_interrupts(struct idxd_device *idxd); void idxd_unmask_error_interrupts(struct idxd_device *idxd); -void idxd_mask_msix_vectors(struct idxd_device *idxd); -void idxd_mask_msix_vector(struct idxd_device *idxd, int vec_id); -void idxd_unmask_msix_vector(struct idxd_device *idxd, int vec_id); /* device control */ int idxd_register_idxd_drv(void); @@ -564,13 +587,17 @@ int idxd_wq_map_portal(struct idxd_wq *wq); void idxd_wq_unmap_portal(struct idxd_wq *wq); int idxd_wq_set_pasid(struct idxd_wq *wq, int pasid); int idxd_wq_disable_pasid(struct idxd_wq *wq); +void __idxd_wq_quiesce(struct idxd_wq *wq); void idxd_wq_quiesce(struct idxd_wq *wq); int idxd_wq_init_percpu_ref(struct idxd_wq *wq); +void idxd_wq_free_irq(struct idxd_wq *wq); +int idxd_wq_request_irq(struct idxd_wq *wq); /* submission */ int idxd_submit_desc(struct idxd_wq *wq, struct idxd_desc *desc); struct idxd_desc *idxd_alloc_desc(struct idxd_wq *wq, enum idxd_op_type optype); void idxd_free_desc(struct idxd_wq *wq, struct idxd_desc *desc); +int idxd_enqcmds(struct idxd_wq *wq, void __iomem *portal, const void *desc); /* dmaengine */ int idxd_register_dma_device(struct idxd_device *idxd); @@ -579,7 +606,7 @@ int idxd_register_dma_channel(struct idxd_wq *wq); void idxd_unregister_dma_channel(struct idxd_wq *wq); void idxd_parse_completion_status(u8 status, enum dmaengine_tx_result *res); void idxd_dma_complete_txd(struct idxd_desc *desc, - enum idxd_complete_type comp_type); + enum idxd_complete_type comp_type, bool free_desc); /* cdev */ int idxd_cdev_register(void); @@ -603,10 +630,4 @@ static inline void perfmon_init(void) {} static inline void perfmon_exit(void) {} #endif -static inline void complete_desc(struct idxd_desc *desc, enum idxd_complete_type reason) -{ - idxd_dma_complete_txd(desc, reason); - idxd_free_desc(desc->wq, desc); -} - #endif diff --git a/drivers/dma/idxd/init.c b/drivers/dma/idxd/init.c index 7bf03f371ce1..08a5f4310188 100644 --- a/drivers/dma/idxd/init.c +++ b/drivers/dma/idxd/init.c @@ -72,7 +72,7 @@ static int idxd_setup_interrupts(struct idxd_device *idxd) { struct pci_dev *pdev = idxd->pdev; struct device *dev = &pdev->dev; - struct idxd_irq_entry *irq_entry; + struct idxd_irq_entry *ie; int i, msixcnt; int rc = 0; @@ -81,6 +81,7 @@ static int idxd_setup_interrupts(struct idxd_device *idxd) dev_err(dev, "Not MSI-X interrupt capable.\n"); return -ENOSPC; } + idxd->irq_cnt = msixcnt; rc = pci_alloc_irq_vectors(pdev, msixcnt, msixcnt, PCI_IRQ_MSIX); if (rc != msixcnt) { @@ -89,87 +90,34 @@ static int idxd_setup_interrupts(struct idxd_device *idxd) } dev_dbg(dev, "Enabled %d msix vectors\n", msixcnt); - /* - * We implement 1 completion list per MSI-X entry except for - * entry 0, which is for errors and others. - */ - idxd->irq_entries = kcalloc_node(msixcnt, sizeof(struct idxd_irq_entry), - GFP_KERNEL, dev_to_node(dev)); - if (!idxd->irq_entries) { - rc = -ENOMEM; - goto err_irq_entries; - } - - for (i = 0; i < msixcnt; i++) { - idxd->irq_entries[i].id = i; - idxd->irq_entries[i].idxd = idxd; - idxd->irq_entries[i].vector = pci_irq_vector(pdev, i); - spin_lock_init(&idxd->irq_entries[i].list_lock); - } - - idxd_msix_perm_setup(idxd); - irq_entry = &idxd->irq_entries[0]; - rc = request_threaded_irq(irq_entry->vector, NULL, idxd_misc_thread, - 0, "idxd-misc", irq_entry); + ie = idxd_get_ie(idxd, 0); + ie->vector = pci_irq_vector(pdev, 0); + rc = request_threaded_irq(ie->vector, NULL, idxd_misc_thread, 0, "idxd-misc", ie); if (rc < 0) { dev_err(dev, "Failed to allocate misc interrupt.\n"); goto err_misc_irq; } + dev_dbg(dev, "Requested idxd-misc handler on msix vector %d\n", ie->vector); - dev_dbg(dev, "Allocated idxd-misc handler on msix vector %d\n", irq_entry->vector); - - /* first MSI-X entry is not for wq interrupts */ - idxd->num_wq_irqs = msixcnt - 1; + for (i = 0; i < idxd->max_wqs; i++) { + int msix_idx = i + 1; - for (i = 1; i < msixcnt; i++) { - irq_entry = &idxd->irq_entries[i]; + ie = idxd_get_ie(idxd, msix_idx); + ie->id = msix_idx; + ie->int_handle = INVALID_INT_HANDLE; + ie->pasid = INVALID_IOASID; - init_llist_head(&idxd->irq_entries[i].pending_llist); - INIT_LIST_HEAD(&idxd->irq_entries[i].work_list); - rc = request_threaded_irq(irq_entry->vector, NULL, - idxd_wq_thread, 0, "idxd-portal", irq_entry); - if (rc < 0) { - dev_err(dev, "Failed to allocate irq %d.\n", irq_entry->vector); - goto err_wq_irqs; - } - - dev_dbg(dev, "Allocated idxd-msix %d for vector %d\n", i, irq_entry->vector); - if (idxd->hw.cmd_cap & BIT(IDXD_CMD_REQUEST_INT_HANDLE)) { - /* - * The MSIX vector enumeration starts at 1 with vector 0 being the - * misc interrupt that handles non I/O completion events. The - * interrupt handles are for IMS enumeration on guest. The misc - * interrupt vector does not require a handle and therefore we start - * the int_handles at index 0. Since 'i' starts at 1, the first - * int_handles index will be 0. - */ - rc = idxd_device_request_int_handle(idxd, i, &idxd->int_handles[i - 1], - IDXD_IRQ_MSIX); - if (rc < 0) { - free_irq(irq_entry->vector, irq_entry); - goto err_wq_irqs; - } - dev_dbg(dev, "int handle requested: %u\n", idxd->int_handles[i - 1]); - } + spin_lock_init(&ie->list_lock); + init_llist_head(&ie->pending_llist); + INIT_LIST_HEAD(&ie->work_list); } idxd_unmask_error_interrupts(idxd); return 0; - err_wq_irqs: - while (--i >= 0) { - irq_entry = &idxd->irq_entries[i]; - free_irq(irq_entry->vector, irq_entry); - if (i != 0) - idxd_device_release_int_handle(idxd, - idxd->int_handles[i], IDXD_IRQ_MSIX); - } err_misc_irq: - /* Disable error interrupt generation */ idxd_mask_error_interrupts(idxd); - idxd_msix_perm_clear(idxd); - err_irq_entries: pci_free_irq_vectors(pdev); dev_err(dev, "No usable interrupts\n"); return rc; @@ -178,26 +126,16 @@ static int idxd_setup_interrupts(struct idxd_device *idxd) static void idxd_cleanup_interrupts(struct idxd_device *idxd) { struct pci_dev *pdev = idxd->pdev; - struct idxd_irq_entry *irq_entry; - int i, msixcnt; + struct idxd_irq_entry *ie; + int msixcnt; msixcnt = pci_msix_vec_count(pdev); if (msixcnt <= 0) return; - irq_entry = &idxd->irq_entries[0]; - free_irq(irq_entry->vector, irq_entry); - - for (i = 1; i < msixcnt; i++) { - - irq_entry = &idxd->irq_entries[i]; - if (idxd->hw.cmd_cap & BIT(IDXD_CMD_RELEASE_INT_HANDLE)) - idxd_device_release_int_handle(idxd, idxd->int_handles[i], - IDXD_IRQ_MSIX); - free_irq(irq_entry->vector, irq_entry); - } - + ie = idxd_get_ie(idxd, 0); idxd_mask_error_interrupts(idxd); + free_irq(ie->vector, ie); pci_free_irq_vectors(pdev); } @@ -237,8 +175,10 @@ static int idxd_setup_wqs(struct idxd_device *idxd) mutex_init(&wq->wq_lock); init_waitqueue_head(&wq->err_queue); init_completion(&wq->wq_dead); - wq->max_xfer_bytes = idxd->max_xfer_bytes; - wq->max_batch_size = idxd->max_batch_size; + init_completion(&wq->wq_resurrect); + wq->max_xfer_bytes = WQ_DEFAULT_MAX_XFER; + wq->max_batch_size = WQ_DEFAULT_MAX_BATCH; + wq->enqcmds_retries = IDXD_ENQCMDS_RETRIES; wq->wqcfg = kzalloc_node(idxd->wqcfg_size, GFP_KERNEL, dev_to_node(dev)); if (!wq->wqcfg) { put_device(conf_dev); @@ -379,13 +319,6 @@ static int idxd_setup_internals(struct idxd_device *idxd) init_waitqueue_head(&idxd->cmd_waitq); - if (idxd->hw.cmd_cap & BIT(IDXD_CMD_REQUEST_INT_HANDLE)) { - idxd->int_handles = kcalloc_node(idxd->max_wqs, sizeof(int), GFP_KERNEL, - dev_to_node(dev)); - if (!idxd->int_handles) - return -ENOMEM; - } - rc = idxd_setup_wqs(idxd); if (rc < 0) goto err_wqs; @@ -416,7 +349,6 @@ static int idxd_setup_internals(struct idxd_device *idxd) for (i = 0; i < idxd->max_wqs; i++) put_device(wq_confdev(idxd->wqs[i])); err_wqs: - kfree(idxd->int_handles); return rc; } @@ -451,6 +383,10 @@ static void idxd_read_caps(struct idxd_device *idxd) dev_dbg(dev, "cmd_cap: %#x\n", idxd->hw.cmd_cap); } + /* reading command capabilities */ + if (idxd->hw.cmd_cap & BIT(IDXD_CMD_REQUEST_INT_HANDLE)) + idxd->request_int_handles = true; + idxd->max_xfer_bytes = 1ULL << idxd->hw.gen_cap.max_xfer_shift; dev_dbg(dev, "max xfer size: %llu bytes\n", idxd->max_xfer_bytes); idxd->max_batch_size = 1U << idxd->hw.gen_cap.max_batch_shift; @@ -464,9 +400,9 @@ static void idxd_read_caps(struct idxd_device *idxd) dev_dbg(dev, "group_cap: %#llx\n", idxd->hw.group_cap.bits); idxd->max_groups = idxd->hw.group_cap.num_groups; dev_dbg(dev, "max groups: %u\n", idxd->max_groups); - idxd->max_tokens = idxd->hw.group_cap.total_tokens; - dev_dbg(dev, "max tokens: %u\n", idxd->max_tokens); - idxd->nr_tokens = idxd->max_tokens; + idxd->max_rdbufs = idxd->hw.group_cap.total_rdbufs; + dev_dbg(dev, "max read buffers: %u\n", idxd->max_rdbufs); + idxd->nr_rdbufs = idxd->max_rdbufs; /* read engine capabilities */ idxd->hw.engine_cap.bits = @@ -611,8 +547,6 @@ static int idxd_probe(struct idxd_device *idxd) if (rc) goto err_config; - dev_dbg(dev, "IDXD interrupt setup complete.\n"); - idxd->major = idxd_cdev_get_major(idxd); rc = perfmon_pmu_init(idxd); @@ -708,32 +642,6 @@ static int idxd_pci_probe(struct pci_dev *pdev, const struct pci_device_id *id) return rc; } -static void idxd_flush_pending_llist(struct idxd_irq_entry *ie) -{ - struct idxd_desc *desc, *itr; - struct llist_node *head; - - head = llist_del_all(&ie->pending_llist); - if (!head) - return; - - llist_for_each_entry_safe(desc, itr, head, llnode) { - idxd_dma_complete_txd(desc, IDXD_COMPLETE_ABORT); - idxd_free_desc(desc->wq, desc); - } -} - -static void idxd_flush_work_list(struct idxd_irq_entry *ie) -{ - struct idxd_desc *desc, *iter; - - list_for_each_entry_safe(desc, iter, &ie->work_list, list) { - list_del(&desc->list); - idxd_dma_complete_txd(desc, IDXD_COMPLETE_ABORT); - idxd_free_desc(desc->wq, desc); - } -} - void idxd_wqs_quiesce(struct idxd_device *idxd) { struct idxd_wq *wq; @@ -746,47 +654,19 @@ void idxd_wqs_quiesce(struct idxd_device *idxd) } } -static void idxd_release_int_handles(struct idxd_device *idxd) -{ - struct device *dev = &idxd->pdev->dev; - int i, rc; - - for (i = 0; i < idxd->num_wq_irqs; i++) { - if (idxd->hw.cmd_cap & BIT(IDXD_CMD_RELEASE_INT_HANDLE)) { - rc = idxd_device_release_int_handle(idxd, idxd->int_handles[i], - IDXD_IRQ_MSIX); - if (rc < 0) - dev_warn(dev, "irq handle %d release failed\n", - idxd->int_handles[i]); - else - dev_dbg(dev, "int handle requested: %u\n", idxd->int_handles[i]); - } - } -} - static void idxd_shutdown(struct pci_dev *pdev) { struct idxd_device *idxd = pci_get_drvdata(pdev); - int rc, i; struct idxd_irq_entry *irq_entry; - int msixcnt = pci_msix_vec_count(pdev); + int rc; rc = idxd_device_disable(idxd); if (rc) dev_err(&pdev->dev, "Disabling device failed\n"); - dev_dbg(&pdev->dev, "%s called\n", __func__); - idxd_mask_msix_vectors(idxd); + irq_entry = &idxd->ie; + synchronize_irq(irq_entry->vector); idxd_mask_error_interrupts(idxd); - - for (i = 0; i < msixcnt; i++) { - irq_entry = &idxd->irq_entries[i]; - synchronize_irq(irq_entry->vector); - if (i == 0) - continue; - idxd_flush_pending_llist(irq_entry); - idxd_flush_work_list(irq_entry); - } flush_workqueue(idxd->wq); } @@ -794,8 +674,6 @@ static void idxd_remove(struct pci_dev *pdev) { struct idxd_device *idxd = pci_get_drvdata(pdev); struct idxd_irq_entry *irq_entry; - int msixcnt = pci_msix_vec_count(pdev); - int i; idxd_unregister_devices(idxd); /* @@ -811,12 +689,8 @@ static void idxd_remove(struct pci_dev *pdev) if (device_pasid_enabled(idxd)) idxd_disable_system_pasid(idxd); - for (i = 0; i < msixcnt; i++) { - irq_entry = &idxd->irq_entries[i]; - free_irq(irq_entry->vector, irq_entry); - } - idxd_msix_perm_clear(idxd); - idxd_release_int_handles(idxd); + irq_entry = idxd_get_ie(idxd, 0); + free_irq(irq_entry->vector, irq_entry); pci_free_irq_vectors(pdev); pci_iounmap(pdev, idxd->reg_base); iommu_dev_disable_feature(&pdev->dev, IOMMU_DEV_FEAT_SVA); diff --git a/drivers/dma/idxd/irq.c b/drivers/dma/idxd/irq.c index cf2c8bc4f147..743ead5ebc57 100644 --- a/drivers/dma/idxd/irq.c +++ b/drivers/dma/idxd/irq.c @@ -6,6 +6,7 @@ #include <linux/pci.h> #include <linux/io-64-nonatomic-lo-hi.h> #include <linux/dmaengine.h> +#include <linux/delay.h> #include <uapi/linux/idxd.h> #include "../dmaengine.h" #include "idxd.h" @@ -22,6 +23,16 @@ struct idxd_fault { struct idxd_device *idxd; }; +struct idxd_resubmit { + struct work_struct work; + struct idxd_desc *desc; +}; + +struct idxd_int_handle_revoke { + struct work_struct work; + struct idxd_device *idxd; +}; + static void idxd_device_reinit(struct work_struct *work) { struct idxd_device *idxd = container_of(work, struct idxd_device, work); @@ -55,6 +66,162 @@ static void idxd_device_reinit(struct work_struct *work) idxd_device_clear_state(idxd); } +/* + * The function sends a drain descriptor for the interrupt handle. The drain ensures + * all descriptors with this interrupt handle is flushed and the interrupt + * will allow the cleanup of the outstanding descriptors. + */ +static void idxd_int_handle_revoke_drain(struct idxd_irq_entry *ie) +{ + struct idxd_wq *wq = ie_to_wq(ie); + struct idxd_device *idxd = wq->idxd; + struct device *dev = &idxd->pdev->dev; + struct dsa_hw_desc desc = {}; + void __iomem *portal; + int rc; + + /* Issue a simple drain operation with interrupt but no completion record */ + desc.flags = IDXD_OP_FLAG_RCI; + desc.opcode = DSA_OPCODE_DRAIN; + desc.priv = 1; + + if (ie->pasid != INVALID_IOASID) + desc.pasid = ie->pasid; + desc.int_handle = ie->int_handle; + portal = idxd_wq_portal_addr(wq); + + /* + * The wmb() makes sure that the descriptor is all there before we + * issue. + */ + wmb(); + if (wq_dedicated(wq)) { + iosubmit_cmds512(portal, &desc, 1); + } else { + rc = idxd_enqcmds(wq, portal, &desc); + /* This should not fail unless hardware failed. */ + if (rc < 0) + dev_warn(dev, "Failed to submit drain desc on wq %d\n", wq->id); + } +} + +static void idxd_abort_invalid_int_handle_descs(struct idxd_irq_entry *ie) +{ + LIST_HEAD(flist); + struct idxd_desc *d, *t; + struct llist_node *head; + + spin_lock(&ie->list_lock); + head = llist_del_all(&ie->pending_llist); + if (head) { + llist_for_each_entry_safe(d, t, head, llnode) + list_add_tail(&d->list, &ie->work_list); + } + + list_for_each_entry_safe(d, t, &ie->work_list, list) { + if (d->completion->status == DSA_COMP_INT_HANDLE_INVAL) + list_move_tail(&d->list, &flist); + } + spin_unlock(&ie->list_lock); + + list_for_each_entry_safe(d, t, &flist, list) { + list_del(&d->list); + idxd_dma_complete_txd(d, IDXD_COMPLETE_ABORT, true); + } +} + +static void idxd_int_handle_revoke(struct work_struct *work) +{ + struct idxd_int_handle_revoke *revoke = + container_of(work, struct idxd_int_handle_revoke, work); + struct idxd_device *idxd = revoke->idxd; + struct pci_dev *pdev = idxd->pdev; + struct device *dev = &pdev->dev; + int i, new_handle, rc; + + if (!idxd->request_int_handles) { + kfree(revoke); + dev_warn(dev, "Unexpected int handle refresh interrupt.\n"); + return; + } + + /* + * The loop attempts to acquire new interrupt handle for all interrupt + * vectors that supports a handle. If a new interrupt handle is acquired and the + * wq is kernel type, the driver will kill the percpu_ref to pause all + * ongoing descriptor submissions. The interrupt handle is then changed. + * After change, the percpu_ref is revived and all the pending submissions + * are woken to try again. A drain is sent to for the interrupt handle + * at the end to make sure all invalid int handle descriptors are processed. + */ + for (i = 1; i < idxd->irq_cnt; i++) { + struct idxd_irq_entry *ie = idxd_get_ie(idxd, i); + struct idxd_wq *wq = ie_to_wq(ie); + + if (ie->int_handle == INVALID_INT_HANDLE) + continue; + + rc = idxd_device_request_int_handle(idxd, i, &new_handle, IDXD_IRQ_MSIX); + if (rc < 0) { + dev_warn(dev, "get int handle %d failed: %d\n", i, rc); + /* + * Failed to acquire new interrupt handle. Kill the WQ + * and release all the pending submitters. The submitters will + * get error return code and handle appropriately. + */ + ie->int_handle = INVALID_INT_HANDLE; + idxd_wq_quiesce(wq); + idxd_abort_invalid_int_handle_descs(ie); + continue; + } + + /* No change in interrupt handle, nothing needs to be done */ + if (ie->int_handle == new_handle) + continue; + + if (wq->state != IDXD_WQ_ENABLED || wq->type != IDXD_WQT_KERNEL) { + /* + * All the MSIX interrupts are allocated at once during probe. + * Therefore we need to update all interrupts even if the WQ + * isn't supporting interrupt operations. + */ + ie->int_handle = new_handle; + continue; + } + + mutex_lock(&wq->wq_lock); + reinit_completion(&wq->wq_resurrect); + + /* Kill percpu_ref to pause additional descriptor submissions */ + percpu_ref_kill(&wq->wq_active); + + /* Wait for all submitters quiesce before we change interrupt handle */ + wait_for_completion(&wq->wq_dead); + + ie->int_handle = new_handle; + + /* Revive percpu ref and wake up all the waiting submitters */ + percpu_ref_reinit(&wq->wq_active); + complete_all(&wq->wq_resurrect); + mutex_unlock(&wq->wq_lock); + + /* + * The delay here is to wait for all possible MOVDIR64B that + * are issued before percpu_ref_kill() has happened to have + * reached the PCIe domain before the drain is issued. The driver + * needs to ensure that the drain descriptor issued does not pass + * all the other issued descriptors that contain the invalid + * interrupt handle in order to ensure that the drain descriptor + * interrupt will allow the cleanup of all the descriptors with + * invalid interrupt handle. + */ + if (wq_dedicated(wq)) + udelay(100); + idxd_int_handle_revoke_drain(ie); + } + kfree(revoke); +} + static int process_misc_interrupts(struct idxd_device *idxd, u32 cause) { struct device *dev = &idxd->pdev->dev; @@ -101,6 +268,23 @@ static int process_misc_interrupts(struct idxd_device *idxd, u32 cause) err = true; } + if (cause & IDXD_INTC_INT_HANDLE_REVOKED) { + struct idxd_int_handle_revoke *revoke; + + val |= IDXD_INTC_INT_HANDLE_REVOKED; + + revoke = kzalloc(sizeof(*revoke), GFP_ATOMIC); + if (revoke) { + revoke->idxd = idxd; + INIT_WORK(&revoke->work, idxd_int_handle_revoke); + queue_work(idxd->wq, &revoke->work); + + } else { + dev_err(dev, "Failed to allocate work for int handle revoke\n"); + idxd_wqs_quiesce(idxd); + } + } + if (cause & IDXD_INTC_CMD) { val |= IDXD_INTC_CMD; complete(idxd->cmd_done); @@ -157,7 +341,7 @@ halt: irqreturn_t idxd_misc_thread(int vec, void *data) { struct idxd_irq_entry *irq_entry = data; - struct idxd_device *idxd = irq_entry->idxd; + struct idxd_device *idxd = ie_to_idxd(irq_entry); int rc; u32 cause; @@ -177,6 +361,51 @@ irqreturn_t idxd_misc_thread(int vec, void *data) return IRQ_HANDLED; } +static void idxd_int_handle_resubmit_work(struct work_struct *work) +{ + struct idxd_resubmit *irw = container_of(work, struct idxd_resubmit, work); + struct idxd_desc *desc = irw->desc; + struct idxd_wq *wq = desc->wq; + int rc; + + desc->completion->status = 0; + rc = idxd_submit_desc(wq, desc); + if (rc < 0) { + dev_dbg(&wq->idxd->pdev->dev, "Failed to resubmit desc %d to wq %d.\n", + desc->id, wq->id); + /* + * If the error is not -EAGAIN, it means the submission failed due to wq + * has been killed instead of ENQCMDS failure. Here the driver needs to + * notify the submitter of the failure by reporting abort status. + * + * -EAGAIN comes from ENQCMDS failure. idxd_submit_desc() will handle the + * abort. + */ + if (rc != -EAGAIN) { + desc->completion->status = IDXD_COMP_DESC_ABORT; + idxd_dma_complete_txd(desc, IDXD_COMPLETE_ABORT, false); + } + idxd_free_desc(wq, desc); + } + kfree(irw); +} + +bool idxd_queue_int_handle_resubmit(struct idxd_desc *desc) +{ + struct idxd_wq *wq = desc->wq; + struct idxd_device *idxd = wq->idxd; + struct idxd_resubmit *irw; + + irw = kzalloc(sizeof(*irw), GFP_KERNEL); + if (!irw) + return false; + + irw->desc = desc; + INIT_WORK(&irw->work, idxd_int_handle_resubmit_work); + queue_work(idxd->wq, &irw->work); + return true; +} + static void irq_process_pending_llist(struct idxd_irq_entry *irq_entry) { struct idxd_desc *desc, *t; @@ -195,11 +424,11 @@ static void irq_process_pending_llist(struct idxd_irq_entry *irq_entry) * and 0xff, which DSA_COMP_STATUS_MASK can mask out. */ if (unlikely(desc->completion->status == IDXD_COMP_DESC_ABORT)) { - complete_desc(desc, IDXD_COMPLETE_ABORT); + idxd_dma_complete_txd(desc, IDXD_COMPLETE_ABORT, true); continue; } - complete_desc(desc, IDXD_COMPLETE_NORMAL); + idxd_dma_complete_txd(desc, IDXD_COMPLETE_NORMAL, true); } else { spin_lock(&irq_entry->list_lock); list_add_tail(&desc->list, @@ -238,11 +467,11 @@ static void irq_process_work_list(struct idxd_irq_entry *irq_entry) * and 0xff, which DSA_COMP_STATUS_MASK can mask out. */ if (unlikely(desc->completion->status == IDXD_COMP_DESC_ABORT)) { - complete_desc(desc, IDXD_COMPLETE_ABORT); + idxd_dma_complete_txd(desc, IDXD_COMPLETE_ABORT, true); continue; } - complete_desc(desc, IDXD_COMPLETE_NORMAL); + idxd_dma_complete_txd(desc, IDXD_COMPLETE_NORMAL, true); } } diff --git a/drivers/dma/idxd/registers.h b/drivers/dma/idxd/registers.h index 262c8220adbd..aa642aecdc0b 100644 --- a/drivers/dma/idxd/registers.h +++ b/drivers/dma/idxd/registers.h @@ -64,9 +64,9 @@ union wq_cap_reg { union group_cap_reg { struct { u64 num_groups:8; - u64 total_tokens:8; - u64 token_en:1; - u64 token_limit:1; + u64 total_rdbufs:8; /* formerly total_tokens */ + u64 rdbuf_ctrl:1; /* formerly token_en */ + u64 rdbuf_limit:1; /* formerly token_limit */ u64 rsvd:46; }; u64 bits; @@ -110,7 +110,7 @@ union offsets_reg { #define IDXD_GENCFG_OFFSET 0x80 union gencfg_reg { struct { - u32 token_limit:8; + u32 rdbuf_limit:8; u32 rsvd:4; u32 user_int_en:1; u32 rsvd2:19; @@ -158,6 +158,7 @@ enum idxd_device_reset_type { #define IDXD_INTC_OCCUPY 0x04 #define IDXD_INTC_PERFMON_OVFL 0x08 #define IDXD_INTC_HALT_STATE 0x10 +#define IDXD_INTC_INT_HANDLE_REVOKED 0x80000000 #define IDXD_CMD_OFFSET 0xa0 union idxd_command_reg { @@ -287,10 +288,10 @@ union group_flags { u32 tc_a:3; u32 tc_b:3; u32 rsvd:1; - u32 use_token_limit:1; - u32 tokens_reserved:8; + u32 use_rdbuf_limit:1; + u32 rdbufs_reserved:8; u32 rsvd2:4; - u32 tokens_allowed:8; + u32 rdbufs_allowed:8; u32 rsvd3:4; }; u32 bits; diff --git a/drivers/dma/idxd/submit.c b/drivers/dma/idxd/submit.c index 83452fbbb168..e289fd48711a 100644 --- a/drivers/dma/idxd/submit.c +++ b/drivers/dma/idxd/submit.c @@ -21,15 +21,6 @@ static struct idxd_desc *__get_desc(struct idxd_wq *wq, int idx, int cpu) if (device_pasid_enabled(idxd)) desc->hw->pasid = idxd->pasid; - /* - * On host, MSIX vecotr 0 is used for misc interrupt. Therefore when we match - * vector 1:1 to the WQ id, we need to add 1 - */ - if (!idxd->int_handles) - desc->hw->int_handle = wq->id + 1; - else - desc->hw->int_handle = idxd->int_handles[wq->id]; - return desc; } @@ -134,35 +125,58 @@ static void llist_abort_desc(struct idxd_wq *wq, struct idxd_irq_entry *ie, spin_unlock(&ie->list_lock); if (found) - complete_desc(found, IDXD_COMPLETE_ABORT); + idxd_dma_complete_txd(found, IDXD_COMPLETE_ABORT, false); /* - * complete_desc() will return desc to allocator and the desc can be - * acquired by a different process and the desc->list can be modified. - * Delete desc from list so the list trasversing does not get corrupted - * by the other process. + * completing the descriptor will return desc to allocator and + * the desc can be acquired by a different process and the + * desc->list can be modified. Delete desc from list so the + * list trasversing does not get corrupted by the other process. */ list_for_each_entry_safe(d, t, &flist, list) { list_del_init(&d->list); - complete_desc(d, IDXD_COMPLETE_NORMAL); + idxd_dma_complete_txd(found, IDXD_COMPLETE_ABORT, true); } } +/* + * ENQCMDS typically fail when the WQ is inactive or busy. On host submission, the driver + * has better control of number of descriptors being submitted to a shared wq by limiting + * the number of driver allocated descriptors to the wq size. However, when the swq is + * exported to a guest kernel, it may be shared with multiple guest kernels. This means + * the likelihood of getting busy returned on the swq when submitting goes significantly up. + * Having a tunable retry mechanism allows the driver to keep trying for a bit before giving + * up. The sysfs knob can be tuned by the system administrator. + */ +int idxd_enqcmds(struct idxd_wq *wq, void __iomem *portal, const void *desc) +{ + int rc, retries = 0; + + do { + rc = enqcmds(portal, desc); + if (rc == 0) + break; + cpu_relax(); + } while (retries++ < wq->enqcmds_retries); + + return rc; +} + int idxd_submit_desc(struct idxd_wq *wq, struct idxd_desc *desc) { struct idxd_device *idxd = wq->idxd; struct idxd_irq_entry *ie = NULL; + u32 desc_flags = desc->hw->flags; void __iomem *portal; int rc; - if (idxd->state != IDXD_DEV_ENABLED) { - idxd_free_desc(wq, desc); + if (idxd->state != IDXD_DEV_ENABLED) return -EIO; - } if (!percpu_ref_tryget_live(&wq->wq_active)) { - idxd_free_desc(wq, desc); - return -ENXIO; + wait_for_completion(&wq->wq_resurrect); + if (!percpu_ref_tryget_live(&wq->wq_active)) + return -ENXIO; } portal = idxd_wq_portal_addr(wq); @@ -178,28 +192,21 @@ int idxd_submit_desc(struct idxd_wq *wq, struct idxd_desc *desc) * Pending the descriptor to the lockless list for the irq_entry * that we designated the descriptor to. */ - if (desc->hw->flags & IDXD_OP_FLAG_RCI) { - ie = &idxd->irq_entries[wq->id + 1]; + if (desc_flags & IDXD_OP_FLAG_RCI) { + ie = &wq->ie; + desc->hw->int_handle = ie->int_handle; llist_add(&desc->llnode, &ie->pending_llist); } if (wq_dedicated(wq)) { iosubmit_cmds512(portal, desc->hw, 1); } else { - /* - * It's not likely that we would receive queue full rejection - * since the descriptor allocation gates at wq size. If we - * receive a -EAGAIN, that means something went wrong such as the - * device is not accepting descriptor at all. - */ - rc = enqcmds(portal, desc->hw); + rc = idxd_enqcmds(wq, portal, desc->hw); if (rc < 0) { percpu_ref_put(&wq->wq_active); /* abort operation frees the descriptor */ if (ie) llist_abort_desc(wq, ie, desc); - else - idxd_free_desc(wq, desc); return rc; } } diff --git a/drivers/dma/idxd/sysfs.c b/drivers/dma/idxd/sysfs.c index a9025be940db..7e19ab92b61a 100644 --- a/drivers/dma/idxd/sysfs.c +++ b/drivers/dma/idxd/sysfs.c @@ -99,31 +99,39 @@ struct device_type idxd_engine_device_type = { /* Group attributes */ -static void idxd_set_free_tokens(struct idxd_device *idxd) +static void idxd_set_free_rdbufs(struct idxd_device *idxd) { - int i, tokens; + int i, rdbufs; - for (i = 0, tokens = 0; i < idxd->max_groups; i++) { + for (i = 0, rdbufs = 0; i < idxd->max_groups; i++) { struct idxd_group *g = idxd->groups[i]; - tokens += g->tokens_reserved; + rdbufs += g->rdbufs_reserved; } - idxd->nr_tokens = idxd->max_tokens - tokens; + idxd->nr_rdbufs = idxd->max_rdbufs - rdbufs; +} + +static ssize_t group_read_buffers_reserved_show(struct device *dev, + struct device_attribute *attr, + char *buf) +{ + struct idxd_group *group = confdev_to_group(dev); + + return sysfs_emit(buf, "%u\n", group->rdbufs_reserved); } static ssize_t group_tokens_reserved_show(struct device *dev, struct device_attribute *attr, char *buf) { - struct idxd_group *group = confdev_to_group(dev); - - return sysfs_emit(buf, "%u\n", group->tokens_reserved); + dev_warn_once(dev, "attribute deprecated, see read_buffers_reserved.\n"); + return group_read_buffers_reserved_show(dev, attr, buf); } -static ssize_t group_tokens_reserved_store(struct device *dev, - struct device_attribute *attr, - const char *buf, size_t count) +static ssize_t group_read_buffers_reserved_store(struct device *dev, + struct device_attribute *attr, + const char *buf, size_t count) { struct idxd_group *group = confdev_to_group(dev); struct idxd_device *idxd = group->idxd; @@ -143,33 +151,53 @@ static ssize_t group_tokens_reserved_store(struct device *dev, if (idxd->state == IDXD_DEV_ENABLED) return -EPERM; - if (val > idxd->max_tokens) + if (val > idxd->max_rdbufs) return -EINVAL; - if (val > idxd->nr_tokens + group->tokens_reserved) + if (val > idxd->nr_rdbufs + group->rdbufs_reserved) return -EINVAL; - group->tokens_reserved = val; - idxd_set_free_tokens(idxd); + group->rdbufs_reserved = val; + idxd_set_free_rdbufs(idxd); return count; } +static ssize_t group_tokens_reserved_store(struct device *dev, + struct device_attribute *attr, + const char *buf, size_t count) +{ + dev_warn_once(dev, "attribute deprecated, see read_buffers_reserved.\n"); + return group_read_buffers_reserved_store(dev, attr, buf, count); +} + static struct device_attribute dev_attr_group_tokens_reserved = __ATTR(tokens_reserved, 0644, group_tokens_reserved_show, group_tokens_reserved_store); +static struct device_attribute dev_attr_group_read_buffers_reserved = + __ATTR(read_buffers_reserved, 0644, group_read_buffers_reserved_show, + group_read_buffers_reserved_store); + +static ssize_t group_read_buffers_allowed_show(struct device *dev, + struct device_attribute *attr, + char *buf) +{ + struct idxd_group *group = confdev_to_group(dev); + + return sysfs_emit(buf, "%u\n", group->rdbufs_allowed); +} + static ssize_t group_tokens_allowed_show(struct device *dev, struct device_attribute *attr, char *buf) { - struct idxd_group *group = confdev_to_group(dev); - - return sysfs_emit(buf, "%u\n", group->tokens_allowed); + dev_warn_once(dev, "attribute deprecated, see read_buffers_allowed.\n"); + return group_read_buffers_allowed_show(dev, attr, buf); } -static ssize_t group_tokens_allowed_store(struct device *dev, - struct device_attribute *attr, - const char *buf, size_t count) +static ssize_t group_read_buffers_allowed_store(struct device *dev, + struct device_attribute *attr, + const char *buf, size_t count) { struct idxd_group *group = confdev_to_group(dev); struct idxd_device *idxd = group->idxd; @@ -190,29 +218,49 @@ static ssize_t group_tokens_allowed_store(struct device *dev, return -EPERM; if (val < 4 * group->num_engines || - val > group->tokens_reserved + idxd->nr_tokens) + val > group->rdbufs_reserved + idxd->nr_rdbufs) return -EINVAL; - group->tokens_allowed = val; + group->rdbufs_allowed = val; return count; } +static ssize_t group_tokens_allowed_store(struct device *dev, + struct device_attribute *attr, + const char *buf, size_t count) +{ + dev_warn_once(dev, "attribute deprecated, see read_buffers_allowed.\n"); + return group_read_buffers_allowed_store(dev, attr, buf, count); +} + static struct device_attribute dev_attr_group_tokens_allowed = __ATTR(tokens_allowed, 0644, group_tokens_allowed_show, group_tokens_allowed_store); +static struct device_attribute dev_attr_group_read_buffers_allowed = + __ATTR(read_buffers_allowed, 0644, group_read_buffers_allowed_show, + group_read_buffers_allowed_store); + +static ssize_t group_use_read_buffer_limit_show(struct device *dev, + struct device_attribute *attr, + char *buf) +{ + struct idxd_group *group = confdev_to_group(dev); + + return sysfs_emit(buf, "%u\n", group->use_rdbuf_limit); +} + static ssize_t group_use_token_limit_show(struct device *dev, struct device_attribute *attr, char *buf) { - struct idxd_group *group = confdev_to_group(dev); - - return sysfs_emit(buf, "%u\n", group->use_token_limit); + dev_warn_once(dev, "attribute deprecated, see use_read_buffer_limit.\n"); + return group_use_read_buffer_limit_show(dev, attr, buf); } -static ssize_t group_use_token_limit_store(struct device *dev, - struct device_attribute *attr, - const char *buf, size_t count) +static ssize_t group_use_read_buffer_limit_store(struct device *dev, + struct device_attribute *attr, + const char *buf, size_t count) { struct idxd_group *group = confdev_to_group(dev); struct idxd_device *idxd = group->idxd; @@ -232,17 +280,29 @@ static ssize_t group_use_token_limit_store(struct device *dev, if (idxd->state == IDXD_DEV_ENABLED) return -EPERM; - if (idxd->token_limit == 0) + if (idxd->rdbuf_limit == 0) return -EPERM; - group->use_token_limit = !!val; + group->use_rdbuf_limit = !!val; return count; } +static ssize_t group_use_token_limit_store(struct device *dev, + struct device_attribute *attr, + const char *buf, size_t count) +{ + dev_warn_once(dev, "attribute deprecated, see use_read_buffer_limit.\n"); + return group_use_read_buffer_limit_store(dev, attr, buf, count); +} + static struct device_attribute dev_attr_group_use_token_limit = __ATTR(use_token_limit, 0644, group_use_token_limit_show, group_use_token_limit_store); +static struct device_attribute dev_attr_group_use_read_buffer_limit = + __ATTR(use_read_buffer_limit, 0644, group_use_read_buffer_limit_show, + group_use_read_buffer_limit_store); + static ssize_t group_engines_show(struct device *dev, struct device_attribute *attr, char *buf) { @@ -387,8 +447,11 @@ static struct attribute *idxd_group_attributes[] = { &dev_attr_group_work_queues.attr, &dev_attr_group_engines.attr, &dev_attr_group_use_token_limit.attr, + &dev_attr_group_use_read_buffer_limit.attr, &dev_attr_group_tokens_allowed.attr, + &dev_attr_group_read_buffers_allowed.attr, &dev_attr_group_tokens_reserved.attr, + &dev_attr_group_read_buffers_reserved.attr, &dev_attr_group_traffic_class_a.attr, &dev_attr_group_traffic_class_b.attr, NULL, @@ -945,6 +1008,41 @@ static ssize_t wq_occupancy_show(struct device *dev, struct device_attribute *at static struct device_attribute dev_attr_wq_occupancy = __ATTR(occupancy, 0444, wq_occupancy_show, NULL); +static ssize_t wq_enqcmds_retries_show(struct device *dev, + struct device_attribute *attr, char *buf) +{ + struct idxd_wq *wq = confdev_to_wq(dev); + + if (wq_dedicated(wq)) + return -EOPNOTSUPP; + + return sysfs_emit(buf, "%u\n", wq->enqcmds_retries); +} + +static ssize_t wq_enqcmds_retries_store(struct device *dev, struct device_attribute *attr, + const char *buf, size_t count) +{ + struct idxd_wq *wq = confdev_to_wq(dev); + int rc; + unsigned int retries; + + if (wq_dedicated(wq)) + return -EOPNOTSUPP; + + rc = kstrtouint(buf, 10, &retries); + if (rc < 0) + return rc; + + if (retries > IDXD_ENQCMDS_MAX_RETRIES) + retries = IDXD_ENQCMDS_MAX_RETRIES; + + wq->enqcmds_retries = retries; + return count; +} + +static struct device_attribute dev_attr_wq_enqcmds_retries = + __ATTR(enqcmds_retries, 0644, wq_enqcmds_retries_show, wq_enqcmds_retries_store); + static struct attribute *idxd_wq_attributes[] = { &dev_attr_wq_clients.attr, &dev_attr_wq_state.attr, @@ -961,6 +1059,7 @@ static struct attribute *idxd_wq_attributes[] = { &dev_attr_wq_max_batch_size.attr, &dev_attr_wq_ats_disable.attr, &dev_attr_wq_occupancy.attr, + &dev_attr_wq_enqcmds_retries.attr, NULL, }; @@ -1156,26 +1255,42 @@ static ssize_t errors_show(struct device *dev, } static DEVICE_ATTR_RO(errors); +static ssize_t max_read_buffers_show(struct device *dev, + struct device_attribute *attr, char *buf) +{ + struct idxd_device *idxd = confdev_to_idxd(dev); + + return sysfs_emit(buf, "%u\n", idxd->max_rdbufs); +} + static ssize_t max_tokens_show(struct device *dev, struct device_attribute *attr, char *buf) { + dev_warn_once(dev, "attribute deprecated, see max_read_buffers.\n"); + return max_read_buffers_show(dev, attr, buf); +} + +static DEVICE_ATTR_RO(max_tokens); /* deprecated */ +static DEVICE_ATTR_RO(max_read_buffers); + +static ssize_t read_buffer_limit_show(struct device *dev, + struct device_attribute *attr, char *buf) +{ struct idxd_device *idxd = confdev_to_idxd(dev); - return sysfs_emit(buf, "%u\n", idxd->max_tokens); + return sysfs_emit(buf, "%u\n", idxd->rdbuf_limit); } -static DEVICE_ATTR_RO(max_tokens); static ssize_t token_limit_show(struct device *dev, struct device_attribute *attr, char *buf) { - struct idxd_device *idxd = confdev_to_idxd(dev); - - return sysfs_emit(buf, "%u\n", idxd->token_limit); + dev_warn_once(dev, "attribute deprecated, see read_buffer_limit.\n"); + return read_buffer_limit_show(dev, attr, buf); } -static ssize_t token_limit_store(struct device *dev, - struct device_attribute *attr, - const char *buf, size_t count) +static ssize_t read_buffer_limit_store(struct device *dev, + struct device_attribute *attr, + const char *buf, size_t count) { struct idxd_device *idxd = confdev_to_idxd(dev); unsigned long val; @@ -1191,16 +1306,26 @@ static ssize_t token_limit_store(struct device *dev, if (!test_bit(IDXD_FLAG_CONFIGURABLE, &idxd->flags)) return -EPERM; - if (!idxd->hw.group_cap.token_limit) + if (!idxd->hw.group_cap.rdbuf_limit) return -EPERM; - if (val > idxd->hw.group_cap.total_tokens) + if (val > idxd->hw.group_cap.total_rdbufs) return -EINVAL; - idxd->token_limit = val; + idxd->rdbuf_limit = val; return count; } -static DEVICE_ATTR_RW(token_limit); + +static ssize_t token_limit_store(struct device *dev, + struct device_attribute *attr, + const char *buf, size_t count) +{ + dev_warn_once(dev, "attribute deprecated, see read_buffer_limit\n"); + return read_buffer_limit_store(dev, attr, buf, count); +} + +static DEVICE_ATTR_RW(token_limit); /* deprecated */ +static DEVICE_ATTR_RW(read_buffer_limit); static ssize_t cdev_major_show(struct device *dev, struct device_attribute *attr, char *buf) @@ -1246,7 +1371,9 @@ static struct attribute *idxd_device_attributes[] = { &dev_attr_state.attr, &dev_attr_errors.attr, &dev_attr_max_tokens.attr, + &dev_attr_max_read_buffers.attr, &dev_attr_token_limit.attr, + &dev_attr_read_buffer_limit.attr, &dev_attr_cdev_major.attr, &dev_attr_cmd_status.attr, NULL, @@ -1268,8 +1395,6 @@ static void idxd_conf_device_release(struct device *dev) kfree(idxd->groups); kfree(idxd->wqs); kfree(idxd->engines); - kfree(idxd->irq_entries); - kfree(idxd->int_handles); ida_free(&idxd_ida, idxd->id); kfree(idxd); } |