summaryrefslogtreecommitdiffstats
path: root/drivers/dma/idxd
diff options
context:
space:
mode:
Diffstat (limited to 'drivers/dma/idxd')
-rw-r--r--drivers/dma/idxd/device.c222
-rw-r--r--drivers/dma/idxd/dma.c40
-rw-r--r--drivers/dma/idxd/idxd.h67
-rw-r--r--drivers/dma/idxd/init.c196
-rw-r--r--drivers/dma/idxd/irq.c239
-rw-r--r--drivers/dma/idxd/registers.h15
-rw-r--r--drivers/dma/idxd/submit.c69
-rw-r--r--drivers/dma/idxd/sysfs.c215
8 files changed, 703 insertions, 360 deletions
diff --git a/drivers/dma/idxd/device.c b/drivers/dma/idxd/device.c
index fab412349f7f..573ad8b86804 100644
--- a/drivers/dma/idxd/device.c
+++ b/drivers/dma/idxd/device.c
@@ -19,30 +19,6 @@ static void idxd_device_wqs_clear_state(struct idxd_device *idxd);
static void idxd_wq_disable_cleanup(struct idxd_wq *wq);
/* Interrupt control bits */
-void idxd_mask_msix_vector(struct idxd_device *idxd, int vec_id)
-{
- struct irq_data *data = irq_get_irq_data(idxd->irq_entries[vec_id].vector);
-
- pci_msi_mask_irq(data);
-}
-
-void idxd_mask_msix_vectors(struct idxd_device *idxd)
-{
- struct pci_dev *pdev = idxd->pdev;
- int msixcnt = pci_msix_vec_count(pdev);
- int i;
-
- for (i = 0; i < msixcnt; i++)
- idxd_mask_msix_vector(idxd, i);
-}
-
-void idxd_unmask_msix_vector(struct idxd_device *idxd, int vec_id)
-{
- struct irq_data *data = irq_get_irq_data(idxd->irq_entries[vec_id].vector);
-
- pci_msi_unmask_irq(data);
-}
-
void idxd_unmask_error_interrupts(struct idxd_device *idxd)
{
union genctrl_reg genctrl;
@@ -382,14 +358,24 @@ static void idxd_wq_disable_cleanup(struct idxd_wq *wq)
lockdep_assert_held(&wq->wq_lock);
memset(wq->wqcfg, 0, idxd->wqcfg_size);
wq->type = IDXD_WQT_NONE;
- wq->size = 0;
- wq->group = NULL;
wq->threshold = 0;
wq->priority = 0;
wq->ats_dis = 0;
+ wq->enqcmds_retries = IDXD_ENQCMDS_RETRIES;
clear_bit(WQ_FLAG_DEDICATED, &wq->flags);
clear_bit(WQ_FLAG_BLOCK_ON_FAULT, &wq->flags);
memset(wq->name, 0, WQ_NAME_SIZE);
+ wq->max_xfer_bytes = WQ_DEFAULT_MAX_XFER;
+ wq->max_batch_size = WQ_DEFAULT_MAX_BATCH;
+}
+
+static void idxd_wq_device_reset_cleanup(struct idxd_wq *wq)
+{
+ lockdep_assert_held(&wq->wq_lock);
+
+ idxd_wq_disable_cleanup(wq);
+ wq->size = 0;
+ wq->group = NULL;
}
static void idxd_wq_ref_release(struct percpu_ref *ref)
@@ -404,19 +390,31 @@ int idxd_wq_init_percpu_ref(struct idxd_wq *wq)
int rc;
memset(&wq->wq_active, 0, sizeof(wq->wq_active));
- rc = percpu_ref_init(&wq->wq_active, idxd_wq_ref_release, 0, GFP_KERNEL);
+ rc = percpu_ref_init(&wq->wq_active, idxd_wq_ref_release,
+ PERCPU_REF_ALLOW_REINIT, GFP_KERNEL);
if (rc < 0)
return rc;
reinit_completion(&wq->wq_dead);
+ reinit_completion(&wq->wq_resurrect);
return 0;
}
-void idxd_wq_quiesce(struct idxd_wq *wq)
+void __idxd_wq_quiesce(struct idxd_wq *wq)
{
+ lockdep_assert_held(&wq->wq_lock);
+ reinit_completion(&wq->wq_resurrect);
percpu_ref_kill(&wq->wq_active);
+ complete_all(&wq->wq_resurrect);
wait_for_completion(&wq->wq_dead);
}
+void idxd_wq_quiesce(struct idxd_wq *wq)
+{
+ mutex_lock(&wq->wq_lock);
+ __idxd_wq_quiesce(wq);
+ mutex_unlock(&wq->wq_lock);
+}
+
/* Device control bits */
static inline bool idxd_is_enabled(struct idxd_device *idxd)
{
@@ -572,7 +570,6 @@ void idxd_device_reset(struct idxd_device *idxd)
idxd_device_clear_state(idxd);
idxd->state = IDXD_DEV_DISABLED;
idxd_unmask_error_interrupts(idxd);
- idxd_msix_perm_setup(idxd);
spin_unlock(&idxd->dev_lock);
}
@@ -681,9 +678,9 @@ static void idxd_groups_clear_state(struct idxd_device *idxd)
memset(&group->grpcfg, 0, sizeof(group->grpcfg));
group->num_engines = 0;
group->num_wqs = 0;
- group->use_token_limit = false;
- group->tokens_allowed = 0;
- group->tokens_reserved = 0;
+ group->use_rdbuf_limit = false;
+ group->rdbufs_allowed = 0;
+ group->rdbufs_reserved = 0;
group->tc_a = -1;
group->tc_b = -1;
}
@@ -699,6 +696,7 @@ static void idxd_device_wqs_clear_state(struct idxd_device *idxd)
if (wq->state == IDXD_WQ_ENABLED) {
idxd_wq_disable_cleanup(wq);
+ idxd_wq_device_reset_cleanup(wq);
wq->state = IDXD_WQ_DISABLED;
}
}
@@ -711,36 +709,6 @@ void idxd_device_clear_state(struct idxd_device *idxd)
idxd_device_wqs_clear_state(idxd);
}
-void idxd_msix_perm_setup(struct idxd_device *idxd)
-{
- union msix_perm mperm;
- int i, msixcnt;
-
- msixcnt = pci_msix_vec_count(idxd->pdev);
- if (msixcnt < 0)
- return;
-
- mperm.bits = 0;
- mperm.pasid = idxd->pasid;
- mperm.pasid_en = device_pasid_enabled(idxd);
- for (i = 1; i < msixcnt; i++)
- iowrite32(mperm.bits, idxd->reg_base + idxd->msix_perm_offset + i * 8);
-}
-
-void idxd_msix_perm_clear(struct idxd_device *idxd)
-{
- union msix_perm mperm;
- int i, msixcnt;
-
- msixcnt = pci_msix_vec_count(idxd->pdev);
- if (msixcnt < 0)
- return;
-
- mperm.bits = 0;
- for (i = 1; i < msixcnt; i++)
- iowrite32(mperm.bits, idxd->reg_base + idxd->msix_perm_offset + i * 8);
-}
-
static void idxd_group_config_write(struct idxd_group *group)
{
struct idxd_device *idxd = group->idxd;
@@ -780,10 +748,10 @@ static int idxd_groups_config_write(struct idxd_device *idxd)
int i;
struct device *dev = &idxd->pdev->dev;
- /* Setup bandwidth token limit */
- if (idxd->hw.gen_cap.config_en && idxd->token_limit) {
+ /* Setup bandwidth rdbuf limit */
+ if (idxd->hw.gen_cap.config_en && idxd->rdbuf_limit) {
reg.bits = ioread32(idxd->reg_base + IDXD_GENCFG_OFFSET);
- reg.token_limit = idxd->token_limit;
+ reg.rdbuf_limit = idxd->rdbuf_limit;
iowrite32(reg.bits, idxd->reg_base + IDXD_GENCFG_OFFSET);
}
@@ -827,15 +795,12 @@ static int idxd_wq_config_write(struct idxd_wq *wq)
wq->wqcfg->bits[i] = ioread32(idxd->reg_base + wq_offset);
}
+ if (wq->size == 0 && wq->type != IDXD_WQT_NONE)
+ wq->size = WQ_DEFAULT_QUEUE_DEPTH;
+
/* byte 0-3 */
wq->wqcfg->wq_size = wq->size;
- if (wq->size == 0) {
- idxd->cmd_status = IDXD_SCMD_WQ_NO_SIZE;
- dev_warn(dev, "Incorrect work queue size: 0\n");
- return -EINVAL;
- }
-
/* bytes 4-7 */
wq->wqcfg->wq_thresh = wq->threshold;
@@ -924,13 +889,12 @@ static void idxd_group_flags_setup(struct idxd_device *idxd)
group->tc_b = group->grpcfg.flags.tc_b = 1;
else
group->grpcfg.flags.tc_b = group->tc_b;
- group->grpcfg.flags.use_token_limit = group->use_token_limit;
- group->grpcfg.flags.tokens_reserved = group->tokens_reserved;
- if (group->tokens_allowed)
- group->grpcfg.flags.tokens_allowed =
- group->tokens_allowed;
+ group->grpcfg.flags.use_rdbuf_limit = group->use_rdbuf_limit;
+ group->grpcfg.flags.rdbufs_reserved = group->rdbufs_reserved;
+ if (group->rdbufs_allowed)
+ group->grpcfg.flags.rdbufs_allowed = group->rdbufs_allowed;
else
- group->grpcfg.flags.tokens_allowed = idxd->max_tokens;
+ group->grpcfg.flags.rdbufs_allowed = idxd->max_rdbufs;
}
}
@@ -981,8 +945,6 @@ static int idxd_wqs_setup(struct idxd_device *idxd)
if (!wq->group)
continue;
- if (!wq->size)
- continue;
if (wq_shared(wq) && !device_swq_supported(idxd)) {
idxd->cmd_status = IDXD_SCMD_WQ_NO_SWQ_SUPPORT;
@@ -1123,7 +1085,7 @@ int idxd_device_load_config(struct idxd_device *idxd)
int i, rc;
reg.bits = ioread32(idxd->reg_base + IDXD_GENCFG_OFFSET);
- idxd->token_limit = reg.token_limit;
+ idxd->rdbuf_limit = reg.rdbuf_limit;
for (i = 0; i < idxd->max_groups; i++) {
struct idxd_group *group = idxd->groups[i];
@@ -1142,6 +1104,106 @@ int idxd_device_load_config(struct idxd_device *idxd)
return 0;
}
+static void idxd_flush_pending_descs(struct idxd_irq_entry *ie)
+{
+ struct idxd_desc *desc, *itr;
+ struct llist_node *head;
+ LIST_HEAD(flist);
+ enum idxd_complete_type ctype;
+
+ spin_lock(&ie->list_lock);
+ head = llist_del_all(&ie->pending_llist);
+ if (head) {
+ llist_for_each_entry_safe(desc, itr, head, llnode)
+ list_add_tail(&desc->list, &ie->work_list);
+ }
+
+ list_for_each_entry_safe(desc, itr, &ie->work_list, list)
+ list_move_tail(&desc->list, &flist);
+ spin_unlock(&ie->list_lock);
+
+ list_for_each_entry_safe(desc, itr, &flist, list) {
+ list_del(&desc->list);
+ ctype = desc->completion->status ? IDXD_COMPLETE_NORMAL : IDXD_COMPLETE_ABORT;
+ idxd_dma_complete_txd(desc, ctype, true);
+ }
+}
+
+static void idxd_device_set_perm_entry(struct idxd_device *idxd,
+ struct idxd_irq_entry *ie)
+{
+ union msix_perm mperm;
+
+ if (ie->pasid == INVALID_IOASID)
+ return;
+
+ mperm.bits = 0;
+ mperm.pasid = ie->pasid;
+ mperm.pasid_en = 1;
+ iowrite32(mperm.bits, idxd->reg_base + idxd->msix_perm_offset + ie->id * 8);
+}
+
+static void idxd_device_clear_perm_entry(struct idxd_device *idxd,
+ struct idxd_irq_entry *ie)
+{
+ iowrite32(0, idxd->reg_base + idxd->msix_perm_offset + ie->id * 8);
+}
+
+void idxd_wq_free_irq(struct idxd_wq *wq)
+{
+ struct idxd_device *idxd = wq->idxd;
+ struct idxd_irq_entry *ie = &wq->ie;
+
+ synchronize_irq(ie->vector);
+ free_irq(ie->vector, ie);
+ idxd_flush_pending_descs(ie);
+ if (idxd->request_int_handles)
+ idxd_device_release_int_handle(idxd, ie->int_handle, IDXD_IRQ_MSIX);
+ idxd_device_clear_perm_entry(idxd, ie);
+ ie->vector = -1;
+ ie->int_handle = INVALID_INT_HANDLE;
+ ie->pasid = INVALID_IOASID;
+}
+
+int idxd_wq_request_irq(struct idxd_wq *wq)
+{
+ struct idxd_device *idxd = wq->idxd;
+ struct pci_dev *pdev = idxd->pdev;
+ struct device *dev = &pdev->dev;
+ struct idxd_irq_entry *ie;
+ int rc;
+
+ ie = &wq->ie;
+ ie->vector = pci_irq_vector(pdev, ie->id);
+ ie->pasid = device_pasid_enabled(idxd) ? idxd->pasid : INVALID_IOASID;
+ idxd_device_set_perm_entry(idxd, ie);
+
+ rc = request_threaded_irq(ie->vector, NULL, idxd_wq_thread, 0, "idxd-portal", ie);
+ if (rc < 0) {
+ dev_err(dev, "Failed to request irq %d.\n", ie->vector);
+ goto err_irq;
+ }
+
+ if (idxd->request_int_handles) {
+ rc = idxd_device_request_int_handle(idxd, ie->id, &ie->int_handle,
+ IDXD_IRQ_MSIX);
+ if (rc < 0)
+ goto err_int_handle;
+ } else {
+ ie->int_handle = ie->id;
+ }
+
+ return 0;
+
+err_int_handle:
+ ie->int_handle = INVALID_INT_HANDLE;
+ free_irq(ie->vector, ie);
+err_irq:
+ idxd_device_clear_perm_entry(idxd, ie);
+ ie->pasid = INVALID_IOASID;
+ return rc;
+}
+
int __drv_enable_wq(struct idxd_wq *wq)
{
struct idxd_device *idxd = wq->idxd;
diff --git a/drivers/dma/idxd/dma.c b/drivers/dma/idxd/dma.c
index c39e9483206a..bfff59617d04 100644
--- a/drivers/dma/idxd/dma.c
+++ b/drivers/dma/idxd/dma.c
@@ -21,20 +21,27 @@ static inline struct idxd_wq *to_idxd_wq(struct dma_chan *c)
}
void idxd_dma_complete_txd(struct idxd_desc *desc,
- enum idxd_complete_type comp_type)
+ enum idxd_complete_type comp_type,
+ bool free_desc)
{
+ struct idxd_device *idxd = desc->wq->idxd;
struct dma_async_tx_descriptor *tx;
struct dmaengine_result res;
int complete = 1;
- if (desc->completion->status == DSA_COMP_SUCCESS)
+ if (desc->completion->status == DSA_COMP_SUCCESS) {
res.result = DMA_TRANS_NOERROR;
- else if (desc->completion->status)
+ } else if (desc->completion->status) {
+ if (idxd->request_int_handles && comp_type != IDXD_COMPLETE_ABORT &&
+ desc->completion->status == DSA_COMP_INT_HANDLE_INVAL &&
+ idxd_queue_int_handle_resubmit(desc))
+ return;
res.result = DMA_TRANS_WRITE_FAILED;
- else if (comp_type == IDXD_COMPLETE_ABORT)
+ } else if (comp_type == IDXD_COMPLETE_ABORT) {
res.result = DMA_TRANS_ABORTED;
- else
+ } else {
complete = 0;
+ }
tx = &desc->txd;
if (complete && tx->cookie) {
@@ -44,6 +51,9 @@ void idxd_dma_complete_txd(struct idxd_desc *desc,
tx->callback = NULL;
tx->callback_result = NULL;
}
+
+ if (free_desc)
+ idxd_free_desc(desc->wq, desc);
}
static void op_flag_setup(unsigned long flags, u32 *desc_flags)
@@ -153,8 +163,10 @@ static dma_cookie_t idxd_dma_tx_submit(struct dma_async_tx_descriptor *tx)
cookie = dma_cookie_assign(tx);
rc = idxd_submit_desc(wq, desc);
- if (rc < 0)
+ if (rc < 0) {
+ idxd_free_desc(wq, desc);
return rc;
+ }
return cookie;
}
@@ -277,6 +289,14 @@ static int idxd_dmaengine_drv_probe(struct idxd_dev *idxd_dev)
mutex_lock(&wq->wq_lock);
wq->type = IDXD_WQT_KERNEL;
+
+ rc = idxd_wq_request_irq(wq);
+ if (rc < 0) {
+ idxd->cmd_status = IDXD_SCMD_WQ_IRQ_ERR;
+ dev_dbg(dev, "WQ %d irq setup failed: %d\n", wq->id, rc);
+ goto err_irq;
+ }
+
rc = __drv_enable_wq(wq);
if (rc < 0) {
dev_dbg(dev, "Enable wq %d failed: %d\n", wq->id, rc);
@@ -310,13 +330,15 @@ static int idxd_dmaengine_drv_probe(struct idxd_dev *idxd_dev)
return 0;
err_dma:
- idxd_wq_quiesce(wq);
+ __idxd_wq_quiesce(wq);
percpu_ref_exit(&wq->wq_active);
err_ref:
idxd_wq_free_resources(wq);
err_res_alloc:
__drv_disable_wq(wq);
err:
+ idxd_wq_free_irq(wq);
+err_irq:
wq->type = IDXD_WQT_NONE;
mutex_unlock(&wq->wq_lock);
return rc;
@@ -327,11 +349,13 @@ static void idxd_dmaengine_drv_remove(struct idxd_dev *idxd_dev)
struct idxd_wq *wq = idxd_dev_to_wq(idxd_dev);
mutex_lock(&wq->wq_lock);
- idxd_wq_quiesce(wq);
+ __idxd_wq_quiesce(wq);
idxd_unregister_dma_channel(wq);
idxd_wq_free_resources(wq);
__drv_disable_wq(wq);
percpu_ref_exit(&wq->wq_active);
+ idxd_wq_free_irq(wq);
+ wq->type = IDXD_WQT_NONE;
mutex_unlock(&wq->wq_lock);
}
diff --git a/drivers/dma/idxd/idxd.h b/drivers/dma/idxd/idxd.h
index 0cf8d3145870..da72eb15f610 100644
--- a/drivers/dma/idxd/idxd.h
+++ b/drivers/dma/idxd/idxd.h
@@ -10,6 +10,7 @@
#include <linux/cdev.h>
#include <linux/idr.h>
#include <linux/pci.h>
+#include <linux/ioasid.h>
#include <linux/perf_event.h>
#include <uapi/linux/idxd.h>
#include "registers.h"
@@ -51,6 +52,9 @@ enum idxd_type {
#define IDXD_NAME_SIZE 128
#define IDXD_PMU_EVENT_MAX 64
+#define IDXD_ENQCMDS_RETRIES 32
+#define IDXD_ENQCMDS_MAX_RETRIES 64
+
struct idxd_device_driver {
const char *name;
enum idxd_dev_type *type;
@@ -64,8 +68,8 @@ extern struct idxd_device_driver idxd_drv;
extern struct idxd_device_driver idxd_dmaengine_drv;
extern struct idxd_device_driver idxd_user_drv;
+#define INVALID_INT_HANDLE -1
struct idxd_irq_entry {
- struct idxd_device *idxd;
int id;
int vector;
struct llist_head pending_llist;
@@ -75,6 +79,8 @@ struct idxd_irq_entry {
* and irq thread processing error descriptor.
*/
spinlock_t list_lock;
+ int int_handle;
+ ioasid_t pasid;
};
struct idxd_group {
@@ -84,9 +90,9 @@ struct idxd_group {
int id;
int num_engines;
int num_wqs;
- bool use_token_limit;
- u8 tokens_allowed;
- u8 tokens_reserved;
+ bool use_rdbuf_limit;
+ u8 rdbufs_allowed;
+ u8 rdbufs_reserved;
int tc_a;
int tc_b;
};
@@ -145,6 +151,10 @@ struct idxd_cdev {
#define WQ_NAME_SIZE 1024
#define WQ_TYPE_SIZE 10
+#define WQ_DEFAULT_QUEUE_DEPTH 16
+#define WQ_DEFAULT_MAX_XFER SZ_2M
+#define WQ_DEFAULT_MAX_BATCH 32
+
enum idxd_op_type {
IDXD_OP_BLOCK = 0,
IDXD_OP_NONBLOCK = 1,
@@ -164,13 +174,16 @@ struct idxd_dma_chan {
struct idxd_wq {
void __iomem *portal;
u32 portal_offset;
+ unsigned int enqcmds_retries;
struct percpu_ref wq_active;
struct completion wq_dead;
+ struct completion wq_resurrect;
struct idxd_dev idxd_dev;
struct idxd_cdev *idxd_cdev;
struct wait_queue_head err_queue;
struct idxd_device *idxd;
int id;
+ struct idxd_irq_entry ie;
enum idxd_wq_type type;
struct idxd_group *group;
int client_count;
@@ -251,6 +264,7 @@ struct idxd_device {
int id;
int major;
u32 cmd_status;
+ struct idxd_irq_entry ie; /* misc irq, msix 0 */
struct pci_dev *pdev;
void __iomem *reg_base;
@@ -266,6 +280,8 @@ struct idxd_device {
unsigned int pasid;
int num_groups;
+ int irq_cnt;
+ bool request_int_handles;
u32 msix_perm_offset;
u32 wqcfg_offset;
@@ -276,24 +292,20 @@ struct idxd_device {
u32 max_batch_size;
int max_groups;
int max_engines;
- int max_tokens;
+ int max_rdbufs;
int max_wqs;
int max_wq_size;
- int token_limit;
- int nr_tokens; /* non-reserved tokens */
+ int rdbuf_limit;
+ int nr_rdbufs; /* non-reserved read buffers */
unsigned int wqcfg_size;
union sw_err_reg sw_err;
wait_queue_head_t cmd_waitq;
- int num_wq_irqs;
- struct idxd_irq_entry *irq_entries;
struct idxd_dma_dev *idxd_dma;
struct workqueue_struct *wq;
struct work_struct work;
- int *int_handles;
-
struct idxd_pmu *idxd_pmu;
};
@@ -380,6 +392,21 @@ static inline void idxd_dev_set_type(struct idxd_dev *idev, int type)
idev->type = type;
}
+static inline struct idxd_irq_entry *idxd_get_ie(struct idxd_device *idxd, int idx)
+{
+ return (idx == 0) ? &idxd->ie : &idxd->wqs[idx - 1]->ie;
+}
+
+static inline struct idxd_wq *ie_to_wq(struct idxd_irq_entry *ie)
+{
+ return container_of(ie, struct idxd_wq, ie);
+}
+
+static inline struct idxd_device *ie_to_idxd(struct idxd_irq_entry *ie)
+{
+ return container_of(ie, struct idxd_device, ie);
+}
+
extern struct bus_type dsa_bus_type;
extern bool support_enqcmd;
@@ -518,17 +545,13 @@ void idxd_unregister_devices(struct idxd_device *idxd);
int idxd_register_driver(void);
void idxd_unregister_driver(void);
void idxd_wqs_quiesce(struct idxd_device *idxd);
+bool idxd_queue_int_handle_resubmit(struct idxd_desc *desc);
/* device interrupt control */
-void idxd_msix_perm_setup(struct idxd_device *idxd);
-void idxd_msix_perm_clear(struct idxd_device *idxd);
irqreturn_t idxd_misc_thread(int vec, void *data);
irqreturn_t idxd_wq_thread(int irq, void *data);
void idxd_mask_error_interrupts(struct idxd_device *idxd);
void idxd_unmask_error_interrupts(struct idxd_device *idxd);
-void idxd_mask_msix_vectors(struct idxd_device *idxd);
-void idxd_mask_msix_vector(struct idxd_device *idxd, int vec_id);
-void idxd_unmask_msix_vector(struct idxd_device *idxd, int vec_id);
/* device control */
int idxd_register_idxd_drv(void);
@@ -564,13 +587,17 @@ int idxd_wq_map_portal(struct idxd_wq *wq);
void idxd_wq_unmap_portal(struct idxd_wq *wq);
int idxd_wq_set_pasid(struct idxd_wq *wq, int pasid);
int idxd_wq_disable_pasid(struct idxd_wq *wq);
+void __idxd_wq_quiesce(struct idxd_wq *wq);
void idxd_wq_quiesce(struct idxd_wq *wq);
int idxd_wq_init_percpu_ref(struct idxd_wq *wq);
+void idxd_wq_free_irq(struct idxd_wq *wq);
+int idxd_wq_request_irq(struct idxd_wq *wq);
/* submission */
int idxd_submit_desc(struct idxd_wq *wq, struct idxd_desc *desc);
struct idxd_desc *idxd_alloc_desc(struct idxd_wq *wq, enum idxd_op_type optype);
void idxd_free_desc(struct idxd_wq *wq, struct idxd_desc *desc);
+int idxd_enqcmds(struct idxd_wq *wq, void __iomem *portal, const void *desc);
/* dmaengine */
int idxd_register_dma_device(struct idxd_device *idxd);
@@ -579,7 +606,7 @@ int idxd_register_dma_channel(struct idxd_wq *wq);
void idxd_unregister_dma_channel(struct idxd_wq *wq);
void idxd_parse_completion_status(u8 status, enum dmaengine_tx_result *res);
void idxd_dma_complete_txd(struct idxd_desc *desc,
- enum idxd_complete_type comp_type);
+ enum idxd_complete_type comp_type, bool free_desc);
/* cdev */
int idxd_cdev_register(void);
@@ -603,10 +630,4 @@ static inline void perfmon_init(void) {}
static inline void perfmon_exit(void) {}
#endif
-static inline void complete_desc(struct idxd_desc *desc, enum idxd_complete_type reason)
-{
- idxd_dma_complete_txd(desc, reason);
- idxd_free_desc(desc->wq, desc);
-}
-
#endif
diff --git a/drivers/dma/idxd/init.c b/drivers/dma/idxd/init.c
index 7bf03f371ce1..08a5f4310188 100644
--- a/drivers/dma/idxd/init.c
+++ b/drivers/dma/idxd/init.c
@@ -72,7 +72,7 @@ static int idxd_setup_interrupts(struct idxd_device *idxd)
{
struct pci_dev *pdev = idxd->pdev;
struct device *dev = &pdev->dev;
- struct idxd_irq_entry *irq_entry;
+ struct idxd_irq_entry *ie;
int i, msixcnt;
int rc = 0;
@@ -81,6 +81,7 @@ static int idxd_setup_interrupts(struct idxd_device *idxd)
dev_err(dev, "Not MSI-X interrupt capable.\n");
return -ENOSPC;
}
+ idxd->irq_cnt = msixcnt;
rc = pci_alloc_irq_vectors(pdev, msixcnt, msixcnt, PCI_IRQ_MSIX);
if (rc != msixcnt) {
@@ -89,87 +90,34 @@ static int idxd_setup_interrupts(struct idxd_device *idxd)
}
dev_dbg(dev, "Enabled %d msix vectors\n", msixcnt);
- /*
- * We implement 1 completion list per MSI-X entry except for
- * entry 0, which is for errors and others.
- */
- idxd->irq_entries = kcalloc_node(msixcnt, sizeof(struct idxd_irq_entry),
- GFP_KERNEL, dev_to_node(dev));
- if (!idxd->irq_entries) {
- rc = -ENOMEM;
- goto err_irq_entries;
- }
-
- for (i = 0; i < msixcnt; i++) {
- idxd->irq_entries[i].id = i;
- idxd->irq_entries[i].idxd = idxd;
- idxd->irq_entries[i].vector = pci_irq_vector(pdev, i);
- spin_lock_init(&idxd->irq_entries[i].list_lock);
- }
-
- idxd_msix_perm_setup(idxd);
- irq_entry = &idxd->irq_entries[0];
- rc = request_threaded_irq(irq_entry->vector, NULL, idxd_misc_thread,
- 0, "idxd-misc", irq_entry);
+ ie = idxd_get_ie(idxd, 0);
+ ie->vector = pci_irq_vector(pdev, 0);
+ rc = request_threaded_irq(ie->vector, NULL, idxd_misc_thread, 0, "idxd-misc", ie);
if (rc < 0) {
dev_err(dev, "Failed to allocate misc interrupt.\n");
goto err_misc_irq;
}
+ dev_dbg(dev, "Requested idxd-misc handler on msix vector %d\n", ie->vector);
- dev_dbg(dev, "Allocated idxd-misc handler on msix vector %d\n", irq_entry->vector);
-
- /* first MSI-X entry is not for wq interrupts */
- idxd->num_wq_irqs = msixcnt - 1;
+ for (i = 0; i < idxd->max_wqs; i++) {
+ int msix_idx = i + 1;
- for (i = 1; i < msixcnt; i++) {
- irq_entry = &idxd->irq_entries[i];
+ ie = idxd_get_ie(idxd, msix_idx);
+ ie->id = msix_idx;
+ ie->int_handle = INVALID_INT_HANDLE;
+ ie->pasid = INVALID_IOASID;
- init_llist_head(&idxd->irq_entries[i].pending_llist);
- INIT_LIST_HEAD(&idxd->irq_entries[i].work_list);
- rc = request_threaded_irq(irq_entry->vector, NULL,
- idxd_wq_thread, 0, "idxd-portal", irq_entry);
- if (rc < 0) {
- dev_err(dev, "Failed to allocate irq %d.\n", irq_entry->vector);
- goto err_wq_irqs;
- }
-
- dev_dbg(dev, "Allocated idxd-msix %d for vector %d\n", i, irq_entry->vector);
- if (idxd->hw.cmd_cap & BIT(IDXD_CMD_REQUEST_INT_HANDLE)) {
- /*
- * The MSIX vector enumeration starts at 1 with vector 0 being the
- * misc interrupt that handles non I/O completion events. The
- * interrupt handles are for IMS enumeration on guest. The misc
- * interrupt vector does not require a handle and therefore we start
- * the int_handles at index 0. Since 'i' starts at 1, the first
- * int_handles index will be 0.
- */
- rc = idxd_device_request_int_handle(idxd, i, &idxd->int_handles[i - 1],
- IDXD_IRQ_MSIX);
- if (rc < 0) {
- free_irq(irq_entry->vector, irq_entry);
- goto err_wq_irqs;
- }
- dev_dbg(dev, "int handle requested: %u\n", idxd->int_handles[i - 1]);
- }
+ spin_lock_init(&ie->list_lock);
+ init_llist_head(&ie->pending_llist);
+ INIT_LIST_HEAD(&ie->work_list);
}
idxd_unmask_error_interrupts(idxd);
return 0;
- err_wq_irqs:
- while (--i >= 0) {
- irq_entry = &idxd->irq_entries[i];
- free_irq(irq_entry->vector, irq_entry);
- if (i != 0)
- idxd_device_release_int_handle(idxd,
- idxd->int_handles[i], IDXD_IRQ_MSIX);
- }
err_misc_irq:
- /* Disable error interrupt generation */
idxd_mask_error_interrupts(idxd);
- idxd_msix_perm_clear(idxd);
- err_irq_entries:
pci_free_irq_vectors(pdev);
dev_err(dev, "No usable interrupts\n");
return rc;
@@ -178,26 +126,16 @@ static int idxd_setup_interrupts(struct idxd_device *idxd)
static void idxd_cleanup_interrupts(struct idxd_device *idxd)
{
struct pci_dev *pdev = idxd->pdev;
- struct idxd_irq_entry *irq_entry;
- int i, msixcnt;
+ struct idxd_irq_entry *ie;
+ int msixcnt;
msixcnt = pci_msix_vec_count(pdev);
if (msixcnt <= 0)
return;
- irq_entry = &idxd->irq_entries[0];
- free_irq(irq_entry->vector, irq_entry);
-
- for (i = 1; i < msixcnt; i++) {
-
- irq_entry = &idxd->irq_entries[i];
- if (idxd->hw.cmd_cap & BIT(IDXD_CMD_RELEASE_INT_HANDLE))
- idxd_device_release_int_handle(idxd, idxd->int_handles[i],
- IDXD_IRQ_MSIX);
- free_irq(irq_entry->vector, irq_entry);
- }
-
+ ie = idxd_get_ie(idxd, 0);
idxd_mask_error_interrupts(idxd);
+ free_irq(ie->vector, ie);
pci_free_irq_vectors(pdev);
}
@@ -237,8 +175,10 @@ static int idxd_setup_wqs(struct idxd_device *idxd)
mutex_init(&wq->wq_lock);
init_waitqueue_head(&wq->err_queue);
init_completion(&wq->wq_dead);
- wq->max_xfer_bytes = idxd->max_xfer_bytes;
- wq->max_batch_size = idxd->max_batch_size;
+ init_completion(&wq->wq_resurrect);
+ wq->max_xfer_bytes = WQ_DEFAULT_MAX_XFER;
+ wq->max_batch_size = WQ_DEFAULT_MAX_BATCH;
+ wq->enqcmds_retries = IDXD_ENQCMDS_RETRIES;
wq->wqcfg = kzalloc_node(idxd->wqcfg_size, GFP_KERNEL, dev_to_node(dev));
if (!wq->wqcfg) {
put_device(conf_dev);
@@ -379,13 +319,6 @@ static int idxd_setup_internals(struct idxd_device *idxd)
init_waitqueue_head(&idxd->cmd_waitq);
- if (idxd->hw.cmd_cap & BIT(IDXD_CMD_REQUEST_INT_HANDLE)) {
- idxd->int_handles = kcalloc_node(idxd->max_wqs, sizeof(int), GFP_KERNEL,
- dev_to_node(dev));
- if (!idxd->int_handles)
- return -ENOMEM;
- }
-
rc = idxd_setup_wqs(idxd);
if (rc < 0)
goto err_wqs;
@@ -416,7 +349,6 @@ static int idxd_setup_internals(struct idxd_device *idxd)
for (i = 0; i < idxd->max_wqs; i++)
put_device(wq_confdev(idxd->wqs[i]));
err_wqs:
- kfree(idxd->int_handles);
return rc;
}
@@ -451,6 +383,10 @@ static void idxd_read_caps(struct idxd_device *idxd)
dev_dbg(dev, "cmd_cap: %#x\n", idxd->hw.cmd_cap);
}
+ /* reading command capabilities */
+ if (idxd->hw.cmd_cap & BIT(IDXD_CMD_REQUEST_INT_HANDLE))
+ idxd->request_int_handles = true;
+
idxd->max_xfer_bytes = 1ULL << idxd->hw.gen_cap.max_xfer_shift;
dev_dbg(dev, "max xfer size: %llu bytes\n", idxd->max_xfer_bytes);
idxd->max_batch_size = 1U << idxd->hw.gen_cap.max_batch_shift;
@@ -464,9 +400,9 @@ static void idxd_read_caps(struct idxd_device *idxd)
dev_dbg(dev, "group_cap: %#llx\n", idxd->hw.group_cap.bits);
idxd->max_groups = idxd->hw.group_cap.num_groups;
dev_dbg(dev, "max groups: %u\n", idxd->max_groups);
- idxd->max_tokens = idxd->hw.group_cap.total_tokens;
- dev_dbg(dev, "max tokens: %u\n", idxd->max_tokens);
- idxd->nr_tokens = idxd->max_tokens;
+ idxd->max_rdbufs = idxd->hw.group_cap.total_rdbufs;
+ dev_dbg(dev, "max read buffers: %u\n", idxd->max_rdbufs);
+ idxd->nr_rdbufs = idxd->max_rdbufs;
/* read engine capabilities */
idxd->hw.engine_cap.bits =
@@ -611,8 +547,6 @@ static int idxd_probe(struct idxd_device *idxd)
if (rc)
goto err_config;
- dev_dbg(dev, "IDXD interrupt setup complete.\n");
-
idxd->major = idxd_cdev_get_major(idxd);
rc = perfmon_pmu_init(idxd);
@@ -708,32 +642,6 @@ static int idxd_pci_probe(struct pci_dev *pdev, const struct pci_device_id *id)
return rc;
}
-static void idxd_flush_pending_llist(struct idxd_irq_entry *ie)
-{
- struct idxd_desc *desc, *itr;
- struct llist_node *head;
-
- head = llist_del_all(&ie->pending_llist);
- if (!head)
- return;
-
- llist_for_each_entry_safe(desc, itr, head, llnode) {
- idxd_dma_complete_txd(desc, IDXD_COMPLETE_ABORT);
- idxd_free_desc(desc->wq, desc);
- }
-}
-
-static void idxd_flush_work_list(struct idxd_irq_entry *ie)
-{
- struct idxd_desc *desc, *iter;
-
- list_for_each_entry_safe(desc, iter, &ie->work_list, list) {
- list_del(&desc->list);
- idxd_dma_complete_txd(desc, IDXD_COMPLETE_ABORT);
- idxd_free_desc(desc->wq, desc);
- }
-}
-
void idxd_wqs_quiesce(struct idxd_device *idxd)
{
struct idxd_wq *wq;
@@ -746,47 +654,19 @@ void idxd_wqs_quiesce(struct idxd_device *idxd)
}
}
-static void idxd_release_int_handles(struct idxd_device *idxd)
-{
- struct device *dev = &idxd->pdev->dev;
- int i, rc;
-
- for (i = 0; i < idxd->num_wq_irqs; i++) {
- if (idxd->hw.cmd_cap & BIT(IDXD_CMD_RELEASE_INT_HANDLE)) {
- rc = idxd_device_release_int_handle(idxd, idxd->int_handles[i],
- IDXD_IRQ_MSIX);
- if (rc < 0)
- dev_warn(dev, "irq handle %d release failed\n",
- idxd->int_handles[i]);
- else
- dev_dbg(dev, "int handle requested: %u\n", idxd->int_handles[i]);
- }
- }
-}
-
static void idxd_shutdown(struct pci_dev *pdev)
{
struct idxd_device *idxd = pci_get_drvdata(pdev);
- int rc, i;
struct idxd_irq_entry *irq_entry;
- int msixcnt = pci_msix_vec_count(pdev);
+ int rc;
rc = idxd_device_disable(idxd);
if (rc)
dev_err(&pdev->dev, "Disabling device failed\n");
- dev_dbg(&pdev->dev, "%s called\n", __func__);
- idxd_mask_msix_vectors(idxd);
+ irq_entry = &idxd->ie;
+ synchronize_irq(irq_entry->vector);
idxd_mask_error_interrupts(idxd);
-
- for (i = 0; i < msixcnt; i++) {
- irq_entry = &idxd->irq_entries[i];
- synchronize_irq(irq_entry->vector);
- if (i == 0)
- continue;
- idxd_flush_pending_llist(irq_entry);
- idxd_flush_work_list(irq_entry);
- }
flush_workqueue(idxd->wq);
}
@@ -794,8 +674,6 @@ static void idxd_remove(struct pci_dev *pdev)
{
struct idxd_device *idxd = pci_get_drvdata(pdev);
struct idxd_irq_entry *irq_entry;
- int msixcnt = pci_msix_vec_count(pdev);
- int i;
idxd_unregister_devices(idxd);
/*
@@ -811,12 +689,8 @@ static void idxd_remove(struct pci_dev *pdev)
if (device_pasid_enabled(idxd))
idxd_disable_system_pasid(idxd);
- for (i = 0; i < msixcnt; i++) {
- irq_entry = &idxd->irq_entries[i];
- free_irq(irq_entry->vector, irq_entry);
- }
- idxd_msix_perm_clear(idxd);
- idxd_release_int_handles(idxd);
+ irq_entry = idxd_get_ie(idxd, 0);
+ free_irq(irq_entry->vector, irq_entry);
pci_free_irq_vectors(pdev);
pci_iounmap(pdev, idxd->reg_base);
iommu_dev_disable_feature(&pdev->dev, IOMMU_DEV_FEAT_SVA);
diff --git a/drivers/dma/idxd/irq.c b/drivers/dma/idxd/irq.c
index cf2c8bc4f147..743ead5ebc57 100644
--- a/drivers/dma/idxd/irq.c
+++ b/drivers/dma/idxd/irq.c
@@ -6,6 +6,7 @@
#include <linux/pci.h>
#include <linux/io-64-nonatomic-lo-hi.h>
#include <linux/dmaengine.h>
+#include <linux/delay.h>
#include <uapi/linux/idxd.h>
#include "../dmaengine.h"
#include "idxd.h"
@@ -22,6 +23,16 @@ struct idxd_fault {
struct idxd_device *idxd;
};
+struct idxd_resubmit {
+ struct work_struct work;
+ struct idxd_desc *desc;
+};
+
+struct idxd_int_handle_revoke {
+ struct work_struct work;
+ struct idxd_device *idxd;
+};
+
static void idxd_device_reinit(struct work_struct *work)
{
struct idxd_device *idxd = container_of(work, struct idxd_device, work);
@@ -55,6 +66,162 @@ static void idxd_device_reinit(struct work_struct *work)
idxd_device_clear_state(idxd);
}
+/*
+ * The function sends a drain descriptor for the interrupt handle. The drain ensures
+ * all descriptors with this interrupt handle is flushed and the interrupt
+ * will allow the cleanup of the outstanding descriptors.
+ */
+static void idxd_int_handle_revoke_drain(struct idxd_irq_entry *ie)
+{
+ struct idxd_wq *wq = ie_to_wq(ie);
+ struct idxd_device *idxd = wq->idxd;
+ struct device *dev = &idxd->pdev->dev;
+ struct dsa_hw_desc desc = {};
+ void __iomem *portal;
+ int rc;
+
+ /* Issue a simple drain operation with interrupt but no completion record */
+ desc.flags = IDXD_OP_FLAG_RCI;
+ desc.opcode = DSA_OPCODE_DRAIN;
+ desc.priv = 1;
+
+ if (ie->pasid != INVALID_IOASID)
+ desc.pasid = ie->pasid;
+ desc.int_handle = ie->int_handle;
+ portal = idxd_wq_portal_addr(wq);
+
+ /*
+ * The wmb() makes sure that the descriptor is all there before we
+ * issue.
+ */
+ wmb();
+ if (wq_dedicated(wq)) {
+ iosubmit_cmds512(portal, &desc, 1);
+ } else {
+ rc = idxd_enqcmds(wq, portal, &desc);
+ /* This should not fail unless hardware failed. */
+ if (rc < 0)
+ dev_warn(dev, "Failed to submit drain desc on wq %d\n", wq->id);
+ }
+}
+
+static void idxd_abort_invalid_int_handle_descs(struct idxd_irq_entry *ie)
+{
+ LIST_HEAD(flist);
+ struct idxd_desc *d, *t;
+ struct llist_node *head;
+
+ spin_lock(&ie->list_lock);
+ head = llist_del_all(&ie->pending_llist);
+ if (head) {
+ llist_for_each_entry_safe(d, t, head, llnode)
+ list_add_tail(&d->list, &ie->work_list);
+ }
+
+ list_for_each_entry_safe(d, t, &ie->work_list, list) {
+ if (d->completion->status == DSA_COMP_INT_HANDLE_INVAL)
+ list_move_tail(&d->list, &flist);
+ }
+ spin_unlock(&ie->list_lock);
+
+ list_for_each_entry_safe(d, t, &flist, list) {
+ list_del(&d->list);
+ idxd_dma_complete_txd(d, IDXD_COMPLETE_ABORT, true);
+ }
+}
+
+static void idxd_int_handle_revoke(struct work_struct *work)
+{
+ struct idxd_int_handle_revoke *revoke =
+ container_of(work, struct idxd_int_handle_revoke, work);
+ struct idxd_device *idxd = revoke->idxd;
+ struct pci_dev *pdev = idxd->pdev;
+ struct device *dev = &pdev->dev;
+ int i, new_handle, rc;
+
+ if (!idxd->request_int_handles) {
+ kfree(revoke);
+ dev_warn(dev, "Unexpected int handle refresh interrupt.\n");
+ return;
+ }
+
+ /*
+ * The loop attempts to acquire new interrupt handle for all interrupt
+ * vectors that supports a handle. If a new interrupt handle is acquired and the
+ * wq is kernel type, the driver will kill the percpu_ref to pause all
+ * ongoing descriptor submissions. The interrupt handle is then changed.
+ * After change, the percpu_ref is revived and all the pending submissions
+ * are woken to try again. A drain is sent to for the interrupt handle
+ * at the end to make sure all invalid int handle descriptors are processed.
+ */
+ for (i = 1; i < idxd->irq_cnt; i++) {
+ struct idxd_irq_entry *ie = idxd_get_ie(idxd, i);
+ struct idxd_wq *wq = ie_to_wq(ie);
+
+ if (ie->int_handle == INVALID_INT_HANDLE)
+ continue;
+
+ rc = idxd_device_request_int_handle(idxd, i, &new_handle, IDXD_IRQ_MSIX);
+ if (rc < 0) {
+ dev_warn(dev, "get int handle %d failed: %d\n", i, rc);
+ /*
+ * Failed to acquire new interrupt handle. Kill the WQ
+ * and release all the pending submitters. The submitters will
+ * get error return code and handle appropriately.
+ */
+ ie->int_handle = INVALID_INT_HANDLE;
+ idxd_wq_quiesce(wq);
+ idxd_abort_invalid_int_handle_descs(ie);
+ continue;
+ }
+
+ /* No change in interrupt handle, nothing needs to be done */
+ if (ie->int_handle == new_handle)
+ continue;
+
+ if (wq->state != IDXD_WQ_ENABLED || wq->type != IDXD_WQT_KERNEL) {
+ /*
+ * All the MSIX interrupts are allocated at once during probe.
+ * Therefore we need to update all interrupts even if the WQ
+ * isn't supporting interrupt operations.
+ */
+ ie->int_handle = new_handle;
+ continue;
+ }
+
+ mutex_lock(&wq->wq_lock);
+ reinit_completion(&wq->wq_resurrect);
+
+ /* Kill percpu_ref to pause additional descriptor submissions */
+ percpu_ref_kill(&wq->wq_active);
+
+ /* Wait for all submitters quiesce before we change interrupt handle */
+ wait_for_completion(&wq->wq_dead);
+
+ ie->int_handle = new_handle;
+
+ /* Revive percpu ref and wake up all the waiting submitters */
+ percpu_ref_reinit(&wq->wq_active);
+ complete_all(&wq->wq_resurrect);
+ mutex_unlock(&wq->wq_lock);
+
+ /*
+ * The delay here is to wait for all possible MOVDIR64B that
+ * are issued before percpu_ref_kill() has happened to have
+ * reached the PCIe domain before the drain is issued. The driver
+ * needs to ensure that the drain descriptor issued does not pass
+ * all the other issued descriptors that contain the invalid
+ * interrupt handle in order to ensure that the drain descriptor
+ * interrupt will allow the cleanup of all the descriptors with
+ * invalid interrupt handle.
+ */
+ if (wq_dedicated(wq))
+ udelay(100);
+ idxd_int_handle_revoke_drain(ie);
+ }
+ kfree(revoke);
+}
+
static int process_misc_interrupts(struct idxd_device *idxd, u32 cause)
{
struct device *dev = &idxd->pdev->dev;
@@ -101,6 +268,23 @@ static int process_misc_interrupts(struct idxd_device *idxd, u32 cause)
err = true;
}
+ if (cause & IDXD_INTC_INT_HANDLE_REVOKED) {
+ struct idxd_int_handle_revoke *revoke;
+
+ val |= IDXD_INTC_INT_HANDLE_REVOKED;
+
+ revoke = kzalloc(sizeof(*revoke), GFP_ATOMIC);
+ if (revoke) {
+ revoke->idxd = idxd;
+ INIT_WORK(&revoke->work, idxd_int_handle_revoke);
+ queue_work(idxd->wq, &revoke->work);
+
+ } else {
+ dev_err(dev, "Failed to allocate work for int handle revoke\n");
+ idxd_wqs_quiesce(idxd);
+ }
+ }
+
if (cause & IDXD_INTC_CMD) {
val |= IDXD_INTC_CMD;
complete(idxd->cmd_done);
@@ -157,7 +341,7 @@ halt:
irqreturn_t idxd_misc_thread(int vec, void *data)
{
struct idxd_irq_entry *irq_entry = data;
- struct idxd_device *idxd = irq_entry->idxd;
+ struct idxd_device *idxd = ie_to_idxd(irq_entry);
int rc;
u32 cause;
@@ -177,6 +361,51 @@ irqreturn_t idxd_misc_thread(int vec, void *data)
return IRQ_HANDLED;
}
+static void idxd_int_handle_resubmit_work(struct work_struct *work)
+{
+ struct idxd_resubmit *irw = container_of(work, struct idxd_resubmit, work);
+ struct idxd_desc *desc = irw->desc;
+ struct idxd_wq *wq = desc->wq;
+ int rc;
+
+ desc->completion->status = 0;
+ rc = idxd_submit_desc(wq, desc);
+ if (rc < 0) {
+ dev_dbg(&wq->idxd->pdev->dev, "Failed to resubmit desc %d to wq %d.\n",
+ desc->id, wq->id);
+ /*
+ * If the error is not -EAGAIN, it means the submission failed due to wq
+ * has been killed instead of ENQCMDS failure. Here the driver needs to
+ * notify the submitter of the failure by reporting abort status.
+ *
+ * -EAGAIN comes from ENQCMDS failure. idxd_submit_desc() will handle the
+ * abort.
+ */
+ if (rc != -EAGAIN) {
+ desc->completion->status = IDXD_COMP_DESC_ABORT;
+ idxd_dma_complete_txd(desc, IDXD_COMPLETE_ABORT, false);
+ }
+ idxd_free_desc(wq, desc);
+ }
+ kfree(irw);
+}
+
+bool idxd_queue_int_handle_resubmit(struct idxd_desc *desc)
+{
+ struct idxd_wq *wq = desc->wq;
+ struct idxd_device *idxd = wq->idxd;
+ struct idxd_resubmit *irw;
+
+ irw = kzalloc(sizeof(*irw), GFP_KERNEL);
+ if (!irw)
+ return false;
+
+ irw->desc = desc;
+ INIT_WORK(&irw->work, idxd_int_handle_resubmit_work);
+ queue_work(idxd->wq, &irw->work);
+ return true;
+}
+
static void irq_process_pending_llist(struct idxd_irq_entry *irq_entry)
{
struct idxd_desc *desc, *t;
@@ -195,11 +424,11 @@ static void irq_process_pending_llist(struct idxd_irq_entry *irq_entry)
* and 0xff, which DSA_COMP_STATUS_MASK can mask out.
*/
if (unlikely(desc->completion->status == IDXD_COMP_DESC_ABORT)) {
- complete_desc(desc, IDXD_COMPLETE_ABORT);
+ idxd_dma_complete_txd(desc, IDXD_COMPLETE_ABORT, true);
continue;
}
- complete_desc(desc, IDXD_COMPLETE_NORMAL);
+ idxd_dma_complete_txd(desc, IDXD_COMPLETE_NORMAL, true);
} else {
spin_lock(&irq_entry->list_lock);
list_add_tail(&desc->list,
@@ -238,11 +467,11 @@ static void irq_process_work_list(struct idxd_irq_entry *irq_entry)
* and 0xff, which DSA_COMP_STATUS_MASK can mask out.
*/
if (unlikely(desc->completion->status == IDXD_COMP_DESC_ABORT)) {
- complete_desc(desc, IDXD_COMPLETE_ABORT);
+ idxd_dma_complete_txd(desc, IDXD_COMPLETE_ABORT, true);
continue;
}
- complete_desc(desc, IDXD_COMPLETE_NORMAL);
+ idxd_dma_complete_txd(desc, IDXD_COMPLETE_NORMAL, true);
}
}
diff --git a/drivers/dma/idxd/registers.h b/drivers/dma/idxd/registers.h
index 262c8220adbd..aa642aecdc0b 100644
--- a/drivers/dma/idxd/registers.h
+++ b/drivers/dma/idxd/registers.h
@@ -64,9 +64,9 @@ union wq_cap_reg {
union group_cap_reg {
struct {
u64 num_groups:8;
- u64 total_tokens:8;
- u64 token_en:1;
- u64 token_limit:1;
+ u64 total_rdbufs:8; /* formerly total_tokens */
+ u64 rdbuf_ctrl:1; /* formerly token_en */
+ u64 rdbuf_limit:1; /* formerly token_limit */
u64 rsvd:46;
};
u64 bits;
@@ -110,7 +110,7 @@ union offsets_reg {
#define IDXD_GENCFG_OFFSET 0x80
union gencfg_reg {
struct {
- u32 token_limit:8;
+ u32 rdbuf_limit:8;
u32 rsvd:4;
u32 user_int_en:1;
u32 rsvd2:19;
@@ -158,6 +158,7 @@ enum idxd_device_reset_type {
#define IDXD_INTC_OCCUPY 0x04
#define IDXD_INTC_PERFMON_OVFL 0x08
#define IDXD_INTC_HALT_STATE 0x10
+#define IDXD_INTC_INT_HANDLE_REVOKED 0x80000000
#define IDXD_CMD_OFFSET 0xa0
union idxd_command_reg {
@@ -287,10 +288,10 @@ union group_flags {
u32 tc_a:3;
u32 tc_b:3;
u32 rsvd:1;
- u32 use_token_limit:1;
- u32 tokens_reserved:8;
+ u32 use_rdbuf_limit:1;
+ u32 rdbufs_reserved:8;
u32 rsvd2:4;
- u32 tokens_allowed:8;
+ u32 rdbufs_allowed:8;
u32 rsvd3:4;
};
u32 bits;
diff --git a/drivers/dma/idxd/submit.c b/drivers/dma/idxd/submit.c
index 83452fbbb168..e289fd48711a 100644
--- a/drivers/dma/idxd/submit.c
+++ b/drivers/dma/idxd/submit.c
@@ -21,15 +21,6 @@ static struct idxd_desc *__get_desc(struct idxd_wq *wq, int idx, int cpu)
if (device_pasid_enabled(idxd))
desc->hw->pasid = idxd->pasid;
- /*
- * On host, MSIX vecotr 0 is used for misc interrupt. Therefore when we match
- * vector 1:1 to the WQ id, we need to add 1
- */
- if (!idxd->int_handles)
- desc->hw->int_handle = wq->id + 1;
- else
- desc->hw->int_handle = idxd->int_handles[wq->id];
-
return desc;
}
@@ -134,35 +125,58 @@ static void llist_abort_desc(struct idxd_wq *wq, struct idxd_irq_entry *ie,
spin_unlock(&ie->list_lock);
if (found)
- complete_desc(found, IDXD_COMPLETE_ABORT);
+ idxd_dma_complete_txd(found, IDXD_COMPLETE_ABORT, false);
/*
- * complete_desc() will return desc to allocator and the desc can be
- * acquired by a different process and the desc->list can be modified.
- * Delete desc from list so the list trasversing does not get corrupted
- * by the other process.
+ * completing the descriptor will return desc to allocator and
+ * the desc can be acquired by a different process and the
+ * desc->list can be modified. Delete desc from list so the
+ * list trasversing does not get corrupted by the other process.
*/
list_for_each_entry_safe(d, t, &flist, list) {
list_del_init(&d->list);
- complete_desc(d, IDXD_COMPLETE_NORMAL);
+ idxd_dma_complete_txd(found, IDXD_COMPLETE_ABORT, true);
}
}
+/*
+ * ENQCMDS typically fail when the WQ is inactive or busy. On host submission, the driver
+ * has better control of number of descriptors being submitted to a shared wq by limiting
+ * the number of driver allocated descriptors to the wq size. However, when the swq is
+ * exported to a guest kernel, it may be shared with multiple guest kernels. This means
+ * the likelihood of getting busy returned on the swq when submitting goes significantly up.
+ * Having a tunable retry mechanism allows the driver to keep trying for a bit before giving
+ * up. The sysfs knob can be tuned by the system administrator.
+ */
+int idxd_enqcmds(struct idxd_wq *wq, void __iomem *portal, const void *desc)
+{
+ int rc, retries = 0;
+
+ do {
+ rc = enqcmds(portal, desc);
+ if (rc == 0)
+ break;
+ cpu_relax();
+ } while (retries++ < wq->enqcmds_retries);
+
+ return rc;
+}
+
int idxd_submit_desc(struct idxd_wq *wq, struct idxd_desc *desc)
{
struct idxd_device *idxd = wq->idxd;
struct idxd_irq_entry *ie = NULL;
+ u32 desc_flags = desc->hw->flags;
void __iomem *portal;
int rc;
- if (idxd->state != IDXD_DEV_ENABLED) {
- idxd_free_desc(wq, desc);
+ if (idxd->state != IDXD_DEV_ENABLED)
return -EIO;
- }
if (!percpu_ref_tryget_live(&wq->wq_active)) {
- idxd_free_desc(wq, desc);
- return -ENXIO;
+ wait_for_completion(&wq->wq_resurrect);
+ if (!percpu_ref_tryget_live(&wq->wq_active))
+ return -ENXIO;
}
portal = idxd_wq_portal_addr(wq);
@@ -178,28 +192,21 @@ int idxd_submit_desc(struct idxd_wq *wq, struct idxd_desc *desc)
* Pending the descriptor to the lockless list for the irq_entry
* that we designated the descriptor to.
*/
- if (desc->hw->flags & IDXD_OP_FLAG_RCI) {
- ie = &idxd->irq_entries[wq->id + 1];
+ if (desc_flags & IDXD_OP_FLAG_RCI) {
+ ie = &wq->ie;
+ desc->hw->int_handle = ie->int_handle;
llist_add(&desc->llnode, &ie->pending_llist);
}
if (wq_dedicated(wq)) {
iosubmit_cmds512(portal, desc->hw, 1);
} else {
- /*
- * It's not likely that we would receive queue full rejection
- * since the descriptor allocation gates at wq size. If we
- * receive a -EAGAIN, that means something went wrong such as the
- * device is not accepting descriptor at all.
- */
- rc = enqcmds(portal, desc->hw);
+ rc = idxd_enqcmds(wq, portal, desc->hw);
if (rc < 0) {
percpu_ref_put(&wq->wq_active);
/* abort operation frees the descriptor */
if (ie)
llist_abort_desc(wq, ie, desc);
- else
- idxd_free_desc(wq, desc);
return rc;
}
}
diff --git a/drivers/dma/idxd/sysfs.c b/drivers/dma/idxd/sysfs.c
index a9025be940db..7e19ab92b61a 100644
--- a/drivers/dma/idxd/sysfs.c
+++ b/drivers/dma/idxd/sysfs.c
@@ -99,31 +99,39 @@ struct device_type idxd_engine_device_type = {
/* Group attributes */
-static void idxd_set_free_tokens(struct idxd_device *idxd)
+static void idxd_set_free_rdbufs(struct idxd_device *idxd)
{
- int i, tokens;
+ int i, rdbufs;
- for (i = 0, tokens = 0; i < idxd->max_groups; i++) {
+ for (i = 0, rdbufs = 0; i < idxd->max_groups; i++) {
struct idxd_group *g = idxd->groups[i];
- tokens += g->tokens_reserved;
+ rdbufs += g->rdbufs_reserved;
}
- idxd->nr_tokens = idxd->max_tokens - tokens;
+ idxd->nr_rdbufs = idxd->max_rdbufs - rdbufs;
+}
+
+static ssize_t group_read_buffers_reserved_show(struct device *dev,
+ struct device_attribute *attr,
+ char *buf)
+{
+ struct idxd_group *group = confdev_to_group(dev);
+
+ return sysfs_emit(buf, "%u\n", group->rdbufs_reserved);
}
static ssize_t group_tokens_reserved_show(struct device *dev,
struct device_attribute *attr,
char *buf)
{
- struct idxd_group *group = confdev_to_group(dev);
-
- return sysfs_emit(buf, "%u\n", group->tokens_reserved);
+ dev_warn_once(dev, "attribute deprecated, see read_buffers_reserved.\n");
+ return group_read_buffers_reserved_show(dev, attr, buf);
}
-static ssize_t group_tokens_reserved_store(struct device *dev,
- struct device_attribute *attr,
- const char *buf, size_t count)
+static ssize_t group_read_buffers_reserved_store(struct device *dev,
+ struct device_attribute *attr,
+ const char *buf, size_t count)
{
struct idxd_group *group = confdev_to_group(dev);
struct idxd_device *idxd = group->idxd;
@@ -143,33 +151,53 @@ static ssize_t group_tokens_reserved_store(struct device *dev,
if (idxd->state == IDXD_DEV_ENABLED)
return -EPERM;
- if (val > idxd->max_tokens)
+ if (val > idxd->max_rdbufs)
return -EINVAL;
- if (val > idxd->nr_tokens + group->tokens_reserved)
+ if (val > idxd->nr_rdbufs + group->rdbufs_reserved)
return -EINVAL;
- group->tokens_reserved = val;
- idxd_set_free_tokens(idxd);
+ group->rdbufs_reserved = val;
+ idxd_set_free_rdbufs(idxd);
return count;
}
+static ssize_t group_tokens_reserved_store(struct device *dev,
+ struct device_attribute *attr,
+ const char *buf, size_t count)
+{
+ dev_warn_once(dev, "attribute deprecated, see read_buffers_reserved.\n");
+ return group_read_buffers_reserved_store(dev, attr, buf, count);
+}
+
static struct device_attribute dev_attr_group_tokens_reserved =
__ATTR(tokens_reserved, 0644, group_tokens_reserved_show,
group_tokens_reserved_store);
+static struct device_attribute dev_attr_group_read_buffers_reserved =
+ __ATTR(read_buffers_reserved, 0644, group_read_buffers_reserved_show,
+ group_read_buffers_reserved_store);
+
+static ssize_t group_read_buffers_allowed_show(struct device *dev,
+ struct device_attribute *attr,
+ char *buf)
+{
+ struct idxd_group *group = confdev_to_group(dev);
+
+ return sysfs_emit(buf, "%u\n", group->rdbufs_allowed);
+}
+
static ssize_t group_tokens_allowed_show(struct device *dev,
struct device_attribute *attr,
char *buf)
{
- struct idxd_group *group = confdev_to_group(dev);
-
- return sysfs_emit(buf, "%u\n", group->tokens_allowed);
+ dev_warn_once(dev, "attribute deprecated, see read_buffers_allowed.\n");
+ return group_read_buffers_allowed_show(dev, attr, buf);
}
-static ssize_t group_tokens_allowed_store(struct device *dev,
- struct device_attribute *attr,
- const char *buf, size_t count)
+static ssize_t group_read_buffers_allowed_store(struct device *dev,
+ struct device_attribute *attr,
+ const char *buf, size_t count)
{
struct idxd_group *group = confdev_to_group(dev);
struct idxd_device *idxd = group->idxd;
@@ -190,29 +218,49 @@ static ssize_t group_tokens_allowed_store(struct device *dev,
return -EPERM;
if (val < 4 * group->num_engines ||
- val > group->tokens_reserved + idxd->nr_tokens)
+ val > group->rdbufs_reserved + idxd->nr_rdbufs)
return -EINVAL;
- group->tokens_allowed = val;
+ group->rdbufs_allowed = val;
return count;
}
+static ssize_t group_tokens_allowed_store(struct device *dev,
+ struct device_attribute *attr,
+ const char *buf, size_t count)
+{
+ dev_warn_once(dev, "attribute deprecated, see read_buffers_allowed.\n");
+ return group_read_buffers_allowed_store(dev, attr, buf, count);
+}
+
static struct device_attribute dev_attr_group_tokens_allowed =
__ATTR(tokens_allowed, 0644, group_tokens_allowed_show,
group_tokens_allowed_store);
+static struct device_attribute dev_attr_group_read_buffers_allowed =
+ __ATTR(read_buffers_allowed, 0644, group_read_buffers_allowed_show,
+ group_read_buffers_allowed_store);
+
+static ssize_t group_use_read_buffer_limit_show(struct device *dev,
+ struct device_attribute *attr,
+ char *buf)
+{
+ struct idxd_group *group = confdev_to_group(dev);
+
+ return sysfs_emit(buf, "%u\n", group->use_rdbuf_limit);
+}
+
static ssize_t group_use_token_limit_show(struct device *dev,
struct device_attribute *attr,
char *buf)
{
- struct idxd_group *group = confdev_to_group(dev);
-
- return sysfs_emit(buf, "%u\n", group->use_token_limit);
+ dev_warn_once(dev, "attribute deprecated, see use_read_buffer_limit.\n");
+ return group_use_read_buffer_limit_show(dev, attr, buf);
}
-static ssize_t group_use_token_limit_store(struct device *dev,
- struct device_attribute *attr,
- const char *buf, size_t count)
+static ssize_t group_use_read_buffer_limit_store(struct device *dev,
+ struct device_attribute *attr,
+ const char *buf, size_t count)
{
struct idxd_group *group = confdev_to_group(dev);
struct idxd_device *idxd = group->idxd;
@@ -232,17 +280,29 @@ static ssize_t group_use_token_limit_store(struct device *dev,
if (idxd->state == IDXD_DEV_ENABLED)
return -EPERM;
- if (idxd->token_limit == 0)
+ if (idxd->rdbuf_limit == 0)
return -EPERM;
- group->use_token_limit = !!val;
+ group->use_rdbuf_limit = !!val;
return count;
}
+static ssize_t group_use_token_limit_store(struct device *dev,
+ struct device_attribute *attr,
+ const char *buf, size_t count)
+{
+ dev_warn_once(dev, "attribute deprecated, see use_read_buffer_limit.\n");
+ return group_use_read_buffer_limit_store(dev, attr, buf, count);
+}
+
static struct device_attribute dev_attr_group_use_token_limit =
__ATTR(use_token_limit, 0644, group_use_token_limit_show,
group_use_token_limit_store);
+static struct device_attribute dev_attr_group_use_read_buffer_limit =
+ __ATTR(use_read_buffer_limit, 0644, group_use_read_buffer_limit_show,
+ group_use_read_buffer_limit_store);
+
static ssize_t group_engines_show(struct device *dev,
struct device_attribute *attr, char *buf)
{
@@ -387,8 +447,11 @@ static struct attribute *idxd_group_attributes[] = {
&dev_attr_group_work_queues.attr,
&dev_attr_group_engines.attr,
&dev_attr_group_use_token_limit.attr,
+ &dev_attr_group_use_read_buffer_limit.attr,
&dev_attr_group_tokens_allowed.attr,
+ &dev_attr_group_read_buffers_allowed.attr,
&dev_attr_group_tokens_reserved.attr,
+ &dev_attr_group_read_buffers_reserved.attr,
&dev_attr_group_traffic_class_a.attr,
&dev_attr_group_traffic_class_b.attr,
NULL,
@@ -945,6 +1008,41 @@ static ssize_t wq_occupancy_show(struct device *dev, struct device_attribute *at
static struct device_attribute dev_attr_wq_occupancy =
__ATTR(occupancy, 0444, wq_occupancy_show, NULL);
+static ssize_t wq_enqcmds_retries_show(struct device *dev,
+ struct device_attribute *attr, char *buf)
+{
+ struct idxd_wq *wq = confdev_to_wq(dev);
+
+ if (wq_dedicated(wq))
+ return -EOPNOTSUPP;
+
+ return sysfs_emit(buf, "%u\n", wq->enqcmds_retries);
+}
+
+static ssize_t wq_enqcmds_retries_store(struct device *dev, struct device_attribute *attr,
+ const char *buf, size_t count)
+{
+ struct idxd_wq *wq = confdev_to_wq(dev);
+ int rc;
+ unsigned int retries;
+
+ if (wq_dedicated(wq))
+ return -EOPNOTSUPP;
+
+ rc = kstrtouint(buf, 10, &retries);
+ if (rc < 0)
+ return rc;
+
+ if (retries > IDXD_ENQCMDS_MAX_RETRIES)
+ retries = IDXD_ENQCMDS_MAX_RETRIES;
+
+ wq->enqcmds_retries = retries;
+ return count;
+}
+
+static struct device_attribute dev_attr_wq_enqcmds_retries =
+ __ATTR(enqcmds_retries, 0644, wq_enqcmds_retries_show, wq_enqcmds_retries_store);
+
static struct attribute *idxd_wq_attributes[] = {
&dev_attr_wq_clients.attr,
&dev_attr_wq_state.attr,
@@ -961,6 +1059,7 @@ static struct attribute *idxd_wq_attributes[] = {
&dev_attr_wq_max_batch_size.attr,
&dev_attr_wq_ats_disable.attr,
&dev_attr_wq_occupancy.attr,
+ &dev_attr_wq_enqcmds_retries.attr,
NULL,
};
@@ -1156,26 +1255,42 @@ static ssize_t errors_show(struct device *dev,
}
static DEVICE_ATTR_RO(errors);
+static ssize_t max_read_buffers_show(struct device *dev,
+ struct device_attribute *attr, char *buf)
+{
+ struct idxd_device *idxd = confdev_to_idxd(dev);
+
+ return sysfs_emit(buf, "%u\n", idxd->max_rdbufs);
+}
+
static ssize_t max_tokens_show(struct device *dev,
struct device_attribute *attr, char *buf)
{
+ dev_warn_once(dev, "attribute deprecated, see max_read_buffers.\n");
+ return max_read_buffers_show(dev, attr, buf);
+}
+
+static DEVICE_ATTR_RO(max_tokens); /* deprecated */
+static DEVICE_ATTR_RO(max_read_buffers);
+
+static ssize_t read_buffer_limit_show(struct device *dev,
+ struct device_attribute *attr, char *buf)
+{
struct idxd_device *idxd = confdev_to_idxd(dev);
- return sysfs_emit(buf, "%u\n", idxd->max_tokens);
+ return sysfs_emit(buf, "%u\n", idxd->rdbuf_limit);
}
-static DEVICE_ATTR_RO(max_tokens);
static ssize_t token_limit_show(struct device *dev,
struct device_attribute *attr, char *buf)
{
- struct idxd_device *idxd = confdev_to_idxd(dev);
-
- return sysfs_emit(buf, "%u\n", idxd->token_limit);
+ dev_warn_once(dev, "attribute deprecated, see read_buffer_limit.\n");
+ return read_buffer_limit_show(dev, attr, buf);
}
-static ssize_t token_limit_store(struct device *dev,
- struct device_attribute *attr,
- const char *buf, size_t count)
+static ssize_t read_buffer_limit_store(struct device *dev,
+ struct device_attribute *attr,
+ const char *buf, size_t count)
{
struct idxd_device *idxd = confdev_to_idxd(dev);
unsigned long val;
@@ -1191,16 +1306,26 @@ static ssize_t token_limit_store(struct device *dev,
if (!test_bit(IDXD_FLAG_CONFIGURABLE, &idxd->flags))
return -EPERM;
- if (!idxd->hw.group_cap.token_limit)
+ if (!idxd->hw.group_cap.rdbuf_limit)
return -EPERM;
- if (val > idxd->hw.group_cap.total_tokens)
+ if (val > idxd->hw.group_cap.total_rdbufs)
return -EINVAL;
- idxd->token_limit = val;
+ idxd->rdbuf_limit = val;
return count;
}
-static DEVICE_ATTR_RW(token_limit);
+
+static ssize_t token_limit_store(struct device *dev,
+ struct device_attribute *attr,
+ const char *buf, size_t count)
+{
+ dev_warn_once(dev, "attribute deprecated, see read_buffer_limit\n");
+ return read_buffer_limit_store(dev, attr, buf, count);
+}
+
+static DEVICE_ATTR_RW(token_limit); /* deprecated */
+static DEVICE_ATTR_RW(read_buffer_limit);
static ssize_t cdev_major_show(struct device *dev,
struct device_attribute *attr, char *buf)
@@ -1246,7 +1371,9 @@ static struct attribute *idxd_device_attributes[] = {
&dev_attr_state.attr,
&dev_attr_errors.attr,
&dev_attr_max_tokens.attr,
+ &dev_attr_max_read_buffers.attr,
&dev_attr_token_limit.attr,
+ &dev_attr_read_buffer_limit.attr,
&dev_attr_cdev_major.attr,
&dev_attr_cmd_status.attr,
NULL,
@@ -1268,8 +1395,6 @@ static void idxd_conf_device_release(struct device *dev)
kfree(idxd->groups);
kfree(idxd->wqs);
kfree(idxd->engines);
- kfree(idxd->irq_entries);
- kfree(idxd->int_handles);
ida_free(&idxd_ida, idxd->id);
kfree(idxd);
}