summaryrefslogtreecommitdiffstats
path: root/drivers/misc
diff options
context:
space:
mode:
Diffstat (limited to 'drivers/misc')
-rw-r--r--drivers/misc/habanalabs/command_submission.c24
-rw-r--r--drivers/misc/habanalabs/gaudi/gaudi.c33
-rw-r--r--drivers/misc/habanalabs/goya/goya.c34
-rw-r--r--drivers/misc/habanalabs/goya/goyaP.h6
-rw-r--r--drivers/misc/habanalabs/habanalabs.h10
-rw-r--r--drivers/misc/habanalabs/hw_queue.c19
6 files changed, 83 insertions, 43 deletions
diff --git a/drivers/misc/habanalabs/command_submission.c b/drivers/misc/habanalabs/command_submission.c
index 777f88d25acd..7769a1aacca1 100644
--- a/drivers/misc/habanalabs/command_submission.c
+++ b/drivers/misc/habanalabs/command_submission.c
@@ -363,6 +363,7 @@ static void cs_do_release(struct kref *ref)
cs_counters_aggregate(hdev, cs->ctx);
+ kfree(cs->jobs_in_queue_cnt);
kfree(cs);
}
@@ -435,13 +436,19 @@ static int allocate_cs(struct hl_device *hdev, struct hl_ctx *ctx,
other = ctx->cs_pending[cs_cmpl->cs_seq &
(hdev->asic_prop.max_pending_cs - 1)];
if ((other) && (!dma_fence_is_signaled(other))) {
- spin_unlock(&ctx->cs_lock);
dev_dbg(hdev->dev,
"Rejecting CS because of too many in-flights CS\n");
rc = -EAGAIN;
goto free_fence;
}
+ cs->jobs_in_queue_cnt = kcalloc(hdev->asic_prop.max_queues,
+ sizeof(*cs->jobs_in_queue_cnt), GFP_ATOMIC);
+ if (!cs->jobs_in_queue_cnt) {
+ rc = -ENOMEM;
+ goto free_fence;
+ }
+
dma_fence_init(&cs_cmpl->base_fence, &hl_fence_ops, &cs_cmpl->lock,
ctx->asid, ctx->cs_sequence);
@@ -463,6 +470,7 @@ static int allocate_cs(struct hl_device *hdev, struct hl_ctx *ctx,
return 0;
free_fence:
+ spin_unlock(&ctx->cs_lock);
kfree(cs_cmpl);
free_cs:
kfree(cs);
@@ -515,10 +523,18 @@ static int validate_queue_index(struct hl_device *hdev,
struct asic_fixed_properties *asic = &hdev->asic_prop;
struct hw_queue_properties *hw_queue_prop;
+ /* This must be checked here to prevent out-of-bounds access to
+ * hw_queues_props array
+ */
+ if (chunk->queue_index >= asic->max_queues) {
+ dev_err(hdev->dev, "Queue index %d is invalid\n",
+ chunk->queue_index);
+ return -EINVAL;
+ }
+
hw_queue_prop = &asic->hw_queues_props[chunk->queue_index];
- if ((chunk->queue_index >= HL_MAX_QUEUES) ||
- (hw_queue_prop->type == QUEUE_TYPE_NA)) {
+ if (hw_queue_prop->type == QUEUE_TYPE_NA) {
dev_err(hdev->dev, "Queue index %d is invalid\n",
chunk->queue_index);
return -EINVAL;
@@ -795,7 +811,7 @@ static int cs_ioctl_signal_wait(struct hl_fpriv *hpriv, enum hl_cs_type cs_type,
hw_queue_prop = &hdev->asic_prop.hw_queues_props[q_idx];
q_type = hw_queue_prop->type;
- if ((q_idx >= HL_MAX_QUEUES) ||
+ if ((q_idx >= hdev->asic_prop.max_queues) ||
(!hw_queue_prop->supports_sync_stream)) {
dev_err(hdev->dev, "Queue index %d is invalid\n", q_idx);
rc = -EINVAL;
diff --git a/drivers/misc/habanalabs/gaudi/gaudi.c b/drivers/misc/habanalabs/gaudi/gaudi.c
index eede6c33a37f..7eee4a10154b 100644
--- a/drivers/misc/habanalabs/gaudi/gaudi.c
+++ b/drivers/misc/habanalabs/gaudi/gaudi.c
@@ -340,14 +340,15 @@ static int gaudi_get_fixed_properties(struct hl_device *hdev)
struct asic_fixed_properties *prop = &hdev->asic_prop;
int i;
- if (GAUDI_QUEUE_ID_SIZE >= HL_MAX_QUEUES) {
- dev_err(hdev->dev,
- "Number of H/W queues must be smaller than %d\n",
- HL_MAX_QUEUES);
- return -EFAULT;
- }
+ prop->max_queues = GAUDI_QUEUE_ID_SIZE;
+ prop->hw_queues_props = kcalloc(prop->max_queues,
+ sizeof(struct hw_queue_properties),
+ GFP_KERNEL);
- for (i = 0 ; i < GAUDI_QUEUE_ID_SIZE ; i++) {
+ if (!prop->hw_queues_props)
+ return -ENOMEM;
+
+ for (i = 0 ; i < prop->max_queues ; i++) {
if (gaudi_queue_type[i] == QUEUE_TYPE_EXT) {
prop->hw_queues_props[i].type = QUEUE_TYPE_EXT;
prop->hw_queues_props[i].driver_only = 0;
@@ -370,9 +371,6 @@ static int gaudi_get_fixed_properties(struct hl_device *hdev)
}
}
- for (; i < HL_MAX_QUEUES; i++)
- prop->hw_queues_props[i].type = QUEUE_TYPE_NA;
-
prop->completion_queues_count = NUMBER_OF_CMPLT_QUEUES;
prop->sync_stream_first_sob = 0;
prop->sync_stream_first_mon = 0;
@@ -548,7 +546,8 @@ static int gaudi_early_init(struct hl_device *hdev)
(unsigned long long) pci_resource_len(pdev,
SRAM_BAR_ID),
SRAM_BAR_SIZE);
- return -ENODEV;
+ rc = -ENODEV;
+ goto free_queue_props;
}
if (pci_resource_len(pdev, CFG_BAR_ID) != CFG_BAR_SIZE) {
@@ -558,20 +557,26 @@ static int gaudi_early_init(struct hl_device *hdev)
(unsigned long long) pci_resource_len(pdev,
CFG_BAR_ID),
CFG_BAR_SIZE);
- return -ENODEV;
+ rc = -ENODEV;
+ goto free_queue_props;
}
prop->dram_pci_bar_size = pci_resource_len(pdev, HBM_BAR_ID);
rc = hl_pci_init(hdev);
if (rc)
- return rc;
+ goto free_queue_props;
return 0;
+
+free_queue_props:
+ kfree(hdev->asic_prop.hw_queues_props);
+ return rc;
}
static int gaudi_early_fini(struct hl_device *hdev)
{
+ kfree(hdev->asic_prop.hw_queues_props);
hl_pci_fini(hdev);
return 0;
@@ -3461,7 +3466,7 @@ static int gaudi_test_queues(struct hl_device *hdev)
{
int i, rc, ret_val = 0;
- for (i = 0 ; i < HL_MAX_QUEUES ; i++) {
+ for (i = 0 ; i < hdev->asic_prop.max_queues ; i++) {
if (hdev->asic_prop.hw_queues_props[i].type == QUEUE_TYPE_EXT) {
rc = gaudi_test_queue(hdev, i);
if (rc)
diff --git a/drivers/misc/habanalabs/goya/goya.c b/drivers/misc/habanalabs/goya/goya.c
index 5839b5bc9ee3..36db771f391c 100644
--- a/drivers/misc/habanalabs/goya/goya.c
+++ b/drivers/misc/habanalabs/goya/goya.c
@@ -337,11 +337,19 @@ static int goya_mmu_set_dram_default_page(struct hl_device *hdev);
static int goya_mmu_add_mappings_for_device_cpu(struct hl_device *hdev);
static void goya_mmu_prepare(struct hl_device *hdev, u32 asid);
-void goya_get_fixed_properties(struct hl_device *hdev)
+int goya_get_fixed_properties(struct hl_device *hdev)
{
struct asic_fixed_properties *prop = &hdev->asic_prop;
int i;
+ prop->max_queues = GOYA_QUEUE_ID_SIZE;
+ prop->hw_queues_props = kcalloc(prop->max_queues,
+ sizeof(struct hw_queue_properties),
+ GFP_KERNEL);
+
+ if (!prop->hw_queues_props)
+ return -ENOMEM;
+
for (i = 0 ; i < NUMBER_OF_EXT_HW_QUEUES ; i++) {
prop->hw_queues_props[i].type = QUEUE_TYPE_EXT;
prop->hw_queues_props[i].driver_only = 0;
@@ -361,9 +369,6 @@ void goya_get_fixed_properties(struct hl_device *hdev)
prop->hw_queues_props[i].requires_kernel_cb = 0;
}
- for (; i < HL_MAX_QUEUES; i++)
- prop->hw_queues_props[i].type = QUEUE_TYPE_NA;
-
prop->completion_queues_count = NUMBER_OF_CMPLT_QUEUES;
prop->dram_base_address = DRAM_PHYS_BASE;
@@ -428,6 +433,8 @@ void goya_get_fixed_properties(struct hl_device *hdev)
CARD_NAME_MAX_LEN);
prop->max_pending_cs = GOYA_MAX_PENDING_CS;
+
+ return 0;
}
/*
@@ -540,7 +547,11 @@ static int goya_early_init(struct hl_device *hdev)
u32 val;
int rc;
- goya_get_fixed_properties(hdev);
+ rc = goya_get_fixed_properties(hdev);
+ if (rc) {
+ dev_err(hdev->dev, "Failed to get fixed properties\n");
+ return rc;
+ }
/* Check BAR sizes */
if (pci_resource_len(pdev, SRAM_CFG_BAR_ID) != CFG_BAR_SIZE) {
@@ -550,7 +561,8 @@ static int goya_early_init(struct hl_device *hdev)
(unsigned long long) pci_resource_len(pdev,
SRAM_CFG_BAR_ID),
CFG_BAR_SIZE);
- return -ENODEV;
+ rc = -ENODEV;
+ goto free_queue_props;
}
if (pci_resource_len(pdev, MSIX_BAR_ID) != MSIX_BAR_SIZE) {
@@ -560,14 +572,15 @@ static int goya_early_init(struct hl_device *hdev)
(unsigned long long) pci_resource_len(pdev,
MSIX_BAR_ID),
MSIX_BAR_SIZE);
- return -ENODEV;
+ rc = -ENODEV;
+ goto free_queue_props;
}
prop->dram_pci_bar_size = pci_resource_len(pdev, DDR_BAR_ID);
rc = hl_pci_init(hdev);
if (rc)
- return rc;
+ goto free_queue_props;
if (!hdev->pldm) {
val = RREG32(mmPSOC_GLOBAL_CONF_BOOT_STRAP_PINS);
@@ -577,6 +590,10 @@ static int goya_early_init(struct hl_device *hdev)
}
return 0;
+
+free_queue_props:
+ kfree(hdev->asic_prop.hw_queues_props);
+ return rc;
}
/*
@@ -589,6 +606,7 @@ static int goya_early_init(struct hl_device *hdev)
*/
static int goya_early_fini(struct hl_device *hdev)
{
+ kfree(hdev->asic_prop.hw_queues_props);
hl_pci_fini(hdev);
return 0;
diff --git a/drivers/misc/habanalabs/goya/goyaP.h b/drivers/misc/habanalabs/goya/goyaP.h
index 9d8a1761252d..8265cc21b45a 100644
--- a/drivers/misc/habanalabs/goya/goyaP.h
+++ b/drivers/misc/habanalabs/goya/goyaP.h
@@ -31,10 +31,6 @@
*/
#define NUMBER_OF_INTERRUPTS (NUMBER_OF_CMPLT_QUEUES + 1)
-#if (NUMBER_OF_HW_QUEUES >= HL_MAX_QUEUES)
-#error "Number of H/W queues must be smaller than HL_MAX_QUEUES"
-#endif
-
#if (NUMBER_OF_INTERRUPTS > GOYA_MSIX_ENTRIES)
#error "Number of MSIX interrupts must be smaller or equal to GOYA_MSIX_ENTRIES"
#endif
@@ -170,7 +166,7 @@ struct goya_device {
u8 device_cpu_mmu_mappings_done;
};
-void goya_get_fixed_properties(struct hl_device *hdev);
+int goya_get_fixed_properties(struct hl_device *hdev);
int goya_mmu_init(struct hl_device *hdev);
void goya_init_dma_qmans(struct hl_device *hdev);
void goya_init_mme_qmans(struct hl_device *hdev);
diff --git a/drivers/misc/habanalabs/habanalabs.h b/drivers/misc/habanalabs/habanalabs.h
index 365236589bbf..9213d107b533 100644
--- a/drivers/misc/habanalabs/habanalabs.h
+++ b/drivers/misc/habanalabs/habanalabs.h
@@ -41,8 +41,6 @@
#define HL_SIM_MAX_TIMEOUT_US 10000000 /* 10s */
-#define HL_MAX_QUEUES 128
-
#define HL_IDLE_BUSY_TS_ARR_SIZE 4096
/* Memory */
@@ -290,14 +288,15 @@ struct hl_mmu_properties {
* @high_pll: high PLL frequency used by the device.
* @cb_pool_cb_cnt: number of CBs in the CB pool.
* @cb_pool_cb_size: size of each CB in the CB pool.
- * @tpc_enabled_mask: which TPCs are enabled.
+ * @max_pending_cs: maximum of concurrent pending command submissions
+ * @max_queues: maximum amount of queues in the system
* @sync_stream_first_sob: first sync object available for sync stream use
* @sync_stream_first_mon: first monitor available for sync stream use
* @tpc_enabled_mask: which TPCs are enabled.
* @completion_queues_count: number of completion queues.
*/
struct asic_fixed_properties {
- struct hw_queue_properties hw_queues_props[HL_MAX_QUEUES];
+ struct hw_queue_properties *hw_queues_props;
struct armcp_info armcp_info;
char uboot_ver[VERSION_MAX_LEN];
char preboot_ver[VERSION_MAX_LEN];
@@ -336,6 +335,7 @@ struct asic_fixed_properties {
u32 cb_pool_cb_cnt;
u32 cb_pool_cb_size;
u32 max_pending_cs;
+ u32 max_queues;
u16 sync_stream_first_sob;
u16 sync_stream_first_mon;
u8 tpc_enabled_mask;
@@ -901,7 +901,7 @@ struct hl_userptr {
* @aborted: true if CS was aborted due to some device error.
*/
struct hl_cs {
- u16 jobs_in_queue_cnt[HL_MAX_QUEUES];
+ u16 *jobs_in_queue_cnt;
struct hl_ctx *ctx;
struct list_head job_list;
spinlock_t job_lock;
diff --git a/drivers/misc/habanalabs/hw_queue.c b/drivers/misc/habanalabs/hw_queue.c
index da66ffb528f8..7965551587fc 100644
--- a/drivers/misc/habanalabs/hw_queue.c
+++ b/drivers/misc/habanalabs/hw_queue.c
@@ -46,7 +46,7 @@ void hl_int_hw_queue_update_ci(struct hl_cs *cs)
goto out;
q = &hdev->kernel_queues[0];
- for (i = 0 ; i < HL_MAX_QUEUES ; i++, q++) {
+ for (i = 0 ; i < hdev->asic_prop.max_queues ; i++, q++) {
if (q->queue_type == QUEUE_TYPE_INT) {
q->ci += cs->jobs_in_queue_cnt[i];
q->ci &= ((q->int_queue_len << 1) - 1);
@@ -509,6 +509,7 @@ int hl_hw_queue_schedule_cs(struct hl_cs *cs)
struct hl_device *hdev = ctx->hdev;
struct hl_cs_job *job, *tmp;
struct hl_hw_queue *q;
+ u32 max_queues;
int rc = 0, i, cq_cnt;
hdev->asic_funcs->hw_queues_lock(hdev);
@@ -521,8 +522,10 @@ int hl_hw_queue_schedule_cs(struct hl_cs *cs)
goto out;
}
+ max_queues = hdev->asic_prop.max_queues;
+
q = &hdev->kernel_queues[0];
- for (i = 0, cq_cnt = 0 ; i < HL_MAX_QUEUES ; i++, q++) {
+ for (i = 0, cq_cnt = 0 ; i < max_queues ; i++, q++) {
if (cs->jobs_in_queue_cnt[i]) {
switch (q->queue_type) {
case QUEUE_TYPE_EXT:
@@ -601,7 +604,7 @@ int hl_hw_queue_schedule_cs(struct hl_cs *cs)
unroll_cq_resv:
q = &hdev->kernel_queues[0];
- for (i = 0 ; (i < HL_MAX_QUEUES) && (cq_cnt > 0) ; i++, q++) {
+ for (i = 0 ; (i < max_queues) && (cq_cnt > 0) ; i++, q++) {
if ((q->queue_type == QUEUE_TYPE_EXT ||
q->queue_type == QUEUE_TYPE_HW) &&
cs->jobs_in_queue_cnt[i]) {
@@ -872,7 +875,7 @@ int hl_hw_queues_create(struct hl_device *hdev)
struct hl_hw_queue *q;
int i, rc, q_ready_cnt;
- hdev->kernel_queues = kcalloc(HL_MAX_QUEUES,
+ hdev->kernel_queues = kcalloc(asic->max_queues,
sizeof(*hdev->kernel_queues), GFP_KERNEL);
if (!hdev->kernel_queues) {
@@ -882,7 +885,7 @@ int hl_hw_queues_create(struct hl_device *hdev)
/* Initialize the H/W queues */
for (i = 0, q_ready_cnt = 0, q = hdev->kernel_queues;
- i < HL_MAX_QUEUES ; i++, q_ready_cnt++, q++) {
+ i < asic->max_queues ; i++, q_ready_cnt++, q++) {
q->queue_type = asic->hw_queues_props[i].type;
q->supports_sync_stream =
@@ -909,9 +912,10 @@ release_queues:
void hl_hw_queues_destroy(struct hl_device *hdev)
{
struct hl_hw_queue *q;
+ u32 max_queues = hdev->asic_prop.max_queues;
int i;
- for (i = 0, q = hdev->kernel_queues ; i < HL_MAX_QUEUES ; i++, q++)
+ for (i = 0, q = hdev->kernel_queues ; i < max_queues ; i++, q++)
queue_fini(hdev, q);
kfree(hdev->kernel_queues);
@@ -920,9 +924,10 @@ void hl_hw_queues_destroy(struct hl_device *hdev)
void hl_hw_queue_reset(struct hl_device *hdev, bool hard_reset)
{
struct hl_hw_queue *q;
+ u32 max_queues = hdev->asic_prop.max_queues;
int i;
- for (i = 0, q = hdev->kernel_queues ; i < HL_MAX_QUEUES ; i++, q++) {
+ for (i = 0, q = hdev->kernel_queues ; i < max_queues ; i++, q++) {
if ((!q->valid) ||
((!hard_reset) && (q->queue_type == QUEUE_TYPE_CPU)))
continue;