Diffstat (limited to 'drivers/misc/habanalabs/common/hw_queue.c')
-rw-r--r-- | drivers/misc/habanalabs/common/hw_queue.c | 273
1 file changed, 174 insertions, 99 deletions
diff --git a/drivers/misc/habanalabs/common/hw_queue.c b/drivers/misc/habanalabs/common/hw_queue.c
index 250cf9cefc06..7caf868d1585 100644
--- a/drivers/misc/habanalabs/common/hw_queue.c
+++ b/drivers/misc/habanalabs/common/hw_queue.c
@@ -48,6 +48,11 @@ void hl_int_hw_queue_update_ci(struct hl_cs *cs)
 		return;
 
 	q = &hdev->kernel_queues[0];
+
+	/* There are no internal queues if H/W queues are being used */
+	if (!hdev->asic_prop.max_queues || q->queue_type == QUEUE_TYPE_HW)
+		return;
+
 	for (i = 0 ; i < hdev->asic_prop.max_queues ; i++, q++) {
 		if (q->queue_type == QUEUE_TYPE_INT)
 			atomic_add(cs->jobs_in_queue_cnt[i], &q->ci);
@@ -333,7 +338,14 @@ static void int_queue_schedule_job(struct hl_cs_job *job)
 
 	bd.ctl = 0;
 	bd.len = cpu_to_le32(job->job_cb_size);
-	bd.ptr = cpu_to_le64((u64) (uintptr_t) job->user_cb);
+
+	if (job->is_kernel_allocated_cb)
+		/* bus_address is actually a mmu mapped address
+		 * allocated from an internal pool
+		 */
+		bd.ptr = cpu_to_le64(job->user_cb->bus_address);
+	else
+		bd.ptr = cpu_to_le64((u64) (uintptr_t) job->user_cb);
 
 	pi = q->kernel_address + (q->pi & (q->int_queue_len - 1)) * sizeof(bd);
 
@@ -388,6 +400,91 @@ static void hw_queue_schedule_job(struct hl_cs_job *job)
 	ext_and_hw_queue_submit_bd(hdev, q, ctl, len, ptr);
 }
 
+static void init_signal_cs(struct hl_device *hdev,
+		struct hl_cs_job *job, struct hl_cs_compl *cs_cmpl)
+{
+	struct hl_sync_stream_properties *prop;
+	struct hl_hw_sob *hw_sob;
+	u32 q_idx;
+
+	q_idx = job->hw_queue_id;
+	prop = &hdev->kernel_queues[q_idx].sync_stream_prop;
+	hw_sob = &prop->hw_sob[prop->curr_sob_offset];
+
+	cs_cmpl->hw_sob = hw_sob;
+	cs_cmpl->sob_val = prop->next_sob_val++;
+
+	dev_dbg(hdev->dev,
+		"generate signal CB, sob_id: %d, sob val: 0x%x, q_idx: %d\n",
+		cs_cmpl->hw_sob->sob_id, cs_cmpl->sob_val, q_idx);
+
+	hdev->asic_funcs->gen_signal_cb(hdev, job->patched_cb,
+				cs_cmpl->hw_sob->sob_id, 0);
+
+	kref_get(&hw_sob->kref);
+
+	/* check for wraparound */
+	if (prop->next_sob_val == HL_MAX_SOB_VAL) {
+		/*
+		 * Decrement as we reached the max value.
+		 * The release function won't be called here as we've
+		 * just incremented the refcount.
+		 */
+		kref_put(&hw_sob->kref, hl_sob_reset_error);
+		prop->next_sob_val = 1;
+		/* only two SOBs are currently in use */
+		prop->curr_sob_offset =
+			(prop->curr_sob_offset + 1) % HL_RSVD_SOBS;
+
+		dev_dbg(hdev->dev, "switched to SOB %d, q_idx: %d\n",
+				prop->curr_sob_offset, q_idx);
+	}
+}
+
+static void init_wait_cs(struct hl_device *hdev, struct hl_cs *cs,
+		struct hl_cs_job *job, struct hl_cs_compl *cs_cmpl)
+{
+	struct hl_cs_compl *signal_cs_cmpl;
+	struct hl_sync_stream_properties *prop;
+	struct hl_gen_wait_properties wait_prop;
+	u32 q_idx;
+
+	q_idx = job->hw_queue_id;
+	prop = &hdev->kernel_queues[q_idx].sync_stream_prop;
+
+	signal_cs_cmpl = container_of(cs->signal_fence,
+					struct hl_cs_compl,
+					base_fence);
+
+	/* copy the SOB id and value of the signal CS */
+	cs_cmpl->hw_sob = signal_cs_cmpl->hw_sob;
+	cs_cmpl->sob_val = signal_cs_cmpl->sob_val;
+
+	dev_dbg(hdev->dev,
+		"generate wait CB, sob_id: %d, sob_val: 0x%x, mon_id: %d, q_idx: %d\n",
+		cs_cmpl->hw_sob->sob_id, cs_cmpl->sob_val,
+		prop->base_mon_id, q_idx);
+
+	wait_prop.data = (void *) job->patched_cb;
+	wait_prop.sob_base = cs_cmpl->hw_sob->sob_id;
+	wait_prop.sob_mask = 0x1;
+	wait_prop.sob_val = cs_cmpl->sob_val;
+	wait_prop.mon_id = prop->base_mon_id;
+	wait_prop.q_idx = q_idx;
+	wait_prop.size = 0;
+	hdev->asic_funcs->gen_wait_cb(hdev, &wait_prop);
+
+	kref_get(&cs_cmpl->hw_sob->kref);
+	/*
+	 * Must put the signal fence after the SOB refcnt increment so
+	 * the SOB refcnt won't turn 0 and reset the SOB before the
+	 * wait CS was submitted.
+	 */
+	mb();
+	hl_fence_put(cs->signal_fence);
+	cs->signal_fence = NULL;
+}
+
 /*
  * init_signal_wait_cs - initialize a signal/wait CS
  * @cs: pointer to the signal/wait CS
@@ -398,84 +495,18 @@ static void init_signal_wait_cs(struct hl_cs *cs)
 {
 	struct hl_ctx *ctx = cs->ctx;
 	struct hl_device *hdev = ctx->hdev;
-	struct hl_hw_queue *hw_queue;
+	struct hl_cs_job *job;
 	struct hl_cs_compl *cs_cmpl =
 			container_of(cs->fence, struct hl_cs_compl, base_fence);
 
-	struct hl_hw_sob *hw_sob;
-	struct hl_cs_job *job;
-	u32 q_idx;
-
 	/* There is only one job in a signal/wait CS */
 	job = list_first_entry(&cs->job_list, struct hl_cs_job, cs_node);
-	q_idx = job->hw_queue_id;
-	hw_queue = &hdev->kernel_queues[q_idx];
-
-	if (cs->type & CS_TYPE_SIGNAL) {
-		hw_sob = &hw_queue->hw_sob[hw_queue->curr_sob_offset];
-
-		cs_cmpl->hw_sob = hw_sob;
-		cs_cmpl->sob_val = hw_queue->next_sob_val++;
-
-		dev_dbg(hdev->dev,
-			"generate signal CB, sob_id: %d, sob val: 0x%x, q_idx: %d\n",
-			cs_cmpl->hw_sob->sob_id, cs_cmpl->sob_val, q_idx);
-
-		hdev->asic_funcs->gen_signal_cb(hdev, job->patched_cb,
-					cs_cmpl->hw_sob->sob_id);
-
-		kref_get(&hw_sob->kref);
-
-		/* check for wraparound */
-		if (hw_queue->next_sob_val == HL_MAX_SOB_VAL) {
-			/*
-			 * Decrement as we reached the max value.
-			 * The release function won't be called here as we've
-			 * just incremented the refcount.
-			 */
-			kref_put(&hw_sob->kref, hl_sob_reset_error);
-			hw_queue->next_sob_val = 1;
-			/* only two SOBs are currently in use */
-			hw_queue->curr_sob_offset =
-				(hw_queue->curr_sob_offset + 1) %
-					HL_RSVD_SOBS_IN_USE;
-
-			dev_dbg(hdev->dev, "switched to SOB %d, q_idx: %d\n",
-				hw_queue->curr_sob_offset, q_idx);
-		}
-	} else if (cs->type & CS_TYPE_WAIT) {
-		struct hl_cs_compl *signal_cs_cmpl;
-
-		signal_cs_cmpl = container_of(cs->signal_fence,
-						struct hl_cs_compl,
-						base_fence);
-
-		/* copy the the SOB id and value of the signal CS */
-		cs_cmpl->hw_sob = signal_cs_cmpl->hw_sob;
-		cs_cmpl->sob_val = signal_cs_cmpl->sob_val;
-
-		dev_dbg(hdev->dev,
-			"generate wait CB, sob_id: %d, sob_val: 0x%x, mon_id: %d, q_idx: %d\n",
-			cs_cmpl->hw_sob->sob_id, cs_cmpl->sob_val,
-			hw_queue->base_mon_id, q_idx);
-		hdev->asic_funcs->gen_wait_cb(hdev, job->patched_cb,
-						cs_cmpl->hw_sob->sob_id,
-						cs_cmpl->sob_val,
-						hw_queue->base_mon_id,
-						q_idx);
-
-		kref_get(&cs_cmpl->hw_sob->kref);
-		/*
-		 * Must put the signal fence after the SOB refcnt increment so
-		 * the SOB refcnt won't turn 0 and reset the SOB before the
-		 * wait CS was submitted.
-		 */
-		mb();
-		hl_fence_put(cs->signal_fence);
-		cs->signal_fence = NULL;
-	}
+	if (cs->type & CS_TYPE_SIGNAL)
+		init_signal_cs(hdev, job, cs_cmpl);
+	else if (cs->type & CS_TYPE_WAIT)
+		init_wait_cs(hdev, cs, job, cs_cmpl);
 }
 
 /*
@@ -484,19 +515,24 @@ static void init_signal_wait_cs(struct hl_cs *cs)
  */
 int hl_hw_queue_schedule_cs(struct hl_cs *cs)
 {
+	enum hl_device_status status;
+	struct hl_cs_counters_atomic *cntr;
 	struct hl_ctx *ctx = cs->ctx;
 	struct hl_device *hdev = ctx->hdev;
 	struct hl_cs_job *job, *tmp;
 	struct hl_hw_queue *q;
-	u32 max_queues;
 	int rc = 0, i, cq_cnt;
+	u32 max_queues;
+
+	cntr = &hdev->aggregated_cs_counters;
 
 	hdev->asic_funcs->hw_queues_lock(hdev);
 
-	if (hl_device_disabled_or_in_reset(hdev)) {
-		ctx->cs_counters.device_in_reset_drop_cnt++;
+	if (!hl_device_operational(hdev, &status)) {
+		atomic64_inc(&cntr->device_in_reset_drop_cnt);
+		atomic64_inc(&ctx->cs_counters.device_in_reset_drop_cnt);
 		dev_err(hdev->dev,
-			"device is disabled or in reset, CS rejected!\n");
+			"device is %s, CS rejected!\n", hdev->status[status]);
 		rc = -EPERM;
 		goto out;
 	}
@@ -527,7 +563,9 @@ int hl_hw_queue_schedule_cs(struct hl_cs *cs)
 		}
 
 		if (rc) {
-			ctx->cs_counters.queue_full_drop_cnt++;
+			atomic64_inc(
+				&ctx->cs_counters.queue_full_drop_cnt);
+			atomic64_inc(&cntr->queue_full_drop_cnt);
 			goto unroll_cq_resv;
 		}
 
@@ -538,21 +576,23 @@ int hl_hw_queue_schedule_cs(struct hl_cs *cs)
 
 	if ((cs->type == CS_TYPE_SIGNAL) || (cs->type == CS_TYPE_WAIT))
 		init_signal_wait_cs(cs);
+	else if (cs->type == CS_TYPE_COLLECTIVE_WAIT)
+		hdev->asic_funcs->collective_wait_init_cs(cs);
 
-	spin_lock(&hdev->hw_queues_mirror_lock);
-	list_add_tail(&cs->mirror_node, &hdev->hw_queues_mirror_list);
+	spin_lock(&hdev->cs_mirror_lock);
+	list_add_tail(&cs->mirror_node, &hdev->cs_mirror_list);
 
 	/* Queue TDR if the CS is the first entry and if timeout is wanted */
 	if ((hdev->timeout_jiffies != MAX_SCHEDULE_TIMEOUT) &&
-			(list_first_entry(&hdev->hw_queues_mirror_list,
+			(list_first_entry(&hdev->cs_mirror_list,
 					struct hl_cs, mirror_node) == cs)) {
 		cs->tdr_active = true;
 		schedule_delayed_work(&cs->work_tdr, hdev->timeout_jiffies);
-		spin_unlock(&hdev->hw_queues_mirror_lock);
-	} else {
-		spin_unlock(&hdev->hw_queues_mirror_lock);
+
 	}
+	spin_unlock(&hdev->cs_mirror_lock);
+
 	if (!hdev->cs_active_cnt++) {
 		struct hl_device_idle_busy_ts *ts;
 
@@ -714,22 +754,56 @@ static int hw_queue_init(struct hl_device *hdev, struct hl_hw_queue *q)
 
 static void sync_stream_queue_init(struct hl_device *hdev, u32 q_idx)
 {
-	struct hl_hw_queue *hw_queue = &hdev->kernel_queues[q_idx];
+	struct hl_sync_stream_properties *sync_stream_prop;
 	struct asic_fixed_properties *prop = &hdev->asic_prop;
 	struct hl_hw_sob *hw_sob;
-	int sob, queue_idx = hdev->sync_stream_queue_idx++;
+	int sob, reserved_mon_idx, queue_idx;
+
+	sync_stream_prop = &hdev->kernel_queues[q_idx].sync_stream_prop;
+
+	/* We use 'collective_mon_idx' as a running index in order to reserve
+	 * monitors for collective master/slave queues.
+	 * collective master queue gets 2 reserved monitors
+	 * collective slave queue gets 1 reserved monitor
+	 */
+	if (hdev->kernel_queues[q_idx].collective_mode ==
+			HL_COLLECTIVE_MASTER) {
+		reserved_mon_idx = hdev->collective_mon_idx;
+
+		/* reserve the first monitor for collective master queue */
+		sync_stream_prop->collective_mstr_mon_id[0] =
+			prop->collective_first_mon + reserved_mon_idx;
+
+		/* reserve the second monitor for collective master queue */
+		sync_stream_prop->collective_mstr_mon_id[1] =
+			prop->collective_first_mon + reserved_mon_idx + 1;
+
+		hdev->collective_mon_idx += HL_COLLECTIVE_RSVD_MSTR_MONS;
+	} else if (hdev->kernel_queues[q_idx].collective_mode ==
+			HL_COLLECTIVE_SLAVE) {
+		reserved_mon_idx = hdev->collective_mon_idx++;
+
+		/* reserve a monitor for collective slave queue */
+		sync_stream_prop->collective_slave_mon_id =
+			prop->collective_first_mon + reserved_mon_idx;
+	}
+
+	if (!hdev->kernel_queues[q_idx].supports_sync_stream)
+		return;
+
+	queue_idx = hdev->sync_stream_queue_idx++;
 
-	hw_queue->base_sob_id =
-		prop->sync_stream_first_sob + queue_idx * HL_RSVD_SOBS;
-	hw_queue->base_mon_id =
-		prop->sync_stream_first_mon + queue_idx * HL_RSVD_MONS;
-	hw_queue->next_sob_val = 1;
-	hw_queue->curr_sob_offset = 0;
+	sync_stream_prop->base_sob_id = prop->sync_stream_first_sob +
+			(queue_idx * HL_RSVD_SOBS);
+	sync_stream_prop->base_mon_id = prop->sync_stream_first_mon +
+			(queue_idx * HL_RSVD_MONS);
+	sync_stream_prop->next_sob_val = 1;
+	sync_stream_prop->curr_sob_offset = 0;
 
 	for (sob = 0 ; sob < HL_RSVD_SOBS ; sob++) {
-		hw_sob = &hw_queue->hw_sob[sob];
+		hw_sob = &sync_stream_prop->hw_sob[sob];
 		hw_sob->hdev = hdev;
-		hw_sob->sob_id = hw_queue->base_sob_id + sob;
+		hw_sob->sob_id = sync_stream_prop->base_sob_id + sob;
 		hw_sob->q_idx = q_idx;
 		kref_init(&hw_sob->kref);
 	}
@@ -737,15 +811,16 @@ static void sync_stream_queue_init(struct hl_device *hdev, u32 q_idx)
 
 static void sync_stream_queue_reset(struct hl_device *hdev, u32 q_idx)
 {
-	struct hl_hw_queue *hw_queue = &hdev->kernel_queues[q_idx];
+	struct hl_sync_stream_properties *prop =
+			&hdev->kernel_queues[q_idx].sync_stream_prop;
 
 	/*
	 * In case we got here due to a stuck CS, the refcnt might be bigger
	 * than 1 and therefore we reset it.
	 */
-	kref_init(&hw_queue->hw_sob[hw_queue->curr_sob_offset].kref);
-	hw_queue->curr_sob_offset = 0;
-	hw_queue->next_sob_val = 1;
+	kref_init(&prop->hw_sob[prop->curr_sob_offset].kref);
+	prop->curr_sob_offset = 0;
+	prop->next_sob_val = 1;
 }
 
 /*
@@ -788,8 +863,7 @@ static int queue_init(struct hl_device *hdev, struct hl_hw_queue *q,
 		break;
 	}
 
-	if (q->supports_sync_stream)
-		sync_stream_queue_init(hdev, q->hw_queue_id);
+	sync_stream_queue_init(hdev, q->hw_queue_id);
 
 	if (rc)
 		return rc;
@@ -867,6 +941,7 @@ int hl_hw_queues_create(struct hl_device *hdev)
 		q->queue_type = asic->hw_queues_props[i].type;
 		q->supports_sync_stream =
 				asic->hw_queues_props[i].supports_sync_stream;
+		q->collective_mode = asic->hw_queues_props[i].collective_mode;
 
 		rc = queue_init(hdev, q, i);
 		if (rc) {
 			dev_err(hdev->dev,
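Not part of the patch: for readers skimming the hunks above, the stand-alone C sketch below models only the SOB wraparound bookkeeping that init_signal_cs() keeps in the per-queue sync_stream_prop. Everything outside the patched hunks (the constant values, the trimmed struct, the helper name signal_cs_assign) is assumed here for illustration, and the kref-based SOB reset handling is deliberately omitted.

/* Illustrative only -- NOT driver code. Simplified model of the
 * next_sob_val/curr_sob_offset handling shown in init_signal_cs().
 */
#include <stdio.h>

#define HL_MAX_SOB_VAL	(1 << 15)	/* assumed SOB counter limit */
#define HL_RSVD_SOBS	2		/* two SOBs reserved per queue */

struct sync_stream_props {
	unsigned int next_sob_val;	/* value the next signal will write */
	unsigned int curr_sob_offset;	/* which reserved SOB is in use */
};

/* Assign a SOB value to one signal CS, switching SOBs on wraparound */
static unsigned int signal_cs_assign(struct sync_stream_props *prop)
{
	unsigned int sob_val = prop->next_sob_val++;

	if (prop->next_sob_val == HL_MAX_SOB_VAL) {
		/* restart the count on the other reserved SOB; the real
		 * driver also drops the old SOB's refcount here
		 */
		prop->next_sob_val = 1;
		prop->curr_sob_offset =
			(prop->curr_sob_offset + 1) % HL_RSVD_SOBS;
	}

	return sob_val;
}

int main(void)
{
	struct sync_stream_props prop = { .next_sob_val = 1 };
	unsigned long i;

	/* drive enough signals to cross the wraparound point once */
	for (i = 0 ; i < HL_MAX_SOB_VAL + 5UL ; i++)
		signal_cs_assign(&prop);

	printf("sob offset %u, next val %u\n",
		prop.curr_sob_offset, prop.next_sob_val);

	return 0;
}

Reserving HL_RSVD_SOBS SOBs per queue is what lets the patch switch new signals onto the next SOB at wraparound while, as the kref handling in init_signal_cs() suggests, the exhausted SOB is only reset once its outstanding users drop their references.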