summaryrefslogtreecommitdiffstats
path: root/drivers/misc/habanalabs
diff options
context:
space:
mode:
Diffstat (limited to 'drivers/misc/habanalabs')
-rw-r--r--drivers/misc/habanalabs/common/command_submission.c71
-rw-r--r--drivers/misc/habanalabs/common/hw_queue.c9
-rw-r--r--drivers/misc/habanalabs/gaudi/gaudi.c11
-rw-r--r--drivers/misc/habanalabs/gaudi/gaudi_security.c115
-rw-r--r--drivers/misc/habanalabs/include/gaudi/asic_reg/gaudi_regs.h2
5 files changed, 134 insertions, 74 deletions
diff --git a/drivers/misc/habanalabs/common/command_submission.c b/drivers/misc/habanalabs/common/command_submission.c
index 7b0516cf808b..91b57544f7c6 100644
--- a/drivers/misc/habanalabs/common/command_submission.c
+++ b/drivers/misc/habanalabs/common/command_submission.c
@@ -405,7 +405,7 @@ static void staged_cs_put(struct hl_device *hdev, struct hl_cs *cs)
static void cs_handle_tdr(struct hl_device *hdev, struct hl_cs *cs)
{
bool next_entry_found = false;
- struct hl_cs *next;
+ struct hl_cs *next, *first_cs;
if (!cs_needs_timeout(cs))
return;
@@ -415,9 +415,16 @@ static void cs_handle_tdr(struct hl_device *hdev, struct hl_cs *cs)
/* We need to handle tdr only once for the complete staged submission.
* Hence, we choose the CS that reaches this function first which is
* the CS marked as 'staged_last'.
+ * In case single staged cs was submitted which has both first and last
+ * indications, then "cs_find_first" below will return NULL, since we
+ * removed the cs node from the list before getting here,
+ * in such cases just continue with the cs to cancel it's TDR work.
*/
- if (cs->staged_cs && cs->staged_last)
- cs = hl_staged_cs_find_first(hdev, cs->staged_sequence);
+ if (cs->staged_cs && cs->staged_last) {
+ first_cs = hl_staged_cs_find_first(hdev, cs->staged_sequence);
+ if (first_cs)
+ cs = first_cs;
+ }
spin_unlock(&hdev->cs_mirror_lock);
@@ -1288,6 +1295,12 @@ static int cs_ioctl_default(struct hl_fpriv *hpriv, void __user *chunks,
if (rc)
goto free_cs_object;
+ /* If this is a staged submission we must return the staged sequence
+ * rather than the internal CS sequence
+ */
+ if (cs->staged_cs)
+ *cs_seq = cs->staged_sequence;
+
/* Validate ALL the CS chunks before submitting the CS */
for (i = 0 ; i < num_chunks ; i++) {
struct hl_cs_chunk *chunk = &cs_chunk_array[i];
@@ -1988,6 +2001,15 @@ static int cs_ioctl_signal_wait(struct hl_fpriv *hpriv, enum hl_cs_type cs_type,
goto free_cs_chunk_array;
}
+ if (!hdev->nic_ports_mask) {
+ atomic64_inc(&ctx->cs_counters.validation_drop_cnt);
+ atomic64_inc(&cntr->validation_drop_cnt);
+ dev_err(hdev->dev,
+ "Collective operations not supported when NIC ports are disabled");
+ rc = -EINVAL;
+ goto free_cs_chunk_array;
+ }
+
collective_engine_id = chunk->collective_engine_id;
}
@@ -2026,9 +2048,10 @@ static int cs_ioctl_signal_wait(struct hl_fpriv *hpriv, enum hl_cs_type cs_type,
spin_unlock(&ctx->sig_mgr.lock);
if (!handle_found) {
- dev_err(hdev->dev, "Cannot find encapsulated signals handle for seq 0x%llx\n",
+ /* treat as signal CS already finished */
+ dev_dbg(hdev->dev, "Cannot find encapsulated signals handle for seq 0x%llx\n",
signal_seq);
- rc = -EINVAL;
+ rc = 0;
goto free_cs_chunk_array;
}
@@ -2613,7 +2636,8 @@ static int hl_multi_cs_wait_ioctl(struct hl_fpriv *hpriv, void *data)
* completed after the poll function.
*/
if (!mcs_data.completion_bitmap) {
- dev_err(hdev->dev, "Multi-CS got completion on wait but no CS completed\n");
+ dev_warn_ratelimited(hdev->dev,
+ "Multi-CS got completion on wait but no CS completed\n");
rc = -EFAULT;
}
}
@@ -2740,10 +2764,20 @@ static int _hl_interrupt_wait_ioctl(struct hl_device *hdev, struct hl_ctx *ctx,
else
interrupt = &hdev->user_interrupt[interrupt_offset];
+ /* Add pending user interrupt to relevant list for the interrupt
+ * handler to monitor
+ */
+ spin_lock_irqsave(&interrupt->wait_list_lock, flags);
+ list_add_tail(&pend->wait_list_node, &interrupt->wait_list_head);
+ spin_unlock_irqrestore(&interrupt->wait_list_lock, flags);
+
+ /* We check for completion value as interrupt could have been received
+ * before we added the node to the wait list
+ */
if (copy_from_user(&completion_value, u64_to_user_ptr(user_address), 4)) {
dev_err(hdev->dev, "Failed to copy completion value from user\n");
rc = -EFAULT;
- goto free_fence;
+ goto remove_pending_user_interrupt;
}
if (completion_value >= target_value)
@@ -2752,14 +2786,7 @@ static int _hl_interrupt_wait_ioctl(struct hl_device *hdev, struct hl_ctx *ctx,
*status = CS_WAIT_STATUS_BUSY;
if (!timeout_us || (*status == CS_WAIT_STATUS_COMPLETED))
- goto free_fence;
-
- /* Add pending user interrupt to relevant list for the interrupt
- * handler to monitor
- */
- spin_lock_irqsave(&interrupt->wait_list_lock, flags);
- list_add_tail(&pend->wait_list_node, &interrupt->wait_list_head);
- spin_unlock_irqrestore(&interrupt->wait_list_lock, flags);
+ goto remove_pending_user_interrupt;
wait_again:
/* Wait for interrupt handler to signal completion */
@@ -2770,6 +2797,15 @@ wait_again:
* If comparison fails, keep waiting until timeout expires
*/
if (completion_rc > 0) {
+ spin_lock_irqsave(&interrupt->wait_list_lock, flags);
+ /* reinit_completion must be called before we check for user
+ * completion value, otherwise, if interrupt is received after
+ * the comparison and before the next wait_for_completion,
+ * we will reach timeout and fail
+ */
+ reinit_completion(&pend->fence.completion);
+ spin_unlock_irqrestore(&interrupt->wait_list_lock, flags);
+
if (copy_from_user(&completion_value, u64_to_user_ptr(user_address), 4)) {
dev_err(hdev->dev, "Failed to copy completion value from user\n");
rc = -EFAULT;
@@ -2780,11 +2816,7 @@ wait_again:
if (completion_value >= target_value) {
*status = CS_WAIT_STATUS_COMPLETED;
} else {
- spin_lock_irqsave(&interrupt->wait_list_lock, flags);
- reinit_completion(&pend->fence.completion);
timeout = completion_rc;
-
- spin_unlock_irqrestore(&interrupt->wait_list_lock, flags);
goto wait_again;
}
} else if (completion_rc == -ERESTARTSYS) {
@@ -2802,7 +2834,6 @@ remove_pending_user_interrupt:
list_del(&pend->wait_list_node);
spin_unlock_irqrestore(&interrupt->wait_list_lock, flags);
-free_fence:
kfree(pend);
hl_ctx_put(ctx);
diff --git a/drivers/misc/habanalabs/common/hw_queue.c b/drivers/misc/habanalabs/common/hw_queue.c
index 76b7de8f1406..0743319b10c7 100644
--- a/drivers/misc/habanalabs/common/hw_queue.c
+++ b/drivers/misc/habanalabs/common/hw_queue.c
@@ -437,6 +437,7 @@ void hl_hw_queue_encaps_sig_set_sob_info(struct hl_device *hdev,
struct hl_cs_compl *cs_cmpl)
{
struct hl_cs_encaps_sig_handle *handle = cs->encaps_sig_hdl;
+ u32 offset = 0;
cs_cmpl->hw_sob = handle->hw_sob;
@@ -446,9 +447,13 @@ void hl_hw_queue_encaps_sig_set_sob_info(struct hl_device *hdev,
* set offset 1 for example he mean to wait only for the first
* signal only, which will be pre_sob_val, and if he set offset 2
* then the value required is (pre_sob_val + 1) and so on...
+ * if user set wait offset to 0, then treat it as legacy wait cs,
+ * wait for the next signal.
*/
- cs_cmpl->sob_val = handle->pre_sob_val +
- (job->encaps_sig_wait_offset - 1);
+ if (job->encaps_sig_wait_offset)
+ offset = job->encaps_sig_wait_offset - 1;
+
+ cs_cmpl->sob_val = handle->pre_sob_val + offset;
}
static int init_wait_cs(struct hl_device *hdev, struct hl_cs *cs,
diff --git a/drivers/misc/habanalabs/gaudi/gaudi.c b/drivers/misc/habanalabs/gaudi/gaudi.c
index 383865be3c2c..14da87b38e83 100644
--- a/drivers/misc/habanalabs/gaudi/gaudi.c
+++ b/drivers/misc/habanalabs/gaudi/gaudi.c
@@ -395,7 +395,7 @@ static struct hl_hw_obj_name_entry gaudi_so_id_to_str[] = {
static struct hl_hw_obj_name_entry gaudi_monitor_id_to_str[] = {
{ .id = 200, .name = "MON_OBJ_DMA_DOWN_FEEDBACK_RESET" },
- { .id = 201, .name = "MON_OBJ_DMA_UP_FEADBACK_RESET" },
+ { .id = 201, .name = "MON_OBJ_DMA_UP_FEEDBACK_RESET" },
{ .id = 203, .name = "MON_OBJ_DRAM_TO_SRAM_QUEUE_FENCE" },
{ .id = 204, .name = "MON_OBJ_TPC_0_CLK_GATE" },
{ .id = 205, .name = "MON_OBJ_TPC_1_CLK_GATE" },
@@ -5802,6 +5802,7 @@ static void gaudi_add_end_of_cb_packets(struct hl_device *hdev,
{
struct gaudi_device *gaudi = hdev->asic_specific;
struct packet_msg_prot *cq_pkt;
+ u64 msi_addr;
u32 tmp;
cq_pkt = kernel_address + len - (sizeof(struct packet_msg_prot) * 2);
@@ -5823,10 +5824,12 @@ static void gaudi_add_end_of_cb_packets(struct hl_device *hdev,
cq_pkt->ctl = cpu_to_le32(tmp);
cq_pkt->value = cpu_to_le32(1);
- if (!gaudi->multi_msi_mode)
- msi_vec = 0;
+ if (gaudi->multi_msi_mode)
+ msi_addr = mmPCIE_MSI_INTR_0 + msi_vec * 4;
+ else
+ msi_addr = mmPCIE_CORE_MSI_REQ;
- cq_pkt->addr = cpu_to_le64(CFG_BASE + mmPCIE_MSI_INTR_0 + msi_vec * 4);
+ cq_pkt->addr = cpu_to_le64(CFG_BASE + msi_addr);
}
static void gaudi_update_eq_ci(struct hl_device *hdev, u32 val)
diff --git a/drivers/misc/habanalabs/gaudi/gaudi_security.c b/drivers/misc/habanalabs/gaudi/gaudi_security.c
index cb265c00cf73..25ac87cebd45 100644
--- a/drivers/misc/habanalabs/gaudi/gaudi_security.c
+++ b/drivers/misc/habanalabs/gaudi/gaudi_security.c
@@ -8,16 +8,21 @@
#include "gaudiP.h"
#include "../include/gaudi/asic_reg/gaudi_regs.h"
-#define GAUDI_NUMBER_OF_RR_REGS 24
-#define GAUDI_NUMBER_OF_LBW_RANGES 12
+#define GAUDI_NUMBER_OF_LBW_RR_REGS 28
+#define GAUDI_NUMBER_OF_HBW_RR_REGS 24
+#define GAUDI_NUMBER_OF_LBW_RANGES 10
-static u64 gaudi_rr_lbw_hit_aw_regs[GAUDI_NUMBER_OF_RR_REGS] = {
+static u64 gaudi_rr_lbw_hit_aw_regs[GAUDI_NUMBER_OF_LBW_RR_REGS] = {
+ mmDMA_IF_W_S_SOB_HIT_WPROT,
mmDMA_IF_W_S_DMA0_HIT_WPROT,
mmDMA_IF_W_S_DMA1_HIT_WPROT,
+ mmDMA_IF_E_S_SOB_HIT_WPROT,
mmDMA_IF_E_S_DMA0_HIT_WPROT,
mmDMA_IF_E_S_DMA1_HIT_WPROT,
+ mmDMA_IF_W_N_SOB_HIT_WPROT,
mmDMA_IF_W_N_DMA0_HIT_WPROT,
mmDMA_IF_W_N_DMA1_HIT_WPROT,
+ mmDMA_IF_E_N_SOB_HIT_WPROT,
mmDMA_IF_E_N_DMA0_HIT_WPROT,
mmDMA_IF_E_N_DMA1_HIT_WPROT,
mmSIF_RTR_0_LBW_RANGE_PROT_HIT_AW,
@@ -38,13 +43,17 @@ static u64 gaudi_rr_lbw_hit_aw_regs[GAUDI_NUMBER_OF_RR_REGS] = {
mmNIF_RTR_7_LBW_RANGE_PROT_HIT_AW,
};
-static u64 gaudi_rr_lbw_hit_ar_regs[GAUDI_NUMBER_OF_RR_REGS] = {
+static u64 gaudi_rr_lbw_hit_ar_regs[GAUDI_NUMBER_OF_LBW_RR_REGS] = {
+ mmDMA_IF_W_S_SOB_HIT_RPROT,
mmDMA_IF_W_S_DMA0_HIT_RPROT,
mmDMA_IF_W_S_DMA1_HIT_RPROT,
+ mmDMA_IF_E_S_SOB_HIT_RPROT,
mmDMA_IF_E_S_DMA0_HIT_RPROT,
mmDMA_IF_E_S_DMA1_HIT_RPROT,
+ mmDMA_IF_W_N_SOB_HIT_RPROT,
mmDMA_IF_W_N_DMA0_HIT_RPROT,
mmDMA_IF_W_N_DMA1_HIT_RPROT,
+ mmDMA_IF_E_N_SOB_HIT_RPROT,
mmDMA_IF_E_N_DMA0_HIT_RPROT,
mmDMA_IF_E_N_DMA1_HIT_RPROT,
mmSIF_RTR_0_LBW_RANGE_PROT_HIT_AR,
@@ -65,13 +74,17 @@ static u64 gaudi_rr_lbw_hit_ar_regs[GAUDI_NUMBER_OF_RR_REGS] = {
mmNIF_RTR_7_LBW_RANGE_PROT_HIT_AR,
};
-static u64 gaudi_rr_lbw_min_aw_regs[GAUDI_NUMBER_OF_RR_REGS] = {
+static u64 gaudi_rr_lbw_min_aw_regs[GAUDI_NUMBER_OF_LBW_RR_REGS] = {
+ mmDMA_IF_W_S_SOB_MIN_WPROT_0,
mmDMA_IF_W_S_DMA0_MIN_WPROT_0,
mmDMA_IF_W_S_DMA1_MIN_WPROT_0,
+ mmDMA_IF_E_S_SOB_MIN_WPROT_0,
mmDMA_IF_E_S_DMA0_MIN_WPROT_0,
mmDMA_IF_E_S_DMA1_MIN_WPROT_0,
+ mmDMA_IF_W_N_SOB_MIN_WPROT_0,
mmDMA_IF_W_N_DMA0_MIN_WPROT_0,
mmDMA_IF_W_N_DMA1_MIN_WPROT_0,
+ mmDMA_IF_E_N_SOB_MIN_WPROT_0,
mmDMA_IF_E_N_DMA0_MIN_WPROT_0,
mmDMA_IF_E_N_DMA1_MIN_WPROT_0,
mmSIF_RTR_0_LBW_RANGE_PROT_MIN_AW_0,
@@ -92,13 +105,17 @@ static u64 gaudi_rr_lbw_min_aw_regs[GAUDI_NUMBER_OF_RR_REGS] = {
mmNIF_RTR_7_LBW_RANGE_PROT_MIN_AW_0,
};
-static u64 gaudi_rr_lbw_max_aw_regs[GAUDI_NUMBER_OF_RR_REGS] = {
+static u64 gaudi_rr_lbw_max_aw_regs[GAUDI_NUMBER_OF_LBW_RR_REGS] = {
+ mmDMA_IF_W_S_SOB_MAX_WPROT_0,
mmDMA_IF_W_S_DMA0_MAX_WPROT_0,
mmDMA_IF_W_S_DMA1_MAX_WPROT_0,
+ mmDMA_IF_E_S_SOB_MAX_WPROT_0,
mmDMA_IF_E_S_DMA0_MAX_WPROT_0,
mmDMA_IF_E_S_DMA1_MAX_WPROT_0,
+ mmDMA_IF_W_N_SOB_MAX_WPROT_0,
mmDMA_IF_W_N_DMA0_MAX_WPROT_0,
mmDMA_IF_W_N_DMA1_MAX_WPROT_0,
+ mmDMA_IF_E_N_SOB_MAX_WPROT_0,
mmDMA_IF_E_N_DMA0_MAX_WPROT_0,
mmDMA_IF_E_N_DMA1_MAX_WPROT_0,
mmSIF_RTR_0_LBW_RANGE_PROT_MAX_AW_0,
@@ -119,13 +136,17 @@ static u64 gaudi_rr_lbw_max_aw_regs[GAUDI_NUMBER_OF_RR_REGS] = {
mmNIF_RTR_7_LBW_RANGE_PROT_MAX_AW_0,
};
-static u64 gaudi_rr_lbw_min_ar_regs[GAUDI_NUMBER_OF_RR_REGS] = {
+static u64 gaudi_rr_lbw_min_ar_regs[GAUDI_NUMBER_OF_LBW_RR_REGS] = {
+ mmDMA_IF_W_S_SOB_MIN_RPROT_0,
mmDMA_IF_W_S_DMA0_MIN_RPROT_0,
mmDMA_IF_W_S_DMA1_MIN_RPROT_0,
+ mmDMA_IF_E_S_SOB_MIN_RPROT_0,
mmDMA_IF_E_S_DMA0_MIN_RPROT_0,
mmDMA_IF_E_S_DMA1_MIN_RPROT_0,
+ mmDMA_IF_W_N_SOB_MIN_RPROT_0,
mmDMA_IF_W_N_DMA0_MIN_RPROT_0,
mmDMA_IF_W_N_DMA1_MIN_RPROT_0,
+ mmDMA_IF_E_N_SOB_MIN_RPROT_0,
mmDMA_IF_E_N_DMA0_MIN_RPROT_0,
mmDMA_IF_E_N_DMA1_MIN_RPROT_0,
mmSIF_RTR_0_LBW_RANGE_PROT_MIN_AR_0,
@@ -146,13 +167,17 @@ static u64 gaudi_rr_lbw_min_ar_regs[GAUDI_NUMBER_OF_RR_REGS] = {
mmNIF_RTR_7_LBW_RANGE_PROT_MIN_AR_0,
};
-static u64 gaudi_rr_lbw_max_ar_regs[GAUDI_NUMBER_OF_RR_REGS] = {
+static u64 gaudi_rr_lbw_max_ar_regs[GAUDI_NUMBER_OF_LBW_RR_REGS] = {
+ mmDMA_IF_W_S_SOB_MAX_RPROT_0,
mmDMA_IF_W_S_DMA0_MAX_RPROT_0,
mmDMA_IF_W_S_DMA1_MAX_RPROT_0,
+ mmDMA_IF_E_S_SOB_MAX_RPROT_0,
mmDMA_IF_E_S_DMA0_MAX_RPROT_0,
mmDMA_IF_E_S_DMA1_MAX_RPROT_0,
+ mmDMA_IF_W_N_SOB_MAX_RPROT_0,
mmDMA_IF_W_N_DMA0_MAX_RPROT_0,
mmDMA_IF_W_N_DMA1_MAX_RPROT_0,
+ mmDMA_IF_E_N_SOB_MAX_RPROT_0,
mmDMA_IF_E_N_DMA0_MAX_RPROT_0,
mmDMA_IF_E_N_DMA1_MAX_RPROT_0,
mmSIF_RTR_0_LBW_RANGE_PROT_MAX_AR_0,
@@ -173,7 +198,7 @@ static u64 gaudi_rr_lbw_max_ar_regs[GAUDI_NUMBER_OF_RR_REGS] = {
mmNIF_RTR_7_LBW_RANGE_PROT_MAX_AR_0,
};
-static u64 gaudi_rr_hbw_hit_aw_regs[GAUDI_NUMBER_OF_RR_REGS] = {
+static u64 gaudi_rr_hbw_hit_aw_regs[GAUDI_NUMBER_OF_HBW_RR_REGS] = {
mmDMA_IF_W_S_DOWN_CH0_RANGE_SEC_HIT_AW,
mmDMA_IF_W_S_DOWN_CH1_RANGE_SEC_HIT_AW,
mmDMA_IF_E_S_DOWN_CH0_RANGE_SEC_HIT_AW,
@@ -200,7 +225,7 @@ static u64 gaudi_rr_hbw_hit_aw_regs[GAUDI_NUMBER_OF_RR_REGS] = {
mmNIF_RTR_CTRL_7_RANGE_SEC_HIT_AW
};
-static u64 gaudi_rr_hbw_hit_ar_regs[GAUDI_NUMBER_OF_RR_REGS] = {
+static u64 gaudi_rr_hbw_hit_ar_regs[GAUDI_NUMBER_OF_HBW_RR_REGS] = {
mmDMA_IF_W_S_DOWN_CH0_RANGE_SEC_HIT_AR,
mmDMA_IF_W_S_DOWN_CH1_RANGE_SEC_HIT_AR,
mmDMA_IF_E_S_DOWN_CH0_RANGE_SEC_HIT_AR,
@@ -227,7 +252,7 @@ static u64 gaudi_rr_hbw_hit_ar_regs[GAUDI_NUMBER_OF_RR_REGS] = {
mmNIF_RTR_CTRL_7_RANGE_SEC_HIT_AR
};
-static u64 gaudi_rr_hbw_base_low_aw_regs[GAUDI_NUMBER_OF_RR_REGS] = {
+static u64 gaudi_rr_hbw_base_low_aw_regs[GAUDI_NUMBER_OF_HBW_RR_REGS] = {
mmDMA_IF_W_S_DOWN_CH0_RANGE_SEC_BASE_LOW_AW_0,
mmDMA_IF_W_S_DOWN_CH1_RANGE_SEC_BASE_LOW_AW_0,
mmDMA_IF_E_S_DOWN_CH0_RANGE_SEC_BASE_LOW_AW_0,
@@ -254,7 +279,7 @@ static u64 gaudi_rr_hbw_base_low_aw_regs[GAUDI_NUMBER_OF_RR_REGS] = {
mmNIF_RTR_CTRL_7_RANGE_SEC_BASE_LOW_AW_0
};
-static u64 gaudi_rr_hbw_base_high_aw_regs[GAUDI_NUMBER_OF_RR_REGS] = {
+static u64 gaudi_rr_hbw_base_high_aw_regs[GAUDI_NUMBER_OF_HBW_RR_REGS] = {
mmDMA_IF_W_S_DOWN_CH0_RANGE_SEC_BASE_HIGH_AW_0,
mmDMA_IF_W_S_DOWN_CH1_RANGE_SEC_BASE_HIGH_AW_0,
mmDMA_IF_E_S_DOWN_CH0_RANGE_SEC_BASE_HIGH_AW_0,
@@ -281,7 +306,7 @@ static u64 gaudi_rr_hbw_base_high_aw_regs[GAUDI_NUMBER_OF_RR_REGS] = {
mmNIF_RTR_CTRL_7_RANGE_SEC_BASE_HIGH_AW_0
};
-static u64 gaudi_rr_hbw_mask_low_aw_regs[GAUDI_NUMBER_OF_RR_REGS] = {
+static u64 gaudi_rr_hbw_mask_low_aw_regs[GAUDI_NUMBER_OF_HBW_RR_REGS] = {
mmDMA_IF_W_S_DOWN_CH0_RANGE_SEC_MASK_LOW_AW_0,
mmDMA_IF_W_S_DOWN_CH1_RANGE_SEC_MASK_LOW_AW_0,
mmDMA_IF_E_S_DOWN_CH0_RANGE_SEC_MASK_LOW_AW_0,
@@ -308,7 +333,7 @@ static u64 gaudi_rr_hbw_mask_low_aw_regs[GAUDI_NUMBER_OF_RR_REGS] = {
mmNIF_RTR_CTRL_7_RANGE_SEC_MASK_LOW_AW_0
};
-static u64 gaudi_rr_hbw_mask_high_aw_regs[GAUDI_NUMBER_OF_RR_REGS] = {
+static u64 gaudi_rr_hbw_mask_high_aw_regs[GAUDI_NUMBER_OF_HBW_RR_REGS] = {
mmDMA_IF_W_S_DOWN_CH0_RANGE_SEC_MASK_HIGH_AW_0,
mmDMA_IF_W_S_DOWN_CH1_RANGE_SEC_MASK_HIGH_AW_0,
mmDMA_IF_E_S_DOWN_CH0_RANGE_SEC_MASK_HIGH_AW_0,
@@ -335,7 +360,7 @@ static u64 gaudi_rr_hbw_mask_high_aw_regs[GAUDI_NUMBER_OF_RR_REGS] = {
mmNIF_RTR_CTRL_7_RANGE_SEC_MASK_HIGH_AW_0
};
-static u64 gaudi_rr_hbw_base_low_ar_regs[GAUDI_NUMBER_OF_RR_REGS] = {
+static u64 gaudi_rr_hbw_base_low_ar_regs[GAUDI_NUMBER_OF_HBW_RR_REGS] = {
mmDMA_IF_W_S_DOWN_CH0_RANGE_SEC_BASE_LOW_AR_0,
mmDMA_IF_W_S_DOWN_CH1_RANGE_SEC_BASE_LOW_AR_0,
mmDMA_IF_E_S_DOWN_CH0_RANGE_SEC_BASE_LOW_AR_0,
@@ -362,7 +387,7 @@ static u64 gaudi_rr_hbw_base_low_ar_regs[GAUDI_NUMBER_OF_RR_REGS] = {
mmNIF_RTR_CTRL_7_RANGE_SEC_BASE_LOW_AR_0
};
-static u64 gaudi_rr_hbw_base_high_ar_regs[GAUDI_NUMBER_OF_RR_REGS] = {
+static u64 gaudi_rr_hbw_base_high_ar_regs[GAUDI_NUMBER_OF_HBW_RR_REGS] = {
mmDMA_IF_W_S_DOWN_CH0_RANGE_SEC_BASE_HIGH_AR_0,
mmDMA_IF_W_S_DOWN_CH1_RANGE_SEC_BASE_HIGH_AR_0,
mmDMA_IF_E_S_DOWN_CH0_RANGE_SEC_BASE_HIGH_AR_0,
@@ -389,7 +414,7 @@ static u64 gaudi_rr_hbw_base_high_ar_regs[GAUDI_NUMBER_OF_RR_REGS] = {
mmNIF_RTR_CTRL_7_RANGE_SEC_BASE_HIGH_AR_0
};
-static u64 gaudi_rr_hbw_mask_low_ar_regs[GAUDI_NUMBER_OF_RR_REGS] = {
+static u64 gaudi_rr_hbw_mask_low_ar_regs[GAUDI_NUMBER_OF_HBW_RR_REGS] = {
mmDMA_IF_W_S_DOWN_CH0_RANGE_SEC_MASK_LOW_AR_0,
mmDMA_IF_W_S_DOWN_CH1_RANGE_SEC_MASK_LOW_AR_0,
mmDMA_IF_E_S_DOWN_CH0_RANGE_SEC_MASK_LOW_AR_0,
@@ -416,7 +441,7 @@ static u64 gaudi_rr_hbw_mask_low_ar_regs[GAUDI_NUMBER_OF_RR_REGS] = {
mmNIF_RTR_CTRL_7_RANGE_SEC_MASK_LOW_AR_0
};
-static u64 gaudi_rr_hbw_mask_high_ar_regs[GAUDI_NUMBER_OF_RR_REGS] = {
+static u64 gaudi_rr_hbw_mask_high_ar_regs[GAUDI_NUMBER_OF_HBW_RR_REGS] = {
mmDMA_IF_W_S_DOWN_CH0_RANGE_SEC_MASK_HIGH_AR_0,
mmDMA_IF_W_S_DOWN_CH1_RANGE_SEC_MASK_HIGH_AR_0,
mmDMA_IF_E_S_DOWN_CH0_RANGE_SEC_MASK_HIGH_AR_0,
@@ -12849,50 +12874,44 @@ static void gaudi_init_range_registers_lbw(struct hl_device *hdev)
u32 lbw_rng_end[GAUDI_NUMBER_OF_LBW_RANGES];
int i, j;
- lbw_rng_start[0] = (0xFBFE0000 & 0x3FFFFFF) - 1;
- lbw_rng_end[0] = (0xFBFFF000 & 0x3FFFFFF) + 1;
+ lbw_rng_start[0] = (0xFC0E8000 & 0x3FFFFFF) - 1; /* 0x000E7FFF */
+ lbw_rng_end[0] = (0xFC11FFFF & 0x3FFFFFF) + 1; /* 0x00120000 */
- lbw_rng_start[1] = (0xFC0E8000 & 0x3FFFFFF) - 1;
- lbw_rng_end[1] = (0xFC120000 & 0x3FFFFFF) + 1;
+ lbw_rng_start[1] = (0xFC1E8000 & 0x3FFFFFF) - 1; /* 0x001E7FFF */
+ lbw_rng_end[1] = (0xFC48FFFF & 0x3FFFFFF) + 1; /* 0x00490000 */
- lbw_rng_start[2] = (0xFC1E8000 & 0x3FFFFFF) - 1;
- lbw_rng_end[2] = (0xFC48FFFF & 0x3FFFFFF) + 1;
+ lbw_rng_start[2] = (0xFC600000 & 0x3FFFFFF) - 1; /* 0x005FFFFF */
+ lbw_rng_end[2] = (0xFCC48FFF & 0x3FFFFFF) + 1; /* 0x00C49000 */
- lbw_rng_start[3] = (0xFC600000 & 0x3FFFFFF) - 1;
- lbw_rng_end[3] = (0xFCC48FFF & 0x3FFFFFF) + 1;
+ lbw_rng_start[3] = (0xFCC4A000 & 0x3FFFFFF) - 1; /* 0x00C49FFF */
+ lbw_rng_end[3] = (0xFCCDFFFF & 0x3FFFFFF) + 1; /* 0x00CE0000 */
- lbw_rng_start[4] = (0xFCC4A000 & 0x3FFFFFF) - 1;
- lbw_rng_end[4] = (0xFCCDFFFF & 0x3FFFFFF) + 1;
+ lbw_rng_start[4] = (0xFCCE4000 & 0x3FFFFFF) - 1; /* 0x00CE3FFF */
+ lbw_rng_end[4] = (0xFCD1FFFF & 0x3FFFFFF) + 1; /* 0x00D20000 */
- lbw_rng_start[5] = (0xFCCE4000 & 0x3FFFFFF) - 1;
- lbw_rng_end[5] = (0xFCD1FFFF & 0x3FFFFFF) + 1;
+ lbw_rng_start[5] = (0xFCD24000 & 0x3FFFFFF) - 1; /* 0x00D23FFF */
+ lbw_rng_end[5] = (0xFCD5FFFF & 0x3FFFFFF) + 1; /* 0x00D60000 */
- lbw_rng_start[6] = (0xFCD24000 & 0x3FFFFFF) - 1;
- lbw_rng_end[6] = (0xFCD5FFFF & 0x3FFFFFF) + 1;
+ lbw_rng_start[6] = (0xFCD64000 & 0x3FFFFFF) - 1; /* 0x00D63FFF */
+ lbw_rng_end[6] = (0xFCD9FFFF & 0x3FFFFFF) + 1; /* 0x00DA0000 */
- lbw_rng_start[7] = (0xFCD64000 & 0x3FFFFFF) - 1;
- lbw_rng_end[7] = (0xFCD9FFFF & 0x3FFFFFF) + 1;
+ lbw_rng_start[7] = (0xFCDA4000 & 0x3FFFFFF) - 1; /* 0x00DA3FFF */
+ lbw_rng_end[7] = (0xFCDDFFFF & 0x3FFFFFF) + 1; /* 0x00DE0000 */
- lbw_rng_start[8] = (0xFCDA4000 & 0x3FFFFFF) - 1;
- lbw_rng_end[8] = (0xFCDDFFFF & 0x3FFFFFF) + 1;
+ lbw_rng_start[8] = (0xFCDE4000 & 0x3FFFFFF) - 1; /* 0x00DE3FFF */
+ lbw_rng_end[8] = (0xFCE05FFF & 0x3FFFFFF) + 1; /* 0x00E06000 */
- lbw_rng_start[9] = (0xFCDE4000 & 0x3FFFFFF) - 1;
- lbw_rng_end[9] = (0xFCE05FFF & 0x3FFFFFF) + 1;
+ lbw_rng_start[9] = (0xFCFC9000 & 0x3FFFFFF) - 1; /* 0x00FC8FFF */
+ lbw_rng_end[9] = (0xFFFFFFFE & 0x3FFFFFF) + 1; /* 0x03FFFFFF */
- lbw_rng_start[10] = (0xFEC43000 & 0x3FFFFFF) - 1;
- lbw_rng_end[10] = (0xFEC43FFF & 0x3FFFFFF) + 1;
-
- lbw_rng_start[11] = (0xFE484000 & 0x3FFFFFF) - 1;
- lbw_rng_end[11] = (0xFE484FFF & 0x3FFFFFF) + 1;
-
- for (i = 0 ; i < GAUDI_NUMBER_OF_RR_REGS ; i++) {
+ for (i = 0 ; i < GAUDI_NUMBER_OF_LBW_RR_REGS ; i++) {
WREG32(gaudi_rr_lbw_hit_aw_regs[i],
(1 << GAUDI_NUMBER_OF_LBW_RANGES) - 1);
WREG32(gaudi_rr_lbw_hit_ar_regs[i],
(1 << GAUDI_NUMBER_OF_LBW_RANGES) - 1);
}
- for (i = 0 ; i < GAUDI_NUMBER_OF_RR_REGS ; i++)
+ for (i = 0 ; i < GAUDI_NUMBER_OF_LBW_RR_REGS ; i++)
for (j = 0 ; j < GAUDI_NUMBER_OF_LBW_RANGES ; j++) {
WREG32(gaudi_rr_lbw_min_aw_regs[i] + (j << 2),
lbw_rng_start[j]);
@@ -12939,12 +12958,12 @@ static void gaudi_init_range_registers_hbw(struct hl_device *hdev)
* 6th range is the host
*/
- for (i = 0 ; i < GAUDI_NUMBER_OF_RR_REGS ; i++) {
+ for (i = 0 ; i < GAUDI_NUMBER_OF_HBW_RR_REGS ; i++) {
WREG32(gaudi_rr_hbw_hit_aw_regs[i], 0x1F);
WREG32(gaudi_rr_hbw_hit_ar_regs[i], 0x1D);
}
- for (i = 0 ; i < GAUDI_NUMBER_OF_RR_REGS ; i++) {
+ for (i = 0 ; i < GAUDI_NUMBER_OF_HBW_RR_REGS ; i++) {
WREG32(gaudi_rr_hbw_base_low_aw_regs[i], dram_addr_lo);
WREG32(gaudi_rr_hbw_base_low_ar_regs[i], dram_addr_lo);
diff --git a/drivers/misc/habanalabs/include/gaudi/asic_reg/gaudi_regs.h b/drivers/misc/habanalabs/include/gaudi/asic_reg/gaudi_regs.h
index ffdfbd9b3220..1a6576666794 100644
--- a/drivers/misc/habanalabs/include/gaudi/asic_reg/gaudi_regs.h
+++ b/drivers/misc/habanalabs/include/gaudi/asic_reg/gaudi_regs.h
@@ -308,6 +308,8 @@
#define mmPCIE_AUX_FLR_CTRL 0xC07394
#define mmPCIE_AUX_DBI 0xC07490
+#define mmPCIE_CORE_MSI_REQ 0xC04100
+
#define mmPSOC_PCI_PLL_NR 0xC72100
#define mmSRAM_W_PLL_NR 0x4C8100
#define mmPSOC_HBM_PLL_NR 0xC74100