summaryrefslogtreecommitdiffstats
path: root/drivers/misc
diff options
context:
space:
mode:
Diffstat (limited to 'drivers/misc')
-rw-r--r--drivers/misc/cardreader/rtsx_pcr.c7
-rw-r--r--drivers/misc/habanalabs/common/command_submission.c77
-rw-r--r--drivers/misc/habanalabs/common/device.c19
-rw-r--r--drivers/misc/habanalabs/common/firmware_if.c65
-rw-r--r--drivers/misc/habanalabs/common/habanalabs.h5
-rw-r--r--drivers/misc/habanalabs/common/habanalabs_drv.c1
-rw-r--r--drivers/misc/habanalabs/common/habanalabs_ioctl.c9
-rw-r--r--drivers/misc/habanalabs/common/hw_queue.c5
-rw-r--r--drivers/misc/habanalabs/common/memory.c10
-rw-r--r--drivers/misc/habanalabs/common/mmu.c6
-rw-r--r--drivers/misc/habanalabs/common/mmu_v1.c12
-rw-r--r--drivers/misc/habanalabs/common/pci.c28
-rw-r--r--drivers/misc/habanalabs/gaudi/gaudi.c194
-rw-r--r--drivers/misc/habanalabs/gaudi/gaudiP.h7
-rw-r--r--drivers/misc/habanalabs/gaudi/gaudi_coresight.c3
-rw-r--r--drivers/misc/habanalabs/goya/goya.c78
-rw-r--r--drivers/misc/habanalabs/include/common/hl_boot_if.h9
-rw-r--r--drivers/misc/pvpanic.c19
18 files changed, 340 insertions, 214 deletions
diff --git a/drivers/misc/cardreader/rtsx_pcr.c b/drivers/misc/cardreader/rtsx_pcr.c
index 2aa6648fa41f..5a491d2cd1ae 100644
--- a/drivers/misc/cardreader/rtsx_pcr.c
+++ b/drivers/misc/cardreader/rtsx_pcr.c
@@ -1512,6 +1512,7 @@ static int rtsx_pci_probe(struct pci_dev *pcidev,
struct pcr_handle *handle;
u32 base, len;
int ret, i, bar = 0;
+ u8 val;
dev_dbg(&(pcidev->dev),
": Realtek PCI-E Card Reader found at %s [%04x:%04x] (rev %x)\n",
@@ -1577,7 +1578,11 @@ static int rtsx_pci_probe(struct pci_dev *pcidev,
pcr->host_cmds_addr = pcr->rtsx_resv_buf_addr;
pcr->host_sg_tbl_ptr = pcr->rtsx_resv_buf + HOST_CMDS_BUF_LEN;
pcr->host_sg_tbl_addr = pcr->rtsx_resv_buf_addr + HOST_CMDS_BUF_LEN;
-
+ rtsx_pci_read_register(pcr, ASPM_FORCE_CTL, &val);
+ if (val & FORCE_ASPM_CTL0 && val & FORCE_ASPM_CTL1)
+ pcr->aspm_enabled = false;
+ else
+ pcr->aspm_enabled = true;
pcr->card_inserted = 0;
pcr->card_removed = 0;
INIT_DELAYED_WORK(&pcr->carddet_work, rtsx_pci_card_detect);
diff --git a/drivers/misc/habanalabs/common/command_submission.c b/drivers/misc/habanalabs/common/command_submission.c
index beb482310a58..b2b3d2b0f808 100644
--- a/drivers/misc/habanalabs/common/command_submission.c
+++ b/drivers/misc/habanalabs/common/command_submission.c
@@ -472,8 +472,11 @@ static int allocate_cs(struct hl_device *hdev, struct hl_ctx *ctx,
cntr = &hdev->aggregated_cs_counters;
cs = kzalloc(sizeof(*cs), GFP_ATOMIC);
- if (!cs)
+ if (!cs) {
+ atomic64_inc(&ctx->cs_counters.out_of_mem_drop_cnt);
+ atomic64_inc(&cntr->out_of_mem_drop_cnt);
return -ENOMEM;
+ }
cs->ctx = ctx;
cs->submitted = false;
@@ -486,6 +489,8 @@ static int allocate_cs(struct hl_device *hdev, struct hl_ctx *ctx,
cs_cmpl = kmalloc(sizeof(*cs_cmpl), GFP_ATOMIC);
if (!cs_cmpl) {
+ atomic64_inc(&ctx->cs_counters.out_of_mem_drop_cnt);
+ atomic64_inc(&cntr->out_of_mem_drop_cnt);
rc = -ENOMEM;
goto free_cs;
}
@@ -513,6 +518,8 @@ static int allocate_cs(struct hl_device *hdev, struct hl_ctx *ctx,
cs->jobs_in_queue_cnt = kcalloc(hdev->asic_prop.max_queues,
sizeof(*cs->jobs_in_queue_cnt), GFP_ATOMIC);
if (!cs->jobs_in_queue_cnt) {
+ atomic64_inc(&ctx->cs_counters.out_of_mem_drop_cnt);
+ atomic64_inc(&cntr->out_of_mem_drop_cnt);
rc = -ENOMEM;
goto free_fence;
}
@@ -562,7 +569,7 @@ void hl_cs_rollback_all(struct hl_device *hdev)
for (i = 0 ; i < hdev->asic_prop.completion_queues_count ; i++)
flush_workqueue(hdev->cq_wq[i]);
- /* Make sure we don't have leftovers in the H/W queues mirror list */
+ /* Make sure we don't have leftovers in the CS mirror list */
list_for_each_entry_safe(cs, tmp, &hdev->cs_mirror_list, mirror_node) {
cs_get(cs);
cs->aborted = true;
@@ -764,11 +771,14 @@ static int hl_cs_sanity_checks(struct hl_fpriv *hpriv, union hl_cs_args *args)
static int hl_cs_copy_chunk_array(struct hl_device *hdev,
struct hl_cs_chunk **cs_chunk_array,
- void __user *chunks, u32 num_chunks)
+ void __user *chunks, u32 num_chunks,
+ struct hl_ctx *ctx)
{
u32 size_to_copy;
if (num_chunks > HL_MAX_JOBS_PER_CS) {
+ atomic64_inc(&ctx->cs_counters.validation_drop_cnt);
+ atomic64_inc(&hdev->aggregated_cs_counters.validation_drop_cnt);
dev_err(hdev->dev,
"Number of chunks can NOT be larger than %d\n",
HL_MAX_JOBS_PER_CS);
@@ -777,11 +787,16 @@ static int hl_cs_copy_chunk_array(struct hl_device *hdev,
*cs_chunk_array = kmalloc_array(num_chunks, sizeof(**cs_chunk_array),
GFP_ATOMIC);
- if (!*cs_chunk_array)
+ if (!*cs_chunk_array) {
+ atomic64_inc(&ctx->cs_counters.out_of_mem_drop_cnt);
+ atomic64_inc(&hdev->aggregated_cs_counters.out_of_mem_drop_cnt);
return -ENOMEM;
+ }
size_to_copy = num_chunks * sizeof(struct hl_cs_chunk);
if (copy_from_user(*cs_chunk_array, chunks, size_to_copy)) {
+ atomic64_inc(&ctx->cs_counters.validation_drop_cnt);
+ atomic64_inc(&hdev->aggregated_cs_counters.validation_drop_cnt);
dev_err(hdev->dev, "Failed to copy cs chunk array from user\n");
kfree(*cs_chunk_array);
return -EFAULT;
@@ -797,6 +812,7 @@ static int cs_ioctl_default(struct hl_fpriv *hpriv, void __user *chunks,
struct hl_device *hdev = hpriv->hdev;
struct hl_cs_chunk *cs_chunk_array;
struct hl_cs_counters_atomic *cntr;
+ struct hl_ctx *ctx = hpriv->ctx;
struct hl_cs_job *job;
struct hl_cs *cs;
struct hl_cb *cb;
@@ -805,7 +821,8 @@ static int cs_ioctl_default(struct hl_fpriv *hpriv, void __user *chunks,
cntr = &hdev->aggregated_cs_counters;
*cs_seq = ULLONG_MAX;
- rc = hl_cs_copy_chunk_array(hdev, &cs_chunk_array, chunks, num_chunks);
+ rc = hl_cs_copy_chunk_array(hdev, &cs_chunk_array, chunks, num_chunks,
+ hpriv->ctx);
if (rc)
goto out;
@@ -832,8 +849,8 @@ static int cs_ioctl_default(struct hl_fpriv *hpriv, void __user *chunks,
rc = validate_queue_index(hdev, chunk, &queue_type,
&is_kernel_allocated_cb);
if (rc) {
- atomic64_inc(&hpriv->ctx->cs_counters.parsing_drop_cnt);
- atomic64_inc(&cntr->parsing_drop_cnt);
+ atomic64_inc(&ctx->cs_counters.validation_drop_cnt);
+ atomic64_inc(&cntr->validation_drop_cnt);
goto free_cs_object;
}
@@ -841,8 +858,8 @@ static int cs_ioctl_default(struct hl_fpriv *hpriv, void __user *chunks,
cb = get_cb_from_cs_chunk(hdev, &hpriv->cb_mgr, chunk);
if (!cb) {
atomic64_inc(
- &hpriv->ctx->cs_counters.parsing_drop_cnt);
- atomic64_inc(&cntr->parsing_drop_cnt);
+ &ctx->cs_counters.validation_drop_cnt);
+ atomic64_inc(&cntr->validation_drop_cnt);
rc = -EINVAL;
goto free_cs_object;
}
@@ -856,8 +873,7 @@ static int cs_ioctl_default(struct hl_fpriv *hpriv, void __user *chunks,
job = hl_cs_allocate_job(hdev, queue_type,
is_kernel_allocated_cb);
if (!job) {
- atomic64_inc(
- &hpriv->ctx->cs_counters.out_of_mem_drop_cnt);
+ atomic64_inc(&ctx->cs_counters.out_of_mem_drop_cnt);
atomic64_inc(&cntr->out_of_mem_drop_cnt);
dev_err(hdev->dev, "Failed to allocate a new job\n");
rc = -ENOMEM;
@@ -891,7 +907,7 @@ static int cs_ioctl_default(struct hl_fpriv *hpriv, void __user *chunks,
rc = cs_parser(hpriv, job);
if (rc) {
- atomic64_inc(&hpriv->ctx->cs_counters.parsing_drop_cnt);
+ atomic64_inc(&ctx->cs_counters.parsing_drop_cnt);
atomic64_inc(&cntr->parsing_drop_cnt);
dev_err(hdev->dev,
"Failed to parse JOB %d.%llu.%d, err %d, rejecting the CS\n",
@@ -901,8 +917,8 @@ static int cs_ioctl_default(struct hl_fpriv *hpriv, void __user *chunks,
}
if (int_queues_only) {
- atomic64_inc(&hpriv->ctx->cs_counters.parsing_drop_cnt);
- atomic64_inc(&cntr->parsing_drop_cnt);
+ atomic64_inc(&ctx->cs_counters.validation_drop_cnt);
+ atomic64_inc(&cntr->validation_drop_cnt);
dev_err(hdev->dev,
"Reject CS %d.%llu because only internal queues jobs are present\n",
cs->ctx->asid, cs->sequence);
@@ -1042,7 +1058,7 @@ out:
}
static int cs_ioctl_extract_signal_seq(struct hl_device *hdev,
- struct hl_cs_chunk *chunk, u64 *signal_seq)
+ struct hl_cs_chunk *chunk, u64 *signal_seq, struct hl_ctx *ctx)
{
u64 *signal_seq_arr = NULL;
u32 size_to_copy, signal_seq_arr_len;
@@ -1052,6 +1068,8 @@ static int cs_ioctl_extract_signal_seq(struct hl_device *hdev,
/* currently only one signal seq is supported */
if (signal_seq_arr_len != 1) {
+ atomic64_inc(&ctx->cs_counters.validation_drop_cnt);
+ atomic64_inc(&hdev->aggregated_cs_counters.validation_drop_cnt);
dev_err(hdev->dev,
"Wait for signal CS supports only one signal CS seq\n");
return -EINVAL;
@@ -1060,13 +1078,18 @@ static int cs_ioctl_extract_signal_seq(struct hl_device *hdev,
signal_seq_arr = kmalloc_array(signal_seq_arr_len,
sizeof(*signal_seq_arr),
GFP_ATOMIC);
- if (!signal_seq_arr)
+ if (!signal_seq_arr) {
+ atomic64_inc(&ctx->cs_counters.out_of_mem_drop_cnt);
+ atomic64_inc(&hdev->aggregated_cs_counters.out_of_mem_drop_cnt);
return -ENOMEM;
+ }
size_to_copy = chunk->num_signal_seq_arr * sizeof(*signal_seq_arr);
if (copy_from_user(signal_seq_arr,
u64_to_user_ptr(chunk->signal_seq_arr),
size_to_copy)) {
+ atomic64_inc(&ctx->cs_counters.validation_drop_cnt);
+ atomic64_inc(&hdev->aggregated_cs_counters.validation_drop_cnt);
dev_err(hdev->dev,
"Failed to copy signal seq array from user\n");
rc = -EFAULT;
@@ -1153,6 +1176,7 @@ static int cs_ioctl_signal_wait(struct hl_fpriv *hpriv, enum hl_cs_type cs_type,
struct hl_device *hdev = hpriv->hdev;
struct hl_cs_compl *sig_waitcs_cmpl;
u32 q_idx, collective_engine_id = 0;
+ struct hl_cs_counters_atomic *cntr;
struct hl_fence *sig_fence = NULL;
struct hl_ctx *ctx = hpriv->ctx;
enum hl_queue_type q_type;
@@ -1160,9 +1184,11 @@ static int cs_ioctl_signal_wait(struct hl_fpriv *hpriv, enum hl_cs_type cs_type,
u64 signal_seq;
int rc;
+ cntr = &hdev->aggregated_cs_counters;
*cs_seq = ULLONG_MAX;
- rc = hl_cs_copy_chunk_array(hdev, &cs_chunk_array, chunks, num_chunks);
+ rc = hl_cs_copy_chunk_array(hdev, &cs_chunk_array, chunks, num_chunks,
+ ctx);
if (rc)
goto out;
@@ -1170,6 +1196,8 @@ static int cs_ioctl_signal_wait(struct hl_fpriv *hpriv, enum hl_cs_type cs_type,
chunk = &cs_chunk_array[0];
if (chunk->queue_index >= hdev->asic_prop.max_queues) {
+ atomic64_inc(&ctx->cs_counters.validation_drop_cnt);
+ atomic64_inc(&cntr->validation_drop_cnt);
dev_err(hdev->dev, "Queue index %d is invalid\n",
chunk->queue_index);
rc = -EINVAL;
@@ -1181,6 +1209,8 @@ static int cs_ioctl_signal_wait(struct hl_fpriv *hpriv, enum hl_cs_type cs_type,
q_type = hw_queue_prop->type;
if (!hw_queue_prop->supports_sync_stream) {
+ atomic64_inc(&ctx->cs_counters.validation_drop_cnt);
+ atomic64_inc(&cntr->validation_drop_cnt);
dev_err(hdev->dev,
"Queue index %d does not support sync stream operations\n",
q_idx);
@@ -1190,6 +1220,8 @@ static int cs_ioctl_signal_wait(struct hl_fpriv *hpriv, enum hl_cs_type cs_type,
if (cs_type == CS_TYPE_COLLECTIVE_WAIT) {
if (!(hw_queue_prop->collective_mode == HL_COLLECTIVE_MASTER)) {
+ atomic64_inc(&ctx->cs_counters.validation_drop_cnt);
+ atomic64_inc(&cntr->validation_drop_cnt);
dev_err(hdev->dev,
"Queue index %d is invalid\n", q_idx);
rc = -EINVAL;
@@ -1200,12 +1232,14 @@ static int cs_ioctl_signal_wait(struct hl_fpriv *hpriv, enum hl_cs_type cs_type,
}
if (cs_type == CS_TYPE_WAIT || cs_type == CS_TYPE_COLLECTIVE_WAIT) {
- rc = cs_ioctl_extract_signal_seq(hdev, chunk, &signal_seq);
+ rc = cs_ioctl_extract_signal_seq(hdev, chunk, &signal_seq, ctx);
if (rc)
goto free_cs_chunk_array;
sig_fence = hl_ctx_get_fence(ctx, signal_seq);
if (IS_ERR(sig_fence)) {
+ atomic64_inc(&ctx->cs_counters.validation_drop_cnt);
+ atomic64_inc(&cntr->validation_drop_cnt);
dev_err(hdev->dev,
"Failed to get signal CS with seq 0x%llx\n",
signal_seq);
@@ -1223,6 +1257,8 @@ static int cs_ioctl_signal_wait(struct hl_fpriv *hpriv, enum hl_cs_type cs_type,
container_of(sig_fence, struct hl_cs_compl, base_fence);
if (sig_waitcs_cmpl->type != CS_TYPE_SIGNAL) {
+ atomic64_inc(&ctx->cs_counters.validation_drop_cnt);
+ atomic64_inc(&cntr->validation_drop_cnt);
dev_err(hdev->dev,
"CS seq 0x%llx is not of a signal CS\n",
signal_seq);
@@ -1270,8 +1306,11 @@ static int cs_ioctl_signal_wait(struct hl_fpriv *hpriv, enum hl_cs_type cs_type,
else if (cs_type == CS_TYPE_COLLECTIVE_WAIT)
rc = hdev->asic_funcs->collective_wait_create_jobs(hdev, ctx,
cs, q_idx, collective_engine_id);
- else
+ else {
+ atomic64_inc(&ctx->cs_counters.validation_drop_cnt);
+ atomic64_inc(&cntr->validation_drop_cnt);
rc = -EINVAL;
+ }
if (rc)
goto free_cs_object;
diff --git a/drivers/misc/habanalabs/common/device.c b/drivers/misc/habanalabs/common/device.c
index 5871162a8442..69d04eca767f 100644
--- a/drivers/misc/habanalabs/common/device.c
+++ b/drivers/misc/habanalabs/common/device.c
@@ -17,12 +17,12 @@ enum hl_device_status hl_device_status(struct hl_device *hdev)
{
enum hl_device_status status;
- if (hdev->disabled)
- status = HL_DEVICE_STATUS_MALFUNCTION;
- else if (atomic_read(&hdev->in_reset))
+ if (atomic_read(&hdev->in_reset))
status = HL_DEVICE_STATUS_IN_RESET;
else if (hdev->needs_reset)
status = HL_DEVICE_STATUS_NEEDS_RESET;
+ else if (hdev->disabled)
+ status = HL_DEVICE_STATUS_MALFUNCTION;
else
status = HL_DEVICE_STATUS_OPERATIONAL;
@@ -1037,7 +1037,7 @@ kill_processes:
if (hard_reset) {
/* Release kernel context */
- if (hl_ctx_put(hdev->kernel_ctx) == 1)
+ if (hdev->kernel_ctx && hl_ctx_put(hdev->kernel_ctx) == 1)
hdev->kernel_ctx = NULL;
hl_vm_fini(hdev);
hl_mmu_fini(hdev);
@@ -1092,6 +1092,7 @@ kill_processes:
GFP_KERNEL);
if (!hdev->kernel_ctx) {
rc = -ENOMEM;
+ hl_mmu_fini(hdev);
goto out_err;
}
@@ -1103,6 +1104,7 @@ kill_processes:
"failed to init kernel ctx in hard reset\n");
kfree(hdev->kernel_ctx);
hdev->kernel_ctx = NULL;
+ hl_mmu_fini(hdev);
goto out_err;
}
}
@@ -1485,6 +1487,15 @@ void hl_device_fini(struct hl_device *hdev)
}
}
+ /* Disable PCI access from device F/W so it won't send us additional
+ * interrupts. We disable MSI/MSI-X at the halt_engines function and we
+ * can't have the F/W sending us interrupts after that. We need to
+ * disable the access here because if the device is marked disable, the
+ * message won't be send. Also, in case of heartbeat, the device CPU is
+ * marked as disable so this message won't be sent
+ */
+ hl_fw_send_pci_access_msg(hdev, CPUCP_PACKET_DISABLE_PCI_ACCESS);
+
/* Mark device as disabled */
hdev->disabled = true;
diff --git a/drivers/misc/habanalabs/common/firmware_if.c b/drivers/misc/habanalabs/common/firmware_if.c
index 0e1c629e9800..c9a12980218a 100644
--- a/drivers/misc/habanalabs/common/firmware_if.c
+++ b/drivers/misc/habanalabs/common/firmware_if.c
@@ -402,6 +402,10 @@ int hl_fw_cpucp_pci_counters_get(struct hl_device *hdev,
}
counters->rx_throughput = result;
+ memset(&pkt, 0, sizeof(pkt));
+ pkt.ctl = cpu_to_le32(CPUCP_PACKET_PCIE_THROUGHPUT_GET <<
+ CPUCP_PKT_CTL_OPCODE_SHIFT);
+
/* Fetch PCI tx counter */
pkt.index = cpu_to_le32(cpucp_pcie_throughput_tx);
rc = hdev->asic_funcs->send_cpu_message(hdev, (u32 *) &pkt, sizeof(pkt),
@@ -414,6 +418,7 @@ int hl_fw_cpucp_pci_counters_get(struct hl_device *hdev,
counters->tx_throughput = result;
/* Fetch PCI replay counter */
+ memset(&pkt, 0, sizeof(pkt));
pkt.ctl = cpu_to_le32(CPUCP_PACKET_PCIE_REPLAY_CNT_GET <<
CPUCP_PKT_CTL_OPCODE_SHIFT);
@@ -627,25 +632,38 @@ int hl_fw_read_preboot_status(struct hl_device *hdev, u32 cpu_boot_status_reg,
security_status = RREG32(cpu_security_boot_status_reg);
/* We read security status multiple times during boot:
- * 1. preboot - we check if fw security feature is supported
- * 2. boot cpu - we get boot cpu security status
- * 3. FW application - we get FW application security status
+ * 1. preboot - a. Check whether the security status bits are valid
+ * b. Check whether fw security is enabled
+ * c. Check whether hard reset is done by preboot
+ * 2. boot cpu - a. Fetch boot cpu security status
+ * b. Check whether hard reset is done by boot cpu
+ * 3. FW application - a. Fetch fw application security status
+ * b. Check whether hard reset is done by fw app
*
* Preboot:
* Check security status bit (CPU_BOOT_DEV_STS0_ENABLED), if it is set
* check security enabled bit (CPU_BOOT_DEV_STS0_SECURITY_EN)
*/
if (security_status & CPU_BOOT_DEV_STS0_ENABLED) {
- hdev->asic_prop.fw_security_status_valid = 1;
- prop->fw_security_disabled =
- !(security_status & CPU_BOOT_DEV_STS0_SECURITY_EN);
+ prop->fw_security_status_valid = 1;
+
+ if (security_status & CPU_BOOT_DEV_STS0_SECURITY_EN)
+ prop->fw_security_disabled = false;
+ else
+ prop->fw_security_disabled = true;
+
+ if (security_status & CPU_BOOT_DEV_STS0_FW_HARD_RST_EN)
+ prop->hard_reset_done_by_fw = true;
} else {
- hdev->asic_prop.fw_security_status_valid = 0;
+ prop->fw_security_status_valid = 0;
prop->fw_security_disabled = true;
}
+ dev_dbg(hdev->dev, "Firmware preboot hard-reset is %s\n",
+ prop->hard_reset_done_by_fw ? "enabled" : "disabled");
+
dev_info(hdev->dev, "firmware-level security is %s\n",
- prop->fw_security_disabled ? "disabled" : "enabled");
+ prop->fw_security_disabled ? "disabled" : "enabled");
return 0;
}
@@ -655,6 +673,7 @@ int hl_fw_init_cpu(struct hl_device *hdev, u32 cpu_boot_status_reg,
u32 cpu_security_boot_status_reg, u32 boot_err0_reg,
bool skip_bmc, u32 cpu_timeout, u32 boot_fit_timeout)
{
+ struct asic_fixed_properties *prop = &hdev->asic_prop;
u32 status;
int rc;
@@ -723,11 +742,22 @@ int hl_fw_init_cpu(struct hl_device *hdev, u32 cpu_boot_status_reg,
/* Read U-Boot version now in case we will later fail */
hdev->asic_funcs->read_device_fw_version(hdev, FW_COMP_UBOOT);
+ /* Clear reset status since we need to read it again from boot CPU */
+ prop->hard_reset_done_by_fw = false;
+
/* Read boot_cpu security bits */
- if (hdev->asic_prop.fw_security_status_valid)
- hdev->asic_prop.fw_boot_cpu_security_map =
+ if (prop->fw_security_status_valid) {
+ prop->fw_boot_cpu_security_map =
RREG32(cpu_security_boot_status_reg);
+ if (prop->fw_boot_cpu_security_map &
+ CPU_BOOT_DEV_STS0_FW_HARD_RST_EN)
+ prop->hard_reset_done_by_fw = true;
+ }
+
+ dev_dbg(hdev->dev, "Firmware boot CPU hard-reset is %s\n",
+ prop->hard_reset_done_by_fw ? "enabled" : "disabled");
+
if (rc) {
detect_cpu_boot_status(hdev, status);
rc = -EIO;
@@ -796,18 +826,21 @@ int hl_fw_init_cpu(struct hl_device *hdev, u32 cpu_boot_status_reg,
goto out;
}
+ /* Clear reset status since we need to read again from app */
+ prop->hard_reset_done_by_fw = false;
+
/* Read FW application security bits */
- if (hdev->asic_prop.fw_security_status_valid) {
- hdev->asic_prop.fw_app_security_map =
+ if (prop->fw_security_status_valid) {
+ prop->fw_app_security_map =
RREG32(cpu_security_boot_status_reg);
- if (hdev->asic_prop.fw_app_security_map &
+ if (prop->fw_app_security_map &
CPU_BOOT_DEV_STS0_FW_HARD_RST_EN)
- hdev->asic_prop.hard_reset_done_by_fw = true;
+ prop->hard_reset_done_by_fw = true;
}
- dev_dbg(hdev->dev, "Firmware hard-reset is %s\n",
- hdev->asic_prop.hard_reset_done_by_fw ? "enabled" : "disabled");
+ dev_dbg(hdev->dev, "Firmware application CPU hard-reset is %s\n",
+ prop->hard_reset_done_by_fw ? "enabled" : "disabled");
dev_info(hdev->dev, "Successfully loaded firmware to device\n");
diff --git a/drivers/misc/habanalabs/common/habanalabs.h b/drivers/misc/habanalabs/common/habanalabs.h
index 571eda6ef5ab..60e16dc4bcac 100644
--- a/drivers/misc/habanalabs/common/habanalabs.h
+++ b/drivers/misc/habanalabs/common/habanalabs.h
@@ -944,7 +944,7 @@ struct hl_asic_funcs {
u32 (*get_signal_cb_size)(struct hl_device *hdev);
u32 (*get_wait_cb_size)(struct hl_device *hdev);
u32 (*gen_signal_cb)(struct hl_device *hdev, void *data, u16 sob_id,
- u32 size);
+ u32 size, bool eb);
u32 (*gen_wait_cb)(struct hl_device *hdev,
struct hl_gen_wait_properties *prop);
void (*reset_sob)(struct hl_device *hdev, void *data);
@@ -1000,6 +1000,7 @@ struct hl_va_range {
* @queue_full_drop_cnt: dropped due to queue full
* @device_in_reset_drop_cnt: dropped due to device in reset
* @max_cs_in_flight_drop_cnt: dropped due to maximum CS in-flight
+ * @validation_drop_cnt: dropped due to error in validation
*/
struct hl_cs_counters_atomic {
atomic64_t out_of_mem_drop_cnt;
@@ -1007,6 +1008,7 @@ struct hl_cs_counters_atomic {
atomic64_t queue_full_drop_cnt;
atomic64_t device_in_reset_drop_cnt;
atomic64_t max_cs_in_flight_drop_cnt;
+ atomic64_t validation_drop_cnt;
};
/**
@@ -2180,6 +2182,7 @@ void hl_mmu_v1_set_funcs(struct hl_device *hdev, struct hl_mmu_funcs *mmu);
int hl_mmu_va_to_pa(struct hl_ctx *ctx, u64 virt_addr, u64 *phys_addr);
int hl_mmu_get_tlb_info(struct hl_ctx *ctx, u64 virt_addr,
struct hl_mmu_hop_info *hops);
+bool hl_is_dram_va(struct hl_device *hdev, u64 virt_addr);
int hl_fw_load_fw_to_device(struct hl_device *hdev, const char *fw_name,
void __iomem *dst, u32 src_offset, u32 size);
diff --git a/drivers/misc/habanalabs/common/habanalabs_drv.c b/drivers/misc/habanalabs/common/habanalabs_drv.c
index 6bbb6bca6860..032d114f01ea 100644
--- a/drivers/misc/habanalabs/common/habanalabs_drv.c
+++ b/drivers/misc/habanalabs/common/habanalabs_drv.c
@@ -544,6 +544,7 @@ static struct pci_driver hl_pci_driver = {
.id_table = ids,
.probe = hl_pci_probe,
.remove = hl_pci_remove,
+ .shutdown = hl_pci_remove,
.driver.pm = &hl_pm_ops,
.err_handler = &hl_pci_err_handler,
};
diff --git a/drivers/misc/habanalabs/common/habanalabs_ioctl.c b/drivers/misc/habanalabs/common/habanalabs_ioctl.c
index 32e6af1db4e3..d25892d61ec9 100644
--- a/drivers/misc/habanalabs/common/habanalabs_ioctl.c
+++ b/drivers/misc/habanalabs/common/habanalabs_ioctl.c
@@ -133,6 +133,8 @@ static int hw_idle(struct hl_device *hdev, struct hl_info_args *args)
hw_idle.is_idle = hdev->asic_funcs->is_device_idle(hdev,
&hw_idle.busy_engines_mask_ext, NULL);
+ hw_idle.busy_engines_mask =
+ lower_32_bits(hw_idle.busy_engines_mask_ext);
return copy_to_user(out, &hw_idle,
min((size_t) max_size, sizeof(hw_idle))) ? -EFAULT : 0;
@@ -335,6 +337,8 @@ static int cs_counters_info(struct hl_fpriv *hpriv, struct hl_info_args *args)
atomic64_read(&cntr->device_in_reset_drop_cnt);
cs_counters.total_max_cs_in_flight_drop_cnt =
atomic64_read(&cntr->max_cs_in_flight_drop_cnt);
+ cs_counters.total_validation_drop_cnt =
+ atomic64_read(&cntr->validation_drop_cnt);
if (hpriv->ctx) {
cs_counters.ctx_out_of_mem_drop_cnt =
@@ -352,6 +356,9 @@ static int cs_counters_info(struct hl_fpriv *hpriv, struct hl_info_args *args)
cs_counters.ctx_max_cs_in_flight_drop_cnt =
atomic64_read(
&hpriv->ctx->cs_counters.max_cs_in_flight_drop_cnt);
+ cs_counters.ctx_validation_drop_cnt =
+ atomic64_read(
+ &hpriv->ctx->cs_counters.validation_drop_cnt);
}
return copy_to_user(out, &cs_counters,
@@ -406,7 +413,7 @@ static int total_energy_consumption_info(struct hl_fpriv *hpriv,
static int pll_frequency_info(struct hl_fpriv *hpriv, struct hl_info_args *args)
{
struct hl_device *hdev = hpriv->hdev;
- struct hl_pll_frequency_info freq_info = {0};
+ struct hl_pll_frequency_info freq_info = { {0} };
u32 max_size = args->return_size;
void __user *out = (void __user *) (uintptr_t) args->return_pointer;
int rc;
diff --git a/drivers/misc/habanalabs/common/hw_queue.c b/drivers/misc/habanalabs/common/hw_queue.c
index 7caf868d1585..76217258780a 100644
--- a/drivers/misc/habanalabs/common/hw_queue.c
+++ b/drivers/misc/habanalabs/common/hw_queue.c
@@ -418,8 +418,11 @@ static void init_signal_cs(struct hl_device *hdev,
"generate signal CB, sob_id: %d, sob val: 0x%x, q_idx: %d\n",
cs_cmpl->hw_sob->sob_id, cs_cmpl->sob_val, q_idx);
+ /* we set an EB since we must make sure all oeprations are done
+ * when sending the signal
+ */
hdev->asic_funcs->gen_signal_cb(hdev, job->patched_cb,
- cs_cmpl->hw_sob->sob_id, 0);
+ cs_cmpl->hw_sob->sob_id, 0, true);
kref_get(&hw_sob->kref);
diff --git a/drivers/misc/habanalabs/common/memory.c b/drivers/misc/habanalabs/common/memory.c
index cbe9da4e0211..5d4fbdcaefe3 100644
--- a/drivers/misc/habanalabs/common/memory.c
+++ b/drivers/misc/habanalabs/common/memory.c
@@ -886,8 +886,10 @@ static void unmap_phys_pg_pack(struct hl_ctx *ctx, u64 vaddr,
{
struct hl_device *hdev = ctx->hdev;
u64 next_vaddr, i;
+ bool is_host_addr;
u32 page_size;
+ is_host_addr = !hl_is_dram_va(hdev, vaddr);
page_size = phys_pg_pack->page_size;
next_vaddr = vaddr;
@@ -900,9 +902,13 @@ static void unmap_phys_pg_pack(struct hl_ctx *ctx, u64 vaddr,
/*
* unmapping on Palladium can be really long, so avoid a CPU
* soft lockup bug by sleeping a little between unmapping pages
+ *
+ * In addition, when unmapping host memory we pass through
+ * the Linux kernel to unpin the pages and that takes a long
+ * time. Therefore, sleep every 32K pages to avoid soft lockup
*/
- if (hdev->pldm)
- usleep_range(500, 1000);
+ if (hdev->pldm || (is_host_addr && (i & 0x7FFF) == 0))
+ usleep_range(50, 200);
}
}
diff --git a/drivers/misc/habanalabs/common/mmu.c b/drivers/misc/habanalabs/common/mmu.c
index 33ae953d3a36..28a4638741d8 100644
--- a/drivers/misc/habanalabs/common/mmu.c
+++ b/drivers/misc/habanalabs/common/mmu.c
@@ -9,7 +9,7 @@
#include "habanalabs.h"
-static bool is_dram_va(struct hl_device *hdev, u64 virt_addr)
+bool hl_is_dram_va(struct hl_device *hdev, u64 virt_addr)
{
struct asic_fixed_properties *prop = &hdev->asic_prop;
@@ -156,7 +156,7 @@ int hl_mmu_unmap_page(struct hl_ctx *ctx, u64 virt_addr, u32 page_size,
if (!hdev->mmu_enable)
return 0;
- is_dram_addr = is_dram_va(hdev, virt_addr);
+ is_dram_addr = hl_is_dram_va(hdev, virt_addr);
if (is_dram_addr)
mmu_prop = &prop->dmmu;
@@ -236,7 +236,7 @@ int hl_mmu_map_page(struct hl_ctx *ctx, u64 virt_addr, u64 phys_addr,
if (!hdev->mmu_enable)
return 0;
- is_dram_addr = is_dram_va(hdev, virt_addr);
+ is_dram_addr = hl_is_dram_va(hdev, virt_addr);
if (is_dram_addr)
mmu_prop = &prop->dmmu;
diff --git a/drivers/misc/habanalabs/common/mmu_v1.c b/drivers/misc/habanalabs/common/mmu_v1.c
index 2ce6ea89d4fa..06d8a44dd5d4 100644
--- a/drivers/misc/habanalabs/common/mmu_v1.c
+++ b/drivers/misc/habanalabs/common/mmu_v1.c
@@ -467,8 +467,16 @@ static void hl_mmu_v1_fini(struct hl_device *hdev)
{
/* MMU H/W fini was already done in device hw_fini() */
- kvfree(hdev->mmu_priv.dr.mmu_shadow_hop0);
- gen_pool_destroy(hdev->mmu_priv.dr.mmu_pgt_pool);
+ if (!ZERO_OR_NULL_PTR(hdev->mmu_priv.hr.mmu_shadow_hop0)) {
+ kvfree(hdev->mmu_priv.dr.mmu_shadow_hop0);
+ gen_pool_destroy(hdev->mmu_priv.dr.mmu_pgt_pool);
+ }
+
+ /* Make sure that if we arrive here again without init was called we
+ * won't cause kernel panic. This can happen for example if we fail
+ * during hard reset code at certain points
+ */
+ hdev->mmu_priv.dr.mmu_shadow_hop0 = NULL;
}
/**
diff --git a/drivers/misc/habanalabs/common/pci.c b/drivers/misc/habanalabs/common/pci.c
index 923b2606e29f..b4725e6101f6 100644
--- a/drivers/misc/habanalabs/common/pci.c
+++ b/drivers/misc/habanalabs/common/pci.c
@@ -130,10 +130,8 @@ static int hl_pci_elbi_write(struct hl_device *hdev, u64 addr, u32 data)
if ((val & PCI_CONFIG_ELBI_STS_MASK) == PCI_CONFIG_ELBI_STS_DONE)
return 0;
- if (val & PCI_CONFIG_ELBI_STS_ERR) {
- dev_err(hdev->dev, "Error writing to ELBI\n");
+ if (val & PCI_CONFIG_ELBI_STS_ERR)
return -EIO;
- }
if (!(val & PCI_CONFIG_ELBI_STS_MASK)) {
dev_err(hdev->dev, "ELBI write didn't finish in time\n");
@@ -160,8 +158,12 @@ int hl_pci_iatu_write(struct hl_device *hdev, u32 addr, u32 data)
dbi_offset = addr & 0xFFF;
- rc = hl_pci_elbi_write(hdev, prop->pcie_aux_dbi_reg_addr, 0x00300000);
- rc |= hl_pci_elbi_write(hdev, prop->pcie_dbi_base_address + dbi_offset,
+ /* Ignore result of writing to pcie_aux_dbi_reg_addr as it could fail
+ * in case the firmware security is enabled
+ */
+ hl_pci_elbi_write(hdev, prop->pcie_aux_dbi_reg_addr, 0x00300000);
+
+ rc = hl_pci_elbi_write(hdev, prop->pcie_dbi_base_address + dbi_offset,
data);
if (rc)
@@ -244,9 +246,11 @@ int hl_pci_set_inbound_region(struct hl_device *hdev, u8 region,
rc |= hl_pci_iatu_write(hdev, offset + 0x4, ctrl_reg_val);
- /* Return the DBI window to the default location */
- rc |= hl_pci_elbi_write(hdev, prop->pcie_aux_dbi_reg_addr, 0);
- rc |= hl_pci_elbi_write(hdev, prop->pcie_aux_dbi_reg_addr + 4, 0);
+ /* Return the DBI window to the default location
+ * Ignore result of writing to pcie_aux_dbi_reg_addr as it could fail
+ * in case the firmware security is enabled
+ */
+ hl_pci_elbi_write(hdev, prop->pcie_aux_dbi_reg_addr, 0);
if (rc)
dev_err(hdev->dev, "failed to map bar %u to 0x%08llx\n",
@@ -294,9 +298,11 @@ int hl_pci_set_outbound_region(struct hl_device *hdev,
/* Enable */
rc |= hl_pci_iatu_write(hdev, 0x004, 0x80000000);
- /* Return the DBI window to the default location */
- rc |= hl_pci_elbi_write(hdev, prop->pcie_aux_dbi_reg_addr, 0);
- rc |= hl_pci_elbi_write(hdev, prop->pcie_aux_dbi_reg_addr + 4, 0);
+ /* Return the DBI window to the default location
+ * Ignore result of writing to pcie_aux_dbi_reg_addr as it could fail
+ * in case the firmware security is enabled
+ */
+ hl_pci_elbi_write(hdev, prop->pcie_aux_dbi_reg_addr, 0);
return rc;
}
diff --git a/drivers/misc/habanalabs/gaudi/gaudi.c b/drivers/misc/habanalabs/gaudi/gaudi.c
index 1f1926607c5e..b328ddaa64ee 100644
--- a/drivers/misc/habanalabs/gaudi/gaudi.c
+++ b/drivers/misc/habanalabs/gaudi/gaudi.c
@@ -151,19 +151,6 @@ static const u16 gaudi_packet_sizes[MAX_PACKET_ID] = {
[PACKET_LOAD_AND_EXE] = sizeof(struct packet_load_and_exe)
};
-static const u32 gaudi_pll_base_addresses[GAUDI_PLL_MAX] = {
- [CPU_PLL] = mmPSOC_CPU_PLL_NR,
- [PCI_PLL] = mmPSOC_PCI_PLL_NR,
- [SRAM_PLL] = mmSRAM_W_PLL_NR,
- [HBM_PLL] = mmPSOC_HBM_PLL_NR,
- [NIC_PLL] = mmNIC0_PLL_NR,
- [DMA_PLL] = mmDMA_W_PLL_NR,
- [MESH_PLL] = mmMESH_W_PLL_NR,
- [MME_PLL] = mmPSOC_MME_PLL_NR,
- [TPC_PLL] = mmPSOC_TPC_PLL_NR,
- [IF_PLL] = mmIF_W_PLL_NR
-};
-
static inline bool validate_packet_id(enum packet_id id)
{
switch (id) {
@@ -374,7 +361,7 @@ static int gaudi_cpucp_info_get(struct hl_device *hdev);
static void gaudi_disable_clock_gating(struct hl_device *hdev);
static void gaudi_mmu_prepare(struct hl_device *hdev, u32 asid);
static u32 gaudi_gen_signal_cb(struct hl_device *hdev, void *data, u16 sob_id,
- u32 size);
+ u32 size, bool eb);
static u32 gaudi_gen_wait_cb(struct hl_device *hdev,
struct hl_gen_wait_properties *prop);
@@ -667,12 +654,6 @@ static int gaudi_early_init(struct hl_device *hdev)
if (rc)
goto free_queue_props;
- if (gaudi_get_hw_state(hdev) == HL_DEVICE_HW_STATE_DIRTY) {
- dev_info(hdev->dev,
- "H/W state is dirty, must reset before initializing\n");
- hdev->asic_funcs->hw_fini(hdev, true);
- }
-
/* Before continuing in the initialization, we need to read the preboot
* version to determine whether we run with a security-enabled firmware
*/
@@ -685,6 +666,12 @@ static int gaudi_early_init(struct hl_device *hdev)
goto pci_fini;
}
+ if (gaudi_get_hw_state(hdev) == HL_DEVICE_HW_STATE_DIRTY) {
+ dev_info(hdev->dev,
+ "H/W state is dirty, must reset before initializing\n");
+ hdev->asic_funcs->hw_fini(hdev, true);
+ }
+
return 0;
pci_fini:
@@ -703,93 +690,60 @@ static int gaudi_early_fini(struct hl_device *hdev)
}
/**
- * gaudi_fetch_pll_frequency - Fetch PLL frequency values
+ * gaudi_fetch_psoc_frequency - Fetch PSOC frequency values
*
* @hdev: pointer to hl_device structure
- * @pll_index: index of the pll to fetch frequency from
- * @pll_freq: pointer to store the pll frequency in MHz in each of the available
- * outputs. if a certain output is not available a 0 will be set
*
*/
-static int gaudi_fetch_pll_frequency(struct hl_device *hdev,
- enum gaudi_pll_index pll_index,
- u16 *pll_freq_arr)
+static int gaudi_fetch_psoc_frequency(struct hl_device *hdev)
{
- u32 nr = 0, nf = 0, od = 0, pll_clk = 0, div_fctr, div_sel,
- pll_base_addr = gaudi_pll_base_addresses[pll_index];
- u16 freq = 0;
- int i, rc;
-
- if (hdev->asic_prop.fw_security_status_valid &&
- (hdev->asic_prop.fw_app_security_map &
- CPU_BOOT_DEV_STS0_PLL_INFO_EN)) {
- rc = hl_fw_cpucp_pll_info_get(hdev, pll_index, pll_freq_arr);
+ struct asic_fixed_properties *prop = &hdev->asic_prop;
+ u32 nr = 0, nf = 0, od = 0, div_fctr = 0, pll_clk, div_sel;
+ u16 pll_freq_arr[HL_PLL_NUM_OUTPUTS], freq;
+ int rc;
- if (rc)
- return rc;
- } else if (hdev->asic_prop.fw_security_disabled) {
+ if (hdev->asic_prop.fw_security_disabled) {
/* Backward compatibility */
- nr = RREG32(pll_base_addr + PLL_NR_OFFSET);
- nf = RREG32(pll_base_addr + PLL_NF_OFFSET);
- od = RREG32(pll_base_addr + PLL_OD_OFFSET);
-
- for (i = 0; i < HL_PLL_NUM_OUTPUTS; i++) {
- div_fctr = RREG32(pll_base_addr +
- PLL_DIV_FACTOR_0_OFFSET + i * 4);
- div_sel = RREG32(pll_base_addr +
- PLL_DIV_SEL_0_OFFSET + i * 4);
+ div_fctr = RREG32(mmPSOC_CPU_PLL_DIV_FACTOR_2);
+ div_sel = RREG32(mmPSOC_CPU_PLL_DIV_SEL_2);
+ nr = RREG32(mmPSOC_CPU_PLL_NR);
+ nf = RREG32(mmPSOC_CPU_PLL_NF);
+ od = RREG32(mmPSOC_CPU_PLL_OD);
- if (div_sel == DIV_SEL_REF_CLK ||
+ if (div_sel == DIV_SEL_REF_CLK ||
div_sel == DIV_SEL_DIVIDED_REF) {
- if (div_sel == DIV_SEL_REF_CLK)
- freq = PLL_REF_CLK;
- else
- freq = PLL_REF_CLK / (div_fctr + 1);
- } else if (div_sel == DIV_SEL_PLL_CLK ||
- div_sel == DIV_SEL_DIVIDED_PLL) {
- pll_clk = PLL_REF_CLK * (nf + 1) /
- ((nr + 1) * (od + 1));
- if (div_sel == DIV_SEL_PLL_CLK)
- freq = pll_clk;
- else
- freq = pll_clk / (div_fctr + 1);
- } else {
- dev_warn(hdev->dev,
- "Received invalid div select value: %d",
- div_sel);
- }
-
- pll_freq_arr[i] = freq;
+ if (div_sel == DIV_SEL_REF_CLK)
+ freq = PLL_REF_CLK;
+ else
+ freq = PLL_REF_CLK / (div_fctr + 1);
+ } else if (div_sel == DIV_SEL_PLL_CLK ||
+ div_sel == DIV_SEL_DIVIDED_PLL) {
+ pll_clk = PLL_REF_CLK * (nf + 1) /
+ ((nr + 1) * (od + 1));
+ if (div_sel == DIV_SEL_PLL_CLK)
+ freq = pll_clk;
+ else
+ freq = pll_clk / (div_fctr + 1);
+ } else {
+ dev_warn(hdev->dev,
+ "Received invalid div select value: %d",
+ div_sel);
+ freq = 0;
}
} else {
- dev_err(hdev->dev, "Failed to fetch PLL frequency values\n");
- return -EIO;
- }
+ rc = hl_fw_cpucp_pll_info_get(hdev, CPU_PLL, pll_freq_arr);
- return 0;
-}
-
-/**
- * gaudi_fetch_psoc_frequency - Fetch PSOC frequency values
- *
- * @hdev: pointer to hl_device structure
- *
- */
-static int gaudi_fetch_psoc_frequency(struct hl_device *hdev)
-{
- struct asic_fixed_properties *prop = &hdev->asic_prop;
- u16 pll_freq[HL_PLL_NUM_OUTPUTS];
- int rc;
+ if (rc)
+ return rc;
- rc = gaudi_fetch_pll_frequency(hdev, CPU_PLL, pll_freq);
- if (rc)
- return rc;
+ freq = pll_freq_arr[2];
+ }
- prop->psoc_timestamp_frequency = pll_freq[2];
- prop->psoc_pci_pll_nr = 0;
- prop->psoc_pci_pll_nf = 0;
- prop->psoc_pci_pll_od = 0;
- prop->psoc_pci_pll_div_factor = 0;
+ prop->psoc_timestamp_frequency = freq;
+ prop->psoc_pci_pll_nr = nr;
+ prop->psoc_pci_pll_nf = nf;
+ prop->psoc_pci_pll_od = od;
+ prop->psoc_pci_pll_div_factor = div_fctr;
return 0;
}
@@ -884,11 +838,17 @@ static int gaudi_init_tpc_mem(struct hl_device *hdev)
size_t fw_size;
void *cpu_addr;
dma_addr_t dma_handle;
- int rc;
+ int rc, count = 5;
+again:
rc = request_firmware(&fw, GAUDI_TPC_FW_FILE, hdev->dev);
+ if (rc == -EINTR && count-- > 0) {
+ msleep(50);
+ goto again;
+ }
+
if (rc) {
- dev_err(hdev->dev, "Firmware file %s is not found!\n",
+ dev_err(hdev->dev, "Failed to load firmware file %s\n",
GAUDI_TPC_FW_FILE);
goto out;
}
@@ -1110,7 +1070,7 @@ static void gaudi_collective_slave_init_job(struct hl_device *hdev,
prop->collective_sob_id, queue_id);
cb_size += gaudi_gen_signal_cb(hdev, job->user_cb,
- prop->collective_sob_id, cb_size);
+ prop->collective_sob_id, cb_size, false);
}
static void gaudi_collective_wait_init_cs(struct hl_cs *cs)
@@ -2449,8 +2409,6 @@ static void gaudi_init_golden_registers(struct hl_device *hdev)
gaudi_init_e2e(hdev);
gaudi_init_hbm_cred(hdev);
- hdev->asic_funcs->disable_clock_gating(hdev);
-
for (tpc_id = 0, tpc_offset = 0;
tpc_id < TPC_NUMBER_OF_ENGINES;
tpc_id++, tpc_offset += TPC_CFG_OFFSET) {
@@ -3462,6 +3420,9 @@ static void gaudi_set_clock_gating(struct hl_device *hdev)
if (hdev->in_debug)
return;
+ if (!hdev->asic_prop.fw_security_disabled)
+ return;
+
for (i = GAUDI_PCI_DMA_1, qman_offset = 0 ; i < GAUDI_HBM_DMA_1 ; i++) {
enable = !!(hdev->clock_gating_mask &
(BIT_ULL(gaudi_dma_assignment[i])));
@@ -3513,7 +3474,7 @@ static void gaudi_disable_clock_gating(struct hl_device *hdev)
u32 qman_offset;
int i;
- if (!(gaudi->hw_cap_initialized & HW_CAP_CLK_GATE))
+ if (!hdev->asic_prop.fw_security_disabled)
return;
for (i = 0, qman_offset = 0 ; i < DMA_NUMBER_OF_CHANNELS ; i++) {
@@ -3806,7 +3767,7 @@ static int gaudi_init_cpu_queues(struct hl_device *hdev, u32 cpu_timeout)
static void gaudi_pre_hw_init(struct hl_device *hdev)
{
/* Perform read from the device to make sure device is up */
- RREG32(mmPCIE_DBI_DEVICE_ID_VENDOR_ID_REG);
+ RREG32(mmHW_STATE);
if (hdev->asic_prop.fw_security_disabled) {
/* Set the access through PCI bars (Linux driver only) as
@@ -3847,6 +3808,13 @@ static int gaudi_hw_init(struct hl_device *hdev)
return rc;
}
+ /* In case the clock gating was enabled in preboot we need to disable
+ * it here before touching the MME/TPC registers.
+ * There is no need to take clk gating mutex because when this function
+ * runs, no other relevant code can run
+ */
+ hdev->asic_funcs->disable_clock_gating(hdev);
+
/* SRAM scrambler must be initialized after CPU is running from HBM */
gaudi_init_scrambler_sram(hdev);
@@ -3885,7 +3853,7 @@ static int gaudi_hw_init(struct hl_device *hdev)
}
/* Perform read from the device to flush all configuration */
- RREG32(mmPCIE_DBI_DEVICE_ID_VENDOR_ID_REG);
+ RREG32(mmHW_STATE);
return 0;
@@ -3927,7 +3895,10 @@ static void gaudi_hw_fini(struct hl_device *hdev, bool hard_reset)
/* I don't know what is the state of the CPU so make sure it is
* stopped in any means necessary
*/
- WREG32(mmPSOC_GLOBAL_CONF_KMD_MSG_TO_CPU, KMD_MSG_GOTO_WFE);
+ if (hdev->asic_prop.hard_reset_done_by_fw)
+ WREG32(mmPSOC_GLOBAL_CONF_KMD_MSG_TO_CPU, KMD_MSG_RST_DEV);
+ else
+ WREG32(mmPSOC_GLOBAL_CONF_KMD_MSG_TO_CPU, KMD_MSG_GOTO_WFE);
WREG32(mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR, GAUDI_EVENT_HALT_MACHINE);
@@ -3971,11 +3942,15 @@ static void gaudi_hw_fini(struct hl_device *hdev, bool hard_reset)
WREG32(mmPSOC_GLOBAL_CONF_SW_ALL_RST,
1 << PSOC_GLOBAL_CONF_SW_ALL_RST_IND_SHIFT);
- }
- dev_info(hdev->dev,
- "Issued HARD reset command, going to wait %dms\n",
- reset_timeout_ms);
+ dev_info(hdev->dev,
+ "Issued HARD reset command, going to wait %dms\n",
+ reset_timeout_ms);
+ } else {
+ dev_info(hdev->dev,
+ "Firmware performs HARD reset, going to wait %dms\n",
+ reset_timeout_ms);
+ }
/*
* After hard reset, we can't poll the BTM_FSM register because the PSOC
@@ -4027,7 +4002,8 @@ static int gaudi_cb_mmap(struct hl_device *hdev, struct vm_area_struct *vma,
vma->vm_flags |= VM_IO | VM_PFNMAP | VM_DONTEXPAND | VM_DONTDUMP |
VM_DONTCOPY | VM_NORESERVE;
- rc = dma_mmap_coherent(hdev->dev, vma, cpu_addr, dma_addr, size);
+ rc = dma_mmap_coherent(hdev->dev, vma, cpu_addr,
+ (dma_addr - HOST_PHYS_BASE), size);
if (rc)
dev_err(hdev->dev, "dma_mmap_coherent error %d", rc);
@@ -7936,7 +7912,7 @@ static u32 gaudi_get_wait_cb_size(struct hl_device *hdev)
}
static u32 gaudi_gen_signal_cb(struct hl_device *hdev, void *data, u16 sob_id,
- u32 size)
+ u32 size, bool eb)
{
struct hl_cb *cb = (struct hl_cb *) data;
struct packet_msg_short *pkt;
@@ -7953,7 +7929,7 @@ static u32 gaudi_gen_signal_cb(struct hl_device *hdev, void *data, u16 sob_id,
ctl |= FIELD_PREP(GAUDI_PKT_SHORT_CTL_OP_MASK, 0); /* write the value */
ctl |= FIELD_PREP(GAUDI_PKT_SHORT_CTL_BASE_MASK, 3); /* W_S SOB base */
ctl |= FIELD_PREP(GAUDI_PKT_SHORT_CTL_OPCODE_MASK, PACKET_MSG_SHORT);
- ctl |= FIELD_PREP(GAUDI_PKT_SHORT_CTL_EB_MASK, 1);
+ ctl |= FIELD_PREP(GAUDI_PKT_SHORT_CTL_EB_MASK, eb);
ctl |= FIELD_PREP(GAUDI_PKT_SHORT_CTL_RB_MASK, 1);
ctl |= FIELD_PREP(GAUDI_PKT_SHORT_CTL_MB_MASK, 1);
diff --git a/drivers/misc/habanalabs/gaudi/gaudiP.h b/drivers/misc/habanalabs/gaudi/gaudiP.h
index f2d91f4fcffe..a7ab2d7e57d4 100644
--- a/drivers/misc/habanalabs/gaudi/gaudiP.h
+++ b/drivers/misc/habanalabs/gaudi/gaudiP.h
@@ -105,13 +105,6 @@
#define MME_ACC_OFFSET (mmMME1_ACC_BASE - mmMME0_ACC_BASE)
#define SRAM_BANK_OFFSET (mmSRAM_Y0_X1_RTR_BASE - mmSRAM_Y0_X0_RTR_BASE)
-#define PLL_NR_OFFSET 0
-#define PLL_NF_OFFSET (mmPSOC_CPU_PLL_NF - mmPSOC_CPU_PLL_NR)
-#define PLL_OD_OFFSET (mmPSOC_CPU_PLL_OD - mmPSOC_CPU_PLL_NR)
-#define PLL_DIV_FACTOR_0_OFFSET (mmPSOC_CPU_PLL_DIV_FACTOR_0 - \
- mmPSOC_CPU_PLL_NR)
-#define PLL_DIV_SEL_0_OFFSET (mmPSOC_CPU_PLL_DIV_SEL_0 - mmPSOC_CPU_PLL_NR)
-
#define NUM_OF_SOB_IN_BLOCK \
(((mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_2047 - \
mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_0) + 4) >> 2)
diff --git a/drivers/misc/habanalabs/gaudi/gaudi_coresight.c b/drivers/misc/habanalabs/gaudi/gaudi_coresight.c
index 2e3612e1ee28..88a09d42e111 100644
--- a/drivers/misc/habanalabs/gaudi/gaudi_coresight.c
+++ b/drivers/misc/habanalabs/gaudi/gaudi_coresight.c
@@ -9,6 +9,7 @@
#include "../include/gaudi/gaudi_coresight.h"
#include "../include/gaudi/asic_reg/gaudi_regs.h"
#include "../include/gaudi/gaudi_masks.h"
+#include "../include/gaudi/gaudi_reg_map.h"
#include <uapi/misc/habanalabs.h>
#define SPMU_SECTION_SIZE MME0_ACC_SPMU_MAX_OFFSET
@@ -874,7 +875,7 @@ int gaudi_debug_coresight(struct hl_device *hdev, void *data)
}
/* Perform read from the device to flush all configuration */
- RREG32(mmPCIE_DBI_DEVICE_ID_VENDOR_ID_REG);
+ RREG32(mmHW_STATE);
return rc;
}
diff --git a/drivers/misc/habanalabs/goya/goya.c b/drivers/misc/habanalabs/goya/goya.c
index 3e5eb9e3d7bd..63679a747d2c 100644
--- a/drivers/misc/habanalabs/goya/goya.c
+++ b/drivers/misc/habanalabs/goya/goya.c
@@ -613,12 +613,6 @@ static int goya_early_init(struct hl_device *hdev)
if (rc)
goto free_queue_props;
- if (goya_get_hw_state(hdev) == HL_DEVICE_HW_STATE_DIRTY) {
- dev_info(hdev->dev,
- "H/W state is dirty, must reset before initializing\n");
- hdev->asic_funcs->hw_fini(hdev, true);
- }
-
/* Before continuing in the initialization, we need to read the preboot
* version to determine whether we run with a security-enabled firmware
*/
@@ -631,6 +625,12 @@ static int goya_early_init(struct hl_device *hdev)
goto pci_fini;
}
+ if (goya_get_hw_state(hdev) == HL_DEVICE_HW_STATE_DIRTY) {
+ dev_info(hdev->dev,
+ "H/W state is dirty, must reset before initializing\n");
+ hdev->asic_funcs->hw_fini(hdev, true);
+ }
+
if (!hdev->pldm) {
val = RREG32(mmPSOC_GLOBAL_CONF_BOOT_STRAP_PINS);
if (val & PSOC_GLOBAL_CONF_BOOT_STRAP_PINS_SRIOV_EN_MASK)
@@ -694,32 +694,47 @@ static void goya_qman0_set_security(struct hl_device *hdev, bool secure)
static void goya_fetch_psoc_frequency(struct hl_device *hdev)
{
struct asic_fixed_properties *prop = &hdev->asic_prop;
- u32 trace_freq = 0;
- u32 pll_clk = 0;
- u32 div_fctr = RREG32(mmPSOC_PCI_PLL_DIV_FACTOR_1);
- u32 div_sel = RREG32(mmPSOC_PCI_PLL_DIV_SEL_1);
- u32 nr = RREG32(mmPSOC_PCI_PLL_NR);
- u32 nf = RREG32(mmPSOC_PCI_PLL_NF);
- u32 od = RREG32(mmPSOC_PCI_PLL_OD);
-
- if (div_sel == DIV_SEL_REF_CLK || div_sel == DIV_SEL_DIVIDED_REF) {
- if (div_sel == DIV_SEL_REF_CLK)
- trace_freq = PLL_REF_CLK;
- else
- trace_freq = PLL_REF_CLK / (div_fctr + 1);
- } else if (div_sel == DIV_SEL_PLL_CLK ||
- div_sel == DIV_SEL_DIVIDED_PLL) {
- pll_clk = PLL_REF_CLK * (nf + 1) / ((nr + 1) * (od + 1));
- if (div_sel == DIV_SEL_PLL_CLK)
- trace_freq = pll_clk;
- else
- trace_freq = pll_clk / (div_fctr + 1);
+ u32 nr = 0, nf = 0, od = 0, div_fctr = 0, pll_clk, div_sel;
+ u16 pll_freq_arr[HL_PLL_NUM_OUTPUTS], freq;
+ int rc;
+
+ if (hdev->asic_prop.fw_security_disabled) {
+ div_fctr = RREG32(mmPSOC_PCI_PLL_DIV_FACTOR_1);
+ div_sel = RREG32(mmPSOC_PCI_PLL_DIV_SEL_1);
+ nr = RREG32(mmPSOC_PCI_PLL_NR);
+ nf = RREG32(mmPSOC_PCI_PLL_NF);
+ od = RREG32(mmPSOC_PCI_PLL_OD);
+
+ if (div_sel == DIV_SEL_REF_CLK ||
+ div_sel == DIV_SEL_DIVIDED_REF) {
+ if (div_sel == DIV_SEL_REF_CLK)
+ freq = PLL_REF_CLK;
+ else
+ freq = PLL_REF_CLK / (div_fctr + 1);
+ } else if (div_sel == DIV_SEL_PLL_CLK ||
+ div_sel == DIV_SEL_DIVIDED_PLL) {
+ pll_clk = PLL_REF_CLK * (nf + 1) /
+ ((nr + 1) * (od + 1));
+ if (div_sel == DIV_SEL_PLL_CLK)
+ freq = pll_clk;
+ else
+ freq = pll_clk / (div_fctr + 1);
+ } else {
+ dev_warn(hdev->dev,
+ "Received invalid div select value: %d",
+ div_sel);
+ freq = 0;
+ }
} else {
- dev_warn(hdev->dev,
- "Received invalid div select value: %d", div_sel);
+ rc = hl_fw_cpucp_pll_info_get(hdev, PCI_PLL, pll_freq_arr);
+
+ if (rc)
+ return;
+
+ freq = pll_freq_arr[1];
}
- prop->psoc_timestamp_frequency = trace_freq;
+ prop->psoc_timestamp_frequency = freq;
prop->psoc_pci_pll_nr = nr;
prop->psoc_pci_pll_nf = nf;
prop->psoc_pci_pll_od = od;
@@ -2704,7 +2719,8 @@ static int goya_cb_mmap(struct hl_device *hdev, struct vm_area_struct *vma,
vma->vm_flags |= VM_IO | VM_PFNMAP | VM_DONTEXPAND | VM_DONTDUMP |
VM_DONTCOPY | VM_NORESERVE;
- rc = dma_mmap_coherent(hdev->dev, vma, cpu_addr, dma_addr, size);
+ rc = dma_mmap_coherent(hdev->dev, vma, cpu_addr,
+ (dma_addr - HOST_PHYS_BASE), size);
if (rc)
dev_err(hdev->dev, "dma_mmap_coherent error %d", rc);
@@ -5324,7 +5340,7 @@ static u32 goya_get_wait_cb_size(struct hl_device *hdev)
}
static u32 goya_gen_signal_cb(struct hl_device *hdev, void *data, u16 sob_id,
- u32 size)
+ u32 size, bool eb)
{
return 0;
}
diff --git a/drivers/misc/habanalabs/include/common/hl_boot_if.h b/drivers/misc/habanalabs/include/common/hl_boot_if.h
index e5801ecf0cb2..b637dfd69f6e 100644
--- a/drivers/misc/habanalabs/include/common/hl_boot_if.h
+++ b/drivers/misc/habanalabs/include/common/hl_boot_if.h
@@ -145,11 +145,15 @@
* implemented. This means that FW will
* perform hard reset procedure on
* receiving the halt-machine event.
- * Initialized in: linux
+ * Initialized in: preboot, u-boot, linux
*
* CPU_BOOT_DEV_STS0_PLL_INFO_EN FW retrieval of PLL info is enabled.
* Initialized in: linux
*
+ * CPU_BOOT_DEV_STS0_CLK_GATE_EN Clock Gating enabled.
+ * FW initialized Clock Gating.
+ * Initialized in: preboot
+ *
* CPU_BOOT_DEV_STS0_ENABLED Device status register enabled.
* This is a main indication that the
* running FW populates the device status
@@ -171,6 +175,7 @@
#define CPU_BOOT_DEV_STS0_DRAM_SCR_EN (1 << 9)
#define CPU_BOOT_DEV_STS0_FW_HARD_RST_EN (1 << 10)
#define CPU_BOOT_DEV_STS0_PLL_INFO_EN (1 << 11)
+#define CPU_BOOT_DEV_STS0_CLK_GATE_EN (1 << 13)
#define CPU_BOOT_DEV_STS0_ENABLED (1 << 31)
enum cpu_boot_status {
@@ -204,6 +209,8 @@ enum kmd_msg {
KMD_MSG_GOTO_WFE,
KMD_MSG_FIT_RDY,
KMD_MSG_SKIP_BMC,
+ RESERVED,
+ KMD_MSG_RST_DEV,
};
enum cpu_msg_status {
diff --git a/drivers/misc/pvpanic.c b/drivers/misc/pvpanic.c
index 951b37da5e3c..41cab297d66e 100644
--- a/drivers/misc/pvpanic.c
+++ b/drivers/misc/pvpanic.c
@@ -55,12 +55,23 @@ static int pvpanic_mmio_probe(struct platform_device *pdev)
struct resource *res;
res = platform_get_mem_or_io(pdev, 0);
- if (res && resource_type(res) == IORESOURCE_IO)
+ if (!res)
+ return -EINVAL;
+
+ switch (resource_type(res)) {
+ case IORESOURCE_IO:
base = devm_ioport_map(dev, res->start, resource_size(res));
- else
+ if (!base)
+ return -ENOMEM;
+ break;
+ case IORESOURCE_MEM:
base = devm_ioremap_resource(dev, res);
- if (IS_ERR(base))
- return PTR_ERR(base);
+ if (IS_ERR(base))
+ return PTR_ERR(base);
+ break;
+ default:
+ return -EINVAL;
+ }
atomic_notifier_chain_register(&panic_notifier_list,
&pvpanic_panic_nb);