summaryrefslogtreecommitdiffstats
path: root/drivers/misc/habanalabs/goya
diff options
context:
space:
mode:
authorOded Gabbay <oded.gabbay@gmail.com>2020-07-07 17:30:13 +0300
committerOded Gabbay <oded.gabbay@gmail.com>2020-07-10 19:53:03 +0300
commit788cacf308871db0a619952321bedfec8f1773e2 (patch)
tree4e507fb09ad60b5131d6f371e9e3780670fad83c /drivers/misc/habanalabs/goya
parente38bfd30e08802d9661efffb8c048bd53a3acfc4 (diff)
downloadlinux-788cacf308871db0a619952321bedfec8f1773e2.tar.bz2
habanalabs: set 4s timeout for message to device CPU
We see that sometimes the CPU in GOYA and GAUDI is occupied by the power/thermal loop and can't answer requests from the driver fast enough. Therefore, to avoid false notifications on timeouts, increase the timeout to 4 seconds on each message sent to the device CPU. Signed-off-by: Oded Gabbay <oded.gabbay@gmail.com> Reviewed-by: Tomer Tayar <ttayar@habana.ai>
Diffstat (limited to 'drivers/misc/habanalabs/goya')
-rw-r--r--drivers/misc/habanalabs/goya/goya.c12
1 files changed, 8 insertions, 4 deletions
diff --git a/drivers/misc/habanalabs/goya/goya.c b/drivers/misc/habanalabs/goya/goya.c
index 83f0c70f140b..88460b2138d8 100644
--- a/drivers/misc/habanalabs/goya/goya.c
+++ b/drivers/misc/habanalabs/goya/goya.c
@@ -88,6 +88,7 @@
#define GOYA_PLDM_MMU_TIMEOUT_USEC (MMU_CONFIG_TIMEOUT_USEC * 100)
#define GOYA_PLDM_QMAN0_TIMEOUT_USEC (HL_DEVICE_TIMEOUT_USEC * 30)
#define GOYA_BOOT_FIT_REQ_TIMEOUT_USEC 1000000 /* 1s */
+#define GOYA_MSG_TO_CPU_TIMEOUT_USEC 4000000 /* 4s */
#define GOYA_QMAN0_FENCE_VAL 0xD169B243
@@ -2830,6 +2831,9 @@ int goya_send_cpu_message(struct hl_device *hdev, u32 *msg, u16 len,
return 0;
}
+ if (!timeout)
+ timeout = GOYA_MSG_TO_CPU_TIMEOUT_USEC;
+
return hl_fw_send_cpu_message(hdev, GOYA_QUEUE_ID_CPU_PQ, msg, len,
timeout, result);
}
@@ -4431,8 +4435,8 @@ static int goya_unmask_irq_arr(struct hl_device *hdev, u32 *irq_arr,
pkt->armcp_pkt.ctl = cpu_to_le32(ARMCP_PACKET_UNMASK_RAZWI_IRQ_ARRAY <<
ARMCP_PKT_CTL_OPCODE_SHIFT);
- rc = goya_send_cpu_message(hdev, (u32 *) pkt, total_pkt_size,
- HL_DEVICE_TIMEOUT_USEC, &result);
+ rc = hdev->asic_funcs->send_cpu_message(hdev, (u32 *) pkt,
+ total_pkt_size, 0, &result);
if (rc)
dev_err(hdev->dev, "failed to unmask IRQ array\n");
@@ -4464,8 +4468,8 @@ static int goya_unmask_irq(struct hl_device *hdev, u16 event_type)
ARMCP_PKT_CTL_OPCODE_SHIFT);
pkt.value = cpu_to_le64(event_type);
- rc = goya_send_cpu_message(hdev, (u32 *) &pkt, sizeof(pkt),
- HL_DEVICE_TIMEOUT_USEC, &result);
+ rc = hdev->asic_funcs->send_cpu_message(hdev, (u32 *) &pkt, sizeof(pkt),
+ 0, &result);
if (rc)
dev_err(hdev->dev, "failed to unmask RAZWI IRQ %d", event_type);