summaryrefslogtreecommitdiffstats
path: root/drivers/misc/habanalabs/gaudi/gaudi.c
diff options
context:
space:
mode:
authorOmer Shpigelman <oshpigelman@habana.ai>2020-05-24 23:06:59 +0300
committerOded Gabbay <oded.gabbay@gmail.com>2020-05-25 08:17:57 +0300
commit8ff5f4fd40df9525675ea0e512da4cec65d646eb (patch)
treefc7ae7c7cae4bfda1d96d9fc075d61414980a011 /drivers/misc/habanalabs/gaudi/gaudi.c
parent36fafe87edd636292a4ed6a3af9608f2c7d0d0fb (diff)
downloadlinux-8ff5f4fd40df9525675ea0e512da4cec65d646eb.tar.bz2
habanalabs: handle MMU cache invalidation timeout
MMU cache invalidation timeout indicates that the device is unstable and therefore unusable. Hence in such case do hard reset and return an error to the user if was called from ioctl. In addition, change the print to error level and rephrase its text. Signed-off-by: Omer Shpigelman <oshpigelman@habana.ai> Reviewed-by: Oded Gabbay <oded.gabbay@gmail.com> Signed-off-by: Oded Gabbay <oded.gabbay@gmail.com>
Diffstat (limited to 'drivers/misc/habanalabs/gaudi/gaudi.c')
-rw-r--r--drivers/misc/habanalabs/gaudi/gaudi.c36
1 files changed, 22 insertions, 14 deletions
diff --git a/drivers/misc/habanalabs/gaudi/gaudi.c b/drivers/misc/habanalabs/gaudi/gaudi.c
index 92a5130f06fb..61f88e9884ce 100644
--- a/drivers/misc/habanalabs/gaudi/gaudi.c
+++ b/drivers/misc/habanalabs/gaudi/gaudi.c
@@ -5975,7 +5975,7 @@ static void *gaudi_get_events_stat(struct hl_device *hdev, bool aggregate,
return gaudi->events_stat;
}
-static void gaudi_mmu_invalidate_cache(struct hl_device *hdev, bool is_hard,
+static int gaudi_mmu_invalidate_cache(struct hl_device *hdev, bool is_hard,
u32 flags)
{
struct gaudi_device *gaudi = hdev->asic_specific;
@@ -5984,15 +5984,15 @@ static void gaudi_mmu_invalidate_cache(struct hl_device *hdev, bool is_hard,
if (!(gaudi->hw_cap_initialized & HW_CAP_MMU) ||
hdev->hard_reset_pending)
- return;
-
- mutex_lock(&hdev->mmu_cache_lock);
+ return 0;
if (hdev->pldm)
timeout_usec = GAUDI_PLDM_MMU_TIMEOUT_USEC;
else
timeout_usec = MMU_CONFIG_TIMEOUT_USEC;
+ mutex_lock(&hdev->mmu_cache_lock);
+
/* L0 & L1 invalidation */
WREG32(mmSTLB_INV_PS, 2);
@@ -6006,14 +6006,18 @@ static void gaudi_mmu_invalidate_cache(struct hl_device *hdev, bool is_hard,
WREG32(mmSTLB_INV_SET, 0);
- if (rc)
- dev_notice_ratelimited(hdev->dev,
- "Timeout when waiting for MMU cache invalidation\n");
-
mutex_unlock(&hdev->mmu_cache_lock);
+
+ if (rc) {
+ dev_err_ratelimited(hdev->dev,
+ "MMU cache invalidation timeout\n");
+ hl_device_reset(hdev, true, false);
+ }
+
+ return rc;
}
-static void gaudi_mmu_invalidate_cache_range(struct hl_device *hdev,
+static int gaudi_mmu_invalidate_cache_range(struct hl_device *hdev,
bool is_hard, u32 asid, u64 va, u64 size)
{
struct gaudi_device *gaudi = hdev->asic_specific;
@@ -6024,7 +6028,7 @@ static void gaudi_mmu_invalidate_cache_range(struct hl_device *hdev,
if (!(gaudi->hw_cap_initialized & HW_CAP_MMU) ||
hdev->hard_reset_pending)
- return;
+ return 0;
mutex_lock(&hdev->mmu_cache_lock);
@@ -6055,11 +6059,15 @@ static void gaudi_mmu_invalidate_cache_range(struct hl_device *hdev,
1000,
timeout_usec);
- if (rc)
- dev_notice_ratelimited(hdev->dev,
- "Timeout when waiting for MMU cache invalidation\n");
-
mutex_unlock(&hdev->mmu_cache_lock);
+
+ if (rc) {
+ dev_err_ratelimited(hdev->dev,
+ "MMU cache invalidation timeout\n");
+ hl_device_reset(hdev, true, false);
+ }
+
+ return rc;
}
static int gaudi_mmu_update_asid_hop0_addr(struct hl_device *hdev,