diff options
author | Omer Shpigelman <oshpigelman@habana.ai> | 2020-05-24 23:06:59 +0300 |
---|---|---|
committer | Oded Gabbay <oded.gabbay@gmail.com> | 2020-05-25 08:17:57 +0300 |
commit | 8ff5f4fd40df9525675ea0e512da4cec65d646eb (patch) | |
tree | fc7ae7c7cae4bfda1d96d9fc075d61414980a011 /drivers/misc/habanalabs/gaudi/gaudi.c | |
parent | 36fafe87edd636292a4ed6a3af9608f2c7d0d0fb (diff) | |
download | linux-8ff5f4fd40df9525675ea0e512da4cec65d646eb.tar.bz2 |
habanalabs: handle MMU cache invalidation timeout
MMU cache invalidation timeout indicates that the device is unstable and
therefore unusable.
Hence, in such a case, perform a hard reset and return an error to the user
if the invalidation was called from an ioctl.
In addition, change the print to error level and rephrase its text.
Signed-off-by: Omer Shpigelman <oshpigelman@habana.ai>
Reviewed-by: Oded Gabbay <oded.gabbay@gmail.com>
Signed-off-by: Oded Gabbay <oded.gabbay@gmail.com>
Diffstat (limited to 'drivers/misc/habanalabs/gaudi/gaudi.c')
-rw-r--r-- | drivers/misc/habanalabs/gaudi/gaudi.c | 36 |
1 files changed, 22 insertions, 14 deletions
diff --git a/drivers/misc/habanalabs/gaudi/gaudi.c b/drivers/misc/habanalabs/gaudi/gaudi.c index 92a5130f06fb..61f88e9884ce 100644 --- a/drivers/misc/habanalabs/gaudi/gaudi.c +++ b/drivers/misc/habanalabs/gaudi/gaudi.c @@ -5975,7 +5975,7 @@ static void *gaudi_get_events_stat(struct hl_device *hdev, bool aggregate, return gaudi->events_stat; } -static void gaudi_mmu_invalidate_cache(struct hl_device *hdev, bool is_hard, +static int gaudi_mmu_invalidate_cache(struct hl_device *hdev, bool is_hard, u32 flags) { struct gaudi_device *gaudi = hdev->asic_specific; @@ -5984,15 +5984,15 @@ static void gaudi_mmu_invalidate_cache(struct hl_device *hdev, bool is_hard, if (!(gaudi->hw_cap_initialized & HW_CAP_MMU) || hdev->hard_reset_pending) - return; - - mutex_lock(&hdev->mmu_cache_lock); + return 0; if (hdev->pldm) timeout_usec = GAUDI_PLDM_MMU_TIMEOUT_USEC; else timeout_usec = MMU_CONFIG_TIMEOUT_USEC; + mutex_lock(&hdev->mmu_cache_lock); + /* L0 & L1 invalidation */ WREG32(mmSTLB_INV_PS, 2); @@ -6006,14 +6006,18 @@ static void gaudi_mmu_invalidate_cache(struct hl_device *hdev, bool is_hard, WREG32(mmSTLB_INV_SET, 0); - if (rc) - dev_notice_ratelimited(hdev->dev, - "Timeout when waiting for MMU cache invalidation\n"); - mutex_unlock(&hdev->mmu_cache_lock); + + if (rc) { + dev_err_ratelimited(hdev->dev, + "MMU cache invalidation timeout\n"); + hl_device_reset(hdev, true, false); + } + + return rc; } -static void gaudi_mmu_invalidate_cache_range(struct hl_device *hdev, +static int gaudi_mmu_invalidate_cache_range(struct hl_device *hdev, bool is_hard, u32 asid, u64 va, u64 size) { struct gaudi_device *gaudi = hdev->asic_specific; @@ -6024,7 +6028,7 @@ static void gaudi_mmu_invalidate_cache_range(struct hl_device *hdev, if (!(gaudi->hw_cap_initialized & HW_CAP_MMU) || hdev->hard_reset_pending) - return; + return 0; mutex_lock(&hdev->mmu_cache_lock); @@ -6055,11 +6059,15 @@ static void gaudi_mmu_invalidate_cache_range(struct hl_device *hdev, 1000, timeout_usec); - if (rc) - 
dev_notice_ratelimited(hdev->dev, - "Timeout when waiting for MMU cache invalidation\n"); - mutex_unlock(&hdev->mmu_cache_lock); + + if (rc) { + dev_err_ratelimited(hdev->dev, + "MMU cache invalidation timeout\n"); + hl_device_reset(hdev, true, false); + } + + return rc; } static int gaudi_mmu_update_asid_hop0_addr(struct hl_device *hdev, |