From 9f201aba56b92c3daa4b76efae056ddbb80d91e6 Mon Sep 17 00:00:00 2001 From: Oded Gabbay Date: Sat, 6 Apr 2019 15:33:38 +0300 Subject: habanalabs: prevent device PTE read/write during hard-reset During hard-reset, contexts are closed as part of the tear-down process. After a context is closed, the driver cleans up the page tables of that context in the device's DRAM. This action is both dangerous and unnecessary. It is unnecessary, because the device is going through a hard-reset, which means the device's DRAM contents are no longer valid and the device's MMU is being reset. It is dangerous, because if the hard-reset came as a result of a PCI freeze, this action may cause the entire host machine to hang. Therefore, prevent all device PTE updates when a hard-reset operation is pending. Signed-off-by: Oded Gabbay --- drivers/misc/habanalabs/goya/goya.c | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'drivers') diff --git a/drivers/misc/habanalabs/goya/goya.c b/drivers/misc/habanalabs/goya/goya.c index aaa05662ca9f..5100dfbf3acc 100644 --- a/drivers/misc/habanalabs/goya/goya.c +++ b/drivers/misc/habanalabs/goya/goya.c @@ -4058,6 +4058,9 @@ static u64 goya_read_pte(struct hl_device *hdev, u64 addr) { struct goya_device *goya = hdev->asic_specific; + if (hdev->hard_reset_pending) + return U64_MAX; + return readq(hdev->pcie_bar[DDR_BAR_ID] + (addr - goya->ddr_bar_cur_addr)); } @@ -4066,6 +4069,9 @@ static void goya_write_pte(struct hl_device *hdev, u64 addr, u64 val) { struct goya_device *goya = hdev->asic_specific; + if (hdev->hard_reset_pending) + return; + writeq(val, hdev->pcie_bar[DDR_BAR_ID] + (addr - goya->ddr_bar_cur_addr)); } -- cgit v1.2.3