summaryrefslogtreecommitdiffstats
path: root/drivers/misc/habanalabs/goya
diff options
context:
space:
mode:
authorYuri Nudelman <ynudelman@habana.ai>2021-06-06 10:28:51 +0300
committerOded Gabbay <ogabbay@kernel.org>2021-08-29 09:47:46 +0300
commit938b793fdede518e10c67dc1dcfba1804faf98a6 (patch)
tree640575a63c1b0c204cdbacab5f2d0bb1c4cce3d7 /drivers/misc/habanalabs/goya
parente79e745b208b3c709cc5e689e587d04a4c0fe1bb (diff)
downloadlinux-938b793fdede518e10c67dc1dcfba1804faf98a6.tar.bz2
habanalabs: expose state dump
To improve the user's ability to debug the case where a workload that is part of executing training/inference of a topology is getting stuck, we need to add a 'core dump' each time a CS times-out. The 'core dump' shall contain all relevant Sync Manager information and corresponding fence values. The most recent dumps shall be accessible via debugfs, under 'state_dump' node. Reading from the node will provide the oldest dump available. Writing an integer value X will discard X dumps, starting with the oldest one, i.e. subsequent read will now return newer dumps. Signed-off-by: Yuri Nudelman <ynudelman@habana.ai> Reviewed-by: Oded Gabbay <ogabbay@kernel.org> Signed-off-by: Oded Gabbay <ogabbay@kernel.org>
Diffstat (limited to 'drivers/misc/habanalabs/goya')
-rw-r--r--drivers/misc/habanalabs/goya/goya.c24
1 files changed, 23 insertions, 1 deletions
diff --git a/drivers/misc/habanalabs/goya/goya.c b/drivers/misc/habanalabs/goya/goya.c
index 755e08cf2ecc..2c3d642d31ab 100644
--- a/drivers/misc/habanalabs/goya/goya.c
+++ b/drivers/misc/habanalabs/goya/goya.c
@@ -350,6 +350,8 @@ static u32 goya_all_events[] = {
GOYA_ASYNC_EVENT_ID_FIX_THERMAL_ENV_E
};
+static s64 goya_state_dump_specs_props[SP_MAX] = {0};
+
static int goya_mmu_clear_pgt_range(struct hl_device *hdev);
static int goya_mmu_set_dram_default_page(struct hl_device *hdev);
static int goya_mmu_add_mappings_for_device_cpu(struct hl_device *hdev);
@@ -5524,6 +5526,25 @@ static int goya_map_pll_idx_to_fw_idx(u32 pll_idx)
}
}
+static int goya_gen_sync_to_engine_map(struct hl_device *hdev,
+ struct hl_sync_to_engine_map *map)
+{
+ /* Not implemented */
+ return 0;
+}
+
+
+static struct hl_state_dump_specs_funcs goya_state_dump_funcs = {
+ .gen_sync_to_engine_map = goya_gen_sync_to_engine_map,
+};
+
+static void goya_state_dump_init(struct hl_device *hdev)
+{
+ /* Not implemented */
+ hdev->state_dump_specs.props = goya_state_dump_specs_props;
+ hdev->state_dump_specs.funcs = goya_state_dump_funcs;
+}
+
static const struct hl_asic_funcs goya_funcs = {
.early_init = goya_early_init,
.early_fini = goya_early_fini,
@@ -5609,7 +5630,8 @@ static const struct hl_asic_funcs goya_funcs = {
.enable_events_from_fw = goya_enable_events_from_fw,
.map_pll_idx_to_fw_idx = goya_map_pll_idx_to_fw_idx,
.init_firmware_loader = goya_init_firmware_loader,
- .init_cpu_scrambler_dram = goya_cpu_init_scrambler_dram
+ .init_cpu_scrambler_dram = goya_cpu_init_scrambler_dram,
+ .state_dump_init = goya_state_dump_init,
};
/*