summaryrefslogtreecommitdiffstats
path: root/drivers/misc/habanalabs/common/habanalabs_ioctl.c
diff options
context:
space:
mode:
authorDani Liberman <dliberman@habana.ai>2021-11-03 10:09:59 +0200
committerOded Gabbay <ogabbay@kernel.org>2021-12-26 08:59:05 +0200
commit3e55b5dbf929a40966b8eb7d4de94fad3bb404bd (patch)
treebb74b1f6065cf9253ca642dd8a4c1059b11d6e3a /drivers/misc/habanalabs/common/habanalabs_ioctl.c
parente2637fdca70aa5357b26c57e44fcec0ed673eb22 (diff)
downloadlinux-3e55b5dbf929a40966b8eb7d4de94fad3bb404bd.tar.bz2
habanalabs: add support for fetching historic errors
A new uAPI is added for debug purposes of the user-space to retrieve errors related data from previous session (before device reset was performed). Inforamtion is filled when a razwi or CS timeout happens and can contain one of the following: 1. Retrieve timestamp of last time the device was opened and razwi or CS timeout happened. 2. Retrieve information about last CS timeout. 3. Retrieve information about last razwi error. This information doesn't contain user data, so no danger of data leakage between users. Signed-off-by: Dani Liberman <dliberman@habana.ai> Reviewed-by: Oded Gabbay <ogabbay@kernel.org> Signed-off-by: Oded Gabbay <ogabbay@kernel.org>
Diffstat (limited to 'drivers/misc/habanalabs/common/habanalabs_ioctl.c')
-rw-r--r--drivers/misc/habanalabs/common/habanalabs_ioctl.c60
1 files changed, 60 insertions, 0 deletions
diff --git a/drivers/misc/habanalabs/common/habanalabs_ioctl.c b/drivers/misc/habanalabs/common/habanalabs_ioctl.c
index 68c655acdec8..360a1e9bbd5d 100644
--- a/drivers/misc/habanalabs/common/habanalabs_ioctl.c
+++ b/drivers/misc/habanalabs/common/habanalabs_ioctl.c
@@ -540,6 +540,57 @@ static int dram_replaced_rows_info(struct hl_fpriv *hpriv, struct hl_info_args *
return copy_to_user(out, &info, min_t(size_t, max_size, sizeof(info))) ? -EFAULT : 0;
}
+static int last_err_open_dev_info(struct hl_fpriv *hpriv, struct hl_info_args *args)
+{
+ struct hl_info_last_err_open_dev_time info = {0};
+ struct hl_device *hdev = hpriv->hdev;
+ u32 max_size = args->return_size;
+ void __user *out = (void __user *) (uintptr_t) args->return_pointer;
+
+ if ((!max_size) || (!out))
+ return -EINVAL;
+
+ info.timestamp = ktime_to_ns(hdev->last_error.open_dev_timestamp);
+
+ return copy_to_user(out, &info, min_t(size_t, max_size, sizeof(info))) ? -EFAULT : 0;
+}
+
+static int cs_timeout_info(struct hl_fpriv *hpriv, struct hl_info_args *args)
+{
+ struct hl_info_cs_timeout_event info = {0};
+ struct hl_device *hdev = hpriv->hdev;
+ u32 max_size = args->return_size;
+ void __user *out = (void __user *) (uintptr_t) args->return_pointer;
+
+ if ((!max_size) || (!out))
+ return -EINVAL;
+
+ info.seq = hdev->last_error.cs_timeout_seq;
+ info.timestamp = ktime_to_ns(hdev->last_error.cs_timeout_timestamp);
+
+ return copy_to_user(out, &info, min_t(size_t, max_size, sizeof(info))) ? -EFAULT : 0;
+}
+
+static int razwi_info(struct hl_fpriv *hpriv, struct hl_info_args *args)
+{
+ struct hl_device *hdev = hpriv->hdev;
+ u32 max_size = args->return_size;
+ struct hl_info_razwi_event info = {0};
+ void __user *out = (void __user *) (uintptr_t) args->return_pointer;
+
+ if ((!max_size) || (!out))
+ return -EINVAL;
+
+ info.timestamp = ktime_to_ns(hdev->last_error.razwi_timestamp);
+ info.addr = hdev->last_error.razwi_addr;
+ info.engine_id_1 = hdev->last_error.razwi_engine_id_1;
+ info.engine_id_2 = hdev->last_error.razwi_engine_id_2;
+ info.no_engine_id = hdev->last_error.razwi_non_engine_initiator;
+ info.error_type = hdev->last_error.razwi_type;
+
+ return copy_to_user(out, &info, min_t(size_t, max_size, sizeof(info))) ? -EFAULT : 0;
+}
+
static int _hl_info_ioctl(struct hl_fpriv *hpriv, void *data,
struct device *dev)
{
@@ -632,6 +683,15 @@ static int _hl_info_ioctl(struct hl_fpriv *hpriv, void *data,
case HL_INFO_DRAM_PENDING_ROWS:
return dram_pending_rows_info(hpriv, args);
+ case HL_INFO_LAST_ERR_OPEN_DEV_TIME:
+ return last_err_open_dev_info(hpriv, args);
+
+ case HL_INFO_CS_TIMEOUT_EVENT:
+ return cs_timeout_info(hpriv, args);
+
+ case HL_INFO_RAZWI_EVENT:
+ return razwi_info(hpriv, args);
+
default:
dev_err(dev, "Invalid request %d\n", args->op);
rc = -ENOTTY;