summaryrefslogtreecommitdiffstats
path: root/drivers/gpu/drm/amd/amdkfd/kfd_events.c
diff options
context:
space:
mode:
authorDennis Li <Dennis.Li@amd.com>2021-05-11 15:35:49 +0800
committerAlex Deucher <alexander.deucher@amd.com>2021-05-19 22:29:44 -0400
commite2b1f9f52bb630a076039064aa4cb7f55f3e5a14 (patch)
treee068e4a7c3ff8c5ea7dc9a03d8c73ca7186cd281 /drivers/gpu/drm/amd/amdkfd/kfd_events.c
parent2bb5b5f688cbbd5030629905d3ed8032ab46e79f (diff)
downloadlinux-e2b1f9f52bb630a076039064aa4cb7f55f3e5a14.tar.bz2
drm/amdkfd: refine the poison data consumption handling
The user applications maybe register the KFD_EVENT_TYPE_HW_EXCEPTION and KFD_EVENT_TYPE_MEMORY events, driver could notify them when poison data consumed. Beside that, some applications maybe register SIGBUS signal hander. These applications will handle poison data by themselves, exit or re-create context to re-dispatch works. Signed-off-by: Dennis Li <Dennis.Li@amd.com> Reviewed-by: Felix Kuehling <Felix.Kuehling@amd.com> Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
Diffstat (limited to 'drivers/gpu/drm/amd/amdkfd/kfd_events.c')
-rw-r--r--drivers/gpu/drm/amd/amdkfd/kfd_events.c39
1 files changed, 39 insertions, 0 deletions
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_events.c b/drivers/gpu/drm/amd/amdkfd/kfd_events.c
index ba2c2ce0c55a..4d210f23c33c 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_events.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_events.c
@@ -1050,3 +1050,42 @@ void kfd_signal_reset_event(struct kfd_dev *dev)
}
srcu_read_unlock(&kfd_processes_srcu, idx);
}
+
+void kfd_signal_poison_consumed_event(struct kfd_dev *dev, u32 pasid)
+{
+ struct kfd_process *p = kfd_lookup_process_by_pasid(pasid);
+ struct kfd_hsa_memory_exception_data memory_exception_data;
+ struct kfd_hsa_hw_exception_data hw_exception_data;
+ struct kfd_event *ev;
+ uint32_t id = KFD_FIRST_NONSIGNAL_EVENT_ID;
+
+ if (!p)
+ return; /* Presumably process exited. */
+
+ memset(&hw_exception_data, 0, sizeof(hw_exception_data));
+ hw_exception_data.gpu_id = dev->id;
+ hw_exception_data.memory_lost = 1;
+ hw_exception_data.reset_cause = KFD_HW_EXCEPTION_ECC;
+
+ memset(&memory_exception_data, 0, sizeof(memory_exception_data));
+ memory_exception_data.ErrorType = KFD_MEM_ERR_POISON_CONSUMED;
+ memory_exception_data.gpu_id = dev->id;
+ memory_exception_data.failure.imprecise = true;
+
+ mutex_lock(&p->event_mutex);
+ idr_for_each_entry_continue(&p->event_idr, ev, id) {
+ if (ev->type == KFD_EVENT_TYPE_HW_EXCEPTION) {
+ ev->hw_exception_data = hw_exception_data;
+ set_event(ev);
+ }
+
+ if (ev->type == KFD_EVENT_TYPE_MEMORY) {
+ ev->memory_exception_data = memory_exception_data;
+ set_event(ev);
+ }
+ }
+ mutex_unlock(&p->event_mutex);
+
+ /* user application will handle SIGBUS signal */
+ send_sig(SIGBUS, p->lead_thread, 0);
+}