diff options
author | Luben Tuikov <luben.tuikov@amd.com> | 2021-05-21 11:53:09 -0400 |
---|---|---|
committer | Alex Deucher <alexander.deucher@amd.com> | 2021-05-27 12:23:06 -0400 |
commit | 05adfd80cc52e0b4581e65bb5418de5dfd24d105 (patch) | |
tree | d6634299bb56f99f115e49d8a8059bbe56875894 /drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c | |
parent | a46751fbcde505e6aff8622e17995092c8d86ae4 (diff) | |
download | linux-05adfd80cc52e0b4581e65bb5418de5dfd24d105.tar.bz2 |
drm/amdgpu: Use delayed work to collect RAS error counters
On Context Query2 IOCTL return the correctable and
uncorrectable errors in O(1) fashion, from cached
values, and schedule a delayed work function to
calculate and cache them for the next such IOCTL.
v2: Cancel pending delayed work at ras_fini().
v3: Remove conditionals when dealing with delayed
work manipulation as they're inherently racy.
Cc: Alexander Deucher <Alexander.Deucher@amd.com>
Cc: Christian König <christian.koenig@amd.com>
Cc: John Clements <john.clements@amd.com>
Cc: Hawking Zhang <Hawking.Zhang@amd.com>
Signed-off-by: Luben Tuikov <luben.tuikov@amd.com>
Reviewed-by: Alexander Deucher <Alexander.Deucher@amd.com>
Reviewed-by: Christian König <christian.koenig@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
Diffstat (limited to 'drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c')
-rw-r--r-- | drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c | 31 |
1 files changed, 29 insertions, 2 deletions
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c index bb0cfe871aba..e7a010b7ca1f 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c @@ -331,10 +331,13 @@ static int amdgpu_ctx_query(struct amdgpu_device *adev, return 0; } +#define AMDGPU_RAS_COUNTE_DELAY_MS 3000 + static int amdgpu_ctx_query2(struct amdgpu_device *adev, - struct amdgpu_fpriv *fpriv, uint32_t id, - union drm_amdgpu_ctx_out *out) + struct amdgpu_fpriv *fpriv, uint32_t id, + union drm_amdgpu_ctx_out *out) { + struct amdgpu_ras *con = amdgpu_ras_get_context(adev); struct amdgpu_ctx *ctx; struct amdgpu_ctx_mgr *mgr; @@ -361,6 +364,30 @@ static int amdgpu_ctx_query2(struct amdgpu_device *adev, if (atomic_read(&ctx->guilty)) out->state.flags |= AMDGPU_CTX_QUERY2_FLAGS_GUILTY; + if (adev->ras_enabled && con) { + /* Return the cached values in O(1), + * and schedule delayed work to cache + * new vaues. + */ + int ce_count, ue_count; + + ce_count = atomic_read(&con->ras_ce_count); + ue_count = atomic_read(&con->ras_ue_count); + + if (ce_count != ctx->ras_counter_ce) { + ctx->ras_counter_ce = ce_count; + out->state.flags |= AMDGPU_CTX_QUERY2_FLAGS_RAS_CE; + } + + if (ue_count != ctx->ras_counter_ue) { + ctx->ras_counter_ue = ue_count; + out->state.flags |= AMDGPU_CTX_QUERY2_FLAGS_RAS_UE; + } + + schedule_delayed_work(&con->ras_counte_delay_work, + msecs_to_jiffies(AMDGPU_RAS_COUNTE_DELAY_MS)); + } + mutex_unlock(&mgr->lock); return 0; } |