summaryrefslogtreecommitdiffstats
path: root/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c
diff options
context:
space:
mode:
Diffstat (limited to 'drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c')
-rw-r--r--drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c59
1 files changed, 31 insertions, 28 deletions
diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c
index 2932ade7dbd0..ee41d5592c51 100644
--- a/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c
@@ -1544,24 +1544,6 @@ static void gfx_v10_0_constants_init(struct amdgpu_device *adev)
gfx_v10_0_init_compute_vmid(adev);
- mutex_lock(&adev->grbm_idx_mutex);
- /*
- * making sure that the following register writes will be broadcasted
- * to all the shaders
- */
- gfx_v10_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
-
- tmp = REG_SET_FIELD(0, PA_SC_FIFO_SIZE, SC_FRONTEND_PRIM_FIFO_SIZE,
- adev->gfx.config.sc_prim_fifo_size_frontend);
- tmp = REG_SET_FIELD(tmp, PA_SC_FIFO_SIZE, SC_BACKEND_PRIM_FIFO_SIZE,
- adev->gfx.config.sc_prim_fifo_size_backend);
- tmp = REG_SET_FIELD(tmp, PA_SC_FIFO_SIZE, SC_HIZ_TILE_FIFO_SIZE,
- adev->gfx.config.sc_hiz_tile_fifo_size);
- tmp = REG_SET_FIELD(tmp, PA_SC_FIFO_SIZE, SC_EARLYZ_TILE_FIFO_SIZE,
- adev->gfx.config.sc_earlyz_tile_fifo_size);
- WREG32_SOC15(GC, 0, mmPA_SC_FIFO_SIZE, tmp);
-
- mutex_unlock(&adev->grbm_idx_mutex);
}
static void gfx_v10_0_enable_gui_idle_interrupt(struct amdgpu_device *adev,
@@ -4215,6 +4197,15 @@ static void gfx_v10_0_ring_emit_ib_gfx(struct amdgpu_ring *ring,
unsigned vmid = AMDGPU_JOB_GET_VMID(job);
u32 header, control = 0;
+ /* Prevent a hw deadlock due to a wave ID mismatch between ME and GDS.
+ * This resets the wave ID counters. (needed by transform feedback)
+ * TODO: This might only be needed on a VMID switch when we change
+ * the GDS OA mapping, not sure.
+ */
+ amdgpu_ring_write(ring, PACKET3(PACKET3_SET_CONFIG_REG, 1));
+ amdgpu_ring_write(ring, mmVGT_GS_MAX_WAVE_ID);
+ amdgpu_ring_write(ring, ring->adev->gds.vgt_gs_max_wave_id);
+
if (ib->flags & AMDGPU_IB_FLAG_CE)
header = PACKET3(PACKET3_INDIRECT_BUFFER_CNST, 2);
else
@@ -4252,6 +4243,22 @@ static void gfx_v10_0_ring_emit_ib_compute(struct amdgpu_ring *ring,
unsigned vmid = AMDGPU_JOB_GET_VMID(job);
u32 control = INDIRECT_BUFFER_VALID | ib->length_dw | (vmid << 24);
+ /* Currently, there is a high possibility to get wave ID mismatch
+ * between ME and GDS, leading to a hw deadlock, because ME generates
+ * different wave IDs than the GDS expects. This situation happens
+ * randomly when at least 5 compute pipes use GDS ordered append.
+ * The wave IDs generated by ME are also wrong after suspend/resume.
+ * Those are probably bugs somewhere else in the kernel driver.
+ *
+ * Writing GDS_COMPUTE_MAX_WAVE_ID resets wave ID counters in ME and
+ * GDS to 0 for this ring (me/pipe).
+ */
+ if (ib->flags & AMDGPU_IB_FLAG_RESET_GDS_MAX_WAVE_ID) {
+ amdgpu_ring_write(ring, PACKET3(PACKET3_SET_CONFIG_REG, 1));
+ amdgpu_ring_write(ring, mmGDS_COMPUTE_MAX_WAVE_ID);
+ amdgpu_ring_write(ring, ring->adev->gds.gds_compute_max_wave_id);
+ }
+
amdgpu_ring_write(ring, PACKET3(PACKET3_INDIRECT_BUFFER, 2));
BUG_ON(ib->gpu_addr & 0x3); /* Dword align */
amdgpu_ring_write(ring,
@@ -4278,11 +4285,7 @@ static void gfx_v10_0_ring_emit_fence(struct amdgpu_ring *ring, u64 addr,
amdgpu_ring_write(ring, PACKET3(PACKET3_RELEASE_MEM, 6));
amdgpu_ring_write(ring, (PACKET3_RELEASE_MEM_GCR_SEQ |
PACKET3_RELEASE_MEM_GCR_GL2_WB |
- PACKET3_RELEASE_MEM_GCR_GL2_INV |
- PACKET3_RELEASE_MEM_GCR_GL2_US |
- PACKET3_RELEASE_MEM_GCR_GL1_INV |
- PACKET3_RELEASE_MEM_GCR_GLV_INV |
- PACKET3_RELEASE_MEM_GCR_GLM_INV |
+ PACKET3_RELEASE_MEM_GCR_GLM_INV | /* must be set with GLM_WB */
PACKET3_RELEASE_MEM_GCR_GLM_WB |
PACKET3_RELEASE_MEM_CACHE_POLICY(3) |
PACKET3_RELEASE_MEM_EVENT_TYPE(CACHE_FLUSH_AND_INV_TS_EVENT) |
@@ -4948,7 +4951,7 @@ static const struct amdgpu_ring_funcs gfx_v10_0_ring_funcs_gfx = {
5 + /* HDP_INVL */
8 + 8 + /* FENCE x2 */
2, /* SWITCH_BUFFER */
- .emit_ib_size = 4, /* gfx_v10_0_ring_emit_ib_gfx */
+ .emit_ib_size = 7, /* gfx_v10_0_ring_emit_ib_gfx */
.emit_ib = gfx_v10_0_ring_emit_ib_gfx,
.emit_fence = gfx_v10_0_ring_emit_fence,
.emit_pipeline_sync = gfx_v10_0_ring_emit_pipeline_sync,
@@ -4987,7 +4990,7 @@ static const struct amdgpu_ring_funcs gfx_v10_0_ring_funcs_compute = {
SOC15_FLUSH_GPU_TLB_NUM_REG_WAIT * 7 +
2 + /* gfx_v10_0_ring_emit_vm_flush */
8 + 8 + 8, /* gfx_v10_0_ring_emit_fence x3 for user fence, vm fence */
- .emit_ib_size = 4, /* gfx_v10_0_ring_emit_ib_compute */
+ .emit_ib_size = 7, /* gfx_v10_0_ring_emit_ib_compute */
.emit_ib = gfx_v10_0_ring_emit_ib_compute,
.emit_fence = gfx_v10_0_ring_emit_fence,
.emit_pipeline_sync = gfx_v10_0_ring_emit_pipeline_sync,
@@ -5020,7 +5023,7 @@ static const struct amdgpu_ring_funcs gfx_v10_0_ring_funcs_kiq = {
SOC15_FLUSH_GPU_TLB_NUM_REG_WAIT * 7 +
2 + /* gfx_v10_0_ring_emit_vm_flush */
8 + 8 + 8, /* gfx_v10_0_ring_emit_fence_kiq x3 for user fence, vm fence */
- .emit_ib_size = 4, /* gfx_v10_0_ring_emit_ib_compute */
+ .emit_ib_size = 7, /* gfx_v10_0_ring_emit_ib_compute */
.emit_ib = gfx_v10_0_ring_emit_ib_compute,
.emit_fence = gfx_v10_0_ring_emit_fence_kiq,
.test_ring = gfx_v10_0_ring_test_ring,
@@ -5096,10 +5099,10 @@ static void gfx_v10_0_set_gds_init(struct amdgpu_device *adev)
/* init asic gds info */
switch (adev->asic_type) {
case CHIP_NAVI10:
- adev->gds.gds_size = 0x10000;
- break;
default:
adev->gds.gds_size = 0x10000;
+ adev->gds.gds_compute_max_wave_id = 0x4ff;
+ adev->gds.vgt_gs_max_wave_id = 0x3ff;
break;
}