author    Maarten Lankhorst <maarten.lankhorst@linux.intel.com>   2020-12-15 11:05:43 +0100
committer Maarten Lankhorst <maarten.lankhorst@linux.intel.com>   2020-12-15 11:05:43 +0100
commit    ae75a0431f822273ce5d3a574863c67503db5775 (patch)
tree      1361bf5e24d1d38f993c08583f42eda4fb28b42e /drivers/gpu/drm/amd/amdgpu/vcn_v3_0.c
parent    91def3cce986238db7c06af083b8e9e30372a895 (diff)
parent    5fbd41d3bf123af6a135bdea564087ec0f563eb0 (diff)
download  linux-ae75a0431f822273ce5d3a574863c67503db5775.tar.bz2
Merge drm/drm-next into drm-misc-next
This backmerge is required since we will be based on top of v5.11, and there
has already been a request to backmerge so that some features can be sent upstream.
Signed-off-by: Maarten Lankhorst <maarten.lankhorst@linux.intel.com>
Diffstat (limited to 'drivers/gpu/drm/amd/amdgpu/vcn_v3_0.c')
-rw-r--r--   drivers/gpu/drm/amd/amdgpu/vcn_v3_0.c   246
1 file changed, 215 insertions(+), 31 deletions(-)
diff --git a/drivers/gpu/drm/amd/amdgpu/vcn_v3_0.c b/drivers/gpu/drm/amd/amdgpu/vcn_v3_0.c
index 269002816109..def583916294 100644
--- a/drivers/gpu/drm/amd/amdgpu/vcn_v3_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/vcn_v3_0.c
@@ -48,6 +48,7 @@
 #define mmUVD_RBC_IB_SIZE_INTERNAL_OFFSET		0x25c
 
 #define VCN_INSTANCES_SIENNA_CICHLID			2
+#define DEC_SW_RING_ENABLED				FALSE
 
 static int amdgpu_ih_clientid_vcns[] = {
 	SOC15_IH_CLIENTID_VCN,
@@ -155,6 +156,13 @@ static int vcn_v3_0_sw_init(void *handle)
 	if (r)
 		return r;
 
+	/*
+	 * Note: doorbell assignment is fixed for SRIOV multiple VCN engines
+	 * Formula:
+	 *  vcn_db_base  = adev->doorbell_index.vcn.vcn_ring0_1 << 1;
+	 *  dec_ring_i   = vcn_db_base + i * (adev->vcn.num_enc_rings + 1)
+	 *  enc_ring_i,j = vcn_db_base + i * (adev->vcn.num_enc_rings + 1) + 1 + j
+	 */
 	if (amdgpu_sriov_vf(adev)) {
 		vcn_doorbell_index = adev->doorbell_index.vcn.vcn_ring0_1;
 		/* get DWORD offset */
@@ -162,6 +170,7 @@ static int vcn_v3_0_sw_init(void *handle)
 	}
 
 	for (i = 0; i < adev->vcn.num_vcn_inst; i++) {
+		volatile struct amdgpu_fw_shared *fw_shared;
 		if (adev->vcn.harvest_config & (1 << i))
 			continue;
 
@@ -192,9 +201,7 @@ static int vcn_v3_0_sw_init(void *handle)
 		ring = &adev->vcn.inst[i].ring_dec;
 		ring->use_doorbell = true;
 		if (amdgpu_sriov_vf(adev)) {
-			ring->doorbell_index = vcn_doorbell_index;
-			/* NOTE: increment so next VCN engine use next DOORBELL DWORD */
-			vcn_doorbell_index++;
+			ring->doorbell_index = vcn_doorbell_index + i * (adev->vcn.num_enc_rings + 1);
 		} else {
 			ring->doorbell_index = (adev->doorbell_index.vcn.vcn_ring0_1 << 1) + 8 * i;
 		}
@@ -216,9 +223,7 @@ static int vcn_v3_0_sw_init(void *handle)
 			ring = &adev->vcn.inst[i].ring_enc[j];
 			ring->use_doorbell = true;
 			if (amdgpu_sriov_vf(adev)) {
-				ring->doorbell_index = vcn_doorbell_index;
-				/* NOTE: increment so next VCN engine use next DOORBELL DWORD */
-				vcn_doorbell_index++;
+				ring->doorbell_index = vcn_doorbell_index + i * (adev->vcn.num_enc_rings + 1) + 1 + j;
 			} else {
 				ring->doorbell_index = (adev->doorbell_index.vcn.vcn_ring0_1 << 1) + 2 + j + 8 * i;
 			}
@@ -230,6 +235,11 @@ static int vcn_v3_0_sw_init(void *handle)
 			if (r)
 				return r;
 		}
+
+		fw_shared = adev->vcn.inst[i].fw_shared_cpu_addr;
+		fw_shared->present_flag_0 |= cpu_to_le32(AMDGPU_VCN_SW_RING_FLAG) |
+					     cpu_to_le32(AMDGPU_VCN_MULTI_QUEUE_FLAG);
+		fw_shared->sw_ring.is_enabled = cpu_to_le32(DEC_SW_RING_ENABLED);
 	}
 
 	if (amdgpu_sriov_vf(adev)) {
@@ -253,7 +263,17 @@ static int vcn_v3_0_sw_init(void *handle)
 static int vcn_v3_0_sw_fini(void *handle)
 {
 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
-	int r;
+	int i, r;
+
+	for (i = 0; i < adev->vcn.num_vcn_inst; i++) {
+		volatile struct amdgpu_fw_shared *fw_shared;
+
+		if (adev->vcn.harvest_config & (1 << i))
+			continue;
+		fw_shared = adev->vcn.inst[i].fw_shared_cpu_addr;
+		fw_shared->present_flag_0 = 0;
+		fw_shared->sw_ring.is_enabled = false;
+	}
 
 	if (amdgpu_sriov_vf(adev))
 		amdgpu_virt_free_mm_table(adev);
@@ -291,17 +311,19 @@ static int vcn_v3_0_hw_init(void *handle)
 				continue;
 
 			ring = &adev->vcn.inst[i].ring_dec;
-			ring->wptr = 0;
-			ring->wptr_old = 0;
-			vcn_v3_0_dec_ring_set_wptr(ring);
-			ring->sched.ready = true;
+			if (ring->sched.ready) {
+				ring->wptr = 0;
+				ring->wptr_old = 0;
+				vcn_v3_0_dec_ring_set_wptr(ring);
+			}
 
 			for (j = 0; j < adev->vcn.num_enc_rings; ++j) {
 				ring = &adev->vcn.inst[i].ring_enc[j];
-				ring->wptr = 0;
-				ring->wptr_old = 0;
-				vcn_v3_0_enc_ring_set_wptr(ring);
-				ring->sched.ready = true;
+				if (ring->sched.ready) {
+					ring->wptr = 0;
+					ring->wptr_old = 0;
+					vcn_v3_0_enc_ring_set_wptr(ring);
+				}
 			}
 		}
 	} else {
@@ -461,6 +483,15 @@ static void vcn_v3_0_mc_resume(struct amdgpu_device *adev, int inst)
 		upper_32_bits(adev->vcn.inst[inst].gpu_addr + offset + AMDGPU_VCN_STACK_SIZE));
 	WREG32_SOC15(VCN, inst, mmUVD_VCPU_CACHE_OFFSET2, 0);
 	WREG32_SOC15(VCN, inst, mmUVD_VCPU_CACHE_SIZE2, AMDGPU_VCN_CONTEXT_SIZE);
+
+	/* non-cache window */
+	WREG32_SOC15(VCN, inst, mmUVD_LMI_VCPU_NC0_64BIT_BAR_LOW,
+		lower_32_bits(adev->vcn.inst[inst].fw_shared_gpu_addr));
+	WREG32_SOC15(VCN, inst, mmUVD_LMI_VCPU_NC0_64BIT_BAR_HIGH,
+		upper_32_bits(adev->vcn.inst[inst].fw_shared_gpu_addr));
+	WREG32_SOC15(VCN, inst, mmUVD_VCPU_NONCACHE_OFFSET0, 0);
+	WREG32_SOC15(VCN, inst, mmUVD_VCPU_NONCACHE_SIZE0,
+		AMDGPU_GPU_PAGE_ALIGN(sizeof(struct amdgpu_fw_shared)));
 }
 
 static void vcn_v3_0_mc_resume_dpg_mode(struct amdgpu_device *adev, int inst_idx, bool indirect)
@@ -543,13 +574,16 @@ static void vcn_v3_0_mc_resume_dpg_mode(struct amdgpu_device *adev, int inst_idx
 
 	/* non-cache window */
 	WREG32_SOC15_DPG_MODE(inst_idx, SOC15_DPG_MODE_OFFSET(
-			VCN, inst_idx, mmUVD_LMI_VCPU_NC0_64BIT_BAR_LOW), 0, 0, indirect);
+			VCN, inst_idx, mmUVD_LMI_VCPU_NC0_64BIT_BAR_LOW),
+			lower_32_bits(adev->vcn.inst[inst_idx].fw_shared_gpu_addr), 0, indirect);
 	WREG32_SOC15_DPG_MODE(inst_idx, SOC15_DPG_MODE_OFFSET(
-			VCN, inst_idx, mmUVD_LMI_VCPU_NC0_64BIT_BAR_HIGH), 0, 0, indirect);
+			VCN, inst_idx, mmUVD_LMI_VCPU_NC0_64BIT_BAR_HIGH),
+			upper_32_bits(adev->vcn.inst[inst_idx].fw_shared_gpu_addr), 0, indirect);
 	WREG32_SOC15_DPG_MODE(inst_idx, SOC15_DPG_MODE_OFFSET(
 			VCN, inst_idx, mmUVD_VCPU_NONCACHE_OFFSET0), 0, 0, indirect);
 	WREG32_SOC15_DPG_MODE(inst_idx, SOC15_DPG_MODE_OFFSET(
-			VCN, inst_idx, mmUVD_VCPU_NONCACHE_SIZE0), 0, 0, indirect);
+			VCN, inst_idx, mmUVD_VCPU_NONCACHE_SIZE0),
+			AMDGPU_GPU_PAGE_ALIGN(sizeof(struct amdgpu_fw_shared)), 0, indirect);
 }
 
 static void vcn_v3_0_disable_static_power_gating(struct amdgpu_device *adev, int inst)
@@ -902,6 +936,7 @@ static void vcn_v3_0_enable_clock_gating(struct amdgpu_device *adev, int inst)
 
 static int vcn_v3_0_start_dpg_mode(struct amdgpu_device *adev, int inst_idx, bool indirect)
 {
+	volatile struct amdgpu_fw_shared *fw_shared = adev->vcn.inst[inst_idx].fw_shared_cpu_addr;
 	struct amdgpu_ring *ring;
 	uint32_t rb_bufsz, tmp;
 
@@ -1011,6 +1046,12 @@ static int vcn_v3_0_start_dpg_mode(struct amdgpu_device *adev, int inst_idx, boo
 	tmp = REG_SET_FIELD(tmp, UVD_RBC_RB_CNTL, RB_RPTR_WR_EN, 1);
 	WREG32_SOC15(VCN, inst_idx, mmUVD_RBC_RB_CNTL, tmp);
 
+	/* Stall DPG before WPTR/RPTR reset */
+	WREG32_P(SOC15_REG_OFFSET(VCN, inst_idx, mmUVD_POWER_STATUS),
+		UVD_POWER_STATUS__STALL_DPG_POWER_UP_MASK,
+		~UVD_POWER_STATUS__STALL_DPG_POWER_UP_MASK);
+	fw_shared->multi_queue.decode_queue_mode |= cpu_to_le32(FW_QUEUE_RING_RESET);
+
 	/* set the write pointer delay */
 	WREG32_SOC15(VCN, inst_idx, mmUVD_RBC_RB_WPTR_CNTL, 0);
 
@@ -1033,11 +1074,17 @@ static int vcn_v3_0_start_dpg_mode(struct amdgpu_device *adev, int inst_idx, boo
 	WREG32_SOC15(VCN, inst_idx, mmUVD_RBC_RB_WPTR,
 		lower_32_bits(ring->wptr));
 
+	fw_shared->multi_queue.decode_queue_mode &= cpu_to_le32(~FW_QUEUE_RING_RESET);
+	/* Unstall DPG */
+	WREG32_P(SOC15_REG_OFFSET(VCN, inst_idx, mmUVD_POWER_STATUS),
+		0, ~UVD_POWER_STATUS__STALL_DPG_POWER_UP_MASK);
+
 	return 0;
 }
 
 static int vcn_v3_0_start(struct amdgpu_device *adev)
 {
+	volatile struct amdgpu_fw_shared *fw_shared;
 	struct amdgpu_ring *ring;
 	uint32_t rb_bufsz, tmp;
 	int i, j, k, r;
@@ -1180,6 +1227,9 @@ static int vcn_v3_0_start(struct amdgpu_device *adev)
 		tmp = REG_SET_FIELD(tmp, UVD_RBC_RB_CNTL, RB_RPTR_WR_EN, 1);
 		WREG32_SOC15(VCN, i, mmUVD_RBC_RB_CNTL, tmp);
 
+		fw_shared = adev->vcn.inst[i].fw_shared_cpu_addr;
+		fw_shared->multi_queue.decode_queue_mode |= cpu_to_le32(FW_QUEUE_RING_RESET);
+
 		/* programm the RB_BASE for ring buffer */
 		WREG32_SOC15(VCN, i, mmUVD_LMI_RBC_RB_64BIT_BAR_LOW,
 			lower_32_bits(ring->gpu_addr));
@@ -1192,19 +1242,25 @@ static int vcn_v3_0_start(struct amdgpu_device *adev)
 		ring->wptr = RREG32_SOC15(VCN, i, mmUVD_RBC_RB_RPTR);
 		WREG32_SOC15(VCN, i, mmUVD_RBC_RB_WPTR,
 			lower_32_bits(ring->wptr));
+		fw_shared->multi_queue.decode_queue_mode &= cpu_to_le32(~FW_QUEUE_RING_RESET);
 
+		fw_shared->multi_queue.encode_generalpurpose_queue_mode |= cpu_to_le32(FW_QUEUE_RING_RESET);
 		ring = &adev->vcn.inst[i].ring_enc[0];
 		WREG32_SOC15(VCN, i, mmUVD_RB_RPTR, lower_32_bits(ring->wptr));
 		WREG32_SOC15(VCN, i, mmUVD_RB_WPTR, lower_32_bits(ring->wptr));
 		WREG32_SOC15(VCN, i, mmUVD_RB_BASE_LO, ring->gpu_addr);
 		WREG32_SOC15(VCN, i, mmUVD_RB_BASE_HI, upper_32_bits(ring->gpu_addr));
 		WREG32_SOC15(VCN, i, mmUVD_RB_SIZE, ring->ring_size / 4);
+		fw_shared->multi_queue.encode_generalpurpose_queue_mode &= cpu_to_le32(~FW_QUEUE_RING_RESET);
 
+		fw_shared->multi_queue.encode_lowlatency_queue_mode |= cpu_to_le32(FW_QUEUE_RING_RESET);
 		ring = &adev->vcn.inst[i].ring_enc[1];
 		WREG32_SOC15(VCN, i, mmUVD_RB_RPTR2, lower_32_bits(ring->wptr));
 		WREG32_SOC15(VCN, i, mmUVD_RB_WPTR2, lower_32_bits(ring->wptr));
 		WREG32_SOC15(VCN, i, mmUVD_RB_BASE_LO2, ring->gpu_addr);
 		WREG32_SOC15(VCN, i, mmUVD_RB_BASE_HI2, upper_32_bits(ring->gpu_addr));
 		WREG32_SOC15(VCN, i, mmUVD_RB_SIZE2, ring->ring_size / 4);
+		fw_shared->multi_queue.encode_lowlatency_queue_mode &= cpu_to_le32(~FW_QUEUE_RING_RESET);
 	}
 
 	return 0;
@@ -1227,12 +1283,12 @@ static int vcn_v3_0_start_sriov(struct amdgpu_device *adev)
 	uint32_t table_size;
 	uint32_t size, size_dw;
+	bool is_vcn_ready;
+
 	struct mmsch_v3_0_cmd_direct_write
 		direct_wt = { {0} };
 	struct mmsch_v3_0_cmd_direct_read_modify_write
 		direct_rd_mod_wt = { {0} };
-	struct mmsch_v3_0_cmd_direct_polling
-		direct_poll = { {0} };
 	struct mmsch_v3_0_cmd_end end = { {0} };
 	struct mmsch_v3_0_init_header header;
 
@@ -1240,8 +1296,6 @@ static int vcn_v3_0_start_sriov(struct amdgpu_device *adev)
 		MMSCH_COMMAND__DIRECT_REG_WRITE;
 	direct_rd_mod_wt.cmd_header.command_type =
 		MMSCH_COMMAND__DIRECT_REG_READ_MODIFY_WRITE;
-	direct_poll.cmd_header.command_type =
-		MMSCH_COMMAND__DIRECT_REG_POLLING;
 	end.cmd_header.command_type =
 		MMSCH_COMMAND__END;
 
@@ -1364,7 +1418,7 @@ static int vcn_v3_0_start_sriov(struct amdgpu_device *adev)
 		MMSCH_V3_0_INSERT_END();
 
 		/* refine header */
-		header.inst[i].init_status = 1;
+		header.inst[i].init_status = 0;
 		header.inst[i].table_offset = header.total_size;
 		header.inst[i].table_size = table_size;
 		header.total_size += table_size;
@@ -1422,6 +1476,30 @@ static int vcn_v3_0_start_sriov(struct amdgpu_device *adev)
 		}
 	}
 
+	/* 6, check each VCN's init_status
+	 * if it remains as 0, then this VCN is not assigned to current VF
+	 * do not start ring for this VCN
+	 */
+	size = sizeof(struct mmsch_v3_0_init_header);
+	table_loc = (uint32_t *)table->cpu_addr;
+	memcpy(&header, (void *)table_loc, size);
+
+	for (i = 0; i < adev->vcn.num_vcn_inst; i++) {
+		if (adev->vcn.harvest_config & (1 << i))
+			continue;
+
+		is_vcn_ready = (header.inst[i].init_status == 1);
+		if (!is_vcn_ready)
+			DRM_INFO("VCN(%d) engine is disabled by hypervisor\n", i);
+
+		ring = &adev->vcn.inst[i].ring_dec;
+		ring->sched.ready = is_vcn_ready;
+		for (j = 0; j < adev->vcn.num_enc_rings; ++j) {
+			ring = &adev->vcn.inst[i].ring_enc[j];
+			ring->sched.ready = is_vcn_ready;
+		}
+	}
+
 	return 0;
 }
 
@@ -1531,6 +1609,7 @@ static int vcn_v3_0_stop(struct amdgpu_device *adev)
 static int vcn_v3_0_pause_dpg_mode(struct amdgpu_device *adev,
 		   int inst_idx, struct dpg_pause_state *new_state)
 {
+	volatile struct amdgpu_fw_shared *fw_shared;
 	struct amdgpu_ring *ring;
 	uint32_t reg_data = 0;
 	int ret_code;
@@ -1556,23 +1635,36 @@ static int vcn_v3_0_pause_dpg_mode(struct amdgpu_device *adev,
 					UVD_DPG_PAUSE__NJ_PAUSE_DPG_ACK_MASK,
 					UVD_DPG_PAUSE__NJ_PAUSE_DPG_ACK_MASK);
 
+				/* Stall DPG before WPTR/RPTR reset */
+				WREG32_P(SOC15_REG_OFFSET(VCN, inst_idx, mmUVD_POWER_STATUS),
+					UVD_POWER_STATUS__STALL_DPG_POWER_UP_MASK,
+					~UVD_POWER_STATUS__STALL_DPG_POWER_UP_MASK);
+
 				/* Restore */
+				fw_shared = adev->vcn.inst[inst_idx].fw_shared_cpu_addr;
+				fw_shared->multi_queue.encode_generalpurpose_queue_mode |= cpu_to_le32(FW_QUEUE_RING_RESET);
 				ring = &adev->vcn.inst[inst_idx].ring_enc[0];
+				ring->wptr = 0;
 				WREG32_SOC15(VCN, inst_idx, mmUVD_RB_BASE_LO, ring->gpu_addr);
 				WREG32_SOC15(VCN, inst_idx, mmUVD_RB_BASE_HI, upper_32_bits(ring->gpu_addr));
 				WREG32_SOC15(VCN, inst_idx, mmUVD_RB_SIZE, ring->ring_size / 4);
 				WREG32_SOC15(VCN, inst_idx, mmUVD_RB_RPTR, lower_32_bits(ring->wptr));
 				WREG32_SOC15(VCN, inst_idx, mmUVD_RB_WPTR, lower_32_bits(ring->wptr));
+				fw_shared->multi_queue.encode_generalpurpose_queue_mode &= cpu_to_le32(~FW_QUEUE_RING_RESET);
 
+				fw_shared->multi_queue.encode_lowlatency_queue_mode |= cpu_to_le32(FW_QUEUE_RING_RESET);
 				ring = &adev->vcn.inst[inst_idx].ring_enc[1];
+				ring->wptr = 0;
 				WREG32_SOC15(VCN, inst_idx, mmUVD_RB_BASE_LO2, ring->gpu_addr);
 				WREG32_SOC15(VCN, inst_idx, mmUVD_RB_BASE_HI2, upper_32_bits(ring->gpu_addr));
 				WREG32_SOC15(VCN, inst_idx, mmUVD_RB_SIZE2, ring->ring_size / 4);
 				WREG32_SOC15(VCN, inst_idx, mmUVD_RB_RPTR2, lower_32_bits(ring->wptr));
 				WREG32_SOC15(VCN, inst_idx, mmUVD_RB_WPTR2, lower_32_bits(ring->wptr));
+				fw_shared->multi_queue.encode_lowlatency_queue_mode &= cpu_to_le32(~FW_QUEUE_RING_RESET);
 
-				WREG32_SOC15(VCN, inst_idx, mmUVD_RBC_RB_WPTR,
-					RREG32_SOC15(VCN, inst_idx, mmUVD_SCRATCH2) & 0x7FFFFFFF);
+				/* Unstall DPG */
+				WREG32_P(SOC15_REG_OFFSET(VCN, inst_idx, mmUVD_POWER_STATUS),
+					0, ~UVD_POWER_STATUS__STALL_DPG_POWER_UP_MASK);
 
 				SOC15_WAIT_ON_RREG(VCN, inst_idx, mmUVD_POWER_STATUS,
 					UVD_PGFSM_CONFIG__UVDM_UVDU_PWR_ON, UVD_POWER_STATUS__UVD_POWER_STATUS_MASK);
@@ -1630,10 +1722,6 @@ static void vcn_v3_0_dec_ring_set_wptr(struct amdgpu_ring *ring)
 {
 	struct amdgpu_device *adev = ring->adev;
 
-	if (adev->pg_flags & AMD_PG_SUPPORT_VCN_DPG)
-		WREG32_SOC15(VCN, ring->me, mmUVD_SCRATCH2,
-			lower_32_bits(ring->wptr) | 0x80000000);
-
 	if (ring->use_doorbell) {
 		adev->wb.wb[ring->wptr_offs] = lower_32_bits(ring->wptr);
 		WDOORBELL32(ring->doorbell_index, lower_32_bits(ring->wptr));
@@ -1642,6 +1730,98 @@ static void vcn_v3_0_dec_ring_set_wptr(struct amdgpu_ring *ring)
 	}
 }
 
+static void vcn_v3_0_dec_sw_ring_emit_fence(struct amdgpu_ring *ring, u64 addr,
+				u64 seq, uint32_t flags)
+{
+	WARN_ON(flags & AMDGPU_FENCE_FLAG_64BIT);
+
+	amdgpu_ring_write(ring, VCN_DEC_SW_CMD_FENCE);
+	amdgpu_ring_write(ring, addr);
+	amdgpu_ring_write(ring, upper_32_bits(addr));
+	amdgpu_ring_write(ring, seq);
+	amdgpu_ring_write(ring, VCN_DEC_SW_CMD_TRAP);
+}
+
+static void vcn_v3_0_dec_sw_ring_insert_end(struct amdgpu_ring *ring)
+{
+	amdgpu_ring_write(ring, VCN_DEC_SW_CMD_END);
+}
+
+static void vcn_v3_0_dec_sw_ring_emit_ib(struct amdgpu_ring *ring,
+			       struct amdgpu_job *job,
+			       struct amdgpu_ib *ib,
+			       uint32_t flags)
+{
+	uint32_t vmid = AMDGPU_JOB_GET_VMID(job);
+
+	amdgpu_ring_write(ring, VCN_DEC_SW_CMD_IB);
+	amdgpu_ring_write(ring, vmid);
+	amdgpu_ring_write(ring, lower_32_bits(ib->gpu_addr));
+	amdgpu_ring_write(ring, upper_32_bits(ib->gpu_addr));
+	amdgpu_ring_write(ring, ib->length_dw);
+}
+
+static void vcn_v3_0_dec_sw_ring_emit_reg_wait(struct amdgpu_ring *ring, uint32_t reg,
+				uint32_t val, uint32_t mask)
+{
+	amdgpu_ring_write(ring, VCN_DEC_SW_CMD_REG_WAIT);
+	amdgpu_ring_write(ring, reg << 2);
+	amdgpu_ring_write(ring, mask);
+	amdgpu_ring_write(ring, val);
+}
+
+static void vcn_v3_0_dec_sw_ring_emit_vm_flush(struct amdgpu_ring *ring,
+				uint32_t vmid, uint64_t pd_addr)
+{
+	struct amdgpu_vmhub *hub = &ring->adev->vmhub[ring->funcs->vmhub];
+	uint32_t data0, data1, mask;
+
+	pd_addr = amdgpu_gmc_emit_flush_gpu_tlb(ring, vmid, pd_addr);
+
+	/* wait for register write */
+	data0 = hub->ctx0_ptb_addr_lo32 + vmid * hub->ctx_addr_distance;
+	data1 = lower_32_bits(pd_addr);
+	mask = 0xffffffff;
+	vcn_v3_0_dec_sw_ring_emit_reg_wait(ring, data0, data1, mask);
+}
+
+static void vcn_v3_0_dec_sw_ring_emit_wreg(struct amdgpu_ring *ring, uint32_t reg, uint32_t val)
+{
+	amdgpu_ring_write(ring, VCN_DEC_SW_CMD_REG_WRITE);
+	amdgpu_ring_write(ring, reg << 2);
+	amdgpu_ring_write(ring, val);
+}
+
+static const struct amdgpu_ring_funcs vcn_v3_0_dec_sw_ring_vm_funcs = {
+	.type = AMDGPU_RING_TYPE_VCN_DEC,
+	.align_mask = 0x3f,
+	.nop = VCN_DEC_SW_CMD_NO_OP,
+	.vmhub = AMDGPU_MMHUB_0,
+	.get_rptr = vcn_v3_0_dec_ring_get_rptr,
+	.get_wptr = vcn_v3_0_dec_ring_get_wptr,
+	.set_wptr = vcn_v3_0_dec_ring_set_wptr,
+	.emit_frame_size =
+		SOC15_FLUSH_GPU_TLB_NUM_WREG * 3 +
+		SOC15_FLUSH_GPU_TLB_NUM_REG_WAIT * 4 +
+		4 + /* vcn_v3_0_dec_sw_ring_emit_vm_flush */
+		5 + 5 + /* vcn_v3_0_dec_sw_ring_emit_fdec_swe x2 vm fdec_swe */
+		1, /* vcn_v3_0_dec_sw_ring_insert_end */
+	.emit_ib_size = 5, /* vcn_v3_0_dec_sw_ring_emit_ib */
+	.emit_ib = vcn_v3_0_dec_sw_ring_emit_ib,
+	.emit_fence = vcn_v3_0_dec_sw_ring_emit_fence,
+	.emit_vm_flush = vcn_v3_0_dec_sw_ring_emit_vm_flush,
+	.test_ring = amdgpu_vcn_dec_sw_ring_test_ring,
+	.test_ib = NULL,//amdgpu_vcn_dec_sw_ring_test_ib,
+	.insert_nop = amdgpu_ring_insert_nop,
+	.insert_end = vcn_v3_0_dec_sw_ring_insert_end,
+	.pad_ib = amdgpu_ring_generic_pad_ib,
+	.begin_use = amdgpu_vcn_ring_begin_use,
+	.end_use = amdgpu_vcn_ring_end_use,
+	.emit_wreg = vcn_v3_0_dec_sw_ring_emit_wreg,
+	.emit_reg_wait = vcn_v3_0_dec_sw_ring_emit_reg_wait,
+	.emit_reg_write_reg_wait = amdgpu_ring_emit_reg_write_reg_wait_helper,
+};
+
 static const struct amdgpu_ring_funcs vcn_v3_0_dec_ring_vm_funcs = {
 	.type = AMDGPU_RING_TYPE_VCN_DEC,
 	.align_mask = 0xf,
@@ -1779,9 +1959,13 @@ static void vcn_v3_0_set_dec_ring_funcs(struct amdgpu_device *adev)
 		if (adev->vcn.harvest_config & (1 << i))
 			continue;
 
-		adev->vcn.inst[i].ring_dec.funcs = &vcn_v3_0_dec_ring_vm_funcs;
+		if (!DEC_SW_RING_ENABLED)
+			adev->vcn.inst[i].ring_dec.funcs = &vcn_v3_0_dec_ring_vm_funcs;
+		else
+			adev->vcn.inst[i].ring_dec.funcs = &vcn_v3_0_dec_sw_ring_vm_funcs;
 		adev->vcn.inst[i].ring_dec.me = i;
-		DRM_INFO("VCN(%d) decode is enabled in VM mode\n", i);
+		DRM_INFO("VCN(%d) decode%s is enabled in VM mode\n", i,
+			  DEC_SW_RING_ENABLED?"(Software Ring)":"");
	}
}
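For orientation, below is a minimal, standalone sketch of the fixed SRIOV doorbell layout described in the comment this diff adds to vcn_v3_0_sw_init(). It is not driver code: the base index (0x0c) and the instance/encode-ring counts are made-up example values, whereas the real driver takes them from adev->doorbell_index.vcn.vcn_ring0_1, adev->vcn.num_vcn_inst and adev->vcn.num_enc_rings.

/*
 * Sketch of the per-instance doorbell assignment formula from the patch:
 *   vcn_db_base  = vcn_ring0_1 << 1
 *   dec_ring_i   = vcn_db_base + i * (num_enc_rings + 1)
 *   enc_ring_i,j = vcn_db_base + i * (num_enc_rings + 1) + 1 + j
 */
#include <stdio.h>

#define NUM_VCN_INST  2   /* example: Sienna Cichlid has two VCN instances */
#define NUM_ENC_RINGS 2

int main(void)
{
	unsigned int vcn_ring0_1 = 0x0c;              /* hypothetical base index */
	unsigned int vcn_db_base = vcn_ring0_1 << 1;  /* convert to a DWORD offset */

	for (int i = 0; i < NUM_VCN_INST; i++) {
		unsigned int dec = vcn_db_base + i * (NUM_ENC_RINGS + 1);

		printf("VCN%d dec ring doorbell: %u\n", i, dec);
		for (int j = 0; j < NUM_ENC_RINGS; j++)
			printf("VCN%d enc ring %d doorbell: %u\n", i, j, dec + 1 + j);
	}
	return 0;
}

With two encode rings per instance, each VCN instance owns a contiguous block of three doorbell DWORDs (one decode plus two encode), so a ring's index no longer depends on the order in which rings are initialized, which is what the removed vcn_doorbell_index++ counter relied on.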