diff options
Diffstat (limited to 'drivers/gpu/drm/amd/amdgpu/vcn_v4_0.c')
-rw-r--r-- | drivers/gpu/drm/amd/amdgpu/vcn_v4_0.c | 348 |
1 files changed, 314 insertions, 34 deletions
diff --git a/drivers/gpu/drm/amd/amdgpu/vcn_v4_0.c b/drivers/gpu/drm/amd/amdgpu/vcn_v4_0.c index ca14c3ef742e..897a5ce9c9da 100644 --- a/drivers/gpu/drm/amd/amdgpu/vcn_v4_0.c +++ b/drivers/gpu/drm/amd/amdgpu/vcn_v4_0.c @@ -30,6 +30,7 @@ #include "soc15d.h" #include "soc15_hw_ip.h" #include "vcn_v2_0.h" +#include "mmsch_v4_0.h" #include "vcn/vcn_4_0_0_offset.h" #include "vcn/vcn_4_0_0_sh_mask.h" @@ -45,6 +46,8 @@ #define VCN_VID_SOC_ADDRESS_2_0 0x1fb00 #define VCN1_VID_SOC_ADDRESS_3_0 0x48300 +#define VCN_HARVEST_MMSCH 0 + #define RDECODE_MSG_CREATE 0x00000000 #define RDECODE_MESSAGE_CREATE 0x00000001 @@ -53,12 +56,14 @@ static int amdgpu_ih_clientid_vcns[] = { SOC15_IH_CLIENTID_VCN1 }; +static int vcn_v4_0_start_sriov(struct amdgpu_device *adev); static void vcn_v4_0_set_unified_ring_funcs(struct amdgpu_device *adev); static void vcn_v4_0_set_irq_funcs(struct amdgpu_device *adev); static int vcn_v4_0_set_powergating_state(void *handle, enum amd_powergating_state state); static int vcn_v4_0_pause_dpg_mode(struct amdgpu_device *adev, int inst_idx, struct dpg_pause_state *new_state); +static void vcn_v4_0_unified_ring_set_wptr(struct amdgpu_ring *ring); /** * vcn_v4_0_early_init - set function pointers @@ -71,6 +76,9 @@ static int vcn_v4_0_early_init(void *handle) { struct amdgpu_device *adev = (struct amdgpu_device *)handle; + if (amdgpu_sriov_vf(adev)) + adev->vcn.harvest_config = VCN_HARVEST_MMSCH; + /* re-use enc ring as unified ring */ adev->vcn.num_enc_rings = 1; @@ -92,6 +100,7 @@ static int vcn_v4_0_sw_init(void *handle) struct amdgpu_ring *ring; struct amdgpu_device *adev = (struct amdgpu_device *)handle; int i, r; + int vcn_doorbell_index = 0; r = amdgpu_vcn_sw_init(adev); if (r) @@ -103,6 +112,12 @@ static int vcn_v4_0_sw_init(void *handle) if (r) return r; + if (amdgpu_sriov_vf(adev)) { + vcn_doorbell_index = adev->doorbell_index.vcn.vcn_ring0_1 - MMSCH_DOORBELL_OFFSET; + /* get DWORD offset */ + vcn_doorbell_index = vcn_doorbell_index << 1; + } + for (i = 0; i < adev->vcn.num_vcn_inst; i++) { volatile struct amdgpu_vcn4_fw_shared *fw_shared; @@ -119,7 +134,10 @@ static int vcn_v4_0_sw_init(void *handle) ring = &adev->vcn.inst[i].ring_enc[0]; ring->use_doorbell = true; - ring->doorbell_index = (adev->doorbell_index.vcn.vcn_ring0_1 << 1) + 2 + 8 * i; + if (amdgpu_sriov_vf(adev)) + ring->doorbell_index = vcn_doorbell_index + i * (adev->vcn.num_enc_rings + 1) + 1; + else + ring->doorbell_index = (adev->doorbell_index.vcn.vcn_ring0_1 << 1) + 2 + 8 * i; sprintf(ring->name, "vcn_unified_%d", i); @@ -132,10 +150,23 @@ static int vcn_v4_0_sw_init(void *handle) fw_shared->present_flag_0 = cpu_to_le32(AMDGPU_FW_SHARED_FLAG_0_UNIFIED_QUEUE); fw_shared->sq.is_enabled = 1; + fw_shared->present_flag_0 |= cpu_to_le32(AMDGPU_VCN_SMU_DPM_INTERFACE_FLAG); + fw_shared->smu_dpm_interface.smu_interface_type = (adev->flags & AMD_IS_APU) ? + AMDGPU_VCN_SMU_DPM_INTERFACE_APU : AMDGPU_VCN_SMU_DPM_INTERFACE_DGPU; + + if (amdgpu_sriov_vf(adev)) + fw_shared->present_flag_0 |= cpu_to_le32(AMDGPU_VCN_VF_RB_SETUP_FLAG); + if (amdgpu_vcnfw_log) amdgpu_vcn_fwlog_init(&adev->vcn.inst[i]); } + if (amdgpu_sriov_vf(adev)) { + r = amdgpu_virt_alloc_mm_table(adev); + if (r) + return r; + } + if (adev->pg_flags & AMD_PG_SUPPORT_VCN_DPG) adev->vcn.pause_dpg_mode = vcn_v4_0_pause_dpg_mode; @@ -169,6 +200,9 @@ static int vcn_v4_0_sw_fini(void *handle) drm_dev_exit(idx); } + if (amdgpu_sriov_vf(adev)) + amdgpu_virt_free_mm_table(adev); + r = amdgpu_vcn_suspend(adev); if (r) return r; @@ -191,18 +225,42 @@ static int vcn_v4_0_hw_init(void *handle) struct amdgpu_ring *ring; int i, r; - for (i = 0; i < adev->vcn.num_vcn_inst; ++i) { - if (adev->vcn.harvest_config & (1 << i)) - continue; + if (amdgpu_sriov_vf(adev)) { + r = vcn_v4_0_start_sriov(adev); + if (r) + goto done; - ring = &adev->vcn.inst[i].ring_enc[0]; + for (i = 0; i < adev->vcn.num_vcn_inst; ++i) { + if (adev->vcn.harvest_config & (1 << i)) + continue; - adev->nbio.funcs->vcn_doorbell_range(adev, ring->use_doorbell, - ((adev->doorbell_index.vcn.vcn_ring0_1 << 1) + 8 * i), i); + ring = &adev->vcn.inst[i].ring_enc[0]; + if (amdgpu_vcn_is_disabled_vcn(adev, VCN_ENCODE_RING, i)) { + ring->sched.ready = false; + ring->no_scheduler = true; + dev_info(adev->dev, "ring %s is disabled by hypervisor\n", ring->name); + } else { + ring->wptr = 0; + ring->wptr_old = 0; + vcn_v4_0_unified_ring_set_wptr(ring); + ring->sched.ready = true; + } + } + } else { + for (i = 0; i < adev->vcn.num_vcn_inst; ++i) { + if (adev->vcn.harvest_config & (1 << i)) + continue; - r = amdgpu_ring_test_helper(ring); - if (r) - goto done; + ring = &adev->vcn.inst[i].ring_enc[0]; + + adev->nbio.funcs->vcn_doorbell_range(adev, ring->use_doorbell, + ((adev->doorbell_index.vcn.vcn_ring0_1 << 1) + 8 * i), i); + + r = amdgpu_ring_test_helper(ring); + if (r) + goto done; + + } } done: @@ -230,12 +288,14 @@ static int vcn_v4_0_hw_fini(void *handle) for (i = 0; i < adev->vcn.num_vcn_inst; ++i) { if (adev->vcn.harvest_config & (1 << i)) continue; - - if ((adev->pg_flags & AMD_PG_SUPPORT_VCN_DPG) || + if (!amdgpu_sriov_vf(adev)) { + if ((adev->pg_flags & AMD_PG_SUPPORT_VCN_DPG) || (adev->vcn.cur_state != AMD_PG_STATE_GATE && RREG32_SOC15(VCN, i, regUVD_STATUS))) { vcn_v4_0_set_powergating_state(adev, AMD_PG_STATE_GATE); + } } + } return 0; @@ -1107,6 +1167,214 @@ static int vcn_v4_0_start(struct amdgpu_device *adev) return 0; } +static int vcn_v4_0_start_sriov(struct amdgpu_device *adev) +{ + int i; + struct amdgpu_ring *ring_enc; + uint64_t cache_addr; + uint64_t rb_enc_addr; + uint64_t ctx_addr; + uint32_t param, resp, expected; + uint32_t offset, cache_size; + uint32_t tmp, timeout; + + struct amdgpu_mm_table *table = &adev->virt.mm_table; + uint32_t *table_loc; + uint32_t table_size; + uint32_t size, size_dw; + uint32_t init_status; + uint32_t enabled_vcn; + + struct mmsch_v4_0_cmd_direct_write + direct_wt = { {0} }; + struct mmsch_v4_0_cmd_direct_read_modify_write + direct_rd_mod_wt = { {0} }; + struct mmsch_v4_0_cmd_end end = { {0} }; + struct mmsch_v4_0_init_header header; + + volatile struct amdgpu_vcn4_fw_shared *fw_shared; + volatile struct amdgpu_fw_shared_rb_setup *rb_setup; + + direct_wt.cmd_header.command_type = + MMSCH_COMMAND__DIRECT_REG_WRITE; + direct_rd_mod_wt.cmd_header.command_type = + MMSCH_COMMAND__DIRECT_REG_READ_MODIFY_WRITE; + end.cmd_header.command_type = + MMSCH_COMMAND__END; + + header.version = MMSCH_VERSION; + header.total_size = sizeof(struct mmsch_v4_0_init_header) >> 2; + for (i = 0; i < AMDGPU_MAX_VCN_INSTANCES; i++) { + header.inst[i].init_status = 0; + header.inst[i].table_offset = 0; + header.inst[i].table_size = 0; + } + + table_loc = (uint32_t *)table->cpu_addr; + table_loc += header.total_size; + for (i = 0; i < adev->vcn.num_vcn_inst; i++) { + if (adev->vcn.harvest_config & (1 << i)) + continue; + + table_size = 0; + + MMSCH_V4_0_INSERT_DIRECT_RD_MOD_WT(SOC15_REG_OFFSET(VCN, i, + regUVD_STATUS), + ~UVD_STATUS__UVD_BUSY, UVD_STATUS__UVD_BUSY); + + cache_size = AMDGPU_GPU_PAGE_ALIGN(adev->vcn.fw->size + 4); + + if (adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) { + MMSCH_V4_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCN, i, + regUVD_LMI_VCPU_CACHE_64BIT_BAR_LOW), + adev->firmware.ucode[AMDGPU_UCODE_ID_VCN + i].tmr_mc_addr_lo); + MMSCH_V4_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCN, i, + regUVD_LMI_VCPU_CACHE_64BIT_BAR_HIGH), + adev->firmware.ucode[AMDGPU_UCODE_ID_VCN + i].tmr_mc_addr_hi); + offset = 0; + MMSCH_V4_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCN, i, + regUVD_VCPU_CACHE_OFFSET0), + 0); + } else { + MMSCH_V4_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCN, i, + regUVD_LMI_VCPU_CACHE_64BIT_BAR_LOW), + lower_32_bits(adev->vcn.inst[i].gpu_addr)); + MMSCH_V4_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCN, i, + regUVD_LMI_VCPU_CACHE_64BIT_BAR_HIGH), + upper_32_bits(adev->vcn.inst[i].gpu_addr)); + offset = cache_size; + MMSCH_V4_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCN, i, + regUVD_VCPU_CACHE_OFFSET0), + AMDGPU_UVD_FIRMWARE_OFFSET >> 3); + } + + MMSCH_V4_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCN, i, + regUVD_VCPU_CACHE_SIZE0), + cache_size); + + cache_addr = adev->vcn.inst[i].gpu_addr + offset; + MMSCH_V4_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCN, i, + regUVD_LMI_VCPU_CACHE1_64BIT_BAR_LOW), + lower_32_bits(cache_addr)); + MMSCH_V4_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCN, i, + regUVD_LMI_VCPU_CACHE1_64BIT_BAR_HIGH), + upper_32_bits(cache_addr)); + MMSCH_V4_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCN, i, + regUVD_VCPU_CACHE_OFFSET1), + 0); + MMSCH_V4_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCN, i, + regUVD_VCPU_CACHE_SIZE1), + AMDGPU_VCN_STACK_SIZE); + + cache_addr = adev->vcn.inst[i].gpu_addr + offset + + AMDGPU_VCN_STACK_SIZE; + MMSCH_V4_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCN, i, + regUVD_LMI_VCPU_CACHE2_64BIT_BAR_LOW), + lower_32_bits(cache_addr)); + MMSCH_V4_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCN, i, + regUVD_LMI_VCPU_CACHE2_64BIT_BAR_HIGH), + upper_32_bits(cache_addr)); + MMSCH_V4_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCN, i, + regUVD_VCPU_CACHE_OFFSET2), + 0); + MMSCH_V4_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCN, i, + regUVD_VCPU_CACHE_SIZE2), + AMDGPU_VCN_CONTEXT_SIZE); + + fw_shared = adev->vcn.inst[i].fw_shared.cpu_addr; + rb_setup = &fw_shared->rb_setup; + + ring_enc = &adev->vcn.inst[i].ring_enc[0]; + ring_enc->wptr = 0; + rb_enc_addr = ring_enc->gpu_addr; + + rb_setup->is_rb_enabled_flags |= RB_ENABLED; + rb_setup->rb_addr_lo = lower_32_bits(rb_enc_addr); + rb_setup->rb_addr_hi = upper_32_bits(rb_enc_addr); + rb_setup->rb_size = ring_enc->ring_size / 4; + fw_shared->present_flag_0 |= cpu_to_le32(AMDGPU_VCN_VF_RB_SETUP_FLAG); + + MMSCH_V4_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCN, i, + regUVD_LMI_VCPU_NC0_64BIT_BAR_LOW), + lower_32_bits(adev->vcn.inst[i].fw_shared.gpu_addr)); + MMSCH_V4_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCN, i, + regUVD_LMI_VCPU_NC0_64BIT_BAR_HIGH), + upper_32_bits(adev->vcn.inst[i].fw_shared.gpu_addr)); + MMSCH_V4_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCN, i, + regUVD_VCPU_NONCACHE_SIZE0), + AMDGPU_GPU_PAGE_ALIGN(sizeof(struct amdgpu_vcn4_fw_shared))); + + /* add end packet */ + MMSCH_V4_0_INSERT_END(); + + /* refine header */ + header.inst[i].init_status = 0; + header.inst[i].table_offset = header.total_size; + header.inst[i].table_size = table_size; + header.total_size += table_size; + } + + /* Update init table header in memory */ + size = sizeof(struct mmsch_v4_0_init_header); + table_loc = (uint32_t *)table->cpu_addr; + memcpy((void *)table_loc, &header, size); + + /* message MMSCH (in VCN[0]) to initialize this client + * 1, write to mmsch_vf_ctx_addr_lo/hi register with GPU mc addr + * of memory descriptor location + */ + ctx_addr = table->gpu_addr; + WREG32_SOC15(VCN, 0, regMMSCH_VF_CTX_ADDR_LO, lower_32_bits(ctx_addr)); + WREG32_SOC15(VCN, 0, regMMSCH_VF_CTX_ADDR_HI, upper_32_bits(ctx_addr)); + + /* 2, update vmid of descriptor */ + tmp = RREG32_SOC15(VCN, 0, regMMSCH_VF_VMID); + tmp &= ~MMSCH_VF_VMID__VF_CTX_VMID_MASK; + /* use domain0 for MM scheduler */ + tmp |= (0 << MMSCH_VF_VMID__VF_CTX_VMID__SHIFT); + WREG32_SOC15(VCN, 0, regMMSCH_VF_VMID, tmp); + + /* 3, notify mmsch about the size of this descriptor */ + size = header.total_size; + WREG32_SOC15(VCN, 0, regMMSCH_VF_CTX_SIZE, size); + + /* 4, set resp to zero */ + WREG32_SOC15(VCN, 0, regMMSCH_VF_MAILBOX_RESP, 0); + + /* 5, kick off the initialization and wait until + * MMSCH_VF_MAILBOX_RESP becomes non-zero + */ + param = 0x00000001; + WREG32_SOC15(VCN, 0, regMMSCH_VF_MAILBOX_HOST, param); + tmp = 0; + timeout = 1000; + resp = 0; + expected = MMSCH_VF_MAILBOX_RESP__OK; + while (resp != expected) { + resp = RREG32_SOC15(VCN, 0, regMMSCH_VF_MAILBOX_RESP); + if (resp != 0) + break; + + udelay(10); + tmp = tmp + 10; + if (tmp >= timeout) { + DRM_ERROR("failed to init MMSCH. TIME-OUT after %d usec"\ + " waiting for regMMSCH_VF_MAILBOX_RESP "\ + "(expected=0x%08x, readback=0x%08x)\n", + tmp, expected, resp); + return -EBUSY; + } + } + enabled_vcn = amdgpu_vcn_is_disabled_vcn(adev, VCN_DECODE_RING, 0) ? 1 : 0; + init_status = ((struct mmsch_v4_0_init_header *)(table_loc))->inst[enabled_vcn].init_status; + if (resp != expected && resp != MMSCH_VF_MAILBOX_RESP__INCOMPLETE + && init_status != MMSCH_VF_ENGINE_STATUS__PASS) + DRM_ERROR("MMSCH init status is incorrect! readback=0x%08x, header init "\ + "status for VCN%x: 0x%x\n", resp, enabled_vcn, init_status); + + return 0; +} + /** * vcn_v4_0_stop_dpg_mode - VCN stop with dpg mode * @@ -1115,7 +1383,7 @@ static int vcn_v4_0_start(struct amdgpu_device *adev) * * Stop VCN block with dpg mode */ -static int vcn_v4_0_stop_dpg_mode(struct amdgpu_device *adev, int inst_idx) +static void vcn_v4_0_stop_dpg_mode(struct amdgpu_device *adev, int inst_idx) { uint32_t tmp; @@ -1133,7 +1401,6 @@ static int vcn_v4_0_stop_dpg_mode(struct amdgpu_device *adev, int inst_idx) /* disable dynamic power gating mode */ WREG32_P(SOC15_REG_OFFSET(VCN, inst_idx, regUVD_POWER_STATUS), 0, ~UVD_POWER_STATUS__UVD_PG_MODE_MASK); - return 0; } /** @@ -1154,7 +1421,7 @@ static int vcn_v4_0_stop(struct amdgpu_device *adev) fw_shared->sq.queue_mode |= FW_QUEUE_DPG_HOLD_OFF; if (adev->pg_flags & AMD_PG_SUPPORT_VCN_DPG) { - r = vcn_v4_0_stop_dpg_mode(adev, i); + vcn_v4_0_stop_dpg_mode(adev, i); continue; } @@ -1328,21 +1595,23 @@ static void vcn_v4_0_unified_ring_set_wptr(struct amdgpu_ring *ring) } } -static int vcn_v4_0_limit_sched(struct amdgpu_cs_parser *p) +static int vcn_v4_0_limit_sched(struct amdgpu_cs_parser *p, + struct amdgpu_job *job) { struct drm_gpu_scheduler **scheds; /* The create msg must be in the first IB submitted */ - if (atomic_read(&p->entity->fence_seq)) + if (atomic_read(&job->base.entity->fence_seq)) return -EINVAL; scheds = p->adev->gpu_sched[AMDGPU_HW_IP_VCN_ENC] [AMDGPU_RING_PRIO_0].sched; - drm_sched_entity_modify_sched(p->entity, scheds, 1); + drm_sched_entity_modify_sched(job->base.entity, scheds, 1); return 0; } -static int vcn_v4_0_dec_msg(struct amdgpu_cs_parser *p, uint64_t addr) +static int vcn_v4_0_dec_msg(struct amdgpu_cs_parser *p, struct amdgpu_job *job, + uint64_t addr) { struct ttm_operation_ctx ctx = { false, false }; struct amdgpu_bo_va_mapping *map; @@ -1413,7 +1682,7 @@ static int vcn_v4_0_dec_msg(struct amdgpu_cs_parser *p, uint64_t addr) if (create[0] == 0x7 || create[0] == 0x10 || create[0] == 0x11) continue; - r = vcn_v4_0_limit_sched(p); + r = vcn_v4_0_limit_sched(p, job); if (r) goto out; } @@ -1426,32 +1695,34 @@ out: #define RADEON_VCN_ENGINE_TYPE_DECODE (0x00000003) static int vcn_v4_0_ring_patch_cs_in_place(struct amdgpu_cs_parser *p, - struct amdgpu_job *job, - struct amdgpu_ib *ib) + struct amdgpu_job *job, + struct amdgpu_ib *ib) { - struct amdgpu_ring *ring = to_amdgpu_ring(p->entity->rq->sched); - struct amdgpu_vcn_decode_buffer *decode_buffer = NULL; + struct amdgpu_ring *ring = amdgpu_job_ring(job); + struct amdgpu_vcn_decode_buffer *decode_buffer; + uint64_t addr; uint32_t val; - int r = 0; /* The first instance can decode anything */ if (!ring->me) - return r; + return 0; /* unified queue ib header has 8 double words. */ if (ib->length_dw < 8) - return r; + return 0; val = amdgpu_ib_get_value(ib, 6); //RADEON_VCN_ENGINE_TYPE + if (val != RADEON_VCN_ENGINE_TYPE_DECODE) + return 0; - if (val == RADEON_VCN_ENGINE_TYPE_DECODE) { - decode_buffer = (struct amdgpu_vcn_decode_buffer *)&ib->ptr[10]; + decode_buffer = (struct amdgpu_vcn_decode_buffer *)&ib->ptr[10]; - if (decode_buffer->valid_buf_flag & 0x1) - r = vcn_v4_0_dec_msg(p, ((u64)decode_buffer->msg_buffer_address_hi) << 32 | - decode_buffer->msg_buffer_address_lo); - } - return r; + if (!(decode_buffer->valid_buf_flag & 0x1)) + return 0; + + addr = ((u64)decode_buffer->msg_buffer_address_hi) << 32 | + decode_buffer->msg_buffer_address_lo; + return vcn_v4_0_dec_msg(p, job, addr); } static const struct amdgpu_ring_funcs vcn_v4_0_unified_ring_vm_funcs = { @@ -1597,6 +1868,15 @@ static int vcn_v4_0_set_powergating_state(void *handle, enum amd_powergating_sta struct amdgpu_device *adev = (struct amdgpu_device *)handle; int ret; + /* for SRIOV, guest should not control VCN Power-gating + * MMSCH FW should control Power-gating and clock-gating + * guest should avoid touching CGC and PG + */ + if (amdgpu_sriov_vf(adev)) { + adev->vcn.cur_state = AMD_PG_STATE_UNGATE; + return 0; + } + if(state == adev->vcn.cur_state) return 0; |