diff options
author | Oak Zeng <Oak.Zeng@amd.com> | 2019-02-19 14:59:51 -0600 |
---|---|---|
committer | Alex Deucher <alexander.deucher@amd.com> | 2019-07-18 14:18:03 -0500 |
commit | 2fb1e49fda46e78db38eb0bed5ebda3940564458 (patch) | |
tree | 31444b926918a3515d832e51be07211678e9a6d1 /drivers/gpu/drm/amd/amdkfd | |
parent | 3a65d14d259547794db679ce5741da182d6f0b2d (diff) | |
download | linux-2fb1e49fda46e78db38eb0bed5ebda3940564458.tar.bz2 |
drm/amdkfd: Support bigger gds size
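Extend the map_process and set_resources PM4 packets to support a
bigger GDS size for Arcturus. map_process gains a 4-bit gds_size_hi
field next to the existing 6-bit gds_size, and the set_resources
gds_heap_base and gds_heap_size fields are widened from 6 to 10 bits.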
v2: Only make the change for v9
Signed-off-by: Oak Zeng <Oak.Zeng@amd.com>
Reviewed-by: Felix Kuehling <Felix.Kuehling@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
Diffstat (limited to 'drivers/gpu/drm/amd/amdkfd')
-rw-r--r-- | drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue_v9.c | 33 | ||||
-rw-r--r-- | drivers/gpu/drm/amd/amdkfd/kfd_pm4_headers_ai.h | 10 |
2 files changed, 36 insertions, 7 deletions
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue_v9.c b/drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue_v9.c
index 91da72d0d405..9a4bafb2e175 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue_v9.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue_v9.c
@@ -81,7 +81,8 @@ static int pm_map_process_v9(struct packet_manager *pm,
 	packet->bitfields2.diq_enable = (qpd->is_debug) ? 1 : 0;
 	packet->bitfields2.process_quantum = 1;
 	packet->bitfields2.pasid = qpd->pqm->process->pasid;
-	packet->bitfields14.gds_size = qpd->gds_size;
+	packet->bitfields14.gds_size = qpd->gds_size & 0x3F;
+	packet->bitfields14.gds_size_hi = (qpd->gds_size >> 6) & 0xF;
 	packet->bitfields14.num_gws = qpd->num_gws;
 	packet->bitfields14.num_oac = qpd->num_oac;
 	packet->bitfields14.sdma_enable = 1;
@@ -143,6 +144,34 @@ static int pm_runlist_v9(struct packet_manager *pm, uint32_t *buffer,
 	return 0;
 }
 
+static int pm_set_resources_v9(struct packet_manager *pm, uint32_t *buffer,
+				struct scheduling_resources *res)
+{
+	struct pm4_mes_set_resources *packet;
+
+	packet = (struct pm4_mes_set_resources *)buffer;
+	memset(buffer, 0, sizeof(struct pm4_mes_set_resources));
+
+	packet->header.u32All = pm_build_pm4_header(IT_SET_RESOURCES,
+					sizeof(struct pm4_mes_set_resources));
+
+	packet->bitfields2.queue_type =
+			queue_type__mes_set_resources__hsa_interface_queue_hiq;
+	packet->bitfields2.vmid_mask = res->vmid_mask;
+	packet->bitfields2.unmap_latency = KFD_UNMAP_LATENCY_MS / 100;
+	packet->bitfields7.oac_mask = res->oac_mask;
+	packet->bitfields8.gds_heap_base = res->gds_heap_base;
+	packet->bitfields8.gds_heap_size = res->gds_heap_size;
+
+	packet->gws_mask_lo = lower_32_bits(res->gws_mask);
+	packet->gws_mask_hi = upper_32_bits(res->gws_mask);
+
+	packet->queue_mask_lo = lower_32_bits(res->queue_mask);
+	packet->queue_mask_hi = upper_32_bits(res->queue_mask);
+
+	return 0;
+}
+
 static int pm_map_queues_v9(struct packet_manager *pm, uint32_t *buffer,
 		struct queue *q, bool is_static)
 {
@@ -344,7 +373,7 @@ static int pm_release_mem_v9(uint64_t gpu_addr, uint32_t *buffer)
 const struct packet_manager_funcs kfd_v9_pm_funcs = {
 	.map_process = pm_map_process_v9,
 	.runlist = pm_runlist_v9,
-	.set_resources = pm_set_resources_vi,
+	.set_resources = pm_set_resources_v9,
 	.map_queues = pm_map_queues_v9,
 	.unmap_queues = pm_unmap_queues_v9,
 	.query_status = pm_query_status_v9,
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_pm4_headers_ai.h b/drivers/gpu/drm/amd/amdkfd/kfd_pm4_headers_ai.h
index 44ed94239513..4d7add843746 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_pm4_headers_ai.h
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_pm4_headers_ai.h
@@ -83,10 +83,10 @@ struct pm4_mes_set_resources {
 
 	union {
 		struct {
-			uint32_t gds_heap_base:6;
-			uint32_t reserved3:5;
-			uint32_t gds_heap_size:6;
-			uint32_t reserved4:15;
+			uint32_t gds_heap_base:10;
+			uint32_t reserved3:1;
+			uint32_t gds_heap_size:10;
+			uint32_t reserved4:11;
 		} bitfields8;
 		uint32_t ordinal8;
 	};
@@ -179,7 +179,7 @@ struct pm4_mes_map_process {
 			uint32_t num_gws:7;
 			uint32_t sdma_enable:1;
 			uint32_t num_oac:4;
-			uint32_t reserved8:4;
+			uint32_t gds_size_hi:4;
 			uint32_t gds_size:6;
 			uint32_t num_queues:10;
 		} bitfields14;