author     Thomas Zimmermann <tzimmermann@suse.de>   2022-10-20 09:09:00 +0200
committer  Thomas Zimmermann <tzimmermann@suse.de>   2022-10-20 09:09:00 +0200
commit     1aca5ce036e3499336d1a2ace3070f908381c055 (patch)
tree       32b53fca3cff8c6c084d9c1d94d1761c3618e739 /drivers/gpu/drm/amd/amdgpu
parent     01f2cf53844b01e691516b465df1b6ab01b03230 (diff)
parent     9abf2313adc1ca1b6180c508c25f22f9395cc780 (diff)
download   linux-1aca5ce036e3499336d1a2ace3070f908381c055.tar.bz2
Merge drm/drm-fixes into drm-misc-fixes
Backmerging to get v6.1-rc1.
Signed-off-by: Thomas Zimmermann <tzimmermann@suse.de>
Diffstat (limited to 'drivers/gpu/drm/amd/amdgpu')
121 files changed, 5131 insertions, 2367 deletions
diff --git a/drivers/gpu/drm/amd/amdgpu/Makefile b/drivers/gpu/drm/amd/amdgpu/Makefile index 5a283d12f8e1..6ad39cf71bdd 100644 --- a/drivers/gpu/drm/amd/amdgpu/Makefile +++ b/drivers/gpu/drm/amd/amdgpu/Makefile @@ -75,7 +75,7 @@ amdgpu-y += \ vi.o mxgpu_vi.o nbio_v6_1.o soc15.o emu_soc.o mxgpu_ai.o nbio_v7_0.o vega10_reg_init.o \ vega20_reg_init.o nbio_v7_4.o nbio_v2_3.o nv.o arct_reg_init.o mxgpu_nv.o \ nbio_v7_2.o hdp_v4_0.o hdp_v5_0.o aldebaran_reg_init.o aldebaran.o soc21.o \ - nbio_v4_3.o hdp_v6_0.o nbio_v7_7.o hdp_v5_2.o lsdma_v6_0.o + sienna_cichlid.o nbio_v4_3.o hdp_v6_0.o nbio_v7_7.o hdp_v5_2.o lsdma_v6_0.o # add DF block amdgpu-y += \ @@ -89,7 +89,7 @@ amdgpu-y += \ gfxhub_v1_0.o mmhub_v1_0.o gmc_v9_0.o gfxhub_v1_1.o mmhub_v9_4.o \ gfxhub_v2_0.o mmhub_v2_0.o gmc_v10_0.o gfxhub_v2_1.o mmhub_v2_3.o \ mmhub_v1_7.o gfxhub_v3_0.o mmhub_v3_0.o mmhub_v3_0_2.o gmc_v11_0.o \ - mmhub_v3_0_1.o + mmhub_v3_0_1.o gfxhub_v3_0_3.o # add UMC block amdgpu-y += \ @@ -134,7 +134,8 @@ amdgpu-y += \ gfx_v9_4_2.o \ gfx_v10_0.o \ imu_v11_0.o \ - gfx_v11_0.o + gfx_v11_0.o \ + imu_v11_0_3.o # add async DMA block amdgpu-y += \ diff --git a/drivers/gpu/drm/amd/amdgpu/aldebaran.c b/drivers/gpu/drm/amd/amdgpu/aldebaran.c index c6cc493a5486..2b97b8a96fb4 100644 --- a/drivers/gpu/drm/amd/amdgpu/aldebaran.c +++ b/drivers/gpu/drm/amd/amdgpu/aldebaran.c @@ -148,30 +148,22 @@ aldebaran_mode2_perform_reset(struct amdgpu_reset_control *reset_ctl, struct amdgpu_reset_context *reset_context) { struct amdgpu_device *adev = (struct amdgpu_device *)reset_ctl->handle; + struct list_head *reset_device_list = reset_context->reset_device_list; struct amdgpu_device *tmp_adev = NULL; - struct list_head reset_device_list; int r = 0; dev_dbg(adev->dev, "aldebaran perform hw reset\n"); + + if (reset_device_list == NULL) + return -EINVAL; + if (adev->ip_versions[MP1_HWIP][0] == IP_VERSION(13, 0, 2) && reset_context->hive == NULL) { /* Wrong context, return error */ return -EINVAL; } - INIT_LIST_HEAD(&reset_device_list); - if (reset_context->hive) { - list_for_each_entry (tmp_adev, - &reset_context->hive->device_list, - gmc.xgmi.head) - list_add_tail(&tmp_adev->reset_list, - &reset_device_list); - } else { - list_add_tail(&reset_context->reset_req_dev->reset_list, - &reset_device_list); - } - - list_for_each_entry (tmp_adev, &reset_device_list, reset_list) { + list_for_each_entry(tmp_adev, reset_device_list, reset_list) { mutex_lock(&tmp_adev->reset_cntl->reset_lock); tmp_adev->reset_cntl->active_reset = AMD_RESET_METHOD_MODE2; } @@ -179,7 +171,7 @@ aldebaran_mode2_perform_reset(struct amdgpu_reset_control *reset_ctl, * Mode2 reset doesn't need any sync between nodes in XGMI hive, instead launch * them together so that they can be completed asynchronously on multiple nodes */ - list_for_each_entry (tmp_adev, &reset_device_list, reset_list) { + list_for_each_entry(tmp_adev, reset_device_list, reset_list) { /* For XGMI run all resets in parallel to speed up the process */ if (tmp_adev->gmc.xgmi.num_physical_nodes > 1) { if (!queue_work(system_unbound_wq, @@ -197,7 +189,7 @@ aldebaran_mode2_perform_reset(struct amdgpu_reset_control *reset_ctl, /* For XGMI wait for all resets to complete before proceed */ if (!r) { - list_for_each_entry (tmp_adev, &reset_device_list, reset_list) { + list_for_each_entry(tmp_adev, reset_device_list, reset_list) { if (tmp_adev->gmc.xgmi.num_physical_nodes > 1) { flush_work(&tmp_adev->reset_cntl->reset_work); r = tmp_adev->asic_reset_res; @@ -207,7 +199,7 @@ aldebaran_mode2_perform_reset(struct 
amdgpu_reset_control *reset_ctl, } } - list_for_each_entry (tmp_adev, &reset_device_list, reset_list) { + list_for_each_entry(tmp_adev, reset_device_list, reset_list) { mutex_unlock(&tmp_adev->reset_cntl->reset_lock); tmp_adev->reset_cntl->active_reset = AMD_RESET_METHOD_NONE; } @@ -339,10 +331,13 @@ static int aldebaran_mode2_restore_hwcontext(struct amdgpu_reset_control *reset_ctl, struct amdgpu_reset_context *reset_context) { + struct list_head *reset_device_list = reset_context->reset_device_list; struct amdgpu_device *tmp_adev = NULL; - struct list_head reset_device_list; int r; + if (reset_device_list == NULL) + return -EINVAL; + if (reset_context->reset_req_dev->ip_versions[MP1_HWIP][0] == IP_VERSION(13, 0, 2) && reset_context->hive == NULL) { @@ -350,19 +345,7 @@ aldebaran_mode2_restore_hwcontext(struct amdgpu_reset_control *reset_ctl, return -EINVAL; } - INIT_LIST_HEAD(&reset_device_list); - if (reset_context->hive) { - list_for_each_entry (tmp_adev, - &reset_context->hive->device_list, - gmc.xgmi.head) - list_add_tail(&tmp_adev->reset_list, - &reset_device_list); - } else { - list_add_tail(&reset_context->reset_req_dev->reset_list, - &reset_device_list); - } - - list_for_each_entry (tmp_adev, &reset_device_list, reset_list) { + list_for_each_entry(tmp_adev, reset_device_list, reset_list) { dev_info(tmp_adev->dev, "GPU reset succeeded, trying to resume\n"); r = aldebaran_mode2_restore_ip(tmp_adev); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h b/drivers/gpu/drm/amd/amdgpu/amdgpu.h index e146810c700b..ae9371b172e3 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h @@ -274,6 +274,9 @@ extern int amdgpu_vcnfw_log; #define AMDGPU_RESET_VCE (1 << 13) #define AMDGPU_RESET_VCE1 (1 << 14) +#define AMDGPU_RESET_LEVEL_SOFT_RECOVERY (1 << 0) +#define AMDGPU_RESET_LEVEL_MODE2 (1 << 1) + /* max cursor sizes (in pixels) */ #define CIK_CURSOR_WIDTH 128 #define CIK_CURSOR_HEIGHT 128 @@ -317,7 +320,7 @@ enum amdgpu_kiq_irq { AMDGPU_CP_KIQ_IRQ_DRIVER0 = 0, AMDGPU_CP_KIQ_IRQ_LAST }; - +#define SRIOV_USEC_TIMEOUT 1200000 /* wait 12 * 100ms for SRIOV */ #define MAX_KIQ_REG_WAIT 5000 /* in usecs, 5ms */ #define MAX_KIQ_REG_BAILOUT_INTERVAL 5 /* in msecs, 5ms */ #define MAX_KIQ_REG_TRY 1000 @@ -882,6 +885,7 @@ struct amdgpu_device { u64 fence_context; unsigned num_rings; struct amdgpu_ring *rings[AMDGPU_MAX_RINGS]; + struct dma_fence __rcu *gang_submit; bool ib_pool_ready; struct amdgpu_sa_manager ib_pools[AMDGPU_IB_POOL_MAX]; struct amdgpu_sched gpu_sched[AMDGPU_HW_IP_NUM][AMDGPU_RING_PRIO_MAX]; @@ -1060,6 +1064,9 @@ struct amdgpu_device { uint32_t scpm_status; struct work_struct reset_work; + + uint32_t amdgpu_reset_level_mask; + bool job_hang; }; static inline struct amdgpu_device *drm_to_adev(struct drm_device *ddev) @@ -1288,6 +1295,8 @@ u32 amdgpu_device_pcie_port_rreg(struct amdgpu_device *adev, u32 reg); void amdgpu_device_pcie_port_wreg(struct amdgpu_device *adev, u32 reg, u32 v); +struct dma_fence *amdgpu_device_switch_gang(struct amdgpu_device *adev, + struct dma_fence *gang); /* atpx handler */ #if defined(CONFIG_VGA_SWITCHEROO) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_acpi.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_acpi.c index 130060834b4e..b14800ac179e 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_acpi.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_acpi.c @@ -1,3 +1,4 @@ +// SPDX-License-Identifier: MIT /* * Copyright 2012 Advanced Micro Devices, Inc. 
* @@ -849,6 +850,7 @@ int amdgpu_acpi_init(struct amdgpu_device *adev) if (amdgpu_device_has_dc_support(adev)) { #if defined(CONFIG_DRM_AMD_DC) struct amdgpu_display_manager *dm = &adev->dm; + if (dm->backlight_dev[0]) atif->bd = dm->backlight_dev[0]; #endif @@ -863,6 +865,7 @@ int amdgpu_acpi_init(struct amdgpu_device *adev) if ((enc->devices & (ATOM_DEVICE_LCD_SUPPORT)) && enc->enc_priv) { struct amdgpu_encoder_atom_dig *dig = enc->enc_priv; + if (dig->bl_dev) { atif->bd = dig->bl_dev; break; @@ -919,9 +922,9 @@ static bool amdgpu_atif_pci_probe_handle(struct pci_dev *pdev) return false; status = acpi_get_handle(dhandle, "ATIF", &atif_handle); - if (ACPI_FAILURE(status)) { + if (ACPI_FAILURE(status)) return false; - } + amdgpu_acpi_priv.atif.handle = atif_handle; acpi_get_name(amdgpu_acpi_priv.atif.handle, ACPI_FULL_PATHNAME, &buffer); DRM_DEBUG_DRIVER("Found ATIF handle %s\n", acpi_method_name); @@ -954,9 +957,9 @@ static bool amdgpu_atcs_pci_probe_handle(struct pci_dev *pdev) return false; status = acpi_get_handle(dhandle, "ATCS", &atcs_handle); - if (ACPI_FAILURE(status)) { + if (ACPI_FAILURE(status)) return false; - } + amdgpu_acpi_priv.atcs.handle = atcs_handle; acpi_get_name(amdgpu_acpi_priv.atcs.handle, ACPI_FULL_PATHNAME, &buffer); DRM_DEBUG_DRIVER("Found ATCS handle %s\n", acpi_method_name); @@ -1050,6 +1053,10 @@ bool amdgpu_acpi_should_gpu_reset(struct amdgpu_device *adev) { if (adev->flags & AMD_IS_APU) return false; + + if (amdgpu_sriov_vf(adev)) + return false; + return pm_suspend_target_state != PM_SUSPEND_TO_IDLE; } @@ -1066,6 +1073,12 @@ bool amdgpu_acpi_is_s0ix_active(struct amdgpu_device *adev) (pm_suspend_target_state != PM_SUSPEND_TO_IDLE)) return false; + /* + * If ACPI_FADT_LOW_POWER_S0 is not set in the FADT, it is generally + * risky to do any special firmware-related preparations for entering + * S0ix even though the system is suspending to idle, so return false + * in that case. + */ if (!(acpi_gbl_FADT.flags & ACPI_FADT_LOW_POWER_S0)) { dev_warn_once(adev->dev, "Power consumption will be higher as BIOS has not been configured for suspend-to-idle.\n" diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c index 5e53a5293935..03bbfaa51cbc 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c @@ -1,3 +1,4 @@ +// SPDX-License-Identifier: MIT /* * Copyright 2014 Advanced Micro Devices, Inc. 
* @@ -74,9 +75,6 @@ void amdgpu_amdkfd_device_probe(struct amdgpu_device *adev) return; adev->kfd.dev = kgd2kfd_probe(adev, vf); - - if (adev->kfd.dev) - amdgpu_amdkfd_total_mem_size += adev->gmc.real_vram_size; } /** @@ -130,11 +128,13 @@ static void amdgpu_amdkfd_reset_work(struct work_struct *work) kfd.reset_work); struct amdgpu_reset_context reset_context; + memset(&reset_context, 0, sizeof(reset_context)); reset_context.method = AMD_RESET_METHOD_NONE; reset_context.reset_req_dev = adev; clear_bit(AMDGPU_NEED_FULL_RESET, &reset_context.flags); + clear_bit(AMDGPU_SKIP_MODE2_RESET, &reset_context.flags); amdgpu_device_gpu_recover(adev, NULL, &reset_context); } @@ -198,6 +198,8 @@ void amdgpu_amdkfd_device_init(struct amdgpu_device *adev) adev->kfd.init_complete = kgd2kfd_device_init(adev->kfd.dev, adev_to_drm(adev), &gpu_resources); + amdgpu_amdkfd_total_mem_size += adev->gmc.real_vram_size; + INIT_WORK(&adev->kfd.reset_work, amdgpu_amdkfd_reset_work); } } @@ -207,6 +209,7 @@ void amdgpu_amdkfd_device_fini_sw(struct amdgpu_device *adev) if (adev->kfd.dev) { kgd2kfd_device_exit(adev->kfd.dev); adev->kfd.dev = NULL; + amdgpu_amdkfd_total_mem_size -= adev->gmc.real_vram_size; } } @@ -683,6 +686,7 @@ int amdgpu_amdkfd_submit_ib(struct amdgpu_device *adev, ib->length_dw = ib_len; /* This works for NO_HWS. TODO: need to handle without knowing VMID */ job->vmid = vmid; + job->num_ibs = 1; ret = amdgpu_ib_schedule(ring, 1, ib, job, &f); @@ -752,11 +756,7 @@ void amdgpu_amdkfd_ras_poison_consumption_handler(struct amdgpu_device *adev, bo { struct ras_err_data err_data = {0, 0, 0, NULL}; - /* CPU MCA will handle page retirement if connected_to_cpu is 1 */ - if (!adev->gmc.xgmi.connected_to_cpu) - amdgpu_umc_poison_handler(adev, &err_data, reset); - else if (reset) - amdgpu_amdkfd_gpu_reset(adev); + amdgpu_umc_poison_handler(adev, &err_data, reset); } bool amdgpu_amdkfd_ras_query_utcl2_poison_status(struct amdgpu_device *adev) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h index 3c09dcc0986e..647220a8762d 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h @@ -96,6 +96,7 @@ struct amdgpu_amdkfd_fence { struct amdgpu_kfd_dev { struct kfd_dev *dev; uint64_t vram_used; + uint64_t vram_used_aligned; bool init_complete; struct work_struct reset_work; }; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c index a699134a1e8c..978d3970b5cc 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c @@ -1,3 +1,4 @@ +// SPDX-License-Identifier: MIT /* * Copyright 2014-2018 Advanced Micro Devices, Inc. * @@ -40,10 +41,10 @@ #define AMDGPU_USERPTR_RESTORE_DELAY_MS 1 /* - * Align VRAM allocations to 2MB to avoid fragmentation caused by 4K allocations in the tail 2MB + * Align VRAM availability to 2MB to avoid fragmentation caused by 4K allocations in the tail 2MB * BO chunk */ -#define VRAM_ALLOCATION_ALIGN (1 << 21) +#define VRAM_AVAILABLITY_ALIGN (1 << 21) /* Impose limit on how much memory KFD can use */ static struct { @@ -149,7 +150,7 @@ int amdgpu_amdkfd_reserve_mem_limit(struct amdgpu_device *adev, * to avoid fragmentation caused by 4K allocations in the tail * 2M BO chunk. 
*/ - vram_needed = ALIGN(size, VRAM_ALLOCATION_ALIGN); + vram_needed = size; } else if (alloc_flag & KFD_IOC_ALLOC_MEM_FLAGS_USERPTR) { system_mem_needed = size; } else if (!(alloc_flag & @@ -182,8 +183,10 @@ int amdgpu_amdkfd_reserve_mem_limit(struct amdgpu_device *adev, */ WARN_ONCE(vram_needed && !adev, "adev reference can't be null when vram is used"); - if (adev) + if (adev) { adev->kfd.vram_used += vram_needed; + adev->kfd.vram_used_aligned += ALIGN(vram_needed, VRAM_AVAILABLITY_ALIGN); + } kfd_mem_limit.system_mem_used += system_mem_needed; kfd_mem_limit.ttm_mem_used += ttm_mem_needed; @@ -203,8 +206,10 @@ void amdgpu_amdkfd_unreserve_mem_limit(struct amdgpu_device *adev, } else if (alloc_flag & KFD_IOC_ALLOC_MEM_FLAGS_VRAM) { WARN_ONCE(!adev, "adev reference can't be null when alloc mem flags vram is set"); - if (adev) - adev->kfd.vram_used -= ALIGN(size, VRAM_ALLOCATION_ALIGN); + if (adev) { + adev->kfd.vram_used -= size; + adev->kfd.vram_used_aligned -= ALIGN(size, VRAM_AVAILABLITY_ALIGN); + } } else if (alloc_flag & KFD_IOC_ALLOC_MEM_FLAGS_USERPTR) { kfd_mem_limit.system_mem_used -= size; } else if (!(alloc_flag & @@ -293,7 +298,7 @@ static int amdgpu_amdkfd_remove_eviction_fence(struct amdgpu_bo *bo, */ replacement = dma_fence_get_stub(); dma_resv_replace_fences(bo->tbo.base.resv, ef->base.context, - replacement, DMA_RESV_USAGE_READ); + replacement, DMA_RESV_USAGE_BOOKKEEP); dma_fence_put(replacement); return 0; } @@ -1386,8 +1391,9 @@ static int init_kfd_vm(struct amdgpu_vm *vm, void **process_info, ret = dma_resv_reserve_fences(vm->root.bo->tbo.base.resv, 1); if (ret) goto reserve_shared_fail; - amdgpu_bo_fence(vm->root.bo, - &vm->process_info->eviction_fence->base, true); + dma_resv_add_fence(vm->root.bo->tbo.base.resv, + &vm->process_info->eviction_fence->base, + DMA_RESV_USAGE_BOOKKEEP); amdgpu_bo_unreserve(vm->root.bo); /* Update process info */ @@ -1611,12 +1617,12 @@ size_t amdgpu_amdkfd_get_available_memory(struct amdgpu_device *adev) spin_lock(&kfd_mem_limit.mem_limit_lock); available = adev->gmc.real_vram_size - - adev->kfd.vram_used + - adev->kfd.vram_used_aligned - atomic64_read(&adev->vram_pin_size) - reserved_for_pt; spin_unlock(&kfd_mem_limit.mem_limit_lock); - return ALIGN_DOWN(available, VRAM_ALLOCATION_ALIGN); + return ALIGN_DOWN(available, VRAM_AVAILABLITY_ALIGN); } int amdgpu_amdkfd_gpuvm_alloc_memory_of_gpu( @@ -1725,7 +1731,7 @@ int amdgpu_amdkfd_gpuvm_alloc_memory_of_gpu( add_kgd_mem_to_kfd_bo_list(*mem, avm->process_info, user_addr); if (user_addr) { - pr_debug("creating userptr BO for user_addr = %llu\n", user_addr); + pr_debug("creating userptr BO for user_addr = %llx\n", user_addr); ret = init_user_pages(*mem, user_addr, criu_resume); if (ret) goto allocate_init_user_pages_failed; @@ -1984,9 +1990,9 @@ int amdgpu_amdkfd_gpuvm_map_memory_to_gpu( } if (!amdgpu_ttm_tt_get_usermm(bo->tbo.ttm) && !bo->tbo.pin_count) - amdgpu_bo_fence(bo, - &avm->process_info->eviction_fence->base, - true); + dma_resv_add_fence(bo->tbo.base.resv, + &avm->process_info->eviction_fence->base, + DMA_RESV_USAGE_BOOKKEEP); ret = unreserve_bo_and_vms(&ctx, false, false); goto out; @@ -2213,7 +2219,7 @@ int amdgpu_amdkfd_gpuvm_get_vm_fault_info(struct amdgpu_device *adev, { if (atomic_read(&adev->gmc.vm_fault_info_updated) == 1) { *mem = *adev->gmc.vm_fault_info; - mb(); + mb(); /* make sure read happened */ atomic_set(&adev->gmc.vm_fault_info_updated, 0); } return 0; @@ -2755,15 +2761,18 @@ int amdgpu_amdkfd_gpuvm_restore_process_bos(void *info, struct dma_fence **ef) if 
(mem->bo->tbo.pin_count) continue; - amdgpu_bo_fence(mem->bo, - &process_info->eviction_fence->base, true); + dma_resv_add_fence(mem->bo->tbo.base.resv, + &process_info->eviction_fence->base, + DMA_RESV_USAGE_BOOKKEEP); } /* Attach eviction fence to PD / PT BOs */ list_for_each_entry(peer_vm, &process_info->vm_list_head, vm_list_node) { struct amdgpu_bo *bo = peer_vm->root.bo; - amdgpu_bo_fence(bo, &process_info->eviction_fence->base, true); + dma_resv_add_fence(bo->tbo.base.resv, + &process_info->eviction_fence->base, + DMA_RESV_USAGE_BOOKKEEP); } validate_map_fail: @@ -2817,7 +2826,9 @@ int amdgpu_amdkfd_add_gws_to_process(void *info, void *gws, struct kgd_mem **mem ret = dma_resv_reserve_fences(gws_bo->tbo.base.resv, 1); if (ret) goto reserve_shared_fail; - amdgpu_bo_fence(gws_bo, &process_info->eviction_fence->base, true); + dma_resv_add_fence(gws_bo->tbo.base.resv, + &process_info->eviction_fence->base, + DMA_RESV_USAGE_BOOKKEEP); amdgpu_bo_unreserve(gws_bo); mutex_unlock(&(*mem)->process_info->lock); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_atomfirmware.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_atomfirmware.c index fd8f3731758e..b81b77a9efa6 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_atomfirmware.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_atomfirmware.c @@ -314,7 +314,7 @@ amdgpu_atomfirmware_get_vram_info(struct amdgpu_device *adev, mem_channel_number = vram_info->v30.channel_num; mem_channel_width = vram_info->v30.channel_width; if (vram_width) - *vram_width = mem_channel_number * mem_channel_width; + *vram_width = mem_channel_number * (1 << mem_channel_width); break; default: return -EINVAL; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_connectors.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_connectors.c index b7933c2ce765..491d4846fc02 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_connectors.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_connectors.c @@ -1674,10 +1674,12 @@ amdgpu_connector_add(struct amdgpu_device *adev, adev->mode_info.dither_property, AMDGPU_FMT_DITHER_DISABLE); - if (amdgpu_audio != 0) + if (amdgpu_audio != 0) { drm_object_attach_property(&amdgpu_connector->base.base, adev->mode_info.audio_property, AMDGPU_AUDIO_AUTO); + amdgpu_connector->audio = AMDGPU_AUDIO_AUTO; + } subpixel_order = SubPixelHorizontalRGB; connector->interlace_allowed = true; @@ -1799,6 +1801,7 @@ amdgpu_connector_add(struct amdgpu_device *adev, drm_object_attach_property(&amdgpu_connector->base.base, adev->mode_info.audio_property, AMDGPU_AUDIO_AUTO); + amdgpu_connector->audio = AMDGPU_AUDIO_AUTO; } drm_object_attach_property(&amdgpu_connector->base.base, adev->mode_info.dither_property, @@ -1852,6 +1855,7 @@ amdgpu_connector_add(struct amdgpu_device *adev, drm_object_attach_property(&amdgpu_connector->base.base, adev->mode_info.audio_property, AMDGPU_AUDIO_AUTO); + amdgpu_connector->audio = AMDGPU_AUDIO_AUTO; } drm_object_attach_property(&amdgpu_connector->base.base, adev->mode_info.dither_property, @@ -1902,6 +1906,7 @@ amdgpu_connector_add(struct amdgpu_device *adev, drm_object_attach_property(&amdgpu_connector->base.base, adev->mode_info.audio_property, AMDGPU_AUDIO_AUTO); + amdgpu_connector->audio = AMDGPU_AUDIO_AUTO; } drm_object_attach_property(&amdgpu_connector->base.base, adev->mode_info.dither_property, diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c index d8f1335bc68f..1bbd39b3b0fc 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c @@ -39,9 +39,82 @@ #include "amdgpu_gem.h" #include 
"amdgpu_ras.h" -static int amdgpu_cs_user_fence_chunk(struct amdgpu_cs_parser *p, - struct drm_amdgpu_cs_chunk_fence *data, - uint32_t *offset) +static int amdgpu_cs_parser_init(struct amdgpu_cs_parser *p, + struct amdgpu_device *adev, + struct drm_file *filp, + union drm_amdgpu_cs *cs) +{ + struct amdgpu_fpriv *fpriv = filp->driver_priv; + + if (cs->in.num_chunks == 0) + return -EINVAL; + + memset(p, 0, sizeof(*p)); + p->adev = adev; + p->filp = filp; + + p->ctx = amdgpu_ctx_get(fpriv, cs->in.ctx_id); + if (!p->ctx) + return -EINVAL; + + if (atomic_read(&p->ctx->guilty)) { + amdgpu_ctx_put(p->ctx); + return -ECANCELED; + } + return 0; +} + +static int amdgpu_cs_job_idx(struct amdgpu_cs_parser *p, + struct drm_amdgpu_cs_chunk_ib *chunk_ib) +{ + struct drm_sched_entity *entity; + unsigned int i; + int r; + + r = amdgpu_ctx_get_entity(p->ctx, chunk_ib->ip_type, + chunk_ib->ip_instance, + chunk_ib->ring, &entity); + if (r) + return r; + + /* + * Abort if there is no run queue associated with this entity. + * Possibly because of disabled HW IP. + */ + if (entity->rq == NULL) + return -EINVAL; + + /* Check if we can add this IB to some existing job */ + for (i = 0; i < p->gang_size; ++i) + if (p->entities[i] == entity) + return i; + + /* If not increase the gang size if possible */ + if (i == AMDGPU_CS_GANG_SIZE) + return -EINVAL; + + p->entities[i] = entity; + p->gang_size = i + 1; + return i; +} + +static int amdgpu_cs_p1_ib(struct amdgpu_cs_parser *p, + struct drm_amdgpu_cs_chunk_ib *chunk_ib, + unsigned int *num_ibs) +{ + int r; + + r = amdgpu_cs_job_idx(p, chunk_ib); + if (r < 0) + return r; + + ++(num_ibs[r]); + return 0; +} + +static int amdgpu_cs_p1_user_fence(struct amdgpu_cs_parser *p, + struct drm_amdgpu_cs_chunk_fence *data, + uint32_t *offset) { struct drm_gem_object *gobj; struct amdgpu_bo *bo; @@ -80,11 +153,11 @@ error_unref: return r; } -static int amdgpu_cs_bo_handles_chunk(struct amdgpu_cs_parser *p, - struct drm_amdgpu_bo_list_in *data) +static int amdgpu_cs_p1_bo_handles(struct amdgpu_cs_parser *p, + struct drm_amdgpu_bo_list_in *data) { + struct drm_amdgpu_bo_list_entry *info; int r; - struct drm_amdgpu_bo_list_entry *info = NULL; r = amdgpu_bo_create_list_entry_array(data, &info); if (r) @@ -104,38 +177,25 @@ error_free: return r; } -static int amdgpu_cs_parser_init(struct amdgpu_cs_parser *p, union drm_amdgpu_cs *cs) +/* Copy the data from userspace and go over it the first time */ +static int amdgpu_cs_pass1(struct amdgpu_cs_parser *p, + union drm_amdgpu_cs *cs) { struct amdgpu_fpriv *fpriv = p->filp->driver_priv; + unsigned int num_ibs[AMDGPU_CS_GANG_SIZE] = { }; struct amdgpu_vm *vm = &fpriv->vm; uint64_t *chunk_array_user; uint64_t *chunk_array; - unsigned size, num_ibs = 0; uint32_t uf_offset = 0; - int i; + unsigned int size; int ret; + int i; - if (cs->in.num_chunks == 0) - return -EINVAL; - - chunk_array = kvmalloc_array(cs->in.num_chunks, sizeof(uint64_t), GFP_KERNEL); + chunk_array = kvmalloc_array(cs->in.num_chunks, sizeof(uint64_t), + GFP_KERNEL); if (!chunk_array) return -ENOMEM; - p->ctx = amdgpu_ctx_get(fpriv, cs->in.ctx_id); - if (!p->ctx) { - ret = -EINVAL; - goto free_chunk; - } - - mutex_lock(&p->ctx->lock); - - /* skip guilty context job */ - if (atomic_read(&p->ctx->guilty) == 1) { - ret = -ECANCELED; - goto free_chunk; - } - /* get chunks */ chunk_array_user = u64_to_user_ptr(cs->in.chunks); if (copy_from_user(chunk_array, chunk_array_user, @@ -170,7 +230,8 @@ static int amdgpu_cs_parser_init(struct amdgpu_cs_parser *p, union drm_amdgpu_cs size = 
p->chunks[i].length_dw; cdata = u64_to_user_ptr(user_chunk.chunk_data); - p->chunks[i].kdata = kvmalloc_array(size, sizeof(uint32_t), GFP_KERNEL); + p->chunks[i].kdata = kvmalloc_array(size, sizeof(uint32_t), + GFP_KERNEL); if (p->chunks[i].kdata == NULL) { ret = -ENOMEM; i--; @@ -182,36 +243,35 @@ static int amdgpu_cs_parser_init(struct amdgpu_cs_parser *p, union drm_amdgpu_cs goto free_partial_kdata; } + /* Assume the worst on the following checks */ + ret = -EINVAL; switch (p->chunks[i].chunk_id) { case AMDGPU_CHUNK_ID_IB: - ++num_ibs; + if (size < sizeof(struct drm_amdgpu_cs_chunk_ib)) + goto free_partial_kdata; + + ret = amdgpu_cs_p1_ib(p, p->chunks[i].kdata, num_ibs); + if (ret) + goto free_partial_kdata; break; case AMDGPU_CHUNK_ID_FENCE: - size = sizeof(struct drm_amdgpu_cs_chunk_fence); - if (p->chunks[i].length_dw * sizeof(uint32_t) < size) { - ret = -EINVAL; + if (size < sizeof(struct drm_amdgpu_cs_chunk_fence)) goto free_partial_kdata; - } - ret = amdgpu_cs_user_fence_chunk(p, p->chunks[i].kdata, - &uf_offset); + ret = amdgpu_cs_p1_user_fence(p, p->chunks[i].kdata, + &uf_offset); if (ret) goto free_partial_kdata; - break; case AMDGPU_CHUNK_ID_BO_HANDLES: - size = sizeof(struct drm_amdgpu_bo_list_in); - if (p->chunks[i].length_dw * sizeof(uint32_t) < size) { - ret = -EINVAL; + if (size < sizeof(struct drm_amdgpu_bo_list_in)) goto free_partial_kdata; - } - ret = amdgpu_cs_bo_handles_chunk(p, p->chunks[i].kdata); + ret = amdgpu_cs_p1_bo_handles(p, p->chunks[i].kdata); if (ret) goto free_partial_kdata; - break; case AMDGPU_CHUNK_ID_DEPENDENCIES: @@ -223,22 +283,32 @@ static int amdgpu_cs_parser_init(struct amdgpu_cs_parser *p, union drm_amdgpu_cs break; default: - ret = -EINVAL; goto free_partial_kdata; } } - ret = amdgpu_job_alloc(p->adev, num_ibs, &p->job, vm); - if (ret) - goto free_all_kdata; + if (!p->gang_size) + return -EINVAL; + + for (i = 0; i < p->gang_size; ++i) { + ret = amdgpu_job_alloc(p->adev, num_ibs[i], &p->jobs[i], vm); + if (ret) + goto free_all_kdata; + + ret = drm_sched_job_init(&p->jobs[i]->base, p->entities[i], + &fpriv->vm); + if (ret) + goto free_all_kdata; + } + p->gang_leader = p->jobs[p->gang_size - 1]; - if (p->ctx->vram_lost_counter != p->job->vram_lost_counter) { + if (p->ctx->vram_lost_counter != p->gang_leader->vram_lost_counter) { ret = -ECANCELED; goto free_all_kdata; } if (p->uf_entry.tv.bo) - p->job->uf_addr = uf_offset; + p->gang_leader->uf_addr = uf_offset; kvfree(chunk_array); /* Use this opportunity to fill in task info for the vm */ @@ -260,6 +330,297 @@ free_chunk: return ret; } +static int amdgpu_cs_p2_ib(struct amdgpu_cs_parser *p, + struct amdgpu_cs_chunk *chunk, + unsigned int *ce_preempt, + unsigned int *de_preempt) +{ + struct drm_amdgpu_cs_chunk_ib *chunk_ib = chunk->kdata; + struct amdgpu_fpriv *fpriv = p->filp->driver_priv; + struct amdgpu_vm *vm = &fpriv->vm; + struct amdgpu_ring *ring; + struct amdgpu_job *job; + struct amdgpu_ib *ib; + int r; + + r = amdgpu_cs_job_idx(p, chunk_ib); + if (r < 0) + return r; + + job = p->jobs[r]; + ring = amdgpu_job_ring(job); + ib = &job->ibs[job->num_ibs++]; + + /* MM engine doesn't support user fences */ + if (p->uf_entry.tv.bo && ring->funcs->no_user_fence) + return -EINVAL; + + if (chunk_ib->ip_type == AMDGPU_HW_IP_GFX && + chunk_ib->flags & AMDGPU_IB_FLAG_PREEMPT) { + if (chunk_ib->flags & AMDGPU_IB_FLAG_CE) + (*ce_preempt)++; + else + (*de_preempt)++; + + /* Each GFX command submit allows only 1 IB max + * preemptible for CE & DE */ + if (*ce_preempt > 1 || *de_preempt > 1) + return 
-EINVAL; + } + + if (chunk_ib->flags & AMDGPU_IB_FLAG_PREAMBLE) + job->preamble_status |= AMDGPU_PREAMBLE_IB_PRESENT; + + r = amdgpu_ib_get(p->adev, vm, ring->funcs->parse_cs ? + chunk_ib->ib_bytes : 0, + AMDGPU_IB_POOL_DELAYED, ib); + if (r) { + DRM_ERROR("Failed to get ib !\n"); + return r; + } + + ib->gpu_addr = chunk_ib->va_start; + ib->length_dw = chunk_ib->ib_bytes / 4; + ib->flags = chunk_ib->flags; + return 0; +} + +static int amdgpu_cs_p2_dependencies(struct amdgpu_cs_parser *p, + struct amdgpu_cs_chunk *chunk) +{ + struct drm_amdgpu_cs_chunk_dep *deps = chunk->kdata; + struct amdgpu_fpriv *fpriv = p->filp->driver_priv; + unsigned num_deps; + int i, r; + + num_deps = chunk->length_dw * 4 / + sizeof(struct drm_amdgpu_cs_chunk_dep); + + for (i = 0; i < num_deps; ++i) { + struct amdgpu_ctx *ctx; + struct drm_sched_entity *entity; + struct dma_fence *fence; + + ctx = amdgpu_ctx_get(fpriv, deps[i].ctx_id); + if (ctx == NULL) + return -EINVAL; + + r = amdgpu_ctx_get_entity(ctx, deps[i].ip_type, + deps[i].ip_instance, + deps[i].ring, &entity); + if (r) { + amdgpu_ctx_put(ctx); + return r; + } + + fence = amdgpu_ctx_get_fence(ctx, entity, deps[i].handle); + amdgpu_ctx_put(ctx); + + if (IS_ERR(fence)) + return PTR_ERR(fence); + else if (!fence) + continue; + + if (chunk->chunk_id == AMDGPU_CHUNK_ID_SCHEDULED_DEPENDENCIES) { + struct drm_sched_fence *s_fence; + struct dma_fence *old = fence; + + s_fence = to_drm_sched_fence(fence); + fence = dma_fence_get(&s_fence->scheduled); + dma_fence_put(old); + } + + r = amdgpu_sync_fence(&p->gang_leader->sync, fence); + dma_fence_put(fence); + if (r) + return r; + } + return 0; +} + +static int amdgpu_syncobj_lookup_and_add(struct amdgpu_cs_parser *p, + uint32_t handle, u64 point, + u64 flags) +{ + struct dma_fence *fence; + int r; + + r = drm_syncobj_find_fence(p->filp, handle, point, flags, &fence); + if (r) { + DRM_ERROR("syncobj %u failed to find fence @ %llu (%d)!\n", + handle, point, r); + return r; + } + + r = amdgpu_sync_fence(&p->gang_leader->sync, fence); + dma_fence_put(fence); + + return r; +} + +static int amdgpu_cs_p2_syncobj_in(struct amdgpu_cs_parser *p, + struct amdgpu_cs_chunk *chunk) +{ + struct drm_amdgpu_cs_chunk_sem *deps = chunk->kdata; + unsigned num_deps; + int i, r; + + num_deps = chunk->length_dw * 4 / + sizeof(struct drm_amdgpu_cs_chunk_sem); + for (i = 0; i < num_deps; ++i) { + r = amdgpu_syncobj_lookup_and_add(p, deps[i].handle, 0, 0); + if (r) + return r; + } + + return 0; +} + +static int amdgpu_cs_p2_syncobj_timeline_wait(struct amdgpu_cs_parser *p, + struct amdgpu_cs_chunk *chunk) +{ + struct drm_amdgpu_cs_chunk_syncobj *syncobj_deps = chunk->kdata; + unsigned num_deps; + int i, r; + + num_deps = chunk->length_dw * 4 / + sizeof(struct drm_amdgpu_cs_chunk_syncobj); + for (i = 0; i < num_deps; ++i) { + r = amdgpu_syncobj_lookup_and_add(p, syncobj_deps[i].handle, + syncobj_deps[i].point, + syncobj_deps[i].flags); + if (r) + return r; + } + + return 0; +} + +static int amdgpu_cs_p2_syncobj_out(struct amdgpu_cs_parser *p, + struct amdgpu_cs_chunk *chunk) +{ + struct drm_amdgpu_cs_chunk_sem *deps = chunk->kdata; + unsigned num_deps; + int i; + + num_deps = chunk->length_dw * 4 / + sizeof(struct drm_amdgpu_cs_chunk_sem); + + if (p->post_deps) + return -EINVAL; + + p->post_deps = kmalloc_array(num_deps, sizeof(*p->post_deps), + GFP_KERNEL); + p->num_post_deps = 0; + + if (!p->post_deps) + return -ENOMEM; + + + for (i = 0; i < num_deps; ++i) { + p->post_deps[i].syncobj = + drm_syncobj_find(p->filp, deps[i].handle); + if 
(!p->post_deps[i].syncobj) + return -EINVAL; + p->post_deps[i].chain = NULL; + p->post_deps[i].point = 0; + p->num_post_deps++; + } + + return 0; +} + +static int amdgpu_cs_p2_syncobj_timeline_signal(struct amdgpu_cs_parser *p, + struct amdgpu_cs_chunk *chunk) +{ + struct drm_amdgpu_cs_chunk_syncobj *syncobj_deps = chunk->kdata; + unsigned num_deps; + int i; + + num_deps = chunk->length_dw * 4 / + sizeof(struct drm_amdgpu_cs_chunk_syncobj); + + if (p->post_deps) + return -EINVAL; + + p->post_deps = kmalloc_array(num_deps, sizeof(*p->post_deps), + GFP_KERNEL); + p->num_post_deps = 0; + + if (!p->post_deps) + return -ENOMEM; + + for (i = 0; i < num_deps; ++i) { + struct amdgpu_cs_post_dep *dep = &p->post_deps[i]; + + dep->chain = NULL; + if (syncobj_deps[i].point) { + dep->chain = dma_fence_chain_alloc(); + if (!dep->chain) + return -ENOMEM; + } + + dep->syncobj = drm_syncobj_find(p->filp, + syncobj_deps[i].handle); + if (!dep->syncobj) { + dma_fence_chain_free(dep->chain); + return -EINVAL; + } + dep->point = syncobj_deps[i].point; + p->num_post_deps++; + } + + return 0; +} + +static int amdgpu_cs_pass2(struct amdgpu_cs_parser *p) +{ + unsigned int ce_preempt = 0, de_preempt = 0; + int i, r; + + for (i = 0; i < p->nchunks; ++i) { + struct amdgpu_cs_chunk *chunk; + + chunk = &p->chunks[i]; + + switch (chunk->chunk_id) { + case AMDGPU_CHUNK_ID_IB: + r = amdgpu_cs_p2_ib(p, chunk, &ce_preempt, &de_preempt); + if (r) + return r; + break; + case AMDGPU_CHUNK_ID_DEPENDENCIES: + case AMDGPU_CHUNK_ID_SCHEDULED_DEPENDENCIES: + r = amdgpu_cs_p2_dependencies(p, chunk); + if (r) + return r; + break; + case AMDGPU_CHUNK_ID_SYNCOBJ_IN: + r = amdgpu_cs_p2_syncobj_in(p, chunk); + if (r) + return r; + break; + case AMDGPU_CHUNK_ID_SYNCOBJ_OUT: + r = amdgpu_cs_p2_syncobj_out(p, chunk); + if (r) + return r; + break; + case AMDGPU_CHUNK_ID_SYNCOBJ_TIMELINE_WAIT: + r = amdgpu_cs_p2_syncobj_timeline_wait(p, chunk); + if (r) + return r; + break; + case AMDGPU_CHUNK_ID_SYNCOBJ_TIMELINE_SIGNAL: + r = amdgpu_cs_p2_syncobj_timeline_signal(p, chunk); + if (r) + return r; + break; + } + } + + return 0; +} + /* Convert microseconds to bytes. */ static u64 us_to_bytes(struct amdgpu_device *adev, s64 us) { @@ -495,9 +856,7 @@ static int amdgpu_cs_parser_bos(struct amdgpu_cs_parser *p, struct amdgpu_vm *vm = &fpriv->vm; struct amdgpu_bo_list_entry *e; struct list_head duplicates; - struct amdgpu_bo *gds; - struct amdgpu_bo *gws; - struct amdgpu_bo *oa; + unsigned int i; int r; INIT_LIST_HEAD(&p->validated); @@ -581,16 +940,6 @@ static int amdgpu_cs_parser_bos(struct amdgpu_cs_parser *p, e->bo_va = amdgpu_vm_bo_find(vm, bo); } - /* Move fence waiting after getting reservation lock of - * PD root. Then there is no need on a ctx mutex lock. 
- */ - r = amdgpu_ctx_wait_prev_fence(p->ctx, p->entity); - if (unlikely(r != 0)) { - if (r != -ERESTARTSYS) - DRM_ERROR("amdgpu_ctx_wait_prev_fence failed.\n"); - goto error_validate; - } - amdgpu_cs_get_threshold_for_moves(p->adev, &p->bytes_moved_threshold, &p->bytes_moved_vis_threshold); p->bytes_moved = 0; @@ -611,197 +960,139 @@ static int amdgpu_cs_parser_bos(struct amdgpu_cs_parser *p, if (r) goto error_validate; - amdgpu_cs_report_moved_bytes(p->adev, p->bytes_moved, - p->bytes_moved_vis); + if (p->uf_entry.tv.bo) { + struct amdgpu_bo *uf = ttm_to_amdgpu_bo(p->uf_entry.tv.bo); - gds = p->bo_list->gds_obj; - gws = p->bo_list->gws_obj; - oa = p->bo_list->oa_obj; + r = amdgpu_ttm_alloc_gart(&uf->tbo); + if (r) + goto error_validate; - if (gds) { - p->job->gds_base = amdgpu_bo_gpu_offset(gds) >> PAGE_SHIFT; - p->job->gds_size = amdgpu_bo_size(gds) >> PAGE_SHIFT; - } - if (gws) { - p->job->gws_base = amdgpu_bo_gpu_offset(gws) >> PAGE_SHIFT; - p->job->gws_size = amdgpu_bo_size(gws) >> PAGE_SHIFT; - } - if (oa) { - p->job->oa_base = amdgpu_bo_gpu_offset(oa) >> PAGE_SHIFT; - p->job->oa_size = amdgpu_bo_size(oa) >> PAGE_SHIFT; + p->gang_leader->uf_addr += amdgpu_bo_gpu_offset(uf); } - if (!r && p->uf_entry.tv.bo) { - struct amdgpu_bo *uf = ttm_to_amdgpu_bo(p->uf_entry.tv.bo); + amdgpu_cs_report_moved_bytes(p->adev, p->bytes_moved, + p->bytes_moved_vis); - r = amdgpu_ttm_alloc_gart(&uf->tbo); - p->job->uf_addr += amdgpu_bo_gpu_offset(uf); - } + for (i = 0; i < p->gang_size; ++i) + amdgpu_job_set_resources(p->jobs[i], p->bo_list->gds_obj, + p->bo_list->gws_obj, + p->bo_list->oa_obj); + return 0; error_validate: - if (r) - ttm_eu_backoff_reservation(&p->ticket, &p->validated); + ttm_eu_backoff_reservation(&p->ticket, &p->validated); out_free_user_pages: - if (r) { - amdgpu_bo_list_for_each_userptr_entry(e, p->bo_list) { - struct amdgpu_bo *bo = ttm_to_amdgpu_bo(e->tv.bo); + amdgpu_bo_list_for_each_userptr_entry(e, p->bo_list) { + struct amdgpu_bo *bo = ttm_to_amdgpu_bo(e->tv.bo); - if (!e->user_pages) - continue; - amdgpu_ttm_tt_get_user_pages_done(bo->tbo.ttm); - kvfree(e->user_pages); - e->user_pages = NULL; - } - mutex_unlock(&p->bo_list->bo_list_mutex); + if (!e->user_pages) + continue; + amdgpu_ttm_tt_get_user_pages_done(bo->tbo.ttm); + kvfree(e->user_pages); + e->user_pages = NULL; } return r; } -static int amdgpu_cs_sync_rings(struct amdgpu_cs_parser *p) +static void trace_amdgpu_cs_ibs(struct amdgpu_cs_parser *p) { - struct amdgpu_fpriv *fpriv = p->filp->driver_priv; - struct amdgpu_bo_list_entry *e; - int r; + int i, j; - list_for_each_entry(e, &p->validated, tv.head) { - struct amdgpu_bo *bo = ttm_to_amdgpu_bo(e->tv.bo); - struct dma_resv *resv = bo->tbo.base.resv; - enum amdgpu_sync_mode sync_mode; + if (!trace_amdgpu_cs_enabled()) + return; - sync_mode = amdgpu_bo_explicit_sync(bo) ? - AMDGPU_SYNC_EXPLICIT : AMDGPU_SYNC_NE_OWNER; - r = amdgpu_sync_resv(p->adev, &p->job->sync, resv, sync_mode, - &fpriv->vm); - if (r) - return r; + for (i = 0; i < p->gang_size; ++i) { + struct amdgpu_job *job = p->jobs[i]; + + for (j = 0; j < job->num_ibs; ++j) + trace_amdgpu_cs(p, job, &job->ibs[j]); } - return 0; } -/** - * amdgpu_cs_parser_fini() - clean parser states - * @parser: parser structure holding parsing context. - * @error: error number - * @backoff: indicator to backoff the reservation - * - * If error is set then unvalidate buffer, otherwise just free memory - * used by parsing context. 
- **/ -static void amdgpu_cs_parser_fini(struct amdgpu_cs_parser *parser, int error, - bool backoff) +static int amdgpu_cs_patch_ibs(struct amdgpu_cs_parser *p, + struct amdgpu_job *job) { - unsigned i; + struct amdgpu_ring *ring = amdgpu_job_ring(job); + unsigned int i; + int r; - if (error && backoff) { - ttm_eu_backoff_reservation(&parser->ticket, - &parser->validated); - mutex_unlock(&parser->bo_list->bo_list_mutex); - } + /* Only for UVD/VCE VM emulation */ + if (!ring->funcs->parse_cs && !ring->funcs->patch_cs_in_place) + return 0; - for (i = 0; i < parser->num_post_deps; i++) { - drm_syncobj_put(parser->post_deps[i].syncobj); - kfree(parser->post_deps[i].chain); - } - kfree(parser->post_deps); + for (i = 0; i < job->num_ibs; ++i) { + struct amdgpu_ib *ib = &job->ibs[i]; + struct amdgpu_bo_va_mapping *m; + struct amdgpu_bo *aobj; + uint64_t va_start; + uint8_t *kptr; - dma_fence_put(parser->fence); + va_start = ib->gpu_addr & AMDGPU_GMC_HOLE_MASK; + r = amdgpu_cs_find_mapping(p, va_start, &aobj, &m); + if (r) { + DRM_ERROR("IB va_start is invalid\n"); + return r; + } - if (parser->ctx) { - mutex_unlock(&parser->ctx->lock); - amdgpu_ctx_put(parser->ctx); + if ((va_start + ib->length_dw * 4) > + (m->last + 1) * AMDGPU_GPU_PAGE_SIZE) { + DRM_ERROR("IB va_start+ib_bytes is invalid\n"); + return -EINVAL; + } + + /* the IB should be reserved at this point */ + r = amdgpu_bo_kmap(aobj, (void **)&kptr); + if (r) { + return r; + } + + kptr += va_start - (m->start * AMDGPU_GPU_PAGE_SIZE); + + if (ring->funcs->parse_cs) { + memcpy(ib->ptr, kptr, ib->length_dw * 4); + amdgpu_bo_kunmap(aobj); + + r = amdgpu_ring_parse_cs(ring, p, job, ib); + if (r) + return r; + } else { + ib->ptr = (uint32_t *)kptr; + r = amdgpu_ring_patch_cs_in_place(ring, p, job, ib); + amdgpu_bo_kunmap(aobj); + if (r) + return r; + } } - if (parser->bo_list) - amdgpu_bo_list_put(parser->bo_list); - for (i = 0; i < parser->nchunks; i++) - kvfree(parser->chunks[i].kdata); - kvfree(parser->chunks); - if (parser->job) - amdgpu_job_free(parser->job); - if (parser->uf_entry.tv.bo) { - struct amdgpu_bo *uf = ttm_to_amdgpu_bo(parser->uf_entry.tv.bo); + return 0; +} - amdgpu_bo_unref(&uf); +static int amdgpu_cs_patch_jobs(struct amdgpu_cs_parser *p) +{ + unsigned int i; + int r; + + for (i = 0; i < p->gang_size; ++i) { + r = amdgpu_cs_patch_ibs(p, p->jobs[i]); + if (r) + return r; } + return 0; } static int amdgpu_cs_vm_handling(struct amdgpu_cs_parser *p) { - struct amdgpu_ring *ring = to_amdgpu_ring(p->entity->rq->sched); struct amdgpu_fpriv *fpriv = p->filp->driver_priv; + struct amdgpu_job *job = p->gang_leader; struct amdgpu_device *adev = p->adev; struct amdgpu_vm *vm = &fpriv->vm; struct amdgpu_bo_list_entry *e; struct amdgpu_bo_va *bo_va; struct amdgpu_bo *bo; + unsigned int i; int r; - /* Only for UVD/VCE VM emulation */ - if (ring->funcs->parse_cs || ring->funcs->patch_cs_in_place) { - unsigned i, j; - - for (i = 0, j = 0; i < p->nchunks && j < p->job->num_ibs; i++) { - struct drm_amdgpu_cs_chunk_ib *chunk_ib; - struct amdgpu_bo_va_mapping *m; - struct amdgpu_bo *aobj = NULL; - struct amdgpu_cs_chunk *chunk; - uint64_t offset, va_start; - struct amdgpu_ib *ib; - uint8_t *kptr; - - chunk = &p->chunks[i]; - ib = &p->job->ibs[j]; - chunk_ib = chunk->kdata; - - if (chunk->chunk_id != AMDGPU_CHUNK_ID_IB) - continue; - - va_start = chunk_ib->va_start & AMDGPU_GMC_HOLE_MASK; - r = amdgpu_cs_find_mapping(p, va_start, &aobj, &m); - if (r) { - DRM_ERROR("IB va_start is invalid\n"); - return r; - } - - if ((va_start + 
chunk_ib->ib_bytes) > - (m->last + 1) * AMDGPU_GPU_PAGE_SIZE) { - DRM_ERROR("IB va_start+ib_bytes is invalid\n"); - return -EINVAL; - } - - /* the IB should be reserved at this point */ - r = amdgpu_bo_kmap(aobj, (void **)&kptr); - if (r) { - return r; - } - - offset = m->start * AMDGPU_GPU_PAGE_SIZE; - kptr += va_start - offset; - - if (ring->funcs->parse_cs) { - memcpy(ib->ptr, kptr, chunk_ib->ib_bytes); - amdgpu_bo_kunmap(aobj); - - r = amdgpu_ring_parse_cs(ring, p, p->job, ib); - if (r) - return r; - } else { - ib->ptr = (uint32_t *)kptr; - r = amdgpu_ring_patch_cs_in_place(ring, p, p->job, ib); - amdgpu_bo_kunmap(aobj); - if (r) - return r; - } - - j++; - } - } - - if (!p->job->vm) - return amdgpu_cs_sync_rings(p); - - r = amdgpu_vm_clear_freed(adev, vm, NULL); if (r) return r; @@ -810,18 +1101,18 @@ static int amdgpu_cs_vm_handling(struct amdgpu_cs_parser *p) if (r) return r; - r = amdgpu_sync_fence(&p->job->sync, fpriv->prt_va->last_pt_update); + r = amdgpu_sync_fence(&job->sync, fpriv->prt_va->last_pt_update); if (r) return r; - if (amdgpu_mcbp || amdgpu_sriov_vf(adev)) { + if (fpriv->csa_va) { bo_va = fpriv->csa_va; BUG_ON(!bo_va); r = amdgpu_vm_bo_update(adev, bo_va, false); if (r) return r; - r = amdgpu_sync_fence(&p->job->sync, bo_va->last_pt_update); + r = amdgpu_sync_fence(&job->sync, bo_va->last_pt_update); if (r) return r; } @@ -837,16 +1128,12 @@ static int amdgpu_cs_vm_handling(struct amdgpu_cs_parser *p) continue; r = amdgpu_vm_bo_update(adev, bo_va, false); - if (r) { - mutex_unlock(&p->bo_list->bo_list_mutex); + if (r) return r; - } - r = amdgpu_sync_fence(&p->job->sync, bo_va->last_pt_update); - if (r) { - mutex_unlock(&p->bo_list->bo_list_mutex); + r = amdgpu_sync_fence(&job->sync, bo_va->last_pt_update); + if (r) return r; - } } r = amdgpu_vm_handle_moved(adev, vm); @@ -857,11 +1144,18 @@ static int amdgpu_cs_vm_handling(struct amdgpu_cs_parser *p) if (r) return r; - r = amdgpu_sync_fence(&p->job->sync, vm->last_update); + r = amdgpu_sync_fence(&job->sync, vm->last_update); if (r) return r; - p->job->vm_pd_addr = amdgpu_gmc_pd_addr(vm->root.bo); + for (i = 0; i < p->gang_size; ++i) { + job = p->jobs[i]; + + if (!job->vm) + continue; + + job->vm_pd_addr = amdgpu_gmc_pd_addr(vm->root.bo); + } if (amdgpu_vm_debug) { /* Invalidate all BOs to test for userspace bugs */ @@ -876,331 +1170,40 @@ static int amdgpu_cs_vm_handling(struct amdgpu_cs_parser *p) } } - return amdgpu_cs_sync_rings(p); -} - -static int amdgpu_cs_ib_fill(struct amdgpu_device *adev, - struct amdgpu_cs_parser *parser) -{ - struct amdgpu_fpriv *fpriv = parser->filp->driver_priv; - struct amdgpu_vm *vm = &fpriv->vm; - int r, ce_preempt = 0, de_preempt = 0; - struct amdgpu_ring *ring; - int i, j; - - for (i = 0, j = 0; i < parser->nchunks && j < parser->job->num_ibs; i++) { - struct amdgpu_cs_chunk *chunk; - struct amdgpu_ib *ib; - struct drm_amdgpu_cs_chunk_ib *chunk_ib; - struct drm_sched_entity *entity; - - chunk = &parser->chunks[i]; - ib = &parser->job->ibs[j]; - chunk_ib = (struct drm_amdgpu_cs_chunk_ib *)chunk->kdata; - - if (chunk->chunk_id != AMDGPU_CHUNK_ID_IB) - continue; - - if (chunk_ib->ip_type == AMDGPU_HW_IP_GFX && - (amdgpu_mcbp || amdgpu_sriov_vf(adev))) { - if (chunk_ib->flags & AMDGPU_IB_FLAG_PREEMPT) { - if (chunk_ib->flags & AMDGPU_IB_FLAG_CE) - ce_preempt++; - else - de_preempt++; - } - - /* each GFX command submit allows 0 or 1 IB preemptible for CE & DE */ - if (ce_preempt > 1 || de_preempt > 1) - return -EINVAL; - } - - r = amdgpu_ctx_get_entity(parser->ctx, chunk_ib->ip_type, - 
chunk_ib->ip_instance, chunk_ib->ring, - &entity); - if (r) - return r; - - if (chunk_ib->flags & AMDGPU_IB_FLAG_PREAMBLE) - parser->job->preamble_status |= - AMDGPU_PREAMBLE_IB_PRESENT; - - if (parser->entity && parser->entity != entity) - return -EINVAL; - - /* Return if there is no run queue associated with this entity. - * Possibly because of disabled HW IP*/ - if (entity->rq == NULL) - return -EINVAL; - - parser->entity = entity; - - ring = to_amdgpu_ring(entity->rq->sched); - r = amdgpu_ib_get(adev, vm, ring->funcs->parse_cs ? - chunk_ib->ib_bytes : 0, - AMDGPU_IB_POOL_DELAYED, ib); - if (r) { - DRM_ERROR("Failed to get ib !\n"); - return r; - } - - ib->gpu_addr = chunk_ib->va_start; - ib->length_dw = chunk_ib->ib_bytes / 4; - ib->flags = chunk_ib->flags; - - j++; - } - - /* MM engine doesn't support user fences */ - ring = to_amdgpu_ring(parser->entity->rq->sched); - if (parser->job->uf_addr && ring->funcs->no_user_fence) - return -EINVAL; - return 0; } -static int amdgpu_cs_process_fence_dep(struct amdgpu_cs_parser *p, - struct amdgpu_cs_chunk *chunk) +static int amdgpu_cs_sync_rings(struct amdgpu_cs_parser *p) { struct amdgpu_fpriv *fpriv = p->filp->driver_priv; - unsigned num_deps; - int i, r; - struct drm_amdgpu_cs_chunk_dep *deps; - - deps = (struct drm_amdgpu_cs_chunk_dep *)chunk->kdata; - num_deps = chunk->length_dw * 4 / - sizeof(struct drm_amdgpu_cs_chunk_dep); - - for (i = 0; i < num_deps; ++i) { - struct amdgpu_ctx *ctx; - struct drm_sched_entity *entity; - struct dma_fence *fence; - - ctx = amdgpu_ctx_get(fpriv, deps[i].ctx_id); - if (ctx == NULL) - return -EINVAL; - - r = amdgpu_ctx_get_entity(ctx, deps[i].ip_type, - deps[i].ip_instance, - deps[i].ring, &entity); - if (r) { - amdgpu_ctx_put(ctx); - return r; - } - - fence = amdgpu_ctx_get_fence(ctx, entity, deps[i].handle); - amdgpu_ctx_put(ctx); - - if (IS_ERR(fence)) - return PTR_ERR(fence); - else if (!fence) - continue; - - if (chunk->chunk_id == AMDGPU_CHUNK_ID_SCHEDULED_DEPENDENCIES) { - struct drm_sched_fence *s_fence; - struct dma_fence *old = fence; - - s_fence = to_drm_sched_fence(fence); - fence = dma_fence_get(&s_fence->scheduled); - dma_fence_put(old); - } - - r = amdgpu_sync_fence(&p->job->sync, fence); - dma_fence_put(fence); - if (r) - return r; - } - return 0; -} - -static int amdgpu_syncobj_lookup_and_add_to_sync(struct amdgpu_cs_parser *p, - uint32_t handle, u64 point, - u64 flags) -{ - struct dma_fence *fence; + struct amdgpu_job *leader = p->gang_leader; + struct amdgpu_bo_list_entry *e; + unsigned int i; int r; - r = drm_syncobj_find_fence(p->filp, handle, point, flags, &fence); - if (r) { - DRM_ERROR("syncobj %u failed to find fence @ %llu (%d)!\n", - handle, point, r); - return r; - } - - r = amdgpu_sync_fence(&p->job->sync, fence); - dma_fence_put(fence); - - return r; -} - -static int amdgpu_cs_process_syncobj_in_dep(struct amdgpu_cs_parser *p, - struct amdgpu_cs_chunk *chunk) -{ - struct drm_amdgpu_cs_chunk_sem *deps; - unsigned num_deps; - int i, r; + list_for_each_entry(e, &p->validated, tv.head) { + struct amdgpu_bo *bo = ttm_to_amdgpu_bo(e->tv.bo); + struct dma_resv *resv = bo->tbo.base.resv; + enum amdgpu_sync_mode sync_mode; - deps = (struct drm_amdgpu_cs_chunk_sem *)chunk->kdata; - num_deps = chunk->length_dw * 4 / - sizeof(struct drm_amdgpu_cs_chunk_sem); - for (i = 0; i < num_deps; ++i) { - r = amdgpu_syncobj_lookup_and_add_to_sync(p, deps[i].handle, - 0, 0); + sync_mode = amdgpu_bo_explicit_sync(bo) ? 
+ AMDGPU_SYNC_EXPLICIT : AMDGPU_SYNC_NE_OWNER; + r = amdgpu_sync_resv(p->adev, &leader->sync, resv, sync_mode, + &fpriv->vm); if (r) return r; } - return 0; -} - - -static int amdgpu_cs_process_syncobj_timeline_in_dep(struct amdgpu_cs_parser *p, - struct amdgpu_cs_chunk *chunk) -{ - struct drm_amdgpu_cs_chunk_syncobj *syncobj_deps; - unsigned num_deps; - int i, r; - - syncobj_deps = (struct drm_amdgpu_cs_chunk_syncobj *)chunk->kdata; - num_deps = chunk->length_dw * 4 / - sizeof(struct drm_amdgpu_cs_chunk_syncobj); - for (i = 0; i < num_deps; ++i) { - r = amdgpu_syncobj_lookup_and_add_to_sync(p, - syncobj_deps[i].handle, - syncobj_deps[i].point, - syncobj_deps[i].flags); + for (i = 0; i < p->gang_size - 1; ++i) { + r = amdgpu_sync_clone(&leader->sync, &p->jobs[i]->sync); if (r) return r; } - return 0; -} - -static int amdgpu_cs_process_syncobj_out_dep(struct amdgpu_cs_parser *p, - struct amdgpu_cs_chunk *chunk) -{ - struct drm_amdgpu_cs_chunk_sem *deps; - unsigned num_deps; - int i; - - deps = (struct drm_amdgpu_cs_chunk_sem *)chunk->kdata; - num_deps = chunk->length_dw * 4 / - sizeof(struct drm_amdgpu_cs_chunk_sem); - - if (p->post_deps) - return -EINVAL; - - p->post_deps = kmalloc_array(num_deps, sizeof(*p->post_deps), - GFP_KERNEL); - p->num_post_deps = 0; - - if (!p->post_deps) - return -ENOMEM; - - - for (i = 0; i < num_deps; ++i) { - p->post_deps[i].syncobj = - drm_syncobj_find(p->filp, deps[i].handle); - if (!p->post_deps[i].syncobj) - return -EINVAL; - p->post_deps[i].chain = NULL; - p->post_deps[i].point = 0; - p->num_post_deps++; - } - - return 0; -} - - -static int amdgpu_cs_process_syncobj_timeline_out_dep(struct amdgpu_cs_parser *p, - struct amdgpu_cs_chunk *chunk) -{ - struct drm_amdgpu_cs_chunk_syncobj *syncobj_deps; - unsigned num_deps; - int i; - - syncobj_deps = (struct drm_amdgpu_cs_chunk_syncobj *)chunk->kdata; - num_deps = chunk->length_dw * 4 / - sizeof(struct drm_amdgpu_cs_chunk_syncobj); - - if (p->post_deps) - return -EINVAL; - - p->post_deps = kmalloc_array(num_deps, sizeof(*p->post_deps), - GFP_KERNEL); - p->num_post_deps = 0; - - if (!p->post_deps) - return -ENOMEM; - - for (i = 0; i < num_deps; ++i) { - struct amdgpu_cs_post_dep *dep = &p->post_deps[i]; - - dep->chain = NULL; - if (syncobj_deps[i].point) { - dep->chain = dma_fence_chain_alloc(); - if (!dep->chain) - return -ENOMEM; - } - - dep->syncobj = drm_syncobj_find(p->filp, - syncobj_deps[i].handle); - if (!dep->syncobj) { - dma_fence_chain_free(dep->chain); - return -EINVAL; - } - dep->point = syncobj_deps[i].point; - p->num_post_deps++; - } - - return 0; -} + r = amdgpu_ctx_wait_prev_fence(p->ctx, p->entities[p->gang_size - 1]); + if (r && r != -ERESTARTSYS) + DRM_ERROR("amdgpu_ctx_wait_prev_fence failed.\n"); -static int amdgpu_cs_dependencies(struct amdgpu_device *adev, - struct amdgpu_cs_parser *p) -{ - int i, r; - - /* TODO: Investigate why we still need the context lock */ - mutex_unlock(&p->ctx->lock); - - for (i = 0; i < p->nchunks; ++i) { - struct amdgpu_cs_chunk *chunk; - - chunk = &p->chunks[i]; - - switch (chunk->chunk_id) { - case AMDGPU_CHUNK_ID_DEPENDENCIES: - case AMDGPU_CHUNK_ID_SCHEDULED_DEPENDENCIES: - r = amdgpu_cs_process_fence_dep(p, chunk); - if (r) - goto out; - break; - case AMDGPU_CHUNK_ID_SYNCOBJ_IN: - r = amdgpu_cs_process_syncobj_in_dep(p, chunk); - if (r) - goto out; - break; - case AMDGPU_CHUNK_ID_SYNCOBJ_OUT: - r = amdgpu_cs_process_syncobj_out_dep(p, chunk); - if (r) - goto out; - break; - case AMDGPU_CHUNK_ID_SYNCOBJ_TIMELINE_WAIT: - r = 
amdgpu_cs_process_syncobj_timeline_in_dep(p, chunk); - if (r) - goto out; - break; - case AMDGPU_CHUNK_ID_SYNCOBJ_TIMELINE_SIGNAL: - r = amdgpu_cs_process_syncobj_timeline_out_dep(p, chunk); - if (r) - goto out; - break; - } - } - -out: - mutex_lock(&p->ctx->lock); return r; } @@ -1225,20 +1228,28 @@ static int amdgpu_cs_submit(struct amdgpu_cs_parser *p, union drm_amdgpu_cs *cs) { struct amdgpu_fpriv *fpriv = p->filp->driver_priv; - struct drm_sched_entity *entity = p->entity; + struct amdgpu_job *leader = p->gang_leader; struct amdgpu_bo_list_entry *e; - struct amdgpu_job *job; + unsigned int i; uint64_t seq; int r; - job = p->job; - p->job = NULL; + for (i = 0; i < p->gang_size; ++i) + drm_sched_job_arm(&p->jobs[i]->base); - r = drm_sched_job_init(&job->base, entity, &fpriv->vm); - if (r) - goto error_unlock; + for (i = 0; i < (p->gang_size - 1); ++i) { + struct dma_fence *fence; - drm_sched_job_arm(&job->base); + fence = &p->jobs[i]->base.s_fence->scheduled; + r = amdgpu_sync_fence(&leader->sync, fence); + if (r) + goto error_cleanup; + } + + if (p->gang_size > 1) { + for (i = 0; i < p->gang_size; ++i) + amdgpu_job_set_gang_leader(p->jobs[i], leader); + } /* No memory allocation is allowed while holding the notifier lock. * The lock is held until amdgpu_cs_submit is finished and fence is @@ -1249,6 +1260,7 @@ static int amdgpu_cs_submit(struct amdgpu_cs_parser *p, /* If userptr are invalidated after amdgpu_cs_parser_bos(), return * -EAGAIN, drmIoctl in libdrm will restart the amdgpu_cs_ioctl. */ + r = 0; amdgpu_bo_list_for_each_userptr_entry(e, p->bo_list) { struct amdgpu_bo *bo = ttm_to_amdgpu_bo(e->tv.bo); @@ -1256,67 +1268,96 @@ static int amdgpu_cs_submit(struct amdgpu_cs_parser *p, } if (r) { r = -EAGAIN; - goto error_abort; + goto error_unlock; } - p->fence = dma_fence_get(&job->base.s_fence->finished); + p->fence = dma_fence_get(&leader->base.s_fence->finished); + list_for_each_entry(e, &p->validated, tv.head) { + + /* Everybody except for the gang leader uses READ */ + for (i = 0; i < (p->gang_size - 1); ++i) { + dma_resv_add_fence(e->tv.bo->base.resv, + &p->jobs[i]->base.s_fence->finished, + DMA_RESV_USAGE_READ); + } - seq = amdgpu_ctx_add_fence(p->ctx, entity, p->fence); + /* The gang leader is remembered as writer */ + e->tv.num_shared = 0; + } + + seq = amdgpu_ctx_add_fence(p->ctx, p->entities[p->gang_size - 1], + p->fence); amdgpu_cs_post_dependencies(p); - if ((job->preamble_status & AMDGPU_PREAMBLE_IB_PRESENT) && + if ((leader->preamble_status & AMDGPU_PREAMBLE_IB_PRESENT) && !p->ctx->preamble_presented) { - job->preamble_status |= AMDGPU_PREAMBLE_IB_PRESENT_FIRST; + leader->preamble_status |= AMDGPU_PREAMBLE_IB_PRESENT_FIRST; p->ctx->preamble_presented = true; } cs->out.handle = seq; - job->uf_sequence = seq; - - amdgpu_job_free_resources(job); + leader->uf_sequence = seq; - trace_amdgpu_cs_ioctl(job); amdgpu_vm_bo_trace_cs(&fpriv->vm, &p->ticket); - drm_sched_entity_push_job(&job->base); + for (i = 0; i < p->gang_size; ++i) { + amdgpu_job_free_resources(p->jobs[i]); + trace_amdgpu_cs_ioctl(p->jobs[i]); + drm_sched_entity_push_job(&p->jobs[i]->base); + p->jobs[i] = NULL; + } amdgpu_vm_move_to_lru_tail(p->adev, &fpriv->vm); - - /* Make sure all BOs are remembered as writers */ - amdgpu_bo_list_for_each_entry(e, p->bo_list) - e->tv.num_shared = 0; - ttm_eu_fence_buffer_objects(&p->ticket, &p->validated, p->fence); + mutex_unlock(&p->adev->notifier_lock); mutex_unlock(&p->bo_list->bo_list_mutex); - return 0; -error_abort: - drm_sched_job_cleanup(&job->base); +error_unlock: 
mutex_unlock(&p->adev->notifier_lock); -error_unlock: - amdgpu_job_free(job); +error_cleanup: + for (i = 0; i < p->gang_size; ++i) + drm_sched_job_cleanup(&p->jobs[i]->base); return r; } -static void trace_amdgpu_cs_ibs(struct amdgpu_cs_parser *parser) +/* Cleanup the parser structure */ +static void amdgpu_cs_parser_fini(struct amdgpu_cs_parser *parser) { - int i; + unsigned i; - if (!trace_amdgpu_cs_enabled()) - return; + for (i = 0; i < parser->num_post_deps; i++) { + drm_syncobj_put(parser->post_deps[i].syncobj); + kfree(parser->post_deps[i].chain); + } + kfree(parser->post_deps); + + dma_fence_put(parser->fence); + + if (parser->ctx) + amdgpu_ctx_put(parser->ctx); + if (parser->bo_list) + amdgpu_bo_list_put(parser->bo_list); + + for (i = 0; i < parser->nchunks; i++) + kvfree(parser->chunks[i].kdata); + kvfree(parser->chunks); + for (i = 0; i < parser->gang_size; ++i) { + if (parser->jobs[i]) + amdgpu_job_free(parser->jobs[i]); + } + if (parser->uf_entry.tv.bo) { + struct amdgpu_bo *uf = ttm_to_amdgpu_bo(parser->uf_entry.tv.bo); - for (i = 0; i < parser->job->num_ibs; i++) - trace_amdgpu_cs(parser, i); + amdgpu_bo_unref(&uf); + } } int amdgpu_cs_ioctl(struct drm_device *dev, void *data, struct drm_file *filp) { struct amdgpu_device *adev = drm_to_adev(dev); - union drm_amdgpu_cs *cs = data; - struct amdgpu_cs_parser parser = {}; - bool reserved_buffers = false; + struct amdgpu_cs_parser parser; int r; if (amdgpu_ras_intr_triggered()) @@ -1325,25 +1366,20 @@ int amdgpu_cs_ioctl(struct drm_device *dev, void *data, struct drm_file *filp) if (!adev->accel_working) return -EBUSY; - parser.adev = adev; - parser.filp = filp; - - r = amdgpu_cs_parser_init(&parser, data); + r = amdgpu_cs_parser_init(&parser, adev, filp, data); if (r) { if (printk_ratelimit()) DRM_ERROR("Failed to initialize parser %d!\n", r); - goto out; + return r; } - r = amdgpu_cs_ib_fill(adev, &parser); + r = amdgpu_cs_pass1(&parser, data); if (r) - goto out; + goto error_fini; - r = amdgpu_cs_dependencies(adev, &parser); - if (r) { - DRM_ERROR("Failed in the dependencies handling %d!\n", r); - goto out; - } + r = amdgpu_cs_pass2(&parser); + if (r) + goto error_fini; r = amdgpu_cs_parser_bos(&parser, data); if (r) { @@ -1351,22 +1387,36 @@ int amdgpu_cs_ioctl(struct drm_device *dev, void *data, struct drm_file *filp) DRM_ERROR("Not enough memory for command submission!\n"); else if (r != -ERESTARTSYS && r != -EAGAIN) DRM_ERROR("Failed to process the buffer list %d!\n", r); - goto out; + goto error_fini; } - reserved_buffers = true; + r = amdgpu_cs_patch_jobs(&parser); + if (r) + goto error_backoff; + + r = amdgpu_cs_vm_handling(&parser); + if (r) + goto error_backoff; + + r = amdgpu_cs_sync_rings(&parser); + if (r) + goto error_backoff; trace_amdgpu_cs_ibs(&parser); - r = amdgpu_cs_vm_handling(&parser); + r = amdgpu_cs_submit(&parser, data); if (r) - goto out; + goto error_backoff; - r = amdgpu_cs_submit(&parser, cs); + amdgpu_cs_parser_fini(&parser); + return 0; -out: - amdgpu_cs_parser_fini(&parser, r, reserved_buffers); +error_backoff: + ttm_eu_backoff_reservation(&parser.ticket, &parser.validated); + mutex_unlock(&parser.bo_list->bo_list_mutex); +error_fini: + amdgpu_cs_parser_fini(&parser); return r; } diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.h index 30ecc4917f81..cbaa19b2b8a3 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.h @@ -27,6 +27,8 @@ #include "amdgpu_bo_list.h" #include "amdgpu_ring.h" +#define AMDGPU_CS_GANG_SIZE 
4 + struct amdgpu_bo_va_mapping; struct amdgpu_cs_chunk { @@ -50,9 +52,11 @@ struct amdgpu_cs_parser { unsigned nchunks; struct amdgpu_cs_chunk *chunks; - /* scheduler job object */ - struct amdgpu_job *job; - struct drm_sched_entity *entity; + /* scheduler job objects */ + unsigned int gang_size; + struct drm_sched_entity *entities[AMDGPU_CS_GANG_SIZE]; + struct amdgpu_job *jobs[AMDGPU_CS_GANG_SIZE]; + struct amdgpu_job *gang_leader; /* buffer objects */ struct ww_acquire_ctx ticket; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c index 8ee4e8491f39..f6d9d5da53cd 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c @@ -315,7 +315,6 @@ static int amdgpu_ctx_init(struct amdgpu_ctx_mgr *mgr, int32_t priority, kref_init(&ctx->refcount); ctx->mgr = mgr; spin_lock_init(&ctx->ring_lock); - mutex_init(&ctx->lock); ctx->reset_counter = atomic_read(&mgr->adev->gpu_reset_counter); ctx->reset_counter_query = ctx->reset_counter; @@ -402,12 +401,11 @@ static void amdgpu_ctx_fini(struct kref *ref) } } - if (drm_dev_enter(&adev->ddev, &idx)) { + if (drm_dev_enter(adev_to_drm(adev), &idx)) { amdgpu_ctx_set_stable_pstate(ctx, ctx->stable_pstate); drm_dev_exit(idx); } - mutex_destroy(&ctx->lock); kfree(ctx); } @@ -848,7 +846,7 @@ void amdgpu_ctx_mgr_init(struct amdgpu_ctx_mgr *mgr, mgr->adev = adev; mutex_init(&mgr->lock); - idr_init(&mgr->ctx_handles); + idr_init_base(&mgr->ctx_handles, 1); for (i = 0; i < AMDGPU_HW_IP_NUM; ++i) atomic64_set(&mgr->time_spend[i], 0); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.h index cc7c8afff414..0fa0e56daf67 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.h @@ -53,7 +53,6 @@ struct amdgpu_ctx { bool preamble_presented; int32_t init_priority; int32_t override_priority; - struct mutex lock; atomic_t guilty; unsigned long ras_counter_ce; unsigned long ras_counter_ue; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c index e2eec985adb3..6066aebf491c 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c @@ -1043,6 +1043,157 @@ err: } /** + * amdgpu_debugfs_gfxoff_residency_read - Read GFXOFF residency + * + * @f: open file handle + * @buf: User buffer to store read data in + * @size: Number of bytes to read + * @pos: Offset to seek to + * + * Read the last residency value logged. It doesn't auto update, one needs to + * stop logging before getting the current value. 
+ */ +static ssize_t amdgpu_debugfs_gfxoff_residency_read(struct file *f, char __user *buf, + size_t size, loff_t *pos) +{ + struct amdgpu_device *adev = file_inode(f)->i_private; + ssize_t result = 0; + int r; + + if (size & 0x3 || *pos & 0x3) + return -EINVAL; + + r = pm_runtime_get_sync(adev_to_drm(adev)->dev); + if (r < 0) { + pm_runtime_put_autosuspend(adev_to_drm(adev)->dev); + return r; + } + + while (size) { + uint32_t value; + + r = amdgpu_get_gfx_off_residency(adev, &value); + if (r) + goto out; + + r = put_user(value, (uint32_t *)buf); + if (r) + goto out; + + result += 4; + buf += 4; + *pos += 4; + size -= 4; + } + + r = result; +out: + pm_runtime_mark_last_busy(adev_to_drm(adev)->dev); + pm_runtime_put_autosuspend(adev_to_drm(adev)->dev); + + return r; +} + +/** + * amdgpu_debugfs_gfxoff_residency_write - Log GFXOFF Residency + * + * @f: open file handle + * @buf: User buffer to write data from + * @size: Number of bytes to write + * @pos: Offset to seek to + * + * Write a 32-bit non-zero to start logging; write a 32-bit zero to stop + */ +static ssize_t amdgpu_debugfs_gfxoff_residency_write(struct file *f, const char __user *buf, + size_t size, loff_t *pos) +{ + struct amdgpu_device *adev = file_inode(f)->i_private; + ssize_t result = 0; + int r; + + if (size & 0x3 || *pos & 0x3) + return -EINVAL; + + r = pm_runtime_get_sync(adev_to_drm(adev)->dev); + if (r < 0) { + pm_runtime_put_autosuspend(adev_to_drm(adev)->dev); + return r; + } + + while (size) { + u32 value; + + r = get_user(value, (uint32_t *)buf); + if (r) + goto out; + + amdgpu_set_gfx_off_residency(adev, value ? true : false); + + result += 4; + buf += 4; + *pos += 4; + size -= 4; + } + + r = result; +out: + pm_runtime_mark_last_busy(adev_to_drm(adev)->dev); + pm_runtime_put_autosuspend(adev_to_drm(adev)->dev); + + return r; +} + + +/** + * amdgpu_debugfs_gfxoff_count_read - Read GFXOFF entry count + * + * @f: open file handle + * @buf: User buffer to store read data in + * @size: Number of bytes to read + * @pos: Offset to seek to + */ +static ssize_t amdgpu_debugfs_gfxoff_count_read(struct file *f, char __user *buf, + size_t size, loff_t *pos) +{ + struct amdgpu_device *adev = file_inode(f)->i_private; + ssize_t result = 0; + int r; + + if (size & 0x3 || *pos & 0x3) + return -EINVAL; + + r = pm_runtime_get_sync(adev_to_drm(adev)->dev); + if (r < 0) { + pm_runtime_put_autosuspend(adev_to_drm(adev)->dev); + return r; + } + + while (size) { + u64 value = 0; + + r = amdgpu_get_gfx_off_entrycount(adev, &value); + if (r) + goto out; + + r = put_user(value, (u64 *)buf); + if (r) + goto out; + + result += 4; + buf += 4; + *pos += 4; + size -= 4; + } + + r = result; +out: + pm_runtime_mark_last_busy(adev_to_drm(adev)->dev); + pm_runtime_put_autosuspend(adev_to_drm(adev)->dev); + + return r; +} + +/** * amdgpu_debugfs_gfxoff_write - Enable/disable GFXOFF * * @f: open file handle @@ -1249,6 +1400,19 @@ static const struct file_operations amdgpu_debugfs_gfxoff_status_fops = { .llseek = default_llseek }; +static const struct file_operations amdgpu_debugfs_gfxoff_count_fops = { + .owner = THIS_MODULE, + .read = amdgpu_debugfs_gfxoff_count_read, + .llseek = default_llseek +}; + +static const struct file_operations amdgpu_debugfs_gfxoff_residency_fops = { + .owner = THIS_MODULE, + .read = amdgpu_debugfs_gfxoff_residency_read, + .write = amdgpu_debugfs_gfxoff_residency_write, + .llseek = default_llseek +}; + static const struct file_operations *debugfs_regs[] = { &amdgpu_debugfs_regs_fops, &amdgpu_debugfs_regs2_fops, @@ -1261,6 
+1425,8 @@ static const struct file_operations *debugfs_regs[] = { &amdgpu_debugfs_gpr_fops, &amdgpu_debugfs_gfxoff_fops, &amdgpu_debugfs_gfxoff_status_fops, + &amdgpu_debugfs_gfxoff_count_fops, + &amdgpu_debugfs_gfxoff_residency_fops, }; static const char *debugfs_regs_names[] = { @@ -1275,6 +1441,8 @@ static const char *debugfs_regs_names[] = { "amdgpu_gpr", "amdgpu_gfxoff", "amdgpu_gfxoff_status", + "amdgpu_gfxoff_count", + "amdgpu_gfxoff_residency", }; /** @@ -1705,7 +1873,7 @@ static ssize_t amdgpu_reset_dump_register_list_write(struct file *f, { struct amdgpu_device *adev = (struct amdgpu_device *)file_inode(f)->i_private; char reg_offset[11]; - uint32_t *new, *tmp = NULL; + uint32_t *new = NULL, *tmp = NULL; int ret, i = 0, len = 0; do { @@ -1747,7 +1915,8 @@ static ssize_t amdgpu_reset_dump_register_list_write(struct file *f, ret = size; error_free: - kfree(tmp); + if (tmp != new) + kfree(tmp); kfree(new); return ret; } @@ -1785,6 +1954,8 @@ int amdgpu_debugfs_init(struct amdgpu_device *adev) return PTR_ERR(ent); } + debugfs_create_u32("amdgpu_reset_level", 0600, root, &adev->amdgpu_reset_level_mask); + /* Register debugfs entries for amdgpu_ttm */ amdgpu_ttm_debugfs_init(adev); amdgpu_debugfs_pm_init(adev); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c index c4a6fe3070b6..ab8f970b2849 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c @@ -2365,8 +2365,16 @@ static int amdgpu_device_ip_init(struct amdgpu_device *adev) } adev->ip_blocks[i].status.sw = true; - /* need to do gmc hw init early so we can allocate gpu mem */ - if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_GMC) { + if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_COMMON) { + /* need to do common hw init early so everything is set up for gmc */ + r = adev->ip_blocks[i].version->funcs->hw_init((void *)adev); + if (r) { + DRM_ERROR("hw_init %d failed %d\n", i, r); + goto init_failed; + } + adev->ip_blocks[i].status.hw = true; + } else if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_GMC) { + /* need to do gmc hw init early so we can allocate gpu mem */ /* Try to reserve bad pages early */ if (amdgpu_sriov_vf(adev)) amdgpu_virt_exchange_data(adev); @@ -2451,17 +2459,21 @@ static int amdgpu_device_ip_init(struct amdgpu_device *adev) */ if (adev->gmc.xgmi.num_physical_nodes > 1) { if (amdgpu_xgmi_add_device(adev) == 0) { - struct amdgpu_hive_info *hive = amdgpu_get_xgmi_hive(adev); + if (!amdgpu_sriov_vf(adev)) { + struct amdgpu_hive_info *hive = amdgpu_get_xgmi_hive(adev); - if (!hive->reset_domain || - !amdgpu_reset_get_reset_domain(hive->reset_domain)) { - r = -ENOENT; - goto init_failed; - } + if (!hive->reset_domain || + !amdgpu_reset_get_reset_domain(hive->reset_domain)) { + r = -ENOENT; + amdgpu_put_xgmi_hive(hive); + goto init_failed; + } - /* Drop the early temporary reset domain we created for device */ - amdgpu_reset_put_reset_domain(adev->reset_domain); - adev->reset_domain = hive->reset_domain; + /* Drop the early temporary reset domain we created for device */ + amdgpu_reset_put_reset_domain(adev->reset_domain); + adev->reset_domain = hive->reset_domain; + amdgpu_put_xgmi_hive(hive); + } } } @@ -3050,8 +3062,8 @@ static int amdgpu_device_ip_reinit_early_sriov(struct amdgpu_device *adev) int i, r; static enum amd_ip_block_type ip_order[] = { - AMD_IP_BLOCK_TYPE_GMC, AMD_IP_BLOCK_TYPE_COMMON, + AMD_IP_BLOCK_TYPE_GMC, AMD_IP_BLOCK_TYPE_PSP, AMD_IP_BLOCK_TYPE_IH, }; @@ -3142,7 
+3154,8 @@ static int amdgpu_device_ip_resume_phase1(struct amdgpu_device *adev) continue; if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_COMMON || adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_GMC || - adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_IH) { + adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_IH || + (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_PSP && amdgpu_sriov_vf(adev))) { r = adev->ip_blocks[i].version->funcs->resume(adev); if (r) { @@ -3499,6 +3512,7 @@ int amdgpu_device_init(struct amdgpu_device *adev, adev->gmc.gart_size = 512 * 1024 * 1024; adev->accel_working = false; adev->num_rings = 0; + RCU_INIT_POINTER(adev->gang_submit, dma_fence_get_stub()); adev->mman.buffer_funcs = NULL; adev->mman.buffer_funcs_ring = NULL; adev->vm_manager.vm_pte_funcs = NULL; @@ -3577,6 +3591,8 @@ int amdgpu_device_init(struct amdgpu_device *adev, INIT_WORK(&adev->xgmi_reset_work, amdgpu_device_xgmi_reset_func); adev->gfx.gfx_off_req_count = 1; + adev->gfx.gfx_off_residency = 0; + adev->gfx.gfx_off_entrycount = 0; adev->pm.ac_power = power_supply_is_system_supplied() > 0; atomic_set(&adev->throttling_logging_enabled, 1); @@ -3965,8 +3981,7 @@ void amdgpu_device_fini_hw(struct amdgpu_device *adev) amdgpu_gart_dummy_page_fini(adev); - if (drm_dev_is_unplugged(adev_to_drm(adev))) - amdgpu_device_unmap_mmio(adev); + amdgpu_device_unmap_mmio(adev); } @@ -3979,6 +3994,7 @@ void amdgpu_device_fini_sw(struct amdgpu_device *adev) release_firmware(adev->firmware.gpu_info_fw); adev->firmware.gpu_info_fw = NULL; adev->accel_working = false; + dma_fence_put(rcu_dereference_protected(adev->gang_submit, true)); amdgpu_reset_fini(adev); @@ -4054,12 +4070,20 @@ static void amdgpu_device_evict_resources(struct amdgpu_device *adev) int amdgpu_device_suspend(struct drm_device *dev, bool fbcon) { struct amdgpu_device *adev = drm_to_adev(dev); + int r = 0; if (dev->switch_power_state == DRM_SWITCH_POWER_OFF) return 0; adev->in_suspend = true; + if (amdgpu_sriov_vf(adev)) { + amdgpu_virt_fini_data_exchange(adev); + r = amdgpu_virt_request_full_gpu(adev, false); + if (r) + return r; + } + if (amdgpu_acpi_smart_shift_update(dev, AMDGPU_SS_DEV_D3)) DRM_WARN("smart shift update failed\n"); @@ -4083,6 +4107,9 @@ int amdgpu_device_suspend(struct drm_device *dev, bool fbcon) amdgpu_device_ip_suspend_phase2(adev); + if (amdgpu_sriov_vf(adev)) + amdgpu_virt_release_full_gpu(adev, false); + return 0; } @@ -4101,6 +4128,12 @@ int amdgpu_device_resume(struct drm_device *dev, bool fbcon) struct amdgpu_device *adev = drm_to_adev(dev); int r = 0; + if (amdgpu_sriov_vf(adev)) { + r = amdgpu_virt_request_full_gpu(adev, true); + if (r) + return r; + } + if (dev->switch_power_state == DRM_SWITCH_POWER_OFF) return 0; @@ -4115,6 +4148,13 @@ int amdgpu_device_resume(struct drm_device *dev, bool fbcon) } r = amdgpu_device_ip_resume(adev); + + /* no matter what r is, always need to properly release full GPU */ + if (amdgpu_sriov_vf(adev)) { + amdgpu_virt_init_data_exchange(adev); + amdgpu_virt_release_full_gpu(adev, true); + } + if (r) { dev_err(adev->dev, "amdgpu_device_ip_resume failed (%d).\n", r); return r; @@ -4413,8 +4453,6 @@ static int amdgpu_device_reset_sriov(struct amdgpu_device *adev, retry: amdgpu_amdkfd_pre_reset(adev); - amdgpu_amdkfd_pre_reset(adev); - if (from_hypervisor) r = amdgpu_virt_request_full_gpu(adev, true); else @@ -4509,14 +4547,15 @@ bool amdgpu_device_has_job_running(struct amdgpu_device *adev) */ bool amdgpu_device_should_recover_gpu(struct amdgpu_device *adev) { - 
if (!amdgpu_device_ip_check_soft_reset(adev)) { - dev_info(adev->dev, "Timeout, but no hardware hang detected.\n"); - return false; - } if (amdgpu_gpu_recovery == 0) goto disabled; + if (!amdgpu_device_ip_check_soft_reset(adev)) { + dev_info(adev->dev,"Timeout, but no hardware hang detected.\n"); + return false; + } + if (amdgpu_sriov_vf(adev)) return true; @@ -4641,7 +4680,7 @@ int amdgpu_device_pre_asic_reset(struct amdgpu_device *adev, if (!need_full_reset) need_full_reset = amdgpu_device_ip_need_full_reset(adev); - if (!need_full_reset) { + if (!need_full_reset && amdgpu_gpu_recovery) { amdgpu_device_ip_pre_soft_reset(adev); r = amdgpu_device_ip_soft_reset(adev); amdgpu_device_ip_post_soft_reset(adev); @@ -4737,11 +4776,14 @@ int amdgpu_do_asic_reset(struct list_head *device_list_handle, struct amdgpu_device *tmp_adev = NULL; bool need_full_reset, skip_hw_reset, vram_lost = false; int r = 0; + bool gpu_reset_for_dev_remove = 0; /* Try reset handler method first */ tmp_adev = list_first_entry(device_list_handle, struct amdgpu_device, reset_list); amdgpu_reset_reg_dumps(tmp_adev); + + reset_context->reset_device_list = device_list_handle; r = amdgpu_reset_perform_reset(tmp_adev, reset_context); /* If reset handler not implemented, continue; otherwise return */ if (r == -ENOSYS) @@ -4754,6 +4796,10 @@ int amdgpu_do_asic_reset(struct list_head *device_list_handle, test_bit(AMDGPU_NEED_FULL_RESET, &reset_context->flags); skip_hw_reset = test_bit(AMDGPU_SKIP_HW_RESET, &reset_context->flags); + gpu_reset_for_dev_remove = + test_bit(AMDGPU_RESET_FOR_DEVICE_REMOVE, &reset_context->flags) && + test_bit(AMDGPU_NEED_FULL_RESET, &reset_context->flags); + /* * ASIC reset has to be done on all XGMI hive nodes ASAP * to allow proper links negotiation in FW (within 1 sec) @@ -4798,6 +4844,18 @@ int amdgpu_do_asic_reset(struct list_head *device_list_handle, amdgpu_ras_intr_cleared(); } + /* Since the mode1 reset affects base ip blocks, the + * phase1 ip blocks need to be resumed. Otherwise there + * will be a BIOS signature error and the psp bootloader + * can't load kdb on the next amdgpu install. + */ + if (gpu_reset_for_dev_remove) { + list_for_each_entry(tmp_adev, device_list_handle, reset_list) + amdgpu_device_ip_resume_phase1(tmp_adev); + + goto end; + } + list_for_each_entry(tmp_adev, device_list_handle, reset_list) { if (need_full_reset) { /* post card */ @@ -5037,6 +5095,7 @@ static void amdgpu_device_recheck_guilty_jobs( /* set guilty */ drm_sched_increase_karma(s_job); + amdgpu_reset_prepare_hwcontext(adev, reset_context); retry: /* do hw reset */ if (amdgpu_sriov_vf(adev)) { @@ -5119,6 +5178,11 @@ int amdgpu_device_gpu_recover(struct amdgpu_device *adev, bool need_emergency_restart = false; bool audio_suspended = false; int tmp_vram_lost_counter; + bool gpu_reset_for_dev_remove = false; + + gpu_reset_for_dev_remove = + test_bit(AMDGPU_RESET_FOR_DEVICE_REMOVE, &reset_context->flags) && + test_bit(AMDGPU_NEED_FULL_RESET, &reset_context->flags); /* * Special case: RAS triggered and full reset isn't supported @@ -5146,6 +5210,7 @@ int amdgpu_device_gpu_recover(struct amdgpu_device *adev, reset_context->job = job; reset_context->hive = hive; + /* * Build list of devices to reset. 
* In case we are in XGMI hive mode, resort the device list @@ -5153,8 +5218,11 @@ int amdgpu_device_gpu_recover(struct amdgpu_device *adev, */ INIT_LIST_HEAD(&device_list); if (!amdgpu_sriov_vf(adev) && (adev->gmc.xgmi.num_physical_nodes > 1)) { - list_for_each_entry(tmp_adev, &hive->device_list, gmc.xgmi.head) + list_for_each_entry(tmp_adev, &hive->device_list, gmc.xgmi.head) { list_add_tail(&tmp_adev->reset_list, &device_list); + if (gpu_reset_for_dev_remove && adev->shutdown) + tmp_adev->shutdown = true; + } if (!list_is_first(&adev->reset_list, &device_list)) list_rotate_to_front(&adev->reset_list, &device_list); device_list_handle = &device_list; @@ -5237,6 +5305,10 @@ int amdgpu_device_gpu_recover(struct amdgpu_device *adev, retry: /* Rest of adevs pre asic reset from XGMI hive. */ list_for_each_entry(tmp_adev, device_list_handle, reset_list) { + if (gpu_reset_for_dev_remove) { + /* Workaroud for ASICs need to disable SMC first */ + amdgpu_device_smu_fini_early(tmp_adev); + } r = amdgpu_device_pre_asic_reset(tmp_adev, reset_context); /*TODO Should we stop ?*/ if (r) { @@ -5265,8 +5337,14 @@ retry: /* Rest of adevs pre asic reset from XGMI hive. */ amdgpu_ras_resume(adev); } else { r = amdgpu_do_asic_reset(device_list_handle, reset_context); - if (r && r == -EAGAIN) + if (r && r == -EAGAIN) { + set_bit(AMDGPU_SKIP_MODE2_RESET, &reset_context->flags); + adev->asic_reset_res = 0; goto retry; + } + + if (!r && gpu_reset_for_dev_remove) + goto recover_end; } skip_hw_reset: @@ -5340,6 +5418,7 @@ skip_sched_resume: amdgpu_device_unset_mp1_state(tmp_adev); } +recover_end: tmp_adev = list_first_entry(device_list_handle, struct amdgpu_device, reset_list); amdgpu_device_unlock_reset_domain(tmp_adev->reset_domain); @@ -5522,8 +5601,9 @@ bool amdgpu_device_is_peer_accessible(struct amdgpu_device *adev, ~*peer_adev->dev->dma_mask : ~((1ULL << 32) - 1); resource_size_t aper_limit = adev->gmc.aper_base + adev->gmc.aper_size - 1; - bool p2p_access = !(pci_p2pdma_distance_many(adev->pdev, - &peer_adev->dev, 1, true) < 0); + bool p2p_access = + !adev->gmc.xgmi.connected_to_cpu && + !(pci_p2pdma_distance(adev->pdev, peer_adev->dev, false) < 0); return pcie_p2p && p2p_access && (adev->gmc.visible_vram_size && adev->gmc.real_vram_size == adev->gmc.visible_vram_size && @@ -5697,6 +5777,7 @@ pci_ers_result_t amdgpu_pci_slot_reset(struct pci_dev *pdev) reset_context.reset_req_dev = adev; set_bit(AMDGPU_NEED_FULL_RESET, &reset_context.flags); set_bit(AMDGPU_SKIP_HW_RESET, &reset_context.flags); + set_bit(AMDGPU_SKIP_MODE2_RESET, &reset_context.flags); adev->no_hw_access = true; r = amdgpu_device_pre_asic_reset(adev, &reset_context); @@ -5906,3 +5987,36 @@ void amdgpu_device_pcie_port_wreg(struct amdgpu_device *adev, (void)RREG32(data); spin_unlock_irqrestore(&adev->pcie_idx_lock, flags); } + +/** + * amdgpu_device_switch_gang - switch to a new gang + * @adev: amdgpu_device pointer + * @gang: the gang to switch to + * + * Try to switch to a new gang. + * Returns: NULL if we switched to the new gang or a reference to the current + * gang leader. 
+ */ +struct dma_fence *amdgpu_device_switch_gang(struct amdgpu_device *adev, + struct dma_fence *gang) +{ + struct dma_fence *old = NULL; + + do { + dma_fence_put(old); + rcu_read_lock(); + old = dma_fence_get_rcu_safe(&adev->gang_submit); + rcu_read_unlock(); + + if (old == gang) + break; + + if (!dma_fence_is_signaled(old)) + return old; + + } while (cmpxchg((struct dma_fence __force **)&adev->gang_submit, + old, gang) != old); + + dma_fence_put(old); + return NULL; +} diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_discovery.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_discovery.c index 95d34590cad1..3993e6134914 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_discovery.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_discovery.c @@ -229,7 +229,7 @@ static int amdgpu_discovery_read_binary_from_file(struct amdgpu_device *adev, ui return r; } - memcpy((u8 *)binary, (u8 *)fw->data, adev->mman.discovery_tmr_size); + memcpy((u8 *)binary, (u8 *)fw->data, fw->size); release_firmware(fw); return 0; @@ -1506,6 +1506,7 @@ static int amdgpu_discovery_set_common_ip_blocks(struct amdgpu_device *adev) case IP_VERSION(11, 0, 0): case IP_VERSION(11, 0, 1): case IP_VERSION(11, 0, 2): + case IP_VERSION(11, 0, 3): amdgpu_device_ip_block_add(adev, &soc21_common_ip_block); break; default: @@ -1549,6 +1550,7 @@ static int amdgpu_discovery_set_gmc_ip_blocks(struct amdgpu_device *adev) case IP_VERSION(11, 0, 0): case IP_VERSION(11, 0, 1): case IP_VERSION(11, 0, 2): + case IP_VERSION(11, 0, 3): amdgpu_device_ip_block_add(adev, &gmc_v11_0_ip_block); break; default: @@ -1633,6 +1635,7 @@ static int amdgpu_discovery_set_psp_ip_blocks(struct amdgpu_device *adev) case IP_VERSION(13, 0, 5): case IP_VERSION(13, 0, 7): case IP_VERSION(13, 0, 8): + case IP_VERSION(13, 0, 10): amdgpu_device_ip_block_add(adev, &psp_v13_0_ip_block); break; case IP_VERSION(13, 0, 4): @@ -1682,6 +1685,7 @@ static int amdgpu_discovery_set_smu_ip_blocks(struct amdgpu_device *adev) case IP_VERSION(13, 0, 5): case IP_VERSION(13, 0, 7): case IP_VERSION(13, 0, 8): + case IP_VERSION(13, 0, 10): amdgpu_device_ip_block_add(adev, &smu_v13_0_ip_block); break; default: @@ -1780,6 +1784,7 @@ static int amdgpu_discovery_set_gc_ip_blocks(struct amdgpu_device *adev) case IP_VERSION(11, 0, 0): case IP_VERSION(11, 0, 1): case IP_VERSION(11, 0, 2): + case IP_VERSION(11, 0, 3): amdgpu_device_ip_block_add(adev, &gfx_v11_0_ip_block); break; default: @@ -1823,6 +1828,7 @@ static int amdgpu_discovery_set_sdma_ip_blocks(struct amdgpu_device *adev) case IP_VERSION(6, 0, 0): case IP_VERSION(6, 0, 1): case IP_VERSION(6, 0, 2): + case IP_VERSION(6, 0, 3): amdgpu_device_ip_block_add(adev, &sdma_v6_0_ip_block); break; default: @@ -1903,7 +1909,8 @@ static int amdgpu_discovery_set_mm_ip_blocks(struct amdgpu_device *adev) case IP_VERSION(4, 0, 2): case IP_VERSION(4, 0, 4): amdgpu_device_ip_block_add(adev, &vcn_v4_0_ip_block); - amdgpu_device_ip_block_add(adev, &jpeg_v4_0_ip_block); + if (!amdgpu_sriov_vf(adev)) + amdgpu_device_ip_block_add(adev, &jpeg_v4_0_ip_block); break; default: dev_err(adev->dev, @@ -1940,6 +1947,7 @@ static int amdgpu_discovery_set_mes_ip_blocks(struct amdgpu_device *adev) case IP_VERSION(11, 0, 0): case IP_VERSION(11, 0, 1): case IP_VERSION(11, 0, 2): + case IP_VERSION(11, 0, 3): amdgpu_device_ip_block_add(adev, &mes_v11_0_ip_block); adev->enable_mes = true; adev->enable_mes_kiq = true; @@ -2165,6 +2173,7 @@ int amdgpu_discovery_set_ip_blocks(struct amdgpu_device *adev) break; case IP_VERSION(11, 0, 0): case IP_VERSION(11, 0, 2): + case IP_VERSION(11, 0, 3): 
adev->family = AMDGPU_FAMILY_GC_11_0_0; break; case IP_VERSION(11, 0, 1): @@ -2234,7 +2243,10 @@ int amdgpu_discovery_set_ip_blocks(struct amdgpu_device *adev) break; case IP_VERSION(4, 3, 0): case IP_VERSION(4, 3, 1): - adev->nbio.funcs = &nbio_v4_3_funcs; + if (amdgpu_sriov_vf(adev)) + adev->nbio.funcs = &nbio_v4_3_sriov_funcs; + else + adev->nbio.funcs = &nbio_v4_3_funcs; adev->nbio.hdp_flush_reg = &nbio_v4_3_hdp_flush_reg; break; case IP_VERSION(7, 7, 0): @@ -2332,6 +2344,7 @@ int amdgpu_discovery_set_ip_blocks(struct amdgpu_device *adev) case IP_VERSION(6, 0, 0): case IP_VERSION(6, 0, 1): case IP_VERSION(6, 0, 2): + case IP_VERSION(6, 0, 3): adev->lsdma.funcs = &lsdma_v6_0_funcs; break; default: diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_display.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_display.c index c20922a5af9f..1a06b8d724f3 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_display.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_display.c @@ -1101,6 +1101,7 @@ static int amdgpu_display_gem_fb_verify_and_init(struct drm_device *dev, goto err; ret = drm_framebuffer_init(dev, &rfb->base, &amdgpu_fb_funcs); + if (ret) goto err; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_dma_buf.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_dma_buf.c index 782cbca37538..7bd8e33b14be 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_dma_buf.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_dma_buf.c @@ -58,7 +58,7 @@ static int amdgpu_dma_buf_attach(struct dma_buf *dmabuf, struct amdgpu_device *adev = amdgpu_ttm_adev(bo->tbo.bdev); int r; - if (pci_p2pdma_distance_many(adev->pdev, &attach->dev, 1, true) < 0) + if (pci_p2pdma_distance(adev->pdev, attach->dev, false) < 0) attach->peer2peer = false; r = pm_runtime_get_sync(adev_to_drm(adev)->dev); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c index 429fcdf28836..3c9fecdd6b2f 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c @@ -38,6 +38,8 @@ #include <linux/mmu_notifier.h> #include <linux/suspend.h> #include <linux/cc_platform.h> +#include <linux/fb.h> +#include <linux/dynamic_debug.h> #include "amdgpu.h" #include "amdgpu_irq.h" @@ -102,9 +104,10 @@ * - 3.46.0 - To enable hot plug amdgpu tests in libdrm * - 3.47.0 - Add AMDGPU_GEM_CREATE_DISCARDABLE and AMDGPU_VM_NOALLOC flags * - 3.48.0 - Add IP discovery version info to HW INFO + * 3.49.0 - Add gang submit into CS IOCTL */ #define KMS_DRIVER_MAJOR 3 -#define KMS_DRIVER_MINOR 48 +#define KMS_DRIVER_MINOR 49 #define KMS_DRIVER_PATCHLEVEL 0 int amdgpu_vram_limit; @@ -185,6 +188,18 @@ int amdgpu_vcnfw_log; static void amdgpu_drv_delayed_reset_work_handler(struct work_struct *work); +DECLARE_DYNDBG_CLASSMAP(drm_debug_classes, DD_CLASS_TYPE_DISJOINT_BITS, 0, + "DRM_UT_CORE", + "DRM_UT_DRIVER", + "DRM_UT_KMS", + "DRM_UT_PRIME", + "DRM_UT_ATOMIC", + "DRM_UT_VBL", + "DRM_UT_STATE", + "DRM_UT_LEASE", + "DRM_UT_DP", + "DRM_UT_DRMRES"); + struct amdgpu_mgpu_info mgpu_info = { .mutex = __MUTEX_INITIALIZER(mgpu_info.mutex), .delayed_reset_work = __DELAYED_WORK_INITIALIZER( @@ -2181,15 +2196,46 @@ amdgpu_pci_remove(struct pci_dev *pdev) struct drm_device *dev = pci_get_drvdata(pdev); struct amdgpu_device *adev = drm_to_adev(dev); - drm_dev_unplug(dev); - if (adev->pm.rpm_mode != AMDGPU_RUNPM_NONE) { pm_runtime_get_sync(dev->dev); pm_runtime_forbid(dev->dev); } + if (adev->ip_versions[MP1_HWIP][0] == IP_VERSION(13, 0, 2)) { + bool need_to_reset_gpu = false; + + if (adev->gmc.xgmi.num_physical_nodes > 1) { + struct amdgpu_hive_info *hive; + + hive 
= amdgpu_get_xgmi_hive(adev); + if (hive->device_remove_count == 0) + need_to_reset_gpu = true; + hive->device_remove_count++; + amdgpu_put_xgmi_hive(hive); + } else { + need_to_reset_gpu = true; + } + + /* Workaround for ASICs need to reset SMU. + * Called only when the first device is removed. + */ + if (need_to_reset_gpu) { + struct amdgpu_reset_context reset_context; + + adev->shutdown = true; + memset(&reset_context, 0, sizeof(reset_context)); + reset_context.method = AMD_RESET_METHOD_NONE; + reset_context.reset_req_dev = adev; + set_bit(AMDGPU_NEED_FULL_RESET, &reset_context.flags); + set_bit(AMDGPU_RESET_FOR_DEVICE_REMOVE, &reset_context.flags); + amdgpu_device_gpu_recover(adev, NULL, &reset_context); + } + } + amdgpu_driver_unload_kms(dev); + drm_dev_unplug(dev); + /* * Flush any in flight DMA operations from device. * Clear the Bus Master Enable bit and then wait on the PCIe Device @@ -2563,8 +2609,11 @@ static int amdgpu_pmops_runtime_resume(struct device *dev) amdgpu_device_baco_exit(drm_dev); } ret = amdgpu_device_resume(drm_dev, false); - if (ret) + if (ret) { + if (amdgpu_device_supports_px(drm_dev)) + pci_disable_device(pdev); return ret; + } if (amdgpu_device_supports_px(drm_dev)) drm_dev->switch_power_state = DRM_SWITCH_POWER_ON; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c index 8adeb7469f1e..d0d99ed607dd 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c @@ -400,7 +400,6 @@ unsigned amdgpu_fence_count_emitted(struct amdgpu_ring *ring) /* We are not protected by ring lock when reading the last sequence * but it's ok to report slightly wrong fence count here. */ - amdgpu_fence_process(ring); emitted = 0x100000000ull; emitted -= atomic_read(&ring->fence_drv.last_seq); emitted += READ_ONCE(ring->fence_drv.sync_seq); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_fru_eeprom.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_fru_eeprom.c index ecada5eadfe3..e325150879df 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_fru_eeprom.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_fru_eeprom.c @@ -66,10 +66,15 @@ static bool is_fru_eeprom_supported(struct amdgpu_device *adev) return true; case CHIP_SIENNA_CICHLID: if (strnstr(atom_ctx->vbios_version, "D603", + sizeof(atom_ctx->vbios_version))) { + if (strnstr(atom_ctx->vbios_version, "D603GLXE", sizeof(atom_ctx->vbios_version))) - return true; - else + return false; + else + return true; + } else { return false; + } default: return false; } diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c index 222d3d7ea076..9546adc8a76f 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c @@ -23,6 +23,7 @@ * */ +#include <linux/firmware.h> #include "amdgpu.h" #include "amdgpu_gfx.h" #include "amdgpu_rlc.h" @@ -477,7 +478,7 @@ int amdgpu_gfx_disable_kcq(struct amdgpu_device *adev) kiq->pmf->kiq_unmap_queues(kiq_ring, &adev->gfx.compute_ring[i], RESET_QUEUES, 0, 0); - if (adev->gfx.kiq.ring.sched.ready) + if (adev->gfx.kiq.ring.sched.ready && !adev->job_hang) r = amdgpu_ring_test_helper(kiq_ring); spin_unlock(&adev->gfx.kiq.ring_lock); @@ -610,6 +611,45 @@ unlock: mutex_unlock(&adev->gfx.gfx_off_mutex); } +int amdgpu_set_gfx_off_residency(struct amdgpu_device *adev, bool value) +{ + int r = 0; + + mutex_lock(&adev->gfx.gfx_off_mutex); + + r = amdgpu_dpm_set_residency_gfxoff(adev, value); + + mutex_unlock(&adev->gfx.gfx_off_mutex); + + return r; +} + +int 
amdgpu_get_gfx_off_residency(struct amdgpu_device *adev, u32 *value) +{ + int r = 0; + + mutex_lock(&adev->gfx.gfx_off_mutex); + + r = amdgpu_dpm_get_residency_gfxoff(adev, value); + + mutex_unlock(&adev->gfx.gfx_off_mutex); + + return r; +} + +int amdgpu_get_gfx_off_entrycount(struct amdgpu_device *adev, u64 *value) +{ + int r = 0; + + mutex_lock(&adev->gfx.gfx_off_mutex); + + r = amdgpu_dpm_get_entrycount_gfxoff(adev, value); + + mutex_unlock(&adev->gfx.gfx_off_mutex); + + return r; +} + int amdgpu_get_gfx_off_status(struct amdgpu_device *adev, uint32_t *value) { @@ -826,3 +866,142 @@ int amdgpu_gfx_get_num_kcq(struct amdgpu_device *adev) } return amdgpu_num_kcq; } + +void amdgpu_gfx_cp_init_microcode(struct amdgpu_device *adev, + uint32_t ucode_id) +{ + const struct gfx_firmware_header_v1_0 *cp_hdr; + const struct gfx_firmware_header_v2_0 *cp_hdr_v2_0; + struct amdgpu_firmware_info *info = NULL; + const struct firmware *ucode_fw; + unsigned int fw_size; + + switch (ucode_id) { + case AMDGPU_UCODE_ID_CP_PFP: + cp_hdr = (const struct gfx_firmware_header_v1_0 *) + adev->gfx.pfp_fw->data; + adev->gfx.pfp_fw_version = + le32_to_cpu(cp_hdr->header.ucode_version); + adev->gfx.pfp_feature_version = + le32_to_cpu(cp_hdr->ucode_feature_version); + ucode_fw = adev->gfx.pfp_fw; + fw_size = le32_to_cpu(cp_hdr->header.ucode_size_bytes); + break; + case AMDGPU_UCODE_ID_CP_RS64_PFP: + cp_hdr_v2_0 = (const struct gfx_firmware_header_v2_0 *) + adev->gfx.pfp_fw->data; + adev->gfx.pfp_fw_version = + le32_to_cpu(cp_hdr_v2_0->header.ucode_version); + adev->gfx.pfp_feature_version = + le32_to_cpu(cp_hdr_v2_0->ucode_feature_version); + ucode_fw = adev->gfx.pfp_fw; + fw_size = le32_to_cpu(cp_hdr_v2_0->ucode_size_bytes); + break; + case AMDGPU_UCODE_ID_CP_RS64_PFP_P0_STACK: + case AMDGPU_UCODE_ID_CP_RS64_PFP_P1_STACK: + cp_hdr_v2_0 = (const struct gfx_firmware_header_v2_0 *) + adev->gfx.pfp_fw->data; + ucode_fw = adev->gfx.pfp_fw; + fw_size = le32_to_cpu(cp_hdr_v2_0->data_size_bytes); + break; + case AMDGPU_UCODE_ID_CP_ME: + cp_hdr = (const struct gfx_firmware_header_v1_0 *) + adev->gfx.me_fw->data; + adev->gfx.me_fw_version = + le32_to_cpu(cp_hdr->header.ucode_version); + adev->gfx.me_feature_version = + le32_to_cpu(cp_hdr->ucode_feature_version); + ucode_fw = adev->gfx.me_fw; + fw_size = le32_to_cpu(cp_hdr->header.ucode_size_bytes); + break; + case AMDGPU_UCODE_ID_CP_RS64_ME: + cp_hdr_v2_0 = (const struct gfx_firmware_header_v2_0 *) + adev->gfx.me_fw->data; + adev->gfx.me_fw_version = + le32_to_cpu(cp_hdr_v2_0->header.ucode_version); + adev->gfx.me_feature_version = + le32_to_cpu(cp_hdr_v2_0->ucode_feature_version); + ucode_fw = adev->gfx.me_fw; + fw_size = le32_to_cpu(cp_hdr_v2_0->ucode_size_bytes); + break; + case AMDGPU_UCODE_ID_CP_RS64_ME_P0_STACK: + case AMDGPU_UCODE_ID_CP_RS64_ME_P1_STACK: + cp_hdr_v2_0 = (const struct gfx_firmware_header_v2_0 *) + adev->gfx.me_fw->data; + ucode_fw = adev->gfx.me_fw; + fw_size = le32_to_cpu(cp_hdr_v2_0->data_size_bytes); + break; + case AMDGPU_UCODE_ID_CP_CE: + cp_hdr = (const struct gfx_firmware_header_v1_0 *) + adev->gfx.ce_fw->data; + adev->gfx.ce_fw_version = + le32_to_cpu(cp_hdr->header.ucode_version); + adev->gfx.ce_feature_version = + le32_to_cpu(cp_hdr->ucode_feature_version); + ucode_fw = adev->gfx.ce_fw; + fw_size = le32_to_cpu(cp_hdr->header.ucode_size_bytes); + break; + case AMDGPU_UCODE_ID_CP_MEC1: + cp_hdr = (const struct gfx_firmware_header_v1_0 *) + adev->gfx.mec_fw->data; + adev->gfx.mec_fw_version = + le32_to_cpu(cp_hdr->header.ucode_version); + 
adev->gfx.mec_feature_version = + le32_to_cpu(cp_hdr->ucode_feature_version); + ucode_fw = adev->gfx.mec_fw; + fw_size = le32_to_cpu(cp_hdr->header.ucode_size_bytes) - + le32_to_cpu(cp_hdr->jt_size) * 4; + break; + case AMDGPU_UCODE_ID_CP_MEC1_JT: + cp_hdr = (const struct gfx_firmware_header_v1_0 *) + adev->gfx.mec_fw->data; + ucode_fw = adev->gfx.mec_fw; + fw_size = le32_to_cpu(cp_hdr->jt_size) * 4; + break; + case AMDGPU_UCODE_ID_CP_MEC2: + cp_hdr = (const struct gfx_firmware_header_v1_0 *) + adev->gfx.mec2_fw->data; + adev->gfx.mec2_fw_version = + le32_to_cpu(cp_hdr->header.ucode_version); + adev->gfx.mec2_feature_version = + le32_to_cpu(cp_hdr->ucode_feature_version); + ucode_fw = adev->gfx.mec2_fw; + fw_size = le32_to_cpu(cp_hdr->header.ucode_size_bytes) - + le32_to_cpu(cp_hdr->jt_size) * 4; + break; + case AMDGPU_UCODE_ID_CP_MEC2_JT: + cp_hdr = (const struct gfx_firmware_header_v1_0 *) + adev->gfx.mec2_fw->data; + ucode_fw = adev->gfx.mec2_fw; + fw_size = le32_to_cpu(cp_hdr->jt_size) * 4; + break; + case AMDGPU_UCODE_ID_CP_RS64_MEC: + cp_hdr_v2_0 = (const struct gfx_firmware_header_v2_0 *) + adev->gfx.mec_fw->data; + adev->gfx.mec_fw_version = + le32_to_cpu(cp_hdr_v2_0->header.ucode_version); + adev->gfx.mec_feature_version = + le32_to_cpu(cp_hdr_v2_0->ucode_feature_version); + ucode_fw = adev->gfx.mec_fw; + fw_size = le32_to_cpu(cp_hdr_v2_0->ucode_size_bytes); + break; + case AMDGPU_UCODE_ID_CP_RS64_MEC_P0_STACK: + case AMDGPU_UCODE_ID_CP_RS64_MEC_P1_STACK: + case AMDGPU_UCODE_ID_CP_RS64_MEC_P2_STACK: + case AMDGPU_UCODE_ID_CP_RS64_MEC_P3_STACK: + cp_hdr_v2_0 = (const struct gfx_firmware_header_v2_0 *) + adev->gfx.mec_fw->data; + ucode_fw = adev->gfx.mec_fw; + fw_size = le32_to_cpu(cp_hdr_v2_0->data_size_bytes); + break; + default: + break; + } + + if (adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) { + info = &adev->firmware.ucode[ucode_id]; + info->ucode_id = ucode_id; + info->fw = ucode_fw; + adev->firmware.fw_size += ALIGN(fw_size, PAGE_SIZE); + } +} diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h index 23a696d38390..832b3807f1d6 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h @@ -304,6 +304,10 @@ struct amdgpu_gfx { uint32_t rlc_srlg_feature_version; uint32_t rlc_srls_fw_version; uint32_t rlc_srls_feature_version; + uint32_t rlcp_ucode_version; + uint32_t rlcp_ucode_feature_version; + uint32_t rlcv_ucode_version; + uint32_t rlcv_ucode_feature_version; uint32_t mec_feature_version; uint32_t mec2_feature_version; bool mec_fw_write_wait; @@ -332,10 +336,12 @@ struct amdgpu_gfx { uint32_t srbm_soft_reset; /* gfx off */ - bool gfx_off_state; /* true: enabled, false: disabled */ - struct mutex gfx_off_mutex; - uint32_t gfx_off_req_count; /* default 1, enable gfx off: dec 1, disable gfx off: add 1 */ - struct delayed_work gfx_off_delay_work; + bool gfx_off_state; /* true: enabled, false: disabled */ + struct mutex gfx_off_mutex; /* mutex to change gfxoff state */ + uint32_t gfx_off_req_count; /* default 1, enable gfx off: dec 1, disable gfx off: add 1 */ + struct delayed_work gfx_off_delay_work; /* async work to set gfx block off */ + uint32_t gfx_off_residency; /* last logged residency */ + uint64_t gfx_off_entrycount; /* count of times GPU has get into GFXOFF state */ /* pipe reservation */ struct mutex pipe_reserve_mutex; @@ -407,6 +413,10 @@ bool amdgpu_gfx_is_me_queue_enabled(struct amdgpu_device *adev, int me, void amdgpu_gfx_off_ctrl(struct amdgpu_device *adev, bool enable); int 
amdgpu_get_gfx_off_status(struct amdgpu_device *adev, uint32_t *value); int amdgpu_gfx_ras_late_init(struct amdgpu_device *adev, struct ras_common_if *ras_block); +void amdgpu_gfx_ras_fini(struct amdgpu_device *adev); +int amdgpu_get_gfx_off_entrycount(struct amdgpu_device *adev, u64 *value); +int amdgpu_get_gfx_off_residency(struct amdgpu_device *adev, u32 *residency); +int amdgpu_set_gfx_off_residency(struct amdgpu_device *adev, bool value); int amdgpu_gfx_process_ras_data_cb(struct amdgpu_device *adev, void *err_data, struct amdgpu_iv_entry *entry); @@ -416,4 +426,6 @@ int amdgpu_gfx_cp_ecc_error_irq(struct amdgpu_device *adev, uint32_t amdgpu_kiq_rreg(struct amdgpu_device *adev, uint32_t reg); void amdgpu_kiq_wreg(struct amdgpu_device *adev, uint32_t reg, uint32_t v); int amdgpu_gfx_get_num_kcq(struct amdgpu_device *adev); +void amdgpu_gfx_cp_init_microcode(struct amdgpu_device *adev, uint32_t ucode_id); + #endif diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfxhub.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfxhub.h index beabab515836..c7b44aeb671b 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfxhub.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfxhub.h @@ -35,6 +35,9 @@ struct amdgpu_gfxhub_funcs { void (*init)(struct amdgpu_device *adev); int (*get_xgmi_info)(struct amdgpu_device *adev); void (*utcl2_harvest)(struct amdgpu_device *adev); + void (*mode2_save_regs)(struct amdgpu_device *adev); + void (*mode2_restore_regs)(struct amdgpu_device *adev); + void (*halt)(struct amdgpu_device *adev); }; struct amdgpu_gfxhub { diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c index aebc384531ac..34233a74248c 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c @@ -572,45 +572,15 @@ void amdgpu_gmc_tmz_set(struct amdgpu_device *adev) void amdgpu_gmc_noretry_set(struct amdgpu_device *adev) { struct amdgpu_gmc *gmc = &adev->gmc; - - switch (adev->ip_versions[GC_HWIP][0]) { - case IP_VERSION(9, 0, 1): - case IP_VERSION(9, 3, 0): - case IP_VERSION(9, 4, 0): - case IP_VERSION(9, 4, 1): - case IP_VERSION(9, 4, 2): - case IP_VERSION(10, 3, 3): - case IP_VERSION(10, 3, 4): - case IP_VERSION(10, 3, 5): - case IP_VERSION(10, 3, 6): - case IP_VERSION(10, 3, 7): - /* - * noretry = 0 will cause kfd page fault tests fail - * for some ASICs, so set default to 1 for these ASICs. - */ - if (amdgpu_noretry == -1) - gmc->noretry = 1; - else - gmc->noretry = amdgpu_noretry; - break; - default: - /* Raven currently has issues with noretry - * regardless of what we decide for other - * asics, we should leave raven with - * noretry = 0 until we root cause the - * issues. - * - * default this to 0 for now, but we may want - * to change this in the future for certain - * GPUs as it can increase performance in - * certain cases. - */ - if (amdgpu_noretry == -1) - gmc->noretry = 0; - else - gmc->noretry = amdgpu_noretry; - break; - } + uint32_t gc_ver = adev->ip_versions[GC_HWIP][0]; + bool noretry_default = (gc_ver == IP_VERSION(9, 0, 1) || + gc_ver == IP_VERSION(9, 3, 0) || + gc_ver == IP_VERSION(9, 4, 0) || + gc_ver == IP_VERSION(9, 4, 1) || + gc_ver == IP_VERSION(9, 4, 2) || + gc_ver >= IP_VERSION(10, 3, 0)); + + gmc->noretry = (amdgpu_noretry == -1) ? 
noretry_default : amdgpu_noretry; } void amdgpu_gmc_set_vm_fault_masks(struct amdgpu_device *adev, int hub_type, diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.h index 008eaca27151..0305b660cd17 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.h @@ -264,6 +264,32 @@ struct amdgpu_gmc { u64 mall_size; /* number of UMC instances */ int num_umc; + /* mode2 save restore */ + u64 VM_L2_CNTL; + u64 VM_L2_CNTL2; + u64 VM_DUMMY_PAGE_FAULT_CNTL; + u64 VM_DUMMY_PAGE_FAULT_ADDR_LO32; + u64 VM_DUMMY_PAGE_FAULT_ADDR_HI32; + u64 VM_L2_PROTECTION_FAULT_CNTL; + u64 VM_L2_PROTECTION_FAULT_CNTL2; + u64 VM_L2_PROTECTION_FAULT_MM_CNTL3; + u64 VM_L2_PROTECTION_FAULT_MM_CNTL4; + u64 VM_L2_PROTECTION_FAULT_ADDR_LO32; + u64 VM_L2_PROTECTION_FAULT_ADDR_HI32; + u64 VM_DEBUG; + u64 VM_L2_MM_GROUP_RT_CLASSES; + u64 VM_L2_BANK_SELECT_RESERVED_CID; + u64 VM_L2_BANK_SELECT_RESERVED_CID2; + u64 VM_L2_CACHE_PARITY_CNTL; + u64 VM_L2_IH_LOG_CNTL; + u64 VM_CONTEXT_CNTL[16]; + u64 VM_CONTEXT_PAGE_TABLE_BASE_ADDR_LO32[16]; + u64 VM_CONTEXT_PAGE_TABLE_BASE_ADDR_HI32[16]; + u64 VM_CONTEXT_PAGE_TABLE_START_ADDR_LO32[16]; + u64 VM_CONTEXT_PAGE_TABLE_START_ADDR_HI32[16]; + u64 VM_CONTEXT_PAGE_TABLE_END_ADDR_LO32[16]; + u64 VM_CONTEXT_PAGE_TABLE_END_ADDR_HI32[16]; + u64 MC_VM_MX_L1_TLB_CNTL; }; #define amdgpu_gmc_flush_gpu_tlb(adev, vmid, vmhub, type) ((adev)->gmc.gmc_funcs->flush_gpu_tlb((adev), (vmid), (vmhub), (type))) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gtt_mgr.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_gtt_mgr.c index 8c6b2284cf56..1f3302aebeff 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gtt_mgr.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gtt_mgr.c @@ -205,6 +205,42 @@ void amdgpu_gtt_mgr_recover(struct amdgpu_gtt_mgr *mgr) } /** + * amdgpu_gtt_mgr_intersects - test for intersection + * + * @man: Our manager object + * @res: The resource to test + * @place: The place for the new allocation + * @size: The size of the new allocation + * + * Simplified intersection test, only interesting if we need GART or not. + */ +static bool amdgpu_gtt_mgr_intersects(struct ttm_resource_manager *man, + struct ttm_resource *res, + const struct ttm_place *place, + size_t size) +{ + return !place->lpfn || amdgpu_gtt_mgr_has_gart_addr(res); +} + +/** + * amdgpu_gtt_mgr_compatible - test for compatibility + * + * @man: Our manager object + * @res: The resource to test + * @place: The place for the new allocation + * @size: The size of the new allocation + * + * Simplified compatibility test. 
+ */ +static bool amdgpu_gtt_mgr_compatible(struct ttm_resource_manager *man, + struct ttm_resource *res, + const struct ttm_place *place, + size_t size) +{ + return !place->lpfn || amdgpu_gtt_mgr_has_gart_addr(res); +} + +/** * amdgpu_gtt_mgr_debug - dump VRAM table * * @man: TTM memory type manager @@ -225,6 +261,8 @@ static void amdgpu_gtt_mgr_debug(struct ttm_resource_manager *man, static const struct ttm_resource_manager_func amdgpu_gtt_mgr_func = { .alloc = amdgpu_gtt_mgr_new, .free = amdgpu_gtt_mgr_del, + .intersects = amdgpu_gtt_mgr_intersects, + .compatible = amdgpu_gtt_mgr_compatible, .debug = amdgpu_gtt_mgr_debug }; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c index 5071b96be982..46c99331d7f1 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c @@ -49,6 +49,7 @@ static enum drm_gpu_sched_stat amdgpu_job_timedout(struct drm_sched_job *s_job) } memset(&ti, 0, sizeof(struct amdgpu_task_info)); + adev->job_hang = true; if (amdgpu_gpu_recovery && amdgpu_ring_soft_recovery(ring, job->vmid, s_job->s_fence->parent)) { @@ -71,6 +72,7 @@ static enum drm_gpu_sched_stat amdgpu_job_timedout(struct drm_sched_job *s_job) reset_context.method = AMD_RESET_METHOD_NONE; reset_context.reset_req_dev = adev; clear_bit(AMDGPU_NEED_FULL_RESET, &reset_context.flags); + clear_bit(AMDGPU_SKIP_MODE2_RESET, &reset_context.flags); r = amdgpu_device_gpu_recover(ring->adev, job, &reset_context); if (r) @@ -82,6 +84,7 @@ static enum drm_gpu_sched_stat amdgpu_job_timedout(struct drm_sched_job *s_job) } exit: + adev->job_hang = false; drm_dev_exit(idx); return DRM_GPU_SCHED_STAT_NOMINAL; } @@ -102,7 +105,6 @@ int amdgpu_job_alloc(struct amdgpu_device *adev, unsigned num_ibs, */ (*job)->base.sched = &adev->rings[0]->sched; (*job)->vm = vm; - (*job)->num_ibs = num_ibs; amdgpu_sync_create(&(*job)->sync); amdgpu_sync_create(&(*job)->sched_sync); @@ -122,6 +124,7 @@ int amdgpu_job_alloc_with_ib(struct amdgpu_device *adev, unsigned size, if (r) return r; + (*job)->num_ibs = 1; r = amdgpu_ib_get(adev, NULL, size, pool_type, &(*job)->ibs[0]); if (r) kfree(*job); @@ -129,6 +132,23 @@ int amdgpu_job_alloc_with_ib(struct amdgpu_device *adev, unsigned size, return r; } +void amdgpu_job_set_resources(struct amdgpu_job *job, struct amdgpu_bo *gds, + struct amdgpu_bo *gws, struct amdgpu_bo *oa) +{ + if (gds) { + job->gds_base = amdgpu_bo_gpu_offset(gds) >> PAGE_SHIFT; + job->gds_size = amdgpu_bo_size(gds) >> PAGE_SHIFT; + } + if (gws) { + job->gws_base = amdgpu_bo_gpu_offset(gws) >> PAGE_SHIFT; + job->gws_size = amdgpu_bo_size(gws) >> PAGE_SHIFT; + } + if (oa) { + job->oa_base = amdgpu_bo_gpu_offset(oa) >> PAGE_SHIFT; + job->oa_size = amdgpu_bo_size(oa) >> PAGE_SHIFT; + } +} + void amdgpu_job_free_resources(struct amdgpu_job *job) { struct amdgpu_ring *ring = to_amdgpu_ring(job->base.sched); @@ -153,13 +173,34 @@ static void amdgpu_job_free_cb(struct drm_sched_job *s_job) dma_fence_put(&job->hw_fence); } +void amdgpu_job_set_gang_leader(struct amdgpu_job *job, + struct amdgpu_job *leader) +{ + struct dma_fence *fence = &leader->base.s_fence->scheduled; + + WARN_ON(job->gang_submit); + + /* + * Don't add a reference when we are the gang leader to avoid circle + * dependency. 
+ */ + if (job != leader) + dma_fence_get(fence); + job->gang_submit = fence; +} + void amdgpu_job_free(struct amdgpu_job *job) { amdgpu_job_free_resources(job); amdgpu_sync_free(&job->sync); amdgpu_sync_free(&job->sched_sync); + if (job->gang_submit != &job->base.s_fence->scheduled) + dma_fence_put(job->gang_submit); - dma_fence_put(&job->hw_fence); + if (!job->hw_fence.ops) + kfree(job); + else + dma_fence_put(&job->hw_fence); } int amdgpu_job_submit(struct amdgpu_job *job, struct drm_sched_entity *entity, @@ -224,12 +265,16 @@ static struct dma_fence *amdgpu_job_dependency(struct drm_sched_job *sched_job, fence = amdgpu_sync_get_fence(&job->sync); } + if (!fence && job->gang_submit) + fence = amdgpu_device_switch_gang(ring->adev, job->gang_submit); + return fence; } static struct dma_fence *amdgpu_job_run(struct drm_sched_job *sched_job) { struct amdgpu_ring *ring = to_amdgpu_ring(sched_job->sched); + struct amdgpu_device *adev = ring->adev; struct dma_fence *fence = NULL, *finished; struct amdgpu_job *job; int r = 0; @@ -241,8 +286,10 @@ static struct dma_fence *amdgpu_job_run(struct drm_sched_job *sched_job) trace_amdgpu_sched_run_job(job); - if (job->vram_lost_counter != atomic_read(&ring->adev->vram_lost_counter)) - dma_fence_set_error(finished, -ECANCELED);/* skip IB as well if VRAM lost */ + /* Skip job if VRAM is lost and never resubmit gangs */ + if (job->vram_lost_counter != atomic_read(&adev->vram_lost_counter) || + (job->job_run_counter && job->gang_submit)) + dma_fence_set_error(finished, -ECANCELED); if (finished->error < 0) { DRM_INFO("Skip scheduling IBs!\n"); @@ -272,10 +319,6 @@ void amdgpu_job_stop_all_jobs_on_sched(struct drm_gpu_scheduler *sched) /* Signal all jobs not yet scheduled */ for (i = DRM_SCHED_PRIORITY_COUNT - 1; i >= DRM_SCHED_PRIORITY_MIN; i--) { struct drm_sched_rq *rq = &sched->sched_rq[i]; - - if (!rq) - continue; - spin_lock(&rq->lock); list_for_each_entry(s_entity, &rq->entities, list) { while ((s_job = to_drm_sched_job(spsc_queue_pop(&s_entity->job_queue)))) { diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_job.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_job.h index babc0af751c2..ab7b150e5d50 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_job.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_job.h @@ -50,6 +50,7 @@ struct amdgpu_job { struct amdgpu_sync sync; struct amdgpu_sync sched_sync; struct dma_fence hw_fence; + struct dma_fence *gang_submit; uint32_t preamble_status; uint32_t preemption_status; bool vm_needs_flush; @@ -72,11 +73,20 @@ struct amdgpu_job { struct amdgpu_ib ibs[]; }; +static inline struct amdgpu_ring *amdgpu_job_ring(struct amdgpu_job *job) +{ + return to_amdgpu_ring(job->base.entity->rq->sched); +} + int amdgpu_job_alloc(struct amdgpu_device *adev, unsigned num_ibs, struct amdgpu_job **job, struct amdgpu_vm *vm); int amdgpu_job_alloc_with_ib(struct amdgpu_device *adev, unsigned size, enum amdgpu_ib_pool_type pool, struct amdgpu_job **job); +void amdgpu_job_set_resources(struct amdgpu_job *job, struct amdgpu_bo *gds, + struct amdgpu_bo *gws, struct amdgpu_bo *oa); void amdgpu_job_free_resources(struct amdgpu_job *job); +void amdgpu_job_set_gang_leader(struct amdgpu_job *job, + struct amdgpu_job *leader); void amdgpu_job_free(struct amdgpu_job *job); int amdgpu_job_submit(struct amdgpu_job *job, struct drm_sched_entity *entity, void *owner, struct dma_fence **f); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c index 1369c25448dc..fe23e09eec98 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c 
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c @@ -247,6 +247,14 @@ static int amdgpu_firmware_info(struct drm_amdgpu_info_firmware *fw_info, fw_info->ver = adev->gfx.rlc_srls_fw_version; fw_info->feature = adev->gfx.rlc_srls_feature_version; break; + case AMDGPU_INFO_FW_GFX_RLCP: + fw_info->ver = adev->gfx.rlcp_ucode_version; + fw_info->feature = adev->gfx.rlcp_ucode_feature_version; + break; + case AMDGPU_INFO_FW_GFX_RLCV: + fw_info->ver = adev->gfx.rlcv_ucode_version; + fw_info->feature = adev->gfx.rlcv_ucode_feature_version; + break; case AMDGPU_INFO_FW_GFX_MEC: if (query_fw->index == 0) { fw_info->ver = adev->gfx.mec_fw_version; @@ -328,6 +336,14 @@ static int amdgpu_firmware_info(struct drm_amdgpu_info_firmware *fw_info, fw_info->ver = adev->psp.cap_fw_version; fw_info->feature = adev->psp.cap_feature_version; break; + case AMDGPU_INFO_FW_MES_KIQ: + fw_info->ver = adev->mes.ucode_fw_version[0]; + fw_info->feature = 0; + break; + case AMDGPU_INFO_FW_MES: + fw_info->ver = adev->mes.ucode_fw_version[1]; + fw_info->feature = 0; + break; default: return -EINVAL; } @@ -1160,7 +1176,7 @@ int amdgpu_driver_open_kms(struct drm_device *dev, struct drm_file *file_priv) } mutex_init(&fpriv->bo_list_lock); - idr_init(&fpriv->bo_list_handles); + idr_init_base(&fpriv->bo_list_handles, 1); amdgpu_ctx_mgr_init(&fpriv->ctx_mgr, adev); @@ -1469,6 +1485,22 @@ static int amdgpu_debugfs_firmware_info_show(struct seq_file *m, void *unused) seq_printf(m, "RLC SRLS feature version: %u, firmware version: 0x%08x\n", fw_info.feature, fw_info.ver); + /* RLCP */ + query_fw.fw_type = AMDGPU_INFO_FW_GFX_RLCP; + ret = amdgpu_firmware_info(&fw_info, &query_fw, adev); + if (ret) + return ret; + seq_printf(m, "RLCP feature version: %u, firmware version: 0x%08x\n", + fw_info.feature, fw_info.ver); + + /* RLCV */ + query_fw.fw_type = AMDGPU_INFO_FW_GFX_RLCV; + ret = amdgpu_firmware_info(&fw_info, &query_fw, adev); + if (ret) + return ret; + seq_printf(m, "RLCV feature version: %u, firmware version: 0x%08x\n", + fw_info.feature, fw_info.ver); + /* MEC */ query_fw.fw_type = AMDGPU_INFO_FW_GFX_MEC; query_fw.index = 0; @@ -1581,6 +1613,22 @@ static int amdgpu_debugfs_firmware_info_show(struct seq_file *m, void *unused) fw_info.feature, fw_info.ver); } + /* MES_KIQ */ + query_fw.fw_type = AMDGPU_INFO_FW_MES_KIQ; + ret = amdgpu_firmware_info(&fw_info, &query_fw, adev); + if (ret) + return ret; + seq_printf(m, "MES_KIQ feature version: %u, firmware version: 0x%08x\n", + fw_info.feature, fw_info.ver); + + /* MES */ + query_fw.fw_type = AMDGPU_INFO_FW_MES; + ret = amdgpu_firmware_info(&fw_info, &query_fw, adev); + if (ret) + return ret; + seq_printf(m, "MES feature version: %u, firmware version: 0x%08x\n", + fw_info.feature, fw_info.ver); + seq_printf(m, "VBIOS version: %s\n", ctx->vbios_version); return 0; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.c index fe82b8b19a4e..0c546245793b 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.c @@ -181,6 +181,9 @@ int amdgpu_mes_init(struct amdgpu_device *adev) for (i = 0; i < AMDGPU_MES_MAX_SDMA_PIPES; i++) { if (adev->ip_versions[SDMA0_HWIP][0] < IP_VERSION(6, 0, 0)) adev->mes.sdma_hqd_mask[i] = i ? 0 : 0x3fc; + /* zero sdma_hqd_mask for non-existent engine */ + else if (adev->sdma.num_instances == 1) + adev->mes.sdma_hqd_mask[i] = i ? 
0 : 0xfc; else adev->mes.sdma_hqd_mask[i] = 0xfc; } diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.h index 7b46f6bf4187..ad980f4b66e1 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.h @@ -222,6 +222,8 @@ struct mes_add_queue_input { uint64_t tba_addr; uint64_t tma_addr; uint32_t is_kfd_process; + uint32_t is_aql_queue; + uint32_t queue_size; }; struct mes_remove_queue_input { diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_mode.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_mode.h index d788a00043a5..37322550d750 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_mode.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_mode.h @@ -38,7 +38,6 @@ #include <drm/drm_crtc_helper.h> #include <drm/drm_fb_helper.h> #include <drm/drm_framebuffer.h> -#include <drm/drm_plane_helper.h> #include <drm/drm_probe_helper.h> #include <linux/i2c.h> #include <linux/i2c-algo-bit.h> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c index 4570ad449390..2e8f6cd7a729 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c @@ -591,7 +591,7 @@ int amdgpu_bo_create(struct amdgpu_device *adev, if (!bp->destroy) bp->destroy = &amdgpu_bo_destroy; - r = ttm_bo_init_reserved(&adev->mman.bdev, &bo->tbo, size, bp->type, + r = ttm_bo_init_reserved(&adev->mman.bdev, &bo->tbo, bp->type, &bo->placement, page_align, &ctx, NULL, bp->resv, bp->destroy); if (unlikely(r != 0)) @@ -688,13 +688,16 @@ int amdgpu_bo_create_vm(struct amdgpu_device *adev, * num of amdgpu_vm_pt entries. */ BUG_ON(bp->bo_ptr_size < sizeof(struct amdgpu_bo_vm)); - bp->destroy = &amdgpu_bo_vm_destroy; r = amdgpu_bo_create(adev, bp, &bo_ptr); if (r) return r; *vmbo_ptr = to_amdgpu_bo_vm(bo_ptr); INIT_LIST_HEAD(&(*vmbo_ptr)->shadow_list); + /* Set destroy callback to amdgpu_bo_vm_destroy after vmbo->shadow_list + * is initialized. 
+ */ + bo_ptr->tbo.destroy = &amdgpu_bo_vm_destroy; return r; } @@ -1309,7 +1312,7 @@ void amdgpu_bo_release_notify(struct ttm_buffer_object *bo) if (bo->base.resv == &bo->base._resv) amdgpu_amdkfd_remove_fence_on_pt_pd_bos(abo); - if (bo->resource->mem_type != TTM_PL_VRAM || + if (!bo->resource || bo->resource->mem_type != TTM_PL_VRAM || !(abo->flags & AMDGPU_GEM_CREATE_VRAM_WIPE_ON_RELEASE) || adev->in_suspend || adev->shutdown) return; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c index b067ce45d226..effa7df3ddbf 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c @@ -138,6 +138,7 @@ static int psp_early_init(void *handle) case IP_VERSION(13, 0, 3): case IP_VERSION(13, 0, 5): case IP_VERSION(13, 0, 8): + case IP_VERSION(13, 0, 10): psp_v13_0_set_psp_funcs(psp); psp->autoload_supported = true; break; @@ -327,23 +328,32 @@ static int psp_init_sriov_microcode(struct psp_context *psp) switch (adev->ip_versions[MP0_HWIP][0]) { case IP_VERSION(9, 0, 0): + adev->virt.autoload_ucode_id = AMDGPU_UCODE_ID_CP_MEC2; ret = psp_init_cap_microcode(psp, "vega10"); break; case IP_VERSION(11, 0, 9): + adev->virt.autoload_ucode_id = AMDGPU_UCODE_ID_CP_MEC2; ret = psp_init_cap_microcode(psp, "navi12"); break; case IP_VERSION(11, 0, 7): + adev->virt.autoload_ucode_id = AMDGPU_UCODE_ID_CP_MEC2; ret = psp_init_cap_microcode(psp, "sienna_cichlid"); break; case IP_VERSION(13, 0, 2): + adev->virt.autoload_ucode_id = AMDGPU_UCODE_ID_CP_MEC2; ret = psp_init_cap_microcode(psp, "aldebaran"); ret &= psp_init_ta_microcode(psp, "aldebaran"); break; + case IP_VERSION(13, 0, 0): + adev->virt.autoload_ucode_id = 0; + break; + case IP_VERSION(13, 0, 10): + adev->virt.autoload_ucode_id = AMDGPU_UCODE_ID_CP_MES1_DATA; + break; default: BUG(); break; } - return ret; } @@ -486,11 +496,14 @@ static int psp_sw_fini(void *handle) release_firmware(psp->ta_fw); psp->ta_fw = NULL; } - if (adev->psp.cap_fw) { + if (psp->cap_fw) { release_firmware(psp->cap_fw); psp->cap_fw = NULL; } - + if (psp->toc_fw) { + release_firmware(psp->toc_fw); + psp->toc_fw = NULL; + } if (adev->ip_versions[MP0_HWIP][0] == IP_VERSION(11, 0, 0) || adev->ip_versions[MP0_HWIP][0] == IP_VERSION(11, 0, 7)) psp_sysfs_fini(adev); @@ -498,6 +511,11 @@ static int psp_sw_fini(void *handle) kfree(cmd); cmd = NULL; + if (psp->km_ring.ring_mem) + amdgpu_bo_free_kernel(&adev->firmware.rbuf, + &psp->km_ring.ring_mem_mc_addr, + (void **)&psp->km_ring.ring_mem); + amdgpu_bo_free_kernel(&psp->fw_pri_bo, &psp->fw_pri_mc_addr, &psp->fw_pri_buf); amdgpu_bo_free_kernel(&psp->fence_buf_bo, @@ -753,7 +771,7 @@ static int psp_tmr_init(struct psp_context *psp) } pptr = amdgpu_sriov_vf(psp->adev) ? 
&tmr_buf : NULL; - ret = amdgpu_bo_create_kernel(psp->adev, tmr_size, PSP_TMR_SIZE(psp->adev), + ret = amdgpu_bo_create_kernel(psp->adev, tmr_size, PSP_TMR_ALIGNMENT, AMDGPU_GEM_DOMAIN_VRAM, &psp->tmr_bo, &psp->tmr_mc_addr, pptr); @@ -766,6 +784,7 @@ static bool psp_skip_tmr(struct psp_context *psp) case IP_VERSION(11, 0, 9): case IP_VERSION(11, 0, 7): case IP_VERSION(13, 0, 2): + case IP_VERSION(13, 0, 10): return true; default: return false; @@ -812,7 +831,7 @@ static int psp_tmr_unload(struct psp_context *psp) struct psp_gfx_cmd_resp *cmd = acquire_psp_cmd_buf(psp); psp_prep_tmr_unload_cmd_buf(psp, cmd); - DRM_INFO("free PSP TMR buffer\n"); + dev_info(psp->adev->dev, "free PSP TMR buffer\n"); ret = psp_cmd_submit_buf(psp, NULL, cmd, psp->fence_buf_mc_addr); @@ -2041,6 +2060,15 @@ static int psp_hw_start(struct psp_context *psp) } } + if ((is_psp_fw_valid(psp->ras_drv)) && + (psp->funcs->bootloader_load_ras_drv != NULL)) { + ret = psp_bootloader_load_ras_drv(psp); + if (ret) { + DRM_ERROR("PSP load ras_drv failed!\n"); + return ret; + } + } + if ((is_psp_fw_valid(psp->sos)) && (psp->funcs->bootloader_load_sos != NULL)) { ret = psp_bootloader_load_sos(psp); @@ -2401,7 +2429,7 @@ static int psp_load_smu_fw(struct psp_context *psp) static bool fw_load_skip_check(struct psp_context *psp, struct amdgpu_firmware_info *ucode) { - if (!ucode->fw) + if (!ucode->fw || !ucode->ucode_size) return true; if (ucode->ucode_id == AMDGPU_UCODE_ID_SMC && @@ -2411,20 +2439,7 @@ static bool fw_load_skip_check(struct psp_context *psp, return true; if (amdgpu_sriov_vf(psp->adev) && - (ucode->ucode_id == AMDGPU_UCODE_ID_SDMA0 - || ucode->ucode_id == AMDGPU_UCODE_ID_SDMA1 - || ucode->ucode_id == AMDGPU_UCODE_ID_SDMA2 - || ucode->ucode_id == AMDGPU_UCODE_ID_SDMA3 - || ucode->ucode_id == AMDGPU_UCODE_ID_SDMA4 - || ucode->ucode_id == AMDGPU_UCODE_ID_SDMA5 - || ucode->ucode_id == AMDGPU_UCODE_ID_SDMA6 - || ucode->ucode_id == AMDGPU_UCODE_ID_SDMA7 - || ucode->ucode_id == AMDGPU_UCODE_ID_RLC_G - || ucode->ucode_id == AMDGPU_UCODE_ID_RLC_RESTORE_LIST_CNTL - || ucode->ucode_id == AMDGPU_UCODE_ID_RLC_RESTORE_LIST_GPM_MEM - || ucode->ucode_id == AMDGPU_UCODE_ID_RLC_RESTORE_LIST_SRM_MEM - || ucode->ucode_id == AMDGPU_UCODE_ID_SMC)) - /*skip ucode loading in SRIOV VF */ + amdgpu_virt_fw_load_skip_check(psp->adev, ucode->ucode_id)) return true; if (psp->autoload_supported && @@ -2498,7 +2513,7 @@ static int psp_load_non_psp_fw(struct psp_context *psp) /* Start rlc autoload after psp recieved all the gfx firmware */ if (psp->autoload_supported && ucode->ucode_id == (amdgpu_sriov_vf(adev) ? 
- AMDGPU_UCODE_ID_CP_MEC2 : AMDGPU_UCODE_ID_RLC_G)) { + adev->virt.autoload_ucode_id : AMDGPU_UCODE_ID_RLC_G)) { ret = psp_rlc_autoload_start(psp); if (ret) { DRM_ERROR("Failed to start rlc autoload\n"); @@ -2641,6 +2656,9 @@ static int psp_hw_fini(void *handle) psp_rap_terminate(psp); psp_dtm_terminate(psp); psp_hdcp_terminate(psp); + + if (adev->gmc.xgmi.num_physical_nodes > 1) + psp_xgmi_terminate(psp); } psp_asd_terminate(psp); @@ -3036,6 +3054,12 @@ static int parse_sos_bin_descriptor(struct psp_context *psp, psp->dbg_drv.size_bytes = le32_to_cpu(desc->size_bytes); psp->dbg_drv.start_addr = ucode_start_addr; break; + case PSP_FW_TYPE_PSP_RAS_DRV: + psp->ras_drv.fw_version = le32_to_cpu(desc->fw_version); + psp->ras_drv.feature_version = le32_to_cpu(desc->fw_version); + psp->ras_drv.size_bytes = le32_to_cpu(desc->size_bytes); + psp->ras_drv.start_addr = ucode_start_addr; + break; default: dev_warn(psp->adev->dev, "Unsupported PSP FW type: %d\n", desc->fw_type); break; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.h index c32b74bd970f..58ce3ebb446c 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.h @@ -36,6 +36,7 @@ #define PSP_CMD_BUFFER_SIZE 0x1000 #define PSP_1_MEG 0x100000 #define PSP_TMR_SIZE(adev) ((adev)->asic_type == CHIP_ALDEBARAN ? 0x800000 : 0x400000) +#define PSP_TMR_ALIGNMENT 0x100000 #define PSP_FW_NAME_LEN 0x24 enum psp_shared_mem_size { @@ -71,6 +72,7 @@ enum psp_bootloader_cmd { PSP_BL__LOAD_SOCDRV = 0xB0000, PSP_BL__LOAD_DBGDRV = 0xC0000, PSP_BL__LOAD_INTFDRV = 0xD0000, + PSP_BL__LOAD_RASDRV = 0xE0000, PSP_BL__DRAM_LONG_TRAIN = 0x100000, PSP_BL__DRAM_SHORT_TRAIN = 0x200000, PSP_BL__LOAD_TOS_SPL_TABLE = 0x10000000, @@ -114,6 +116,7 @@ struct psp_funcs int (*bootloader_load_soc_drv)(struct psp_context *psp); int (*bootloader_load_intf_drv)(struct psp_context *psp); int (*bootloader_load_dbg_drv)(struct psp_context *psp); + int (*bootloader_load_ras_drv)(struct psp_context *psp); int (*bootloader_load_sos)(struct psp_context *psp); int (*ring_init)(struct psp_context *psp, enum psp_ring_type ring_type); int (*ring_create)(struct psp_context *psp, @@ -323,6 +326,7 @@ struct psp_context struct psp_bin_desc soc_drv; struct psp_bin_desc intf_drv; struct psp_bin_desc dbg_drv; + struct psp_bin_desc ras_drv; /* tmr buffer */ struct amdgpu_bo *tmr_bo; @@ -403,6 +407,9 @@ struct amdgpu_psp_funcs { ((psp)->funcs->bootloader_load_intf_drv ? (psp)->funcs->bootloader_load_intf_drv((psp)) : 0) #define psp_bootloader_load_dbg_drv(psp) \ ((psp)->funcs->bootloader_load_dbg_drv ? (psp)->funcs->bootloader_load_dbg_drv((psp)) : 0) +#define psp_bootloader_load_ras_drv(psp) \ + ((psp)->funcs->bootloader_load_ras_drv ? \ + (psp)->funcs->bootloader_load_ras_drv((psp)) : 0) #define psp_bootloader_load_sos(psp) \ ((psp)->funcs->bootloader_load_sos ? 
(psp)->funcs->bootloader_load_sos((psp)) : 0) #define psp_smu_reload_quirk(psp) \ diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c index ff5361f5c2d4..2dad7aa9a03b 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c @@ -1811,7 +1811,8 @@ static void amdgpu_ras_log_on_err_counter(struct amdgpu_device *adev) amdgpu_ras_query_error_status(adev, &info); if (adev->ip_versions[MP0_HWIP][0] != IP_VERSION(11, 0, 2) && - adev->ip_versions[MP0_HWIP][0] != IP_VERSION(11, 0, 4)) { + adev->ip_versions[MP0_HWIP][0] != IP_VERSION(11, 0, 4) && + adev->ip_versions[MP0_HWIP][0] != IP_VERSION(13, 0, 0)) { if (amdgpu_ras_reset_error_status(adev, info.head.block)) dev_warn(adev->dev, "Failed to reset error counter and error status"); } @@ -1949,6 +1950,7 @@ static void amdgpu_ras_do_recovery(struct work_struct *work) reset_context.method = AMD_RESET_METHOD_NONE; reset_context.reset_req_dev = adev; clear_bit(AMDGPU_NEED_FULL_RESET, &reset_context.flags); + clear_bit(AMDGPU_SKIP_MODE2_RESET, &reset_context.flags); amdgpu_device_gpu_recover(ras->adev, NULL, &reset_context); } @@ -2718,7 +2720,8 @@ int amdgpu_ras_pre_fini(struct amdgpu_device *adev) /* Need disable ras on all IPs here before ip [hw/sw]fini */ - amdgpu_ras_disable_all_features(adev, 0); + if (con->features) + amdgpu_ras_disable_all_features(adev, 0); amdgpu_ras_recovery_fini(adev); return 0; } @@ -2831,11 +2834,8 @@ static int amdgpu_bad_page_notifier(struct notifier_block *nb, struct mce *m = (struct mce *)data; struct amdgpu_device *adev = NULL; uint32_t gpu_id = 0; - uint32_t umc_inst = 0; - uint32_t ch_inst, channel_index = 0; + uint32_t umc_inst = 0, ch_inst = 0; struct ras_err_data err_data = {0, 0, 0, NULL}; - struct eeprom_table_record err_rec; - uint64_t retired_page; /* * If the error was generated in UMC_V2, which belongs to GPU UMCs, @@ -2874,21 +2874,22 @@ static int amdgpu_bad_page_notifier(struct notifier_block *nb, dev_info(adev->dev, "Uncorrectable error detected in UMC inst: %d, chan_idx: %d", umc_inst, ch_inst); + err_data.err_addr = + kcalloc(adev->umc.max_ras_err_cnt_per_query, + sizeof(struct eeprom_table_record), GFP_KERNEL); + if (!err_data.err_addr) { + dev_warn(adev->dev, + "Failed to alloc memory for umc error record in mca notifier!\n"); + return NOTIFY_DONE; + } + /* * Translate UMC channel address to Physical address */ - channel_index = - adev->umc.channel_idx_tbl[umc_inst * adev->umc.channel_inst_num - + ch_inst]; - - retired_page = ADDR_OF_8KB_BLOCK(m->addr) | - ADDR_OF_256B_BLOCK(channel_index) | - OFFSET_IN_256B_BLOCK(m->addr); - - memset(&err_rec, 0x0, sizeof(struct eeprom_table_record)); - err_data.err_addr = &err_rec; - amdgpu_umc_fill_error_record(&err_data, m->addr, - retired_page, channel_index, umc_inst); + if (adev->umc.ras && + adev->umc.ras->convert_ras_error_address) + adev->umc.ras->convert_ras_error_address(adev, + &err_data, m->addr, ch_inst, umc_inst); if (amdgpu_bad_page_threshold != 0) { amdgpu_ras_add_bad_pages(adev, err_data.err_addr, @@ -2896,6 +2897,7 @@ static int amdgpu_bad_page_notifier(struct notifier_block *nb, amdgpu_ras_save_bad_pages(adev); } + kfree(err_data.err_addr); return NOTIFY_OK; } diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras_eeprom.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras_eeprom.c index c4283987bb1e..84c241b9a2a1 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras_eeprom.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras_eeprom.c @@ -38,6 +38,7 @@ #define 
EEPROM_I2C_MADDR_ARCTURUS_D342 0x0 #define EEPROM_I2C_MADDR_SIENNA_CICHLID 0x0 #define EEPROM_I2C_MADDR_ALDEBARAN 0x0 +#define EEPROM_I2C_MADDR_SMU_13_0_0 (0x54UL << 16) /* * The 2 macros bellow represent the actual size in bytes that @@ -156,6 +157,15 @@ static bool __get_eeprom_i2c_addr(struct amdgpu_device *adev, return false; } + switch (adev->ip_versions[MP1_HWIP][0]) { + case IP_VERSION(13, 0, 0): + control->i2c_address = EEPROM_I2C_MADDR_SMU_13_0_0; + break; + + default: + break; + } + return true; } diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_reset.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_reset.c index 32c86a0b145c..9da5ead50c90 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_reset.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_reset.c @@ -23,6 +23,7 @@ #include "amdgpu_reset.h" #include "aldebaran.h" +#include "sienna_cichlid.h" int amdgpu_reset_add_handler(struct amdgpu_reset_control *reset_ctl, struct amdgpu_reset_handler *handler) @@ -36,10 +37,15 @@ int amdgpu_reset_init(struct amdgpu_device *adev) { int ret = 0; + adev->amdgpu_reset_level_mask = 0x1; + switch (adev->ip_versions[MP1_HWIP][0]) { case IP_VERSION(13, 0, 2): ret = aldebaran_reset_init(adev); break; + case IP_VERSION(11, 0, 7): + ret = sienna_cichlid_reset_init(adev); + break; default: break; } @@ -55,6 +61,9 @@ int amdgpu_reset_fini(struct amdgpu_device *adev) case IP_VERSION(13, 0, 2): ret = aldebaran_reset_fini(adev); break; + case IP_VERSION(11, 0, 7): + ret = sienna_cichlid_reset_fini(adev); + break; default: break; } @@ -67,6 +76,12 @@ int amdgpu_reset_prepare_hwcontext(struct amdgpu_device *adev, { struct amdgpu_reset_handler *reset_handler = NULL; + if (!(adev->amdgpu_reset_level_mask & AMDGPU_RESET_LEVEL_MODE2)) + return -ENOSYS; + + if (test_bit(AMDGPU_SKIP_MODE2_RESET, &reset_context->flags)) + return -ENOSYS; + if (adev->reset_cntl && adev->reset_cntl->get_reset_handler) reset_handler = adev->reset_cntl->get_reset_handler( adev->reset_cntl, reset_context); @@ -83,6 +98,12 @@ int amdgpu_reset_perform_reset(struct amdgpu_device *adev, int ret; struct amdgpu_reset_handler *reset_handler = NULL; + if (!(adev->amdgpu_reset_level_mask & AMDGPU_RESET_LEVEL_MODE2)) + return -ENOSYS; + + if (test_bit(AMDGPU_SKIP_MODE2_RESET, &reset_context->flags)) + return -ENOSYS; + if (adev->reset_cntl) reset_handler = adev->reset_cntl->get_reset_handler( adev->reset_cntl, reset_context); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_reset.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_reset.h index 9e55a5d7a825..f5318fedf2f0 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_reset.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_reset.h @@ -30,6 +30,8 @@ enum AMDGPU_RESET_FLAGS { AMDGPU_NEED_FULL_RESET = 0, AMDGPU_SKIP_HW_RESET = 1, + AMDGPU_SKIP_MODE2_RESET = 2, + AMDGPU_RESET_FOR_DEVICE_REMOVE = 3, }; struct amdgpu_reset_context { @@ -37,6 +39,7 @@ struct amdgpu_reset_context { struct amdgpu_device *reset_req_dev; struct amdgpu_job *job; struct amdgpu_hive_info *hive; + struct list_head *reset_device_list; unsigned long flags; }; @@ -110,7 +113,8 @@ static inline bool amdgpu_reset_get_reset_domain(struct amdgpu_reset_domain *dom static inline void amdgpu_reset_put_reset_domain(struct amdgpu_reset_domain *domain) { - kref_put(&domain->refcount, amdgpu_reset_destroy_reset_domain); + if (domain) + kref_put(&domain->refcount, amdgpu_reset_destroy_reset_domain); } static inline bool amdgpu_reset_domain_schedule(struct amdgpu_reset_domain *domain, diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.c index 
d3558c34d406..3e316b013fd9 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.c @@ -405,6 +405,9 @@ bool amdgpu_ring_soft_recovery(struct amdgpu_ring *ring, unsigned int vmid, { ktime_t deadline = ktime_add_us(ktime_get(), 10000); + if (!(ring->adev->amdgpu_reset_level_mask & AMDGPU_RESET_LEVEL_SOFT_RECOVERY)) + return false; + if (amdgpu_sriov_vf(ring->adev) || !ring->funcs->soft_recovery || !fence) return false; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_rlc.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_rlc.c index 6373bfb47d55..012b72d00e04 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_rlc.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_rlc.c @@ -272,3 +272,275 @@ void amdgpu_gfx_rlc_fini(struct amdgpu_device *adev) &adev->gfx.rlc.cp_table_gpu_addr, (void **)&adev->gfx.rlc.cp_table_ptr); } + +static int amdgpu_gfx_rlc_init_microcode_v2_0(struct amdgpu_device *adev) +{ + const struct common_firmware_header *common_hdr; + const struct rlc_firmware_header_v2_0 *rlc_hdr; + struct amdgpu_firmware_info *info; + unsigned int *tmp; + unsigned int i; + + rlc_hdr = (const struct rlc_firmware_header_v2_0 *)adev->gfx.rlc_fw->data; + + adev->gfx.rlc_fw_version = le32_to_cpu(rlc_hdr->header.ucode_version); + adev->gfx.rlc_feature_version = le32_to_cpu(rlc_hdr->ucode_feature_version); + adev->gfx.rlc.save_and_restore_offset = + le32_to_cpu(rlc_hdr->save_and_restore_offset); + adev->gfx.rlc.clear_state_descriptor_offset = + le32_to_cpu(rlc_hdr->clear_state_descriptor_offset); + adev->gfx.rlc.avail_scratch_ram_locations = + le32_to_cpu(rlc_hdr->avail_scratch_ram_locations); + adev->gfx.rlc.reg_restore_list_size = + le32_to_cpu(rlc_hdr->reg_restore_list_size); + adev->gfx.rlc.reg_list_format_start = + le32_to_cpu(rlc_hdr->reg_list_format_start); + adev->gfx.rlc.reg_list_format_separate_start = + le32_to_cpu(rlc_hdr->reg_list_format_separate_start); + adev->gfx.rlc.starting_offsets_start = + le32_to_cpu(rlc_hdr->starting_offsets_start); + adev->gfx.rlc.reg_list_format_size_bytes = + le32_to_cpu(rlc_hdr->reg_list_format_size_bytes); + adev->gfx.rlc.reg_list_size_bytes = + le32_to_cpu(rlc_hdr->reg_list_size_bytes); + adev->gfx.rlc.register_list_format = + kmalloc(adev->gfx.rlc.reg_list_format_size_bytes + + adev->gfx.rlc.reg_list_size_bytes, GFP_KERNEL); + if (!adev->gfx.rlc.register_list_format) { + dev_err(adev->dev, "failed to allocate memory for rlc register_list_format\n"); + return -ENOMEM; + } + + tmp = (unsigned int *)((uintptr_t)rlc_hdr + + le32_to_cpu(rlc_hdr->reg_list_format_array_offset_bytes)); + for (i = 0 ; i < (rlc_hdr->reg_list_format_size_bytes >> 2); i++) + adev->gfx.rlc.register_list_format[i] = le32_to_cpu(tmp[i]); + + adev->gfx.rlc.register_restore = adev->gfx.rlc.register_list_format + i; + + tmp = (unsigned int *)((uintptr_t)rlc_hdr + + le32_to_cpu(rlc_hdr->reg_list_array_offset_bytes)); + for (i = 0 ; i < (rlc_hdr->reg_list_size_bytes >> 2); i++) + adev->gfx.rlc.register_restore[i] = le32_to_cpu(tmp[i]); + + if (adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) { + info = &adev->firmware.ucode[AMDGPU_UCODE_ID_RLC_G]; + info->ucode_id = AMDGPU_UCODE_ID_RLC_G; + info->fw = adev->gfx.rlc_fw; + if (info->fw) { + common_hdr = (const struct common_firmware_header *)info->fw->data; + adev->firmware.fw_size += + ALIGN(le32_to_cpu(common_hdr->ucode_size_bytes), PAGE_SIZE); + } + } + + return 0; +} + +static void amdgpu_gfx_rlc_init_microcode_v2_1(struct amdgpu_device *adev) +{ + const struct rlc_firmware_header_v2_1 *rlc_hdr; + struct amdgpu_firmware_info 
*info; + + rlc_hdr = (const struct rlc_firmware_header_v2_1 *)adev->gfx.rlc_fw->data; + adev->gfx.rlc_srlc_fw_version = le32_to_cpu(rlc_hdr->save_restore_list_cntl_ucode_ver); + adev->gfx.rlc_srlc_feature_version = le32_to_cpu(rlc_hdr->save_restore_list_cntl_feature_ver); + adev->gfx.rlc.save_restore_list_cntl_size_bytes = le32_to_cpu(rlc_hdr->save_restore_list_cntl_size_bytes); + adev->gfx.rlc.save_restore_list_cntl = (u8 *)rlc_hdr + le32_to_cpu(rlc_hdr->save_restore_list_cntl_offset_bytes); + adev->gfx.rlc_srlg_fw_version = le32_to_cpu(rlc_hdr->save_restore_list_gpm_ucode_ver); + adev->gfx.rlc_srlg_feature_version = le32_to_cpu(rlc_hdr->save_restore_list_gpm_feature_ver); + adev->gfx.rlc.save_restore_list_gpm_size_bytes = le32_to_cpu(rlc_hdr->save_restore_list_gpm_size_bytes); + adev->gfx.rlc.save_restore_list_gpm = (u8 *)rlc_hdr + le32_to_cpu(rlc_hdr->save_restore_list_gpm_offset_bytes); + adev->gfx.rlc_srls_fw_version = le32_to_cpu(rlc_hdr->save_restore_list_srm_ucode_ver); + adev->gfx.rlc_srls_feature_version = le32_to_cpu(rlc_hdr->save_restore_list_srm_feature_ver); + adev->gfx.rlc.save_restore_list_srm_size_bytes = le32_to_cpu(rlc_hdr->save_restore_list_srm_size_bytes); + adev->gfx.rlc.save_restore_list_srm = (u8 *)rlc_hdr + le32_to_cpu(rlc_hdr->save_restore_list_srm_offset_bytes); + adev->gfx.rlc.reg_list_format_direct_reg_list_length = + le32_to_cpu(rlc_hdr->reg_list_format_direct_reg_list_length); + + if (adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) { + if (adev->gfx.rlc.save_restore_list_cntl_size_bytes) { + info = &adev->firmware.ucode[AMDGPU_UCODE_ID_RLC_RESTORE_LIST_CNTL]; + info->ucode_id = AMDGPU_UCODE_ID_RLC_RESTORE_LIST_CNTL; + info->fw = adev->gfx.rlc_fw; + adev->firmware.fw_size += + ALIGN(adev->gfx.rlc.save_restore_list_cntl_size_bytes, PAGE_SIZE); + } + + if (adev->gfx.rlc.save_restore_list_gpm_size_bytes) { + info = &adev->firmware.ucode[AMDGPU_UCODE_ID_RLC_RESTORE_LIST_GPM_MEM]; + info->ucode_id = AMDGPU_UCODE_ID_RLC_RESTORE_LIST_GPM_MEM; + info->fw = adev->gfx.rlc_fw; + adev->firmware.fw_size += + ALIGN(adev->gfx.rlc.save_restore_list_gpm_size_bytes, PAGE_SIZE); + } + + if (adev->gfx.rlc.save_restore_list_srm_size_bytes) { + info = &adev->firmware.ucode[AMDGPU_UCODE_ID_RLC_RESTORE_LIST_SRM_MEM]; + info->ucode_id = AMDGPU_UCODE_ID_RLC_RESTORE_LIST_SRM_MEM; + info->fw = adev->gfx.rlc_fw; + adev->firmware.fw_size += + ALIGN(adev->gfx.rlc.save_restore_list_srm_size_bytes, PAGE_SIZE); + } + } +} + +static void amdgpu_gfx_rlc_init_microcode_v2_2(struct amdgpu_device *adev) +{ + const struct rlc_firmware_header_v2_2 *rlc_hdr; + struct amdgpu_firmware_info *info; + + rlc_hdr = (const struct rlc_firmware_header_v2_2 *)adev->gfx.rlc_fw->data; + adev->gfx.rlc.rlc_iram_ucode_size_bytes = le32_to_cpu(rlc_hdr->rlc_iram_ucode_size_bytes); + adev->gfx.rlc.rlc_iram_ucode = (u8 *)rlc_hdr + le32_to_cpu(rlc_hdr->rlc_iram_ucode_offset_bytes); + adev->gfx.rlc.rlc_dram_ucode_size_bytes = le32_to_cpu(rlc_hdr->rlc_dram_ucode_size_bytes); + adev->gfx.rlc.rlc_dram_ucode = (u8 *)rlc_hdr + le32_to_cpu(rlc_hdr->rlc_dram_ucode_offset_bytes); + + if (adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) { + if (adev->gfx.rlc.rlc_iram_ucode_size_bytes) { + info = &adev->firmware.ucode[AMDGPU_UCODE_ID_RLC_IRAM]; + info->ucode_id = AMDGPU_UCODE_ID_RLC_IRAM; + info->fw = adev->gfx.rlc_fw; + adev->firmware.fw_size += + ALIGN(adev->gfx.rlc.rlc_iram_ucode_size_bytes, PAGE_SIZE); + } + + if (adev->gfx.rlc.rlc_dram_ucode_size_bytes) { + info = &adev->firmware.ucode[AMDGPU_UCODE_ID_RLC_DRAM]; + info->ucode_id 
= AMDGPU_UCODE_ID_RLC_DRAM; + info->fw = adev->gfx.rlc_fw; + adev->firmware.fw_size += + ALIGN(adev->gfx.rlc.rlc_dram_ucode_size_bytes, PAGE_SIZE); + } + } +} + +static void amdgpu_gfx_rlc_init_microcode_v2_3(struct amdgpu_device *adev) +{ + const struct rlc_firmware_header_v2_3 *rlc_hdr; + struct amdgpu_firmware_info *info; + + rlc_hdr = (const struct rlc_firmware_header_v2_3 *)adev->gfx.rlc_fw->data; + adev->gfx.rlcp_ucode_version = le32_to_cpu(rlc_hdr->rlcp_ucode_version); + adev->gfx.rlcp_ucode_feature_version = le32_to_cpu(rlc_hdr->rlcp_ucode_feature_version); + adev->gfx.rlc.rlcp_ucode_size_bytes = le32_to_cpu(rlc_hdr->rlcp_ucode_size_bytes); + adev->gfx.rlc.rlcp_ucode = (u8 *)rlc_hdr + le32_to_cpu(rlc_hdr->rlcp_ucode_offset_bytes); + + adev->gfx.rlcv_ucode_version = le32_to_cpu(rlc_hdr->rlcv_ucode_version); + adev->gfx.rlcv_ucode_feature_version = le32_to_cpu(rlc_hdr->rlcv_ucode_feature_version); + adev->gfx.rlc.rlcv_ucode_size_bytes = le32_to_cpu(rlc_hdr->rlcv_ucode_size_bytes); + adev->gfx.rlc.rlcv_ucode = (u8 *)rlc_hdr + le32_to_cpu(rlc_hdr->rlcv_ucode_offset_bytes); + + if (adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) { + if (adev->gfx.rlc.rlcp_ucode_size_bytes) { + info = &adev->firmware.ucode[AMDGPU_UCODE_ID_RLC_P]; + info->ucode_id = AMDGPU_UCODE_ID_RLC_P; + info->fw = adev->gfx.rlc_fw; + adev->firmware.fw_size += + ALIGN(adev->gfx.rlc.rlcp_ucode_size_bytes, PAGE_SIZE); + } + + if (adev->gfx.rlc.rlcv_ucode_size_bytes) { + info = &adev->firmware.ucode[AMDGPU_UCODE_ID_RLC_V]; + info->ucode_id = AMDGPU_UCODE_ID_RLC_V; + info->fw = adev->gfx.rlc_fw; + adev->firmware.fw_size += + ALIGN(adev->gfx.rlc.rlcv_ucode_size_bytes, PAGE_SIZE); + } + } +} + +static void amdgpu_gfx_rlc_init_microcode_v2_4(struct amdgpu_device *adev) +{ + const struct rlc_firmware_header_v2_4 *rlc_hdr; + struct amdgpu_firmware_info *info; + + rlc_hdr = (const struct rlc_firmware_header_v2_4 *)adev->gfx.rlc_fw->data; + adev->gfx.rlc.global_tap_delays_ucode_size_bytes = le32_to_cpu(rlc_hdr->global_tap_delays_ucode_size_bytes); + adev->gfx.rlc.global_tap_delays_ucode = (u8 *)rlc_hdr + le32_to_cpu(rlc_hdr->global_tap_delays_ucode_offset_bytes); + adev->gfx.rlc.se0_tap_delays_ucode_size_bytes = le32_to_cpu(rlc_hdr->se0_tap_delays_ucode_size_bytes); + adev->gfx.rlc.se0_tap_delays_ucode = (u8 *)rlc_hdr + le32_to_cpu(rlc_hdr->se0_tap_delays_ucode_offset_bytes); + adev->gfx.rlc.se1_tap_delays_ucode_size_bytes = le32_to_cpu(rlc_hdr->se1_tap_delays_ucode_size_bytes); + adev->gfx.rlc.se1_tap_delays_ucode = (u8 *)rlc_hdr + le32_to_cpu(rlc_hdr->se1_tap_delays_ucode_offset_bytes); + adev->gfx.rlc.se2_tap_delays_ucode_size_bytes = le32_to_cpu(rlc_hdr->se2_tap_delays_ucode_size_bytes); + adev->gfx.rlc.se2_tap_delays_ucode = (u8 *)rlc_hdr + le32_to_cpu(rlc_hdr->se2_tap_delays_ucode_offset_bytes); + adev->gfx.rlc.se3_tap_delays_ucode_size_bytes = le32_to_cpu(rlc_hdr->se3_tap_delays_ucode_size_bytes); + adev->gfx.rlc.se3_tap_delays_ucode = (u8 *)rlc_hdr + le32_to_cpu(rlc_hdr->se3_tap_delays_ucode_offset_bytes); + + if (adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) { + if (adev->gfx.rlc.global_tap_delays_ucode_size_bytes) { + info = &adev->firmware.ucode[AMDGPU_UCODE_ID_GLOBAL_TAP_DELAYS]; + info->ucode_id = AMDGPU_UCODE_ID_GLOBAL_TAP_DELAYS; + info->fw = adev->gfx.rlc_fw; + adev->firmware.fw_size += + ALIGN(adev->gfx.rlc.global_tap_delays_ucode_size_bytes, PAGE_SIZE); + } + + if (adev->gfx.rlc.se0_tap_delays_ucode_size_bytes) { + info = &adev->firmware.ucode[AMDGPU_UCODE_ID_SE0_TAP_DELAYS]; + info->ucode_id = 
AMDGPU_UCODE_ID_SE0_TAP_DELAYS; + info->fw = adev->gfx.rlc_fw; + adev->firmware.fw_size += + ALIGN(adev->gfx.rlc.se0_tap_delays_ucode_size_bytes, PAGE_SIZE); + } + + if (adev->gfx.rlc.se1_tap_delays_ucode_size_bytes) { + info = &adev->firmware.ucode[AMDGPU_UCODE_ID_SE1_TAP_DELAYS]; + info->ucode_id = AMDGPU_UCODE_ID_SE1_TAP_DELAYS; + info->fw = adev->gfx.rlc_fw; + adev->firmware.fw_size += + ALIGN(adev->gfx.rlc.se1_tap_delays_ucode_size_bytes, PAGE_SIZE); + } + + if (adev->gfx.rlc.se2_tap_delays_ucode_size_bytes) { + info = &adev->firmware.ucode[AMDGPU_UCODE_ID_SE2_TAP_DELAYS]; + info->ucode_id = AMDGPU_UCODE_ID_SE2_TAP_DELAYS; + info->fw = adev->gfx.rlc_fw; + adev->firmware.fw_size += + ALIGN(adev->gfx.rlc.se2_tap_delays_ucode_size_bytes, PAGE_SIZE); + } + + if (adev->gfx.rlc.se3_tap_delays_ucode_size_bytes) { + info = &adev->firmware.ucode[AMDGPU_UCODE_ID_SE3_TAP_DELAYS]; + info->ucode_id = AMDGPU_UCODE_ID_SE3_TAP_DELAYS; + info->fw = adev->gfx.rlc_fw; + adev->firmware.fw_size += + ALIGN(adev->gfx.rlc.se3_tap_delays_ucode_size_bytes, PAGE_SIZE); + } + } +} + +int amdgpu_gfx_rlc_init_microcode(struct amdgpu_device *adev, + uint16_t version_major, + uint16_t version_minor) +{ + int err; + + if (version_major < 2) { + /* only support rlc_hdr v2.x and onwards */ + dev_err(adev->dev, "unsupported rlc fw hdr\n"); + return -EINVAL; + } + + /* is_rlc_v2_1 is still used in APU code path */ + if (version_major == 2 && version_minor == 1) + adev->gfx.rlc.is_rlc_v2_1 = true; + + if (version_minor >= 0) { + err = amdgpu_gfx_rlc_init_microcode_v2_0(adev); + if (err) { + dev_err(adev->dev, "fail to init rlc v2_0 microcode\n"); + return err; + } + } + if (version_minor >= 1) + amdgpu_gfx_rlc_init_microcode_v2_1(adev); + if (version_minor >= 2) + amdgpu_gfx_rlc_init_microcode_v2_2(adev); + if (version_minor == 3) + amdgpu_gfx_rlc_init_microcode_v2_3(adev); + if (version_minor == 4) + amdgpu_gfx_rlc_init_microcode_v2_4(adev); + + return 0; +} diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_rlc.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_rlc.h index 03ac36b2c2cf..23f060db9255 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_rlc.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_rlc.h @@ -267,5 +267,7 @@ int amdgpu_gfx_rlc_init_csb(struct amdgpu_device *adev); int amdgpu_gfx_rlc_init_cpt(struct amdgpu_device *adev); void amdgpu_gfx_rlc_setup_cp_table(struct amdgpu_device *adev); void amdgpu_gfx_rlc_fini(struct amdgpu_device *adev); - +int amdgpu_gfx_rlc_init_microcode(struct amdgpu_device *adev, + uint16_t version_major, + uint16_t version_minor); #endif diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_sdma.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_sdma.c index 42c1f050542f..ea5278f094c0 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_sdma.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_sdma.c @@ -21,6 +21,7 @@ * */ +#include <linux/firmware.h> #include "amdgpu.h" #include "amdgpu_sdma.h" #include "amdgpu_ras.h" @@ -150,3 +151,158 @@ int amdgpu_sdma_process_ecc_irq(struct amdgpu_device *adev, amdgpu_ras_interrupt_dispatch(adev, &ih_data); return 0; } + +static int amdgpu_sdma_init_inst_ctx(struct amdgpu_sdma_instance *sdma_inst) +{ + int err = 0; + uint16_t version_major; + const struct common_firmware_header *header = NULL; + const struct sdma_firmware_header_v1_0 *hdr; + const struct sdma_firmware_header_v2_0 *hdr_v2; + + err = amdgpu_ucode_validate(sdma_inst->fw); + if (err) + return err; + + header = (const struct common_firmware_header *) + sdma_inst->fw->data; + version_major = le16_to_cpu(header->header_version_major); + + switch 
(version_major) { + case 1: + hdr = (const struct sdma_firmware_header_v1_0 *)sdma_inst->fw->data; + sdma_inst->fw_version = le32_to_cpu(hdr->header.ucode_version); + sdma_inst->feature_version = le32_to_cpu(hdr->ucode_feature_version); + break; + case 2: + hdr_v2 = (const struct sdma_firmware_header_v2_0 *)sdma_inst->fw->data; + sdma_inst->fw_version = le32_to_cpu(hdr_v2->header.ucode_version); + sdma_inst->feature_version = le32_to_cpu(hdr_v2->ucode_feature_version); + break; + default: + return -EINVAL; + } + + if (sdma_inst->feature_version >= 20) + sdma_inst->burst_nop = true; + + return 0; +} + +void amdgpu_sdma_destroy_inst_ctx(struct amdgpu_device *adev, + bool duplicate) +{ + int i; + + for (i = 0; i < adev->sdma.num_instances; i++) { + release_firmware(adev->sdma.instance[i].fw); + if (duplicate) + break; + } + + memset((void *)adev->sdma.instance, 0, + sizeof(struct amdgpu_sdma_instance) * AMDGPU_MAX_SDMA_INSTANCES); +} + +int amdgpu_sdma_init_microcode(struct amdgpu_device *adev, + char *fw_name, u32 instance, + bool duplicate) +{ + struct amdgpu_firmware_info *info = NULL; + const struct common_firmware_header *header = NULL; + int err = 0, i; + const struct sdma_firmware_header_v2_0 *sdma_hdr; + uint16_t version_major; + + err = request_firmware(&adev->sdma.instance[instance].fw, fw_name, adev->dev); + if (err) + goto out; + + header = (const struct common_firmware_header *) + adev->sdma.instance[instance].fw->data; + version_major = le16_to_cpu(header->header_version_major); + + if ((duplicate && instance) || (!duplicate && version_major > 1)) { + err = -EINVAL; + goto out; + } + + err = amdgpu_sdma_init_inst_ctx(&adev->sdma.instance[instance]); + if (err) + goto out; + + if (duplicate) { + for (i = 1; i < adev->sdma.num_instances; i++) + memcpy((void *)&adev->sdma.instance[i], + (void *)&adev->sdma.instance[0], + sizeof(struct amdgpu_sdma_instance)); + } + + if (amdgpu_sriov_vf(adev)) + return 0; + + DRM_DEBUG("psp_load == '%s'\n", + adev->firmware.load_type == AMDGPU_FW_LOAD_PSP ? 
"true" : "false"); + + if (adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) { + switch (version_major) { + case 1: + for (i = 0; i < adev->sdma.num_instances; i++) { + if (!duplicate && (instance != i)) + continue; + else { + info = &adev->firmware.ucode[AMDGPU_UCODE_ID_SDMA0 + i]; + info->ucode_id = AMDGPU_UCODE_ID_SDMA0 + i; + info->fw = adev->sdma.instance[i].fw; + adev->firmware.fw_size += + ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE); + } + } + break; + case 2: + sdma_hdr = (const struct sdma_firmware_header_v2_0 *) + adev->sdma.instance[0].fw->data; + info = &adev->firmware.ucode[AMDGPU_UCODE_ID_SDMA_UCODE_TH0]; + info->ucode_id = AMDGPU_UCODE_ID_SDMA_UCODE_TH0; + info->fw = adev->sdma.instance[0].fw; + adev->firmware.fw_size += + ALIGN(le32_to_cpu(sdma_hdr->ctx_ucode_size_bytes), PAGE_SIZE); + info = &adev->firmware.ucode[AMDGPU_UCODE_ID_SDMA_UCODE_TH1]; + info->ucode_id = AMDGPU_UCODE_ID_SDMA_UCODE_TH1; + info->fw = adev->sdma.instance[0].fw; + adev->firmware.fw_size += + ALIGN(le32_to_cpu(sdma_hdr->ctl_ucode_size_bytes), PAGE_SIZE); + break; + default: + err = -EINVAL; + } + } + +out: + if (err) { + DRM_ERROR("SDMA: Failed to init firmware \"%s\"\n", fw_name); + amdgpu_sdma_destroy_inst_ctx(adev, duplicate); + } + return err; +} + +void amdgpu_sdma_unset_buffer_funcs_helper(struct amdgpu_device *adev) +{ + struct amdgpu_ring *sdma; + int i; + + for (i = 0; i < adev->sdma.num_instances; i++) { + if (adev->sdma.has_page_queue) { + sdma = &adev->sdma.instance[i].page; + if (adev->mman.buffer_funcs_ring == sdma) { + amdgpu_ttm_set_buffer_funcs_status(adev, false); + break; + } + } + sdma = &adev->sdma.instance[i].ring; + if (adev->mman.buffer_funcs_ring == sdma) { + amdgpu_ttm_set_buffer_funcs_status(adev, false); + break; + } + } +} diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_sdma.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_sdma.h index 53ac3ebae8d6..7d99205c2e01 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_sdma.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_sdma.h @@ -124,4 +124,10 @@ int amdgpu_sdma_process_ras_data_cb(struct amdgpu_device *adev, int amdgpu_sdma_process_ecc_irq(struct amdgpu_device *adev, struct amdgpu_irq_src *source, struct amdgpu_iv_entry *entry); +int amdgpu_sdma_init_microcode(struct amdgpu_device *adev, + char *fw_name, u32 instance, bool duplicate); +void amdgpu_sdma_destroy_inst_ctx(struct amdgpu_device *adev, + bool duplicate); +void amdgpu_sdma_unset_buffer_funcs_helper(struct amdgpu_device *adev); + #endif diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_sync.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_sync.c index 504af1b93bfa..090e66a1b284 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_sync.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_sync.c @@ -1,3 +1,4 @@ +// SPDX-License-Identifier: MIT /* * Copyright 2014 Advanced Micro Devices, Inc. * All Rights Reserved. 
@@ -315,6 +316,7 @@ struct dma_fence *amdgpu_sync_get_fence(struct amdgpu_sync *sync) struct hlist_node *tmp; struct dma_fence *f; int i; + hash_for_each_safe(sync->fences, i, tmp, e, node) { f = e->fence; @@ -392,7 +394,7 @@ void amdgpu_sync_free(struct amdgpu_sync *sync) { struct amdgpu_sync_entry *e; struct hlist_node *tmp; - unsigned i; + unsigned int i; hash_for_each_safe(sync->fences, i, tmp, e, node) { hash_del(&e->node); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_trace.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_trace.h index 06dfcf297a8d..5e6ddc7e101c 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_trace.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_trace.h @@ -140,8 +140,10 @@ TRACE_EVENT(amdgpu_bo_create, ); TRACE_EVENT(amdgpu_cs, - TP_PROTO(struct amdgpu_cs_parser *p, int i), - TP_ARGS(p, i), + TP_PROTO(struct amdgpu_cs_parser *p, + struct amdgpu_job *job, + struct amdgpu_ib *ib), + TP_ARGS(p, job, ib), TP_STRUCT__entry( __field(struct amdgpu_bo_list *, bo_list) __field(u32, ring) @@ -151,10 +153,10 @@ TRACE_EVENT(amdgpu_cs, TP_fast_assign( __entry->bo_list = p->bo_list; - __entry->ring = to_amdgpu_ring(p->entity->rq->sched)->idx; - __entry->dw = p->job->ibs[i].length_dw; + __entry->ring = to_amdgpu_ring(job->base.sched)->idx; + __entry->dw = ib->length_dw; __entry->fences = amdgpu_fence_count_emitted( - to_amdgpu_ring(p->entity->rq->sched)); + to_amdgpu_ring(job->base.sched)); ), TP_printk("bo_list=%p, ring=%u, dw=%u, fences=%u", __entry->bo_list, __entry->ring, __entry->dw, diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c index 3b4c19412625..dc262d2c2925 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c @@ -424,8 +424,9 @@ error: static bool amdgpu_mem_visible(struct amdgpu_device *adev, struct ttm_resource *mem) { - uint64_t mem_size = (u64)mem->num_pages << PAGE_SHIFT; + u64 mem_size = (u64)mem->num_pages << PAGE_SHIFT; struct amdgpu_res_cursor cursor; + u64 end; if (mem->mem_type == TTM_PL_SYSTEM || mem->mem_type == TTM_PL_TT) @@ -434,12 +435,18 @@ static bool amdgpu_mem_visible(struct amdgpu_device *adev, return false; amdgpu_res_first(mem, 0, mem_size, &cursor); + end = cursor.start + cursor.size; + while (cursor.remaining) { + amdgpu_res_next(&cursor, cursor.size); - /* ttm_resource_ioremap only supports contiguous memory */ - if (cursor.size != mem_size) - return false; + /* ttm_resource_ioremap only supports contiguous memory */ + if (end != cursor.start) + return false; + + end = cursor.start + cursor.size; + } - return cursor.start + cursor.size <= adev->gmc.visible_vram_size; + return end <= adev->gmc.visible_vram_size; } /* @@ -471,7 +478,8 @@ static int amdgpu_bo_move(struct ttm_buffer_object *bo, bool evict, adev = amdgpu_ttm_adev(bo->bdev); - if (old_mem->mem_type == TTM_PL_SYSTEM && bo->ttm == NULL) { + if (!old_mem || (old_mem->mem_type == TTM_PL_SYSTEM && + bo->ttm == NULL)) { ttm_bo_move_null(bo, new_mem); goto out; } @@ -637,6 +645,8 @@ struct amdgpu_ttm_tt { #endif }; +#define ttm_to_amdgpu_ttm_tt(ptr) container_of(ptr, struct amdgpu_ttm_tt, ttm) + #ifdef CONFIG_DRM_AMDGPU_USERPTR /* * amdgpu_ttm_tt_get_user_pages - get device accessible pages that back user @@ -648,7 +658,7 @@ struct amdgpu_ttm_tt { int amdgpu_ttm_tt_get_user_pages(struct amdgpu_bo *bo, struct page **pages) { struct ttm_tt *ttm = bo->tbo.ttm; - struct amdgpu_ttm_tt *gtt = (void *)ttm; + struct amdgpu_ttm_tt *gtt = ttm_to_amdgpu_ttm_tt(ttm); unsigned long start = gtt->userptr; struct vm_area_struct 
*vma; struct mm_struct *mm; @@ -702,7 +712,7 @@ out_unlock: */ bool amdgpu_ttm_tt_get_user_pages_done(struct ttm_tt *ttm) { - struct amdgpu_ttm_tt *gtt = (void *)ttm; + struct amdgpu_ttm_tt *gtt = ttm_to_amdgpu_ttm_tt(ttm); bool r = false; if (!gtt || !gtt->userptr) @@ -751,7 +761,7 @@ static int amdgpu_ttm_tt_pin_userptr(struct ttm_device *bdev, struct ttm_tt *ttm) { struct amdgpu_device *adev = amdgpu_ttm_adev(bdev); - struct amdgpu_ttm_tt *gtt = (void *)ttm; + struct amdgpu_ttm_tt *gtt = ttm_to_amdgpu_ttm_tt(ttm); int write = !(gtt->userflags & AMDGPU_GEM_USERPTR_READONLY); enum dma_data_direction direction = write ? DMA_BIDIRECTIONAL : DMA_TO_DEVICE; @@ -788,7 +798,7 @@ static void amdgpu_ttm_tt_unpin_userptr(struct ttm_device *bdev, struct ttm_tt *ttm) { struct amdgpu_device *adev = amdgpu_ttm_adev(bdev); - struct amdgpu_ttm_tt *gtt = (void *)ttm; + struct amdgpu_ttm_tt *gtt = ttm_to_amdgpu_ttm_tt(ttm); int write = !(gtt->userflags & AMDGPU_GEM_USERPTR_READONLY); enum dma_data_direction direction = write ? DMA_BIDIRECTIONAL : DMA_TO_DEVICE; @@ -822,7 +832,7 @@ static void amdgpu_ttm_gart_bind(struct amdgpu_device *adev, { struct amdgpu_bo *abo = ttm_to_amdgpu_bo(tbo); struct ttm_tt *ttm = tbo->ttm; - struct amdgpu_ttm_tt *gtt = (void *)ttm; + struct amdgpu_ttm_tt *gtt = ttm_to_amdgpu_ttm_tt(ttm); if (amdgpu_bo_encrypted(abo)) flags |= AMDGPU_PTE_TMZ; @@ -860,7 +870,7 @@ static int amdgpu_ttm_backend_bind(struct ttm_device *bdev, struct ttm_resource *bo_mem) { struct amdgpu_device *adev = amdgpu_ttm_adev(bdev); - struct amdgpu_ttm_tt *gtt = (void*)ttm; + struct amdgpu_ttm_tt *gtt = ttm_to_amdgpu_ttm_tt(ttm); uint64_t flags; int r; @@ -927,7 +937,7 @@ int amdgpu_ttm_alloc_gart(struct ttm_buffer_object *bo) { struct amdgpu_device *adev = amdgpu_ttm_adev(bo->bdev); struct ttm_operation_ctx ctx = { false, false }; - struct amdgpu_ttm_tt *gtt = (void *)bo->ttm; + struct amdgpu_ttm_tt *gtt = ttm_to_amdgpu_ttm_tt(bo->ttm); struct ttm_placement placement; struct ttm_place placements; struct ttm_resource *tmp; @@ -998,7 +1008,7 @@ static void amdgpu_ttm_backend_unbind(struct ttm_device *bdev, struct ttm_tt *ttm) { struct amdgpu_device *adev = amdgpu_ttm_adev(bdev); - struct amdgpu_ttm_tt *gtt = (void *)ttm; + struct amdgpu_ttm_tt *gtt = ttm_to_amdgpu_ttm_tt(ttm); /* if the pages have userptr pinning then clear that first */ if (gtt->userptr) { @@ -1025,7 +1035,7 @@ static void amdgpu_ttm_backend_unbind(struct ttm_device *bdev, static void amdgpu_ttm_backend_destroy(struct ttm_device *bdev, struct ttm_tt *ttm) { - struct amdgpu_ttm_tt *gtt = (void *)ttm; + struct amdgpu_ttm_tt *gtt = ttm_to_amdgpu_ttm_tt(ttm); if (gtt->usertask) put_task_struct(gtt->usertask); @@ -1079,7 +1089,7 @@ static int amdgpu_ttm_tt_populate(struct ttm_device *bdev, struct ttm_operation_ctx *ctx) { struct amdgpu_device *adev = amdgpu_ttm_adev(bdev); - struct amdgpu_ttm_tt *gtt = (void *)ttm; + struct amdgpu_ttm_tt *gtt = ttm_to_amdgpu_ttm_tt(ttm); pgoff_t i; int ret; @@ -1113,7 +1123,7 @@ static int amdgpu_ttm_tt_populate(struct ttm_device *bdev, static void amdgpu_ttm_tt_unpopulate(struct ttm_device *bdev, struct ttm_tt *ttm) { - struct amdgpu_ttm_tt *gtt = (void *)ttm; + struct amdgpu_ttm_tt *gtt = ttm_to_amdgpu_ttm_tt(ttm); struct amdgpu_device *adev; pgoff_t i; @@ -1182,7 +1192,7 @@ int amdgpu_ttm_tt_set_userptr(struct ttm_buffer_object *bo, /* Set TTM_TT_FLAG_EXTERNAL before populate but after create. 
*/ bo->ttm->page_flags |= TTM_TT_FLAG_EXTERNAL; - gtt = (void *)bo->ttm; + gtt = ttm_to_amdgpu_ttm_tt(bo->ttm); gtt->userptr = addr; gtt->userflags = flags; @@ -1199,7 +1209,7 @@ int amdgpu_ttm_tt_set_userptr(struct ttm_buffer_object *bo, */ struct mm_struct *amdgpu_ttm_tt_get_usermm(struct ttm_tt *ttm) { - struct amdgpu_ttm_tt *gtt = (void *)ttm; + struct amdgpu_ttm_tt *gtt = ttm_to_amdgpu_ttm_tt(ttm); if (gtt == NULL) return NULL; @@ -1218,7 +1228,7 @@ struct mm_struct *amdgpu_ttm_tt_get_usermm(struct ttm_tt *ttm) bool amdgpu_ttm_tt_affect_userptr(struct ttm_tt *ttm, unsigned long start, unsigned long end, unsigned long *userptr) { - struct amdgpu_ttm_tt *gtt = (void *)ttm; + struct amdgpu_ttm_tt *gtt = ttm_to_amdgpu_ttm_tt(ttm); unsigned long size; if (gtt == NULL || !gtt->userptr) @@ -1241,7 +1251,7 @@ bool amdgpu_ttm_tt_affect_userptr(struct ttm_tt *ttm, unsigned long start, */ bool amdgpu_ttm_tt_is_userptr(struct ttm_tt *ttm) { - struct amdgpu_ttm_tt *gtt = (void *)ttm; + struct amdgpu_ttm_tt *gtt = ttm_to_amdgpu_ttm_tt(ttm); if (gtt == NULL || !gtt->userptr) return false; @@ -1254,7 +1264,7 @@ bool amdgpu_ttm_tt_is_userptr(struct ttm_tt *ttm) */ bool amdgpu_ttm_tt_is_readonly(struct ttm_tt *ttm) { - struct amdgpu_ttm_tt *gtt = (void *)ttm; + struct amdgpu_ttm_tt *gtt = ttm_to_amdgpu_ttm_tt(ttm); if (gtt == NULL) return false; @@ -1327,11 +1337,12 @@ uint64_t amdgpu_ttm_tt_pte_flags(struct amdgpu_device *adev, struct ttm_tt *ttm, static bool amdgpu_ttm_bo_eviction_valuable(struct ttm_buffer_object *bo, const struct ttm_place *place) { - unsigned long num_pages = bo->resource->num_pages; struct dma_resv_iter resv_cursor; - struct amdgpu_res_cursor cursor; struct dma_fence *f; + if (!amdgpu_bo_is_amdgpu_bo(bo)) + return ttm_bo_eviction_valuable(bo, place); + /* Swapout? */ if (bo->resource->mem_type == TTM_PL_SYSTEM) return true; @@ -1350,40 +1361,20 @@ static bool amdgpu_ttm_bo_eviction_valuable(struct ttm_buffer_object *bo, return false; } - switch (bo->resource->mem_type) { - case AMDGPU_PL_PREEMPT: - /* Preemptible BOs don't own system resources managed by the - * driver (pages, VRAM, GART space). They point to resources - * owned by someone else (e.g. pageable memory in user mode - * or a DMABuf). They are used in a preemptible context so we - * can guarantee no deadlocks and good QoS in case of MMU - * notifiers or DMABuf move notifiers from the resource owner. - */ + /* Preemptible BOs don't own system resources managed by the + * driver (pages, VRAM, GART space). They point to resources + * owned by someone else (e.g. pageable memory in user mode + * or a DMABuf). They are used in a preemptible context so we + * can guarantee no deadlocks and good QoS in case of MMU + * notifiers or DMABuf move notifiers from the resource owner. 
+ */ + if (bo->resource->mem_type == AMDGPU_PL_PREEMPT) return false; - case TTM_PL_TT: - if (amdgpu_bo_is_amdgpu_bo(bo) && - amdgpu_bo_encrypted(ttm_to_amdgpu_bo(bo))) - return false; - return true; - case TTM_PL_VRAM: - /* Check each drm MM node individually */ - amdgpu_res_first(bo->resource, 0, (u64)num_pages << PAGE_SHIFT, - &cursor); - while (cursor.remaining) { - if (place->fpfn < PFN_DOWN(cursor.start + cursor.size) - && !(place->lpfn && - place->lpfn <= PFN_DOWN(cursor.start))) - return true; - - amdgpu_res_next(&cursor, cursor.size); - } + if (bo->resource->mem_type == TTM_PL_TT && + amdgpu_bo_encrypted(ttm_to_amdgpu_bo(bo))) return false; - default: - break; - } - return ttm_bo_eviction_valuable(bo, place); } diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ucode.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ucode.c index 939c8614f0e3..dd0bc649a57d 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ucode.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ucode.c @@ -164,70 +164,138 @@ void amdgpu_ucode_print_rlc_hdr(const struct common_firmware_header *hdr) } else if (version_major == 2) { const struct rlc_firmware_header_v2_0 *rlc_hdr = container_of(hdr, struct rlc_firmware_header_v2_0, header); + const struct rlc_firmware_header_v2_1 *rlc_hdr_v2_1 = + container_of(rlc_hdr, struct rlc_firmware_header_v2_1, v2_0); + const struct rlc_firmware_header_v2_2 *rlc_hdr_v2_2 = + container_of(rlc_hdr_v2_1, struct rlc_firmware_header_v2_2, v2_1); + const struct rlc_firmware_header_v2_3 *rlc_hdr_v2_3 = + container_of(rlc_hdr_v2_2, struct rlc_firmware_header_v2_3, v2_2); + const struct rlc_firmware_header_v2_4 *rlc_hdr_v2_4 = + container_of(rlc_hdr_v2_3, struct rlc_firmware_header_v2_4, v2_3); - DRM_DEBUG("ucode_feature_version: %u\n", - le32_to_cpu(rlc_hdr->ucode_feature_version)); - DRM_DEBUG("jt_offset: %u\n", le32_to_cpu(rlc_hdr->jt_offset)); - DRM_DEBUG("jt_size: %u\n", le32_to_cpu(rlc_hdr->jt_size)); - DRM_DEBUG("save_and_restore_offset: %u\n", - le32_to_cpu(rlc_hdr->save_and_restore_offset)); - DRM_DEBUG("clear_state_descriptor_offset: %u\n", - le32_to_cpu(rlc_hdr->clear_state_descriptor_offset)); - DRM_DEBUG("avail_scratch_ram_locations: %u\n", - le32_to_cpu(rlc_hdr->avail_scratch_ram_locations)); - DRM_DEBUG("reg_restore_list_size: %u\n", - le32_to_cpu(rlc_hdr->reg_restore_list_size)); - DRM_DEBUG("reg_list_format_start: %u\n", - le32_to_cpu(rlc_hdr->reg_list_format_start)); - DRM_DEBUG("reg_list_format_separate_start: %u\n", - le32_to_cpu(rlc_hdr->reg_list_format_separate_start)); - DRM_DEBUG("starting_offsets_start: %u\n", - le32_to_cpu(rlc_hdr->starting_offsets_start)); - DRM_DEBUG("reg_list_format_size_bytes: %u\n", - le32_to_cpu(rlc_hdr->reg_list_format_size_bytes)); - DRM_DEBUG("reg_list_format_array_offset_bytes: %u\n", - le32_to_cpu(rlc_hdr->reg_list_format_array_offset_bytes)); - DRM_DEBUG("reg_list_size_bytes: %u\n", - le32_to_cpu(rlc_hdr->reg_list_size_bytes)); - DRM_DEBUG("reg_list_array_offset_bytes: %u\n", - le32_to_cpu(rlc_hdr->reg_list_array_offset_bytes)); - DRM_DEBUG("reg_list_format_separate_size_bytes: %u\n", - le32_to_cpu(rlc_hdr->reg_list_format_separate_size_bytes)); - DRM_DEBUG("reg_list_format_separate_array_offset_bytes: %u\n", - le32_to_cpu(rlc_hdr->reg_list_format_separate_array_offset_bytes)); - DRM_DEBUG("reg_list_separate_size_bytes: %u\n", - le32_to_cpu(rlc_hdr->reg_list_separate_size_bytes)); - DRM_DEBUG("reg_list_separate_array_offset_bytes: %u\n", - le32_to_cpu(rlc_hdr->reg_list_separate_array_offset_bytes)); - if (version_minor == 1) { - const struct 
rlc_firmware_header_v2_1 *v2_1 = - container_of(rlc_hdr, struct rlc_firmware_header_v2_1, v2_0); + switch (version_minor) { + case 0: + /* rlc_hdr v2_0 */ + DRM_DEBUG("ucode_feature_version: %u\n", + le32_to_cpu(rlc_hdr->ucode_feature_version)); + DRM_DEBUG("jt_offset: %u\n", le32_to_cpu(rlc_hdr->jt_offset)); + DRM_DEBUG("jt_size: %u\n", le32_to_cpu(rlc_hdr->jt_size)); + DRM_DEBUG("save_and_restore_offset: %u\n", + le32_to_cpu(rlc_hdr->save_and_restore_offset)); + DRM_DEBUG("clear_state_descriptor_offset: %u\n", + le32_to_cpu(rlc_hdr->clear_state_descriptor_offset)); + DRM_DEBUG("avail_scratch_ram_locations: %u\n", + le32_to_cpu(rlc_hdr->avail_scratch_ram_locations)); + DRM_DEBUG("reg_restore_list_size: %u\n", + le32_to_cpu(rlc_hdr->reg_restore_list_size)); + DRM_DEBUG("reg_list_format_start: %u\n", + le32_to_cpu(rlc_hdr->reg_list_format_start)); + DRM_DEBUG("reg_list_format_separate_start: %u\n", + le32_to_cpu(rlc_hdr->reg_list_format_separate_start)); + DRM_DEBUG("starting_offsets_start: %u\n", + le32_to_cpu(rlc_hdr->starting_offsets_start)); + DRM_DEBUG("reg_list_format_size_bytes: %u\n", + le32_to_cpu(rlc_hdr->reg_list_format_size_bytes)); + DRM_DEBUG("reg_list_format_array_offset_bytes: %u\n", + le32_to_cpu(rlc_hdr->reg_list_format_array_offset_bytes)); + DRM_DEBUG("reg_list_size_bytes: %u\n", + le32_to_cpu(rlc_hdr->reg_list_size_bytes)); + DRM_DEBUG("reg_list_array_offset_bytes: %u\n", + le32_to_cpu(rlc_hdr->reg_list_array_offset_bytes)); + DRM_DEBUG("reg_list_format_separate_size_bytes: %u\n", + le32_to_cpu(rlc_hdr->reg_list_format_separate_size_bytes)); + DRM_DEBUG("reg_list_format_separate_array_offset_bytes: %u\n", + le32_to_cpu(rlc_hdr->reg_list_format_separate_array_offset_bytes)); + DRM_DEBUG("reg_list_separate_size_bytes: %u\n", + le32_to_cpu(rlc_hdr->reg_list_separate_size_bytes)); + DRM_DEBUG("reg_list_separate_array_offset_bytes: %u\n", + le32_to_cpu(rlc_hdr->reg_list_separate_array_offset_bytes)); + break; + case 1: + /* rlc_hdr v2_1 */ DRM_DEBUG("reg_list_format_direct_reg_list_length: %u\n", - le32_to_cpu(v2_1->reg_list_format_direct_reg_list_length)); + le32_to_cpu(rlc_hdr_v2_1->reg_list_format_direct_reg_list_length)); DRM_DEBUG("save_restore_list_cntl_ucode_ver: %u\n", - le32_to_cpu(v2_1->save_restore_list_cntl_ucode_ver)); + le32_to_cpu(rlc_hdr_v2_1->save_restore_list_cntl_ucode_ver)); DRM_DEBUG("save_restore_list_cntl_feature_ver: %u\n", - le32_to_cpu(v2_1->save_restore_list_cntl_feature_ver)); + le32_to_cpu(rlc_hdr_v2_1->save_restore_list_cntl_feature_ver)); DRM_DEBUG("save_restore_list_cntl_size_bytes %u\n", - le32_to_cpu(v2_1->save_restore_list_cntl_size_bytes)); + le32_to_cpu(rlc_hdr_v2_1->save_restore_list_cntl_size_bytes)); DRM_DEBUG("save_restore_list_cntl_offset_bytes: %u\n", - le32_to_cpu(v2_1->save_restore_list_cntl_offset_bytes)); + le32_to_cpu(rlc_hdr_v2_1->save_restore_list_cntl_offset_bytes)); DRM_DEBUG("save_restore_list_gpm_ucode_ver: %u\n", - le32_to_cpu(v2_1->save_restore_list_gpm_ucode_ver)); + le32_to_cpu(rlc_hdr_v2_1->save_restore_list_gpm_ucode_ver)); DRM_DEBUG("save_restore_list_gpm_feature_ver: %u\n", - le32_to_cpu(v2_1->save_restore_list_gpm_feature_ver)); + le32_to_cpu(rlc_hdr_v2_1->save_restore_list_gpm_feature_ver)); DRM_DEBUG("save_restore_list_gpm_size_bytes %u\n", - le32_to_cpu(v2_1->save_restore_list_gpm_size_bytes)); + le32_to_cpu(rlc_hdr_v2_1->save_restore_list_gpm_size_bytes)); DRM_DEBUG("save_restore_list_gpm_offset_bytes: %u\n", - le32_to_cpu(v2_1->save_restore_list_gpm_offset_bytes)); + 
le32_to_cpu(rlc_hdr_v2_1->save_restore_list_gpm_offset_bytes)); DRM_DEBUG("save_restore_list_srm_ucode_ver: %u\n", - le32_to_cpu(v2_1->save_restore_list_srm_ucode_ver)); + le32_to_cpu(rlc_hdr_v2_1->save_restore_list_srm_ucode_ver)); DRM_DEBUG("save_restore_list_srm_feature_ver: %u\n", - le32_to_cpu(v2_1->save_restore_list_srm_feature_ver)); + le32_to_cpu(rlc_hdr_v2_1->save_restore_list_srm_feature_ver)); DRM_DEBUG("save_restore_list_srm_size_bytes %u\n", - le32_to_cpu(v2_1->save_restore_list_srm_size_bytes)); + le32_to_cpu(rlc_hdr_v2_1->save_restore_list_srm_size_bytes)); DRM_DEBUG("save_restore_list_srm_offset_bytes: %u\n", - le32_to_cpu(v2_1->save_restore_list_srm_offset_bytes)); + le32_to_cpu(rlc_hdr_v2_1->save_restore_list_srm_offset_bytes)); + break; + case 2: + /* rlc_hdr v2_2 */ + DRM_DEBUG("rlc_iram_ucode_size_bytes: %u\n", + le32_to_cpu(rlc_hdr_v2_2->rlc_iram_ucode_size_bytes)); + DRM_DEBUG("rlc_iram_ucode_offset_bytes: %u\n", + le32_to_cpu(rlc_hdr_v2_2->rlc_iram_ucode_offset_bytes)); + DRM_DEBUG("rlc_dram_ucode_size_bytes: %u\n", + le32_to_cpu(rlc_hdr_v2_2->rlc_dram_ucode_size_bytes)); + DRM_DEBUG("rlc_dram_ucode_offset_bytes: %u\n", + le32_to_cpu(rlc_hdr_v2_2->rlc_dram_ucode_offset_bytes)); + break; + case 3: + /* rlc_hdr v2_3 */ + DRM_DEBUG("rlcp_ucode_version: %u\n", + le32_to_cpu(rlc_hdr_v2_3->rlcp_ucode_version)); + DRM_DEBUG("rlcp_ucode_feature_version: %u\n", + le32_to_cpu(rlc_hdr_v2_3->rlcp_ucode_feature_version)); + DRM_DEBUG("rlcp_ucode_size_bytes: %u\n", + le32_to_cpu(rlc_hdr_v2_3->rlcp_ucode_size_bytes)); + DRM_DEBUG("rlcp_ucode_offset_bytes: %u\n", + le32_to_cpu(rlc_hdr_v2_3->rlcp_ucode_offset_bytes)); + DRM_DEBUG("rlcv_ucode_version: %u\n", + le32_to_cpu(rlc_hdr_v2_3->rlcv_ucode_version)); + DRM_DEBUG("rlcv_ucode_feature_version: %u\n", + le32_to_cpu(rlc_hdr_v2_3->rlcv_ucode_feature_version)); + DRM_DEBUG("rlcv_ucode_size_bytes: %u\n", + le32_to_cpu(rlc_hdr_v2_3->rlcv_ucode_size_bytes)); + DRM_DEBUG("rlcv_ucode_offset_bytes: %u\n", + le32_to_cpu(rlc_hdr_v2_3->rlcv_ucode_offset_bytes)); + break; + case 4: + /* rlc_hdr v2_4 */ + DRM_DEBUG("global_tap_delays_ucode_size_bytes :%u\n", + le32_to_cpu(rlc_hdr_v2_4->global_tap_delays_ucode_size_bytes)); + DRM_DEBUG("global_tap_delays_ucode_offset_bytes: %u\n", + le32_to_cpu(rlc_hdr_v2_4->global_tap_delays_ucode_offset_bytes)); + DRM_DEBUG("se0_tap_delays_ucode_size_bytes :%u\n", + le32_to_cpu(rlc_hdr_v2_4->se0_tap_delays_ucode_size_bytes)); + DRM_DEBUG("se0_tap_delays_ucode_offset_bytes: %u\n", + le32_to_cpu(rlc_hdr_v2_4->se0_tap_delays_ucode_offset_bytes)); + DRM_DEBUG("se1_tap_delays_ucode_size_bytes :%u\n", + le32_to_cpu(rlc_hdr_v2_4->se1_tap_delays_ucode_size_bytes)); + DRM_DEBUG("se1_tap_delays_ucode_offset_bytes: %u\n", + le32_to_cpu(rlc_hdr_v2_4->se1_tap_delays_ucode_offset_bytes)); + DRM_DEBUG("se2_tap_delays_ucode_size_bytes :%u\n", + le32_to_cpu(rlc_hdr_v2_4->se2_tap_delays_ucode_size_bytes)); + DRM_DEBUG("se2_tap_delays_ucode_offset_bytes: %u\n", + le32_to_cpu(rlc_hdr_v2_4->se2_tap_delays_ucode_offset_bytes)); + DRM_DEBUG("se3_tap_delays_ucode_size_bytes :%u\n", + le32_to_cpu(rlc_hdr_v2_4->se3_tap_delays_ucode_size_bytes)); + DRM_DEBUG("se3_tap_delays_ucode_offset_bytes: %u\n", + le32_to_cpu(rlc_hdr_v2_4->se3_tap_delays_ucode_offset_bytes)); + break; + default: + DRM_ERROR("Unknown RLC v2 ucode: v2.%u\n", version_minor); + break; } } else { DRM_ERROR("Unknown RLC ucode version: %u.%u\n", version_major, version_minor); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ucode.h 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_ucode.h index ebed3f5226db..1c36235b4539 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ucode.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ucode.h @@ -124,6 +124,7 @@ enum psp_fw_type { PSP_FW_TYPE_PSP_SOC_DRV, PSP_FW_TYPE_PSP_INTF_DRV, PSP_FW_TYPE_PSP_DBG_DRV, + PSP_FW_TYPE_PSP_RAS_DRV, }; /* version_major=2, version_minor=0 */ @@ -260,8 +261,12 @@ struct rlc_firmware_header_v2_2 { /* version_major=2, version_minor=3 */ struct rlc_firmware_header_v2_3 { struct rlc_firmware_header_v2_2 v2_2; + uint32_t rlcp_ucode_version; + uint32_t rlcp_ucode_feature_version; uint32_t rlcp_ucode_size_bytes; uint32_t rlcp_ucode_offset_bytes; + uint32_t rlcv_ucode_version; + uint32_t rlcv_ucode_feature_version; uint32_t rlcv_ucode_size_bytes; uint32_t rlcv_ucode_offset_bytes; }; @@ -390,6 +395,7 @@ union amdgpu_firmware_header { struct rlc_firmware_header_v2_1 rlc_v2_1; struct rlc_firmware_header_v2_2 rlc_v2_2; struct rlc_firmware_header_v2_3 rlc_v2_3; + struct rlc_firmware_header_v2_4 rlc_v2_4; struct sdma_firmware_header_v1_0 sdma; struct sdma_firmware_header_v1_1 sdma_v1_1; struct sdma_firmware_header_v2_0 sdma_v2_0; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_umc.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_umc.h index 3629d8f292ef..e46439274f3a 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_umc.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_umc.h @@ -51,6 +51,9 @@ struct amdgpu_umc_ras { struct amdgpu_ras_block_object ras_block; void (*err_cnt_init)(struct amdgpu_device *adev); bool (*query_ras_poison_mode)(struct amdgpu_device *adev); + void (*convert_ras_error_address)(struct amdgpu_device *adev, + struct ras_err_data *err_data, uint64_t err_addr, + uint32_t ch_inst, uint32_t umc_inst); void (*ecc_info_query_ras_error_count)(struct amdgpu_device *adev, void *ras_error_status); void (*ecc_info_query_ras_error_address)(struct amdgpu_device *adev, diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.c index f36e4f08db6d..0b52af415b28 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.c @@ -191,7 +191,7 @@ int amdgpu_vcn_sw_init(struct amdgpu_device *adev) fw_name = FIRMWARE_VCN4_0_2; if ((adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) && (adev->pg_flags & AMD_PG_SUPPORT_VCN_DPG)) - adev->vcn.indirect_sram = false; + adev->vcn.indirect_sram = true; break; case IP_VERSION(4, 0, 4): fw_name = FIRMWARE_VCN4_0_4; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.h index 60c608144480..253ea6b159df 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.h @@ -161,6 +161,8 @@ #define AMDGPU_VCN_SW_RING_FLAG (1 << 9) #define AMDGPU_VCN_FW_LOGGING_FLAG (1 << 10) #define AMDGPU_VCN_SMU_VERSION_INFO_FLAG (1 << 11) +#define AMDGPU_VCN_SMU_DPM_INTERFACE_FLAG (1 << 11) +#define AMDGPU_VCN_VF_RB_SETUP_FLAG (1 << 14) #define AMDGPU_VCN_IB_FLAG_DECODE_BUFFER 0x00000001 #define AMDGPU_VCN_CMD_FLAG_MSG_BUFFER 0x00000001 @@ -170,6 +172,9 @@ #define VCN_CODEC_DISABLE_MASK_HEVC (1 << 2) #define VCN_CODEC_DISABLE_MASK_H264 (1 << 3) +#define AMDGPU_VCN_SMU_DPM_INTERFACE_DGPU (0) +#define AMDGPU_VCN_SMU_DPM_INTERFACE_APU (1) + enum fw_queue_mode { FW_QUEUE_RING_RESET = 1, FW_QUEUE_DPG_HOLD_OFF = 2, @@ -317,12 +322,26 @@ struct amdgpu_fw_shared { struct amdgpu_fw_shared_smu_interface_info smu_interface_info; }; +struct amdgpu_fw_shared_rb_setup { + uint32_t is_rb_enabled_flags; + uint32_t rb_addr_lo; + uint32_t rb_addr_hi; + uint32_t 
rb_size; + uint32_t rb4_addr_lo; + uint32_t rb4_addr_hi; + uint32_t rb4_size; + uint32_t reserved[6]; +}; + struct amdgpu_vcn4_fw_shared { uint32_t present_flag_0; uint8_t pad[12]; struct amdgpu_fw_shared_unified_queue_struct sq; uint8_t pad1[8]; struct amdgpu_fw_shared_fw_logging fw_log; + uint8_t pad2[20]; + struct amdgpu_fw_shared_rb_setup rb_setup; + struct amdgpu_fw_shared_smu_interface_info smu_dpm_interface; }; struct amdgpu_vcn_fwlog { diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c index 9be57389301b..e4af40b9a8aa 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c @@ -690,7 +690,6 @@ void amdgpu_virt_exchange_data(struct amdgpu_device *adev) } } - void amdgpu_detect_virtualization(struct amdgpu_device *adev) { uint32_t reg; @@ -707,6 +706,7 @@ void amdgpu_detect_virtualization(struct amdgpu_device *adev) case CHIP_SIENNA_CICHLID: case CHIP_ARCTURUS: case CHIP_ALDEBARAN: + case CHIP_IP_DISCOVERY: reg = RREG32(mmRCC_IOV_FUNC_IDENTIFIER); break; default: /* other chip doesn't support SRIOV */ @@ -750,6 +750,7 @@ void amdgpu_detect_virtualization(struct amdgpu_device *adev) case CHIP_NAVI10: case CHIP_NAVI12: case CHIP_SIENNA_CICHLID: + case CHIP_IP_DISCOVERY: nv_set_virt_ops(adev); /* try send GPU_INIT_DATA request to host */ amdgpu_virt_request_init_data(adev); @@ -807,6 +808,60 @@ enum amdgpu_sriov_vf_mode amdgpu_virt_get_sriov_vf_mode(struct amdgpu_device *ad return mode; } +bool amdgpu_virt_fw_load_skip_check(struct amdgpu_device *adev, uint32_t ucode_id) +{ + switch (adev->ip_versions[MP0_HWIP][0]) { + case IP_VERSION(13, 0, 0): + /* no vf autoload, white list */ + if (ucode_id == AMDGPU_UCODE_ID_VCN1 || + ucode_id == AMDGPU_UCODE_ID_VCN) + return false; + else + return true; + case IP_VERSION(13, 0, 10): + /* white list */ + if (ucode_id == AMDGPU_UCODE_ID_CAP + || ucode_id == AMDGPU_UCODE_ID_CP_RS64_PFP + || ucode_id == AMDGPU_UCODE_ID_CP_RS64_ME + || ucode_id == AMDGPU_UCODE_ID_CP_RS64_MEC + || ucode_id == AMDGPU_UCODE_ID_CP_RS64_PFP_P0_STACK + || ucode_id == AMDGPU_UCODE_ID_CP_RS64_PFP_P1_STACK + || ucode_id == AMDGPU_UCODE_ID_CP_RS64_ME_P0_STACK + || ucode_id == AMDGPU_UCODE_ID_CP_RS64_ME_P1_STACK + || ucode_id == AMDGPU_UCODE_ID_CP_RS64_MEC_P0_STACK + || ucode_id == AMDGPU_UCODE_ID_CP_RS64_MEC_P1_STACK + || ucode_id == AMDGPU_UCODE_ID_CP_RS64_MEC_P2_STACK + || ucode_id == AMDGPU_UCODE_ID_CP_RS64_MEC_P3_STACK + || ucode_id == AMDGPU_UCODE_ID_CP_MES + || ucode_id == AMDGPU_UCODE_ID_CP_MES_DATA + || ucode_id == AMDGPU_UCODE_ID_CP_MES1 + || ucode_id == AMDGPU_UCODE_ID_CP_MES1_DATA + || ucode_id == AMDGPU_UCODE_ID_VCN1 + || ucode_id == AMDGPU_UCODE_ID_VCN) + return false; + else + return true; + default: + /* lagacy black list */ + if (ucode_id == AMDGPU_UCODE_ID_SDMA0 + || ucode_id == AMDGPU_UCODE_ID_SDMA1 + || ucode_id == AMDGPU_UCODE_ID_SDMA2 + || ucode_id == AMDGPU_UCODE_ID_SDMA3 + || ucode_id == AMDGPU_UCODE_ID_SDMA4 + || ucode_id == AMDGPU_UCODE_ID_SDMA5 + || ucode_id == AMDGPU_UCODE_ID_SDMA6 + || ucode_id == AMDGPU_UCODE_ID_SDMA7 + || ucode_id == AMDGPU_UCODE_ID_RLC_G + || ucode_id == AMDGPU_UCODE_ID_RLC_RESTORE_LIST_CNTL + || ucode_id == AMDGPU_UCODE_ID_RLC_RESTORE_LIST_GPM_MEM + || ucode_id == AMDGPU_UCODE_ID_RLC_RESTORE_LIST_SRM_MEM + || ucode_id == AMDGPU_UCODE_ID_SMC) + return true; + else + return false; + } +} + void amdgpu_virt_update_sriov_video_codec(struct amdgpu_device *adev, struct amdgpu_video_codec_info *encode, uint32_t encode_array_size, struct 
amdgpu_video_codec_info *decode, uint32_t decode_array_size) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.h index 239f232f9c02..d94c31e68a14 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.h @@ -253,6 +253,9 @@ struct amdgpu_virt { uint32_t decode_max_frame_pixels; uint32_t encode_max_dimension_pixels; uint32_t encode_max_frame_pixels; + + /* the ucode id to signal the autoload */ + uint32_t autoload_ucode_id; }; struct amdgpu_video_codec_info; @@ -343,4 +346,6 @@ void amdgpu_sriov_wreg(struct amdgpu_device *adev, u32 acc_flags, u32 hwip); u32 amdgpu_sriov_rreg(struct amdgpu_device *adev, u32 offset, u32 acc_flags, u32 hwip); +bool amdgpu_virt_fw_load_skip_check(struct amdgpu_device *adev, + uint32_t ucode_id); #endif diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vkms.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vkms.c index 108e8e8a1a36..f4b5301ea2a0 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vkms.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vkms.c @@ -282,8 +282,8 @@ static int amdgpu_vkms_plane_atomic_check(struct drm_plane *plane, return PTR_ERR(crtc_state); ret = drm_atomic_helper_check_plane_state(new_plane_state, crtc_state, - DRM_PLANE_HELPER_NO_SCALING, - DRM_PLANE_HELPER_NO_SCALING, + DRM_PLANE_NO_SCALING, + DRM_PLANE_NO_SCALING, false, true); if (ret != 0) return ret; @@ -496,8 +496,7 @@ static int amdgpu_vkms_sw_init(void *handle) adev_to_drm(adev)->mode_config.max_height = YRES_MAX; adev_to_drm(adev)->mode_config.preferred_depth = 24; - /* disable prefer shadow for now due to hibernation issues */ - adev_to_drm(adev)->mode_config.prefer_shadow = 0; + adev_to_drm(adev)->mode_config.prefer_shadow = 1; adev_to_drm(adev)->mode_config.fb_base = adev->gmc.aper_base; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c index 59cac347baa3..83b0c5d86e48 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c @@ -183,10 +183,12 @@ static void amdgpu_vm_bo_evicted(struct amdgpu_vm_bo_base *vm_bo) struct amdgpu_bo *bo = vm_bo->bo; vm_bo->moved = true; + spin_lock(&vm_bo->vm->status_lock); if (bo->tbo.type == ttm_bo_type_kernel) list_move(&vm_bo->vm_status, &vm->evicted); else list_move_tail(&vm_bo->vm_status, &vm->evicted); + spin_unlock(&vm_bo->vm->status_lock); } /** * amdgpu_vm_bo_moved - vm_bo is moved @@ -198,7 +200,9 @@ static void amdgpu_vm_bo_evicted(struct amdgpu_vm_bo_base *vm_bo) */ static void amdgpu_vm_bo_moved(struct amdgpu_vm_bo_base *vm_bo) { + spin_lock(&vm_bo->vm->status_lock); list_move(&vm_bo->vm_status, &vm_bo->vm->moved); + spin_unlock(&vm_bo->vm->status_lock); } /** @@ -211,7 +215,9 @@ static void amdgpu_vm_bo_moved(struct amdgpu_vm_bo_base *vm_bo) */ static void amdgpu_vm_bo_idle(struct amdgpu_vm_bo_base *vm_bo) { + spin_lock(&vm_bo->vm->status_lock); list_move(&vm_bo->vm_status, &vm_bo->vm->idle); + spin_unlock(&vm_bo->vm->status_lock); vm_bo->moved = false; } @@ -225,9 +231,9 @@ static void amdgpu_vm_bo_idle(struct amdgpu_vm_bo_base *vm_bo) */ static void amdgpu_vm_bo_invalidated(struct amdgpu_vm_bo_base *vm_bo) { - spin_lock(&vm_bo->vm->invalidated_lock); + spin_lock(&vm_bo->vm->status_lock); list_move(&vm_bo->vm_status, &vm_bo->vm->invalidated); - spin_unlock(&vm_bo->vm->invalidated_lock); + spin_unlock(&vm_bo->vm->status_lock); } /** @@ -240,10 +246,13 @@ static void amdgpu_vm_bo_invalidated(struct amdgpu_vm_bo_base *vm_bo) */ static void amdgpu_vm_bo_relocated(struct amdgpu_vm_bo_base 
*vm_bo) { - if (vm_bo->bo->parent) + if (vm_bo->bo->parent) { + spin_lock(&vm_bo->vm->status_lock); list_move(&vm_bo->vm_status, &vm_bo->vm->relocated); - else + spin_unlock(&vm_bo->vm->status_lock); + } else { amdgpu_vm_bo_idle(vm_bo); + } } /** @@ -256,9 +265,9 @@ static void amdgpu_vm_bo_relocated(struct amdgpu_vm_bo_base *vm_bo) */ static void amdgpu_vm_bo_done(struct amdgpu_vm_bo_base *vm_bo) { - spin_lock(&vm_bo->vm->invalidated_lock); + spin_lock(&vm_bo->vm->status_lock); list_move(&vm_bo->vm_status, &vm_bo->vm->done); - spin_unlock(&vm_bo->vm->invalidated_lock); + spin_unlock(&vm_bo->vm->status_lock); } /** @@ -363,12 +372,20 @@ int amdgpu_vm_validate_pt_bos(struct amdgpu_device *adev, struct amdgpu_vm *vm, int (*validate)(void *p, struct amdgpu_bo *bo), void *param) { - struct amdgpu_vm_bo_base *bo_base, *tmp; + struct amdgpu_vm_bo_base *bo_base; + struct amdgpu_bo *shadow; + struct amdgpu_bo *bo; int r; - list_for_each_entry_safe(bo_base, tmp, &vm->evicted, vm_status) { - struct amdgpu_bo *bo = bo_base->bo; - struct amdgpu_bo *shadow = amdgpu_bo_shadowed(bo); + spin_lock(&vm->status_lock); + while (!list_empty(&vm->evicted)) { + bo_base = list_first_entry(&vm->evicted, + struct amdgpu_vm_bo_base, + vm_status); + spin_unlock(&vm->status_lock); + + bo = bo_base->bo; + shadow = amdgpu_bo_shadowed(bo); r = validate(param, bo); if (r) @@ -385,7 +402,9 @@ int amdgpu_vm_validate_pt_bos(struct amdgpu_device *adev, struct amdgpu_vm *vm, vm->update_funcs->map_table(to_amdgpu_bo_vm(bo)); amdgpu_vm_bo_relocated(bo_base); } + spin_lock(&vm->status_lock); } + spin_unlock(&vm->status_lock); amdgpu_vm_eviction_lock(vm); vm->evicting = false; @@ -406,13 +425,18 @@ int amdgpu_vm_validate_pt_bos(struct amdgpu_device *adev, struct amdgpu_vm *vm, */ bool amdgpu_vm_ready(struct amdgpu_vm *vm) { + bool empty; bool ret; amdgpu_vm_eviction_lock(vm); ret = !vm->evicting; amdgpu_vm_eviction_unlock(vm); - return ret && list_empty(&vm->evicted); + spin_lock(&vm->status_lock); + empty = list_empty(&vm->evicted); + spin_unlock(&vm->status_lock); + + return ret && empty; } /** @@ -680,9 +704,14 @@ int amdgpu_vm_update_pdes(struct amdgpu_device *adev, struct amdgpu_vm_update_params params; struct amdgpu_vm_bo_base *entry; bool flush_tlb_needed = false; + LIST_HEAD(relocated); int r, idx; - if (list_empty(&vm->relocated)) + spin_lock(&vm->status_lock); + list_splice_init(&vm->relocated, &relocated); + spin_unlock(&vm->status_lock); + + if (list_empty(&relocated)) return 0; if (!drm_dev_enter(adev_to_drm(adev), &idx)) @@ -697,7 +726,7 @@ int amdgpu_vm_update_pdes(struct amdgpu_device *adev, if (r) goto error; - list_for_each_entry(entry, &vm->relocated, vm_status) { + list_for_each_entry(entry, &relocated, vm_status) { /* vm_flush_needed after updating moved PDEs */ flush_tlb_needed |= entry->moved; @@ -713,9 +742,8 @@ int amdgpu_vm_update_pdes(struct amdgpu_device *adev, if (flush_tlb_needed) atomic64_inc(&vm->tlb_seq); - while (!list_empty(&vm->relocated)) { - entry = list_first_entry(&vm->relocated, - struct amdgpu_vm_bo_base, + while (!list_empty(&relocated)) { + entry = list_first_entry(&relocated, struct amdgpu_vm_bo_base, vm_status); amdgpu_vm_bo_idle(entry); } @@ -912,6 +940,7 @@ void amdgpu_vm_get_memory(struct amdgpu_vm *vm, uint64_t *vram_mem, { struct amdgpu_bo_va *bo_va, *tmp; + spin_lock(&vm->status_lock); list_for_each_entry_safe(bo_va, tmp, &vm->idle, base.vm_status) { if (!bo_va->base.bo) continue; @@ -936,7 +965,6 @@ void amdgpu_vm_get_memory(struct amdgpu_vm *vm, uint64_t *vram_mem, 
amdgpu_bo_get_memory(bo_va->base.bo, vram_mem, gtt_mem, cpu_mem); } - spin_lock(&vm->invalidated_lock); list_for_each_entry_safe(bo_va, tmp, &vm->invalidated, base.vm_status) { if (!bo_va->base.bo) continue; @@ -949,7 +977,7 @@ void amdgpu_vm_get_memory(struct amdgpu_vm *vm, uint64_t *vram_mem, amdgpu_bo_get_memory(bo_va->base.bo, vram_mem, gtt_mem, cpu_mem); } - spin_unlock(&vm->invalidated_lock); + spin_unlock(&vm->status_lock); } /** * amdgpu_vm_bo_update - update all BO mappings in the vm page table @@ -1278,24 +1306,29 @@ int amdgpu_vm_clear_freed(struct amdgpu_device *adev, int amdgpu_vm_handle_moved(struct amdgpu_device *adev, struct amdgpu_vm *vm) { - struct amdgpu_bo_va *bo_va, *tmp; + struct amdgpu_bo_va *bo_va; struct dma_resv *resv; bool clear; int r; - list_for_each_entry_safe(bo_va, tmp, &vm->moved, base.vm_status) { + spin_lock(&vm->status_lock); + while (!list_empty(&vm->moved)) { + bo_va = list_first_entry(&vm->moved, struct amdgpu_bo_va, + base.vm_status); + spin_unlock(&vm->status_lock); + /* Per VM BOs never need to bo cleared in the page tables */ r = amdgpu_vm_bo_update(adev, bo_va, false); if (r) return r; + spin_lock(&vm->status_lock); } - spin_lock(&vm->invalidated_lock); while (!list_empty(&vm->invalidated)) { bo_va = list_first_entry(&vm->invalidated, struct amdgpu_bo_va, base.vm_status); resv = bo_va->base.bo->tbo.base.resv; - spin_unlock(&vm->invalidated_lock); + spin_unlock(&vm->status_lock); /* Try to reserve the BO to avoid clearing its ptes */ if (!amdgpu_vm_debug && dma_resv_trylock(resv)) @@ -1310,9 +1343,9 @@ int amdgpu_vm_handle_moved(struct amdgpu_device *adev, if (!clear) dma_resv_unlock(resv); - spin_lock(&vm->invalidated_lock); + spin_lock(&vm->status_lock); } - spin_unlock(&vm->invalidated_lock); + spin_unlock(&vm->status_lock); return 0; } @@ -1387,7 +1420,7 @@ static void amdgpu_vm_bo_insert_map(struct amdgpu_device *adev, if (bo && bo->tbo.base.resv == vm->root.bo->tbo.base.resv && !bo_va->base.moved) { - list_move(&bo_va->base.vm_status, &vm->moved); + amdgpu_vm_bo_moved(&bo_va->base); } trace_amdgpu_vm_bo_map(bo_va, mapping); } @@ -1763,9 +1796,9 @@ void amdgpu_vm_bo_del(struct amdgpu_device *adev, } } - spin_lock(&vm->invalidated_lock); + spin_lock(&vm->status_lock); list_del(&bo_va->base.vm_status); - spin_unlock(&vm->invalidated_lock); + spin_unlock(&vm->status_lock); list_for_each_entry_safe(mapping, next, &bo_va->valids, list) { list_del(&mapping->list); @@ -2019,9 +2052,11 @@ int amdgpu_vm_init(struct amdgpu_device *adev, struct amdgpu_vm *vm) INIT_LIST_HEAD(&vm->moved); INIT_LIST_HEAD(&vm->idle); INIT_LIST_HEAD(&vm->invalidated); - spin_lock_init(&vm->invalidated_lock); + spin_lock_init(&vm->status_lock); INIT_LIST_HEAD(&vm->freed); INIT_LIST_HEAD(&vm->done); + INIT_LIST_HEAD(&vm->pt_freed); + INIT_WORK(&vm->pt_free_work, amdgpu_vm_pt_free_work); /* create scheduler entities for page table updates */ r = drm_sched_entity_init(&vm->immediate, DRM_SCHED_PRIORITY_NORMAL, @@ -2223,6 +2258,8 @@ void amdgpu_vm_fini(struct amdgpu_device *adev, struct amdgpu_vm *vm) amdgpu_amdkfd_gpuvm_destroy_cb(adev, vm); + flush_work(&vm->pt_free_work); + root = amdgpu_bo_ref(vm->root.bo); amdgpu_bo_reserve(root, true); amdgpu_vm_set_pasid(adev, vm, 0); @@ -2484,8 +2521,7 @@ bool amdgpu_vm_handle_fault(struct amdgpu_device *adev, u32 pasid, /* Intentionally setting invalid PTE flag * combination to force a no-retry-fault */ - flags = AMDGPU_PTE_EXECUTABLE | AMDGPU_PDE_PTE | - AMDGPU_PTE_TF; + flags = AMDGPU_PTE_SNOOPED | AMDGPU_PTE_PRT; value = 0; } else if 
(amdgpu_vm_fault_stop == AMDGPU_VM_FAULT_STOP_NEVER) { /* Redirect the access to the dummy page */ @@ -2548,6 +2584,7 @@ void amdgpu_debugfs_vm_bo_info(struct amdgpu_vm *vm, struct seq_file *m) unsigned int total_done_objs = 0; unsigned int id = 0; + spin_lock(&vm->status_lock); seq_puts(m, "\tIdle BOs:\n"); list_for_each_entry_safe(bo_va, tmp, &vm->idle, base.vm_status) { if (!bo_va->base.bo) @@ -2585,7 +2622,6 @@ void amdgpu_debugfs_vm_bo_info(struct amdgpu_vm *vm, struct seq_file *m) id = 0; seq_puts(m, "\tInvalidated BOs:\n"); - spin_lock(&vm->invalidated_lock); list_for_each_entry_safe(bo_va, tmp, &vm->invalidated, base.vm_status) { if (!bo_va->base.bo) continue; @@ -2600,7 +2636,7 @@ void amdgpu_debugfs_vm_bo_info(struct amdgpu_vm *vm, struct seq_file *m) continue; total_done += amdgpu_bo_print_info(id++, bo_va->base.bo, m); } - spin_unlock(&vm->invalidated_lock); + spin_unlock(&vm->status_lock); total_done_objs = id; seq_printf(m, "\tTotal idle size: %12lld\tobjs:\t%d\n", total_idle, diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h index 9ecb7f663e19..83acb7bd80fe 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h @@ -254,6 +254,9 @@ struct amdgpu_vm { bool evicting; unsigned int saved_flags; + /* Lock to protect vm_bo add/del/move on all lists of vm */ + spinlock_t status_lock; + /* BOs who needs a validation */ struct list_head evicted; @@ -268,7 +271,6 @@ struct amdgpu_vm { /* regular invalidated BOs, but not yet updated in the PT */ struct list_head invalidated; - spinlock_t invalidated_lock; /* BO mappings freed, but not yet updated in the PT */ struct list_head freed; @@ -276,6 +278,10 @@ struct amdgpu_vm { /* BOs which are invalidated, has been updated in the PTs */ struct list_head done; + /* PT BOs scheduled to free and fill with zero if vm_resv is not hold */ + struct list_head pt_freed; + struct work_struct pt_free_work; + /* contains the page directory */ struct amdgpu_vm_bo_base root; struct dma_fence *last_update; @@ -471,6 +477,7 @@ int amdgpu_vm_pde_update(struct amdgpu_vm_update_params *params, int amdgpu_vm_ptes_update(struct amdgpu_vm_update_params *params, uint64_t start, uint64_t end, uint64_t dst, uint64_t flags); +void amdgpu_vm_pt_free_work(struct work_struct *work); #if defined(CONFIG_DEBUG_FS) void amdgpu_debugfs_vm_bo_info(struct amdgpu_vm *vm, struct seq_file *m); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm_pt.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm_pt.c index 88de9f0d4728..358b91243e37 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm_pt.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm_pt.c @@ -637,10 +637,34 @@ static void amdgpu_vm_pt_free(struct amdgpu_vm_bo_base *entry) } ttm_bo_set_bulk_move(&entry->bo->tbo, NULL); entry->bo->vm_bo = NULL; + + spin_lock(&entry->vm->status_lock); list_del(&entry->vm_status); + spin_unlock(&entry->vm->status_lock); amdgpu_bo_unref(&entry->bo); } +void amdgpu_vm_pt_free_work(struct work_struct *work) +{ + struct amdgpu_vm_bo_base *entry, *next; + struct amdgpu_vm *vm; + LIST_HEAD(pt_freed); + + vm = container_of(work, struct amdgpu_vm, pt_free_work); + + spin_lock(&vm->status_lock); + list_splice_init(&vm->pt_freed, &pt_freed); + spin_unlock(&vm->status_lock); + + /* flush_work in amdgpu_vm_fini ensure vm->root.bo is valid. 
*/ + amdgpu_bo_reserve(vm->root.bo, true); + + list_for_each_entry_safe(entry, next, &pt_freed, vm_status) + amdgpu_vm_pt_free(entry); + + amdgpu_bo_unreserve(vm->root.bo); +} + /** * amdgpu_vm_pt_free_dfs - free PD/PT levels * @@ -652,11 +676,24 @@ static void amdgpu_vm_pt_free(struct amdgpu_vm_bo_base *entry) */ static void amdgpu_vm_pt_free_dfs(struct amdgpu_device *adev, struct amdgpu_vm *vm, - struct amdgpu_vm_pt_cursor *start) + struct amdgpu_vm_pt_cursor *start, + bool unlocked) { struct amdgpu_vm_pt_cursor cursor; struct amdgpu_vm_bo_base *entry; + if (unlocked) { + spin_lock(&vm->status_lock); + for_each_amdgpu_vm_pt_dfs_safe(adev, vm, start, cursor, entry) + list_move(&entry->vm_status, &vm->pt_freed); + + if (start) + list_move(&start->entry->vm_status, &vm->pt_freed); + spin_unlock(&vm->status_lock); + schedule_work(&vm->pt_free_work); + return; + } + for_each_amdgpu_vm_pt_dfs_safe(adev, vm, start, cursor, entry) amdgpu_vm_pt_free(entry); @@ -673,7 +710,7 @@ static void amdgpu_vm_pt_free_dfs(struct amdgpu_device *adev, */ void amdgpu_vm_pt_free_root(struct amdgpu_device *adev, struct amdgpu_vm *vm) { - amdgpu_vm_pt_free_dfs(adev, vm, NULL); + amdgpu_vm_pt_free_dfs(adev, vm, NULL, false); } /** @@ -966,7 +1003,8 @@ int amdgpu_vm_ptes_update(struct amdgpu_vm_update_params *params, if (cursor.entry->bo) { params->table_freed = true; amdgpu_vm_pt_free_dfs(adev, params->vm, - &cursor); + &cursor, + params->unlocked); } amdgpu_vm_pt_next(adev, &cursor); } diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm_sdma.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm_sdma.c index c7bf189d50de..69e105fa41f6 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm_sdma.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm_sdma.c @@ -112,7 +112,8 @@ static int amdgpu_vm_sdma_commit(struct amdgpu_vm_update_params *p, swap(p->vm->last_unlocked, tmp); dma_fence_put(tmp); } else { - amdgpu_bo_fence(p->vm->root.bo, f, true); + dma_resv_add_fence(p->vm->root.bo->tbo.base.resv, f, + DMA_RESV_USAGE_BOOKKEEP); } if (fence && !p->immediate) { @@ -218,12 +219,15 @@ static int amdgpu_vm_sdma_update(struct amdgpu_vm_update_params *p, int r; /* Wait for PD/PT moves to be completed */ - dma_resv_for_each_fence(&cursor, bo->tbo.base.resv, - DMA_RESV_USAGE_KERNEL, fence) { + dma_resv_iter_begin(&cursor, bo->tbo.base.resv, DMA_RESV_USAGE_KERNEL); + dma_resv_for_each_fence_unlocked(&cursor, fence) { r = amdgpu_sync_fence(&p->job->sync, fence); - if (r) + if (r) { + dma_resv_iter_end(&cursor); return r; + } } + dma_resv_iter_end(&cursor); do { ndw = p->num_dw_left; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.c index 28ec5f8ac1c1..73a517bcf5c1 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.c @@ -721,6 +721,72 @@ uint64_t amdgpu_vram_mgr_vis_usage(struct amdgpu_vram_mgr *mgr) } /** + * amdgpu_vram_mgr_intersects - test each drm buddy block for intersection + * + * @man: TTM memory type manager + * @res: The resource to test + * @place: The place to test against + * @size: Size of the new allocation + * + * Test each drm buddy block for intersection for eviction decision. 
+ */ +static bool amdgpu_vram_mgr_intersects(struct ttm_resource_manager *man, + struct ttm_resource *res, + const struct ttm_place *place, + size_t size) +{ + struct amdgpu_vram_mgr_resource *mgr = to_amdgpu_vram_mgr_resource(res); + struct drm_buddy_block *block; + + /* Check each drm buddy block individually */ + list_for_each_entry(block, &mgr->blocks, link) { + unsigned long fpfn = + amdgpu_vram_mgr_block_start(block) >> PAGE_SHIFT; + unsigned long lpfn = fpfn + + (amdgpu_vram_mgr_block_size(block) >> PAGE_SHIFT); + + if (place->fpfn < lpfn && + (!place->lpfn || place->lpfn > fpfn)) + return true; + } + + return false; +} + +/** + * amdgpu_vram_mgr_compatible - test each drm buddy block for compatibility + * + * @man: TTM memory type manager + * @res: The resource to test + * @place: The place to test against + * @size: Size of the new allocation + * + * Test each drm buddy block for placement compatibility. + */ +static bool amdgpu_vram_mgr_compatible(struct ttm_resource_manager *man, + struct ttm_resource *res, + const struct ttm_place *place, + size_t size) +{ + struct amdgpu_vram_mgr_resource *mgr = to_amdgpu_vram_mgr_resource(res); + struct drm_buddy_block *block; + + /* Check each drm buddy block individually */ + list_for_each_entry(block, &mgr->blocks, link) { + unsigned long fpfn = + amdgpu_vram_mgr_block_start(block) >> PAGE_SHIFT; + unsigned long lpfn = fpfn + + (amdgpu_vram_mgr_block_size(block) >> PAGE_SHIFT); + + if (fpfn < place->fpfn || + (place->lpfn && lpfn > place->lpfn)) + return false; + } + + return true; +} + +/** * amdgpu_vram_mgr_debug - dump VRAM table * * @man: TTM memory type manager @@ -753,6 +819,8 @@ static void amdgpu_vram_mgr_debug(struct ttm_resource_manager *man, static const struct ttm_resource_manager_func amdgpu_vram_mgr_func = { .alloc = amdgpu_vram_mgr_new, .free = amdgpu_vram_mgr_del, + .intersects = amdgpu_vram_mgr_intersects, + .compatible = amdgpu_vram_mgr_compatible, .debug = amdgpu_vram_mgr_debug }; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.c index 1b108d03e785..47159e9a0884 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.c @@ -392,12 +392,20 @@ struct amdgpu_hive_info *amdgpu_get_xgmi_hive(struct amdgpu_device *adev) } /** + * Only init hive->reset_domain for none SRIOV configuration. For SRIOV, + * Host driver decide how to reset the GPU either through FLR or chain reset. + * Guest side will get individual notifications from the host for the FLR + * if necessary. 
+ */ + if (!amdgpu_sriov_vf(adev)) { + /** * Avoid recreating reset domain when hive is reconstructed for the case - * of reset the devices in the XGMI hive during probe for SRIOV + * of reset the devices in the XGMI hive during probe for passthrough GPU * See https://www.spinics.net/lists/amd-gfx/msg58836.html */ - if (adev->reset_domain->type != XGMI_HIVE) { - hive->reset_domain = amdgpu_reset_create_reset_domain(XGMI_HIVE, "amdgpu-reset-hive"); + if (adev->reset_domain->type != XGMI_HIVE) { + hive->reset_domain = + amdgpu_reset_create_reset_domain(XGMI_HIVE, "amdgpu-reset-hive"); if (!hive->reset_domain) { dev_err(adev->dev, "XGMI: failed initializing reset domain for xgmi hive\n"); ret = -ENOMEM; @@ -406,9 +414,10 @@ struct amdgpu_hive_info *amdgpu_get_xgmi_hive(struct amdgpu_device *adev) hive = NULL; goto pro_end; } - } else { - amdgpu_reset_get_reset_domain(adev->reset_domain); - hive->reset_domain = adev->reset_domain; + } else { + amdgpu_reset_get_reset_domain(adev->reset_domain); + hive->reset_domain = adev->reset_domain; + } } hive->hive_id = adev->gmc.xgmi.hive_id; @@ -504,6 +513,9 @@ int amdgpu_xgmi_update_topology(struct amdgpu_hive_info *hive, struct amdgpu_dev { int ret; + if (amdgpu_sriov_vf(adev)) + return 0; + /* Each psp need to set the latest topology */ ret = psp_xgmi_set_topology_info(&adev->psp, atomic_read(&hive->number_devices), @@ -742,7 +754,7 @@ int amdgpu_xgmi_remove_device(struct amdgpu_device *adev) amdgpu_put_xgmi_hive(hive); } - return psp_xgmi_terminate(&adev->psp); + return 0; } static int amdgpu_xgmi_ras_late_init(struct amdgpu_device *adev, struct ras_common_if *ras_block) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.h index 552e6fb55aa8..30dcc1681b4e 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.h @@ -43,6 +43,7 @@ struct amdgpu_hive_info { } pstate; struct amdgpu_reset_domain *reset_domain; + uint32_t device_remove_count; }; struct amdgpu_pcs_ras_field { diff --git a/drivers/gpu/drm/amd/amdgpu/athub_v3_0.c b/drivers/gpu/drm/amd/amdgpu/athub_v3_0.c index 33a8a7365aef..f0e235f98afb 100644 --- a/drivers/gpu/drm/amd/amdgpu/athub_v3_0.c +++ b/drivers/gpu/drm/amd/amdgpu/athub_v3_0.c @@ -28,13 +28,44 @@ #include "navi10_enum.h" #include "soc15_common.h" +#define regATHUB_MISC_CNTL_V3_0_1 0x00d7 +#define regATHUB_MISC_CNTL_V3_0_1_BASE_IDX 0 + + +static uint32_t athub_v3_0_get_cg_cntl(struct amdgpu_device *adev) +{ + uint32_t data; + + switch (adev->ip_versions[ATHUB_HWIP][0]) { + case IP_VERSION(3, 0, 1): + data = RREG32_SOC15(ATHUB, 0, regATHUB_MISC_CNTL_V3_0_1); + break; + default: + data = RREG32_SOC15(ATHUB, 0, regATHUB_MISC_CNTL); + break; + } + return data; +} + +static void athub_v3_0_set_cg_cntl(struct amdgpu_device *adev, uint32_t data) +{ + switch (adev->ip_versions[ATHUB_HWIP][0]) { + case IP_VERSION(3, 0, 1): + WREG32_SOC15(ATHUB, 0, regATHUB_MISC_CNTL_V3_0_1, data); + break; + default: + WREG32_SOC15(ATHUB, 0, regATHUB_MISC_CNTL, data); + break; + } +} + static void athub_v3_0_update_medium_grain_clock_gating(struct amdgpu_device *adev, bool enable) { uint32_t def, data; - def = data = RREG32_SOC15(ATHUB, 0, regATHUB_MISC_CNTL); + def = data = athub_v3_0_get_cg_cntl(adev); if (enable && (adev->cg_flags & AMD_CG_SUPPORT_ATHUB_MGCG)) data |= ATHUB_MISC_CNTL__CG_ENABLE_MASK; @@ -42,7 +73,7 @@ athub_v3_0_update_medium_grain_clock_gating(struct amdgpu_device *adev, data &= ~ATHUB_MISC_CNTL__CG_ENABLE_MASK; if (def != data) - WREG32_SOC15(ATHUB, 0, 
regATHUB_MISC_CNTL, data); + athub_v3_0_set_cg_cntl(adev, data); } static void @@ -51,7 +82,7 @@ athub_v3_0_update_medium_grain_light_sleep(struct amdgpu_device *adev, { uint32_t def, data; - def = data = RREG32_SOC15(ATHUB, 0, regATHUB_MISC_CNTL); + def = data = athub_v3_0_get_cg_cntl(adev); if (enable && (adev->cg_flags & AMD_CG_SUPPORT_ATHUB_LS)) data |= ATHUB_MISC_CNTL__CG_MEM_LS_ENABLE_MASK; @@ -59,7 +90,7 @@ athub_v3_0_update_medium_grain_light_sleep(struct amdgpu_device *adev, data &= ~ATHUB_MISC_CNTL__CG_MEM_LS_ENABLE_MASK; if (def != data) - WREG32_SOC15(ATHUB, 0, regATHUB_MISC_CNTL, data); + athub_v3_0_set_cg_cntl(adev, data); } int athub_v3_0_set_clockgating(struct amdgpu_device *adev, @@ -70,6 +101,7 @@ int athub_v3_0_set_clockgating(struct amdgpu_device *adev, switch (adev->ip_versions[ATHUB_HWIP][0]) { case IP_VERSION(3, 0, 0): + case IP_VERSION(3, 0, 1): case IP_VERSION(3, 0, 2): athub_v3_0_update_medium_grain_clock_gating(adev, state == AMD_CG_STATE_GATE); @@ -88,7 +120,7 @@ void athub_v3_0_get_clockgating(struct amdgpu_device *adev, u64 *flags) int data; /* AMD_CG_SUPPORT_ATHUB_MGCG */ - data = RREG32_SOC15(ATHUB, 0, regATHUB_MISC_CNTL); + data = athub_v3_0_get_cg_cntl(adev); if (data & ATHUB_MISC_CNTL__CG_ENABLE_MASK) *flags |= AMD_CG_SUPPORT_ATHUB_MGCG; diff --git a/drivers/gpu/drm/amd/amdgpu/atombios_encoders.c b/drivers/gpu/drm/amd/amdgpu/atombios_encoders.c index fa7421afb9a6..6be9ac2b9c5b 100644 --- a/drivers/gpu/drm/amd/amdgpu/atombios_encoders.c +++ b/drivers/gpu/drm/amd/amdgpu/atombios_encoders.c @@ -26,6 +26,8 @@ #include <linux/pci.h> +#include <acpi/video.h> + #include <drm/drm_crtc_helper.h> #include <drm/amdgpu_drm.h> #include "amdgpu.h" @@ -182,7 +184,12 @@ void amdgpu_atombios_encoder_init_backlight(struct amdgpu_encoder *amdgpu_encode return; if (!(adev->mode_info.firmware_flags & ATOM_BIOS_INFO_BL_CONTROLLED_BY_GPU)) - return; + goto register_acpi_backlight; + + if (!acpi_video_backlight_use_native()) { + drm_info(dev, "Skipping amdgpu atom DIG backlight registration\n"); + goto register_acpi_backlight; + } pdata = kmalloc(sizeof(struct amdgpu_backlight_privdata), GFP_KERNEL); if (!pdata) { @@ -218,6 +225,11 @@ void amdgpu_atombios_encoder_init_backlight(struct amdgpu_encoder *amdgpu_encode error: kfree(pdata); return; + +register_acpi_backlight: + /* Try registering an ACPI video backlight device instead. 
*/ + acpi_video_register_backlight(); + return; } void diff --git a/drivers/gpu/drm/amd/amdgpu/cik_sdma.c b/drivers/gpu/drm/amd/amdgpu/cik_sdma.c index 5647f13b98d4..cbca9866645c 100644 --- a/drivers/gpu/drm/amd/amdgpu/cik_sdma.c +++ b/drivers/gpu/drm/amd/amdgpu/cik_sdma.c @@ -309,14 +309,10 @@ static void cik_sdma_ring_emit_fence(struct amdgpu_ring *ring, u64 addr, u64 seq */ static void cik_sdma_gfx_stop(struct amdgpu_device *adev) { - struct amdgpu_ring *sdma0 = &adev->sdma.instance[0].ring; - struct amdgpu_ring *sdma1 = &adev->sdma.instance[1].ring; u32 rb_cntl; int i; - if ((adev->mman.buffer_funcs_ring == sdma0) || - (adev->mman.buffer_funcs_ring == sdma1)) - amdgpu_ttm_set_buffer_funcs_status(adev, false); + amdgpu_sdma_unset_buffer_funcs_helper(adev); for (i = 0; i < adev->sdma.num_instances; i++) { rb_cntl = RREG32(mmSDMA0_GFX_RB_CNTL + sdma_offsets[i]); diff --git a/drivers/gpu/drm/amd/amdgpu/dce_v10_0.c b/drivers/gpu/drm/amd/amdgpu/dce_v10_0.c index 9c964cd3b5d4..288fce7dc0ed 100644 --- a/drivers/gpu/drm/amd/amdgpu/dce_v10_0.c +++ b/drivers/gpu/drm/amd/amdgpu/dce_v10_0.c @@ -2796,8 +2796,7 @@ static int dce_v10_0_sw_init(void *handle) adev_to_drm(adev)->mode_config.max_height = 16384; adev_to_drm(adev)->mode_config.preferred_depth = 24; - /* disable prefer shadow for now due to hibernation issues */ - adev_to_drm(adev)->mode_config.prefer_shadow = 0; + adev_to_drm(adev)->mode_config.prefer_shadow = 1; adev_to_drm(adev)->mode_config.fb_modifiers_not_supported = true; diff --git a/drivers/gpu/drm/amd/amdgpu/dce_v11_0.c b/drivers/gpu/drm/amd/amdgpu/dce_v11_0.c index e0ad9f27dc3f..cbe5250b31cb 100644 --- a/drivers/gpu/drm/amd/amdgpu/dce_v11_0.c +++ b/drivers/gpu/drm/amd/amdgpu/dce_v11_0.c @@ -2914,8 +2914,7 @@ static int dce_v11_0_sw_init(void *handle) adev_to_drm(adev)->mode_config.max_height = 16384; adev_to_drm(adev)->mode_config.preferred_depth = 24; - /* disable prefer shadow for now due to hibernation issues */ - adev_to_drm(adev)->mode_config.prefer_shadow = 0; + adev_to_drm(adev)->mode_config.prefer_shadow = 1; adev_to_drm(adev)->mode_config.fb_modifiers_not_supported = true; diff --git a/drivers/gpu/drm/amd/amdgpu/dce_v6_0.c b/drivers/gpu/drm/amd/amdgpu/dce_v6_0.c index 77f5e998a120..b1c44fab074f 100644 --- a/drivers/gpu/drm/amd/amdgpu/dce_v6_0.c +++ b/drivers/gpu/drm/amd/amdgpu/dce_v6_0.c @@ -2673,8 +2673,7 @@ static int dce_v6_0_sw_init(void *handle) adev_to_drm(adev)->mode_config.max_width = 16384; adev_to_drm(adev)->mode_config.max_height = 16384; adev_to_drm(adev)->mode_config.preferred_depth = 24; - /* disable prefer shadow for now due to hibernation issues */ - adev_to_drm(adev)->mode_config.prefer_shadow = 0; + adev_to_drm(adev)->mode_config.prefer_shadow = 1; adev_to_drm(adev)->mode_config.fb_modifiers_not_supported = true; adev_to_drm(adev)->mode_config.fb_base = adev->gmc.aper_base; diff --git a/drivers/gpu/drm/amd/amdgpu/dce_v8_0.c b/drivers/gpu/drm/amd/amdgpu/dce_v8_0.c index 802e5c753271..a22b45c92792 100644 --- a/drivers/gpu/drm/amd/amdgpu/dce_v8_0.c +++ b/drivers/gpu/drm/amd/amdgpu/dce_v8_0.c @@ -2693,8 +2693,11 @@ static int dce_v8_0_sw_init(void *handle) adev_to_drm(adev)->mode_config.max_height = 16384; adev_to_drm(adev)->mode_config.preferred_depth = 24; - /* disable prefer shadow for now due to hibernation issues */ - adev_to_drm(adev)->mode_config.prefer_shadow = 0; + if (adev->asic_type == CHIP_HAWAII) + /* disable prefer shadow for now due to hibernation issues */ + adev_to_drm(adev)->mode_config.prefer_shadow = 0; + else + 
adev_to_drm(adev)->mode_config.prefer_shadow = 1; adev_to_drm(adev)->mode_config.fb_modifiers_not_supported = true; diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c index fafbad3cf08d..af94ac580d3e 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c @@ -3943,56 +3943,6 @@ static void gfx_v10_0_check_fw_write_wait(struct amdgpu_device *adev) DRM_WARN_ONCE("CP firmware version too old, please update!"); } - -static void gfx_v10_0_init_rlc_ext_microcode(struct amdgpu_device *adev) -{ - const struct rlc_firmware_header_v2_1 *rlc_hdr; - - rlc_hdr = (const struct rlc_firmware_header_v2_1 *)adev->gfx.rlc_fw->data; - adev->gfx.rlc_srlc_fw_version = le32_to_cpu(rlc_hdr->save_restore_list_cntl_ucode_ver); - adev->gfx.rlc_srlc_feature_version = le32_to_cpu(rlc_hdr->save_restore_list_cntl_feature_ver); - adev->gfx.rlc.save_restore_list_cntl_size_bytes = le32_to_cpu(rlc_hdr->save_restore_list_cntl_size_bytes); - adev->gfx.rlc.save_restore_list_cntl = (u8 *)rlc_hdr + le32_to_cpu(rlc_hdr->save_restore_list_cntl_offset_bytes); - adev->gfx.rlc_srlg_fw_version = le32_to_cpu(rlc_hdr->save_restore_list_gpm_ucode_ver); - adev->gfx.rlc_srlg_feature_version = le32_to_cpu(rlc_hdr->save_restore_list_gpm_feature_ver); - adev->gfx.rlc.save_restore_list_gpm_size_bytes = le32_to_cpu(rlc_hdr->save_restore_list_gpm_size_bytes); - adev->gfx.rlc.save_restore_list_gpm = (u8 *)rlc_hdr + le32_to_cpu(rlc_hdr->save_restore_list_gpm_offset_bytes); - adev->gfx.rlc_srls_fw_version = le32_to_cpu(rlc_hdr->save_restore_list_srm_ucode_ver); - adev->gfx.rlc_srls_feature_version = le32_to_cpu(rlc_hdr->save_restore_list_srm_feature_ver); - adev->gfx.rlc.save_restore_list_srm_size_bytes = le32_to_cpu(rlc_hdr->save_restore_list_srm_size_bytes); - adev->gfx.rlc.save_restore_list_srm = (u8 *)rlc_hdr + le32_to_cpu(rlc_hdr->save_restore_list_srm_offset_bytes); - adev->gfx.rlc.reg_list_format_direct_reg_list_length = - le32_to_cpu(rlc_hdr->reg_list_format_direct_reg_list_length); -} - -static void gfx_v10_0_init_rlc_iram_dram_microcode(struct amdgpu_device *adev) -{ - const struct rlc_firmware_header_v2_2 *rlc_hdr; - - rlc_hdr = (const struct rlc_firmware_header_v2_2 *)adev->gfx.rlc_fw->data; - adev->gfx.rlc.rlc_iram_ucode_size_bytes = le32_to_cpu(rlc_hdr->rlc_iram_ucode_size_bytes); - adev->gfx.rlc.rlc_iram_ucode = (u8 *)rlc_hdr + le32_to_cpu(rlc_hdr->rlc_iram_ucode_offset_bytes); - adev->gfx.rlc.rlc_dram_ucode_size_bytes = le32_to_cpu(rlc_hdr->rlc_dram_ucode_size_bytes); - adev->gfx.rlc.rlc_dram_ucode = (u8 *)rlc_hdr + le32_to_cpu(rlc_hdr->rlc_dram_ucode_offset_bytes); -} - -static void gfx_v10_0_init_tap_delays_microcode(struct amdgpu_device *adev) -{ - const struct rlc_firmware_header_v2_4 *rlc_hdr; - - rlc_hdr = (const struct rlc_firmware_header_v2_4 *)adev->gfx.rlc_fw->data; - adev->gfx.rlc.global_tap_delays_ucode_size_bytes = le32_to_cpu(rlc_hdr->global_tap_delays_ucode_size_bytes); - adev->gfx.rlc.global_tap_delays_ucode = (u8 *)rlc_hdr + le32_to_cpu(rlc_hdr->global_tap_delays_ucode_offset_bytes); - adev->gfx.rlc.se0_tap_delays_ucode_size_bytes = le32_to_cpu(rlc_hdr->se0_tap_delays_ucode_size_bytes); - adev->gfx.rlc.se0_tap_delays_ucode = (u8 *)rlc_hdr + le32_to_cpu(rlc_hdr->se0_tap_delays_ucode_offset_bytes); - adev->gfx.rlc.se1_tap_delays_ucode_size_bytes = le32_to_cpu(rlc_hdr->se1_tap_delays_ucode_size_bytes); - adev->gfx.rlc.se1_tap_delays_ucode = (u8 *)rlc_hdr + le32_to_cpu(rlc_hdr->se1_tap_delays_ucode_offset_bytes); - 
adev->gfx.rlc.se2_tap_delays_ucode_size_bytes = le32_to_cpu(rlc_hdr->se2_tap_delays_ucode_size_bytes); - adev->gfx.rlc.se2_tap_delays_ucode = (u8 *)rlc_hdr + le32_to_cpu(rlc_hdr->se2_tap_delays_ucode_offset_bytes); - adev->gfx.rlc.se3_tap_delays_ucode_size_bytes = le32_to_cpu(rlc_hdr->se3_tap_delays_ucode_size_bytes); - adev->gfx.rlc.se3_tap_delays_ucode = (u8 *)rlc_hdr + le32_to_cpu(rlc_hdr->se3_tap_delays_ucode_offset_bytes); -} - static bool gfx_v10_0_navi10_gfxoff_should_enable(struct amdgpu_device *adev) { bool ret = false; @@ -4028,12 +3978,7 @@ static int gfx_v10_0_init_microcode(struct amdgpu_device *adev) char fw_name[40]; char *wks = ""; int err; - struct amdgpu_firmware_info *info = NULL; - const struct common_firmware_header *header = NULL; - const struct gfx_firmware_header_v1_0 *cp_hdr; const struct rlc_firmware_header_v2_0 *rlc_hdr; - unsigned int *tmp = NULL; - unsigned int i = 0; uint16_t version_major; uint16_t version_minor; @@ -4091,9 +4036,7 @@ static int gfx_v10_0_init_microcode(struct amdgpu_device *adev) err = amdgpu_ucode_validate(adev->gfx.pfp_fw); if (err) goto out; - cp_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.pfp_fw->data; - adev->gfx.pfp_fw_version = le32_to_cpu(cp_hdr->header.ucode_version); - adev->gfx.pfp_feature_version = le32_to_cpu(cp_hdr->ucode_feature_version); + amdgpu_gfx_cp_init_microcode(adev, AMDGPU_UCODE_ID_CP_PFP); snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_me%s.bin", chip_name, wks); err = request_firmware(&adev->gfx.me_fw, fw_name, adev->dev); @@ -4102,9 +4045,7 @@ static int gfx_v10_0_init_microcode(struct amdgpu_device *adev) err = amdgpu_ucode_validate(adev->gfx.me_fw); if (err) goto out; - cp_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.me_fw->data; - adev->gfx.me_fw_version = le32_to_cpu(cp_hdr->header.ucode_version); - adev->gfx.me_feature_version = le32_to_cpu(cp_hdr->ucode_feature_version); + amdgpu_gfx_cp_init_microcode(adev, AMDGPU_UCODE_ID_CP_ME); snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_ce%s.bin", chip_name, wks); err = request_firmware(&adev->gfx.ce_fw, fw_name, adev->dev); @@ -4113,69 +4054,27 @@ static int gfx_v10_0_init_microcode(struct amdgpu_device *adev) err = amdgpu_ucode_validate(adev->gfx.ce_fw); if (err) goto out; - cp_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.ce_fw->data; - adev->gfx.ce_fw_version = le32_to_cpu(cp_hdr->header.ucode_version); - adev->gfx.ce_feature_version = le32_to_cpu(cp_hdr->ucode_feature_version); + amdgpu_gfx_cp_init_microcode(adev, AMDGPU_UCODE_ID_CP_CE); if (!amdgpu_sriov_vf(adev)) { snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_rlc.bin", chip_name); err = request_firmware(&adev->gfx.rlc_fw, fw_name, adev->dev); if (err) goto out; + /* don't check this. 
There are apparently firmwares in the wild with + * incorrect size in the header + */ err = amdgpu_ucode_validate(adev->gfx.rlc_fw); + if (err) + dev_dbg(adev->dev, + "gfx10: amdgpu_ucode_validate() failed \"%s\"\n", + fw_name); rlc_hdr = (const struct rlc_firmware_header_v2_0 *)adev->gfx.rlc_fw->data; version_major = le16_to_cpu(rlc_hdr->header.header_version_major); version_minor = le16_to_cpu(rlc_hdr->header.header_version_minor); - - adev->gfx.rlc_fw_version = le32_to_cpu(rlc_hdr->header.ucode_version); - adev->gfx.rlc_feature_version = le32_to_cpu(rlc_hdr->ucode_feature_version); - adev->gfx.rlc.save_and_restore_offset = - le32_to_cpu(rlc_hdr->save_and_restore_offset); - adev->gfx.rlc.clear_state_descriptor_offset = - le32_to_cpu(rlc_hdr->clear_state_descriptor_offset); - adev->gfx.rlc.avail_scratch_ram_locations = - le32_to_cpu(rlc_hdr->avail_scratch_ram_locations); - adev->gfx.rlc.reg_restore_list_size = - le32_to_cpu(rlc_hdr->reg_restore_list_size); - adev->gfx.rlc.reg_list_format_start = - le32_to_cpu(rlc_hdr->reg_list_format_start); - adev->gfx.rlc.reg_list_format_separate_start = - le32_to_cpu(rlc_hdr->reg_list_format_separate_start); - adev->gfx.rlc.starting_offsets_start = - le32_to_cpu(rlc_hdr->starting_offsets_start); - adev->gfx.rlc.reg_list_format_size_bytes = - le32_to_cpu(rlc_hdr->reg_list_format_size_bytes); - adev->gfx.rlc.reg_list_size_bytes = - le32_to_cpu(rlc_hdr->reg_list_size_bytes); - adev->gfx.rlc.register_list_format = - kmalloc(adev->gfx.rlc.reg_list_format_size_bytes + - adev->gfx.rlc.reg_list_size_bytes, GFP_KERNEL); - if (!adev->gfx.rlc.register_list_format) { - err = -ENOMEM; + err = amdgpu_gfx_rlc_init_microcode(adev, version_major, version_minor); + if (err) goto out; - } - - tmp = (unsigned int *)((uintptr_t)rlc_hdr + - le32_to_cpu(rlc_hdr->reg_list_format_array_offset_bytes)); - for (i = 0 ; i < (rlc_hdr->reg_list_format_size_bytes >> 2); i++) - adev->gfx.rlc.register_list_format[i] = le32_to_cpu(tmp[i]); - - adev->gfx.rlc.register_restore = adev->gfx.rlc.register_list_format + i; - - tmp = (unsigned int *)((uintptr_t)rlc_hdr + - le32_to_cpu(rlc_hdr->reg_list_array_offset_bytes)); - for (i = 0 ; i < (rlc_hdr->reg_list_size_bytes >> 2); i++) - adev->gfx.rlc.register_restore[i] = le32_to_cpu(tmp[i]); - - if (version_major == 2) { - if (version_minor >= 1) - gfx_v10_0_init_rlc_ext_microcode(adev); - if (version_minor >= 2) - gfx_v10_0_init_rlc_iram_dram_microcode(adev); - if (version_minor == 4) { - gfx_v10_0_init_tap_delays_microcode(adev); - } - } } snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_mec%s.bin", chip_name, wks); @@ -4185,9 +4084,8 @@ static int gfx_v10_0_init_microcode(struct amdgpu_device *adev) err = amdgpu_ucode_validate(adev->gfx.mec_fw); if (err) goto out; - cp_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.mec_fw->data; - adev->gfx.mec_fw_version = le32_to_cpu(cp_hdr->header.ucode_version); - adev->gfx.mec_feature_version = le32_to_cpu(cp_hdr->ucode_feature_version); + amdgpu_gfx_cp_init_microcode(adev, AMDGPU_UCODE_ID_CP_MEC1); + amdgpu_gfx_cp_init_microcode(adev, AMDGPU_UCODE_ID_CP_MEC1_JT); snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_mec2%s.bin", chip_name, wks); err = request_firmware(&adev->gfx.mec2_fw, fw_name, adev->dev); @@ -4195,154 +4093,18 @@ static int gfx_v10_0_init_microcode(struct amdgpu_device *adev) err = amdgpu_ucode_validate(adev->gfx.mec2_fw); if (err) goto out; - cp_hdr = (const struct gfx_firmware_header_v1_0 *) - adev->gfx.mec2_fw->data; - adev->gfx.mec2_fw_version = - 
le32_to_cpu(cp_hdr->header.ucode_version); - adev->gfx.mec2_feature_version = - le32_to_cpu(cp_hdr->ucode_feature_version); + amdgpu_gfx_cp_init_microcode(adev, AMDGPU_UCODE_ID_CP_MEC2); + amdgpu_gfx_cp_init_microcode(adev, AMDGPU_UCODE_ID_CP_MEC2_JT); } else { err = 0; adev->gfx.mec2_fw = NULL; } - if (adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) { - info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_PFP]; - info->ucode_id = AMDGPU_UCODE_ID_CP_PFP; - info->fw = adev->gfx.pfp_fw; - header = (const struct common_firmware_header *)info->fw->data; - adev->firmware.fw_size += - ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE); - - info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_ME]; - info->ucode_id = AMDGPU_UCODE_ID_CP_ME; - info->fw = adev->gfx.me_fw; - header = (const struct common_firmware_header *)info->fw->data; - adev->firmware.fw_size += - ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE); - - info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_CE]; - info->ucode_id = AMDGPU_UCODE_ID_CP_CE; - info->fw = adev->gfx.ce_fw; - header = (const struct common_firmware_header *)info->fw->data; - adev->firmware.fw_size += - ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE); - - info = &adev->firmware.ucode[AMDGPU_UCODE_ID_RLC_G]; - info->ucode_id = AMDGPU_UCODE_ID_RLC_G; - info->fw = adev->gfx.rlc_fw; - if (info->fw) { - header = (const struct common_firmware_header *)info->fw->data; - adev->firmware.fw_size += - ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE); - } - if (adev->gfx.rlc.save_restore_list_cntl_size_bytes && - adev->gfx.rlc.save_restore_list_gpm_size_bytes && - adev->gfx.rlc.save_restore_list_srm_size_bytes) { - info = &adev->firmware.ucode[AMDGPU_UCODE_ID_RLC_RESTORE_LIST_CNTL]; - info->ucode_id = AMDGPU_UCODE_ID_RLC_RESTORE_LIST_CNTL; - info->fw = adev->gfx.rlc_fw; - adev->firmware.fw_size += - ALIGN(adev->gfx.rlc.save_restore_list_cntl_size_bytes, PAGE_SIZE); - - info = &adev->firmware.ucode[AMDGPU_UCODE_ID_RLC_RESTORE_LIST_GPM_MEM]; - info->ucode_id = AMDGPU_UCODE_ID_RLC_RESTORE_LIST_GPM_MEM; - info->fw = adev->gfx.rlc_fw; - adev->firmware.fw_size += - ALIGN(adev->gfx.rlc.save_restore_list_gpm_size_bytes, PAGE_SIZE); - - info = &adev->firmware.ucode[AMDGPU_UCODE_ID_RLC_RESTORE_LIST_SRM_MEM]; - info->ucode_id = AMDGPU_UCODE_ID_RLC_RESTORE_LIST_SRM_MEM; - info->fw = adev->gfx.rlc_fw; - adev->firmware.fw_size += - ALIGN(adev->gfx.rlc.save_restore_list_srm_size_bytes, PAGE_SIZE); - - if (adev->gfx.rlc.rlc_iram_ucode_size_bytes && - adev->gfx.rlc.rlc_dram_ucode_size_bytes) { - info = &adev->firmware.ucode[AMDGPU_UCODE_ID_RLC_IRAM]; - info->ucode_id = AMDGPU_UCODE_ID_RLC_IRAM; - info->fw = adev->gfx.rlc_fw; - adev->firmware.fw_size += - ALIGN(adev->gfx.rlc.rlc_iram_ucode_size_bytes, PAGE_SIZE); - - info = &adev->firmware.ucode[AMDGPU_UCODE_ID_RLC_DRAM]; - info->ucode_id = AMDGPU_UCODE_ID_RLC_DRAM; - info->fw = adev->gfx.rlc_fw; - adev->firmware.fw_size += - ALIGN(adev->gfx.rlc.rlc_dram_ucode_size_bytes, PAGE_SIZE); - } - - } - - info = &adev->firmware.ucode[AMDGPU_UCODE_ID_GLOBAL_TAP_DELAYS]; - info->ucode_id = AMDGPU_UCODE_ID_GLOBAL_TAP_DELAYS; - info->fw = adev->gfx.rlc_fw; - adev->firmware.fw_size += - ALIGN(adev->gfx.rlc.global_tap_delays_ucode_size_bytes, PAGE_SIZE); - - info = &adev->firmware.ucode[AMDGPU_UCODE_ID_SE0_TAP_DELAYS]; - info->ucode_id = AMDGPU_UCODE_ID_SE0_TAP_DELAYS; - info->fw = adev->gfx.rlc_fw; - adev->firmware.fw_size += - ALIGN(adev->gfx.rlc.se0_tap_delays_ucode_size_bytes, PAGE_SIZE); - - info = 
&adev->firmware.ucode[AMDGPU_UCODE_ID_SE1_TAP_DELAYS]; - info->ucode_id = AMDGPU_UCODE_ID_SE1_TAP_DELAYS; - info->fw = adev->gfx.rlc_fw; - adev->firmware.fw_size += - ALIGN(adev->gfx.rlc.se1_tap_delays_ucode_size_bytes, PAGE_SIZE); - - info = &adev->firmware.ucode[AMDGPU_UCODE_ID_SE2_TAP_DELAYS]; - info->ucode_id = AMDGPU_UCODE_ID_SE2_TAP_DELAYS; - info->fw = adev->gfx.rlc_fw; - adev->firmware.fw_size += - ALIGN(adev->gfx.rlc.se2_tap_delays_ucode_size_bytes, PAGE_SIZE); - - info = &adev->firmware.ucode[AMDGPU_UCODE_ID_SE3_TAP_DELAYS]; - info->ucode_id = AMDGPU_UCODE_ID_SE3_TAP_DELAYS; - info->fw = adev->gfx.rlc_fw; - adev->firmware.fw_size += - ALIGN(adev->gfx.rlc.se3_tap_delays_ucode_size_bytes, PAGE_SIZE); - - info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_MEC1]; - info->ucode_id = AMDGPU_UCODE_ID_CP_MEC1; - info->fw = adev->gfx.mec_fw; - header = (const struct common_firmware_header *)info->fw->data; - cp_hdr = (const struct gfx_firmware_header_v1_0 *)info->fw->data; - adev->firmware.fw_size += - ALIGN(le32_to_cpu(header->ucode_size_bytes) - - le32_to_cpu(cp_hdr->jt_size) * 4, PAGE_SIZE); - - info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_MEC1_JT]; - info->ucode_id = AMDGPU_UCODE_ID_CP_MEC1_JT; - info->fw = adev->gfx.mec_fw; - adev->firmware.fw_size += - ALIGN(le32_to_cpu(cp_hdr->jt_size) * 4, PAGE_SIZE); - - if (adev->gfx.mec2_fw) { - info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_MEC2]; - info->ucode_id = AMDGPU_UCODE_ID_CP_MEC2; - info->fw = adev->gfx.mec2_fw; - header = (const struct common_firmware_header *)info->fw->data; - cp_hdr = (const struct gfx_firmware_header_v1_0 *)info->fw->data; - adev->firmware.fw_size += - ALIGN(le32_to_cpu(header->ucode_size_bytes) - - le32_to_cpu(cp_hdr->jt_size) * 4, - PAGE_SIZE); - info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_MEC2_JT]; - info->ucode_id = AMDGPU_UCODE_ID_CP_MEC2_JT; - info->fw = adev->gfx.mec2_fw; - adev->firmware.fw_size += - ALIGN(le32_to_cpu(cp_hdr->jt_size) * 4, - PAGE_SIZE); - } - } - gfx_v10_0_check_fw_write_wait(adev); out: if (err) { dev_err(adev->dev, - "gfx10: Failed to load firmware \"%s\"\n", + "gfx10: Failed to init firmware \"%s\"\n", fw_name); release_firmware(adev->gfx.pfp_fw); adev->gfx.pfp_fw = NULL; @@ -4846,7 +4608,7 @@ static int gfx_v10_0_sw_init(void *handle) case IP_VERSION(10, 3, 3): case IP_VERSION(10, 3, 7): adev->gfx.me.num_me = 1; - adev->gfx.me.num_pipe_per_me = 2; + adev->gfx.me.num_pipe_per_me = 1; adev->gfx.me.num_queue_per_pipe = 1; adev->gfx.mec.num_mec = 2; adev->gfx.mec.num_pipe_per_mec = 4; @@ -5971,6 +5733,9 @@ static int gfx_v10_0_cp_gfx_enable(struct amdgpu_device *adev, bool enable) WREG32_SOC15(GC, 0, mmCP_ME_CNTL, tmp); } + if (adev->job_hang && !enable) + return 0; + for (i = 0; i < adev->usec_timeout; i++) { if (RREG32_SOC15(GC, 0, mmCP_STAT) == 0) break; @@ -7569,8 +7334,10 @@ static int gfx_v10_0_kiq_disable_kgq(struct amdgpu_device *adev) for (i = 0; i < adev->gfx.num_gfx_rings; i++) kiq->pmf->kiq_unmap_queues(kiq_ring, &adev->gfx.gfx_ring[i], PREEMPT_QUEUES, 0, 0); - - return amdgpu_ring_test_helper(kiq_ring); + if (!adev->job_hang) + return amdgpu_ring_test_helper(kiq_ring); + else + return 0; } #endif diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c index 6fd71cb10e54..251109723ab6 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c @@ -53,6 +53,7 @@ #define GFX11_MEC_HPD_SIZE 2048 #define RLCG_UCODE_LOADING_START_ADDRESS 0x00002000L +#define RLC_PG_DELAY_3_DEFAULT_GC_11_0_1 0x1388 
#define regCGTT_WD_CLK_CTRL 0x5086 #define regCGTT_WD_CLK_CTRL_BASE_IDX 1 @@ -72,21 +73,10 @@ MODULE_FIRMWARE("amdgpu/gc_11_0_2_pfp.bin"); MODULE_FIRMWARE("amdgpu/gc_11_0_2_me.bin"); MODULE_FIRMWARE("amdgpu/gc_11_0_2_mec.bin"); MODULE_FIRMWARE("amdgpu/gc_11_0_2_rlc.bin"); - -static const struct soc15_reg_golden golden_settings_gc_11_0[] = -{ - /* Pending on emulation bring up */ -}; - -static const struct soc15_reg_golden golden_settings_gc_11_0_0[] = -{ - /* Pending on emulation bring up */ -}; - -static const struct soc15_reg_golden golden_settings_gc_rlc_spm_11_0[] = -{ - /* Pending on emulation bring up */ -}; +MODULE_FIRMWARE("amdgpu/gc_11_0_3_pfp.bin"); +MODULE_FIRMWARE("amdgpu/gc_11_0_3_me.bin"); +MODULE_FIRMWARE("amdgpu/gc_11_0_3_mec.bin"); +MODULE_FIRMWARE("amdgpu/gc_11_0_3_rlc.bin"); static const struct soc15_reg_golden golden_settings_gc_11_0_1[] = { @@ -130,6 +120,8 @@ static void gfx_v11_0_ring_invalidate_tlbs(struct amdgpu_ring *ring, bool all_hub, uint8_t dst_sel); static void gfx_v11_0_set_safe_mode(struct amdgpu_device *adev); static void gfx_v11_0_unset_safe_mode(struct amdgpu_device *adev); +static void gfx_v11_0_update_perf_clk(struct amdgpu_device *adev, + bool enable); static void gfx11_kiq_set_resources(struct amdgpu_ring *kiq_ring, uint64_t queue_mask) { @@ -266,42 +258,17 @@ static void gfx_v11_0_set_kiq_pm4_funcs(struct amdgpu_device *adev) adev->gfx.kiq.pmf = &gfx_v11_0_kiq_pm4_funcs; } -static void gfx_v11_0_init_spm_golden_registers(struct amdgpu_device *adev) -{ - switch (adev->ip_versions[GC_HWIP][0]) { - case IP_VERSION(11, 0, 0): - soc15_program_register_sequence(adev, - golden_settings_gc_rlc_spm_11_0, - (const u32)ARRAY_SIZE(golden_settings_gc_rlc_spm_11_0)); - break; - default: - break; - } -} - static void gfx_v11_0_init_golden_registers(struct amdgpu_device *adev) { switch (adev->ip_versions[GC_HWIP][0]) { - case IP_VERSION(11, 0, 0): - soc15_program_register_sequence(adev, - golden_settings_gc_11_0, - (const u32)ARRAY_SIZE(golden_settings_gc_11_0)); - soc15_program_register_sequence(adev, - golden_settings_gc_11_0_0, - (const u32)ARRAY_SIZE(golden_settings_gc_11_0_0)); - break; case IP_VERSION(11, 0, 1): soc15_program_register_sequence(adev, - golden_settings_gc_11_0, - (const u32)ARRAY_SIZE(golden_settings_gc_11_0)); - soc15_program_register_sequence(adev, golden_settings_gc_11_0_1, (const u32)ARRAY_SIZE(golden_settings_gc_11_0_1)); break; default: break; } - gfx_v11_0_init_spm_golden_registers(adev); } static void gfx_v11_0_write_data_to_reg(struct amdgpu_ring *ring, int eng_sel, @@ -471,61 +438,12 @@ static void gfx_v11_0_free_microcode(struct amdgpu_device *adev) kfree(adev->gfx.rlc.register_list_format); } -static void gfx_v11_0_init_rlc_ext_microcode(struct amdgpu_device *adev) -{ - const struct rlc_firmware_header_v2_1 *rlc_hdr; - - rlc_hdr = (const struct rlc_firmware_header_v2_1 *)adev->gfx.rlc_fw->data; - adev->gfx.rlc_srlc_fw_version = le32_to_cpu(rlc_hdr->save_restore_list_cntl_ucode_ver); - adev->gfx.rlc_srlc_feature_version = le32_to_cpu(rlc_hdr->save_restore_list_cntl_feature_ver); - adev->gfx.rlc.save_restore_list_cntl_size_bytes = le32_to_cpu(rlc_hdr->save_restore_list_cntl_size_bytes); - adev->gfx.rlc.save_restore_list_cntl = (u8 *)rlc_hdr + le32_to_cpu(rlc_hdr->save_restore_list_cntl_offset_bytes); - adev->gfx.rlc_srlg_fw_version = le32_to_cpu(rlc_hdr->save_restore_list_gpm_ucode_ver); - adev->gfx.rlc_srlg_feature_version = le32_to_cpu(rlc_hdr->save_restore_list_gpm_feature_ver); - adev->gfx.rlc.save_restore_list_gpm_size_bytes = 
le32_to_cpu(rlc_hdr->save_restore_list_gpm_size_bytes); - adev->gfx.rlc.save_restore_list_gpm = (u8 *)rlc_hdr + le32_to_cpu(rlc_hdr->save_restore_list_gpm_offset_bytes); - adev->gfx.rlc_srls_fw_version = le32_to_cpu(rlc_hdr->save_restore_list_srm_ucode_ver); - adev->gfx.rlc_srls_feature_version = le32_to_cpu(rlc_hdr->save_restore_list_srm_feature_ver); - adev->gfx.rlc.save_restore_list_srm_size_bytes = le32_to_cpu(rlc_hdr->save_restore_list_srm_size_bytes); - adev->gfx.rlc.save_restore_list_srm = (u8 *)rlc_hdr + le32_to_cpu(rlc_hdr->save_restore_list_srm_offset_bytes); - adev->gfx.rlc.reg_list_format_direct_reg_list_length = - le32_to_cpu(rlc_hdr->reg_list_format_direct_reg_list_length); -} - -static void gfx_v11_0_init_rlc_iram_dram_microcode(struct amdgpu_device *adev) -{ - const struct rlc_firmware_header_v2_2 *rlc_hdr; - - rlc_hdr = (const struct rlc_firmware_header_v2_2 *)adev->gfx.rlc_fw->data; - adev->gfx.rlc.rlc_iram_ucode_size_bytes = le32_to_cpu(rlc_hdr->rlc_iram_ucode_size_bytes); - adev->gfx.rlc.rlc_iram_ucode = (u8 *)rlc_hdr + le32_to_cpu(rlc_hdr->rlc_iram_ucode_offset_bytes); - adev->gfx.rlc.rlc_dram_ucode_size_bytes = le32_to_cpu(rlc_hdr->rlc_dram_ucode_size_bytes); - adev->gfx.rlc.rlc_dram_ucode = (u8 *)rlc_hdr + le32_to_cpu(rlc_hdr->rlc_dram_ucode_offset_bytes); -} - -static void gfx_v11_0_init_rlcp_rlcv_microcode(struct amdgpu_device *adev) -{ - const struct rlc_firmware_header_v2_3 *rlc_hdr; - - rlc_hdr = (const struct rlc_firmware_header_v2_3 *)adev->gfx.rlc_fw->data; - adev->gfx.rlc.rlcp_ucode_size_bytes = le32_to_cpu(rlc_hdr->rlcp_ucode_size_bytes); - adev->gfx.rlc.rlcp_ucode = (u8 *)rlc_hdr + le32_to_cpu(rlc_hdr->rlcp_ucode_offset_bytes); - adev->gfx.rlc.rlcv_ucode_size_bytes = le32_to_cpu(rlc_hdr->rlcv_ucode_size_bytes); - adev->gfx.rlc.rlcv_ucode = (u8 *)rlc_hdr + le32_to_cpu(rlc_hdr->rlcv_ucode_offset_bytes); -} - static int gfx_v11_0_init_microcode(struct amdgpu_device *adev) { char fw_name[40]; char ucode_prefix[30]; int err; - struct amdgpu_firmware_info *info = NULL; - const struct common_firmware_header *header = NULL; - const struct gfx_firmware_header_v1_0 *cp_hdr; - const struct gfx_firmware_header_v2_0 *cp_hdr_v2_0; const struct rlc_firmware_header_v2_0 *rlc_hdr; - unsigned int *tmp = NULL; - unsigned int i = 0; uint16_t version_major; uint16_t version_minor; @@ -546,14 +464,11 @@ static int gfx_v11_0_init_microcode(struct amdgpu_device *adev) adev->gfx.pfp_fw->data, 2, 0); if (adev->gfx.rs64_enable) { dev_info(adev->dev, "CP RS64 enable\n"); - cp_hdr_v2_0 = (const struct gfx_firmware_header_v2_0 *)adev->gfx.pfp_fw->data; - adev->gfx.pfp_fw_version = le32_to_cpu(cp_hdr_v2_0->header.ucode_version); - adev->gfx.pfp_feature_version = le32_to_cpu(cp_hdr_v2_0->ucode_feature_version); - + amdgpu_gfx_cp_init_microcode(adev, AMDGPU_UCODE_ID_CP_RS64_PFP); + amdgpu_gfx_cp_init_microcode(adev, AMDGPU_UCODE_ID_CP_RS64_PFP_P0_STACK); + amdgpu_gfx_cp_init_microcode(adev, AMDGPU_UCODE_ID_CP_RS64_PFP_P1_STACK); } else { - cp_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.pfp_fw->data; - adev->gfx.pfp_fw_version = le32_to_cpu(cp_hdr->header.ucode_version); - adev->gfx.pfp_feature_version = le32_to_cpu(cp_hdr->ucode_feature_version); + amdgpu_gfx_cp_init_microcode(adev, AMDGPU_UCODE_ID_CP_PFP); } snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_me.bin", ucode_prefix); @@ -564,14 +479,11 @@ static int gfx_v11_0_init_microcode(struct amdgpu_device *adev) if (err) goto out; if (adev->gfx.rs64_enable) { - cp_hdr_v2_0 = (const struct gfx_firmware_header_v2_0 
*)adev->gfx.me_fw->data; - adev->gfx.me_fw_version = le32_to_cpu(cp_hdr_v2_0->header.ucode_version); - adev->gfx.me_feature_version = le32_to_cpu(cp_hdr_v2_0->ucode_feature_version); - + amdgpu_gfx_cp_init_microcode(adev, AMDGPU_UCODE_ID_CP_RS64_ME); + amdgpu_gfx_cp_init_microcode(adev, AMDGPU_UCODE_ID_CP_RS64_ME_P0_STACK); + amdgpu_gfx_cp_init_microcode(adev, AMDGPU_UCODE_ID_CP_RS64_ME_P1_STACK); } else { - cp_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.me_fw->data; - adev->gfx.me_fw_version = le32_to_cpu(cp_hdr->header.ucode_version); - adev->gfx.me_feature_version = le32_to_cpu(cp_hdr->ucode_feature_version); + amdgpu_gfx_cp_init_microcode(adev, AMDGPU_UCODE_ID_CP_ME); } if (!amdgpu_sriov_vf(adev)) { @@ -580,58 +492,14 @@ static int gfx_v11_0_init_microcode(struct amdgpu_device *adev) if (err) goto out; err = amdgpu_ucode_validate(adev->gfx.rlc_fw); + if (err) + goto out; rlc_hdr = (const struct rlc_firmware_header_v2_0 *)adev->gfx.rlc_fw->data; version_major = le16_to_cpu(rlc_hdr->header.header_version_major); version_minor = le16_to_cpu(rlc_hdr->header.header_version_minor); - - adev->gfx.rlc_fw_version = le32_to_cpu(rlc_hdr->header.ucode_version); - adev->gfx.rlc_feature_version = le32_to_cpu(rlc_hdr->ucode_feature_version); - adev->gfx.rlc.save_and_restore_offset = - le32_to_cpu(rlc_hdr->save_and_restore_offset); - adev->gfx.rlc.clear_state_descriptor_offset = - le32_to_cpu(rlc_hdr->clear_state_descriptor_offset); - adev->gfx.rlc.avail_scratch_ram_locations = - le32_to_cpu(rlc_hdr->avail_scratch_ram_locations); - adev->gfx.rlc.reg_restore_list_size = - le32_to_cpu(rlc_hdr->reg_restore_list_size); - adev->gfx.rlc.reg_list_format_start = - le32_to_cpu(rlc_hdr->reg_list_format_start); - adev->gfx.rlc.reg_list_format_separate_start = - le32_to_cpu(rlc_hdr->reg_list_format_separate_start); - adev->gfx.rlc.starting_offsets_start = - le32_to_cpu(rlc_hdr->starting_offsets_start); - adev->gfx.rlc.reg_list_format_size_bytes = - le32_to_cpu(rlc_hdr->reg_list_format_size_bytes); - adev->gfx.rlc.reg_list_size_bytes = - le32_to_cpu(rlc_hdr->reg_list_size_bytes); - adev->gfx.rlc.register_list_format = - kmalloc(adev->gfx.rlc.reg_list_format_size_bytes + - adev->gfx.rlc.reg_list_size_bytes, GFP_KERNEL); - if (!adev->gfx.rlc.register_list_format) { - err = -ENOMEM; + err = amdgpu_gfx_rlc_init_microcode(adev, version_major, version_minor); + if (err) goto out; - } - - tmp = (unsigned int *)((uintptr_t)rlc_hdr + - le32_to_cpu(rlc_hdr->reg_list_format_array_offset_bytes)); - for (i = 0 ; i < (rlc_hdr->reg_list_format_size_bytes >> 2); i++) - adev->gfx.rlc.register_list_format[i] = le32_to_cpu(tmp[i]); - - adev->gfx.rlc.register_restore = adev->gfx.rlc.register_list_format + i; - - tmp = (unsigned int *)((uintptr_t)rlc_hdr + - le32_to_cpu(rlc_hdr->reg_list_array_offset_bytes)); - for (i = 0 ; i < (rlc_hdr->reg_list_size_bytes >> 2); i++) - adev->gfx.rlc.register_restore[i] = le32_to_cpu(tmp[i]); - - if (version_major == 2) { - if (version_minor >= 1) - gfx_v11_0_init_rlc_ext_microcode(adev); - if (version_minor >= 2) - gfx_v11_0_init_rlc_iram_dram_microcode(adev); - if (version_minor == 3) - gfx_v11_0_init_rlcp_rlcv_microcode(adev); - } } snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_mec.bin", ucode_prefix); @@ -642,190 +510,23 @@ static int gfx_v11_0_init_microcode(struct amdgpu_device *adev) if (err) goto out; if (adev->gfx.rs64_enable) { - cp_hdr_v2_0 = (const struct gfx_firmware_header_v2_0 *)adev->gfx.mec_fw->data; - adev->gfx.mec_fw_version = 
le32_to_cpu(cp_hdr_v2_0->header.ucode_version); - adev->gfx.mec_feature_version = le32_to_cpu(cp_hdr_v2_0->ucode_feature_version); - + amdgpu_gfx_cp_init_microcode(adev, AMDGPU_UCODE_ID_CP_RS64_MEC); + amdgpu_gfx_cp_init_microcode(adev, AMDGPU_UCODE_ID_CP_RS64_MEC_P0_STACK); + amdgpu_gfx_cp_init_microcode(adev, AMDGPU_UCODE_ID_CP_RS64_MEC_P1_STACK); + amdgpu_gfx_cp_init_microcode(adev, AMDGPU_UCODE_ID_CP_RS64_MEC_P2_STACK); + amdgpu_gfx_cp_init_microcode(adev, AMDGPU_UCODE_ID_CP_RS64_MEC_P3_STACK); } else { - cp_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.mec_fw->data; - adev->gfx.mec_fw_version = le32_to_cpu(cp_hdr->header.ucode_version); - adev->gfx.mec_feature_version = le32_to_cpu(cp_hdr->ucode_feature_version); + amdgpu_gfx_cp_init_microcode(adev, AMDGPU_UCODE_ID_CP_MEC1); + amdgpu_gfx_cp_init_microcode(adev, AMDGPU_UCODE_ID_CP_MEC1_JT); } /* only one MEC for gfx 11.0.0. */ adev->gfx.mec2_fw = NULL; - if (adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) { - if (adev->gfx.rs64_enable) { - cp_hdr_v2_0 = (const struct gfx_firmware_header_v2_0 *)adev->gfx.pfp_fw->data; - info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_RS64_PFP]; - info->ucode_id = AMDGPU_UCODE_ID_CP_RS64_PFP; - info->fw = adev->gfx.pfp_fw; - header = (const struct common_firmware_header *)info->fw->data; - adev->firmware.fw_size += - ALIGN(le32_to_cpu(cp_hdr_v2_0->ucode_size_bytes), PAGE_SIZE); - - info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_RS64_PFP_P0_STACK]; - info->ucode_id = AMDGPU_UCODE_ID_CP_RS64_PFP_P0_STACK; - info->fw = adev->gfx.pfp_fw; - header = (const struct common_firmware_header *)info->fw->data; - adev->firmware.fw_size += - ALIGN(le32_to_cpu(cp_hdr_v2_0->data_size_bytes), PAGE_SIZE); - - info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_RS64_PFP_P1_STACK]; - info->ucode_id = AMDGPU_UCODE_ID_CP_RS64_PFP_P1_STACK; - info->fw = adev->gfx.pfp_fw; - header = (const struct common_firmware_header *)info->fw->data; - adev->firmware.fw_size += - ALIGN(le32_to_cpu(cp_hdr_v2_0->data_size_bytes), PAGE_SIZE); - - cp_hdr_v2_0 = (const struct gfx_firmware_header_v2_0 *)adev->gfx.me_fw->data; - info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_RS64_ME]; - info->ucode_id = AMDGPU_UCODE_ID_CP_RS64_ME; - info->fw = adev->gfx.me_fw; - header = (const struct common_firmware_header *)info->fw->data; - adev->firmware.fw_size += - ALIGN(le32_to_cpu(cp_hdr_v2_0->ucode_size_bytes), PAGE_SIZE); - - info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_RS64_ME_P0_STACK]; - info->ucode_id = AMDGPU_UCODE_ID_CP_RS64_ME_P0_STACK; - info->fw = adev->gfx.me_fw; - header = (const struct common_firmware_header *)info->fw->data; - adev->firmware.fw_size += - ALIGN(le32_to_cpu(cp_hdr_v2_0->data_size_bytes), PAGE_SIZE); - - info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_RS64_ME_P1_STACK]; - info->ucode_id = AMDGPU_UCODE_ID_CP_RS64_ME_P1_STACK; - info->fw = adev->gfx.me_fw; - header = (const struct common_firmware_header *)info->fw->data; - adev->firmware.fw_size += - ALIGN(le32_to_cpu(cp_hdr_v2_0->data_size_bytes), PAGE_SIZE); - - cp_hdr_v2_0 = (const struct gfx_firmware_header_v2_0 *)adev->gfx.mec_fw->data; - info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_RS64_MEC]; - info->ucode_id = AMDGPU_UCODE_ID_CP_RS64_MEC; - info->fw = adev->gfx.mec_fw; - header = (const struct common_firmware_header *)info->fw->data; - adev->firmware.fw_size += - ALIGN(le32_to_cpu(cp_hdr_v2_0->ucode_size_bytes), PAGE_SIZE); - - info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_RS64_MEC_P0_STACK]; - info->ucode_id = AMDGPU_UCODE_ID_CP_RS64_MEC_P0_STACK; - 
info->fw = adev->gfx.mec_fw; - header = (const struct common_firmware_header *)info->fw->data; - adev->firmware.fw_size += - ALIGN(le32_to_cpu(cp_hdr_v2_0->data_size_bytes), PAGE_SIZE); - - info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_RS64_MEC_P1_STACK]; - info->ucode_id = AMDGPU_UCODE_ID_CP_RS64_MEC_P1_STACK; - info->fw = adev->gfx.mec_fw; - header = (const struct common_firmware_header *)info->fw->data; - adev->firmware.fw_size += - ALIGN(le32_to_cpu(cp_hdr_v2_0->data_size_bytes), PAGE_SIZE); - - info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_RS64_MEC_P2_STACK]; - info->ucode_id = AMDGPU_UCODE_ID_CP_RS64_MEC_P2_STACK; - info->fw = adev->gfx.mec_fw; - header = (const struct common_firmware_header *)info->fw->data; - adev->firmware.fw_size += - ALIGN(le32_to_cpu(cp_hdr_v2_0->data_size_bytes), PAGE_SIZE); - - info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_RS64_MEC_P3_STACK]; - info->ucode_id = AMDGPU_UCODE_ID_CP_RS64_MEC_P3_STACK; - info->fw = adev->gfx.mec_fw; - header = (const struct common_firmware_header *)info->fw->data; - adev->firmware.fw_size += - ALIGN(le32_to_cpu(cp_hdr_v2_0->data_size_bytes), PAGE_SIZE); - } else { - info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_PFP]; - info->ucode_id = AMDGPU_UCODE_ID_CP_PFP; - info->fw = adev->gfx.pfp_fw; - header = (const struct common_firmware_header *)info->fw->data; - adev->firmware.fw_size += - ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE); - - info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_ME]; - info->ucode_id = AMDGPU_UCODE_ID_CP_ME; - info->fw = adev->gfx.me_fw; - header = (const struct common_firmware_header *)info->fw->data; - adev->firmware.fw_size += - ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE); - - info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_MEC1]; - info->ucode_id = AMDGPU_UCODE_ID_CP_MEC1; - info->fw = adev->gfx.mec_fw; - header = (const struct common_firmware_header *)info->fw->data; - cp_hdr = (const struct gfx_firmware_header_v1_0 *)info->fw->data; - adev->firmware.fw_size += - ALIGN(le32_to_cpu(header->ucode_size_bytes) - - le32_to_cpu(cp_hdr->jt_size) * 4, PAGE_SIZE); - - info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_MEC1_JT]; - info->ucode_id = AMDGPU_UCODE_ID_CP_MEC1_JT; - info->fw = adev->gfx.mec_fw; - adev->firmware.fw_size += - ALIGN(le32_to_cpu(cp_hdr->jt_size) * 4, PAGE_SIZE); - } - - info = &adev->firmware.ucode[AMDGPU_UCODE_ID_RLC_G]; - info->ucode_id = AMDGPU_UCODE_ID_RLC_G; - info->fw = adev->gfx.rlc_fw; - if (info->fw) { - header = (const struct common_firmware_header *)info->fw->data; - adev->firmware.fw_size += - ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE); - } - if (adev->gfx.rlc.save_restore_list_gpm_size_bytes && - adev->gfx.rlc.save_restore_list_srm_size_bytes) { - info = &adev->firmware.ucode[AMDGPU_UCODE_ID_RLC_RESTORE_LIST_GPM_MEM]; - info->ucode_id = AMDGPU_UCODE_ID_RLC_RESTORE_LIST_GPM_MEM; - info->fw = adev->gfx.rlc_fw; - adev->firmware.fw_size += - ALIGN(adev->gfx.rlc.save_restore_list_gpm_size_bytes, PAGE_SIZE); - - info = &adev->firmware.ucode[AMDGPU_UCODE_ID_RLC_RESTORE_LIST_SRM_MEM]; - info->ucode_id = AMDGPU_UCODE_ID_RLC_RESTORE_LIST_SRM_MEM; - info->fw = adev->gfx.rlc_fw; - adev->firmware.fw_size += - ALIGN(adev->gfx.rlc.save_restore_list_srm_size_bytes, PAGE_SIZE); - } - - if (adev->gfx.rlc.rlc_iram_ucode_size_bytes && - adev->gfx.rlc.rlc_dram_ucode_size_bytes) { - info = &adev->firmware.ucode[AMDGPU_UCODE_ID_RLC_IRAM]; - info->ucode_id = AMDGPU_UCODE_ID_RLC_IRAM; - info->fw = adev->gfx.rlc_fw; - adev->firmware.fw_size += - 
ALIGN(adev->gfx.rlc.rlc_iram_ucode_size_bytes, PAGE_SIZE); - - info = &adev->firmware.ucode[AMDGPU_UCODE_ID_RLC_DRAM]; - info->ucode_id = AMDGPU_UCODE_ID_RLC_DRAM; - info->fw = adev->gfx.rlc_fw; - adev->firmware.fw_size += - ALIGN(adev->gfx.rlc.rlc_dram_ucode_size_bytes, PAGE_SIZE); - } - - if (adev->gfx.rlc.rlcp_ucode_size_bytes) { - info = &adev->firmware.ucode[AMDGPU_UCODE_ID_RLC_P]; - info->ucode_id = AMDGPU_UCODE_ID_RLC_P; - info->fw = adev->gfx.rlc_fw; - adev->firmware.fw_size += - ALIGN(adev->gfx.rlc.rlcp_ucode_size_bytes, PAGE_SIZE); - } - - if (adev->gfx.rlc.rlcv_ucode_size_bytes) { - info = &adev->firmware.ucode[AMDGPU_UCODE_ID_RLC_V]; - info->ucode_id = AMDGPU_UCODE_ID_RLC_V; - info->fw = adev->gfx.rlc_fw; - adev->firmware.fw_size += - ALIGN(adev->gfx.rlc.rlcv_ucode_size_bytes, PAGE_SIZE); - } - } - out: if (err) { dev_err(adev->dev, - "gfx11: Failed to load firmware \"%s\"\n", + "gfx11: Failed to init firmware \"%s\"\n", fw_name); release_firmware(adev->gfx.pfp_fw); adev->gfx.pfp_fw = NULL; @@ -1137,7 +838,7 @@ static const struct amdgpu_gfx_funcs gfx_v11_0_gfx_funcs = { .read_wave_sgprs = &gfx_v11_0_read_wave_sgprs, .read_wave_vgprs = &gfx_v11_0_read_wave_vgprs, .select_me_pipe_q = &gfx_v11_0_select_me_pipe_q, - .init_spm_golden = &gfx_v11_0_init_spm_golden_registers, + .update_perfmon_mgcg = &gfx_v11_0_update_perf_clk, }; static int gfx_v11_0_gpu_early_init(struct amdgpu_device *adev) @@ -1147,6 +848,7 @@ static int gfx_v11_0_gpu_early_init(struct amdgpu_device *adev) switch (adev->ip_versions[GC_HWIP][0]) { case IP_VERSION(11, 0, 0): case IP_VERSION(11, 0, 2): + case IP_VERSION(11, 0, 3): adev->gfx.config.max_hw_contexts = 8; adev->gfx.config.sc_prim_fifo_size_frontend = 0x20; adev->gfx.config.sc_prim_fifo_size_backend = 0x100; @@ -1582,6 +1284,7 @@ static int gfx_v11_0_sw_init(void *handle) case IP_VERSION(11, 0, 0): case IP_VERSION(11, 0, 1): case IP_VERSION(11, 0, 2): + case IP_VERSION(11, 0, 3): adev->gfx.me.num_me = 1; adev->gfx.me.num_pipe_per_me = 1; adev->gfx.me.num_queue_per_pipe = 1; @@ -2756,6 +2459,21 @@ static void gfx_v11_0_config_gfx_rs64(struct amdgpu_device *adev) mec_hdr->ucode_start_addr_hi >> 2); } soc21_grbm_select(adev, 0, 0, 0, 0); + + /* reset mec pipe */ + tmp = RREG32_SOC15(GC, 0, regCP_MEC_RS64_CNTL); + tmp = REG_SET_FIELD(tmp, CP_MEC_RS64_CNTL, MEC_PIPE0_RESET, 1); + tmp = REG_SET_FIELD(tmp, CP_MEC_RS64_CNTL, MEC_PIPE1_RESET, 1); + tmp = REG_SET_FIELD(tmp, CP_MEC_RS64_CNTL, MEC_PIPE2_RESET, 1); + tmp = REG_SET_FIELD(tmp, CP_MEC_RS64_CNTL, MEC_PIPE3_RESET, 1); + WREG32_SOC15(GC, 0, regCP_MEC_RS64_CNTL, tmp); + + /* clear mec pipe reset */ + tmp = REG_SET_FIELD(tmp, CP_MEC_RS64_CNTL, MEC_PIPE0_RESET, 0); + tmp = REG_SET_FIELD(tmp, CP_MEC_RS64_CNTL, MEC_PIPE1_RESET, 0); + tmp = REG_SET_FIELD(tmp, CP_MEC_RS64_CNTL, MEC_PIPE2_RESET, 0); + tmp = REG_SET_FIELD(tmp, CP_MEC_RS64_CNTL, MEC_PIPE3_RESET, 0); + WREG32_SOC15(GC, 0, regCP_MEC_RS64_CNTL, tmp); } static int gfx_v11_0_wait_for_rlc_autoload_complete(struct amdgpu_device *adev) @@ -5181,9 +4899,12 @@ static void gfx_v11_0_update_coarse_grain_clock_gating(struct amdgpu_device *ade data = REG_SET_FIELD(data, SDMA0_RLC_CGCG_CTRL, CGCG_INT_ENABLE, 1); WREG32_SOC15(GC, 0, regSDMA0_RLC_CGCG_CTRL, data); - data = RREG32_SOC15(GC, 0, regSDMA1_RLC_CGCG_CTRL); - data = REG_SET_FIELD(data, SDMA1_RLC_CGCG_CTRL, CGCG_INT_ENABLE, 1); - WREG32_SOC15(GC, 0, regSDMA1_RLC_CGCG_CTRL, data); + /* Some ASICs only have one SDMA instance, not need to configure SDMA1 */ + if (adev->sdma.num_instances > 1) { + data = 
RREG32_SOC15(GC, 0, regSDMA1_RLC_CGCG_CTRL); + data = REG_SET_FIELD(data, SDMA1_RLC_CGCG_CTRL, CGCG_INT_ENABLE, 1); + WREG32_SOC15(GC, 0, regSDMA1_RLC_CGCG_CTRL, data); + } } else { /* Program RLC_CGCG_CGLS_CTRL */ def = data = RREG32_SOC15(GC, 0, regRLC_CGCG_CGLS_CTRL); @@ -5212,9 +4933,12 @@ static void gfx_v11_0_update_coarse_grain_clock_gating(struct amdgpu_device *ade data &= ~SDMA0_RLC_CGCG_CTRL__CGCG_INT_ENABLE_MASK; WREG32_SOC15(GC, 0, regSDMA0_RLC_CGCG_CTRL, data); - data = RREG32_SOC15(GC, 0, regSDMA1_RLC_CGCG_CTRL); - data &= ~SDMA1_RLC_CGCG_CTRL__CGCG_INT_ENABLE_MASK; - WREG32_SOC15(GC, 0, regSDMA1_RLC_CGCG_CTRL, data); + /* Some ASICs only have one SDMA instance, not need to configure SDMA1 */ + if (adev->sdma.num_instances > 1) { + data = RREG32_SOC15(GC, 0, regSDMA1_RLC_CGCG_CTRL); + data &= ~SDMA1_RLC_CGCG_CTRL__CGCG_INT_ENABLE_MASK; + WREG32_SOC15(GC, 0, regSDMA1_RLC_CGCG_CTRL, data); + } } } @@ -5250,6 +4974,8 @@ static void gfx_v11_0_update_spm_vmid(struct amdgpu_device *adev, unsigned vmid) { u32 reg, data; + amdgpu_gfx_off_ctrl(adev, false); + reg = SOC15_REG_OFFSET(GC, 0, regRLC_SPM_MC_CNTL); if (amdgpu_sriov_is_pp_one_vf(adev)) data = RREG32_NO_KIQ(reg); @@ -5263,6 +4989,8 @@ static void gfx_v11_0_update_spm_vmid(struct amdgpu_device *adev, unsigned vmid) WREG32_SOC15_NO_KIQ(GC, 0, regRLC_SPM_MC_CNTL, data); else WREG32_SOC15(GC, 0, regRLC_SPM_MC_CNTL, data); + + amdgpu_gfx_off_ctrl(adev, true); } static const struct amdgpu_rlc_funcs gfx_v11_0_rlc_funcs = { @@ -5279,6 +5007,38 @@ static const struct amdgpu_rlc_funcs gfx_v11_0_rlc_funcs = { .update_spm_vmid = gfx_v11_0_update_spm_vmid, }; +static void gfx_v11_cntl_power_gating(struct amdgpu_device *adev, bool enable) +{ + u32 data = RREG32_SOC15(GC, 0, regRLC_PG_CNTL); + + if (enable && (adev->pg_flags & AMD_PG_SUPPORT_GFX_PG)) + data |= RLC_PG_CNTL__GFX_POWER_GATING_ENABLE_MASK; + else + data &= ~RLC_PG_CNTL__GFX_POWER_GATING_ENABLE_MASK; + + WREG32_SOC15(GC, 0, regRLC_PG_CNTL, data); + + // Program RLC_PG_DELAY3 for CGPG hysteresis + if (enable && (adev->pg_flags & AMD_PG_SUPPORT_GFX_PG)) { + switch (adev->ip_versions[GC_HWIP][0]) { + case IP_VERSION(11, 0, 1): + WREG32_SOC15(GC, 0, regRLC_PG_DELAY_3, RLC_PG_DELAY_3_DEFAULT_GC_11_0_1); + break; + default: + break; + } + } +} + +static void gfx_v11_cntl_pg(struct amdgpu_device *adev, bool enable) +{ + amdgpu_gfx_rlc_enter_safe_mode(adev); + + gfx_v11_cntl_power_gating(adev, enable); + + amdgpu_gfx_rlc_exit_safe_mode(adev); +} + static int gfx_v11_0_set_powergating_state(void *handle, enum amd_powergating_state state) { @@ -5293,6 +5053,10 @@ static int gfx_v11_0_set_powergating_state(void *handle, case IP_VERSION(11, 0, 2): amdgpu_gfx_off_ctrl(adev, enable); break; + case IP_VERSION(11, 0, 1): + gfx_v11_cntl_pg(adev, enable); + amdgpu_gfx_off_ctrl(adev, enable); + break; default: break; } @@ -5310,6 +5074,7 @@ static int gfx_v11_0_set_clockgating_state(void *handle, switch (adev->ip_versions[GC_HWIP][0]) { case IP_VERSION(11, 0, 0): + case IP_VERSION(11, 0, 1): case IP_VERSION(11, 0, 2): gfx_v11_0_update_gfx_clock_gating(adev, state == AMD_CG_STATE_GATE); diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c index c6e0f9313a7f..0320be4a5fc6 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c @@ -126,6 +126,8 @@ MODULE_FIRMWARE("amdgpu/green_sardine_rlc.bin"); MODULE_FIRMWARE("amdgpu/aldebaran_mec.bin"); MODULE_FIRMWARE("amdgpu/aldebaran_mec2.bin"); 
MODULE_FIRMWARE("amdgpu/aldebaran_rlc.bin"); +MODULE_FIRMWARE("amdgpu/aldebaran_sjt_mec.bin"); +MODULE_FIRMWARE("amdgpu/aldebaran_sjt_mec2.bin"); #define mmTCP_CHAN_STEER_0_ARCT 0x0b03 #define mmTCP_CHAN_STEER_0_ARCT_BASE_IDX 0 @@ -1089,27 +1091,6 @@ static void gfx_v9_0_free_microcode(struct amdgpu_device *adev) kfree(adev->gfx.rlc.register_list_format); } -static void gfx_v9_0_init_rlc_ext_microcode(struct amdgpu_device *adev) -{ - const struct rlc_firmware_header_v2_1 *rlc_hdr; - - rlc_hdr = (const struct rlc_firmware_header_v2_1 *)adev->gfx.rlc_fw->data; - adev->gfx.rlc_srlc_fw_version = le32_to_cpu(rlc_hdr->save_restore_list_cntl_ucode_ver); - adev->gfx.rlc_srlc_feature_version = le32_to_cpu(rlc_hdr->save_restore_list_cntl_feature_ver); - adev->gfx.rlc.save_restore_list_cntl_size_bytes = le32_to_cpu(rlc_hdr->save_restore_list_cntl_size_bytes); - adev->gfx.rlc.save_restore_list_cntl = (u8 *)rlc_hdr + le32_to_cpu(rlc_hdr->save_restore_list_cntl_offset_bytes); - adev->gfx.rlc_srlg_fw_version = le32_to_cpu(rlc_hdr->save_restore_list_gpm_ucode_ver); - adev->gfx.rlc_srlg_feature_version = le32_to_cpu(rlc_hdr->save_restore_list_gpm_feature_ver); - adev->gfx.rlc.save_restore_list_gpm_size_bytes = le32_to_cpu(rlc_hdr->save_restore_list_gpm_size_bytes); - adev->gfx.rlc.save_restore_list_gpm = (u8 *)rlc_hdr + le32_to_cpu(rlc_hdr->save_restore_list_gpm_offset_bytes); - adev->gfx.rlc_srls_fw_version = le32_to_cpu(rlc_hdr->save_restore_list_srm_ucode_ver); - adev->gfx.rlc_srls_feature_version = le32_to_cpu(rlc_hdr->save_restore_list_srm_feature_ver); - adev->gfx.rlc.save_restore_list_srm_size_bytes = le32_to_cpu(rlc_hdr->save_restore_list_srm_size_bytes); - adev->gfx.rlc.save_restore_list_srm = (u8 *)rlc_hdr + le32_to_cpu(rlc_hdr->save_restore_list_srm_offset_bytes); - adev->gfx.rlc.reg_list_format_direct_reg_list_length = - le32_to_cpu(rlc_hdr->reg_list_format_direct_reg_list_length); -} - static void gfx_v9_0_check_fw_write_wait(struct amdgpu_device *adev) { adev->gfx.me_fw_write_wait = false; @@ -1271,9 +1252,6 @@ static int gfx_v9_0_init_cp_gfx_microcode(struct amdgpu_device *adev, { char fw_name[30]; int err; - struct amdgpu_firmware_info *info = NULL; - const struct common_firmware_header *header = NULL; - const struct gfx_firmware_header_v1_0 *cp_hdr; snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_pfp.bin", chip_name); err = request_firmware(&adev->gfx.pfp_fw, fw_name, adev->dev); @@ -1282,9 +1260,7 @@ static int gfx_v9_0_init_cp_gfx_microcode(struct amdgpu_device *adev, err = amdgpu_ucode_validate(adev->gfx.pfp_fw); if (err) goto out; - cp_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.pfp_fw->data; - adev->gfx.pfp_fw_version = le32_to_cpu(cp_hdr->header.ucode_version); - adev->gfx.pfp_feature_version = le32_to_cpu(cp_hdr->ucode_feature_version); + amdgpu_gfx_cp_init_microcode(adev, AMDGPU_UCODE_ID_CP_PFP); snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_me.bin", chip_name); err = request_firmware(&adev->gfx.me_fw, fw_name, adev->dev); @@ -1293,9 +1269,7 @@ static int gfx_v9_0_init_cp_gfx_microcode(struct amdgpu_device *adev, err = amdgpu_ucode_validate(adev->gfx.me_fw); if (err) goto out; - cp_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.me_fw->data; - adev->gfx.me_fw_version = le32_to_cpu(cp_hdr->header.ucode_version); - adev->gfx.me_feature_version = le32_to_cpu(cp_hdr->ucode_feature_version); + amdgpu_gfx_cp_init_microcode(adev, AMDGPU_UCODE_ID_CP_ME); snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_ce.bin", chip_name); err = request_firmware(&adev->gfx.ce_fw, fw_name, 
adev->dev); @@ -1304,37 +1278,12 @@ static int gfx_v9_0_init_cp_gfx_microcode(struct amdgpu_device *adev, err = amdgpu_ucode_validate(adev->gfx.ce_fw); if (err) goto out; - cp_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.ce_fw->data; - adev->gfx.ce_fw_version = le32_to_cpu(cp_hdr->header.ucode_version); - adev->gfx.ce_feature_version = le32_to_cpu(cp_hdr->ucode_feature_version); - - if (adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) { - info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_PFP]; - info->ucode_id = AMDGPU_UCODE_ID_CP_PFP; - info->fw = adev->gfx.pfp_fw; - header = (const struct common_firmware_header *)info->fw->data; - adev->firmware.fw_size += - ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE); - - info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_ME]; - info->ucode_id = AMDGPU_UCODE_ID_CP_ME; - info->fw = adev->gfx.me_fw; - header = (const struct common_firmware_header *)info->fw->data; - adev->firmware.fw_size += - ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE); - - info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_CE]; - info->ucode_id = AMDGPU_UCODE_ID_CP_CE; - info->fw = adev->gfx.ce_fw; - header = (const struct common_firmware_header *)info->fw->data; - adev->firmware.fw_size += - ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE); - } + amdgpu_gfx_cp_init_microcode(adev, AMDGPU_UCODE_ID_CP_CE); out: if (err) { dev_err(adev->dev, - "gfx9: Failed to load firmware \"%s\"\n", + "gfx9: Failed to init firmware \"%s\"\n", fw_name); release_firmware(adev->gfx.pfp_fw); adev->gfx.pfp_fw = NULL; @@ -1351,11 +1300,7 @@ static int gfx_v9_0_init_rlc_microcode(struct amdgpu_device *adev, { char fw_name[30]; int err; - struct amdgpu_firmware_info *info = NULL; - const struct common_firmware_header *header = NULL; const struct rlc_firmware_header_v2_0 *rlc_hdr; - unsigned int *tmp = NULL; - unsigned int i = 0; uint16_t version_major; uint16_t version_minor; uint32_t smu_version; @@ -1384,92 +1329,17 @@ static int gfx_v9_0_init_rlc_microcode(struct amdgpu_device *adev, if (err) goto out; err = amdgpu_ucode_validate(adev->gfx.rlc_fw); + if (err) + goto out; rlc_hdr = (const struct rlc_firmware_header_v2_0 *)adev->gfx.rlc_fw->data; version_major = le16_to_cpu(rlc_hdr->header.header_version_major); version_minor = le16_to_cpu(rlc_hdr->header.header_version_minor); - if (version_major == 2 && version_minor == 1) - adev->gfx.rlc.is_rlc_v2_1 = true; - - adev->gfx.rlc_fw_version = le32_to_cpu(rlc_hdr->header.ucode_version); - adev->gfx.rlc_feature_version = le32_to_cpu(rlc_hdr->ucode_feature_version); - adev->gfx.rlc.save_and_restore_offset = - le32_to_cpu(rlc_hdr->save_and_restore_offset); - adev->gfx.rlc.clear_state_descriptor_offset = - le32_to_cpu(rlc_hdr->clear_state_descriptor_offset); - adev->gfx.rlc.avail_scratch_ram_locations = - le32_to_cpu(rlc_hdr->avail_scratch_ram_locations); - adev->gfx.rlc.reg_restore_list_size = - le32_to_cpu(rlc_hdr->reg_restore_list_size); - adev->gfx.rlc.reg_list_format_start = - le32_to_cpu(rlc_hdr->reg_list_format_start); - adev->gfx.rlc.reg_list_format_separate_start = - le32_to_cpu(rlc_hdr->reg_list_format_separate_start); - adev->gfx.rlc.starting_offsets_start = - le32_to_cpu(rlc_hdr->starting_offsets_start); - adev->gfx.rlc.reg_list_format_size_bytes = - le32_to_cpu(rlc_hdr->reg_list_format_size_bytes); - adev->gfx.rlc.reg_list_size_bytes = - le32_to_cpu(rlc_hdr->reg_list_size_bytes); - adev->gfx.rlc.register_list_format = - kmalloc(adev->gfx.rlc.reg_list_format_size_bytes + - adev->gfx.rlc.reg_list_size_bytes, GFP_KERNEL); - 
if (!adev->gfx.rlc.register_list_format) { - err = -ENOMEM; - goto out; - } - - tmp = (unsigned int *)((uintptr_t)rlc_hdr + - le32_to_cpu(rlc_hdr->reg_list_format_array_offset_bytes)); - for (i = 0 ; i < (adev->gfx.rlc.reg_list_format_size_bytes >> 2); i++) - adev->gfx.rlc.register_list_format[i] = le32_to_cpu(tmp[i]); - - adev->gfx.rlc.register_restore = adev->gfx.rlc.register_list_format + i; - - tmp = (unsigned int *)((uintptr_t)rlc_hdr + - le32_to_cpu(rlc_hdr->reg_list_array_offset_bytes)); - for (i = 0 ; i < (adev->gfx.rlc.reg_list_size_bytes >> 2); i++) - adev->gfx.rlc.register_restore[i] = le32_to_cpu(tmp[i]); - - if (adev->gfx.rlc.is_rlc_v2_1) - gfx_v9_0_init_rlc_ext_microcode(adev); - - if (adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) { - info = &adev->firmware.ucode[AMDGPU_UCODE_ID_RLC_G]; - info->ucode_id = AMDGPU_UCODE_ID_RLC_G; - info->fw = adev->gfx.rlc_fw; - header = (const struct common_firmware_header *)info->fw->data; - adev->firmware.fw_size += - ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE); - - if (adev->gfx.rlc.is_rlc_v2_1 && - adev->gfx.rlc.save_restore_list_cntl_size_bytes && - adev->gfx.rlc.save_restore_list_gpm_size_bytes && - adev->gfx.rlc.save_restore_list_srm_size_bytes) { - info = &adev->firmware.ucode[AMDGPU_UCODE_ID_RLC_RESTORE_LIST_CNTL]; - info->ucode_id = AMDGPU_UCODE_ID_RLC_RESTORE_LIST_CNTL; - info->fw = adev->gfx.rlc_fw; - adev->firmware.fw_size += - ALIGN(adev->gfx.rlc.save_restore_list_cntl_size_bytes, PAGE_SIZE); - - info = &adev->firmware.ucode[AMDGPU_UCODE_ID_RLC_RESTORE_LIST_GPM_MEM]; - info->ucode_id = AMDGPU_UCODE_ID_RLC_RESTORE_LIST_GPM_MEM; - info->fw = adev->gfx.rlc_fw; - adev->firmware.fw_size += - ALIGN(adev->gfx.rlc.save_restore_list_gpm_size_bytes, PAGE_SIZE); - - info = &adev->firmware.ucode[AMDGPU_UCODE_ID_RLC_RESTORE_LIST_SRM_MEM]; - info->ucode_id = AMDGPU_UCODE_ID_RLC_RESTORE_LIST_SRM_MEM; - info->fw = adev->gfx.rlc_fw; - adev->firmware.fw_size += - ALIGN(adev->gfx.rlc.save_restore_list_srm_size_bytes, PAGE_SIZE); - } - } - + err = amdgpu_gfx_rlc_init_microcode(adev, version_major, version_minor); out: if (err) { dev_err(adev->dev, - "gfx9: Failed to load firmware \"%s\"\n", + "gfx9: Failed to init firmware \"%s\"\n", fw_name); release_firmware(adev->gfx.rlc_fw); adev->gfx.rlc_fw = NULL; @@ -1492,35 +1362,34 @@ static int gfx_v9_0_init_cp_compute_microcode(struct amdgpu_device *adev, { char fw_name[30]; int err; - struct amdgpu_firmware_info *info = NULL; - const struct common_firmware_header *header = NULL; - const struct gfx_firmware_header_v1_0 *cp_hdr; - snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_mec.bin", chip_name); + if (amdgpu_sriov_vf(adev) && (adev->asic_type == CHIP_ALDEBARAN)) + snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_sjt_mec.bin", chip_name); + else + snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_mec.bin", chip_name); + err = request_firmware(&adev->gfx.mec_fw, fw_name, adev->dev); if (err) goto out; err = amdgpu_ucode_validate(adev->gfx.mec_fw); if (err) goto out; - cp_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.mec_fw->data; - adev->gfx.mec_fw_version = le32_to_cpu(cp_hdr->header.ucode_version); - adev->gfx.mec_feature_version = le32_to_cpu(cp_hdr->ucode_feature_version); - + amdgpu_gfx_cp_init_microcode(adev, AMDGPU_UCODE_ID_CP_MEC1); + amdgpu_gfx_cp_init_microcode(adev, AMDGPU_UCODE_ID_CP_MEC1_JT); if (gfx_v9_0_load_mec2_fw_bin_support(adev)) { - snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_mec2.bin", chip_name); + if (amdgpu_sriov_vf(adev) && (adev->asic_type == CHIP_ALDEBARAN)) 
+ snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_sjt_mec2.bin", chip_name); + else + snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_mec2.bin", chip_name); + err = request_firmware(&adev->gfx.mec2_fw, fw_name, adev->dev); if (!err) { err = amdgpu_ucode_validate(adev->gfx.mec2_fw); if (err) goto out; - cp_hdr = (const struct gfx_firmware_header_v1_0 *) - adev->gfx.mec2_fw->data; - adev->gfx.mec2_fw_version = - le32_to_cpu(cp_hdr->header.ucode_version); - adev->gfx.mec2_feature_version = - le32_to_cpu(cp_hdr->ucode_feature_version); + amdgpu_gfx_cp_init_microcode(adev, AMDGPU_UCODE_ID_CP_MEC2); + amdgpu_gfx_cp_init_microcode(adev, AMDGPU_UCODE_ID_CP_MEC2_JT); } else { err = 0; adev->gfx.mec2_fw = NULL; @@ -1530,49 +1399,12 @@ static int gfx_v9_0_init_cp_compute_microcode(struct amdgpu_device *adev, adev->gfx.mec2_feature_version = adev->gfx.mec_feature_version; } - if (adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) { - info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_MEC1]; - info->ucode_id = AMDGPU_UCODE_ID_CP_MEC1; - info->fw = adev->gfx.mec_fw; - header = (const struct common_firmware_header *)info->fw->data; - cp_hdr = (const struct gfx_firmware_header_v1_0 *)info->fw->data; - adev->firmware.fw_size += - ALIGN(le32_to_cpu(header->ucode_size_bytes) - le32_to_cpu(cp_hdr->jt_size) * 4, PAGE_SIZE); - - info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_MEC1_JT]; - info->ucode_id = AMDGPU_UCODE_ID_CP_MEC1_JT; - info->fw = adev->gfx.mec_fw; - adev->firmware.fw_size += - ALIGN(le32_to_cpu(cp_hdr->jt_size) * 4, PAGE_SIZE); - - if (adev->gfx.mec2_fw) { - info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_MEC2]; - info->ucode_id = AMDGPU_UCODE_ID_CP_MEC2; - info->fw = adev->gfx.mec2_fw; - header = (const struct common_firmware_header *)info->fw->data; - cp_hdr = (const struct gfx_firmware_header_v1_0 *)info->fw->data; - adev->firmware.fw_size += - ALIGN(le32_to_cpu(header->ucode_size_bytes) - le32_to_cpu(cp_hdr->jt_size) * 4, PAGE_SIZE); - - /* TODO: Determine if MEC2 JT FW loading can be removed - for all GFX V9 asic and above */ - if (gfx_v9_0_load_mec2_fw_bin_support(adev)) { - info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_MEC2_JT]; - info->ucode_id = AMDGPU_UCODE_ID_CP_MEC2_JT; - info->fw = adev->gfx.mec2_fw; - adev->firmware.fw_size += - ALIGN(le32_to_cpu(cp_hdr->jt_size) * 4, - PAGE_SIZE); - } - } - } - out: gfx_v9_0_check_if_need_gfxoff(adev); gfx_v9_0_check_fw_write_wait(adev); if (err) { dev_err(adev->dev, - "gfx9: Failed to load firmware \"%s\"\n", + "gfx9: Failed to init firmware \"%s\"\n", fw_name); release_firmware(adev->gfx.mec_fw); adev->gfx.mec_fw = NULL; @@ -2587,7 +2419,8 @@ static void gfx_v9_0_constants_init(struct amdgpu_device *adev) gfx_v9_0_tiling_mode_table_init(adev); - gfx_v9_0_setup_rb(adev); + if (adev->gfx.num_gfx_rings) + gfx_v9_0_setup_rb(adev); gfx_v9_0_get_cu_info(adev, &adev->gfx.cu_info); adev->gfx.config.db_debug2 = RREG32_SOC15(GC, 0, mmDB_DEBUG2); @@ -5596,7 +5429,7 @@ static void gfx_v9_0_ring_emit_patch_cond_exec(struct amdgpu_ring *ring, unsigne BUG_ON(offset > ring->buf_mask); BUG_ON(ring->ring[offset] != 0x55aa55aa); - cur = (ring->wptr & ring->buf_mask) - 1; + cur = (ring->wptr - 1) & ring->buf_mask; if (likely(cur > offset)) ring->ring[offset] = cur - offset; else diff --git a/drivers/gpu/drm/amd/amdgpu/gfxhub_v2_1.c b/drivers/gpu/drm/amd/amdgpu/gfxhub_v2_1.c index d8c531581116..8cf53e039c11 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfxhub_v2_1.c +++ b/drivers/gpu/drm/amd/amdgpu/gfxhub_v2_1.c @@ -576,6 +576,111 @@ static void gfxhub_v2_1_utcl2_harvest(struct 
amdgpu_device *adev) } } +static void gfxhub_v2_1_save_regs(struct amdgpu_device *adev) +{ + int i; + adev->gmc.VM_L2_CNTL = RREG32_SOC15(GC, 0, mmGCVM_L2_CNTL); + adev->gmc.VM_L2_CNTL2 = RREG32_SOC15(GC, 0, mmGCVM_L2_CNTL2); + adev->gmc.VM_DUMMY_PAGE_FAULT_CNTL = RREG32_SOC15(GC, 0, mmGCVM_DUMMY_PAGE_FAULT_CNTL); + adev->gmc.VM_DUMMY_PAGE_FAULT_ADDR_LO32 = RREG32_SOC15(GC, 0, mmGCVM_DUMMY_PAGE_FAULT_ADDR_LO32); + adev->gmc.VM_DUMMY_PAGE_FAULT_ADDR_HI32 = RREG32_SOC15(GC, 0, mmGCVM_DUMMY_PAGE_FAULT_ADDR_HI32); + adev->gmc.VM_L2_PROTECTION_FAULT_CNTL = RREG32_SOC15(GC, 0, mmGCVM_L2_PROTECTION_FAULT_CNTL); + adev->gmc.VM_L2_PROTECTION_FAULT_CNTL2 = RREG32_SOC15(GC, 0, mmGCVM_L2_PROTECTION_FAULT_CNTL2); + adev->gmc.VM_L2_PROTECTION_FAULT_MM_CNTL3 = RREG32_SOC15(GC, 0, mmGCVM_L2_PROTECTION_FAULT_MM_CNTL3); + adev->gmc.VM_L2_PROTECTION_FAULT_MM_CNTL4 = RREG32_SOC15(GC, 0, mmGCVM_L2_PROTECTION_FAULT_MM_CNTL4); + adev->gmc.VM_L2_PROTECTION_FAULT_ADDR_LO32 = RREG32_SOC15(GC, 0, mmGCVM_L2_PROTECTION_FAULT_ADDR_LO32); + adev->gmc.VM_L2_PROTECTION_FAULT_ADDR_HI32 = RREG32_SOC15(GC, 0, mmGCVM_L2_PROTECTION_FAULT_ADDR_HI32); + adev->gmc.VM_DEBUG = RREG32_SOC15(GC, 0, mmGCVM_DEBUG); + adev->gmc.VM_L2_MM_GROUP_RT_CLASSES = RREG32_SOC15(GC, 0, mmGCVM_L2_MM_GROUP_RT_CLASSES); + adev->gmc.VM_L2_BANK_SELECT_RESERVED_CID = RREG32_SOC15(GC, 0, mmGCVM_L2_BANK_SELECT_RESERVED_CID); + adev->gmc.VM_L2_BANK_SELECT_RESERVED_CID2 = RREG32_SOC15(GC, 0, mmGCVM_L2_BANK_SELECT_RESERVED_CID2); + adev->gmc.VM_L2_CACHE_PARITY_CNTL = RREG32_SOC15(GC, 0, mmGCVM_L2_CACHE_PARITY_CNTL); + adev->gmc.VM_L2_IH_LOG_CNTL = RREG32_SOC15(GC, 0, mmGCVM_L2_IH_LOG_CNTL); + + for (i = 0; i <= 15; i++) { + adev->gmc.VM_CONTEXT_CNTL[i] = RREG32_SOC15_OFFSET(GC, 0, mmGCVM_CONTEXT0_CNTL, i); + adev->gmc.VM_CONTEXT_PAGE_TABLE_BASE_ADDR_LO32[i] = RREG32_SOC15_OFFSET(GC, 0, mmGCVM_CONTEXT0_PAGE_TABLE_BASE_ADDR_LO32, i * 2); + adev->gmc.VM_CONTEXT_PAGE_TABLE_BASE_ADDR_HI32[i] = RREG32_SOC15_OFFSET(GC, 0, mmGCVM_CONTEXT0_PAGE_TABLE_BASE_ADDR_HI32, i * 2); + adev->gmc.VM_CONTEXT_PAGE_TABLE_START_ADDR_LO32[i] = RREG32_SOC15_OFFSET(GC, 0, mmGCVM_CONTEXT0_PAGE_TABLE_START_ADDR_LO32, i * 2); + adev->gmc.VM_CONTEXT_PAGE_TABLE_START_ADDR_HI32[i] = RREG32_SOC15_OFFSET(GC, 0, mmGCVM_CONTEXT0_PAGE_TABLE_START_ADDR_HI32, i * 2); + adev->gmc.VM_CONTEXT_PAGE_TABLE_END_ADDR_LO32[i] = RREG32_SOC15_OFFSET(GC, 0, mmGCVM_CONTEXT0_PAGE_TABLE_END_ADDR_LO32, i * 2); + adev->gmc.VM_CONTEXT_PAGE_TABLE_END_ADDR_HI32[i] = RREG32_SOC15_OFFSET(GC, 0, mmGCVM_CONTEXT0_PAGE_TABLE_END_ADDR_HI32, i * 2); + } + + adev->gmc.MC_VM_MX_L1_TLB_CNTL = RREG32_SOC15(GC, 0, mmGCMC_VM_MX_L1_TLB_CNTL); +} + +static void gfxhub_v2_1_restore_regs(struct amdgpu_device *adev) +{ + int i; + WREG32_SOC15(GC, 0, mmGCVM_L2_CNTL, adev->gmc.VM_L2_CNTL); + WREG32_SOC15(GC, 0, mmGCVM_L2_CNTL2, adev->gmc.VM_L2_CNTL2); + WREG32_SOC15(GC, 0, mmGCVM_DUMMY_PAGE_FAULT_CNTL, adev->gmc.VM_DUMMY_PAGE_FAULT_CNTL); + WREG32_SOC15(GC, 0, mmGCVM_DUMMY_PAGE_FAULT_ADDR_LO32, adev->gmc.VM_DUMMY_PAGE_FAULT_ADDR_LO32); + WREG32_SOC15(GC, 0, mmGCVM_DUMMY_PAGE_FAULT_ADDR_HI32, adev->gmc.VM_DUMMY_PAGE_FAULT_ADDR_HI32); + WREG32_SOC15(GC, 0, mmGCVM_L2_PROTECTION_FAULT_CNTL, adev->gmc.VM_L2_PROTECTION_FAULT_CNTL); + WREG32_SOC15(GC, 0, mmGCVM_L2_PROTECTION_FAULT_CNTL2, adev->gmc.VM_L2_PROTECTION_FAULT_CNTL2); + WREG32_SOC15(GC, 0, mmGCVM_L2_PROTECTION_FAULT_MM_CNTL3, adev->gmc.VM_L2_PROTECTION_FAULT_MM_CNTL3); + WREG32_SOC15(GC, 0, mmGCVM_L2_PROTECTION_FAULT_MM_CNTL4, adev->gmc.VM_L2_PROTECTION_FAULT_MM_CNTL4); + 
WREG32_SOC15(GC, 0, mmGCVM_L2_PROTECTION_FAULT_ADDR_LO32, adev->gmc.VM_L2_PROTECTION_FAULT_ADDR_LO32); + WREG32_SOC15(GC, 0, mmGCVM_L2_PROTECTION_FAULT_ADDR_HI32, adev->gmc.VM_L2_PROTECTION_FAULT_ADDR_HI32); + WREG32_SOC15(GC, 0, mmGCVM_DEBUG, adev->gmc.VM_DEBUG); + WREG32_SOC15(GC, 0, mmGCVM_L2_MM_GROUP_RT_CLASSES, adev->gmc.VM_L2_MM_GROUP_RT_CLASSES); + WREG32_SOC15(GC, 0, mmGCVM_L2_BANK_SELECT_RESERVED_CID, adev->gmc.VM_L2_BANK_SELECT_RESERVED_CID); + WREG32_SOC15(GC, 0, mmGCVM_L2_BANK_SELECT_RESERVED_CID2, adev->gmc.VM_L2_BANK_SELECT_RESERVED_CID2); + WREG32_SOC15(GC, 0, mmGCVM_L2_CACHE_PARITY_CNTL, adev->gmc.VM_L2_CACHE_PARITY_CNTL); + WREG32_SOC15(GC, 0, mmGCVM_L2_IH_LOG_CNTL, adev->gmc.VM_L2_IH_LOG_CNTL); + + for (i = 0; i <= 15; i++) { + WREG32_SOC15_OFFSET(GC, 0, mmGCVM_CONTEXT0_CNTL, i, adev->gmc.VM_CONTEXT_CNTL[i]); + WREG32_SOC15_OFFSET(GC, 0, mmGCVM_CONTEXT0_PAGE_TABLE_BASE_ADDR_LO32, i * 2, adev->gmc.VM_CONTEXT_PAGE_TABLE_BASE_ADDR_LO32[i]); + WREG32_SOC15_OFFSET(GC, 0, mmGCVM_CONTEXT0_PAGE_TABLE_BASE_ADDR_HI32, i * 2, adev->gmc.VM_CONTEXT_PAGE_TABLE_BASE_ADDR_HI32[i]); + WREG32_SOC15_OFFSET(GC, 0, mmGCVM_CONTEXT0_PAGE_TABLE_START_ADDR_LO32, i * 2, adev->gmc.VM_CONTEXT_PAGE_TABLE_START_ADDR_LO32[i]); + WREG32_SOC15_OFFSET(GC, 0, mmGCVM_CONTEXT0_PAGE_TABLE_START_ADDR_HI32, i * 2, adev->gmc.VM_CONTEXT_PAGE_TABLE_START_ADDR_HI32[i]); + WREG32_SOC15_OFFSET(GC, 0, mmGCVM_CONTEXT0_PAGE_TABLE_END_ADDR_LO32, i * 2, adev->gmc.VM_CONTEXT_PAGE_TABLE_END_ADDR_LO32[i]); + WREG32_SOC15_OFFSET(GC, 0, mmGCVM_CONTEXT0_PAGE_TABLE_END_ADDR_HI32, i * 2, adev->gmc.VM_CONTEXT_PAGE_TABLE_END_ADDR_HI32[i]); + } + + WREG32_SOC15(GC, 0, mmGCMC_VM_FB_LOCATION_BASE, adev->gmc.vram_start >> 24); + WREG32_SOC15(GC, 0, mmGCMC_VM_FB_LOCATION_TOP, adev->gmc.vram_end >> 24); + WREG32_SOC15(GC, 0, mmGCMC_VM_MX_L1_TLB_CNTL, adev->gmc.MC_VM_MX_L1_TLB_CNTL); +} + +static void gfxhub_v2_1_halt(struct amdgpu_device *adev) +{ + struct amdgpu_vmhub *hub = &adev->vmhub[AMDGPU_GFXHUB_0]; + int i; + uint32_t tmp; + int time = 1000; + + gfxhub_v2_1_set_fault_enable_default(adev, false); + + for (i = 0; i <= 14; i++) { + WREG32_SOC15_OFFSET(GC, 0, mmGCVM_CONTEXT1_PAGE_TABLE_START_ADDR_LO32, + i * hub->ctx_addr_distance, ~0); + WREG32_SOC15_OFFSET(GC, 0, mmGCVM_CONTEXT1_PAGE_TABLE_START_ADDR_HI32, + i * hub->ctx_addr_distance, ~0); + WREG32_SOC15_OFFSET(GC, 0, mmGCVM_CONTEXT1_PAGE_TABLE_END_ADDR_LO32, + i * hub->ctx_addr_distance, + 0); + WREG32_SOC15_OFFSET(GC, 0, mmGCVM_CONTEXT1_PAGE_TABLE_END_ADDR_HI32, + i * hub->ctx_addr_distance, + 0); + } + tmp = RREG32_SOC15(GC, 0, mmGRBM_STATUS2); + while ((tmp & (GRBM_STATUS2__EA_BUSY_MASK | + GRBM_STATUS2__EA_LINK_BUSY_MASK)) != 0 && + time) { + udelay(100); + time--; + tmp = RREG32_SOC15(GC, 0, mmGRBM_STATUS2); + } + + if (!time) { + DRM_WARN("failed to wait for GRBM(EA) idle\n"); + } +} + const struct amdgpu_gfxhub_funcs gfxhub_v2_1_funcs = { .get_fb_location = gfxhub_v2_1_get_fb_location, .get_mc_fb_offset = gfxhub_v2_1_get_mc_fb_offset, @@ -586,4 +691,7 @@ const struct amdgpu_gfxhub_funcs gfxhub_v2_1_funcs = { .init = gfxhub_v2_1_init, .get_xgmi_info = gfxhub_v2_1_get_xgmi_info, .utcl2_harvest = gfxhub_v2_1_utcl2_harvest, + .mode2_save_regs = gfxhub_v2_1_save_regs, + .mode2_restore_regs = gfxhub_v2_1_restore_regs, + .halt = gfxhub_v2_1_halt, }; diff --git a/drivers/gpu/drm/amd/amdgpu/gfxhub_v3_0_3.c b/drivers/gpu/drm/amd/amdgpu/gfxhub_v3_0_3.c new file mode 100644 index 000000000000..5d3fffd4929f --- /dev/null +++ b/drivers/gpu/drm/amd/amdgpu/gfxhub_v3_0_3.c @@ -0,0 +1,511 
@@ +/* + * Copyright 2022 Advanced Micro Devices, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + * + */ + +#include "amdgpu.h" +#include "gfxhub_v3_0_3.h" + +#include "gc/gc_11_0_3_offset.h" +#include "gc/gc_11_0_3_sh_mask.h" +#include "navi10_enum.h" +#include "soc15_common.h" + +#define regGCVM_L2_CNTL3_DEFAULT 0x80100007 +#define regGCVM_L2_CNTL4_DEFAULT 0x000000c1 +#define regGCVM_L2_CNTL5_DEFAULT 0x00003fe0 + +static const char *gfxhub_client_ids[] = { + "CB/DB", + "Reserved", + "GE1", + "GE2", + "CPF", + "CPC", + "CPG", + "RLC", + "TCP", + "SQC (inst)", + "SQC (data)", + "SQG", + "Reserved", + "SDMA0", + "SDMA1", + "GCR", + "SDMA2", + "SDMA3", +}; + +static uint32_t gfxhub_v3_0_3_get_invalidate_req(unsigned int vmid, + uint32_t flush_type) +{ + u32 req = 0; + + /* invalidate using legacy mode on vmid*/ + req = REG_SET_FIELD(req, GCVM_INVALIDATE_ENG0_REQ, + PER_VMID_INVALIDATE_REQ, 1 << vmid); + req = REG_SET_FIELD(req, GCVM_INVALIDATE_ENG0_REQ, FLUSH_TYPE, flush_type); + req = REG_SET_FIELD(req, GCVM_INVALIDATE_ENG0_REQ, INVALIDATE_L2_PTES, 1); + req = REG_SET_FIELD(req, GCVM_INVALIDATE_ENG0_REQ, INVALIDATE_L2_PDE0, 1); + req = REG_SET_FIELD(req, GCVM_INVALIDATE_ENG0_REQ, INVALIDATE_L2_PDE1, 1); + req = REG_SET_FIELD(req, GCVM_INVALIDATE_ENG0_REQ, INVALIDATE_L2_PDE2, 1); + req = REG_SET_FIELD(req, GCVM_INVALIDATE_ENG0_REQ, INVALIDATE_L1_PTES, 1); + req = REG_SET_FIELD(req, GCVM_INVALIDATE_ENG0_REQ, + CLEAR_PROTECTION_FAULT_STATUS_ADDR, 0); + + return req; +} + +static void +gfxhub_v3_0_3_print_l2_protection_fault_status(struct amdgpu_device *adev, + uint32_t status) +{ + u32 cid = REG_GET_FIELD(status, + GCVM_L2_PROTECTION_FAULT_STATUS, CID); + + dev_err(adev->dev, + "GCVM_L2_PROTECTION_FAULT_STATUS:0x%08X\n", + status); + dev_err(adev->dev, "\t Faulty UTCL2 client ID: %s (0x%x)\n", + cid >= ARRAY_SIZE(gfxhub_client_ids) ? 
"unknown" : gfxhub_client_ids[cid], + cid); + dev_err(adev->dev, "\t MORE_FAULTS: 0x%lx\n", + REG_GET_FIELD(status, + GCVM_L2_PROTECTION_FAULT_STATUS, MORE_FAULTS)); + dev_err(adev->dev, "\t WALKER_ERROR: 0x%lx\n", + REG_GET_FIELD(status, + GCVM_L2_PROTECTION_FAULT_STATUS, WALKER_ERROR)); + dev_err(adev->dev, "\t PERMISSION_FAULTS: 0x%lx\n", + REG_GET_FIELD(status, + GCVM_L2_PROTECTION_FAULT_STATUS, PERMISSION_FAULTS)); + dev_err(adev->dev, "\t MAPPING_ERROR: 0x%lx\n", + REG_GET_FIELD(status, + GCVM_L2_PROTECTION_FAULT_STATUS, MAPPING_ERROR)); + dev_err(adev->dev, "\t RW: 0x%lx\n", + REG_GET_FIELD(status, + GCVM_L2_PROTECTION_FAULT_STATUS, RW)); +} + +static u64 gfxhub_v3_0_3_get_fb_location(struct amdgpu_device *adev) +{ + u64 base = RREG32_SOC15(GC, 0, regGCMC_VM_FB_LOCATION_BASE); + + base &= GCMC_VM_FB_LOCATION_BASE__FB_BASE_MASK; + base <<= 24; + + return base; +} + +static u64 gfxhub_v3_0_3_get_mc_fb_offset(struct amdgpu_device *adev) +{ + return (u64)RREG32_SOC15(GC, 0, regGCMC_VM_FB_OFFSET) << 24; +} + +static void gfxhub_v3_0_3_setup_vm_pt_regs(struct amdgpu_device *adev, uint32_t vmid, + uint64_t page_table_base) +{ + struct amdgpu_vmhub *hub = &adev->vmhub[AMDGPU_GFXHUB_0]; + + WREG32_SOC15_OFFSET(GC, 0, regGCVM_CONTEXT0_PAGE_TABLE_BASE_ADDR_LO32, + hub->ctx_addr_distance * vmid, + lower_32_bits(page_table_base)); + + WREG32_SOC15_OFFSET(GC, 0, regGCVM_CONTEXT0_PAGE_TABLE_BASE_ADDR_HI32, + hub->ctx_addr_distance * vmid, + upper_32_bits(page_table_base)); +} + +static void gfxhub_v3_0_3_init_gart_aperture_regs(struct amdgpu_device *adev) +{ + uint64_t pt_base = amdgpu_gmc_pd_addr(adev->gart.bo); + + gfxhub_v3_0_3_setup_vm_pt_regs(adev, 0, pt_base); + + WREG32_SOC15(GC, 0, regGCVM_CONTEXT0_PAGE_TABLE_START_ADDR_LO32, + (u32)(adev->gmc.gart_start >> 12)); + WREG32_SOC15(GC, 0, regGCVM_CONTEXT0_PAGE_TABLE_START_ADDR_HI32, + (u32)(adev->gmc.gart_start >> 44)); + + WREG32_SOC15(GC, 0, regGCVM_CONTEXT0_PAGE_TABLE_END_ADDR_LO32, + (u32)(adev->gmc.gart_end >> 12)); + WREG32_SOC15(GC, 0, regGCVM_CONTEXT0_PAGE_TABLE_END_ADDR_HI32, + (u32)(adev->gmc.gart_end >> 44)); +} + +static void gfxhub_v3_0_3_init_system_aperture_regs(struct amdgpu_device *adev) +{ + uint64_t value; + + /* Disable AGP. */ + WREG32_SOC15(GC, 0, regGCMC_VM_AGP_BASE, 0); + WREG32_SOC15(GC, 0, regGCMC_VM_AGP_TOP, 0); + WREG32_SOC15(GC, 0, regGCMC_VM_AGP_BOT, 0x00FFFFFF); + + /* Program the system aperture low logical page number. */ + WREG32_SOC15(GC, 0, regGCMC_VM_SYSTEM_APERTURE_LOW_ADDR, + adev->gmc.vram_start >> 18); + WREG32_SOC15(GC, 0, regGCMC_VM_SYSTEM_APERTURE_HIGH_ADDR, + adev->gmc.vram_end >> 18); + + /* Set default page address. */ + value = adev->vram_scratch.gpu_addr - adev->gmc.vram_start + + adev->vm_manager.vram_base_offset; + WREG32_SOC15(GC, 0, regGCMC_VM_SYSTEM_APERTURE_DEFAULT_ADDR_LSB, + (u32)(value >> 12)); + WREG32_SOC15(GC, 0, regGCMC_VM_SYSTEM_APERTURE_DEFAULT_ADDR_MSB, + (u32)(value >> 44)); + + /* Program "protection fault". 
*/ + WREG32_SOC15(GC, 0, regGCVM_L2_PROTECTION_FAULT_DEFAULT_ADDR_LO32, + (u32)(adev->dummy_page_addr >> 12)); + WREG32_SOC15(GC, 0, regGCVM_L2_PROTECTION_FAULT_DEFAULT_ADDR_HI32, + (u32)((u64)adev->dummy_page_addr >> 44)); + + WREG32_FIELD15_PREREG(GC, 0, GCVM_L2_PROTECTION_FAULT_CNTL2, + ACTIVE_PAGE_MIGRATION_PTE_READ_RETRY, 1); +} + + +static void gfxhub_v3_0_3_init_tlb_regs(struct amdgpu_device *adev) +{ + uint32_t tmp; + + /* Setup TLB control */ + tmp = RREG32_SOC15(GC, 0, regGCMC_VM_MX_L1_TLB_CNTL); + + tmp = REG_SET_FIELD(tmp, GCMC_VM_MX_L1_TLB_CNTL, ENABLE_L1_TLB, 1); + tmp = REG_SET_FIELD(tmp, GCMC_VM_MX_L1_TLB_CNTL, SYSTEM_ACCESS_MODE, 3); + tmp = REG_SET_FIELD(tmp, GCMC_VM_MX_L1_TLB_CNTL, + ENABLE_ADVANCED_DRIVER_MODEL, 1); + tmp = REG_SET_FIELD(tmp, GCMC_VM_MX_L1_TLB_CNTL, + SYSTEM_APERTURE_UNMAPPED_ACCESS, 0); + tmp = REG_SET_FIELD(tmp, GCMC_VM_MX_L1_TLB_CNTL, ECO_BITS, 0); + tmp = REG_SET_FIELD(tmp, GCMC_VM_MX_L1_TLB_CNTL, + MTYPE, MTYPE_UC); /* UC, uncached */ + + WREG32_SOC15(GC, 0, regGCMC_VM_MX_L1_TLB_CNTL, tmp); +} + +static void gfxhub_v3_0_3_init_cache_regs(struct amdgpu_device *adev) +{ + uint32_t tmp; + + /* These registers are not accessible to VF-SRIOV. + * The PF will program them instead. + */ + if (amdgpu_sriov_vf(adev)) + return; + + /* Setup L2 cache */ + tmp = RREG32_SOC15(GC, 0, regGCVM_L2_CNTL); + tmp = REG_SET_FIELD(tmp, GCVM_L2_CNTL, ENABLE_L2_CACHE, 1); + tmp = REG_SET_FIELD(tmp, GCVM_L2_CNTL, ENABLE_L2_FRAGMENT_PROCESSING, 0); + tmp = REG_SET_FIELD(tmp, GCVM_L2_CNTL, + ENABLE_DEFAULT_PAGE_OUT_TO_SYSTEM_MEMORY, 1); + /* XXX for emulation, Refer to closed source code.*/ + tmp = REG_SET_FIELD(tmp, GCVM_L2_CNTL, + L2_PDE0_CACHE_TAG_GENERATION_MODE, 0); + tmp = REG_SET_FIELD(tmp, GCVM_L2_CNTL, PDE_FAULT_CLASSIFICATION, 0); + tmp = REG_SET_FIELD(tmp, GCVM_L2_CNTL, CONTEXT1_IDENTITY_ACCESS_MODE, 1); + tmp = REG_SET_FIELD(tmp, GCVM_L2_CNTL, IDENTITY_MODE_FRAGMENT_SIZE, 0); + WREG32_SOC15(GC, 0, regGCVM_L2_CNTL, tmp); + + tmp = RREG32_SOC15(GC, 0, regGCVM_L2_CNTL2); + tmp = REG_SET_FIELD(tmp, GCVM_L2_CNTL2, INVALIDATE_ALL_L1_TLBS, 1); + tmp = REG_SET_FIELD(tmp, GCVM_L2_CNTL2, INVALIDATE_L2_CACHE, 1); + WREG32_SOC15(GC, 0, regGCVM_L2_CNTL2, tmp); + + tmp = regGCVM_L2_CNTL3_DEFAULT; + if (adev->gmc.translate_further) { + tmp = REG_SET_FIELD(tmp, GCVM_L2_CNTL3, BANK_SELECT, 12); + tmp = REG_SET_FIELD(tmp, GCVM_L2_CNTL3, + L2_CACHE_BIGK_FRAGMENT_SIZE, 9); + } else { + tmp = REG_SET_FIELD(tmp, GCVM_L2_CNTL3, BANK_SELECT, 9); + tmp = REG_SET_FIELD(tmp, GCVM_L2_CNTL3, + L2_CACHE_BIGK_FRAGMENT_SIZE, 6); + } + WREG32_SOC15(GC, 0, regGCVM_L2_CNTL3, tmp); + + tmp = regGCVM_L2_CNTL4_DEFAULT; + tmp = REG_SET_FIELD(tmp, GCVM_L2_CNTL4, VMC_TAP_PDE_REQUEST_PHYSICAL, 0); + tmp = REG_SET_FIELD(tmp, GCVM_L2_CNTL4, VMC_TAP_PTE_REQUEST_PHYSICAL, 0); + WREG32_SOC15(GC, 0, regGCVM_L2_CNTL4, tmp); + + tmp = regGCVM_L2_CNTL5_DEFAULT; + tmp = REG_SET_FIELD(tmp, GCVM_L2_CNTL5, L2_CACHE_SMALLK_FRAGMENT_SIZE, 0); + WREG32_SOC15(GC, 0, regGCVM_L2_CNTL5, tmp); +} + +static void gfxhub_v3_0_3_enable_system_domain(struct amdgpu_device *adev) +{ + uint32_t tmp; + + tmp = RREG32_SOC15(GC, 0, regGCVM_CONTEXT0_CNTL); + tmp = REG_SET_FIELD(tmp, GCVM_CONTEXT0_CNTL, ENABLE_CONTEXT, 1); + tmp = REG_SET_FIELD(tmp, GCVM_CONTEXT0_CNTL, PAGE_TABLE_DEPTH, 0); + tmp = REG_SET_FIELD(tmp, GCVM_CONTEXT0_CNTL, + RETRY_PERMISSION_OR_INVALID_PAGE_FAULT, 0); + WREG32_SOC15(GC, 0, regGCVM_CONTEXT0_CNTL, tmp); +} + +static void gfxhub_v3_0_3_disable_identity_aperture(struct amdgpu_device *adev) +{ + /* These 
registers are not accessible to VF-SRIOV. + * The PF will program them instead. + */ + if (amdgpu_sriov_vf(adev)) + return; + + WREG32_SOC15(GC, 0, regGCVM_L2_CONTEXT1_IDENTITY_APERTURE_LOW_ADDR_LO32, + 0xFFFFFFFF); + WREG32_SOC15(GC, 0, regGCVM_L2_CONTEXT1_IDENTITY_APERTURE_LOW_ADDR_HI32, + 0x0000000F); + + WREG32_SOC15(GC, 0, regGCVM_L2_CONTEXT1_IDENTITY_APERTURE_HIGH_ADDR_LO32, + 0); + WREG32_SOC15(GC, 0, regGCVM_L2_CONTEXT1_IDENTITY_APERTURE_HIGH_ADDR_HI32, + 0); + + WREG32_SOC15(GC, 0, regGCVM_L2_CONTEXT_IDENTITY_PHYSICAL_OFFSET_LO32, 0); + WREG32_SOC15(GC, 0, regGCVM_L2_CONTEXT_IDENTITY_PHYSICAL_OFFSET_HI32, 0); + +} + +static void gfxhub_v3_0_3_setup_vmid_config(struct amdgpu_device *adev) +{ + struct amdgpu_vmhub *hub = &adev->vmhub[AMDGPU_GFXHUB_0]; + int i; + uint32_t tmp; + + for (i = 0; i <= 14; i++) { + tmp = RREG32_SOC15_OFFSET(GC, 0, regGCVM_CONTEXT1_CNTL, i); + tmp = REG_SET_FIELD(tmp, GCVM_CONTEXT1_CNTL, ENABLE_CONTEXT, 1); + tmp = REG_SET_FIELD(tmp, GCVM_CONTEXT1_CNTL, PAGE_TABLE_DEPTH, + adev->vm_manager.num_level); + tmp = REG_SET_FIELD(tmp, GCVM_CONTEXT1_CNTL, + RANGE_PROTECTION_FAULT_ENABLE_DEFAULT, 1); + tmp = REG_SET_FIELD(tmp, GCVM_CONTEXT1_CNTL, + DUMMY_PAGE_PROTECTION_FAULT_ENABLE_DEFAULT, 1); + tmp = REG_SET_FIELD(tmp, GCVM_CONTEXT1_CNTL, + PDE0_PROTECTION_FAULT_ENABLE_DEFAULT, 1); + tmp = REG_SET_FIELD(tmp, GCVM_CONTEXT1_CNTL, + VALID_PROTECTION_FAULT_ENABLE_DEFAULT, 1); + tmp = REG_SET_FIELD(tmp, GCVM_CONTEXT1_CNTL, + READ_PROTECTION_FAULT_ENABLE_DEFAULT, 1); + tmp = REG_SET_FIELD(tmp, GCVM_CONTEXT1_CNTL, + WRITE_PROTECTION_FAULT_ENABLE_DEFAULT, 1); + tmp = REG_SET_FIELD(tmp, GCVM_CONTEXT1_CNTL, + EXECUTE_PROTECTION_FAULT_ENABLE_DEFAULT, 1); + tmp = REG_SET_FIELD(tmp, GCVM_CONTEXT1_CNTL, + PAGE_TABLE_BLOCK_SIZE, + adev->vm_manager.block_size - 9); + /* Send no-retry XNACK on fault to suppress VM fault storm. */ + tmp = REG_SET_FIELD(tmp, GCVM_CONTEXT1_CNTL, + RETRY_PERMISSION_OR_INVALID_PAGE_FAULT, + !amdgpu_noretry); + WREG32_SOC15_OFFSET(GC, 0, regGCVM_CONTEXT1_CNTL, + i * hub->ctx_distance, tmp); + WREG32_SOC15_OFFSET(GC, 0, regGCVM_CONTEXT1_PAGE_TABLE_START_ADDR_LO32, + i * hub->ctx_addr_distance, 0); + WREG32_SOC15_OFFSET(GC, 0, regGCVM_CONTEXT1_PAGE_TABLE_START_ADDR_HI32, + i * hub->ctx_addr_distance, 0); + WREG32_SOC15_OFFSET(GC, 0, regGCVM_CONTEXT1_PAGE_TABLE_END_ADDR_LO32, + i * hub->ctx_addr_distance, + lower_32_bits(adev->vm_manager.max_pfn - 1)); + WREG32_SOC15_OFFSET(GC, 0, regGCVM_CONTEXT1_PAGE_TABLE_END_ADDR_HI32, + i * hub->ctx_addr_distance, + upper_32_bits(adev->vm_manager.max_pfn - 1)); + } + + hub->vm_cntx_cntl = tmp; +} + +static void gfxhub_v3_0_3_program_invalidation(struct amdgpu_device *adev) +{ + struct amdgpu_vmhub *hub = &adev->vmhub[AMDGPU_GFXHUB_0]; + unsigned i; + + for (i = 0 ; i < 18; ++i) { + WREG32_SOC15_OFFSET(GC, 0, regGCVM_INVALIDATE_ENG0_ADDR_RANGE_LO32, + i * hub->eng_addr_distance, 0xffffffff); + WREG32_SOC15_OFFSET(GC, 0, regGCVM_INVALIDATE_ENG0_ADDR_RANGE_HI32, + i * hub->eng_addr_distance, 0x1f); + } +} + +static int gfxhub_v3_0_3_gart_enable(struct amdgpu_device *adev) +{ + if (amdgpu_sriov_vf(adev)) { + /* + * GCMC_VM_FB_LOCATION_BASE/TOP is NULL for VF, becuase they are + * VF copy registers so vbios post doesn't program them, for + * SRIOV driver need to program them + */ + WREG32_SOC15(GC, 0, regGCMC_VM_FB_LOCATION_BASE, + adev->gmc.vram_start >> 24); + WREG32_SOC15(GC, 0, regGCMC_VM_FB_LOCATION_TOP, + adev->gmc.vram_end >> 24); + } + + /* GART Enable. 
*/ + gfxhub_v3_0_3_init_gart_aperture_regs(adev); + gfxhub_v3_0_3_init_system_aperture_regs(adev); + gfxhub_v3_0_3_init_tlb_regs(adev); + gfxhub_v3_0_3_init_cache_regs(adev); + + gfxhub_v3_0_3_enable_system_domain(adev); + gfxhub_v3_0_3_disable_identity_aperture(adev); + gfxhub_v3_0_3_setup_vmid_config(adev); + gfxhub_v3_0_3_program_invalidation(adev); + + return 0; +} + +static void gfxhub_v3_0_3_gart_disable(struct amdgpu_device *adev) +{ + struct amdgpu_vmhub *hub = &adev->vmhub[AMDGPU_GFXHUB_0]; + u32 tmp; + u32 i; + + /* Disable all tables */ + for (i = 0; i < 16; i++) + WREG32_SOC15_OFFSET(GC, 0, regGCVM_CONTEXT0_CNTL, + i * hub->ctx_distance, 0); + + /* Setup TLB control */ + tmp = RREG32_SOC15(GC, 0, regGCMC_VM_MX_L1_TLB_CNTL); + tmp = REG_SET_FIELD(tmp, GCMC_VM_MX_L1_TLB_CNTL, ENABLE_L1_TLB, 0); + tmp = REG_SET_FIELD(tmp, GCMC_VM_MX_L1_TLB_CNTL, + ENABLE_ADVANCED_DRIVER_MODEL, 0); + WREG32_SOC15(GC, 0, regGCMC_VM_MX_L1_TLB_CNTL, tmp); + + /* Setup L2 cache */ + WREG32_FIELD15_PREREG(GC, 0, GCVM_L2_CNTL, ENABLE_L2_CACHE, 0); + WREG32_SOC15(GC, 0, regGCVM_L2_CNTL3, 0); +} + +/** + * gfxhub_v3_0_3_set_fault_enable_default - update GART/VM fault handling + * + * @adev: amdgpu_device pointer + * @value: true redirects VM faults to the default page + */ +static void gfxhub_v3_0_3_set_fault_enable_default(struct amdgpu_device *adev, + bool value) +{ + u32 tmp; + + /* These registers are not accessible to VF-SRIOV. + * The PF will program them instead. + */ + if (amdgpu_sriov_vf(adev)) + return; + + tmp = RREG32_SOC15(GC, 0, regGCVM_L2_PROTECTION_FAULT_CNTL); + tmp = REG_SET_FIELD(tmp, GCVM_L2_PROTECTION_FAULT_CNTL, + RANGE_PROTECTION_FAULT_ENABLE_DEFAULT, value); + tmp = REG_SET_FIELD(tmp, GCVM_L2_PROTECTION_FAULT_CNTL, + PDE0_PROTECTION_FAULT_ENABLE_DEFAULT, value); + tmp = REG_SET_FIELD(tmp, GCVM_L2_PROTECTION_FAULT_CNTL, + PDE1_PROTECTION_FAULT_ENABLE_DEFAULT, value); + tmp = REG_SET_FIELD(tmp, GCVM_L2_PROTECTION_FAULT_CNTL, + PDE2_PROTECTION_FAULT_ENABLE_DEFAULT, value); + tmp = REG_SET_FIELD(tmp, GCVM_L2_PROTECTION_FAULT_CNTL, + TRANSLATE_FURTHER_PROTECTION_FAULT_ENABLE_DEFAULT, + value); + tmp = REG_SET_FIELD(tmp, GCVM_L2_PROTECTION_FAULT_CNTL, + NACK_PROTECTION_FAULT_ENABLE_DEFAULT, value); + tmp = REG_SET_FIELD(tmp, GCVM_L2_PROTECTION_FAULT_CNTL, + DUMMY_PAGE_PROTECTION_FAULT_ENABLE_DEFAULT, value); + tmp = REG_SET_FIELD(tmp, GCVM_L2_PROTECTION_FAULT_CNTL, + VALID_PROTECTION_FAULT_ENABLE_DEFAULT, value); + tmp = REG_SET_FIELD(tmp, GCVM_L2_PROTECTION_FAULT_CNTL, + READ_PROTECTION_FAULT_ENABLE_DEFAULT, value); + tmp = REG_SET_FIELD(tmp, GCVM_L2_PROTECTION_FAULT_CNTL, + WRITE_PROTECTION_FAULT_ENABLE_DEFAULT, value); + tmp = REG_SET_FIELD(tmp, GCVM_L2_PROTECTION_FAULT_CNTL, + EXECUTE_PROTECTION_FAULT_ENABLE_DEFAULT, value); + if (!value) { + tmp = REG_SET_FIELD(tmp, GCVM_L2_PROTECTION_FAULT_CNTL, + CRASH_ON_NO_RETRY_FAULT, 1); + tmp = REG_SET_FIELD(tmp, GCVM_L2_PROTECTION_FAULT_CNTL, + CRASH_ON_RETRY_FAULT, 1); + } + WREG32_SOC15(GC, 0, regGCVM_L2_PROTECTION_FAULT_CNTL, tmp); +} + +static const struct amdgpu_vmhub_funcs gfxhub_v3_0_3_vmhub_funcs = { + .print_l2_protection_fault_status = gfxhub_v3_0_3_print_l2_protection_fault_status, + .get_invalidate_req = gfxhub_v3_0_3_get_invalidate_req, +}; + +static void gfxhub_v3_0_3_init(struct amdgpu_device *adev) +{ + struct amdgpu_vmhub *hub = &adev->vmhub[AMDGPU_GFXHUB_0]; + + hub->ctx0_ptb_addr_lo32 = + SOC15_REG_OFFSET(GC, 0, + regGCVM_CONTEXT0_PAGE_TABLE_BASE_ADDR_LO32); + hub->ctx0_ptb_addr_hi32 = + SOC15_REG_OFFSET(GC, 0, + 
regGCVM_CONTEXT0_PAGE_TABLE_BASE_ADDR_HI32); + hub->vm_inv_eng0_sem = + SOC15_REG_OFFSET(GC, 0, regGCVM_INVALIDATE_ENG0_SEM); + hub->vm_inv_eng0_req = + SOC15_REG_OFFSET(GC, 0, regGCVM_INVALIDATE_ENG0_REQ); + hub->vm_inv_eng0_ack = + SOC15_REG_OFFSET(GC, 0, regGCVM_INVALIDATE_ENG0_ACK); + hub->vm_context0_cntl = + SOC15_REG_OFFSET(GC, 0, regGCVM_CONTEXT0_CNTL); + hub->vm_l2_pro_fault_status = + SOC15_REG_OFFSET(GC, 0, regGCVM_L2_PROTECTION_FAULT_STATUS); + hub->vm_l2_pro_fault_cntl = + SOC15_REG_OFFSET(GC, 0, regGCVM_L2_PROTECTION_FAULT_CNTL); + + hub->ctx_distance = regGCVM_CONTEXT1_CNTL - regGCVM_CONTEXT0_CNTL; + hub->ctx_addr_distance = regGCVM_CONTEXT1_PAGE_TABLE_BASE_ADDR_LO32 - + regGCVM_CONTEXT0_PAGE_TABLE_BASE_ADDR_LO32; + hub->eng_distance = regGCVM_INVALIDATE_ENG1_REQ - + regGCVM_INVALIDATE_ENG0_REQ; + hub->eng_addr_distance = regGCVM_INVALIDATE_ENG1_ADDR_RANGE_LO32 - + regGCVM_INVALIDATE_ENG0_ADDR_RANGE_LO32; + + hub->vm_cntx_cntl_vm_fault = GCVM_CONTEXT1_CNTL__RANGE_PROTECTION_FAULT_ENABLE_INTERRUPT_MASK | + GCVM_CONTEXT1_CNTL__DUMMY_PAGE_PROTECTION_FAULT_ENABLE_INTERRUPT_MASK | + GCVM_CONTEXT1_CNTL__PDE0_PROTECTION_FAULT_ENABLE_INTERRUPT_MASK | + GCVM_CONTEXT1_CNTL__VALID_PROTECTION_FAULT_ENABLE_INTERRUPT_MASK | + GCVM_CONTEXT1_CNTL__READ_PROTECTION_FAULT_ENABLE_INTERRUPT_MASK | + GCVM_CONTEXT1_CNTL__WRITE_PROTECTION_FAULT_ENABLE_INTERRUPT_MASK | + GCVM_CONTEXT1_CNTL__EXECUTE_PROTECTION_FAULT_ENABLE_INTERRUPT_MASK; + + hub->vmhub_funcs = &gfxhub_v3_0_3_vmhub_funcs; +} + +const struct amdgpu_gfxhub_funcs gfxhub_v3_0_3_funcs = { + .get_fb_location = gfxhub_v3_0_3_get_fb_location, + .get_mc_fb_offset = gfxhub_v3_0_3_get_mc_fb_offset, + .setup_vm_pt_regs = gfxhub_v3_0_3_setup_vm_pt_regs, + .gart_enable = gfxhub_v3_0_3_gart_enable, + .gart_disable = gfxhub_v3_0_3_gart_disable, + .set_fault_enable_default = gfxhub_v3_0_3_set_fault_enable_default, + .init = gfxhub_v3_0_3_init, +}; diff --git a/drivers/gpu/drm/amd/amdgpu/gfxhub_v3_0_3.h b/drivers/gpu/drm/amd/amdgpu/gfxhub_v3_0_3.h new file mode 100644 index 000000000000..6153bd5e3083 --- /dev/null +++ b/drivers/gpu/drm/amd/amdgpu/gfxhub_v3_0_3.h @@ -0,0 +1,29 @@ +/* + * Copyright 2022 Advanced Micro Devices, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. 
+ * + */ + +#ifndef __GFXHUB_V3_0_3_H__ +#define __GFXHUB_V3_0_3_H__ + +extern const struct amdgpu_gfxhub_funcs gfxhub_v3_0_3_funcs; + +#endif diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c index 9ae8cdaa033e..f513e2c2e964 100644 --- a/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c @@ -419,6 +419,7 @@ static int gmc_v10_0_flush_gpu_tlb_pasid(struct amdgpu_device *adev, uint32_t seq; uint16_t queried_pasid; bool ret; + u32 usec_timeout = amdgpu_sriov_vf(adev) ? SRIOV_USEC_TIMEOUT : adev->usec_timeout; struct amdgpu_ring *ring = &adev->gfx.kiq.ring; struct amdgpu_kiq *kiq = &adev->gfx.kiq; @@ -437,7 +438,7 @@ static int gmc_v10_0_flush_gpu_tlb_pasid(struct amdgpu_device *adev, amdgpu_ring_commit(ring); spin_unlock(&adev->gfx.kiq.ring_lock); - r = amdgpu_fence_wait_polling(ring, seq, adev->usec_timeout); + r = amdgpu_fence_wait_polling(ring, seq, usec_timeout); if (r < 1) { dev_err(adev->dev, "wait for kiq fence error: %ld.\n", r); return -ETIME; diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v11_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v11_0.c index 1471bfb9ae38..846ccb6cf07d 100644 --- a/drivers/gpu/drm/amd/amdgpu/gmc_v11_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gmc_v11_0.c @@ -39,6 +39,7 @@ #include "soc15_common.h" #include "nbio_v4_3.h" #include "gfxhub_v3_0.h" +#include "gfxhub_v3_0_3.h" #include "mmhub_v3_0.h" #include "mmhub_v3_0_1.h" #include "mmhub_v3_0_2.h" @@ -233,7 +234,8 @@ static void gmc_v11_0_flush_vm_hub(struct amdgpu_device *adev, uint32_t vmid, /* Issue additional private vm invalidation to MMHUB */ if ((vmhub != AMDGPU_GFXHUB_0) && - (hub->vm_l2_bank_select_reserved_cid2)) { + (hub->vm_l2_bank_select_reserved_cid2) && + !amdgpu_sriov_vf(adev)) { inv_req = RREG32_NO_KIQ(hub->vm_l2_bank_select_reserved_cid2); /* bit 25: RSERVED_CACHE_PRIVATE_INVALIDATION */ inv_req |= (1 << 25); @@ -590,7 +592,14 @@ static void gmc_v11_0_set_mmhub_funcs(struct amdgpu_device *adev) static void gmc_v11_0_set_gfxhub_funcs(struct amdgpu_device *adev) { - adev->gfxhub.funcs = &gfxhub_v3_0_funcs; + switch (adev->ip_versions[GC_HWIP][0]) { + case IP_VERSION(11, 0, 3): + adev->gfxhub.funcs = &gfxhub_v3_0_3_funcs; + break; + default: + adev->gfxhub.funcs = &gfxhub_v3_0_funcs; + break; + } } static int gmc_v11_0_early_init(void *handle) @@ -640,7 +649,10 @@ static void gmc_v11_0_vram_gtt_location(struct amdgpu_device *adev, amdgpu_gmc_gart_location(adev, mc); /* base offset of vram pages */ - adev->vm_manager.vram_base_offset = adev->mmhub.funcs->get_mc_fb_offset(adev); + if (amdgpu_sriov_vf(adev)) + adev->vm_manager.vram_base_offset = 0; + else + adev->vm_manager.vram_base_offset = adev->mmhub.funcs->get_mc_fb_offset(adev); } /** @@ -732,6 +744,7 @@ static int gmc_v11_0_sw_init(void *handle) case IP_VERSION(11, 0, 0): case IP_VERSION(11, 0, 1): case IP_VERSION(11, 0, 2): + case IP_VERSION(11, 0, 3): adev->num_vmhubs = 2; /* * To fulfill 4-level page support, diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c index 22761a3bb818..67ca16a8027c 100644 --- a/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c @@ -896,6 +896,7 @@ static int gmc_v9_0_flush_gpu_tlb_pasid(struct amdgpu_device *adev, uint32_t seq; uint16_t queried_pasid; bool ret; + u32 usec_timeout = amdgpu_sriov_vf(adev) ? 
SRIOV_USEC_TIMEOUT : adev->usec_timeout; struct amdgpu_ring *ring = &adev->gfx.kiq.ring; struct amdgpu_kiq *kiq = &adev->gfx.kiq; @@ -935,7 +936,7 @@ static int gmc_v9_0_flush_gpu_tlb_pasid(struct amdgpu_device *adev, amdgpu_ring_commit(ring); spin_unlock(&adev->gfx.kiq.ring_lock); - r = amdgpu_fence_wait_polling(ring, seq, adev->usec_timeout); + r = amdgpu_fence_wait_polling(ring, seq, usec_timeout); if (r < 1) { dev_err(adev->dev, "wait for kiq fence error: %ld.\n", r); up_read(&adev->reset_domain->sem); @@ -1102,10 +1103,13 @@ static void gmc_v9_0_get_vm_pde(struct amdgpu_device *adev, int level, *flags |= AMDGPU_PDE_BFS(0x9); } else if (level == AMDGPU_VM_PDB0) { - if (*flags & AMDGPU_PDE_PTE) + if (*flags & AMDGPU_PDE_PTE) { *flags &= ~AMDGPU_PDE_PTE; - else + if (!(*flags & AMDGPU_PTE_VALID)) + *addr |= 1 << PAGE_SHIFT; + } else { *flags |= AMDGPU_PTE_TF; + } } } @@ -1624,12 +1628,15 @@ static int gmc_v9_0_sw_init(void *handle) amdgpu_vm_adjust_size(adev, 256 * 1024, 9, 3, 47); else amdgpu_vm_adjust_size(adev, 256 * 1024, 9, 3, 48); + if (adev->ip_versions[GC_HWIP][0] == IP_VERSION(9, 4, 2)) + adev->gmc.translate_further = adev->vm_manager.num_level > 1; break; case IP_VERSION(9, 4, 1): adev->num_vmhubs = 3; /* Keep the vm size same with Vega20 */ amdgpu_vm_adjust_size(adev, 256 * 1024, 9, 3, 48); + adev->gmc.translate_further = adev->vm_manager.num_level > 1; break; default: break; diff --git a/drivers/gpu/drm/amd/amdgpu/hdp_v5_2.c b/drivers/gpu/drm/amd/amdgpu/hdp_v5_2.c index 39a696cd45b5..29c3484ae1f1 100644 --- a/drivers/gpu/drm/amd/amdgpu/hdp_v5_2.c +++ b/drivers/gpu/drm/amd/amdgpu/hdp_v5_2.c @@ -40,6 +40,156 @@ static void hdp_v5_2_flush_hdp(struct amdgpu_device *adev, 0); } +static void hdp_v5_2_update_mem_power_gating(struct amdgpu_device *adev, + bool enable) +{ + uint32_t hdp_clk_cntl; + uint32_t hdp_mem_pwr_cntl; + + if (!(adev->cg_flags & (AMD_CG_SUPPORT_HDP_LS | + AMD_CG_SUPPORT_HDP_DS | + AMD_CG_SUPPORT_HDP_SD))) + return; + + hdp_clk_cntl = RREG32_SOC15(HDP, 0, regHDP_CLK_CNTL); + hdp_mem_pwr_cntl = RREG32_SOC15(HDP, 0, regHDP_MEM_POWER_CTRL); + + /* Before doing clock/power mode switch, forced on MEM clock */ + hdp_clk_cntl = REG_SET_FIELD(hdp_clk_cntl, HDP_CLK_CNTL, + ATOMIC_MEM_CLK_SOFT_OVERRIDE, 1); + hdp_clk_cntl = REG_SET_FIELD(hdp_clk_cntl, HDP_CLK_CNTL, + RC_MEM_CLK_SOFT_OVERRIDE, 1); + WREG32_SOC15(HDP, 0, regHDP_CLK_CNTL, hdp_clk_cntl); + + /* disable clock and power gating before any changing */ + hdp_mem_pwr_cntl = REG_SET_FIELD(hdp_mem_pwr_cntl, HDP_MEM_POWER_CTRL, + ATOMIC_MEM_POWER_CTRL_EN, 0); + hdp_mem_pwr_cntl = REG_SET_FIELD(hdp_mem_pwr_cntl, HDP_MEM_POWER_CTRL, + ATOMIC_MEM_POWER_LS_EN, 0); + hdp_mem_pwr_cntl = REG_SET_FIELD(hdp_mem_pwr_cntl, HDP_MEM_POWER_CTRL, + ATOMIC_MEM_POWER_DS_EN, 0); + hdp_mem_pwr_cntl = REG_SET_FIELD(hdp_mem_pwr_cntl, HDP_MEM_POWER_CTRL, + ATOMIC_MEM_POWER_SD_EN, 0); + hdp_mem_pwr_cntl = REG_SET_FIELD(hdp_mem_pwr_cntl, HDP_MEM_POWER_CTRL, + RC_MEM_POWER_CTRL_EN, 0); + hdp_mem_pwr_cntl = REG_SET_FIELD(hdp_mem_pwr_cntl, HDP_MEM_POWER_CTRL, + RC_MEM_POWER_LS_EN, 0); + hdp_mem_pwr_cntl = REG_SET_FIELD(hdp_mem_pwr_cntl, HDP_MEM_POWER_CTRL, + RC_MEM_POWER_DS_EN, 0); + hdp_mem_pwr_cntl = REG_SET_FIELD(hdp_mem_pwr_cntl, HDP_MEM_POWER_CTRL, + RC_MEM_POWER_SD_EN, 0); + WREG32_SOC15(HDP, 0, regHDP_MEM_POWER_CTRL, hdp_mem_pwr_cntl); + + /* Already disabled above. 
The actions below are for "enabled" only */ + if (enable) { + /* only one clock gating mode (LS/DS/SD) can be enabled */ + if (adev->cg_flags & AMD_CG_SUPPORT_HDP_SD) { + hdp_mem_pwr_cntl = REG_SET_FIELD(hdp_mem_pwr_cntl, + HDP_MEM_POWER_CTRL, + ATOMIC_MEM_POWER_SD_EN, 1); + hdp_mem_pwr_cntl = REG_SET_FIELD(hdp_mem_pwr_cntl, + HDP_MEM_POWER_CTRL, + RC_MEM_POWER_SD_EN, 1); + } else if (adev->cg_flags & AMD_CG_SUPPORT_HDP_LS) { + hdp_mem_pwr_cntl = REG_SET_FIELD(hdp_mem_pwr_cntl, + HDP_MEM_POWER_CTRL, + ATOMIC_MEM_POWER_LS_EN, 1); + hdp_mem_pwr_cntl = REG_SET_FIELD(hdp_mem_pwr_cntl, + HDP_MEM_POWER_CTRL, + RC_MEM_POWER_LS_EN, 1); + } else if (adev->cg_flags & AMD_CG_SUPPORT_HDP_DS) { + hdp_mem_pwr_cntl = REG_SET_FIELD(hdp_mem_pwr_cntl, + HDP_MEM_POWER_CTRL, + ATOMIC_MEM_POWER_DS_EN, 1); + hdp_mem_pwr_cntl = REG_SET_FIELD(hdp_mem_pwr_cntl, + HDP_MEM_POWER_CTRL, + RC_MEM_POWER_DS_EN, 1); + } + + /* confirmed that ATOMIC/RC_MEM_POWER_CTRL_EN have to be set for SRAM LS/DS/SD */ + if (adev->cg_flags & (AMD_CG_SUPPORT_HDP_LS | AMD_CG_SUPPORT_HDP_DS | + AMD_CG_SUPPORT_HDP_SD)) { + hdp_mem_pwr_cntl = REG_SET_FIELD(hdp_mem_pwr_cntl, HDP_MEM_POWER_CTRL, + ATOMIC_MEM_POWER_CTRL_EN, 1); + hdp_mem_pwr_cntl = REG_SET_FIELD(hdp_mem_pwr_cntl, HDP_MEM_POWER_CTRL, + RC_MEM_POWER_CTRL_EN, 1); + WREG32_SOC15(HDP, 0, regHDP_MEM_POWER_CTRL, hdp_mem_pwr_cntl); + } + } + + /* disable MEM clock override after clock/power mode changing */ + hdp_clk_cntl = REG_SET_FIELD(hdp_clk_cntl, HDP_CLK_CNTL, + ATOMIC_MEM_CLK_SOFT_OVERRIDE, 0); + hdp_clk_cntl = REG_SET_FIELD(hdp_clk_cntl, HDP_CLK_CNTL, + RC_MEM_CLK_SOFT_OVERRIDE, 0); + WREG32_SOC15(HDP, 0, regHDP_CLK_CNTL, hdp_clk_cntl); +} + +static void hdp_v5_2_update_medium_grain_clock_gating(struct amdgpu_device *adev, + bool enable) +{ + uint32_t hdp_clk_cntl; + + if (!(adev->cg_flags & AMD_CG_SUPPORT_HDP_MGCG)) + return; + + hdp_clk_cntl = RREG32_SOC15(HDP, 0, regHDP_CLK_CNTL); + + if (enable) { + hdp_clk_cntl &= + ~(uint32_t) + (HDP_CLK_CNTL__ATOMIC_MEM_CLK_SOFT_OVERRIDE_MASK | + HDP_CLK_CNTL__RC_MEM_CLK_SOFT_OVERRIDE_MASK | + HDP_CLK_CNTL__DBUS_CLK_SOFT_OVERRIDE_MASK | + HDP_CLK_CNTL__DYN_CLK_SOFT_OVERRIDE_MASK | + HDP_CLK_CNTL__XDP_REG_CLK_SOFT_OVERRIDE_MASK | + HDP_CLK_CNTL__HDP_REG_CLK_SOFT_OVERRIDE_MASK); + } else { + hdp_clk_cntl |= HDP_CLK_CNTL__ATOMIC_MEM_CLK_SOFT_OVERRIDE_MASK | + HDP_CLK_CNTL__RC_MEM_CLK_SOFT_OVERRIDE_MASK | + HDP_CLK_CNTL__DBUS_CLK_SOFT_OVERRIDE_MASK | + HDP_CLK_CNTL__DYN_CLK_SOFT_OVERRIDE_MASK | + HDP_CLK_CNTL__XDP_REG_CLK_SOFT_OVERRIDE_MASK | + HDP_CLK_CNTL__HDP_REG_CLK_SOFT_OVERRIDE_MASK; + } + + WREG32_SOC15(HDP, 0, regHDP_CLK_CNTL, hdp_clk_cntl); +} + +static void hdp_v5_2_get_clockgating_state(struct amdgpu_device *adev, + u64 *flags) +{ + uint32_t tmp; + + /* AMD_CG_SUPPORT_HDP_MGCG */ + tmp = RREG32_SOC15(HDP, 0, regHDP_CLK_CNTL); + if (!(tmp & (HDP_CLK_CNTL__ATOMIC_MEM_CLK_SOFT_OVERRIDE_MASK | + HDP_CLK_CNTL__RC_MEM_CLK_SOFT_OVERRIDE_MASK | + HDP_CLK_CNTL__DBUS_CLK_SOFT_OVERRIDE_MASK | + HDP_CLK_CNTL__DYN_CLK_SOFT_OVERRIDE_MASK | + HDP_CLK_CNTL__XDP_REG_CLK_SOFT_OVERRIDE_MASK | + HDP_CLK_CNTL__HDP_REG_CLK_SOFT_OVERRIDE_MASK))) + *flags |= AMD_CG_SUPPORT_HDP_MGCG; + + /* AMD_CG_SUPPORT_HDP_LS/DS/SD */ + tmp = RREG32_SOC15(HDP, 0, regHDP_MEM_POWER_CTRL); + if (tmp & HDP_MEM_POWER_CTRL__ATOMIC_MEM_POWER_LS_EN_MASK) + *flags |= AMD_CG_SUPPORT_HDP_LS; + else if (tmp & HDP_MEM_POWER_CTRL__ATOMIC_MEM_POWER_DS_EN_MASK) + *flags |= AMD_CG_SUPPORT_HDP_DS; + else if (tmp & HDP_MEM_POWER_CTRL__ATOMIC_MEM_POWER_SD_EN_MASK) + *flags |= 
AMD_CG_SUPPORT_HDP_SD; +} + +static void hdp_v5_2_update_clock_gating(struct amdgpu_device *adev, + bool enable) +{ + hdp_v5_2_update_mem_power_gating(adev, enable); + hdp_v5_2_update_medium_grain_clock_gating(adev, enable); +} + const struct amdgpu_hdp_funcs hdp_v5_2_funcs = { .flush_hdp = hdp_v5_2_flush_hdp, + .update_clock_gating = hdp_v5_2_update_clock_gating, + .get_clock_gating_state = hdp_v5_2_get_clockgating_state, }; diff --git a/drivers/gpu/drm/amd/amdgpu/ih_v6_0.c b/drivers/gpu/drm/amd/amdgpu/ih_v6_0.c index 92dc60a9d209..7cd79a3844b2 100644 --- a/drivers/gpu/drm/amd/amdgpu/ih_v6_0.c +++ b/drivers/gpu/drm/amd/amdgpu/ih_v6_0.c @@ -105,7 +105,13 @@ force_update_wptr_for_self_int(struct amdgpu_device *adev, ih_rb_cntl = REG_SET_FIELD(ih_rb_cntl, IH_RB_CNTL_RING1, RB_USED_INT_THRESHOLD, threshold); - WREG32_SOC15(OSSSYS, 0, regIH_RB_CNTL_RING1, ih_rb_cntl); + if (amdgpu_sriov_vf(adev) && amdgpu_sriov_reg_indirect_ih(adev)) { + if (psp_reg_program(&adev->psp, PSP_REG_IH_RB_CNTL_RING1, ih_rb_cntl)) + return; + } else { + WREG32_SOC15(OSSSYS, 0, regIH_RB_CNTL_RING1, ih_rb_cntl); + } + WREG32_SOC15(OSSSYS, 0, regIH_CNTL2, ih_cntl); } @@ -132,7 +138,13 @@ static int ih_v6_0_toggle_ring_interrupts(struct amdgpu_device *adev, /* enable_intr field is only valid in ring0 */ if (ih == &adev->irq.ih) tmp = REG_SET_FIELD(tmp, IH_RB_CNTL, ENABLE_INTR, (enable ? 1 : 0)); - WREG32(ih_regs->ih_rb_cntl, tmp); + + if (amdgpu_sriov_vf(adev) && amdgpu_sriov_reg_indirect_ih(adev)) { + if (psp_reg_program(&adev->psp, ih_regs->psp_reg_id, tmp)) + return -ETIMEDOUT; + } else { + WREG32(ih_regs->ih_rb_cntl, tmp); + } if (enable) { ih->enabled = true; @@ -242,7 +254,15 @@ static int ih_v6_0_enable_ring(struct amdgpu_device *adev, tmp = REG_SET_FIELD(tmp, IH_RB_CNTL, WPTR_OVERFLOW_ENABLE, 0); tmp = REG_SET_FIELD(tmp, IH_RB_CNTL, RB_FULL_DRAIN_ENABLE, 1); } - WREG32(ih_regs->ih_rb_cntl, tmp); + + if (amdgpu_sriov_vf(adev) && amdgpu_sriov_reg_indirect_ih(adev)) { + if (psp_reg_program(&adev->psp, ih_regs->psp_reg_id, tmp)) { + DRM_ERROR("PSP program IH_RB_CNTL failed!\n"); + return -ETIMEDOUT; + } + } else { + WREG32(ih_regs->ih_rb_cntl, tmp); + } if (ih == &adev->irq.ih) { /* set the ih ring 0 writeback address whether it's enabled or not */ @@ -727,6 +747,7 @@ static const struct amd_ip_funcs ih_v6_0_ip_funcs = { static const struct amdgpu_ih_funcs ih_v6_0_funcs = { .get_wptr = ih_v6_0_get_wptr, .decode_iv = amdgpu_ih_decode_iv_helper, + .decode_iv_ts = amdgpu_ih_decode_iv_ts_helper, .set_rptr = ih_v6_0_set_rptr }; diff --git a/drivers/gpu/drm/amd/amdgpu/imu_v11_0.c b/drivers/gpu/drm/amd/amdgpu/imu_v11_0.c index 76383baa3929..95548c512f4f 100644 --- a/drivers/gpu/drm/amd/amdgpu/imu_v11_0.c +++ b/drivers/gpu/drm/amd/amdgpu/imu_v11_0.c @@ -26,12 +26,15 @@ #include "amdgpu_imu.h" #include "amdgpu_dpm.h" +#include "imu_v11_0_3.h" + #include "gc/gc_11_0_0_offset.h" #include "gc/gc_11_0_0_sh_mask.h" MODULE_FIRMWARE("amdgpu/gc_11_0_0_imu.bin"); MODULE_FIRMWARE("amdgpu/gc_11_0_1_imu.bin"); MODULE_FIRMWARE("amdgpu/gc_11_0_2_imu.bin"); +MODULE_FIRMWARE("amdgpu/gc_11_0_3_imu.bin"); static int imu_v11_0_init_microcode(struct amdgpu_device *adev) { @@ -360,6 +363,9 @@ static void imu_v11_0_program_rlc_ram(struct amdgpu_device *adev) program_imu_rlc_ram(adev, imu_rlc_ram_golden_11_0_2, (const u32)ARRAY_SIZE(imu_rlc_ram_golden_11_0_2)); break; + case IP_VERSION(11, 0, 3): + imu_v11_0_3_program_rlc_ram(adev); + break; default: BUG(); break; diff --git a/drivers/gpu/drm/amd/amdgpu/imu_v11_0_3.c 
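In ih_v6_0 the IH_RB_CNTL writes become PSP-mediated whenever the VF is not allowed to touch the register directly, and the same guard is repeated in three places. A minimal sketch of that shared pattern; ih_v6_0_program_rb_cntl() is a hypothetical helper not in the patch, while psp_reg_program(), amdgpu_sriov_reg_indirect_ih() and ih_regs->psp_reg_id are taken from the hunks above:

        /* Sketch: route IH_RB_CNTL writes through PSP on SR-IOV VFs */
        static int ih_v6_0_program_rb_cntl(struct amdgpu_device *adev,
                                           struct amdgpu_ih_regs *ih_regs,
                                           u32 tmp)
        {
                if (amdgpu_sriov_vf(adev) && amdgpu_sriov_reg_indirect_ih(adev)) {
                        /* VF may not write the register itself; ask PSP to do it */
                        if (psp_reg_program(&adev->psp, ih_regs->psp_reg_id, tmp))
                                return -ETIMEDOUT;
                } else {
                        WREG32(ih_regs->ih_rb_cntl, tmp);
                }
                return 0;
        }
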
b/drivers/gpu/drm/amd/amdgpu/imu_v11_0_3.c new file mode 100644 index 000000000000..fc69c1a29e23 --- /dev/null +++ b/drivers/gpu/drm/amd/amdgpu/imu_v11_0_3.c @@ -0,0 +1,145 @@ +/* + * Copyright 2022 Advanced Micro Devices, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + * + */ +#include "amdgpu.h" +#include "amdgpu_imu.h" +#include "imu_v11_0_3.h" + +#include "gc/gc_11_0_3_offset.h" +#include "gc/gc_11_0_3_sh_mask.h" + +static const struct imu_rlc_ram_golden imu_rlc_ram_golden_11_0_3[] = { + IMU_RLC_RAM_GOLDEN_VALUE(GC, 0, regGUS_IO_RD_COMBINE_FLUSH, 0x00055555, 0xe0000000), + IMU_RLC_RAM_GOLDEN_VALUE(GC, 0, regGUS_IO_WR_COMBINE_FLUSH, 0x00055555, 0xe0000000), + IMU_RLC_RAM_GOLDEN_VALUE(GC, 0, regGUS_DRAM_COMBINE_FLUSH, 0x00555555, 0xe0000000), + IMU_RLC_RAM_GOLDEN_VALUE(GC, 0, regGUS_MISC2, 0x00001ffe, 0xe0000000), + IMU_RLC_RAM_GOLDEN_VALUE(GC, 0, regGUS_SDP_CREDITS, 0x003f3fff, 0xe0000000), + IMU_RLC_RAM_GOLDEN_VALUE(GC, 0, regGUS_SDP_TAG_RESERVE1, 0x00000000, 0xe0000000), + IMU_RLC_RAM_GOLDEN_VALUE(GC, 0, regGUS_SDP_VCC_RESERVE0, 0x00041000, 0xe0000000), + IMU_RLC_RAM_GOLDEN_VALUE(GC, 0, regGUS_SDP_VCC_RESERVE1, 0x00000000, 0xe0000000), + IMU_RLC_RAM_GOLDEN_VALUE(GC, 0, regGUS_SDP_VCD_RESERVE0, 0x00040000, 0xe0000000), + IMU_RLC_RAM_GOLDEN_VALUE(GC, 0, regGUS_SDP_VCD_RESERVE1, 0x00000000, 0xe0000000), + IMU_RLC_RAM_GOLDEN_VALUE(GC, 0, regGUS_MISC, 0x00000017, 0xe0000000), + IMU_RLC_RAM_GOLDEN_VALUE(GC, 0, regGUS_SDP_ENABLE, 0x00000001, 0xe0000000), + IMU_RLC_RAM_GOLDEN_VALUE(GC, 0, regGCEA_SDP_CREDITS, 0x003f3fbf, 0xe0000000), + IMU_RLC_RAM_GOLDEN_VALUE(GC, 0, regGCEA_SDP_TAG_RESERVE0, 0x10200800, 0xe0000000), + IMU_RLC_RAM_GOLDEN_VALUE(GC, 0, regGCEA_SDP_TAG_RESERVE1, 0x00000088, 0xe0000000), + IMU_RLC_RAM_GOLDEN_VALUE(GC, 0, regGCEA_SDP_VCC_RESERVE0, 0x1d041040, 0xe0000000), + IMU_RLC_RAM_GOLDEN_VALUE(GC, 0, regGCEA_SDP_VCC_RESERVE1, 0x80000000, 0xe0000000), + IMU_RLC_RAM_GOLDEN_VALUE(GC, 0, regGCEA_SDP_IO_PRIORITY, 0x88888888, 0xe0000000), + IMU_RLC_RAM_GOLDEN_VALUE(GC, 0, regGCEA_MAM_CTRL, 0x0000d800, 0xe0000000), + IMU_RLC_RAM_GOLDEN_VALUE(GC, 0, regGCEA_SDP_ARB_FINAL, 0x000007ff, 0xe0000000), + IMU_RLC_RAM_GOLDEN_VALUE(GC, 0, regGCEA_DRAM_PAGE_BURST, 0x20080200, 0xe0000000), + IMU_RLC_RAM_GOLDEN_VALUE(GC, 0, regGCEA_SDP_ENABLE, 0x00000001, 0xe0000000), + IMU_RLC_RAM_GOLDEN_VALUE(GC, 0, regGCVM_L2_PROTECTION_FAULT_CNTL2, 0x00020000, 0xe0000000), + IMU_RLC_RAM_GOLDEN_VALUE(GC, 0, regGCMC_VM_APT_CNTL, 0x0000000c, 0xe0000000), + 
IMU_RLC_RAM_GOLDEN_VALUE(GC, 0, regGCMC_VM_CACHEABLE_DRAM_ADDRESS_END, 0x000fffff, 0xe0000000), + IMU_RLC_RAM_GOLDEN_VALUE(GC, 0, regGCEA_MISC, 0x0c48bff0, 0xe0000000), + IMU_RLC_RAM_GOLDEN_VALUE(GC, 0, regCC_GC_SA_UNIT_DISABLE, 0x00fffc01, 0xe0000000), + IMU_RLC_RAM_GOLDEN_VALUE(GC, 0, regCC_GC_PRIM_CONFIG, 0x000fffe1, 0xe0000000), + IMU_RLC_RAM_GOLDEN_VALUE(GC, 0, regCC_RB_BACKEND_DISABLE, 0xffffff01, 0xe0000000), + IMU_RLC_RAM_GOLDEN_VALUE(GC, 0, regCC_GC_SHADER_ARRAY_CONFIG, 0xfffe0001, 0x40000000), + IMU_RLC_RAM_GOLDEN_VALUE(GC, 0, regCC_GC_SHADER_ARRAY_CONFIG, 0xfffe0001, 0x42000000), + IMU_RLC_RAM_GOLDEN_VALUE(GC, 0, regCC_GC_SHADER_ARRAY_CONFIG, 0xffff0001, 0x44000000), + IMU_RLC_RAM_GOLDEN_VALUE(GC, 0, regCC_GC_SHADER_ARRAY_CONFIG, 0xffff0001, 0x46000000), + IMU_RLC_RAM_GOLDEN_VALUE(GC, 0, regCC_GC_SHADER_ARRAY_CONFIG, 0xffff0001, 0x48000000), + IMU_RLC_RAM_GOLDEN_VALUE(GC, 0, regCC_GC_SHADER_ARRAY_CONFIG, 0xffff0001, 0x4A000000), + IMU_RLC_RAM_GOLDEN_VALUE(GC, 0, regCGTS_TCC_DISABLE, 0x00000001, 0x00000000), + IMU_RLC_RAM_GOLDEN_VALUE(GC, 0, regCC_GC_SHADER_RATE_CONFIG, 0x00000001, 0x00000000), + IMU_RLC_RAM_GOLDEN_VALUE(GC, 0, regCC_GC_EDC_CONFIG, 0x00000001, 0x00000000), + IMU_RLC_RAM_GOLDEN_VALUE(GC, 0, regGCMC_VM_MX_L1_TLB_CNTL, 0x00000500, 0xe0000000), + IMU_RLC_RAM_GOLDEN_VALUE(GC, 0, regGCMC_VM_SYSTEM_APERTURE_LOW_ADDR, 0x00000001, 0xe0000000), + IMU_RLC_RAM_GOLDEN_VALUE(GC, 0, regGCMC_VM_SYSTEM_APERTURE_HIGH_ADDR, 0x00000000, 0xe0000000), + IMU_RLC_RAM_GOLDEN_VALUE(GC, 0, regGCMC_VM_LOCAL_FB_ADDRESS_START, 0x00000000, 0xe0000000), + IMU_RLC_RAM_GOLDEN_VALUE(GC, 0, regGCMC_VM_LOCAL_FB_ADDRESS_END, 0x000005ff, 0xe0000000), + IMU_RLC_RAM_GOLDEN_VALUE(GC, 0, regGCMC_VM_FB_LOCATION_BASE, 0x00006000, 0xe0000000), + IMU_RLC_RAM_GOLDEN_VALUE(GC, 0, regGCMC_VM_FB_LOCATION_TOP, 0x000065ff, 0xe0000000), + IMU_RLC_RAM_GOLDEN_VALUE(GC, 0, regGCVM_CONTEXT0_CNTL, 0x00000000, 0xe0000000), + IMU_RLC_RAM_GOLDEN_VALUE(GC, 0, regGCVM_CONTEXT1_CNTL, 0x00000000, 0xe0000000), + IMU_RLC_RAM_GOLDEN_VALUE(GC, 0, regGCMC_VM_NB_TOP_OF_DRAM_SLOT1, 0xff800000, 0xe0000000), + IMU_RLC_RAM_GOLDEN_VALUE(GC, 0, regGCMC_VM_NB_LOWER_TOP_OF_DRAM2, 0x00000001, 0xe0000000), + IMU_RLC_RAM_GOLDEN_VALUE(GC, 0, regGCMC_VM_NB_UPPER_TOP_OF_DRAM2, 0x00000fff, 0xe0000000), + IMU_RLC_RAM_GOLDEN_VALUE(GC, 0, regGCVM_L2_PROTECTION_FAULT_CNTL, 0x00001ffc, 0xe0000000), + IMU_RLC_RAM_GOLDEN_VALUE(GC, 0, regGCMC_VM_MX_L1_TLB_CNTL, 0x00000551, 0xe0000000), + IMU_RLC_RAM_GOLDEN_VALUE(GC, 0, regGCVM_L2_CNTL, 0x00080603, 0xe0000000), + IMU_RLC_RAM_GOLDEN_VALUE(GC, 0, regGCVM_L2_CNTL2, 0x00000003, 0xe0000000), + IMU_RLC_RAM_GOLDEN_VALUE(GC, 0, regGCVM_L2_CNTL3, 0x00100003, 0xe0000000), + IMU_RLC_RAM_GOLDEN_VALUE(GC, 0, regGCVM_L2_CNTL5, 0x00003fe0, 0xe0000000), + IMU_RLC_RAM_GOLDEN_VALUE(GC, 0, regGCVM_CONTEXT0_CNTL, 0x00000001, 0xe0000000), + IMU_RLC_RAM_GOLDEN_VALUE(GC, 0, regGCVM_L2_CONTEXT0_PER_PFVF_PTE_CACHE_FRAGMENT_SIZES, 0x00000c00, 0xe0000000), + IMU_RLC_RAM_GOLDEN_VALUE(GC, 0, regGCVM_CONTEXT1_CNTL, 0x00000001, 0xe0000000), + IMU_RLC_RAM_GOLDEN_VALUE(GC, 0, regGCVM_L2_CONTEXT1_PER_PFVF_PTE_CACHE_FRAGMENT_SIZES, 0x00000c00, 0xe0000000), + IMU_RLC_RAM_GOLDEN_VALUE(GC, 0, regGB_ADDR_CONFIG, 0x00000444, 0xe0000000), + IMU_RLC_RAM_GOLDEN_VALUE(GC, 0, regGL2_PIPE_STEER_0, 0x54105410, 0xe0000000), + IMU_RLC_RAM_GOLDEN_VALUE(GC, 0, regGL2_PIPE_STEER_2, 0x76323276, 0xe0000000), + IMU_RLC_RAM_GOLDEN_VALUE(GC, 0, regGB_ADDR_CONFIG, 0x00000244, 0xe0000000), + IMU_RLC_RAM_GOLDEN_VALUE(GC, 0, regGCUTCL2_HARVEST_BYPASS_GROUPS, 
0x00000006, 0xe0000000), + IMU_RLC_RAM_GOLDEN_VALUE(GC, 0, regGCMC_VM_APT_CNTL, 0x0000000c, 0xe0000000), + IMU_RLC_RAM_GOLDEN_VALUE(GC, 0, regGCMC_VM_AGP_BASE, 0x00000000, 0xe0000000), + IMU_RLC_RAM_GOLDEN_VALUE(GC, 0, regGCMC_VM_AGP_BOT, 0x00000002, 0xe0000000), + IMU_RLC_RAM_GOLDEN_VALUE(GC, 0, regGCMC_VM_AGP_TOP, 0x00000000, 0xe0000000), + IMU_RLC_RAM_GOLDEN_VALUE(GC, 0, regGCVM_L2_PROTECTION_FAULT_CNTL2, 0x00020000, 0xe0000000), + IMU_RLC_RAM_GOLDEN_VALUE(GC, 0, regSDMA0_UCODE_SELFLOAD_CONTROL, 0x00000210, 0xe0000000), + IMU_RLC_RAM_GOLDEN_VALUE(GC, 0, regSDMA1_UCODE_SELFLOAD_CONTROL, 0x00000210, 0xe0000000), + IMU_RLC_RAM_GOLDEN_VALUE(GC, 0, regCPC_PSP_DEBUG, CPC_PSP_DEBUG__GPA_OVERRIDE_MASK, 0xe0000000), + IMU_RLC_RAM_GOLDEN_VALUE(GC, 0, regCPG_PSP_DEBUG, CPG_PSP_DEBUG__GPA_OVERRIDE_MASK, 0xe0000000), +}; + +static void program_rlc_ram_register_setting(struct amdgpu_device *adev, + const struct imu_rlc_ram_golden *regs, + const u32 array_size) +{ + const struct imu_rlc_ram_golden *entry; + u32 reg, data; + int i; + + for (i = 0; i < array_size; ++i) { + entry = ®s[i]; + reg = adev->reg_offset[entry->hwip][entry->instance][entry->segment] + entry->reg; + reg |= entry->addr_mask; + + data = entry->data; + if (entry->reg == regGCMC_VM_AGP_BASE) + data = 0x00ffffff; + else if (entry->reg == regGCMC_VM_AGP_TOP) + data = 0x0; + else if (entry->reg == regGCMC_VM_FB_LOCATION_BASE) + data = adev->gmc.vram_start >> 24; + else if (entry->reg == regGCMC_VM_FB_LOCATION_TOP) + data = adev->gmc.vram_end >> 24; + + WREG32_SOC15(GC, 0, regGFX_IMU_RLC_RAM_ADDR_HIGH, 0); + WREG32_SOC15(GC, 0, regGFX_IMU_RLC_RAM_ADDR_LOW, reg); + WREG32_SOC15(GC, 0, regGFX_IMU_RLC_RAM_DATA, data); + } + //Indicate the latest entry + WREG32_SOC15(GC, 0, regGFX_IMU_RLC_RAM_ADDR_HIGH, 0); + WREG32_SOC15(GC, 0, regGFX_IMU_RLC_RAM_ADDR_LOW, 0); + WREG32_SOC15(GC, 0, regGFX_IMU_RLC_RAM_DATA, 0); +} + +void imu_v11_0_3_program_rlc_ram(struct amdgpu_device *adev) +{ + program_rlc_ram_register_setting(adev, + imu_rlc_ram_golden_11_0_3, + (const u32)ARRAY_SIZE(imu_rlc_ram_golden_11_0_3)); +} diff --git a/drivers/gpu/drm/amd/amdgpu/imu_v11_0_3.h b/drivers/gpu/drm/amd/amdgpu/imu_v11_0_3.h new file mode 100644 index 000000000000..702be568f26b --- /dev/null +++ b/drivers/gpu/drm/amd/amdgpu/imu_v11_0_3.h @@ -0,0 +1,29 @@ +/* + * Copyright 2022 Advanced Micro Devices, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. 
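program_rlc_ram_register_setting() above streams every golden entry into the IMU as an (ADDR_HIGH, ADDR_LOW, DATA) register triplet, patches the FB location entries at runtime from adev->gmc.vram_start/vram_end, and terminates the table with an all-zero triplet. A minimal sketch of the per-entry write, wrapped in a hypothetical helper (write_rlc_ram_entry is not in the patch; the three register writes are):

        /* Sketch: one RLC-RAM table entry = three register writes */
        static void write_rlc_ram_entry(struct amdgpu_device *adev,
                                        u32 reg, u32 data)
        {
                WREG32_SOC15(GC, 0, regGFX_IMU_RLC_RAM_ADDR_HIGH, 0);
                WREG32_SOC15(GC, 0, regGFX_IMU_RLC_RAM_ADDR_LOW, reg);
                WREG32_SOC15(GC, 0, regGFX_IMU_RLC_RAM_DATA, data);
        }
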
+ * + */ + +#ifndef __IMU_V11_0_3_H__ +#define __IMU_V11_0_3_H__ + +void imu_v11_0_3_program_rlc_ram(struct amdgpu_device *adev); + +#endif diff --git a/drivers/gpu/drm/amd/amdgpu/mes_v11_0.c b/drivers/gpu/drm/amd/amdgpu/mes_v11_0.c index 120ea294abef..5cec6b259b7f 100644 --- a/drivers/gpu/drm/amd/amdgpu/mes_v11_0.c +++ b/drivers/gpu/drm/amd/amdgpu/mes_v11_0.c @@ -38,6 +38,8 @@ MODULE_FIRMWARE("amdgpu/gc_11_0_1_mes.bin"); MODULE_FIRMWARE("amdgpu/gc_11_0_1_mes1.bin"); MODULE_FIRMWARE("amdgpu/gc_11_0_2_mes.bin"); MODULE_FIRMWARE("amdgpu/gc_11_0_2_mes1.bin"); +MODULE_FIRMWARE("amdgpu/gc_11_0_3_mes.bin"); +MODULE_FIRMWARE("amdgpu/gc_11_0_3_mes1.bin"); static int mes_v11_0_hw_fini(void *handle); static int mes_v11_0_kiq_hw_init(struct amdgpu_device *adev); @@ -183,6 +185,20 @@ static int mes_v11_0_add_hw_queue(struct amdgpu_mes *mes, mes_add_queue_pkt.trap_handler_addr = input->tba_addr; mes_add_queue_pkt.tma_addr = input->tma_addr; mes_add_queue_pkt.is_kfd_process = input->is_kfd_process; + mes_add_queue_pkt.trap_en = 1; + + /* For KFD, gds_size is re-used for queue size (needed in MES for AQL queues) */ + mes_add_queue_pkt.is_aql_queue = input->is_aql_queue; + mes_add_queue_pkt.gds_size = input->queue_size; + + if (!(((adev->mes.sched_version & AMDGPU_MES_VERSION_MASK) >= 4) && + (adev->ip_versions[GC_HWIP][0] >= IP_VERSION(11, 0, 0)) && + (adev->ip_versions[GC_HWIP][0] <= IP_VERSION(11, 0, 3)))) + mes_add_queue_pkt.trap_en = 1; + + /* For KFD, gds_size is re-used for queue size (needed in MES for AQL queues) */ + mes_add_queue_pkt.is_aql_queue = input->is_aql_queue; + mes_add_queue_pkt.gds_size = input->queue_size; return mes_v11_0_submit_pkt_and_poll_completion(mes, &mes_add_queue_pkt, sizeof(mes_add_queue_pkt), diff --git a/drivers/gpu/drm/amd/amdgpu/mmhub_v1_0.c b/drivers/gpu/drm/amd/amdgpu/mmhub_v1_0.c index 3f44a099c52a..3e51e773f92b 100644 --- a/drivers/gpu/drm/amd/amdgpu/mmhub_v1_0.c +++ b/drivers/gpu/drm/amd/amdgpu/mmhub_v1_0.c @@ -176,6 +176,7 @@ static void mmhub_v1_0_init_cache_regs(struct amdgpu_device *adev) tmp = REG_SET_FIELD(tmp, VM_L2_CNTL2, INVALIDATE_L2_CACHE, 1); WREG32_SOC15(MMHUB, 0, mmVM_L2_CNTL2, tmp); + tmp = mmVM_L2_CNTL3_DEFAULT; if (adev->gmc.translate_further) { tmp = REG_SET_FIELD(tmp, VM_L2_CNTL3, BANK_SELECT, 12); tmp = REG_SET_FIELD(tmp, VM_L2_CNTL3, diff --git a/drivers/gpu/drm/amd/amdgpu/mmhub_v3_0.c b/drivers/gpu/drm/amd/amdgpu/mmhub_v3_0.c index bc11b2de37ae..a1d26c4d80b8 100644 --- a/drivers/gpu/drm/amd/amdgpu/mmhub_v3_0.c +++ b/drivers/gpu/drm/amd/amdgpu/mmhub_v3_0.c @@ -169,17 +169,17 @@ static void mmhub_v3_0_init_system_aperture_regs(struct amdgpu_device *adev) uint64_t value; uint32_t tmp; - /* Disable AGP. */ - WREG32_SOC15(MMHUB, 0, regMMMC_VM_AGP_BASE, 0); - WREG32_SOC15(MMHUB, 0, regMMMC_VM_AGP_TOP, 0); - WREG32_SOC15(MMHUB, 0, regMMMC_VM_AGP_BOT, 0x00FFFFFF); - if (!amdgpu_sriov_vf(adev)) { /* * the new L1 policy will block SRIOV guest from writing * these regs, and they will be programed at host. * so skip programing these regs. */ + /* Disable AGP. */ + WREG32_SOC15(MMHUB, 0, regMMMC_VM_AGP_BASE, 0); + WREG32_SOC15(MMHUB, 0, regMMMC_VM_AGP_TOP, 0); + WREG32_SOC15(MMHUB, 0, regMMMC_VM_AGP_BOT, 0x00FFFFFF); + /* Program the system aperture low logical page number. 
*/ WREG32_SOC15(MMHUB, 0, regMMMC_VM_SYSTEM_APERTURE_LOW_ADDR, adev->gmc.vram_start >> 18); diff --git a/drivers/gpu/drm/amd/amdgpu/mmhub_v3_0_1.c b/drivers/gpu/drm/amd/amdgpu/mmhub_v3_0_1.c index cac72ced94c8..e8058edc1d10 100644 --- a/drivers/gpu/drm/amd/amdgpu/mmhub_v3_0_1.c +++ b/drivers/gpu/drm/amd/amdgpu/mmhub_v3_0_1.c @@ -518,18 +518,41 @@ static u64 mmhub_v3_0_1_get_mc_fb_offset(struct amdgpu_device *adev) static void mmhub_v3_0_1_update_medium_grain_clock_gating(struct amdgpu_device *adev, bool enable) { - //TODO + uint32_t def, data; + + def = data = RREG32_SOC15(MMHUB, 0, regMM_ATC_L2_MISC_CG); + + if (enable) + data |= MM_ATC_L2_MISC_CG__ENABLE_MASK; + else + data &= ~MM_ATC_L2_MISC_CG__ENABLE_MASK; + + if (def != data) + WREG32_SOC15(MMHUB, 0, regMM_ATC_L2_MISC_CG, data); } static void mmhub_v3_0_1_update_medium_grain_light_sleep(struct amdgpu_device *adev, bool enable) { - //TODO + uint32_t def, data; + + def = data = RREG32_SOC15(MMHUB, 0, regMM_ATC_L2_MISC_CG); + + if (enable) + data |= MM_ATC_L2_MISC_CG__MEM_LS_ENABLE_MASK; + else + data &= ~MM_ATC_L2_MISC_CG__MEM_LS_ENABLE_MASK; + + if (def != data) + WREG32_SOC15(MMHUB, 0, regMM_ATC_L2_MISC_CG, data); } static int mmhub_v3_0_1_set_clockgating(struct amdgpu_device *adev, enum amd_clockgating_state state) { + if (amdgpu_sriov_vf(adev)) + return 0; + mmhub_v3_0_1_update_medium_grain_clock_gating(adev, state == AMD_CG_STATE_GATE); mmhub_v3_0_1_update_medium_grain_light_sleep(adev, @@ -539,7 +562,20 @@ static int mmhub_v3_0_1_set_clockgating(struct amdgpu_device *adev, static void mmhub_v3_0_1_get_clockgating(struct amdgpu_device *adev, u64 *flags) { - //TODO + int data; + + if (amdgpu_sriov_vf(adev)) + *flags = 0; + + data = RREG32_SOC15(MMHUB, 0, regMM_ATC_L2_MISC_CG); + + /* AMD_CG_SUPPORT_MC_MGCG */ + if (data & MM_ATC_L2_MISC_CG__ENABLE_MASK) + *flags |= AMD_CG_SUPPORT_MC_MGCG; + + /* AMD_CG_SUPPORT_MC_LS */ + if (data & MM_ATC_L2_MISC_CG__MEM_LS_ENABLE_MASK) + *flags |= AMD_CG_SUPPORT_MC_LS; } const struct amdgpu_mmhub_funcs mmhub_v3_0_1_funcs = { diff --git a/drivers/gpu/drm/amd/amdgpu/mmhub_v9_4.c b/drivers/gpu/drm/amd/amdgpu/mmhub_v9_4.c index 6e0145b2b408..445cb06b9d26 100644 --- a/drivers/gpu/drm/amd/amdgpu/mmhub_v9_4.c +++ b/drivers/gpu/drm/amd/amdgpu/mmhub_v9_4.c @@ -295,9 +295,17 @@ static void mmhub_v9_4_disable_identity_aperture(struct amdgpu_device *adev, static void mmhub_v9_4_setup_vmid_config(struct amdgpu_device *adev, int hubid) { struct amdgpu_vmhub *hub = &adev->vmhub[AMDGPU_MMHUB_0]; + unsigned int num_level, block_size; uint32_t tmp; int i; + num_level = adev->vm_manager.num_level; + block_size = adev->vm_manager.block_size; + if (adev->gmc.translate_further) + num_level -= 1; + else + block_size -= 9; + for (i = 0; i <= 14; i++) { tmp = RREG32_SOC15_OFFSET(MMHUB, 0, mmVML2VC0_VM_CONTEXT1_CNTL, hubid * MMHUB_INSTANCE_REGISTER_OFFSET + i); @@ -305,7 +313,7 @@ static void mmhub_v9_4_setup_vmid_config(struct amdgpu_device *adev, int hubid) ENABLE_CONTEXT, 1); tmp = REG_SET_FIELD(tmp, VML2VC0_VM_CONTEXT1_CNTL, PAGE_TABLE_DEPTH, - adev->vm_manager.num_level); + num_level); tmp = REG_SET_FIELD(tmp, VML2VC0_VM_CONTEXT1_CNTL, RANGE_PROTECTION_FAULT_ENABLE_DEFAULT, 1); tmp = REG_SET_FIELD(tmp, VML2VC0_VM_CONTEXT1_CNTL, @@ -323,7 +331,7 @@ static void mmhub_v9_4_setup_vmid_config(struct amdgpu_device *adev, int hubid) EXECUTE_PROTECTION_FAULT_ENABLE_DEFAULT, 1); tmp = REG_SET_FIELD(tmp, VML2VC0_VM_CONTEXT1_CNTL, PAGE_TABLE_BLOCK_SIZE, - adev->vm_manager.block_size - 9); + block_size); /* Send no-retry XNACK on 
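mmhub_v9_4_setup_vmid_config() now derives the programmed PAGE_TABLE_DEPTH and PAGE_TABLE_BLOCK_SIZE from adev->gmc.translate_further instead of using the vm_manager values directly. A minimal restatement of the selection, with the values as in the hunk above and the comments hedged as interpretation:

        unsigned int num_level  = adev->vm_manager.num_level;
        unsigned int block_size = adev->vm_manager.block_size;

        if (adev->gmc.translate_further)
                num_level -= 1;   /* one level presumably handled by further translation */
        else
                block_size -= 9;  /* field appears to encode bits beyond the 512-entry default */
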
fault to suppress VM fault storm. */ tmp = REG_SET_FIELD(tmp, VML2VC0_VM_CONTEXT1_CNTL, RETRY_PERMISSION_OR_INVALID_PAGE_FAULT, diff --git a/drivers/gpu/drm/amd/amdgpu/mmsch_v4_0.h b/drivers/gpu/drm/amd/amdgpu/mmsch_v4_0.h new file mode 100644 index 000000000000..f772bb499f3e --- /dev/null +++ b/drivers/gpu/drm/amd/amdgpu/mmsch_v4_0.h @@ -0,0 +1,140 @@ +/* + * Copyright 2022 Advanced Micro Devices, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + * + */ + +#ifndef __MMSCH_V4_0_H__ +#define __MMSCH_V4_0_H__ + +#include "amdgpu_vcn.h" + +#define MMSCH_VERSION_MAJOR 4 +#define MMSCH_VERSION_MINOR 0 +#define MMSCH_VERSION (MMSCH_VERSION_MAJOR << 16 | MMSCH_VERSION_MINOR) + +#define RB_ENABLED (1 << 0) +#define RB4_ENABLED (1 << 1) +#define MMSCH_DOORBELL_OFFSET 0x8 + +#define MMSCH_VF_ENGINE_STATUS__PASS 0x1 + +#define MMSCH_VF_MAILBOX_RESP__OK 0x1 +#define MMSCH_VF_MAILBOX_RESP__INCOMPLETE 0x2 + +enum mmsch_v4_0_command_type { + MMSCH_COMMAND__DIRECT_REG_WRITE = 0, + MMSCH_COMMAND__DIRECT_REG_POLLING = 2, + MMSCH_COMMAND__DIRECT_REG_READ_MODIFY_WRITE = 3, + MMSCH_COMMAND__INDIRECT_REG_WRITE = 8, + MMSCH_COMMAND__END = 0xf +}; + +struct mmsch_v4_0_table_info { + uint32_t init_status; + uint32_t table_offset; + uint32_t table_size; +}; + +struct mmsch_v4_0_init_header { + uint32_t version; + uint32_t total_size; + struct mmsch_v4_0_table_info inst[AMDGPU_MAX_VCN_INSTANCES]; + struct mmsch_v4_0_table_info jpegdec; +}; + +struct mmsch_v4_0_cmd_direct_reg_header { + uint32_t reg_offset : 28; + uint32_t command_type : 4; +}; + +struct mmsch_v4_0_cmd_indirect_reg_header { + uint32_t reg_offset : 20; + uint32_t reg_idx_space : 8; + uint32_t command_type : 4; +}; + +struct mmsch_v4_0_cmd_direct_write { + struct mmsch_v4_0_cmd_direct_reg_header cmd_header; + uint32_t reg_value; +}; + +struct mmsch_v4_0_cmd_direct_read_modify_write { + struct mmsch_v4_0_cmd_direct_reg_header cmd_header; + uint32_t write_data; + uint32_t mask_value; +}; + +struct mmsch_v4_0_cmd_direct_polling { + struct mmsch_v4_0_cmd_direct_reg_header cmd_header; + uint32_t mask_value; + uint32_t wait_value; +}; + +struct mmsch_v4_0_cmd_end { + struct mmsch_v4_0_cmd_direct_reg_header cmd_header; +}; + +struct mmsch_v4_0_cmd_indirect_write { + struct mmsch_v4_0_cmd_indirect_reg_header cmd_header; + uint32_t reg_value; +}; + +#define MMSCH_V4_0_INSERT_DIRECT_RD_MOD_WT(reg, mask, data) { \ + size = sizeof(struct mmsch_v4_0_cmd_direct_read_modify_write); \ + size_dw = size / 4; \ + 
direct_rd_mod_wt.cmd_header.reg_offset = reg; \ + direct_rd_mod_wt.mask_value = mask; \ + direct_rd_mod_wt.write_data = data; \ + memcpy((void *)table_loc, &direct_rd_mod_wt, size); \ + table_loc += size_dw; \ + table_size += size_dw; \ +} + +#define MMSCH_V4_0_INSERT_DIRECT_WT(reg, value) { \ + size = sizeof(struct mmsch_v4_0_cmd_direct_write); \ + size_dw = size / 4; \ + direct_wt.cmd_header.reg_offset = reg; \ + direct_wt.reg_value = value; \ + memcpy((void *)table_loc, &direct_wt, size); \ + table_loc += size_dw; \ + table_size += size_dw; \ +} + +#define MMSCH_V4_0_INSERT_DIRECT_POLL(reg, mask, wait) { \ + size = sizeof(struct mmsch_v4_0_cmd_direct_polling); \ + size_dw = size / 4; \ + direct_poll.cmd_header.reg_offset = reg; \ + direct_poll.mask_value = mask; \ + direct_poll.wait_value = wait; \ + memcpy((void *)table_loc, &direct_poll, size); \ + table_loc += size_dw; \ + table_size += size_dw; \ +} + +#define MMSCH_V4_0_INSERT_END() { \ + size = sizeof(struct mmsch_v4_0_cmd_end); \ + size_dw = size / 4; \ + memcpy((void *)table_loc, &end, size); \ + table_loc += size_dw; \ + table_size += size_dw; \ +} + +#endif diff --git a/drivers/gpu/drm/amd/amdgpu/mxgpu_ai.c b/drivers/gpu/drm/amd/amdgpu/mxgpu_ai.c index 12906ba74462..a2f04b249132 100644 --- a/drivers/gpu/drm/amd/amdgpu/mxgpu_ai.c +++ b/drivers/gpu/drm/amd/amdgpu/mxgpu_ai.c @@ -290,6 +290,7 @@ flr_done: reset_context.method = AMD_RESET_METHOD_NONE; reset_context.reset_req_dev = adev; clear_bit(AMDGPU_NEED_FULL_RESET, &reset_context.flags); + clear_bit(AMDGPU_SKIP_MODE2_RESET, &reset_context.flags); amdgpu_device_gpu_recover(adev, NULL, &reset_context); } diff --git a/drivers/gpu/drm/amd/amdgpu/mxgpu_nv.c b/drivers/gpu/drm/amd/amdgpu/mxgpu_nv.c index e07757eea7ad..a977f0027928 100644 --- a/drivers/gpu/drm/amd/amdgpu/mxgpu_nv.c +++ b/drivers/gpu/drm/amd/amdgpu/mxgpu_nv.c @@ -317,6 +317,7 @@ flr_done: reset_context.method = AMD_RESET_METHOD_NONE; reset_context.reset_req_dev = adev; clear_bit(AMDGPU_NEED_FULL_RESET, &reset_context.flags); + clear_bit(AMDGPU_SKIP_MODE2_RESET, &reset_context.flags); amdgpu_device_gpu_recover(adev, NULL, &reset_context); } diff --git a/drivers/gpu/drm/amd/amdgpu/mxgpu_vi.c b/drivers/gpu/drm/amd/amdgpu/mxgpu_vi.c index 288c414babdf..fd14fa9b9cd7 100644 --- a/drivers/gpu/drm/amd/amdgpu/mxgpu_vi.c +++ b/drivers/gpu/drm/amd/amdgpu/mxgpu_vi.c @@ -529,6 +529,7 @@ static void xgpu_vi_mailbox_flr_work(struct work_struct *work) reset_context.method = AMD_RESET_METHOD_NONE; reset_context.reset_req_dev = adev; clear_bit(AMDGPU_NEED_FULL_RESET, &reset_context.flags); + clear_bit(AMDGPU_SKIP_MODE2_RESET, &reset_context.flags); amdgpu_device_gpu_recover(adev, NULL, &reset_context); } diff --git a/drivers/gpu/drm/amd/amdgpu/navi10_ih.c b/drivers/gpu/drm/amd/amdgpu/navi10_ih.c index 4b5396d3e60f..eec13cb5bf75 100644 --- a/drivers/gpu/drm/amd/amdgpu/navi10_ih.c +++ b/drivers/gpu/drm/amd/amdgpu/navi10_ih.c @@ -409,9 +409,11 @@ static u32 navi10_ih_get_wptr(struct amdgpu_device *adev, u32 wptr, tmp; struct amdgpu_ih_regs *ih_regs; - if (ih == &adev->irq.ih) { + if (ih == &adev->irq.ih || ih == &adev->irq.ih_soft) { /* Only ring0 supports writeback. On other rings fall back * to register-based code with overflow checking below. + * ih_soft ring doesn't have any backing hardware registers, + * update wptr and return. 
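The MMSCH_V4_0_INSERT_* macros above expand against a fixed set of caller-side locals: size, size_dw, a uint32_t *table_loc cursor, a running table_size, and command structs named direct_wt, direct_rd_mod_wt, direct_poll and end. A minimal usage sketch; init_table, reg_offset and reg_value are placeholders, everything else is what the macros require:

        /* Sketch: building an MMSCH init table with the macros above */
        struct mmsch_v4_0_cmd_direct_write direct_wt = { 0 };
        struct mmsch_v4_0_cmd_end end = { 0 };
        uint32_t *table_loc;            /* cursor into the init-table buffer */
        uint32_t table_size = 0, size, size_dw;

        direct_wt.cmd_header.command_type = MMSCH_COMMAND__DIRECT_REG_WRITE;
        end.cmd_header.command_type = MMSCH_COMMAND__END;
        table_loc = init_table;         /* init_table: caller-provided buffer (placeholder) */

        MMSCH_V4_0_INSERT_DIRECT_WT(reg_offset, reg_value);     /* placeholders */
        MMSCH_V4_0_INSERT_END();
        /* table_size (in dwords) is then reported to MMSCH via mmsch_v4_0_init_header */
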
*/ wptr = le32_to_cpu(*ih->wptr_cpu); @@ -483,6 +485,9 @@ static void navi10_ih_set_rptr(struct amdgpu_device *adev, { struct amdgpu_ih_regs *ih_regs; + if (ih == &adev->irq.ih_soft) + return; + if (ih->use_doorbell) { /* XXX check if swapping is necessary on BE */ *ih->rptr_cpu = ih->rptr; diff --git a/drivers/gpu/drm/amd/amdgpu/nbio_v2_3.c b/drivers/gpu/drm/amd/amdgpu/nbio_v2_3.c index b465baa26762..aa761ff3a5fa 100644 --- a/drivers/gpu/drm/amd/amdgpu/nbio_v2_3.c +++ b/drivers/gpu/drm/amd/amdgpu/nbio_v2_3.c @@ -380,6 +380,7 @@ static void nbio_v2_3_enable_aspm(struct amdgpu_device *adev, WREG32_PCIE(smnPCIE_LC_CNTL, data); } +#ifdef CONFIG_PCIEASPM static void nbio_v2_3_program_ltr(struct amdgpu_device *adev) { uint32_t def, data; @@ -401,9 +402,11 @@ static void nbio_v2_3_program_ltr(struct amdgpu_device *adev) if (def != data) WREG32_PCIE(smnBIF_CFG_DEV0_EPF0_DEVICE_CNTL2, data); } +#endif static void nbio_v2_3_program_aspm(struct amdgpu_device *adev) { +#ifdef CONFIG_PCIEASPM uint32_t def, data; def = data = RREG32_PCIE(smnPCIE_LC_CNTL); @@ -459,7 +462,10 @@ static void nbio_v2_3_program_aspm(struct amdgpu_device *adev) if (def != data) WREG32_PCIE(smnPCIE_LC_CNTL6, data); - nbio_v2_3_program_ltr(adev); + /* Don't bother about LTR if LTR is not enabled + * in the path */ + if (adev->pdev->ltr_path) + nbio_v2_3_program_ltr(adev); def = data = RREG32_SOC15(NBIO, 0, mmRCC_BIF_STRAP3); data |= 0x5DE0 << RCC_BIF_STRAP3__STRAP_VLINK_ASPM_IDLE_TIMER__SHIFT; @@ -483,6 +489,7 @@ static void nbio_v2_3_program_aspm(struct amdgpu_device *adev) data &= ~PCIE_LC_CNTL3__LC_DSC_DONT_ENTER_L23_AFTER_PME_ACK_MASK; if (def != data) WREG32_PCIE(smnPCIE_LC_CNTL3, data); +#endif } static void nbio_v2_3_apply_lc_spc_mode_wa(struct amdgpu_device *adev) diff --git a/drivers/gpu/drm/amd/amdgpu/nbio_v4_3.c b/drivers/gpu/drm/amd/amdgpu/nbio_v4_3.c index 982a89f841d5..15eb3658d70e 100644 --- a/drivers/gpu/drm/amd/amdgpu/nbio_v4_3.c +++ b/drivers/gpu/drm/amd/amdgpu/nbio_v4_3.c @@ -488,3 +488,47 @@ const struct amdgpu_nbio_funcs nbio_v4_3_funcs = { .get_rom_offset = nbio_v4_3_get_rom_offset, .program_aspm = nbio_v4_3_program_aspm, }; + + +static void nbio_v4_3_sriov_ih_doorbell_range(struct amdgpu_device *adev, + bool use_doorbell, int doorbell_index) +{ +} + +static void nbio_v4_3_sriov_sdma_doorbell_range(struct amdgpu_device *adev, int instance, + bool use_doorbell, int doorbell_index, + int doorbell_size) +{ +} + +static void nbio_v4_3_sriov_vcn_doorbell_range(struct amdgpu_device *adev, bool use_doorbell, + int doorbell_index, int instance) +{ +} + +static void nbio_v4_3_sriov_gc_doorbell_init(struct amdgpu_device *adev) +{ +} + +const struct amdgpu_nbio_funcs nbio_v4_3_sriov_funcs = { + .get_hdp_flush_req_offset = nbio_v4_3_get_hdp_flush_req_offset, + .get_hdp_flush_done_offset = nbio_v4_3_get_hdp_flush_done_offset, + .get_pcie_index_offset = nbio_v4_3_get_pcie_index_offset, + .get_pcie_data_offset = nbio_v4_3_get_pcie_data_offset, + .get_rev_id = nbio_v4_3_get_rev_id, + .mc_access_enable = nbio_v4_3_mc_access_enable, + .get_memsize = nbio_v4_3_get_memsize, + .sdma_doorbell_range = nbio_v4_3_sriov_sdma_doorbell_range, + .vcn_doorbell_range = nbio_v4_3_sriov_vcn_doorbell_range, + .gc_doorbell_init = nbio_v4_3_sriov_gc_doorbell_init, + .enable_doorbell_aperture = nbio_v4_3_enable_doorbell_aperture, + .enable_doorbell_selfring_aperture = nbio_v4_3_enable_doorbell_selfring_aperture, + .ih_doorbell_range = nbio_v4_3_sriov_ih_doorbell_range, + .update_medium_grain_clock_gating = 
nbio_v4_3_update_medium_grain_clock_gating, + .update_medium_grain_light_sleep = nbio_v4_3_update_medium_grain_light_sleep, + .get_clockgating_state = nbio_v4_3_get_clockgating_state, + .ih_control = nbio_v4_3_ih_control, + .init_registers = nbio_v4_3_init_registers, + .remap_hdp_registers = nbio_v4_3_remap_hdp_registers, + .get_rom_offset = nbio_v4_3_get_rom_offset, +}; diff --git a/drivers/gpu/drm/amd/amdgpu/nbio_v4_3.h b/drivers/gpu/drm/amd/amdgpu/nbio_v4_3.h index ade43661d7a9..711999ceedf4 100644 --- a/drivers/gpu/drm/amd/amdgpu/nbio_v4_3.h +++ b/drivers/gpu/drm/amd/amdgpu/nbio_v4_3.h @@ -28,5 +28,6 @@ extern const struct nbio_hdp_flush_reg nbio_v4_3_hdp_flush_reg; extern const struct amdgpu_nbio_funcs nbio_v4_3_funcs; +extern const struct amdgpu_nbio_funcs nbio_v4_3_sriov_funcs; #endif diff --git a/drivers/gpu/drm/amd/amdgpu/nbio_v6_1.c b/drivers/gpu/drm/amd/amdgpu/nbio_v6_1.c index f7f6ddebd3e4..37615a77287b 100644 --- a/drivers/gpu/drm/amd/amdgpu/nbio_v6_1.c +++ b/drivers/gpu/drm/amd/amdgpu/nbio_v6_1.c @@ -282,6 +282,7 @@ static void nbio_v6_1_init_registers(struct amdgpu_device *adev) mmBIF_BX_DEV0_EPF0_VF0_HDP_MEM_COHERENCY_FLUSH_CNTL) << 2; } +#ifdef CONFIG_PCIEASPM static void nbio_v6_1_program_ltr(struct amdgpu_device *adev) { uint32_t def, data; @@ -303,9 +304,11 @@ static void nbio_v6_1_program_ltr(struct amdgpu_device *adev) if (def != data) WREG32_PCIE(smnBIF_CFG_DEV0_EPF0_DEVICE_CNTL2, data); } +#endif static void nbio_v6_1_program_aspm(struct amdgpu_device *adev) { +#ifdef CONFIG_PCIEASPM uint32_t def, data; def = data = RREG32_PCIE(smnPCIE_LC_CNTL); @@ -361,7 +364,10 @@ static void nbio_v6_1_program_aspm(struct amdgpu_device *adev) if (def != data) WREG32_PCIE(smnPCIE_LC_CNTL6, data); - nbio_v6_1_program_ltr(adev); + /* Don't bother about LTR if LTR is not enabled + * in the path */ + if (adev->pdev->ltr_path) + nbio_v6_1_program_ltr(adev); def = data = RREG32_PCIE(smnRCC_BIF_STRAP3); data |= 0x5DE0 << RCC_BIF_STRAP3__STRAP_VLINK_ASPM_IDLE_TIMER__SHIFT; @@ -385,6 +391,7 @@ static void nbio_v6_1_program_aspm(struct amdgpu_device *adev) data &= ~PCIE_LC_CNTL3__LC_DSC_DONT_ENTER_L23_AFTER_PME_ACK_MASK; if (def != data) WREG32_PCIE(smnPCIE_LC_CNTL3, data); +#endif } const struct amdgpu_nbio_funcs nbio_v6_1_funcs = { diff --git a/drivers/gpu/drm/amd/amdgpu/nbio_v7_4.c b/drivers/gpu/drm/amd/amdgpu/nbio_v7_4.c index 11848d1e238b..19455a725939 100644 --- a/drivers/gpu/drm/amd/amdgpu/nbio_v7_4.c +++ b/drivers/gpu/drm/amd/amdgpu/nbio_v7_4.c @@ -673,6 +673,7 @@ struct amdgpu_nbio_ras nbio_v7_4_ras = { }; +#ifdef CONFIG_PCIEASPM static void nbio_v7_4_program_ltr(struct amdgpu_device *adev) { uint32_t def, data; @@ -694,9 +695,11 @@ static void nbio_v7_4_program_ltr(struct amdgpu_device *adev) if (def != data) WREG32_PCIE(smnBIF_CFG_DEV0_EPF0_DEVICE_CNTL2, data); } +#endif static void nbio_v7_4_program_aspm(struct amdgpu_device *adev) { +#ifdef CONFIG_PCIEASPM uint32_t def, data; if (adev->ip_versions[NBIO_HWIP][0] == IP_VERSION(7, 4, 4)) @@ -755,7 +758,10 @@ static void nbio_v7_4_program_aspm(struct amdgpu_device *adev) if (def != data) WREG32_PCIE(smnPCIE_LC_CNTL6, data); - nbio_v7_4_program_ltr(adev); + /* Don't bother about LTR if LTR is not enabled + * in the path */ + if (adev->pdev->ltr_path) + nbio_v7_4_program_ltr(adev); def = data = RREG32_PCIE(smnRCC_BIF_STRAP3); data |= 0x5DE0 << RCC_BIF_STRAP3__STRAP_VLINK_ASPM_IDLE_TIMER__SHIFT; @@ -779,6 +785,7 @@ static void nbio_v7_4_program_aspm(struct amdgpu_device *adev) data &= 
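nbio v2.3, v6.1 and v7.4 now compile their ASPM/LTR programming only under CONFIG_PCIEASPM and skip the LTR registers when the PCI core did not record an LTR-capable path, presumably so the device never emits LTR messages an upstream port would reject. The guard, common to all three files (the called helper is the per-IP variant):

        #ifdef CONFIG_PCIEASPM
                /* Don't bother about LTR if LTR is not enabled in the path */
                if (adev->pdev->ltr_path)
                        nbio_v7_4_program_ltr(adev);    /* or the v2_3 / v6_1 variant */
        #endif
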
~PCIE_LC_CNTL3__LC_DSC_DONT_ENTER_L23_AFTER_PME_ACK_MASK; if (def != data) WREG32_PCIE(smnPCIE_LC_CNTL3, data); +#endif } const struct amdgpu_nbio_funcs nbio_v7_4_funcs = { diff --git a/drivers/gpu/drm/amd/amdgpu/nbio_v7_7.c b/drivers/gpu/drm/amd/amdgpu/nbio_v7_7.c index 01e8288d09a8..def89379b51a 100644 --- a/drivers/gpu/drm/amd/amdgpu/nbio_v7_7.c +++ b/drivers/gpu/drm/amd/amdgpu/nbio_v7_7.c @@ -28,6 +28,14 @@ #include "nbio/nbio_7_7_0_sh_mask.h" #include <uapi/linux/kfd_ioctl.h> +static void nbio_v7_7_remap_hdp_registers(struct amdgpu_device *adev) +{ + WREG32_SOC15(NBIO, 0, regBIF_BX0_REMAP_HDP_MEM_FLUSH_CNTL, + adev->rmmio_remap.reg_offset + KFD_MMIO_REMAP_HDP_MEM_FLUSH_CNTL); + WREG32_SOC15(NBIO, 0, regBIF_BX0_REMAP_HDP_REG_FLUSH_CNTL, + adev->rmmio_remap.reg_offset + KFD_MMIO_REMAP_HDP_REG_FLUSH_CNTL); +} + static u32 nbio_v7_7_get_rev_id(struct amdgpu_device *adev) { u32 tmp; @@ -68,12 +76,6 @@ static void nbio_v7_7_sdma_doorbell_range(struct amdgpu_device *adev, int instan doorbell_range = REG_SET_FIELD(doorbell_range, GDC0_BIF_CSDMA_DOORBELL_RANGE, SIZE, doorbell_size); - doorbell_range = REG_SET_FIELD(doorbell_range, - GDC0_BIF_SDMA0_DOORBELL_RANGE, - OFFSET, doorbell_index); - doorbell_range = REG_SET_FIELD(doorbell_range, - GDC0_BIF_SDMA0_DOORBELL_RANGE, - SIZE, doorbell_size); } else { doorbell_range = REG_SET_FIELD(doorbell_range, GDC0_BIF_SDMA0_DOORBELL_RANGE, @@ -247,6 +249,81 @@ static void nbio_v7_7_init_registers(struct amdgpu_device *adev) } +static void nbio_v7_7_update_medium_grain_clock_gating(struct amdgpu_device *adev, + bool enable) +{ + uint32_t def, data; + + if (enable && !(adev->cg_flags & AMD_CG_SUPPORT_BIF_MGCG)) + return; + + def = data = RREG32_SOC15(NBIO, 0, regBIF0_CPM_CONTROL); + if (enable) { + data |= (BIF0_CPM_CONTROL__LCLK_DYN_GATE_ENABLE_MASK | + BIF0_CPM_CONTROL__TXCLK_DYN_GATE_ENABLE_MASK | + BIF0_CPM_CONTROL__TXCLK_LCNT_GATE_ENABLE_MASK | + BIF0_CPM_CONTROL__TXCLK_REGS_GATE_ENABLE_MASK | + BIF0_CPM_CONTROL__TXCLK_PRBS_GATE_ENABLE_MASK | + BIF0_CPM_CONTROL__REFCLK_REGS_GATE_ENABLE_MASK); + } else { + data &= ~(BIF0_CPM_CONTROL__LCLK_DYN_GATE_ENABLE_MASK | + BIF0_CPM_CONTROL__TXCLK_DYN_GATE_ENABLE_MASK | + BIF0_CPM_CONTROL__TXCLK_LCNT_GATE_ENABLE_MASK | + BIF0_CPM_CONTROL__TXCLK_REGS_GATE_ENABLE_MASK | + BIF0_CPM_CONTROL__TXCLK_PRBS_GATE_ENABLE_MASK | + BIF0_CPM_CONTROL__REFCLK_REGS_GATE_ENABLE_MASK); + } + + if (def != data) + WREG32_SOC15(NBIO, 0, regBIF0_CPM_CONTROL, data); +} + +static void nbio_v7_7_update_medium_grain_light_sleep(struct amdgpu_device *adev, + bool enable) +{ + uint32_t def, data; + + if (enable && !(adev->cg_flags & AMD_CG_SUPPORT_BIF_LS)) + return; + + def = data = RREG32_SOC15(NBIO, 0, regBIF0_PCIE_CNTL2); + if (enable) + data |= BIF0_PCIE_CNTL2__SLV_MEM_LS_EN_MASK; + else + data &= ~BIF0_PCIE_CNTL2__SLV_MEM_LS_EN_MASK; + + if (def != data) + WREG32_SOC15(NBIO, 0, regBIF0_PCIE_CNTL2, data); + + def = data = RREG32_SOC15(NBIO, 0, regBIF0_PCIE_TX_POWER_CTRL_1); + if (enable) { + data |= (BIF0_PCIE_TX_POWER_CTRL_1__MST_MEM_LS_EN_MASK | + BIF0_PCIE_TX_POWER_CTRL_1__REPLAY_MEM_LS_EN_MASK); + } else { + data &= ~(BIF0_PCIE_TX_POWER_CTRL_1__MST_MEM_LS_EN_MASK | + BIF0_PCIE_TX_POWER_CTRL_1__REPLAY_MEM_LS_EN_MASK); + } + + if (def != data) + WREG32_SOC15(NBIO, 0, regBIF0_PCIE_TX_POWER_CTRL_1, data); +} + +static void nbio_v7_7_get_clockgating_state(struct amdgpu_device *adev, + u64 *flags) +{ + uint32_t data; + + /* AMD_CG_SUPPORT_BIF_MGCG */ + data = RREG32_SOC15(NBIO, 0, regBIF0_CPM_CONTROL); + if (data & 
BIF0_CPM_CONTROL__LCLK_DYN_GATE_ENABLE_MASK) + *flags |= AMD_CG_SUPPORT_BIF_MGCG; + + /* AMD_CG_SUPPORT_BIF_LS */ + data = RREG32_SOC15(NBIO, 0, regBIF0_PCIE_CNTL2); + if (data & BIF0_PCIE_CNTL2__SLV_MEM_LS_EN_MASK) + *flags |= AMD_CG_SUPPORT_BIF_LS; +} + const struct amdgpu_nbio_funcs nbio_v7_7_funcs = { .get_hdp_flush_req_offset = nbio_v7_7_get_hdp_flush_req_offset, .get_hdp_flush_done_offset = nbio_v7_7_get_hdp_flush_done_offset, @@ -262,6 +339,10 @@ const struct amdgpu_nbio_funcs nbio_v7_7_funcs = { .enable_doorbell_aperture = nbio_v7_7_enable_doorbell_aperture, .enable_doorbell_selfring_aperture = nbio_v7_7_enable_doorbell_selfring_aperture, .ih_doorbell_range = nbio_v7_7_ih_doorbell_range, + .update_medium_grain_clock_gating = nbio_v7_7_update_medium_grain_clock_gating, + .update_medium_grain_light_sleep = nbio_v7_7_update_medium_grain_light_sleep, + .get_clockgating_state = nbio_v7_7_get_clockgating_state, .ih_control = nbio_v7_7_ih_control, .init_registers = nbio_v7_7_init_registers, + .remap_hdp_registers = nbio_v7_7_remap_hdp_registers, }; diff --git a/drivers/gpu/drm/amd/amdgpu/psp_v12_0.c b/drivers/gpu/drm/amd/amdgpu/psp_v12_0.c index a2588200ea58..0b2ac418e4ac 100644 --- a/drivers/gpu/drm/amd/amdgpu/psp_v12_0.c +++ b/drivers/gpu/drm/amd/amdgpu/psp_v12_0.c @@ -101,6 +101,16 @@ static int psp_v12_0_init_microcode(struct psp_context *psp) adev->psp.dtm_context.context.bin_desc.start_addr = (uint8_t *)adev->psp.hdcp_context.context.bin_desc.start_addr + le32_to_cpu(ta_hdr->dtm.offset_bytes); + + if (adev->apu_flags & AMD_APU_IS_RENOIR) { + adev->psp.securedisplay_context.context.bin_desc.fw_version = + le32_to_cpu(ta_hdr->securedisplay.fw_version); + adev->psp.securedisplay_context.context.bin_desc.size_bytes = + le32_to_cpu(ta_hdr->securedisplay.size_bytes); + adev->psp.securedisplay_context.context.bin_desc.start_addr = + (uint8_t *)adev->psp.hdcp_context.context.bin_desc.start_addr + + le32_to_cpu(ta_hdr->securedisplay.offset_bytes); + } } return 0; diff --git a/drivers/gpu/drm/amd/amdgpu/psp_v13_0.c b/drivers/gpu/drm/amd/amdgpu/psp_v13_0.c index 726a5bba40b2..21d822b1d589 100644 --- a/drivers/gpu/drm/amd/amdgpu/psp_v13_0.c +++ b/drivers/gpu/drm/amd/amdgpu/psp_v13_0.c @@ -20,7 +20,6 @@ * OTHER DEALINGS IN THE SOFTWARE. 
* */ -#include <linux/dev_printk.h> #include <drm/drm_drv.h> #include <linux/vmalloc.h> #include "amdgpu.h" @@ -45,6 +44,7 @@ MODULE_FIRMWARE("amdgpu/psp_13_0_0_sos.bin"); MODULE_FIRMWARE("amdgpu/psp_13_0_0_ta.bin"); MODULE_FIRMWARE("amdgpu/psp_13_0_7_sos.bin"); MODULE_FIRMWARE("amdgpu/psp_13_0_7_ta.bin"); +MODULE_FIRMWARE("amdgpu/psp_13_0_10_sos.bin"); /* For large FW files the time to complete can be very long */ #define USBC_PD_POLLING_LIMIT_S 240 @@ -110,6 +110,7 @@ static int psp_v13_0_init_microcode(struct psp_context *psp) break; case IP_VERSION(13, 0, 0): case IP_VERSION(13, 0, 7): + case IP_VERSION(13, 0, 10): err = psp_init_sos_microcode(psp, chip_name); if (err) return err; @@ -223,6 +224,12 @@ static int psp_v13_0_bootloader_load_dbg_drv(struct psp_context *psp) return psp_v13_0_bootloader_load_component(psp, &psp->dbg_drv, PSP_BL__LOAD_DBGDRV); } +static int psp_v13_0_bootloader_load_ras_drv(struct psp_context *psp) +{ + return psp_v13_0_bootloader_load_component(psp, &psp->ras_drv, PSP_BL__LOAD_RASDRV); +} + + static int psp_v13_0_bootloader_load_sos(struct psp_context *psp) { int ret; @@ -719,6 +726,7 @@ static const struct psp_funcs psp_v13_0_funcs = { .bootloader_load_soc_drv = psp_v13_0_bootloader_load_soc_drv, .bootloader_load_intf_drv = psp_v13_0_bootloader_load_intf_drv, .bootloader_load_dbg_drv = psp_v13_0_bootloader_load_dbg_drv, + .bootloader_load_ras_drv = psp_v13_0_bootloader_load_ras_drv, .bootloader_load_sos = psp_v13_0_bootloader_load_sos, .ring_init = psp_v13_0_ring_init, .ring_create = psp_v13_0_ring_create, diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v2_4.c b/drivers/gpu/drm/amd/amdgpu/sdma_v2_4.c index 6bdffdc1c0b9..c52d246a1d96 100644 --- a/drivers/gpu/drm/amd/amdgpu/sdma_v2_4.c +++ b/drivers/gpu/drm/amd/amdgpu/sdma_v2_4.c @@ -342,14 +342,10 @@ static void sdma_v2_4_ring_emit_fence(struct amdgpu_ring *ring, u64 addr, u64 se */ static void sdma_v2_4_gfx_stop(struct amdgpu_device *adev) { - struct amdgpu_ring *sdma0 = &adev->sdma.instance[0].ring; - struct amdgpu_ring *sdma1 = &adev->sdma.instance[1].ring; u32 rb_cntl, ib_cntl; int i; - if ((adev->mman.buffer_funcs_ring == sdma0) || - (adev->mman.buffer_funcs_ring == sdma1)) - amdgpu_ttm_set_buffer_funcs_status(adev, false); + amdgpu_sdma_unset_buffer_funcs_helper(adev); for (i = 0; i < adev->sdma.num_instances; i++) { rb_cntl = RREG32(mmSDMA0_GFX_RB_CNTL + sdma_offsets[i]); diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v3_0.c b/drivers/gpu/drm/amd/amdgpu/sdma_v3_0.c index 2584fa3cb13e..486d9b5c1b9e 100644 --- a/drivers/gpu/drm/amd/amdgpu/sdma_v3_0.c +++ b/drivers/gpu/drm/amd/amdgpu/sdma_v3_0.c @@ -516,14 +516,10 @@ static void sdma_v3_0_ring_emit_fence(struct amdgpu_ring *ring, u64 addr, u64 se */ static void sdma_v3_0_gfx_stop(struct amdgpu_device *adev) { - struct amdgpu_ring *sdma0 = &adev->sdma.instance[0].ring; - struct amdgpu_ring *sdma1 = &adev->sdma.instance[1].ring; u32 rb_cntl, ib_cntl; int i; - if ((adev->mman.buffer_funcs_ring == sdma0) || - (adev->mman.buffer_funcs_ring == sdma1)) - amdgpu_ttm_set_buffer_funcs_status(adev, false); + amdgpu_sdma_unset_buffer_funcs_helper(adev); for (i = 0; i < adev->sdma.num_instances; i++) { rb_cntl = RREG32(mmSDMA0_GFX_RB_CNTL + sdma_offsets[i]); diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c b/drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c index 65181efba50e..298fa11702e7 100644 --- a/drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c +++ b/drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c @@ -561,44 +561,6 @@ static void sdma_v4_0_setup_ulv(struct amdgpu_device *adev) } } -static int 
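sdma_v2_4, v3_0, v4_0, v5_0 and v5_2 all used to open-code "if one of my rings is the TTM buffer-funcs ring, disable the scheduler" before stopping the engines; the patch replaces those copies with amdgpu_sdma_unset_buffer_funcs_helper(). The helper's body is not part of this diff; a plausible sketch of what it centralizes, reconstructed from the removed code and not the actual implementation:

        /* Sketch of what the new helper presumably does */
        void amdgpu_sdma_unset_buffer_funcs_helper(struct amdgpu_device *adev)
        {
                int i;

                for (i = 0; i < adev->sdma.num_instances; i++) {
                        if (adev->mman.buffer_funcs_ring ==
                            &adev->sdma.instance[i].ring) {
                                amdgpu_ttm_set_buffer_funcs_status(adev, false);
                                break;
                        }
                }
        }
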
sdma_v4_0_init_inst_ctx(struct amdgpu_sdma_instance *sdma_inst) -{ - int err = 0; - const struct sdma_firmware_header_v1_0 *hdr; - - err = amdgpu_ucode_validate(sdma_inst->fw); - if (err) - return err; - - hdr = (const struct sdma_firmware_header_v1_0 *)sdma_inst->fw->data; - sdma_inst->fw_version = le32_to_cpu(hdr->header.ucode_version); - sdma_inst->feature_version = le32_to_cpu(hdr->ucode_feature_version); - - if (sdma_inst->feature_version >= 20) - sdma_inst->burst_nop = true; - - return 0; -} - -static void sdma_v4_0_destroy_inst_ctx(struct amdgpu_device *adev) -{ - int i; - - for (i = 0; i < adev->sdma.num_instances; i++) { - release_firmware(adev->sdma.instance[i].fw); - adev->sdma.instance[i].fw = NULL; - - /* arcturus shares the same FW memory across - all SDMA isntances */ - if (adev->ip_versions[SDMA0_HWIP][0] == IP_VERSION(4, 2, 2) || - adev->ip_versions[SDMA0_HWIP][0] == IP_VERSION(4, 4, 0)) - break; - } - - memset((void *)adev->sdma.instance, 0, - sizeof(struct amdgpu_sdma_instance) * AMDGPU_MAX_SDMA_INSTANCES); -} - /** * sdma_v4_0_init_microcode - load ucode images from disk * @@ -615,9 +577,7 @@ static int sdma_v4_0_init_microcode(struct amdgpu_device *adev) { const char *chip_name; char fw_name[30]; - int err = 0, i; - struct amdgpu_firmware_info *info = NULL; - const struct common_firmware_header *header = NULL; + int ret, i; DRM_DEBUG("\n"); @@ -656,58 +616,25 @@ static int sdma_v4_0_init_microcode(struct amdgpu_device *adev) BUG(); } - snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_sdma.bin", chip_name); - - err = request_firmware(&adev->sdma.instance[0].fw, fw_name, adev->dev); - if (err) - goto out; - - err = sdma_v4_0_init_inst_ctx(&adev->sdma.instance[0]); - if (err) - goto out; - - for (i = 1; i < adev->sdma.num_instances; i++) { + for (i = 0; i < adev->sdma.num_instances; i++) { + if (i == 0) + snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_sdma.bin", chip_name); + else + snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_sdma%d.bin", chip_name, i); if (adev->ip_versions[SDMA0_HWIP][0] == IP_VERSION(4, 2, 2) || adev->ip_versions[SDMA0_HWIP][0] == IP_VERSION(4, 4, 0)) { /* Acturus & Aldebaran will leverage the same FW memory for every SDMA instance */ - memcpy((void *)&adev->sdma.instance[i], - (void *)&adev->sdma.instance[0], - sizeof(struct amdgpu_sdma_instance)); - } - else { - snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_sdma%d.bin", chip_name, i); - - err = request_firmware(&adev->sdma.instance[i].fw, fw_name, adev->dev); - if (err) - goto out; - - err = sdma_v4_0_init_inst_ctx(&adev->sdma.instance[i]); - if (err) - goto out; - } - } - - DRM_DEBUG("psp_load == '%s'\n", - adev->firmware.load_type == AMDGPU_FW_LOAD_PSP ? 
"true" : "false"); - - if (adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) { - for (i = 0; i < adev->sdma.num_instances; i++) { - info = &adev->firmware.ucode[AMDGPU_UCODE_ID_SDMA0 + i]; - info->ucode_id = AMDGPU_UCODE_ID_SDMA0 + i; - info->fw = adev->sdma.instance[i].fw; - header = (const struct common_firmware_header *)info->fw->data; - adev->firmware.fw_size += - ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE); + ret = amdgpu_sdma_init_microcode(adev, fw_name, 0, true); + break; + } else { + ret = amdgpu_sdma_init_microcode(adev, fw_name, i, false); + if (ret) + return ret; } } -out: - if (err) { - DRM_ERROR("sdma_v4_0: Failed to load firmware \"%s\"\n", fw_name); - sdma_v4_0_destroy_inst_ctx(adev); - } - return err; + return ret; } /** @@ -988,18 +915,12 @@ static void sdma_v4_0_ring_emit_fence(struct amdgpu_ring *ring, u64 addr, u64 se */ static void sdma_v4_0_gfx_stop(struct amdgpu_device *adev) { - struct amdgpu_ring *sdma[AMDGPU_MAX_SDMA_INSTANCES]; u32 rb_cntl, ib_cntl; - int i, unset = 0; - - for (i = 0; i < adev->sdma.num_instances; i++) { - sdma[i] = &adev->sdma.instance[i].ring; + int i; - if ((adev->mman.buffer_funcs_ring == sdma[i]) && unset != 1) { - amdgpu_ttm_set_buffer_funcs_status(adev, false); - unset = 1; - } + amdgpu_sdma_unset_buffer_funcs_helper(adev); + for (i = 0; i < adev->sdma.num_instances; i++) { rb_cntl = RREG32_SDMA(i, mmSDMA0_GFX_RB_CNTL); rb_cntl = REG_SET_FIELD(rb_cntl, SDMA0_GFX_RB_CNTL, RB_ENABLE, 0); WREG32_SDMA(i, mmSDMA0_GFX_RB_CNTL, rb_cntl); @@ -1030,20 +951,12 @@ static void sdma_v4_0_rlc_stop(struct amdgpu_device *adev) */ static void sdma_v4_0_page_stop(struct amdgpu_device *adev) { - struct amdgpu_ring *sdma[AMDGPU_MAX_SDMA_INSTANCES]; u32 rb_cntl, ib_cntl; int i; - bool unset = false; - for (i = 0; i < adev->sdma.num_instances; i++) { - sdma[i] = &adev->sdma.instance[i].page; - - if ((adev->mman.buffer_funcs_ring == sdma[i]) && - (!unset)) { - amdgpu_ttm_set_buffer_funcs_status(adev, false); - unset = true; - } + amdgpu_sdma_unset_buffer_funcs_helper(adev); + for (i = 0; i < adev->sdma.num_instances; i++) { rb_cntl = RREG32_SDMA(i, mmSDMA0_PAGE_RB_CNTL); rb_cntl = REG_SET_FIELD(rb_cntl, SDMA0_PAGE_RB_CNTL, RB_ENABLE, 0); @@ -1504,6 +1417,11 @@ static int sdma_v4_0_start(struct amdgpu_device *adev) WREG32_SDMA(i, mmSDMA0_CNTL, temp); if (!amdgpu_sriov_vf(adev)) { + ring = &adev->sdma.instance[i].ring; + adev->nbio.funcs->sdma_doorbell_range(adev, i, + ring->use_doorbell, ring->doorbell_index, + adev->doorbell_index.sdma_doorbell_range); + /* unhalt engine */ temp = RREG32_SDMA(i, mmSDMA0_F32_CNTL); temp = REG_SET_FIELD(temp, SDMA0_F32_CNTL, HALT, 0); @@ -1995,14 +1913,17 @@ static int sdma_v4_0_sw_fini(void *handle) amdgpu_ring_fini(&adev->sdma.instance[i].page); } - sdma_v4_0_destroy_inst_ctx(adev); + if (adev->ip_versions[SDMA0_HWIP][0] == IP_VERSION(4, 2, 0) || + adev->ip_versions[SDMA0_HWIP][0] == IP_VERSION(4, 4, 0)) + amdgpu_sdma_destroy_inst_ctx(adev, true); + else + amdgpu_sdma_destroy_inst_ctx(adev, false); return 0; } static int sdma_v4_0_hw_init(void *handle) { - int r; struct amdgpu_device *adev = (struct amdgpu_device *)handle; if (adev->flags & AMD_IS_APU) @@ -2011,9 +1932,7 @@ static int sdma_v4_0_hw_init(void *handle) if (!amdgpu_sriov_vf(adev)) sdma_v4_0_init_golden_registers(adev); - r = sdma_v4_0_start(adev); - - return r; + return sdma_v4_0_start(adev); } static int sdma_v4_0_hw_fini(void *handle) @@ -2021,8 +1940,11 @@ static int sdma_v4_0_hw_fini(void *handle) struct amdgpu_device *adev = (struct amdgpu_device 
*)handle; int i; - if (amdgpu_sriov_vf(adev)) + if (amdgpu_sriov_vf(adev)) { + /* disable the scheduler for SDMA */ + amdgpu_sdma_unset_buffer_funcs_helper(adev); return 0; + } for (i = 0; i < adev->sdma.num_instances; i++) { amdgpu_irq_put(adev, &adev->sdma.ecc_irq, diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v5_0.c b/drivers/gpu/drm/amd/amdgpu/sdma_v5_0.c index a019ac92edb7..d4d9f196db83 100644 --- a/drivers/gpu/drm/amd/amdgpu/sdma_v5_0.c +++ b/drivers/gpu/drm/amd/amdgpu/sdma_v5_0.c @@ -240,10 +240,7 @@ static int sdma_v5_0_init_microcode(struct amdgpu_device *adev) { const char *chip_name; char fw_name[40]; - int err = 0, i; - struct amdgpu_firmware_info *info = NULL; - const struct common_firmware_header *header = NULL; - const struct sdma_firmware_header_v1_0 *hdr; + int ret, i; if (amdgpu_sriov_vf(adev) && (adev->ip_versions[SDMA0_HWIP][0] == IP_VERSION(5, 0, 5))) return 0; @@ -272,38 +269,12 @@ static int sdma_v5_0_init_microcode(struct amdgpu_device *adev) snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_sdma.bin", chip_name); else snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_sdma1.bin", chip_name); - err = request_firmware(&adev->sdma.instance[i].fw, fw_name, adev->dev); - if (err) - goto out; - err = amdgpu_ucode_validate(adev->sdma.instance[i].fw); - if (err) - goto out; - hdr = (const struct sdma_firmware_header_v1_0 *)adev->sdma.instance[i].fw->data; - adev->sdma.instance[i].fw_version = le32_to_cpu(hdr->header.ucode_version); - adev->sdma.instance[i].feature_version = le32_to_cpu(hdr->ucode_feature_version); - if (adev->sdma.instance[i].feature_version >= 20) - adev->sdma.instance[i].burst_nop = true; - DRM_DEBUG("psp_load == '%s'\n", - adev->firmware.load_type == AMDGPU_FW_LOAD_PSP ? "true" : "false"); - - if (adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) { - info = &adev->firmware.ucode[AMDGPU_UCODE_ID_SDMA0 + i]; - info->ucode_id = AMDGPU_UCODE_ID_SDMA0 + i; - info->fw = adev->sdma.instance[i].fw; - header = (const struct common_firmware_header *)info->fw->data; - adev->firmware.fw_size += - ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE); - } + ret = amdgpu_sdma_init_microcode(adev, fw_name, i, false); + if (ret) + return ret; } -out: - if (err) { - DRM_ERROR("sdma_v5_0: Failed to load firmware \"%s\"\n", fw_name); - for (i = 0; i < adev->sdma.num_instances; i++) { - release_firmware(adev->sdma.instance[i].fw); - adev->sdma.instance[i].fw = NULL; - } - } - return err; + + return ret; } static unsigned sdma_v5_0_ring_init_cond_exec(struct amdgpu_ring *ring) @@ -613,14 +584,10 @@ static void sdma_v5_0_ring_emit_fence(struct amdgpu_ring *ring, u64 addr, u64 se */ static void sdma_v5_0_gfx_stop(struct amdgpu_device *adev) { - struct amdgpu_ring *sdma0 = &adev->sdma.instance[0].ring; - struct amdgpu_ring *sdma1 = &adev->sdma.instance[1].ring; u32 rb_cntl, ib_cntl; int i; - if ((adev->mman.buffer_funcs_ring == sdma0) || - (adev->mman.buffer_funcs_ring == sdma1)) - amdgpu_ttm_set_buffer_funcs_status(adev, false); + amdgpu_sdma_unset_buffer_funcs_helper(adev); for (i = 0; i < adev->sdma.num_instances; i++) { rb_cntl = RREG32_SOC15_IP(GC, sdma_v5_0_get_reg_offset(adev, i, mmSDMA0_GFX_RB_CNTL)); @@ -1465,12 +1432,10 @@ static int sdma_v5_0_sw_fini(void *handle) struct amdgpu_device *adev = (struct amdgpu_device *)handle; int i; - for (i = 0; i < adev->sdma.num_instances; i++) { - release_firmware(adev->sdma.instance[i].fw); - adev->sdma.instance[i].fw = NULL; - + for (i = 0; i < adev->sdma.num_instances; i++) amdgpu_ring_fini(&adev->sdma.instance[i].ring); - } + + 
amdgpu_sdma_destroy_inst_ctx(adev, false); return 0; } @@ -1491,8 +1456,11 @@ static int sdma_v5_0_hw_fini(void *handle) { struct amdgpu_device *adev = (struct amdgpu_device *)handle; - if (amdgpu_sriov_vf(adev)) + if (amdgpu_sriov_vf(adev)) { + /* disable the scheduler for SDMA */ + amdgpu_sdma_unset_buffer_funcs_helper(adev); return 0; + } sdma_v5_0_ctx_switch_enable(adev, false); sdma_v5_0_enable(adev, false); diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v5_2.c b/drivers/gpu/drm/amd/amdgpu/sdma_v5_2.c index 83c6ccaaa9e4..809eca54fc61 100644 --- a/drivers/gpu/drm/amd/amdgpu/sdma_v5_2.c +++ b/drivers/gpu/drm/amd/amdgpu/sdma_v5_2.c @@ -89,33 +89,6 @@ static u32 sdma_v5_2_get_reg_offset(struct amdgpu_device *adev, u32 instance, u3 return base + internal_offset; } -static int sdma_v5_2_init_inst_ctx(struct amdgpu_sdma_instance *sdma_inst) -{ - int err = 0; - const struct sdma_firmware_header_v1_0 *hdr; - - err = amdgpu_ucode_validate(sdma_inst->fw); - if (err) - return err; - - hdr = (const struct sdma_firmware_header_v1_0 *)sdma_inst->fw->data; - sdma_inst->fw_version = le32_to_cpu(hdr->header.ucode_version); - sdma_inst->feature_version = le32_to_cpu(hdr->ucode_feature_version); - - if (sdma_inst->feature_version >= 20) - sdma_inst->burst_nop = true; - - return 0; -} - -static void sdma_v5_2_destroy_inst_ctx(struct amdgpu_device *adev) -{ - release_firmware(adev->sdma.instance[0].fw); - - memset((void *)adev->sdma.instance, 0, - sizeof(struct amdgpu_sdma_instance) * AMDGPU_MAX_SDMA_INSTANCES); -} - /** * sdma_v5_2_init_microcode - load ucode images from disk * @@ -132,9 +105,6 @@ static int sdma_v5_2_init_microcode(struct amdgpu_device *adev) { const char *chip_name; char fw_name[40]; - int err = 0, i; - struct amdgpu_firmware_info *info = NULL; - const struct common_firmware_header *header = NULL; DRM_DEBUG("\n"); @@ -169,42 +139,7 @@ static int sdma_v5_2_init_microcode(struct amdgpu_device *adev) snprintf(fw_name, sizeof(fw_name), "amdgpu/%s.bin", chip_name); - err = request_firmware(&adev->sdma.instance[0].fw, fw_name, adev->dev); - if (err) - goto out; - - err = sdma_v5_2_init_inst_ctx(&adev->sdma.instance[0]); - if (err) - goto out; - - for (i = 1; i < adev->sdma.num_instances; i++) - memcpy((void *)&adev->sdma.instance[i], - (void *)&adev->sdma.instance[0], - sizeof(struct amdgpu_sdma_instance)); - - if (amdgpu_sriov_vf(adev) && (adev->ip_versions[SDMA0_HWIP][0] == IP_VERSION(5, 2, 0))) - return 0; - - DRM_DEBUG("psp_load == '%s'\n", - adev->firmware.load_type == AMDGPU_FW_LOAD_PSP ? 
"true" : "false"); - - if (adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) { - for (i = 0; i < adev->sdma.num_instances; i++) { - info = &adev->firmware.ucode[AMDGPU_UCODE_ID_SDMA0 + i]; - info->ucode_id = AMDGPU_UCODE_ID_SDMA0 + i; - info->fw = adev->sdma.instance[i].fw; - header = (const struct common_firmware_header *)info->fw->data; - adev->firmware.fw_size += - ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE); - } - } - -out: - if (err) { - DRM_ERROR("sdma_v5_2: Failed to load firmware \"%s\"\n", fw_name); - sdma_v5_2_destroy_inst_ctx(adev); - } - return err; + return amdgpu_sdma_init_microcode(adev, fw_name, 0, true); } static unsigned sdma_v5_2_ring_init_cond_exec(struct amdgpu_ring *ring) @@ -479,18 +414,10 @@ static void sdma_v5_2_ring_emit_fence(struct amdgpu_ring *ring, u64 addr, u64 se */ static void sdma_v5_2_gfx_stop(struct amdgpu_device *adev) { - struct amdgpu_ring *sdma0 = &adev->sdma.instance[0].ring; - struct amdgpu_ring *sdma1 = &adev->sdma.instance[1].ring; - struct amdgpu_ring *sdma2 = &adev->sdma.instance[2].ring; - struct amdgpu_ring *sdma3 = &adev->sdma.instance[3].ring; u32 rb_cntl, ib_cntl; int i; - if ((adev->mman.buffer_funcs_ring == sdma0) || - (adev->mman.buffer_funcs_ring == sdma1) || - (adev->mman.buffer_funcs_ring == sdma2) || - (adev->mman.buffer_funcs_ring == sdma3)) - amdgpu_ttm_set_buffer_funcs_status(adev, false); + amdgpu_sdma_unset_buffer_funcs_helper(adev); for (i = 0; i < adev->sdma.num_instances; i++) { rb_cntl = RREG32_SOC15_IP(GC, sdma_v5_2_get_reg_offset(adev, i, mmSDMA0_GFX_RB_CNTL)); @@ -1406,27 +1333,27 @@ static int sdma_v5_2_sw_fini(void *handle) for (i = 0; i < adev->sdma.num_instances; i++) amdgpu_ring_fini(&adev->sdma.instance[i].ring); - sdma_v5_2_destroy_inst_ctx(adev); + amdgpu_sdma_destroy_inst_ctx(adev, true); return 0; } static int sdma_v5_2_hw_init(void *handle) { - int r; struct amdgpu_device *adev = (struct amdgpu_device *)handle; - r = sdma_v5_2_start(adev); - - return r; + return sdma_v5_2_start(adev); } static int sdma_v5_2_hw_fini(void *handle) { struct amdgpu_device *adev = (struct amdgpu_device *)handle; - if (amdgpu_sriov_vf(adev)) + if (amdgpu_sriov_vf(adev)) { + /* disable the scheduler for SDMA */ + amdgpu_sdma_unset_buffer_funcs_helper(adev); return 0; + } sdma_v5_2_ctx_switch_enable(adev, false); sdma_v5_2_enable(adev, false); diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v6_0.c b/drivers/gpu/drm/amd/amdgpu/sdma_v6_0.c index 0200cb3a31a4..da3beb0bf2fa 100644 --- a/drivers/gpu/drm/amd/amdgpu/sdma_v6_0.c +++ b/drivers/gpu/drm/amd/amdgpu/sdma_v6_0.c @@ -47,6 +47,7 @@ MODULE_FIRMWARE("amdgpu/sdma_6_0_0.bin"); MODULE_FIRMWARE("amdgpu/sdma_6_0_1.bin"); MODULE_FIRMWARE("amdgpu/sdma_6_0_2.bin"); +MODULE_FIRMWARE("amdgpu/sdma_6_0_3.bin"); #define SDMA1_REG_OFFSET 0x600 #define SDMA0_HYP_DEC_REG_START 0x5880 @@ -77,33 +78,6 @@ static u32 sdma_v6_0_get_reg_offset(struct amdgpu_device *adev, u32 instance, u3 return base + internal_offset; } -static int sdma_v6_0_init_inst_ctx(struct amdgpu_sdma_instance *sdma_inst) -{ - int err = 0; - const struct sdma_firmware_header_v2_0 *hdr; - - err = amdgpu_ucode_validate(sdma_inst->fw); - if (err) - return err; - - hdr = (const struct sdma_firmware_header_v2_0 *)sdma_inst->fw->data; - sdma_inst->fw_version = le32_to_cpu(hdr->header.ucode_version); - sdma_inst->feature_version = le32_to_cpu(hdr->ucode_feature_version); - - if (sdma_inst->feature_version >= 20) - sdma_inst->burst_nop = true; - - return 0; -} - -static void sdma_v6_0_destroy_inst_ctx(struct amdgpu_device *adev) -{ - 
release_firmware(adev->sdma.instance[0].fw); - - memset((void*)adev->sdma.instance, 0, - sizeof(struct amdgpu_sdma_instance) * AMDGPU_MAX_SDMA_INSTANCES); -} - /** * sdma_v6_0_init_microcode - load ucode images from disk * @@ -113,16 +87,10 @@ static void sdma_v6_0_destroy_inst_ctx(struct amdgpu_device *adev) * the driver (not loaded into hw). * Returns 0 on success, error on failure. */ - -// emulation only, won't work on real chip -// sdma 6.0.0 real chip need to use PSP to load firmware static int sdma_v6_0_init_microcode(struct amdgpu_device *adev) { char fw_name[30]; char ucode_prefix[30]; - int err = 0, i; - struct amdgpu_firmware_info *info = NULL; - const struct sdma_firmware_header_v2_0 *sdma_hdr; DRM_DEBUG("\n"); @@ -130,43 +98,7 @@ static int sdma_v6_0_init_microcode(struct amdgpu_device *adev) snprintf(fw_name, sizeof(fw_name), "amdgpu/%s.bin", ucode_prefix); - err = request_firmware(&adev->sdma.instance[0].fw, fw_name, adev->dev); - if (err) - goto out; - - err = sdma_v6_0_init_inst_ctx(&adev->sdma.instance[0]); - if (err) - goto out; - - for (i = 1; i < adev->sdma.num_instances; i++) { - memcpy((void*)&adev->sdma.instance[i], - (void*)&adev->sdma.instance[0], - sizeof(struct amdgpu_sdma_instance)); - } - - DRM_DEBUG("psp_load == '%s'\n", - adev->firmware.load_type == AMDGPU_FW_LOAD_PSP ? "true" : "false"); - - if (adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) { - sdma_hdr = (const struct sdma_firmware_header_v2_0 *)adev->sdma.instance[0].fw->data; - info = &adev->firmware.ucode[AMDGPU_UCODE_ID_SDMA_UCODE_TH0]; - info->ucode_id = AMDGPU_UCODE_ID_SDMA_UCODE_TH0; - info->fw = adev->sdma.instance[0].fw; - adev->firmware.fw_size += - ALIGN(le32_to_cpu(sdma_hdr->ctx_ucode_size_bytes), PAGE_SIZE); - info = &adev->firmware.ucode[AMDGPU_UCODE_ID_SDMA_UCODE_TH1]; - info->ucode_id = AMDGPU_UCODE_ID_SDMA_UCODE_TH1; - info->fw = adev->sdma.instance[0].fw; - adev->firmware.fw_size += - ALIGN(le32_to_cpu(sdma_hdr->ctl_ucode_size_bytes), PAGE_SIZE); - } - -out: - if (err) { - DRM_ERROR("sdma_v6_0: Failed to load firmware \"%s\"\n", fw_name); - sdma_v6_0_destroy_inst_ctx(adev); - } - return err; + return amdgpu_sdma_init_microcode(adev, fw_name, 0, true); } static unsigned sdma_v6_0_ring_init_cond_exec(struct amdgpu_ring *ring) @@ -466,14 +398,10 @@ static void sdma_v6_0_ring_emit_fence(struct amdgpu_ring *ring, u64 addr, u64 se */ static void sdma_v6_0_gfx_stop(struct amdgpu_device *adev) { - struct amdgpu_ring *sdma0 = &adev->sdma.instance[0].ring; - struct amdgpu_ring *sdma1 = &adev->sdma.instance[1].ring; u32 rb_cntl, ib_cntl; int i; - if ((adev->mman.buffer_funcs_ring == sdma0) || - (adev->mman.buffer_funcs_ring == sdma1)) - amdgpu_ttm_set_buffer_funcs_status(adev, false); + amdgpu_sdma_unset_buffer_funcs_helper(adev); for (i = 0; i < adev->sdma.num_instances; i++) { rb_cntl = RREG32_SOC15_IP(GC, sdma_v6_0_get_reg_offset(adev, i, regSDMA0_QUEUE0_RB_CNTL)); @@ -483,9 +411,6 @@ static void sdma_v6_0_gfx_stop(struct amdgpu_device *adev) ib_cntl = REG_SET_FIELD(ib_cntl, SDMA0_QUEUE0_IB_CNTL, IB_ENABLE, 0); WREG32_SOC15_IP(GC, sdma_v6_0_get_reg_offset(adev, i, regSDMA0_QUEUE0_IB_CNTL), ib_cntl); } - - sdma0->sched.ready = false; - sdma1->sched.ready = false; } /** @@ -559,7 +484,8 @@ static int sdma_v6_0_gfx_resume(struct amdgpu_device *adev) for (i = 0; i < adev->sdma.num_instances; i++) { ring = &adev->sdma.instance[i].ring; - WREG32_SOC15_IP(GC, sdma_v6_0_get_reg_offset(adev, i, regSDMA0_SEM_WAIT_FAIL_TIMER_CNTL), 0); + if (!amdgpu_sriov_vf(adev)) + WREG32_SOC15_IP(GC, 
sdma_v6_0_get_reg_offset(adev, i, regSDMA0_SEM_WAIT_FAIL_TIMER_CNTL), 0); /* Set ring buffer size in dwords */ rb_bufsz = order_base_2(ring->ring_size / 4); @@ -593,7 +519,10 @@ static int sdma_v6_0_gfx_resume(struct amdgpu_device *adev) lower_32_bits(ring->rptr_gpu_addr) & 0xFFFFFFFC); rb_cntl = REG_SET_FIELD(rb_cntl, SDMA0_QUEUE0_RB_CNTL, RPTR_WRITEBACK_ENABLE, 1); - rb_cntl = REG_SET_FIELD(rb_cntl, SDMA0_QUEUE0_RB_CNTL, WPTR_POLL_ENABLE, 0); + if (amdgpu_sriov_vf(adev)) + rb_cntl = REG_SET_FIELD(rb_cntl, SDMA0_QUEUE0_RB_CNTL, WPTR_POLL_ENABLE, 1); + else + rb_cntl = REG_SET_FIELD(rb_cntl, SDMA0_QUEUE0_RB_CNTL, WPTR_POLL_ENABLE, 0); rb_cntl = REG_SET_FIELD(rb_cntl, SDMA0_QUEUE0_RB_CNTL, F32_WPTR_POLL_ENABLE, 1); WREG32_SOC15_IP(GC, sdma_v6_0_get_reg_offset(adev, i, regSDMA0_QUEUE0_RB_BASE), ring->gpu_addr >> 8); @@ -910,7 +839,8 @@ static int sdma_v6_0_mqd_init(struct amdgpu_device *adev, void *mqd, m->sdmax_rlcx_rb_cntl = order_base_2(prop->queue_size / 4) << SDMA0_QUEUE0_RB_CNTL__RB_SIZE__SHIFT | 1 << SDMA0_QUEUE0_RB_CNTL__RPTR_WRITEBACK_ENABLE__SHIFT | - 4 << SDMA0_QUEUE0_RB_CNTL__RPTR_WRITEBACK_TIMER__SHIFT; + 4 << SDMA0_QUEUE0_RB_CNTL__RPTR_WRITEBACK_TIMER__SHIFT | + 1 << SDMA0_QUEUE0_RB_CNTL__F32_WPTR_POLL_ENABLE__SHIFT; m->sdmax_rlcx_rb_base = lower_32_bits(prop->hqd_base_gpu_addr >> 8); m->sdmax_rlcx_rb_base_hi = upper_32_bits(prop->hqd_base_gpu_addr >> 8); @@ -1365,27 +1295,27 @@ static int sdma_v6_0_sw_fini(void *handle) for (i = 0; i < adev->sdma.num_instances; i++) amdgpu_ring_fini(&adev->sdma.instance[i].ring); - sdma_v6_0_destroy_inst_ctx(adev); + amdgpu_sdma_destroy_inst_ctx(adev, true); return 0; } static int sdma_v6_0_hw_init(void *handle) { - int r; struct amdgpu_device *adev = (struct amdgpu_device *)handle; - r = sdma_v6_0_start(adev); - - return r; + return sdma_v6_0_start(adev); } static int sdma_v6_0_hw_fini(void *handle) { struct amdgpu_device *adev = (struct amdgpu_device *)handle; - if (amdgpu_sriov_vf(adev)) + if (amdgpu_sriov_vf(adev)) { + /* disable the scheduler for SDMA */ + amdgpu_sdma_unset_buffer_funcs_helper(adev); return 0; + } sdma_v6_0_ctx_switch_enable(adev, false); sdma_v6_0_enable(adev, false); diff --git a/drivers/gpu/drm/amd/amdgpu/si_dma.c b/drivers/gpu/drm/amd/amdgpu/si_dma.c index f675111ace20..4d5e718540aa 100644 --- a/drivers/gpu/drm/amd/amdgpu/si_dma.c +++ b/drivers/gpu/drm/amd/amdgpu/si_dma.c @@ -116,15 +116,14 @@ static void si_dma_stop(struct amdgpu_device *adev) u32 rb_cntl; unsigned i; + amdgpu_sdma_unset_buffer_funcs_helper(adev); + for (i = 0; i < adev->sdma.num_instances; i++) { ring = &adev->sdma.instance[i].ring; /* dma0 */ rb_cntl = RREG32(DMA_RB_CNTL + sdma_offsets[i]); rb_cntl &= ~DMA_RB_ENABLE; WREG32(DMA_RB_CNTL + sdma_offsets[i], rb_cntl); - - if (adev->mman.buffer_funcs_ring == ring) - amdgpu_ttm_set_buffer_funcs_status(adev, false); } } diff --git a/drivers/gpu/drm/amd/amdgpu/sienna_cichlid.c b/drivers/gpu/drm/amd/amdgpu/sienna_cichlid.c new file mode 100644 index 000000000000..7aa570c1ce4a --- /dev/null +++ b/drivers/gpu/drm/amd/amdgpu/sienna_cichlid.c @@ -0,0 +1,303 @@ +/* + * Copyright 2021 Advanced Micro Devices, Inc. 
+ * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + * + */ + +#include "sienna_cichlid.h" +#include "amdgpu_reset.h" +#include "amdgpu_amdkfd.h" +#include "amdgpu_dpm.h" +#include "amdgpu_job.h" +#include "amdgpu_ring.h" +#include "amdgpu_ras.h" +#include "amdgpu_psp.h" +#include "amdgpu_xgmi.h" + +static struct amdgpu_reset_handler * +sienna_cichlid_get_reset_handler(struct amdgpu_reset_control *reset_ctl, + struct amdgpu_reset_context *reset_context) +{ + struct amdgpu_reset_handler *handler; + struct amdgpu_device *adev = (struct amdgpu_device *)reset_ctl->handle; + + if (reset_context->method != AMD_RESET_METHOD_NONE) { + list_for_each_entry(handler, &reset_ctl->reset_handlers, + handler_list) { + if (handler->reset_method == reset_context->method) + return handler; + } + } else { + list_for_each_entry(handler, &reset_ctl->reset_handlers, + handler_list) { + if (handler->reset_method == AMD_RESET_METHOD_MODE2 && + adev->pm.fw_version >= 0x3a5500 && + !amdgpu_sriov_vf(adev)) { + reset_context->method = AMD_RESET_METHOD_MODE2; + return handler; + } + } + } + + return NULL; +} + +static int sienna_cichlid_mode2_suspend_ip(struct amdgpu_device *adev) +{ + int r, i; + + amdgpu_device_set_pg_state(adev, AMD_PG_STATE_UNGATE); + amdgpu_device_set_cg_state(adev, AMD_CG_STATE_UNGATE); + + for (i = adev->num_ip_blocks - 1; i >= 0; i--) { + if (!(adev->ip_blocks[i].version->type == + AMD_IP_BLOCK_TYPE_GFX || + adev->ip_blocks[i].version->type == + AMD_IP_BLOCK_TYPE_SDMA)) + continue; + + r = adev->ip_blocks[i].version->funcs->suspend(adev); + + if (r) { + dev_err(adev->dev, + "suspend of IP block <%s> failed %d\n", + adev->ip_blocks[i].version->funcs->name, r); + return r; + } + adev->ip_blocks[i].status.hw = false; + } + + return r; +} + +static int +sienna_cichlid_mode2_prepare_hwcontext(struct amdgpu_reset_control *reset_ctl, + struct amdgpu_reset_context *reset_context) +{ + int r = 0; + struct amdgpu_device *adev = (struct amdgpu_device *)reset_ctl->handle; + + if (!amdgpu_sriov_vf(adev)) { + if (adev->gfxhub.funcs->mode2_save_regs) + adev->gfxhub.funcs->mode2_save_regs(adev); + if (adev->gfxhub.funcs->halt) + adev->gfxhub.funcs->halt(adev); + r = sienna_cichlid_mode2_suspend_ip(adev); + } + + return r; +} + +static void sienna_cichlid_async_reset(struct work_struct *work) +{ + struct amdgpu_reset_handler *handler; + struct amdgpu_reset_control *reset_ctl = + container_of(work, struct amdgpu_reset_control, reset_work); + struct amdgpu_device *adev = (struct 
amdgpu_device *)reset_ctl->handle; + + list_for_each_entry(handler, &reset_ctl->reset_handlers, + handler_list) { + if (handler->reset_method == reset_ctl->active_reset) { + dev_dbg(adev->dev, "Resetting device\n"); + handler->do_reset(adev); + break; + } + } +} + +static int sienna_cichlid_mode2_reset(struct amdgpu_device *adev) +{ + /* disable BM */ + pci_clear_master(adev->pdev); + return amdgpu_dpm_mode2_reset(adev); +} + +static int +sienna_cichlid_mode2_perform_reset(struct amdgpu_reset_control *reset_ctl, + struct amdgpu_reset_context *reset_context) +{ + struct amdgpu_device *adev = (struct amdgpu_device *)reset_ctl->handle; + int r; + + r = sienna_cichlid_mode2_reset(adev); + if (r) { + dev_err(adev->dev, + "ASIC reset failed with error, %d ", r); + } + return r; +} + +static int sienna_cichlid_mode2_restore_ip(struct amdgpu_device *adev) +{ + int i, r; + struct psp_context *psp = &adev->psp; + + r = psp_rlc_autoload_start(psp); + if (r) { + dev_err(adev->dev, "Failed to start rlc autoload\n"); + return r; + } + + /* Reinit GFXHUB */ + if (adev->gfxhub.funcs->mode2_restore_regs) + adev->gfxhub.funcs->mode2_restore_regs(adev); + adev->gfxhub.funcs->init(adev); + r = adev->gfxhub.funcs->gart_enable(adev); + if (r) { + dev_err(adev->dev, "GFXHUB gart reenable failed after reset\n"); + return r; + } + + for (i = 0; i < adev->num_ip_blocks; i++) { + if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_IH) { + r = adev->ip_blocks[i].version->funcs->resume(adev); + if (r) { + dev_err(adev->dev, + "resume of IP block <%s> failed %d\n", + adev->ip_blocks[i].version->funcs->name, r); + return r; + } + + adev->ip_blocks[i].status.hw = true; + } + } + + for (i = 0; i < adev->num_ip_blocks; i++) { + if (!(adev->ip_blocks[i].version->type == + AMD_IP_BLOCK_TYPE_GFX || + adev->ip_blocks[i].version->type == + AMD_IP_BLOCK_TYPE_SDMA)) + continue; + r = adev->ip_blocks[i].version->funcs->resume(adev); + if (r) { + dev_err(adev->dev, + "resume of IP block <%s> failed %d\n", + adev->ip_blocks[i].version->funcs->name, r); + return r; + } + + adev->ip_blocks[i].status.hw = true; + } + + for (i = 0; i < adev->num_ip_blocks; i++) { + if (!(adev->ip_blocks[i].version->type == + AMD_IP_BLOCK_TYPE_GFX || + adev->ip_blocks[i].version->type == + AMD_IP_BLOCK_TYPE_SDMA)) + continue; + + if (adev->ip_blocks[i].version->funcs->late_init) { + r = adev->ip_blocks[i].version->funcs->late_init( + (void *)adev); + if (r) { + dev_err(adev->dev, + "late_init of IP block <%s> failed %d after reset\n", + adev->ip_blocks[i].version->funcs->name, + r); + return r; + } + } + adev->ip_blocks[i].status.late_initialized = true; + } + + amdgpu_device_set_cg_state(adev, AMD_CG_STATE_GATE); + amdgpu_device_set_pg_state(adev, AMD_PG_STATE_GATE); + + return r; +} + +static int +sienna_cichlid_mode2_restore_hwcontext(struct amdgpu_reset_control *reset_ctl, + struct amdgpu_reset_context *reset_context) +{ + int r; + struct amdgpu_device *tmp_adev = (struct amdgpu_device *)reset_ctl->handle; + + dev_info(tmp_adev->dev, + "GPU reset succeeded, trying to resume\n"); + r = sienna_cichlid_mode2_restore_ip(tmp_adev); + if (r) + goto end; + + /* + * Add this ASIC as tracked as reset was already + * complete successfully. 
+ */ + amdgpu_register_gpu_instance(tmp_adev); + + /* Resume RAS */ + amdgpu_ras_resume(tmp_adev); + + amdgpu_irq_gpu_reset_resume_helper(tmp_adev); + + r = amdgpu_ib_ring_tests(tmp_adev); + if (r) { + dev_err(tmp_adev->dev, + "ib ring test failed (%d).\n", r); + r = -EAGAIN; + goto end; + } + +end: + if (r) + return -EAGAIN; + else + return r; +} + +static struct amdgpu_reset_handler sienna_cichlid_mode2_handler = { + .reset_method = AMD_RESET_METHOD_MODE2, + .prepare_env = NULL, + .prepare_hwcontext = sienna_cichlid_mode2_prepare_hwcontext, + .perform_reset = sienna_cichlid_mode2_perform_reset, + .restore_hwcontext = sienna_cichlid_mode2_restore_hwcontext, + .restore_env = NULL, + .do_reset = sienna_cichlid_mode2_reset, +}; + +int sienna_cichlid_reset_init(struct amdgpu_device *adev) +{ + struct amdgpu_reset_control *reset_ctl; + + reset_ctl = kzalloc(sizeof(*reset_ctl), GFP_KERNEL); + if (!reset_ctl) + return -ENOMEM; + + reset_ctl->handle = adev; + reset_ctl->async_reset = sienna_cichlid_async_reset; + reset_ctl->active_reset = AMD_RESET_METHOD_NONE; + reset_ctl->get_reset_handler = sienna_cichlid_get_reset_handler; + + INIT_LIST_HEAD(&reset_ctl->reset_handlers); + INIT_WORK(&reset_ctl->reset_work, reset_ctl->async_reset); + /* Only mode2 is handled through reset control now */ + amdgpu_reset_add_handler(reset_ctl, &sienna_cichlid_mode2_handler); + + adev->reset_cntl = reset_ctl; + + return 0; +} + +int sienna_cichlid_reset_fini(struct amdgpu_device *adev) +{ + kfree(adev->reset_cntl); + adev->reset_cntl = NULL; + return 0; +} diff --git a/drivers/gpu/drm/amd/amdgpu/sienna_cichlid.h b/drivers/gpu/drm/amd/amdgpu/sienna_cichlid.h new file mode 100644 index 000000000000..5213b162dacd --- /dev/null +++ b/drivers/gpu/drm/amd/amdgpu/sienna_cichlid.h @@ -0,0 +1,32 @@ +/* + * Copyright 2021 Advanced Micro Devices, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. 
+ * + */ + +#ifndef __SIENNA_CICHLID_H__ +#define __SIENNA_CICHLID_H__ + +#include "amdgpu.h" + +int sienna_cichlid_reset_init(struct amdgpu_device *adev); +int sienna_cichlid_reset_fini(struct amdgpu_device *adev); + +#endif diff --git a/drivers/gpu/drm/amd/amdgpu/soc15.c b/drivers/gpu/drm/amd/amdgpu/soc15.c index fde6154f2009..183024d7c184 100644 --- a/drivers/gpu/drm/amd/amdgpu/soc15.c +++ b/drivers/gpu/drm/amd/amdgpu/soc15.c @@ -1211,25 +1211,6 @@ static int soc15_common_sw_fini(void *handle) return 0; } -static void soc15_doorbell_range_init(struct amdgpu_device *adev) -{ - int i; - struct amdgpu_ring *ring; - - /* sdma/ih doorbell range are programed by hypervisor */ - if (!amdgpu_sriov_vf(adev)) { - for (i = 0; i < adev->sdma.num_instances; i++) { - ring = &adev->sdma.instance[i].ring; - adev->nbio.funcs->sdma_doorbell_range(adev, i, - ring->use_doorbell, ring->doorbell_index, - adev->doorbell_index.sdma_doorbell_range); - } - - adev->nbio.funcs->ih_doorbell_range(adev, adev->irq.ih.use_doorbell, - adev->irq.ih.doorbell_index); - } -} - static int soc15_common_hw_init(void *handle) { struct amdgpu_device *adev = (struct amdgpu_device *)handle; @@ -1249,12 +1230,6 @@ static int soc15_common_hw_init(void *handle) /* enable the doorbell aperture */ soc15_enable_doorbell_aperture(adev, true); - /* HW doorbell routing policy: doorbell writing not - * in SDMA/IH/MM/ACV range will be routed to CP. So - * we need to init SDMA/IH/MM/ACV doorbell range prior - * to CP ip block init and ring test. - */ - soc15_doorbell_range_init(adev); return 0; } diff --git a/drivers/gpu/drm/amd/amdgpu/soc21.c b/drivers/gpu/drm/amd/amdgpu/soc21.c index 52816de5e17b..795706b3b092 100644 --- a/drivers/gpu/drm/amd/amdgpu/soc21.c +++ b/drivers/gpu/drm/amd/amdgpu/soc21.c @@ -179,7 +179,7 @@ void soc21_grbm_select(struct amdgpu_device *adev, grbm_gfx_cntl = REG_SET_FIELD(grbm_gfx_cntl, GRBM_GFX_CNTL, VMID, vmid); grbm_gfx_cntl = REG_SET_FIELD(grbm_gfx_cntl, GRBM_GFX_CNTL, QUEUEID, queue); - WREG32(SOC15_REG_OFFSET(GC, 0, regGRBM_GFX_CNTL), grbm_gfx_cntl); + WREG32_SOC15(GC, 0, regGRBM_GFX_CNTL, grbm_gfx_cntl); } static void soc21_vga_set_state(struct amdgpu_device *adev, bool state) @@ -421,6 +421,7 @@ static bool soc21_need_full_reset(struct amdgpu_device *adev) { switch (adev->ip_versions[GC_HWIP][0]) { case IP_VERSION(11, 0, 0): + return amdgpu_ras_is_supported(adev, AMDGPU_RAS_BLOCK__UMC); case IP_VERSION(11, 0, 2): return false; default: @@ -494,6 +495,20 @@ static void soc21_pre_asic_init(struct amdgpu_device *adev) { } +static int soc21_update_umd_stable_pstate(struct amdgpu_device *adev, + bool enter) +{ + if (enter) + amdgpu_gfx_rlc_enter_safe_mode(adev); + else + amdgpu_gfx_rlc_exit_safe_mode(adev); + + if (adev->gfx.funcs->update_perfmon_mgcg) + adev->gfx.funcs->update_perfmon_mgcg(adev, !enter); + + return 0; +} + static const struct amdgpu_asic_funcs soc21_asic_funcs = { .read_disabled_bios = &soc21_read_disabled_bios, @@ -513,6 +528,7 @@ static const struct amdgpu_asic_funcs soc21_asic_funcs = .supports_baco = &amdgpu_dpm_is_baco_supported, .pre_asic_init = &soc21_pre_asic_init, .query_video_codecs = &soc21_query_video_codecs, + .update_umd_stable_pstate = &soc21_update_umd_stable_pstate, }; static int soc21_common_early_init(void *handle) @@ -546,8 +562,10 @@ static int soc21_common_early_init(void *handle) case IP_VERSION(11, 0, 0): adev->cg_flags = AMD_CG_SUPPORT_GFX_CGCG | AMD_CG_SUPPORT_GFX_CGLS | +#if 0 AMD_CG_SUPPORT_GFX_3D_CGCG | AMD_CG_SUPPORT_GFX_3D_CGLS | +#endif AMD_CG_SUPPORT_GFX_MGCG | 
AMD_CG_SUPPORT_REPEATER_FGCG | AMD_CG_SUPPORT_GFX_FGCG | @@ -565,6 +583,10 @@ static int soc21_common_early_init(void *handle) AMD_PG_SUPPORT_JPEG | AMD_PG_SUPPORT_ATHUB | AMD_PG_SUPPORT_MMHUB; + if (amdgpu_sriov_vf(adev)) { + adev->cg_flags = 0; + adev->pg_flags = 0; + } adev->external_rev_id = adev->rev_id + 0x1; // TODO: need update break; case IP_VERSION(11, 0, 2): @@ -575,7 +597,9 @@ static int soc21_common_early_init(void *handle) AMD_CG_SUPPORT_VCN_MGCG | AMD_CG_SUPPORT_JPEG_MGCG | AMD_CG_SUPPORT_ATHUB_MGCG | - AMD_CG_SUPPORT_ATHUB_LS; + AMD_CG_SUPPORT_ATHUB_LS | + AMD_CG_SUPPORT_IH_CG | + AMD_CG_SUPPORT_HDP_SD; adev->pg_flags = AMD_PG_SUPPORT_VCN | AMD_PG_SUPPORT_VCN_DPG | @@ -586,12 +610,43 @@ static int soc21_common_early_init(void *handle) break; case IP_VERSION(11, 0, 1): adev->cg_flags = + AMD_CG_SUPPORT_GFX_CGCG | + AMD_CG_SUPPORT_GFX_CGLS | + AMD_CG_SUPPORT_GFX_MGCG | + AMD_CG_SUPPORT_GFX_FGCG | + AMD_CG_SUPPORT_REPEATER_FGCG | + AMD_CG_SUPPORT_GFX_PERF_CLK | + AMD_CG_SUPPORT_MC_MGCG | + AMD_CG_SUPPORT_MC_LS | + AMD_CG_SUPPORT_HDP_MGCG | + AMD_CG_SUPPORT_HDP_LS | + AMD_CG_SUPPORT_ATHUB_MGCG | + AMD_CG_SUPPORT_ATHUB_LS | + AMD_CG_SUPPORT_IH_CG | + AMD_CG_SUPPORT_BIF_MGCG | + AMD_CG_SUPPORT_BIF_LS | AMD_CG_SUPPORT_VCN_MGCG | AMD_CG_SUPPORT_JPEG_MGCG; adev->pg_flags = + AMD_PG_SUPPORT_GFX_PG | + AMD_PG_SUPPORT_VCN | + AMD_PG_SUPPORT_VCN_DPG | AMD_PG_SUPPORT_JPEG; adev->external_rev_id = adev->rev_id + 0x1; break; + case IP_VERSION(11, 0, 3): + adev->cg_flags = AMD_CG_SUPPORT_VCN_MGCG | + AMD_CG_SUPPORT_JPEG_MGCG; + adev->pg_flags = AMD_PG_SUPPORT_VCN | + AMD_PG_SUPPORT_VCN_DPG | + AMD_PG_SUPPORT_JPEG; + if (amdgpu_sriov_vf(adev)) { + /* hypervisor control CG and PG enablement */ + adev->cg_flags = 0; + adev->pg_flags = 0; + } + adev->external_rev_id = adev->rev_id + 0x20; + break; default: /* FIXME: not supported yet */ return -EINVAL; @@ -683,6 +738,8 @@ static int soc21_common_set_clockgating_state(void *handle, switch (adev->ip_versions[NBIO_HWIP][0]) { case IP_VERSION(4, 3, 0): + case IP_VERSION(4, 3, 1): + case IP_VERSION(7, 7, 0): adev->nbio.funcs->update_medium_grain_clock_gating(adev, state == AMD_CG_STATE_GATE); adev->nbio.funcs->update_medium_grain_light_sleep(adev, diff --git a/drivers/gpu/drm/amd/amdgpu/umc_v6_1.c b/drivers/gpu/drm/amd/amdgpu/umc_v6_1.c index 939cb203f7ad..f17d297b594b 100644 --- a/drivers/gpu/drm/amd/amdgpu/umc_v6_1.c +++ b/drivers/gpu/drm/amd/amdgpu/umc_v6_1.c @@ -327,10 +327,9 @@ static void umc_v6_1_query_error_address(struct amdgpu_device *adev, return; } - /* calculate error address if ue/ce error is detected */ + /* calculate error address if ue error is detected */ if (REG_GET_FIELD(mc_umc_status, MCA_UMC_UMC0_MCUMC_STATUST0, Val) == 1 && - (REG_GET_FIELD(mc_umc_status, MCA_UMC_UMC0_MCUMC_STATUST0, UECC) == 1 || - REG_GET_FIELD(mc_umc_status, MCA_UMC_UMC0_MCUMC_STATUST0, CECC) == 1)) { + REG_GET_FIELD(mc_umc_status, MCA_UMC_UMC0_MCUMC_STATUST0, UECC) == 1) { err_addr = RREG64_PCIE((mc_umc_addrt0 + umc_reg_offset) * 4); /* the lowest lsb bits should be ignored */ @@ -343,10 +342,7 @@ static void umc_v6_1_query_error_address(struct amdgpu_device *adev, ADDR_OF_256B_BLOCK(channel_index) | OFFSET_IN_256B_BLOCK(err_addr); - /* we only save ue error information currently, ce is skipped */ - if (REG_GET_FIELD(mc_umc_status, MCA_UMC_UMC0_MCUMC_STATUST0, UECC) - == 1) - amdgpu_umc_fill_error_record(err_data, err_addr, + amdgpu_umc_fill_error_record(err_data, err_addr, retired_page, channel_index, umc_inst); } diff --git 
a/drivers/gpu/drm/amd/amdgpu/umc_v6_7.c b/drivers/gpu/drm/amd/amdgpu/umc_v6_7.c index bf7524f16b66..5d5d031c9e7d 100644 --- a/drivers/gpu/drm/amd/amdgpu/umc_v6_7.c +++ b/drivers/gpu/drm/amd/amdgpu/umc_v6_7.c @@ -187,20 +187,51 @@ static void umc_v6_7_ecc_info_query_ras_error_count(struct amdgpu_device *adev, } } +static void umc_v6_7_convert_error_address(struct amdgpu_device *adev, + struct ras_err_data *err_data, uint64_t err_addr, + uint32_t ch_inst, uint32_t umc_inst) +{ + uint32_t channel_index; + uint64_t soc_pa, retired_page, column; + + channel_index = + adev->umc.channel_idx_tbl[umc_inst * adev->umc.channel_inst_num + ch_inst]; + /* translate umc channel address to soc pa, 3 parts are included */ + soc_pa = ADDR_OF_8KB_BLOCK(err_addr) | + ADDR_OF_256B_BLOCK(channel_index) | + OFFSET_IN_256B_BLOCK(err_addr); + + /* The umc channel bits are not original values, they are hashed */ + SET_CHANNEL_HASH(channel_index, soc_pa); + + /* clear [C4 C3 C2] in soc physical address */ + soc_pa &= ~(0x7ULL << UMC_V6_7_PA_C2_BIT); + + /* loop for all possibilities of [C4 C3 C2] */ + for (column = 0; column < UMC_V6_7_NA_MAP_PA_NUM; column++) { + retired_page = soc_pa | (column << UMC_V6_7_PA_C2_BIT); + dev_info(adev->dev, "Error Address(PA): 0x%llx\n", retired_page); + amdgpu_umc_fill_error_record(err_data, err_addr, + retired_page, channel_index, umc_inst); + + /* shift R14 bit */ + retired_page ^= (0x1ULL << UMC_V6_7_PA_R14_BIT); + dev_info(adev->dev, "Error Address(PA): 0x%llx\n", retired_page); + amdgpu_umc_fill_error_record(err_data, err_addr, + retired_page, channel_index, umc_inst); + } +} + static void umc_v6_7_ecc_info_query_error_address(struct amdgpu_device *adev, struct ras_err_data *err_data, uint32_t ch_inst, uint32_t umc_inst) { - uint64_t mc_umc_status, err_addr, soc_pa, retired_page, column; - uint32_t channel_index; + uint64_t mc_umc_status, err_addr; uint32_t eccinfo_table_idx; struct amdgpu_ras *ras = amdgpu_ras_get_context(adev); eccinfo_table_idx = umc_inst * adev->umc.channel_inst_num + ch_inst; - channel_index = - adev->umc.channel_idx_tbl[umc_inst * adev->umc.channel_inst_num + ch_inst]; - mc_umc_status = ras->umc_ecc.ecc[eccinfo_table_idx].mca_umc_status; if (mc_umc_status == 0) @@ -209,42 +240,15 @@ static void umc_v6_7_ecc_info_query_error_address(struct amdgpu_device *adev, if (!err_data->err_addr) return; - /* calculate error address if ue/ce error is detected */ + /* calculate error address if ue error is detected */ if (REG_GET_FIELD(mc_umc_status, MCA_UMC_UMC0_MCUMC_STATUST0, Val) == 1 && - (REG_GET_FIELD(mc_umc_status, MCA_UMC_UMC0_MCUMC_STATUST0, UECC) == 1 || - REG_GET_FIELD(mc_umc_status, MCA_UMC_UMC0_MCUMC_STATUST0, CECC) == 1)) { + REG_GET_FIELD(mc_umc_status, MCA_UMC_UMC0_MCUMC_STATUST0, UECC) == 1) { err_addr = ras->umc_ecc.ecc[eccinfo_table_idx].mca_umc_addr; err_addr = REG_GET_FIELD(err_addr, MCA_UMC_UMC0_MCUMC_ADDRT0, ErrorAddr); - /* translate umc channel address to soc pa, 3 parts are included */ - soc_pa = ADDR_OF_8KB_BLOCK(err_addr) | - ADDR_OF_256B_BLOCK(channel_index) | - OFFSET_IN_256B_BLOCK(err_addr); - - /* The umc channel bits are not original values, they are hashed */ - SET_CHANNEL_HASH(channel_index, soc_pa); - - /* clear [C4 C3 C2] in soc physical address */ - soc_pa &= ~(0x7ULL << UMC_V6_7_PA_C2_BIT); - - /* we only save ue error information currently, ce is skipped */ - if (REG_GET_FIELD(mc_umc_status, MCA_UMC_UMC0_MCUMC_STATUST0, UECC) - == 1) { - /* loop for all possibilities of [C4 C3 C2] */ - for (column = 0; column < 
UMC_V6_7_NA_MAP_PA_NUM; column++) { - retired_page = soc_pa | (column << UMC_V6_7_PA_C2_BIT); - dev_info(adev->dev, "Error Address(PA): 0x%llx\n", retired_page); - amdgpu_umc_fill_error_record(err_data, err_addr, - retired_page, channel_index, umc_inst); - - /* shift R14 bit */ - retired_page ^= (0x1ULL << UMC_V6_7_PA_R14_BIT); - dev_info(adev->dev, "Error Address(PA): 0x%llx\n", retired_page); - amdgpu_umc_fill_error_record(err_data, err_addr, - retired_page, channel_index, umc_inst); - } - } + umc_v6_7_convert_error_address(adev, err_data, err_addr, + ch_inst, umc_inst); } } @@ -452,14 +456,11 @@ static void umc_v6_7_query_ras_error_count(struct amdgpu_device *adev, static void umc_v6_7_query_error_address(struct amdgpu_device *adev, struct ras_err_data *err_data, - uint32_t umc_reg_offset, - uint32_t ch_inst, + uint32_t umc_reg_offset, uint32_t ch_inst, uint32_t umc_inst) { uint32_t mc_umc_status_addr; - uint32_t channel_index; - uint64_t mc_umc_status, mc_umc_addrt0; - uint64_t err_addr, soc_pa, retired_page, column; + uint64_t mc_umc_status = 0, mc_umc_addrt0, err_addr; mc_umc_status_addr = SOC15_REG_OFFSET(UMC, 0, regMCA_UMC_UMC0_MCUMC_STATUST0); @@ -477,45 +478,15 @@ static void umc_v6_7_query_error_address(struct amdgpu_device *adev, return; } - channel_index = - adev->umc.channel_idx_tbl[umc_inst * adev->umc.channel_inst_num + ch_inst]; - - /* calculate error address if ue/ce error is detected */ + /* calculate error address if ue error is detected */ if (REG_GET_FIELD(mc_umc_status, MCA_UMC_UMC0_MCUMC_STATUST0, Val) == 1 && - (REG_GET_FIELD(mc_umc_status, MCA_UMC_UMC0_MCUMC_STATUST0, UECC) == 1 || - REG_GET_FIELD(mc_umc_status, MCA_UMC_UMC0_MCUMC_STATUST0, CECC) == 1)) { - + REG_GET_FIELD(mc_umc_status, MCA_UMC_UMC0_MCUMC_STATUST0, UECC) == 1) { err_addr = RREG64_PCIE((mc_umc_addrt0 + umc_reg_offset) * 4); - err_addr = REG_GET_FIELD(err_addr, MCA_UMC_UMC0_MCUMC_ADDRT0, ErrorAddr); + err_addr = + REG_GET_FIELD(err_addr, MCA_UMC_UMC0_MCUMC_ADDRT0, ErrorAddr); - /* translate umc channel address to soc pa, 3 parts are included */ - soc_pa = ADDR_OF_8KB_BLOCK(err_addr) | - ADDR_OF_256B_BLOCK(channel_index) | - OFFSET_IN_256B_BLOCK(err_addr); - - /* The umc channel bits are not original values, they are hashed */ - SET_CHANNEL_HASH(channel_index, soc_pa); - - /* clear [C4 C3 C2] in soc physical address */ - soc_pa &= ~(0x7ULL << UMC_V6_7_PA_C2_BIT); - - /* we only save ue error information currently, ce is skipped */ - if (REG_GET_FIELD(mc_umc_status, MCA_UMC_UMC0_MCUMC_STATUST0, UECC) - == 1) { - /* loop for all possibilities of [C4 C3 C2] */ - for (column = 0; column < UMC_V6_7_NA_MAP_PA_NUM; column++) { - retired_page = soc_pa | (column << UMC_V6_7_PA_C2_BIT); - dev_info(adev->dev, "Error Address(PA): 0x%llx\n", retired_page); - amdgpu_umc_fill_error_record(err_data, err_addr, - retired_page, channel_index, umc_inst); - - /* shift R14 bit */ - retired_page ^= (0x1ULL << UMC_V6_7_PA_R14_BIT); - dev_info(adev->dev, "Error Address(PA): 0x%llx\n", retired_page); - amdgpu_umc_fill_error_record(err_data, err_addr, - retired_page, channel_index, umc_inst); - } - } + umc_v6_7_convert_error_address(adev, err_data, err_addr, + ch_inst, umc_inst); } /* clear umc status */ @@ -540,8 +511,7 @@ static void umc_v6_7_query_ras_error_address(struct amdgpu_device *adev, ch_inst); umc_v6_7_query_error_address(adev, err_data, - umc_reg_offset, - ch_inst, + umc_reg_offset, ch_inst, umc_inst); } } @@ -583,4 +553,5 @@ struct amdgpu_umc_ras umc_v6_7_ras = { .query_ras_poison_mode = 
umc_v6_7_query_ras_poison_mode, .ecc_info_query_ras_error_count = umc_v6_7_ecc_info_query_ras_error_count, .ecc_info_query_ras_error_address = umc_v6_7_ecc_info_query_ras_error_address, + .convert_ras_error_address = umc_v6_7_convert_error_address, }; diff --git a/drivers/gpu/drm/amd/amdgpu/umc_v8_10.c b/drivers/gpu/drm/amd/amdgpu/umc_v8_10.c index 36a2053f2e8b..91235df54e22 100644 --- a/drivers/gpu/drm/amd/amdgpu/umc_v8_10.c +++ b/drivers/gpu/drm/amd/amdgpu/umc_v8_10.c @@ -101,22 +101,16 @@ static void umc_v8_10_query_correctable_error_count(struct amdgpu_device *adev, uint32_t umc_reg_offset, unsigned long *error_count) { - uint32_t ecc_err_cnt, ecc_err_cnt_addr; uint64_t mc_umc_status; uint32_t mc_umc_status_addr; /* UMC 8_10 registers */ - ecc_err_cnt_addr = - SOC15_REG_OFFSET(UMC, 0, regUMCCH0_0_GeccErrCnt); mc_umc_status_addr = SOC15_REG_OFFSET(UMC, 0, regMCA_UMC_UMC0_MCUMC_STATUST0); - ecc_err_cnt = RREG32_PCIE((ecc_err_cnt_addr + umc_reg_offset) * 4); - *error_count += - (REG_GET_FIELD(ecc_err_cnt, UMCCH0_0_GeccErrCnt, GeccErrCnt) - - UMC_V8_10_CE_CNT_INIT); - - /* Check for SRAM correctable error, MCUMC_STATUS is a 64 bit register */ + /* Rely on MCUMC_STATUS for correctable error counter + * MCUMC_STATUS is a 64 bit register + */ mc_umc_status = RREG64_PCIE((mc_umc_status_addr + umc_reg_offset) * 4); if (REG_GET_FIELD(mc_umc_status, MCA_UMC_UMC0_MCUMC_STATUST0, Val) == 1 && REG_GET_FIELD(mc_umc_status, MCA_UMC_UMC0_MCUMC_STATUST0, CECC) == 1) @@ -214,7 +208,10 @@ static void umc_v8_10_query_error_address(struct amdgpu_device *adev, { uint64_t mc_umc_status_addr; uint64_t mc_umc_status, err_addr; - uint32_t channel_index; + uint64_t mc_umc_addrt0, na_err_addr_base; + uint64_t na_err_addr, retired_page_addr; + uint32_t channel_index, addr_lsb, col = 0; + int ret = 0; mc_umc_status_addr = SOC15_REG_OFFSET(UMC, 0, regMCA_UMC_UMC0_MCUMC_STATUST0); @@ -235,13 +232,10 @@ static void umc_v8_10_query_error_address(struct amdgpu_device *adev, umc_inst * adev->umc.channel_inst_num + ch_inst]; - /* calculate error address if ue/ce error is detected */ + /* calculate error address if ue error is detected */ if (REG_GET_FIELD(mc_umc_status, MCA_UMC_UMC0_MCUMC_STATUST0, Val) == 1 && REG_GET_FIELD(mc_umc_status, MCA_UMC_UMC0_MCUMC_STATUST0, AddrV) == 1 && - (REG_GET_FIELD(mc_umc_status, MCA_UMC_UMC0_MCUMC_STATUST0, UECC) == 1 || - REG_GET_FIELD(mc_umc_status, MCA_UMC_UMC0_MCUMC_STATUST0, CECC) == 1)) { - uint32_t addr_lsb; - uint64_t mc_umc_addrt0; + REG_GET_FIELD(mc_umc_status, MCA_UMC_UMC0_MCUMC_STATUST0, UECC) == 1) { mc_umc_addrt0 = SOC15_REG_OFFSET(UMC, 0, regMCA_UMC_UMC0_MCUMC_ADDRT0); err_addr = RREG64_PCIE((mc_umc_addrt0 + umc_reg_offset) * 4); @@ -249,32 +243,24 @@ static void umc_v8_10_query_error_address(struct amdgpu_device *adev, /* the lowest lsb bits should be ignored */ addr_lsb = REG_GET_FIELD(mc_umc_status, MCA_UMC_UMC0_MCUMC_STATUST0, AddrLsb); - err_addr &= ~((0x1ULL << addr_lsb) - 1); - - /* we only save ue error information currently, ce is skipped */ - if (REG_GET_FIELD(mc_umc_status, MCA_UMC_UMC0_MCUMC_STATUST0, UECC) == 1) { - uint64_t na_err_addr_base = err_addr & ~(0x3ULL << UMC_V8_10_NA_C5_BIT); - uint64_t na_err_addr, retired_page_addr; - uint32_t col = 0; - int ret = 0; - - /* loop for all possibilities of [C6 C5] in normal address. */ - for (col = 0; col < UMC_V8_10_NA_COL_2BITS_POWER_OF_2_NUM; col++) { - na_err_addr = na_err_addr_base | (col << UMC_V8_10_NA_C5_BIT); - - /* Mapping normal error address to retired soc physical address. 
*/ - ret = umc_v8_10_swizzle_mode_na_to_pa(adev, channel_index, - na_err_addr, &retired_page_addr); - if (ret) { - dev_err(adev->dev, "Failed to map pa from umc na.\n"); - break; - } - dev_info(adev->dev, "Error Address(PA): 0x%llx\n", - retired_page_addr); - amdgpu_umc_fill_error_record(err_data, na_err_addr, - retired_page_addr, channel_index, umc_inst); + na_err_addr_base = err_addr & ~(0x3ULL << UMC_V8_10_NA_C5_BIT); + + /* loop for all possibilities of [C6 C5] in normal address. */ + for (col = 0; col < UMC_V8_10_NA_COL_2BITS_POWER_OF_2_NUM; col++) { + na_err_addr = na_err_addr_base | (col << UMC_V8_10_NA_C5_BIT); + + /* Mapping normal error address to retired soc physical address. */ + ret = umc_v8_10_swizzle_mode_na_to_pa(adev, channel_index, + na_err_addr, &retired_page_addr); + if (ret) { + dev_err(adev->dev, "Failed to map pa from umc na.\n"); + break; } + dev_info(adev->dev, "Error Address(PA): 0x%llx\n", + retired_page_addr); + amdgpu_umc_fill_error_record(err_data, na_err_addr, + retired_page_addr, channel_index, umc_inst); } } @@ -344,6 +330,31 @@ static void umc_v8_10_err_cnt_init(struct amdgpu_device *adev) } } +static uint32_t umc_v8_10_query_ras_poison_mode_per_channel( + struct amdgpu_device *adev, + uint32_t umc_reg_offset) +{ + uint32_t ecc_ctrl_addr, ecc_ctrl; + + ecc_ctrl_addr = + SOC15_REG_OFFSET(UMC, 0, regUMCCH0_0_GeccCtrl); + ecc_ctrl = RREG32_PCIE((ecc_ctrl_addr + + umc_reg_offset) * 4); + + return REG_GET_FIELD(ecc_ctrl, UMCCH0_0_GeccCtrl, UCFatalEn); +} + +static bool umc_v8_10_query_ras_poison_mode(struct amdgpu_device *adev) +{ + uint32_t umc_reg_offset = 0; + + /* Enabling fatal error in umc node0 instance0 channel0 will be + * considered as fatal error mode + */ + umc_reg_offset = get_umc_v8_10_reg_offset(adev, 0, 0, 0); + return !umc_v8_10_query_ras_poison_mode_per_channel(adev, umc_reg_offset); +} + const struct amdgpu_ras_block_hw_ops umc_v8_10_ras_hw_ops = { .query_ras_error_count = umc_v8_10_query_ras_error_count, .query_ras_error_address = umc_v8_10_query_ras_error_address, @@ -354,4 +365,5 @@ struct amdgpu_umc_ras umc_v8_10_ras = { .hw_ops = &umc_v8_10_ras_hw_ops, }, .err_cnt_init = umc_v8_10_err_cnt_init, + .query_ras_poison_mode = umc_v8_10_query_ras_poison_mode, }; diff --git a/drivers/gpu/drm/amd/amdgpu/umc_v8_7.c b/drivers/gpu/drm/amd/amdgpu/umc_v8_7.c index f35253e0eaa6..b717fdaa46e4 100644 --- a/drivers/gpu/drm/amd/amdgpu/umc_v8_7.c +++ b/drivers/gpu/drm/amd/amdgpu/umc_v8_7.c @@ -108,20 +108,35 @@ static void umc_v8_7_ecc_info_query_ras_error_count(struct amdgpu_device *adev, } } +static void umc_v8_7_convert_error_address(struct amdgpu_device *adev, + struct ras_err_data *err_data, uint64_t err_addr, + uint32_t ch_inst, uint32_t umc_inst) +{ + uint64_t retired_page; + uint32_t channel_index; + + channel_index = + adev->umc.channel_idx_tbl[umc_inst * adev->umc.channel_inst_num + ch_inst]; + + /* translate umc channel address to soc pa, 3 parts are included */ + retired_page = ADDR_OF_4KB_BLOCK(err_addr) | + ADDR_OF_256B_BLOCK(channel_index) | + OFFSET_IN_256B_BLOCK(err_addr); + + amdgpu_umc_fill_error_record(err_data, err_addr, + retired_page, channel_index, umc_inst); +} + static void umc_v8_7_ecc_info_query_error_address(struct amdgpu_device *adev, struct ras_err_data *err_data, uint32_t ch_inst, uint32_t umc_inst) { - uint64_t mc_umc_status, err_addr, retired_page; - uint32_t channel_index; + uint64_t mc_umc_status, err_addr; uint32_t eccinfo_table_idx; struct amdgpu_ras *ras = amdgpu_ras_get_context(adev); eccinfo_table_idx = umc_inst * 
adev->umc.channel_inst_num + ch_inst; - channel_index = - adev->umc.channel_idx_tbl[umc_inst * adev->umc.channel_inst_num + ch_inst]; - mc_umc_status = ras->umc_ecc.ecc[eccinfo_table_idx].mca_umc_status; if (mc_umc_status == 0) @@ -130,24 +145,15 @@ static void umc_v8_7_ecc_info_query_error_address(struct amdgpu_device *adev, if (!err_data->err_addr) return; - /* calculate error address if ue/ce error is detected */ + /* calculate error address if ue error is detected */ if (REG_GET_FIELD(mc_umc_status, MCA_UMC_UMC0_MCUMC_STATUST0, Val) == 1 && - (REG_GET_FIELD(mc_umc_status, MCA_UMC_UMC0_MCUMC_STATUST0, UECC) == 1 || - REG_GET_FIELD(mc_umc_status, MCA_UMC_UMC0_MCUMC_STATUST0, CECC) == 1)) { + REG_GET_FIELD(mc_umc_status, MCA_UMC_UMC0_MCUMC_STATUST0, UECC) == 1) { err_addr = ras->umc_ecc.ecc[eccinfo_table_idx].mca_umc_addr; err_addr = REG_GET_FIELD(err_addr, MCA_UMC_UMC0_MCUMC_ADDRT0, ErrorAddr); - /* translate umc channel address to soc pa, 3 parts are included */ - retired_page = ADDR_OF_4KB_BLOCK(err_addr) | - ADDR_OF_256B_BLOCK(channel_index) | - OFFSET_IN_256B_BLOCK(err_addr); - - /* we only save ue error information currently, ce is skipped */ - if (REG_GET_FIELD(mc_umc_status, MCA_UMC_UMC0_MCUMC_STATUST0, UECC) - == 1) - amdgpu_umc_fill_error_record(err_data, err_addr, - retired_page, channel_index, umc_inst); + umc_v8_7_convert_error_address(adev, err_data, err_addr, + ch_inst, umc_inst); } } @@ -324,14 +330,12 @@ static void umc_v8_7_query_error_address(struct amdgpu_device *adev, uint32_t umc_inst) { uint32_t lsb, mc_umc_status_addr; - uint64_t mc_umc_status, err_addr, retired_page, mc_umc_addrt0; - uint32_t channel_index = adev->umc.channel_idx_tbl[umc_inst * adev->umc.channel_inst_num + ch_inst]; + uint64_t mc_umc_status, err_addr, mc_umc_addrt0; mc_umc_status_addr = SOC15_REG_OFFSET(UMC, 0, mmMCA_UMC_UMC0_MCUMC_STATUST0); mc_umc_addrt0 = SOC15_REG_OFFSET(UMC, 0, mmMCA_UMC_UMC0_MCUMC_ADDRT0); - mc_umc_status = RREG64_PCIE((mc_umc_status_addr + umc_reg_offset) * 4); if (mc_umc_status == 0) @@ -343,10 +347,9 @@ static void umc_v8_7_query_error_address(struct amdgpu_device *adev, return; } - /* calculate error address if ue/ce error is detected */ + /* calculate error address if ue error is detected */ if (REG_GET_FIELD(mc_umc_status, MCA_UMC_UMC0_MCUMC_STATUST0, Val) == 1 && - (REG_GET_FIELD(mc_umc_status, MCA_UMC_UMC0_MCUMC_STATUST0, UECC) == 1 || - REG_GET_FIELD(mc_umc_status, MCA_UMC_UMC0_MCUMC_STATUST0, CECC) == 1)) { + REG_GET_FIELD(mc_umc_status, MCA_UMC_UMC0_MCUMC_STATUST0, UECC) == 1) { err_addr = RREG64_PCIE((mc_umc_addrt0 + umc_reg_offset) * 4); /* the lowest lsb bits should be ignored */ @@ -354,16 +357,8 @@ static void umc_v8_7_query_error_address(struct amdgpu_device *adev, err_addr = REG_GET_FIELD(err_addr, MCA_UMC_UMC0_MCUMC_ADDRT0, ErrorAddr); err_addr &= ~((0x1ULL << lsb) - 1); - /* translate umc channel address to soc pa, 3 parts are included */ - retired_page = ADDR_OF_4KB_BLOCK(err_addr) | - ADDR_OF_256B_BLOCK(channel_index) | - OFFSET_IN_256B_BLOCK(err_addr); - - /* we only save ue error information currently, ce is skipped */ - if (REG_GET_FIELD(mc_umc_status, MCA_UMC_UMC0_MCUMC_STATUST0, UECC) - == 1) - amdgpu_umc_fill_error_record(err_data, err_addr, - retired_page, channel_index, umc_inst); + umc_v8_7_convert_error_address(adev, err_data, err_addr, + ch_inst, umc_inst); } /* clear umc status */ diff --git a/drivers/gpu/drm/amd/amdgpu/vcn_v3_0.c b/drivers/gpu/drm/amd/amdgpu/vcn_v3_0.c index 39405f0db824..9c8b5fd99037 100644 --- 
a/drivers/gpu/drm/amd/amdgpu/vcn_v3_0.c +++ b/drivers/gpu/drm/amd/amdgpu/vcn_v3_0.c @@ -1761,21 +1761,23 @@ static const struct amdgpu_ring_funcs vcn_v3_0_dec_sw_ring_vm_funcs = { .emit_reg_write_reg_wait = amdgpu_ring_emit_reg_write_reg_wait_helper, }; -static int vcn_v3_0_limit_sched(struct amdgpu_cs_parser *p) +static int vcn_v3_0_limit_sched(struct amdgpu_cs_parser *p, + struct amdgpu_job *job) { struct drm_gpu_scheduler **scheds; /* The create msg must be in the first IB submitted */ - if (atomic_read(&p->entity->fence_seq)) + if (atomic_read(&job->base.entity->fence_seq)) return -EINVAL; scheds = p->adev->gpu_sched[AMDGPU_HW_IP_VCN_DEC] [AMDGPU_RING_PRIO_DEFAULT].sched; - drm_sched_entity_modify_sched(p->entity, scheds, 1); + drm_sched_entity_modify_sched(job->base.entity, scheds, 1); return 0; } -static int vcn_v3_0_dec_msg(struct amdgpu_cs_parser *p, uint64_t addr) +static int vcn_v3_0_dec_msg(struct amdgpu_cs_parser *p, struct amdgpu_job *job, + uint64_t addr) { struct ttm_operation_ctx ctx = { false, false }; struct amdgpu_bo_va_mapping *map; @@ -1846,7 +1848,7 @@ static int vcn_v3_0_dec_msg(struct amdgpu_cs_parser *p, uint64_t addr) if (create[0] == 0x7 || create[0] == 0x10 || create[0] == 0x11) continue; - r = vcn_v3_0_limit_sched(p); + r = vcn_v3_0_limit_sched(p, job); if (r) goto out; } @@ -1860,7 +1862,7 @@ static int vcn_v3_0_ring_patch_cs_in_place(struct amdgpu_cs_parser *p, struct amdgpu_job *job, struct amdgpu_ib *ib) { - struct amdgpu_ring *ring = to_amdgpu_ring(p->entity->rq->sched); + struct amdgpu_ring *ring = amdgpu_job_ring(job); uint32_t msg_lo = 0, msg_hi = 0; unsigned i; int r; @@ -1879,7 +1881,8 @@ static int vcn_v3_0_ring_patch_cs_in_place(struct amdgpu_cs_parser *p, msg_hi = val; } else if (reg == PACKET0(p->adev->vcn.internal.cmd, 0) && val == 0) { - r = vcn_v3_0_dec_msg(p, ((u64)msg_hi) << 32 | msg_lo); + r = vcn_v3_0_dec_msg(p, job, + ((u64)msg_hi) << 32 | msg_lo); if (r) return r; } diff --git a/drivers/gpu/drm/amd/amdgpu/vcn_v4_0.c b/drivers/gpu/drm/amd/amdgpu/vcn_v4_0.c index ca14c3ef742e..897a5ce9c9da 100644 --- a/drivers/gpu/drm/amd/amdgpu/vcn_v4_0.c +++ b/drivers/gpu/drm/amd/amdgpu/vcn_v4_0.c @@ -30,6 +30,7 @@ #include "soc15d.h" #include "soc15_hw_ip.h" #include "vcn_v2_0.h" +#include "mmsch_v4_0.h" #include "vcn/vcn_4_0_0_offset.h" #include "vcn/vcn_4_0_0_sh_mask.h" @@ -45,6 +46,8 @@ #define VCN_VID_SOC_ADDRESS_2_0 0x1fb00 #define VCN1_VID_SOC_ADDRESS_3_0 0x48300 +#define VCN_HARVEST_MMSCH 0 + #define RDECODE_MSG_CREATE 0x00000000 #define RDECODE_MESSAGE_CREATE 0x00000001 @@ -53,12 +56,14 @@ static int amdgpu_ih_clientid_vcns[] = { SOC15_IH_CLIENTID_VCN1 }; +static int vcn_v4_0_start_sriov(struct amdgpu_device *adev); static void vcn_v4_0_set_unified_ring_funcs(struct amdgpu_device *adev); static void vcn_v4_0_set_irq_funcs(struct amdgpu_device *adev); static int vcn_v4_0_set_powergating_state(void *handle, enum amd_powergating_state state); static int vcn_v4_0_pause_dpg_mode(struct amdgpu_device *adev, int inst_idx, struct dpg_pause_state *new_state); +static void vcn_v4_0_unified_ring_set_wptr(struct amdgpu_ring *ring); /** * vcn_v4_0_early_init - set function pointers @@ -71,6 +76,9 @@ static int vcn_v4_0_early_init(void *handle) { struct amdgpu_device *adev = (struct amdgpu_device *)handle; + if (amdgpu_sriov_vf(adev)) + adev->vcn.harvest_config = VCN_HARVEST_MMSCH; + /* re-use enc ring as unified ring */ adev->vcn.num_enc_rings = 1; @@ -92,6 +100,7 @@ static int vcn_v4_0_sw_init(void *handle) struct amdgpu_ring *ring; struct amdgpu_device 
*adev = (struct amdgpu_device *)handle; int i, r; + int vcn_doorbell_index = 0; r = amdgpu_vcn_sw_init(adev); if (r) @@ -103,6 +112,12 @@ static int vcn_v4_0_sw_init(void *handle) if (r) return r; + if (amdgpu_sriov_vf(adev)) { + vcn_doorbell_index = adev->doorbell_index.vcn.vcn_ring0_1 - MMSCH_DOORBELL_OFFSET; + /* get DWORD offset */ + vcn_doorbell_index = vcn_doorbell_index << 1; + } + for (i = 0; i < adev->vcn.num_vcn_inst; i++) { volatile struct amdgpu_vcn4_fw_shared *fw_shared; @@ -119,7 +134,10 @@ static int vcn_v4_0_sw_init(void *handle) ring = &adev->vcn.inst[i].ring_enc[0]; ring->use_doorbell = true; - ring->doorbell_index = (adev->doorbell_index.vcn.vcn_ring0_1 << 1) + 2 + 8 * i; + if (amdgpu_sriov_vf(adev)) + ring->doorbell_index = vcn_doorbell_index + i * (adev->vcn.num_enc_rings + 1) + 1; + else + ring->doorbell_index = (adev->doorbell_index.vcn.vcn_ring0_1 << 1) + 2 + 8 * i; sprintf(ring->name, "vcn_unified_%d", i); @@ -132,10 +150,23 @@ static int vcn_v4_0_sw_init(void *handle) fw_shared->present_flag_0 = cpu_to_le32(AMDGPU_FW_SHARED_FLAG_0_UNIFIED_QUEUE); fw_shared->sq.is_enabled = 1; + fw_shared->present_flag_0 |= cpu_to_le32(AMDGPU_VCN_SMU_DPM_INTERFACE_FLAG); + fw_shared->smu_dpm_interface.smu_interface_type = (adev->flags & AMD_IS_APU) ? + AMDGPU_VCN_SMU_DPM_INTERFACE_APU : AMDGPU_VCN_SMU_DPM_INTERFACE_DGPU; + + if (amdgpu_sriov_vf(adev)) + fw_shared->present_flag_0 |= cpu_to_le32(AMDGPU_VCN_VF_RB_SETUP_FLAG); + if (amdgpu_vcnfw_log) amdgpu_vcn_fwlog_init(&adev->vcn.inst[i]); } + if (amdgpu_sriov_vf(adev)) { + r = amdgpu_virt_alloc_mm_table(adev); + if (r) + return r; + } + if (adev->pg_flags & AMD_PG_SUPPORT_VCN_DPG) adev->vcn.pause_dpg_mode = vcn_v4_0_pause_dpg_mode; @@ -169,6 +200,9 @@ static int vcn_v4_0_sw_fini(void *handle) drm_dev_exit(idx); } + if (amdgpu_sriov_vf(adev)) + amdgpu_virt_free_mm_table(adev); + r = amdgpu_vcn_suspend(adev); if (r) return r; @@ -191,18 +225,42 @@ static int vcn_v4_0_hw_init(void *handle) struct amdgpu_ring *ring; int i, r; - for (i = 0; i < adev->vcn.num_vcn_inst; ++i) { - if (adev->vcn.harvest_config & (1 << i)) - continue; + if (amdgpu_sriov_vf(adev)) { + r = vcn_v4_0_start_sriov(adev); + if (r) + goto done; - ring = &adev->vcn.inst[i].ring_enc[0]; + for (i = 0; i < adev->vcn.num_vcn_inst; ++i) { + if (adev->vcn.harvest_config & (1 << i)) + continue; - adev->nbio.funcs->vcn_doorbell_range(adev, ring->use_doorbell, - ((adev->doorbell_index.vcn.vcn_ring0_1 << 1) + 8 * i), i); + ring = &adev->vcn.inst[i].ring_enc[0]; + if (amdgpu_vcn_is_disabled_vcn(adev, VCN_ENCODE_RING, i)) { + ring->sched.ready = false; + ring->no_scheduler = true; + dev_info(adev->dev, "ring %s is disabled by hypervisor\n", ring->name); + } else { + ring->wptr = 0; + ring->wptr_old = 0; + vcn_v4_0_unified_ring_set_wptr(ring); + ring->sched.ready = true; + } + } + } else { + for (i = 0; i < adev->vcn.num_vcn_inst; ++i) { + if (adev->vcn.harvest_config & (1 << i)) + continue; - r = amdgpu_ring_test_helper(ring); - if (r) - goto done; + ring = &adev->vcn.inst[i].ring_enc[0]; + + adev->nbio.funcs->vcn_doorbell_range(adev, ring->use_doorbell, + ((adev->doorbell_index.vcn.vcn_ring0_1 << 1) + 8 * i), i); + + r = amdgpu_ring_test_helper(ring); + if (r) + goto done; + + } } done: @@ -230,12 +288,14 @@ static int vcn_v4_0_hw_fini(void *handle) for (i = 0; i < adev->vcn.num_vcn_inst; ++i) { if (adev->vcn.harvest_config & (1 << i)) continue; - - if ((adev->pg_flags & AMD_PG_SUPPORT_VCN_DPG) || + if (!amdgpu_sriov_vf(adev)) { + if ((adev->pg_flags & AMD_PG_SUPPORT_VCN_DPG) || 
(adev->vcn.cur_state != AMD_PG_STATE_GATE && RREG32_SOC15(VCN, i, regUVD_STATUS))) { vcn_v4_0_set_powergating_state(adev, AMD_PG_STATE_GATE); + } } + } return 0; @@ -1107,6 +1167,214 @@ static int vcn_v4_0_start(struct amdgpu_device *adev) return 0; } +static int vcn_v4_0_start_sriov(struct amdgpu_device *adev) +{ + int i; + struct amdgpu_ring *ring_enc; + uint64_t cache_addr; + uint64_t rb_enc_addr; + uint64_t ctx_addr; + uint32_t param, resp, expected; + uint32_t offset, cache_size; + uint32_t tmp, timeout; + + struct amdgpu_mm_table *table = &adev->virt.mm_table; + uint32_t *table_loc; + uint32_t table_size; + uint32_t size, size_dw; + uint32_t init_status; + uint32_t enabled_vcn; + + struct mmsch_v4_0_cmd_direct_write + direct_wt = { {0} }; + struct mmsch_v4_0_cmd_direct_read_modify_write + direct_rd_mod_wt = { {0} }; + struct mmsch_v4_0_cmd_end end = { {0} }; + struct mmsch_v4_0_init_header header; + + volatile struct amdgpu_vcn4_fw_shared *fw_shared; + volatile struct amdgpu_fw_shared_rb_setup *rb_setup; + + direct_wt.cmd_header.command_type = + MMSCH_COMMAND__DIRECT_REG_WRITE; + direct_rd_mod_wt.cmd_header.command_type = + MMSCH_COMMAND__DIRECT_REG_READ_MODIFY_WRITE; + end.cmd_header.command_type = + MMSCH_COMMAND__END; + + header.version = MMSCH_VERSION; + header.total_size = sizeof(struct mmsch_v4_0_init_header) >> 2; + for (i = 0; i < AMDGPU_MAX_VCN_INSTANCES; i++) { + header.inst[i].init_status = 0; + header.inst[i].table_offset = 0; + header.inst[i].table_size = 0; + } + + table_loc = (uint32_t *)table->cpu_addr; + table_loc += header.total_size; + for (i = 0; i < adev->vcn.num_vcn_inst; i++) { + if (adev->vcn.harvest_config & (1 << i)) + continue; + + table_size = 0; + + MMSCH_V4_0_INSERT_DIRECT_RD_MOD_WT(SOC15_REG_OFFSET(VCN, i, + regUVD_STATUS), + ~UVD_STATUS__UVD_BUSY, UVD_STATUS__UVD_BUSY); + + cache_size = AMDGPU_GPU_PAGE_ALIGN(adev->vcn.fw->size + 4); + + if (adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) { + MMSCH_V4_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCN, i, + regUVD_LMI_VCPU_CACHE_64BIT_BAR_LOW), + adev->firmware.ucode[AMDGPU_UCODE_ID_VCN + i].tmr_mc_addr_lo); + MMSCH_V4_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCN, i, + regUVD_LMI_VCPU_CACHE_64BIT_BAR_HIGH), + adev->firmware.ucode[AMDGPU_UCODE_ID_VCN + i].tmr_mc_addr_hi); + offset = 0; + MMSCH_V4_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCN, i, + regUVD_VCPU_CACHE_OFFSET0), + 0); + } else { + MMSCH_V4_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCN, i, + regUVD_LMI_VCPU_CACHE_64BIT_BAR_LOW), + lower_32_bits(adev->vcn.inst[i].gpu_addr)); + MMSCH_V4_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCN, i, + regUVD_LMI_VCPU_CACHE_64BIT_BAR_HIGH), + upper_32_bits(adev->vcn.inst[i].gpu_addr)); + offset = cache_size; + MMSCH_V4_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCN, i, + regUVD_VCPU_CACHE_OFFSET0), + AMDGPU_UVD_FIRMWARE_OFFSET >> 3); + } + + MMSCH_V4_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCN, i, + regUVD_VCPU_CACHE_SIZE0), + cache_size); + + cache_addr = adev->vcn.inst[i].gpu_addr + offset; + MMSCH_V4_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCN, i, + regUVD_LMI_VCPU_CACHE1_64BIT_BAR_LOW), + lower_32_bits(cache_addr)); + MMSCH_V4_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCN, i, + regUVD_LMI_VCPU_CACHE1_64BIT_BAR_HIGH), + upper_32_bits(cache_addr)); + MMSCH_V4_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCN, i, + regUVD_VCPU_CACHE_OFFSET1), + 0); + MMSCH_V4_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCN, i, + regUVD_VCPU_CACHE_SIZE1), + AMDGPU_VCN_STACK_SIZE); + + cache_addr = adev->vcn.inst[i].gpu_addr + offset + + AMDGPU_VCN_STACK_SIZE; + 
+		MMSCH_V4_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCN, i,
+			regUVD_LMI_VCPU_CACHE2_64BIT_BAR_LOW),
+			lower_32_bits(cache_addr));
+		MMSCH_V4_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCN, i,
+			regUVD_LMI_VCPU_CACHE2_64BIT_BAR_HIGH),
+			upper_32_bits(cache_addr));
+		MMSCH_V4_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCN, i,
+			regUVD_VCPU_CACHE_OFFSET2),
+			0);
+		MMSCH_V4_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCN, i,
+			regUVD_VCPU_CACHE_SIZE2),
+			AMDGPU_VCN_CONTEXT_SIZE);
+
+		fw_shared = adev->vcn.inst[i].fw_shared.cpu_addr;
+		rb_setup = &fw_shared->rb_setup;
+
+		ring_enc = &adev->vcn.inst[i].ring_enc[0];
+		ring_enc->wptr = 0;
+		rb_enc_addr = ring_enc->gpu_addr;
+
+		rb_setup->is_rb_enabled_flags |= RB_ENABLED;
+		rb_setup->rb_addr_lo = lower_32_bits(rb_enc_addr);
+		rb_setup->rb_addr_hi = upper_32_bits(rb_enc_addr);
+		rb_setup->rb_size = ring_enc->ring_size / 4;
+		fw_shared->present_flag_0 |= cpu_to_le32(AMDGPU_VCN_VF_RB_SETUP_FLAG);
+
+		MMSCH_V4_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCN, i,
+			regUVD_LMI_VCPU_NC0_64BIT_BAR_LOW),
+			lower_32_bits(adev->vcn.inst[i].fw_shared.gpu_addr));
+		MMSCH_V4_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCN, i,
+			regUVD_LMI_VCPU_NC0_64BIT_BAR_HIGH),
+			upper_32_bits(adev->vcn.inst[i].fw_shared.gpu_addr));
+		MMSCH_V4_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCN, i,
+			regUVD_VCPU_NONCACHE_SIZE0),
+			AMDGPU_GPU_PAGE_ALIGN(sizeof(struct amdgpu_vcn4_fw_shared)));
+
+		/* add end packet */
+		MMSCH_V4_0_INSERT_END();
+
+		/* refine header */
+		header.inst[i].init_status = 0;
+		header.inst[i].table_offset = header.total_size;
+		header.inst[i].table_size = table_size;
+		header.total_size += table_size;
+	}
+
+	/* Update init table header in memory */
+	size = sizeof(struct mmsch_v4_0_init_header);
+	table_loc = (uint32_t *)table->cpu_addr;
+	memcpy((void *)table_loc, &header, size);
+
+	/* message MMSCH (in VCN[0]) to initialize this client
+	 * 1, write to mmsch_vf_ctx_addr_lo/hi register with GPU mc addr
+	 * of memory descriptor location
+	 */
+	ctx_addr = table->gpu_addr;
+	WREG32_SOC15(VCN, 0, regMMSCH_VF_CTX_ADDR_LO, lower_32_bits(ctx_addr));
+	WREG32_SOC15(VCN, 0, regMMSCH_VF_CTX_ADDR_HI, upper_32_bits(ctx_addr));
+
+	/* 2, update vmid of descriptor */
+	tmp = RREG32_SOC15(VCN, 0, regMMSCH_VF_VMID);
+	tmp &= ~MMSCH_VF_VMID__VF_CTX_VMID_MASK;
+	/* use domain0 for MM scheduler */
+	tmp |= (0 << MMSCH_VF_VMID__VF_CTX_VMID__SHIFT);
+	WREG32_SOC15(VCN, 0, regMMSCH_VF_VMID, tmp);
+
+	/* 3, notify mmsch about the size of this descriptor */
+	size = header.total_size;
+	WREG32_SOC15(VCN, 0, regMMSCH_VF_CTX_SIZE, size);
+
+	/* 4, set resp to zero */
+	WREG32_SOC15(VCN, 0, regMMSCH_VF_MAILBOX_RESP, 0);
+
+	/* 5, kick off the initialization and wait until
+	 * MMSCH_VF_MAILBOX_RESP becomes non-zero
+	 */
+	param = 0x00000001;
+	WREG32_SOC15(VCN, 0, regMMSCH_VF_MAILBOX_HOST, param);
+	tmp = 0;
+	timeout = 1000;
+	resp = 0;
+	expected = MMSCH_VF_MAILBOX_RESP__OK;
+	while (resp != expected) {
+		resp = RREG32_SOC15(VCN, 0, regMMSCH_VF_MAILBOX_RESP);
+		if (resp != 0)
+			break;
+
+		udelay(10);
+		tmp = tmp + 10;
+		if (tmp >= timeout) {
+			DRM_ERROR("failed to init MMSCH. TIME-OUT after %d usec"\
+				" waiting for regMMSCH_VF_MAILBOX_RESP "\
+				"(expected=0x%08x, readback=0x%08x)\n",
+				tmp, expected, resp);
+			return -EBUSY;
+		}
+	}
+	enabled_vcn = amdgpu_vcn_is_disabled_vcn(adev, VCN_DECODE_RING, 0) ? 1 : 0;
+	init_status = ((struct mmsch_v4_0_init_header *)(table_loc))->inst[enabled_vcn].init_status;
+	if (resp != expected && resp != MMSCH_VF_MAILBOX_RESP__INCOMPLETE
+			&& init_status != MMSCH_VF_ENGINE_STATUS__PASS)
+		DRM_ERROR("MMSCH init status is incorrect! readback=0x%08x, header init "\
+			"status for VCN%x: 0x%x\n", resp, enabled_vcn, init_status);
+
+	return 0;
+}
+
 /**
  * vcn_v4_0_stop_dpg_mode - VCN stop with dpg mode
  *
@@ -1115,7 +1383,7 @@ static int vcn_v4_0_start(struct amdgpu_device *adev)
  *
  * Stop VCN block with dpg mode
  */
-static int vcn_v4_0_stop_dpg_mode(struct amdgpu_device *adev, int inst_idx)
+static void vcn_v4_0_stop_dpg_mode(struct amdgpu_device *adev, int inst_idx)
 {
 	uint32_t tmp;
 
@@ -1133,7 +1401,6 @@ static int vcn_v4_0_stop_dpg_mode(struct amdgpu_device *adev, int inst_idx)
 
 	/* disable dynamic power gating mode */
 	WREG32_P(SOC15_REG_OFFSET(VCN, inst_idx, regUVD_POWER_STATUS), 0,
 		~UVD_POWER_STATUS__UVD_PG_MODE_MASK);
-	return 0;
 }
 
 /**
@@ -1154,7 +1421,7 @@ static int vcn_v4_0_stop(struct amdgpu_device *adev)
 		fw_shared->sq.queue_mode |= FW_QUEUE_DPG_HOLD_OFF;
 
 		if (adev->pg_flags & AMD_PG_SUPPORT_VCN_DPG) {
-			r = vcn_v4_0_stop_dpg_mode(adev, i);
+			vcn_v4_0_stop_dpg_mode(adev, i);
 			continue;
 		}
 
@@ -1328,21 +1595,23 @@ static void vcn_v4_0_unified_ring_set_wptr(struct amdgpu_ring *ring)
 	}
 }
 
-static int vcn_v4_0_limit_sched(struct amdgpu_cs_parser *p)
+static int vcn_v4_0_limit_sched(struct amdgpu_cs_parser *p,
+				struct amdgpu_job *job)
 {
 	struct drm_gpu_scheduler **scheds;
 
 	/* The create msg must be in the first IB submitted */
-	if (atomic_read(&p->entity->fence_seq))
+	if (atomic_read(&job->base.entity->fence_seq))
 		return -EINVAL;
 
 	scheds = p->adev->gpu_sched[AMDGPU_HW_IP_VCN_ENC]
 		[AMDGPU_RING_PRIO_0].sched;
-	drm_sched_entity_modify_sched(p->entity, scheds, 1);
+	drm_sched_entity_modify_sched(job->base.entity, scheds, 1);
 	return 0;
 }
 
-static int vcn_v4_0_dec_msg(struct amdgpu_cs_parser *p, uint64_t addr)
+static int vcn_v4_0_dec_msg(struct amdgpu_cs_parser *p, struct amdgpu_job *job,
+			    uint64_t addr)
 {
 	struct ttm_operation_ctx ctx = { false, false };
 	struct amdgpu_bo_va_mapping *map;
@@ -1413,7 +1682,7 @@ static int vcn_v4_0_dec_msg(struct amdgpu_cs_parser *p, uint64_t addr)
 		if (create[0] == 0x7 || create[0] == 0x10 || create[0] == 0x11)
 			continue;
 
-		r = vcn_v4_0_limit_sched(p);
+		r = vcn_v4_0_limit_sched(p, job);
 		if (r)
 			goto out;
 	}
@@ -1426,32 +1695,34 @@ out:
 
 #define RADEON_VCN_ENGINE_TYPE_DECODE (0x00000003)
 static int vcn_v4_0_ring_patch_cs_in_place(struct amdgpu_cs_parser *p,
-	struct amdgpu_job *job,
-	struct amdgpu_ib *ib)
+					   struct amdgpu_job *job,
+					   struct amdgpu_ib *ib)
 {
-	struct amdgpu_ring *ring = to_amdgpu_ring(p->entity->rq->sched);
-	struct amdgpu_vcn_decode_buffer *decode_buffer = NULL;
+	struct amdgpu_ring *ring = amdgpu_job_ring(job);
+	struct amdgpu_vcn_decode_buffer *decode_buffer;
+	uint64_t addr;
 	uint32_t val;
-	int r = 0;
 
 	/* The first instance can decode anything */
 	if (!ring->me)
-		return r;
+		return 0;
 
 	/* unified queue ib header has 8 double words. */
 	if (ib->length_dw < 8)
-		return r;
+		return 0;
 
 	val = amdgpu_ib_get_value(ib, 6); //RADEON_VCN_ENGINE_TYPE
+	if (val != RADEON_VCN_ENGINE_TYPE_DECODE)
+		return 0;
 
-	if (val == RADEON_VCN_ENGINE_TYPE_DECODE) {
-		decode_buffer = (struct amdgpu_vcn_decode_buffer *)&ib->ptr[10];
+	decode_buffer = (struct amdgpu_vcn_decode_buffer *)&ib->ptr[10];
 
-		if (decode_buffer->valid_buf_flag & 0x1)
-			r = vcn_v4_0_dec_msg(p, ((u64)decode_buffer->msg_buffer_address_hi) << 32 |
-				decode_buffer->msg_buffer_address_lo);
-	}
-	return r;
+	if (!(decode_buffer->valid_buf_flag & 0x1))
+		return 0;
+
+	addr = ((u64)decode_buffer->msg_buffer_address_hi) << 32 |
+		decode_buffer->msg_buffer_address_lo;
+	return vcn_v4_0_dec_msg(p, job, addr);
 }
 
 static const struct amdgpu_ring_funcs vcn_v4_0_unified_ring_vm_funcs = {
@@ -1597,6 +1868,15 @@ static int vcn_v4_0_set_powergating_state(void *handle, enum amd_powergating_sta
 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
 	int ret;
 
+	/* for SRIOV, guest should not control VCN Power-gating
+	 * MMSCH FW should control Power-gating and clock-gating
+	 * guest should avoid touching CGC and PG
+	 */
+	if (amdgpu_sriov_vf(adev)) {
+		adev->vcn.cur_state = AMD_PG_STATE_UNGATE;
+		return 0;
+	}
+
 	if(state == adev->vcn.cur_state)
 		return 0;
 
diff --git a/drivers/gpu/drm/amd/amdgpu/vega10_ih.c b/drivers/gpu/drm/amd/amdgpu/vega10_ih.c
index cdd599a08125..1e83db0c5438 100644
--- a/drivers/gpu/drm/amd/amdgpu/vega10_ih.c
+++ b/drivers/gpu/drm/amd/amdgpu/vega10_ih.c
@@ -289,6 +289,10 @@ static int vega10_ih_irq_init(struct amdgpu_device *adev)
 		}
 	}
 
+	if (!amdgpu_sriov_vf(adev))
+		adev->nbio.funcs->ih_doorbell_range(adev, adev->irq.ih.use_doorbell,
+						    adev->irq.ih.doorbell_index);
+
 	pci_set_master(adev->pdev);
 
 	/* enable interrupts */
@@ -334,9 +338,11 @@ static u32 vega10_ih_get_wptr(struct amdgpu_device *adev,
 	u32 wptr, tmp;
 	struct amdgpu_ih_regs *ih_regs;
 
-	if (ih == &adev->irq.ih) {
+	if (ih == &adev->irq.ih || ih == &adev->irq.ih_soft) {
 		/* Only ring0 supports writeback. On other rings fall back
 		 * to register-based code with overflow checking below.
+		 * ih_soft ring doesn't have any backing hardware registers,
+		 * update wptr and return.
 		 */
 		wptr = le32_to_cpu(*ih->wptr_cpu);
 
@@ -409,6 +415,9 @@ static void vega10_ih_set_rptr(struct amdgpu_device *adev,
 {
 	struct amdgpu_ih_regs *ih_regs;
 
+	if (ih == &adev->irq.ih_soft)
+		return;
+
 	if (ih->use_doorbell) {
 		/* XXX check if swapping is necessary on BE */
 		*ih->rptr_cpu = ih->rptr;
diff --git a/drivers/gpu/drm/amd/amdgpu/vega20_ih.c b/drivers/gpu/drm/amd/amdgpu/vega20_ih.c
index 3b4eb8285943..59dfca093155 100644
--- a/drivers/gpu/drm/amd/amdgpu/vega20_ih.c
+++ b/drivers/gpu/drm/amd/amdgpu/vega20_ih.c
@@ -340,6 +340,10 @@ static int vega20_ih_irq_init(struct amdgpu_device *adev)
 		}
 	}
 
+	if (!amdgpu_sriov_vf(adev))
+		adev->nbio.funcs->ih_doorbell_range(adev, adev->irq.ih.use_doorbell,
+						    adev->irq.ih.doorbell_index);
+
 	pci_set_master(adev->pdev);
 
 	/* enable interrupts */
@@ -385,9 +389,11 @@ static u32 vega20_ih_get_wptr(struct amdgpu_device *adev,
 	u32 wptr, tmp;
 	struct amdgpu_ih_regs *ih_regs;
 
-	if (ih == &adev->irq.ih) {
+	if (ih == &adev->irq.ih || ih == &adev->irq.ih_soft) {
 		/* Only ring0 supports writeback. On other rings fall back
 		 * to register-based code with overflow checking below.
+		 * ih_soft ring doesn't have any backing hardware registers,
+		 * update wptr and return.
 		 */
 		wptr = le32_to_cpu(*ih->wptr_cpu);
 
@@ -461,6 +467,9 @@ static void vega20_ih_set_rptr(struct amdgpu_device *adev,
 {
 	struct amdgpu_ih_regs *ih_regs;
 
+	if (ih == &adev->irq.ih_soft)
+		return;
+
 	if (ih->use_doorbell) {
 		/* XXX check if swapping is necessary on BE */
 		*ih->rptr_cpu = ih->rptr;
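[Editor's note, not part of the diff above] The vcn_v4_0_start_sriov() hunk hands MMSCH a descriptor table, kicks it off through regMMSCH_VF_MAILBOX_HOST, and then polls regMMSCH_VF_MAILBOX_RESP in 10 usec steps until the firmware answers or 1000 usec elapse. The standalone C sketch below restates only that poll-with-timeout handshake so it can be read in isolation; mmsch_read_resp(), the MMSCH_RESP_* constants and the printf() reporting are illustrative stand-ins, not symbols from the amdgpu driver.

/* Illustrative sketch only: the poll-with-timeout handshake used by
 * vcn_v4_0_start_sriov() above. mmsch_read_resp() and the MMSCH_RESP_*
 * constants are placeholders, not amdgpu symbols.
 */
#include <stdint.h>
#include <stdio.h>

#define MMSCH_RESP_OK		1u	/* assumed "done" value */
#define POLL_STEP_US		10u	/* per-iteration delay, as in the patch */
#define POLL_TIMEOUT_US		1000u	/* give up after this many microseconds */

/* Stand-in for RREG32_SOC15(VCN, 0, regMMSCH_VF_MAILBOX_RESP). */
static uint32_t mmsch_read_resp(void)
{
	static uint32_t calls;

	/* Pretend the firmware answers after a few polls. */
	return (++calls >= 5) ? MMSCH_RESP_OK : 0;
}

/* Poll until the mailbox holds a non-zero value or the timeout expires.
 * Returns 0 and stores the response on success, -1 on timeout (the driver
 * returns -EBUSY there). The caller decides what the response means.
 */
static int mmsch_wait_for_resp(uint32_t expected, uint32_t *out)
{
	uint32_t waited = 0;
	uint32_t resp = 0;

	while (resp != expected) {
		resp = mmsch_read_resp();
		if (resp != 0)
			break;		/* firmware replied with something */

		/* the kernel code calls udelay(POLL_STEP_US) here */
		waited += POLL_STEP_US;
		if (waited >= POLL_TIMEOUT_US)
			return -1;
	}

	*out = resp;
	return 0;
}

int main(void)
{
	uint32_t resp;

	if (mmsch_wait_for_resp(MMSCH_RESP_OK, &resp))
		printf("timed out waiting for MMSCH\n");
	else
		printf("MMSCH responded with 0x%08x\n", (unsigned int)resp);

	return 0;
}

Breaking out of the loop on any non-zero response, as the patch does, lets the caller distinguish a plain OK from an INCOMPLETE answer afterwards, which is why the hunk follows the poll with a check of the per-instance init_status in the table header.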