diff options
author | Dave Airlie <airlied@redhat.com> | 2018-09-28 09:48:35 +1000 |
---|---|---|
committer | Dave Airlie <airlied@redhat.com> | 2018-09-28 09:48:40 +1000 |
commit | 87c2ee740c07f1edae9eec8bc45cb9b32a68f323 (patch) | |
tree | 1515f53eacb86689f2f96279e51cf0053ae8a308 | |
parent | 2de0b0a158bf423208c3898522c8fa1c1078df48 (diff) | |
parent | 6a96243056217662843694a4cbc83158d0e84403 (diff) | |
download | linux-87c2ee740c07f1edae9eec8bc45cb9b32a68f323.tar.bz2 |
Merge branch 'drm-next-4.20' of git://people.freedesktop.org/~agd5f/linux into drm-next
More new features and fixes for 4.20:
- Add dynamic powergating support for VCN on picasso
- Scheduler cleanup
- Vega20 support for KFD
- DC cleanups and bug fixes
Signed-off-by: Dave Airlie <airlied@redhat.com>
From: Alex Deucher <alexdeucher@gmail.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20180927184348.2696-1-alexander.deucher@amd.com
118 files changed, 1856 insertions, 717 deletions
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h b/drivers/gpu/drm/amd/amdgpu/amdgpu.h index 6cb35e3dab30..c05b39438663 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h @@ -146,6 +146,7 @@ extern int amdgpu_cik_support; #define AMDGPU_DEFAULT_GTT_SIZE_MB 3072ULL /* 3GB by default */ #define AMDGPU_WAIT_IDLE_TIMEOUT_IN_MS 3000 #define AMDGPU_MAX_USEC_TIMEOUT 100000 /* 100 ms */ +#define AMDGPU_FENCE_JIFFIES_TIMEOUT (HZ / 2) /* AMDGPU_IB_POOL_SIZE must be a power of 2 */ #define AMDGPU_IB_POOL_SIZE 16 #define AMDGPU_DEBUGFS_MAX_COMPONENTS 32 @@ -408,16 +409,25 @@ typedef enum _AMDGPU_DOORBELL64_ASSIGNMENT AMDGPU_DOORBELL64_GFX_RING0 = 0x8b, /* - * Other graphics doorbells can be allocated here: from 0x8c to 0xef + * Other graphics doorbells can be allocated here: from 0x8c to 0xdf * Graphics voltage island aperture 1 - * default non-graphics QWORD index is 0xF0 - 0xFF inclusive + * default non-graphics QWORD index is 0xe0 - 0xFF inclusive */ - /* sDMA engines */ - AMDGPU_DOORBELL64_sDMA_ENGINE0 = 0xF0, - AMDGPU_DOORBELL64_sDMA_HI_PRI_ENGINE0 = 0xF1, - AMDGPU_DOORBELL64_sDMA_ENGINE1 = 0xF2, - AMDGPU_DOORBELL64_sDMA_HI_PRI_ENGINE1 = 0xF3, + /* sDMA engines reserved from 0xe0 -oxef */ + AMDGPU_DOORBELL64_sDMA_ENGINE0 = 0xE0, + AMDGPU_DOORBELL64_sDMA_HI_PRI_ENGINE0 = 0xE1, + AMDGPU_DOORBELL64_sDMA_ENGINE1 = 0xE8, + AMDGPU_DOORBELL64_sDMA_HI_PRI_ENGINE1 = 0xE9, + + /* For vega10 sriov, the sdma doorbell must be fixed as follow + * to keep the same setting with host driver, or it will + * happen conflicts + */ + AMDGPU_VEGA10_DOORBELL64_sDMA_ENGINE0 = 0xF0, + AMDGPU_VEGA10_DOORBELL64_sDMA_HI_PRI_ENGINE0 = 0xF1, + AMDGPU_VEGA10_DOORBELL64_sDMA_ENGINE1 = 0xF2, + AMDGPU_VEGA10_DOORBELL64_sDMA_HI_PRI_ENGINE1 = 0xF3, /* Interrupt handler */ AMDGPU_DOORBELL64_IH = 0xF4, /* For legacy interrupt ring buffer */ diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c index 0f9947edb12a..c31a8849e9f8 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c @@ -76,6 +76,7 @@ void amdgpu_amdkfd_device_probe(struct amdgpu_device *adev) kfd2kgd = amdgpu_amdkfd_gfx_8_0_get_functions(); break; case CHIP_VEGA10: + case CHIP_VEGA20: case CHIP_RAVEN: kfd2kgd = amdgpu_amdkfd_gfx_9_0_get_functions(); break; @@ -123,7 +124,7 @@ static void amdgpu_doorbell_get_kfd_info(struct amdgpu_device *adev, void amdgpu_amdkfd_device_init(struct amdgpu_device *adev) { - int i; + int i, n; int last_valid_bit; if (adev->kfd) { struct kgd2kfd_shared_resources gpu_resources = { @@ -162,7 +163,15 @@ void amdgpu_amdkfd_device_init(struct amdgpu_device *adev) &gpu_resources.doorbell_physical_address, &gpu_resources.doorbell_aperture_size, &gpu_resources.doorbell_start_offset); - if (adev->asic_type >= CHIP_VEGA10) { + + if (adev->asic_type < CHIP_VEGA10) { + kgd2kfd->device_init(adev->kfd, &gpu_resources); + return; + } + + n = (adev->asic_type < CHIP_VEGA20) ? 2 : 8; + + for (i = 0; i < n; i += 2) { /* On SOC15 the BIF is involved in routing * doorbells using the low 12 bits of the * address. Communicate the assignments to @@ -170,20 +179,31 @@ void amdgpu_amdkfd_device_init(struct amdgpu_device *adev) * process in case of 64-bit doorbells so we * can use each doorbell assignment twice. */ - gpu_resources.sdma_doorbell[0][0] = - AMDGPU_DOORBELL64_sDMA_ENGINE0; - gpu_resources.sdma_doorbell[0][1] = - AMDGPU_DOORBELL64_sDMA_ENGINE0 + 0x200; - gpu_resources.sdma_doorbell[1][0] = - AMDGPU_DOORBELL64_sDMA_ENGINE1; - gpu_resources.sdma_doorbell[1][1] = - AMDGPU_DOORBELL64_sDMA_ENGINE1 + 0x200; - /* Doorbells 0x0f0-0ff and 0x2f0-2ff are reserved for - * SDMA, IH and VCN. So don't use them for the CP. - */ - gpu_resources.reserved_doorbell_mask = 0x1f0; - gpu_resources.reserved_doorbell_val = 0x0f0; + if (adev->asic_type == CHIP_VEGA10) { + gpu_resources.sdma_doorbell[0][i] = + AMDGPU_VEGA10_DOORBELL64_sDMA_ENGINE0 + (i >> 1); + gpu_resources.sdma_doorbell[0][i+1] = + AMDGPU_VEGA10_DOORBELL64_sDMA_ENGINE0 + 0x200 + (i >> 1); + gpu_resources.sdma_doorbell[1][i] = + AMDGPU_VEGA10_DOORBELL64_sDMA_ENGINE1 + (i >> 1); + gpu_resources.sdma_doorbell[1][i+1] = + AMDGPU_VEGA10_DOORBELL64_sDMA_ENGINE1 + 0x200 + (i >> 1); + } else { + gpu_resources.sdma_doorbell[0][i] = + AMDGPU_DOORBELL64_sDMA_ENGINE0 + (i >> 1); + gpu_resources.sdma_doorbell[0][i+1] = + AMDGPU_DOORBELL64_sDMA_ENGINE0 + 0x200 + (i >> 1); + gpu_resources.sdma_doorbell[1][i] = + AMDGPU_DOORBELL64_sDMA_ENGINE1 + (i >> 1); + gpu_resources.sdma_doorbell[1][i+1] = + AMDGPU_DOORBELL64_sDMA_ENGINE1 + 0x200 + (i >> 1); + } } + /* Doorbells 0x0e0-0ff and 0x2e0-2ff are reserved for + * SDMA, IH and VCN. So don't use them for the CP. + */ + gpu_resources.reserved_doorbell_mask = 0x1e0; + gpu_resources.reserved_doorbell_val = 0x0e0; kgd2kfd->device_init(adev->kfd, &gpu_resources); } diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h index 056fc6ef6c63..8e0d4f7196b4 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h @@ -174,7 +174,7 @@ void amdgpu_amdkfd_gpuvm_destroy_cb(struct amdgpu_device *adev, struct amdgpu_vm *vm); void amdgpu_amdkfd_gpuvm_destroy_process_vm(struct kgd_dev *kgd, void *vm); void amdgpu_amdkfd_gpuvm_release_process_vm(struct kgd_dev *kgd, void *vm); -uint32_t amdgpu_amdkfd_gpuvm_get_process_page_dir(void *vm); +uint64_t amdgpu_amdkfd_gpuvm_get_process_page_dir(void *vm); int amdgpu_amdkfd_gpuvm_alloc_memory_of_gpu( struct kgd_dev *kgd, uint64_t va, uint64_t size, void *vm, struct kgd_mem **mem, diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v7.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v7.c index b2e45c8e2e0d..244d9834a381 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v7.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v7.c @@ -142,7 +142,7 @@ static uint16_t get_fw_version(struct kgd_dev *kgd, enum kgd_engine_type type); static void set_scratch_backing_va(struct kgd_dev *kgd, uint64_t va, uint32_t vmid); static void set_vm_context_page_table_base(struct kgd_dev *kgd, uint32_t vmid, - uint32_t page_table_base); + uint64_t page_table_base); static int invalidate_tlbs(struct kgd_dev *kgd, uint16_t pasid); static int invalidate_tlbs_vmid(struct kgd_dev *kgd, uint16_t vmid); static uint32_t read_vmid_from_vmfault_reg(struct kgd_dev *kgd); @@ -874,7 +874,7 @@ static uint16_t get_fw_version(struct kgd_dev *kgd, enum kgd_engine_type type) } static void set_vm_context_page_table_base(struct kgd_dev *kgd, uint32_t vmid, - uint32_t page_table_base) + uint64_t page_table_base) { struct amdgpu_device *adev = get_amdgpu_device(kgd); @@ -882,7 +882,8 @@ static void set_vm_context_page_table_base(struct kgd_dev *kgd, uint32_t vmid, pr_err("trying to set page table base for wrong VMID\n"); return; } - WREG32(mmVM_CONTEXT8_PAGE_TABLE_BASE_ADDR + vmid - 8, page_table_base); + WREG32(mmVM_CONTEXT8_PAGE_TABLE_BASE_ADDR + vmid - 8, + lower_32_bits(page_table_base)); } static int invalidate_tlbs(struct kgd_dev *kgd, uint16_t pasid) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v8.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v8.c index ea7c18ce7754..9f149914ad6c 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v8.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v8.c @@ -45,8 +45,6 @@ enum hqd_dequeue_request_type { RESET_WAVES }; -struct vi_sdma_mqd; - /* * Register access functions */ @@ -100,7 +98,7 @@ static uint16_t get_fw_version(struct kgd_dev *kgd, enum kgd_engine_type type); static void set_scratch_backing_va(struct kgd_dev *kgd, uint64_t va, uint32_t vmid); static void set_vm_context_page_table_base(struct kgd_dev *kgd, uint32_t vmid, - uint32_t page_table_base); + uint64_t page_table_base); static int invalidate_tlbs(struct kgd_dev *kgd, uint16_t pasid); static int invalidate_tlbs_vmid(struct kgd_dev *kgd, uint16_t vmid); @@ -282,7 +280,8 @@ static int kgd_init_interrupts(struct kgd_dev *kgd, uint32_t pipe_id) lock_srbm(kgd, mec, pipe, 0, 0); - WREG32(mmCPC_INT_CNTL, CP_INT_CNTL_RING0__TIME_STAMP_INT_ENABLE_MASK); + WREG32(mmCPC_INT_CNTL, CP_INT_CNTL_RING0__TIME_STAMP_INT_ENABLE_MASK | + CP_INT_CNTL_RING0__OPCODE_ERROR_INT_ENABLE_MASK); unlock_srbm(kgd); @@ -834,7 +833,7 @@ static uint16_t get_fw_version(struct kgd_dev *kgd, enum kgd_engine_type type) } static void set_vm_context_page_table_base(struct kgd_dev *kgd, uint32_t vmid, - uint32_t page_table_base) + uint64_t page_table_base) { struct amdgpu_device *adev = get_amdgpu_device(kgd); @@ -842,7 +841,8 @@ static void set_vm_context_page_table_base(struct kgd_dev *kgd, uint32_t vmid, pr_err("trying to set page table base for wrong VMID\n"); return; } - WREG32(mmVM_CONTEXT8_PAGE_TABLE_BASE_ADDR + vmid - 8, page_table_base); + WREG32(mmVM_CONTEXT8_PAGE_TABLE_BASE_ADDR + vmid - 8, + lower_32_bits(page_table_base)); } static int invalidate_tlbs(struct kgd_dev *kgd, uint16_t pasid) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.c index c9176537550b..42cb4c4e0929 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.c @@ -138,7 +138,7 @@ static bool get_atc_vmid_pasid_mapping_valid(struct kgd_dev *kgd, static uint16_t get_atc_vmid_pasid_mapping_pasid(struct kgd_dev *kgd, uint8_t vmid); static void set_vm_context_page_table_base(struct kgd_dev *kgd, uint32_t vmid, - uint32_t page_table_base); + uint64_t page_table_base); static uint16_t get_fw_version(struct kgd_dev *kgd, enum kgd_engine_type type); static void set_scratch_backing_va(struct kgd_dev *kgd, uint64_t va, uint32_t vmid); @@ -1013,11 +1013,10 @@ static uint16_t get_fw_version(struct kgd_dev *kgd, enum kgd_engine_type type) } static void set_vm_context_page_table_base(struct kgd_dev *kgd, uint32_t vmid, - uint32_t page_table_base) + uint64_t page_table_base) { struct amdgpu_device *adev = get_amdgpu_device(kgd); - uint64_t base = (uint64_t)page_table_base << PAGE_SHIFT | - AMDGPU_PTE_VALID; + uint64_t base = page_table_base | AMDGPU_PTE_VALID; if (!amdgpu_amdkfd_is_kfd_vmid(adev, vmid)) { pr_err("trying to set page table base for wrong VMID %u\n", diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c index 6ee9dc476c86..df0a059565f9 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c @@ -1131,11 +1131,15 @@ void amdgpu_amdkfd_gpuvm_release_process_vm(struct kgd_dev *kgd, void *vm) amdgpu_vm_release_compute(adev, avm); } -uint32_t amdgpu_amdkfd_gpuvm_get_process_page_dir(void *vm) +uint64_t amdgpu_amdkfd_gpuvm_get_process_page_dir(void *vm) { struct amdgpu_vm *avm = (struct amdgpu_vm *)vm; + struct amdgpu_bo *pd = avm->root.base.bo; + struct amdgpu_device *adev = amdgpu_ttm_adev(pd->tbo.bdev); - return avm->pd_phys_addr >> AMDGPU_GPU_PAGE_SHIFT; + if (adev->asic_type < CHIP_VEGA10) + return avm->pd_phys_addr >> AMDGPU_GPU_PAGE_SHIFT; + return avm->pd_phys_addr; } int amdgpu_amdkfd_gpuvm_alloc_memory_of_gpu( diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c index 176f28777f5e..5448cf27654e 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c @@ -196,6 +196,19 @@ int amdgpu_fence_emit_polling(struct amdgpu_ring *ring, uint32_t *s) } /** + * amdgpu_fence_schedule_fallback - schedule fallback check + * + * @ring: pointer to struct amdgpu_ring + * + * Start a timer as fallback to our interrupts. + */ +static void amdgpu_fence_schedule_fallback(struct amdgpu_ring *ring) +{ + mod_timer(&ring->fence_drv.fallback_timer, + jiffies + AMDGPU_FENCE_JIFFIES_TIMEOUT); +} + +/** * amdgpu_fence_process - check for fence activity * * @ring: pointer to struct amdgpu_ring @@ -203,8 +216,10 @@ int amdgpu_fence_emit_polling(struct amdgpu_ring *ring, uint32_t *s) * Checks the current fence value and calculates the last * signalled fence value. Wakes the fence queue if the * sequence number has increased. + * + * Returns true if fence was processed */ -void amdgpu_fence_process(struct amdgpu_ring *ring) +bool amdgpu_fence_process(struct amdgpu_ring *ring) { struct amdgpu_fence_driver *drv = &ring->fence_drv; uint32_t seq, last_seq; @@ -216,8 +231,12 @@ void amdgpu_fence_process(struct amdgpu_ring *ring) } while (atomic_cmpxchg(&drv->last_seq, last_seq, seq) != last_seq); + if (del_timer(&ring->fence_drv.fallback_timer) && + seq != ring->fence_drv.sync_seq) + amdgpu_fence_schedule_fallback(ring); + if (unlikely(seq == last_seq)) - return; + return false; last_seq &= drv->num_fences_mask; seq &= drv->num_fences_mask; @@ -244,6 +263,24 @@ void amdgpu_fence_process(struct amdgpu_ring *ring) dma_fence_put(fence); } while (last_seq != seq); + + return true; +} + +/** + * amdgpu_fence_fallback - fallback for hardware interrupts + * + * @work: delayed work item + * + * Checks for fence activity. + */ +static void amdgpu_fence_fallback(struct timer_list *t) +{ + struct amdgpu_ring *ring = from_timer(ring, t, + fence_drv.fallback_timer); + + if (amdgpu_fence_process(ring)) + DRM_WARN("Fence fallback timer expired on ring %s\n", ring->name); } /** @@ -393,6 +430,8 @@ int amdgpu_fence_driver_init_ring(struct amdgpu_ring *ring, atomic_set(&ring->fence_drv.last_seq, 0); ring->fence_drv.initialized = false; + timer_setup(&ring->fence_drv.fallback_timer, amdgpu_fence_fallback, 0); + ring->fence_drv.num_fences_mask = num_hw_submission * 2 - 1; spin_lock_init(&ring->fence_drv.lock); ring->fence_drv.fences = kcalloc(num_hw_submission * 2, sizeof(void *), @@ -468,6 +507,7 @@ void amdgpu_fence_driver_fini(struct amdgpu_device *adev) amdgpu_irq_put(adev, ring->fence_drv.irq_src, ring->fence_drv.irq_type); drm_sched_fini(&ring->sched); + del_timer_sync(&ring->fence_drv.fallback_timer); for (j = 0; j <= ring->fence_drv.num_fences_mask; ++j) dma_fence_put(ring->fence_drv.fences[j]); kfree(ring->fence_drv.fences); @@ -561,6 +601,27 @@ static const char *amdgpu_fence_get_timeline_name(struct dma_fence *f) } /** + * amdgpu_fence_enable_signaling - enable signalling on fence + * @fence: fence + * + * This function is called with fence_queue lock held, and adds a callback + * to fence_queue that checks if this fence is signaled, and if so it + * signals the fence and removes itself. + */ +static bool amdgpu_fence_enable_signaling(struct dma_fence *f) +{ + struct amdgpu_fence *fence = to_amdgpu_fence(f); + struct amdgpu_ring *ring = fence->ring; + + if (!timer_pending(&ring->fence_drv.fallback_timer)) + amdgpu_fence_schedule_fallback(ring); + + DMA_FENCE_TRACE(&fence->base, "armed on ring %i!\n", ring->idx); + + return true; +} + +/** * amdgpu_fence_free - free up the fence memory * * @rcu: RCU callback head @@ -590,6 +651,7 @@ static void amdgpu_fence_release(struct dma_fence *f) static const struct dma_fence_ops amdgpu_fence_ops = { .get_driver_name = amdgpu_fence_get_driver_name, .get_timeline_name = amdgpu_fence_get_timeline_name, + .enable_signaling = amdgpu_fence_enable_signaling, .release = amdgpu_fence_release, }; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ih.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ih.c index 4ed86218cef3..8af67f649660 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ih.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ih.c @@ -24,46 +24,21 @@ #include <drm/drmP.h> #include "amdgpu.h" #include "amdgpu_ih.h" -#include "amdgpu_amdkfd.h" - -/** - * amdgpu_ih_ring_alloc - allocate memory for the IH ring - * - * @adev: amdgpu_device pointer - * - * Allocate a ring buffer for the interrupt controller. - * Returns 0 for success, errors for failure. - */ -static int amdgpu_ih_ring_alloc(struct amdgpu_device *adev) -{ - int r; - - /* Allocate ring buffer */ - if (adev->irq.ih.ring_obj == NULL) { - r = amdgpu_bo_create_kernel(adev, adev->irq.ih.ring_size, - PAGE_SIZE, AMDGPU_GEM_DOMAIN_GTT, - &adev->irq.ih.ring_obj, - &adev->irq.ih.gpu_addr, - (void **)&adev->irq.ih.ring); - if (r) { - DRM_ERROR("amdgpu: failed to create ih ring buffer (%d).\n", r); - return r; - } - } - return 0; -} /** * amdgpu_ih_ring_init - initialize the IH state * * @adev: amdgpu_device pointer + * @ih: ih ring to initialize + * @ring_size: ring size to allocate + * @use_bus_addr: true when we can use dma_alloc_coherent * * Initializes the IH state and allocates a buffer * for the IH ring buffer. * Returns 0 for success, errors for failure. */ -int amdgpu_ih_ring_init(struct amdgpu_device *adev, unsigned ring_size, - bool use_bus_addr) +int amdgpu_ih_ring_init(struct amdgpu_device *adev, struct amdgpu_ih_ring *ih, + unsigned ring_size, bool use_bus_addr) { u32 rb_bufsz; int r; @@ -71,70 +46,76 @@ int amdgpu_ih_ring_init(struct amdgpu_device *adev, unsigned ring_size, /* Align ring size */ rb_bufsz = order_base_2(ring_size / 4); ring_size = (1 << rb_bufsz) * 4; - adev->irq.ih.ring_size = ring_size; - adev->irq.ih.ptr_mask = adev->irq.ih.ring_size - 1; - adev->irq.ih.rptr = 0; - adev->irq.ih.use_bus_addr = use_bus_addr; - - if (adev->irq.ih.use_bus_addr) { - if (!adev->irq.ih.ring) { - /* add 8 bytes for the rptr/wptr shadows and - * add them to the end of the ring allocation. - */ - adev->irq.ih.ring = pci_alloc_consistent(adev->pdev, - adev->irq.ih.ring_size + 8, - &adev->irq.ih.rb_dma_addr); - if (adev->irq.ih.ring == NULL) - return -ENOMEM; - memset((void *)adev->irq.ih.ring, 0, adev->irq.ih.ring_size + 8); - adev->irq.ih.wptr_offs = (adev->irq.ih.ring_size / 4) + 0; - adev->irq.ih.rptr_offs = (adev->irq.ih.ring_size / 4) + 1; - } - return 0; + ih->ring_size = ring_size; + ih->ptr_mask = ih->ring_size - 1; + ih->rptr = 0; + ih->use_bus_addr = use_bus_addr; + + if (use_bus_addr) { + if (ih->ring) + return 0; + + /* add 8 bytes for the rptr/wptr shadows and + * add them to the end of the ring allocation. + */ + ih->ring = dma_alloc_coherent(adev->dev, ih->ring_size + 8, + &ih->rb_dma_addr, GFP_KERNEL); + if (ih->ring == NULL) + return -ENOMEM; + + memset((void *)ih->ring, 0, ih->ring_size + 8); + ih->wptr_offs = (ih->ring_size / 4) + 0; + ih->rptr_offs = (ih->ring_size / 4) + 1; } else { - r = amdgpu_device_wb_get(adev, &adev->irq.ih.wptr_offs); + r = amdgpu_device_wb_get(adev, &ih->wptr_offs); + if (r) + return r; + + r = amdgpu_device_wb_get(adev, &ih->rptr_offs); if (r) { - dev_err(adev->dev, "(%d) ih wptr_offs wb alloc failed\n", r); + amdgpu_device_wb_free(adev, ih->wptr_offs); return r; } - r = amdgpu_device_wb_get(adev, &adev->irq.ih.rptr_offs); + r = amdgpu_bo_create_kernel(adev, ih->ring_size, PAGE_SIZE, + AMDGPU_GEM_DOMAIN_GTT, + &ih->ring_obj, &ih->gpu_addr, + (void **)&ih->ring); if (r) { - amdgpu_device_wb_free(adev, adev->irq.ih.wptr_offs); - dev_err(adev->dev, "(%d) ih rptr_offs wb alloc failed\n", r); + amdgpu_device_wb_free(adev, ih->rptr_offs); + amdgpu_device_wb_free(adev, ih->wptr_offs); return r; } - - return amdgpu_ih_ring_alloc(adev); } + return 0; } /** * amdgpu_ih_ring_fini - tear down the IH state * * @adev: amdgpu_device pointer + * @ih: ih ring to tear down * * Tears down the IH state and frees buffer * used for the IH ring buffer. */ -void amdgpu_ih_ring_fini(struct amdgpu_device *adev) +void amdgpu_ih_ring_fini(struct amdgpu_device *adev, struct amdgpu_ih_ring *ih) { - if (adev->irq.ih.use_bus_addr) { - if (adev->irq.ih.ring) { - /* add 8 bytes for the rptr/wptr shadows and - * add them to the end of the ring allocation. - */ - pci_free_consistent(adev->pdev, adev->irq.ih.ring_size + 8, - (void *)adev->irq.ih.ring, - adev->irq.ih.rb_dma_addr); - adev->irq.ih.ring = NULL; - } + if (ih->use_bus_addr) { + if (!ih->ring) + return; + + /* add 8 bytes for the rptr/wptr shadows and + * add them to the end of the ring allocation. + */ + dma_free_coherent(adev->dev, ih->ring_size + 8, + (void *)ih->ring, ih->rb_dma_addr); + ih->ring = NULL; } else { - amdgpu_bo_free_kernel(&adev->irq.ih.ring_obj, - &adev->irq.ih.gpu_addr, - (void **)&adev->irq.ih.ring); - amdgpu_device_wb_free(adev, adev->irq.ih.wptr_offs); - amdgpu_device_wb_free(adev, adev->irq.ih.rptr_offs); + amdgpu_bo_free_kernel(&ih->ring_obj, &ih->gpu_addr, + (void **)&ih->ring); + amdgpu_device_wb_free(adev, ih->wptr_offs); + amdgpu_device_wb_free(adev, ih->rptr_offs); } } @@ -142,56 +123,43 @@ void amdgpu_ih_ring_fini(struct amdgpu_device *adev) * amdgpu_ih_process - interrupt handler * * @adev: amdgpu_device pointer + * @ih: ih ring to process * * Interrupt hander (VI), walk the IH ring. * Returns irq process return code. */ -int amdgpu_ih_process(struct amdgpu_device *adev) +int amdgpu_ih_process(struct amdgpu_device *adev, struct amdgpu_ih_ring *ih, + void (*callback)(struct amdgpu_device *adev, + struct amdgpu_ih_ring *ih)) { - struct amdgpu_iv_entry entry; u32 wptr; - if (!adev->irq.ih.enabled || adev->shutdown) + if (!ih->enabled || adev->shutdown) return IRQ_NONE; wptr = amdgpu_ih_get_wptr(adev); restart_ih: /* is somebody else already processing irqs? */ - if (atomic_xchg(&adev->irq.ih.lock, 1)) + if (atomic_xchg(&ih->lock, 1)) return IRQ_NONE; - DRM_DEBUG("%s: rptr %d, wptr %d\n", __func__, adev->irq.ih.rptr, wptr); + DRM_DEBUG("%s: rptr %d, wptr %d\n", __func__, ih->rptr, wptr); /* Order reading of wptr vs. reading of IH ring data */ rmb(); - while (adev->irq.ih.rptr != wptr) { - u32 ring_index = adev->irq.ih.rptr >> 2; - - /* Prescreening of high-frequency interrupts */ - if (!amdgpu_ih_prescreen_iv(adev)) { - adev->irq.ih.rptr &= adev->irq.ih.ptr_mask; - continue; - } - - /* Before dispatching irq to IP blocks, send it to amdkfd */ - amdgpu_amdkfd_interrupt(adev, - (const void *) &adev->irq.ih.ring[ring_index]); - - entry.iv_entry = (const uint32_t *) - &adev->irq.ih.ring[ring_index]; - amdgpu_ih_decode_iv(adev, &entry); - adev->irq.ih.rptr &= adev->irq.ih.ptr_mask; - - amdgpu_irq_dispatch(adev, &entry); + while (ih->rptr != wptr) { + callback(adev, ih); + ih->rptr &= ih->ptr_mask; } + amdgpu_ih_set_rptr(adev); - atomic_set(&adev->irq.ih.lock, 0); + atomic_set(&ih->lock, 0); /* make sure wptr hasn't changed while processing */ wptr = amdgpu_ih_get_wptr(adev); - if (wptr != adev->irq.ih.rptr) + if (wptr != ih->rptr) goto restart_ih; return IRQ_HANDLED; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ih.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_ih.h index 0d5b3f5201d2..9ce8c93ec19b 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ih.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ih.h @@ -24,12 +24,8 @@ #ifndef __AMDGPU_IH_H__ #define __AMDGPU_IH_H__ -#include "soc15_ih_clientid.h" - struct amdgpu_device; - -#define AMDGPU_IH_CLIENTID_LEGACY 0 -#define AMDGPU_IH_CLIENTID_MAX SOC15_IH_CLIENTID_MAX +struct amdgpu_iv_entry; /* * R6xx+ IH ring @@ -51,22 +47,6 @@ struct amdgpu_ih_ring { dma_addr_t rb_dma_addr; /* only used when use_bus_addr = true */ }; -#define AMDGPU_IH_SRC_DATA_MAX_SIZE_DW 4 - -struct amdgpu_iv_entry { - unsigned client_id; - unsigned src_id; - unsigned ring_id; - unsigned vmid; - unsigned vmid_src; - uint64_t timestamp; - unsigned timestamp_src; - unsigned pasid; - unsigned pasid_src; - unsigned src_data[AMDGPU_IH_SRC_DATA_MAX_SIZE_DW]; - const uint32_t *iv_entry; -}; - /* provided by the ih block */ struct amdgpu_ih_funcs { /* ring read/write ptr handling, called from interrupt context */ @@ -82,9 +62,11 @@ struct amdgpu_ih_funcs { #define amdgpu_ih_decode_iv(adev, iv) (adev)->irq.ih_funcs->decode_iv((adev), (iv)) #define amdgpu_ih_set_rptr(adev) (adev)->irq.ih_funcs->set_rptr((adev)) -int amdgpu_ih_ring_init(struct amdgpu_device *adev, unsigned ring_size, - bool use_bus_addr); -void amdgpu_ih_ring_fini(struct amdgpu_device *adev); -int amdgpu_ih_process(struct amdgpu_device *adev); +int amdgpu_ih_ring_init(struct amdgpu_device *adev, struct amdgpu_ih_ring *ih, + unsigned ring_size, bool use_bus_addr); +void amdgpu_ih_ring_fini(struct amdgpu_device *adev, struct amdgpu_ih_ring *ih); +int amdgpu_ih_process(struct amdgpu_device *adev, struct amdgpu_ih_ring *ih, + void (*callback)(struct amdgpu_device *adev, + struct amdgpu_ih_ring *ih)); #endif diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_irq.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_irq.c index b927e8798534..52c17f6219a7 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_irq.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_irq.c @@ -51,6 +51,7 @@ #include "atom.h" #include "amdgpu_connectors.h" #include "amdgpu_trace.h" +#include "amdgpu_amdkfd.h" #include <linux/pm_runtime.h> @@ -123,7 +124,7 @@ void amdgpu_irq_disable_all(struct amdgpu_device *adev) int r; spin_lock_irqsave(&adev->irq.lock, irqflags); - for (i = 0; i < AMDGPU_IH_CLIENTID_MAX; ++i) { + for (i = 0; i < AMDGPU_IRQ_CLIENTID_MAX; ++i) { if (!adev->irq.client[i].sources) continue; @@ -147,6 +148,34 @@ void amdgpu_irq_disable_all(struct amdgpu_device *adev) } /** + * amdgpu_irq_callback - callback from the IH ring + * + * @adev: amdgpu device pointer + * @ih: amdgpu ih ring + * + * Callback from IH ring processing to handle the entry at the current position + * and advance the read pointer. + */ +static void amdgpu_irq_callback(struct amdgpu_device *adev, + struct amdgpu_ih_ring *ih) +{ + u32 ring_index = ih->rptr >> 2; + struct amdgpu_iv_entry entry; + + /* Prescreening of high-frequency interrupts */ + if (!amdgpu_ih_prescreen_iv(adev)) + return; + + /* Before dispatching irq to IP blocks, send it to amdkfd */ + amdgpu_amdkfd_interrupt(adev, (const void *) &ih->ring[ring_index]); + + entry.iv_entry = (const uint32_t *)&ih->ring[ring_index]; + amdgpu_ih_decode_iv(adev, &entry); + + amdgpu_irq_dispatch(adev, &entry); +} + +/** * amdgpu_irq_handler - IRQ handler * * @irq: IRQ number (unused) @@ -163,7 +192,7 @@ irqreturn_t amdgpu_irq_handler(int irq, void *arg) struct amdgpu_device *adev = dev->dev_private; irqreturn_t ret; - ret = amdgpu_ih_process(adev); + ret = amdgpu_ih_process(adev, &adev->irq.ih, amdgpu_irq_callback); if (ret == IRQ_HANDLED) pm_runtime_mark_last_busy(dev->dev); return ret; @@ -273,7 +302,7 @@ void amdgpu_irq_fini(struct amdgpu_device *adev) cancel_work_sync(&adev->reset_work); } - for (i = 0; i < AMDGPU_IH_CLIENTID_MAX; ++i) { + for (i = 0; i < AMDGPU_IRQ_CLIENTID_MAX; ++i) { if (!adev->irq.client[i].sources) continue; @@ -313,7 +342,7 @@ int amdgpu_irq_add_id(struct amdgpu_device *adev, unsigned client_id, unsigned src_id, struct amdgpu_irq_src *source) { - if (client_id >= AMDGPU_IH_CLIENTID_MAX) + if (client_id >= AMDGPU_IRQ_CLIENTID_MAX) return -EINVAL; if (src_id >= AMDGPU_MAX_IRQ_SRC_ID) @@ -367,7 +396,7 @@ void amdgpu_irq_dispatch(struct amdgpu_device *adev, trace_amdgpu_iv(entry); - if (client_id >= AMDGPU_IH_CLIENTID_MAX) { + if (client_id >= AMDGPU_IRQ_CLIENTID_MAX) { DRM_DEBUG("Invalid client_id in IV: %d\n", client_id); return; } @@ -440,7 +469,7 @@ void amdgpu_irq_gpu_reset_resume_helper(struct amdgpu_device *adev) { int i, j, k; - for (i = 0; i < AMDGPU_IH_CLIENTID_MAX; ++i) { + for (i = 0; i < AMDGPU_IRQ_CLIENTID_MAX; ++i) { if (!adev->irq.client[i].sources) continue; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_irq.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_irq.h index 3375ad778edc..f6ce171cb8aa 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_irq.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_irq.h @@ -25,19 +25,38 @@ #define __AMDGPU_IRQ_H__ #include <linux/irqdomain.h> +#include "soc15_ih_clientid.h" #include "amdgpu_ih.h" -#define AMDGPU_MAX_IRQ_SRC_ID 0x100 +#define AMDGPU_MAX_IRQ_SRC_ID 0x100 #define AMDGPU_MAX_IRQ_CLIENT_ID 0x100 +#define AMDGPU_IRQ_CLIENTID_LEGACY 0 +#define AMDGPU_IRQ_CLIENTID_MAX SOC15_IH_CLIENTID_MAX + +#define AMDGPU_IRQ_SRC_DATA_MAX_SIZE_DW 4 + struct amdgpu_device; -struct amdgpu_iv_entry; enum amdgpu_interrupt_state { AMDGPU_IRQ_STATE_DISABLE, AMDGPU_IRQ_STATE_ENABLE, }; +struct amdgpu_iv_entry { + unsigned client_id; + unsigned src_id; + unsigned ring_id; + unsigned vmid; + unsigned vmid_src; + uint64_t timestamp; + unsigned timestamp_src; + unsigned pasid; + unsigned pasid_src; + unsigned src_data[AMDGPU_IRQ_SRC_DATA_MAX_SIZE_DW]; + const uint32_t *iv_entry; +}; + struct amdgpu_irq_src { unsigned num_types; atomic_t *enabled_types; @@ -63,7 +82,7 @@ struct amdgpu_irq { bool installed; spinlock_t lock; /* interrupt sources */ - struct amdgpu_irq_client client[AMDGPU_IH_CLIENTID_MAX]; + struct amdgpu_irq_client client[AMDGPU_IRQ_CLIENTID_MAX]; /* status, etc. */ bool msi_enabled; /* msi enabled */ diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_pm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_pm.c index 8c334fc808c2..18d989e0e362 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_pm.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_pm.c @@ -1976,6 +1976,7 @@ void amdgpu_pm_compute_clocks(struct amdgpu_device *adev) static int amdgpu_debugfs_pm_info_pp(struct seq_file *m, struct amdgpu_device *adev) { uint32_t value; + uint64_t value64; uint32_t query = 0; int size; @@ -2014,6 +2015,10 @@ static int amdgpu_debugfs_pm_info_pp(struct seq_file *m, struct amdgpu_device *a seq_printf(m, "GPU Load: %u %%\n", value); seq_printf(m, "\n"); + /* SMC feature mask */ + if (!amdgpu_dpm_read_sensor(adev, AMDGPU_PP_SENSOR_ENABLED_SMC_FEATURES_MASK, (void *)&value64, &size)) + seq_printf(m, "SMC Feature Mask: 0x%016llx\n", value64); + /* UVD clocks */ if (!amdgpu_dpm_read_sensor(adev, AMDGPU_PP_SENSOR_UVD_POWER, (void *)&value, &size)) { if (!value) { diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h index 44fc665e4577..4caa301ce454 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h @@ -77,6 +77,7 @@ struct amdgpu_fence_driver { bool initialized; struct amdgpu_irq_src *irq_src; unsigned irq_type; + struct timer_list fallback_timer; unsigned num_fences_mask; spinlock_t lock; struct dma_fence **fences; @@ -96,7 +97,7 @@ void amdgpu_fence_driver_resume(struct amdgpu_device *adev); int amdgpu_fence_emit(struct amdgpu_ring *ring, struct dma_fence **fence, unsigned flags); int amdgpu_fence_emit_polling(struct amdgpu_ring *ring, uint32_t *s); -void amdgpu_fence_process(struct amdgpu_ring *ring); +bool amdgpu_fence_process(struct amdgpu_ring *ring); int amdgpu_fence_wait_empty(struct amdgpu_ring *ring); signed long amdgpu_fence_wait_polling(struct amdgpu_ring *ring, uint32_t wait_seq, diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_trace.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_trace.h index 2e87414422f9..e9bf70e2ac51 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_trace.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_trace.h @@ -103,7 +103,7 @@ TRACE_EVENT(amdgpu_iv, __entry->src_data[2] = iv->src_data[2]; __entry->src_data[3] = iv->src_data[3]; ), - TP_printk("client_id:%u src_id:%u ring:%u vmid:%u timestamp: %llu pasid:%u src_data: %08x %08x %08x %08x\n", + TP_printk("client_id:%u src_id:%u ring:%u vmid:%u timestamp: %llu pasid:%u src_data: %08x %08x %08x %08x", __entry->client_id, __entry->src_id, __entry->ring_id, __entry->vmid, __entry->timestamp, __entry->pasid, diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.c index a73674f9a0f5..2a2eb0143f48 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.c @@ -36,6 +36,7 @@ #include "soc15_common.h" #include "vcn/vcn_1_0_offset.h" +#include "vcn/vcn_1_0_sh_mask.h" /* 1 second timeout */ #define VCN_IDLE_TIMEOUT msecs_to_jiffies(1000) @@ -212,18 +213,158 @@ int amdgpu_vcn_resume(struct amdgpu_device *adev) return 0; } +static int amdgpu_vcn_pause_dpg_mode(struct amdgpu_device *adev, + struct dpg_pause_state *new_state) +{ + int ret_code; + uint32_t reg_data = 0; + uint32_t reg_data2 = 0; + struct amdgpu_ring *ring; + + /* pause/unpause if state is changed */ + if (adev->vcn.pause_state.fw_based != new_state->fw_based) { + DRM_DEBUG("dpg pause state changed %d:%d -> %d:%d", + adev->vcn.pause_state.fw_based, adev->vcn.pause_state.jpeg, + new_state->fw_based, new_state->jpeg); + + reg_data = RREG32_SOC15(UVD, 0, mmUVD_DPG_PAUSE) & + (~UVD_DPG_PAUSE__NJ_PAUSE_DPG_ACK_MASK); + + if (new_state->fw_based == VCN_DPG_STATE__PAUSE) { + ret_code = 0; + + if (!(reg_data & UVD_DPG_PAUSE__JPEG_PAUSE_DPG_ACK_MASK)) + SOC15_WAIT_ON_RREG(UVD, 0, mmUVD_POWER_STATUS, + UVD_POWER_STATUS__UVD_POWER_STATUS_TILES_OFF, + UVD_POWER_STATUS__UVD_POWER_STATUS_MASK, ret_code); + + if (!ret_code) { + /* pause DPG non-jpeg */ + reg_data |= UVD_DPG_PAUSE__NJ_PAUSE_DPG_REQ_MASK; + WREG32_SOC15(UVD, 0, mmUVD_DPG_PAUSE, reg_data); + SOC15_WAIT_ON_RREG(UVD, 0, mmUVD_DPG_PAUSE, + UVD_DPG_PAUSE__NJ_PAUSE_DPG_ACK_MASK, + UVD_DPG_PAUSE__NJ_PAUSE_DPG_ACK_MASK, ret_code); + + /* Restore */ + ring = &adev->vcn.ring_enc[0]; + WREG32_SOC15(UVD, 0, mmUVD_RB_BASE_LO, ring->gpu_addr); + WREG32_SOC15(UVD, 0, mmUVD_RB_BASE_HI, upper_32_bits(ring->gpu_addr)); + WREG32_SOC15(UVD, 0, mmUVD_RB_SIZE, ring->ring_size / 4); + WREG32_SOC15(UVD, 0, mmUVD_RB_RPTR, lower_32_bits(ring->wptr)); + WREG32_SOC15(UVD, 0, mmUVD_RB_WPTR, lower_32_bits(ring->wptr)); + + ring = &adev->vcn.ring_enc[1]; + WREG32_SOC15(UVD, 0, mmUVD_RB_BASE_LO2, ring->gpu_addr); + WREG32_SOC15(UVD, 0, mmUVD_RB_BASE_HI2, upper_32_bits(ring->gpu_addr)); + WREG32_SOC15(UVD, 0, mmUVD_RB_SIZE2, ring->ring_size / 4); + WREG32_SOC15(UVD, 0, mmUVD_RB_RPTR2, lower_32_bits(ring->wptr)); + WREG32_SOC15(UVD, 0, mmUVD_RB_WPTR2, lower_32_bits(ring->wptr)); + + ring = &adev->vcn.ring_dec; + WREG32_SOC15(UVD, 0, mmUVD_RBC_RB_WPTR, + lower_32_bits(ring->wptr) | 0x80000000); + SOC15_WAIT_ON_RREG(UVD, 0, mmUVD_POWER_STATUS, + UVD_PGFSM_CONFIG__UVDM_UVDU_PWR_ON, + UVD_POWER_STATUS__UVD_POWER_STATUS_MASK, ret_code); + } + } else { + /* unpause dpg non-jpeg, no need to wait */ + reg_data &= ~UVD_DPG_PAUSE__NJ_PAUSE_DPG_REQ_MASK; + WREG32_SOC15(UVD, 0, mmUVD_DPG_PAUSE, reg_data); + } + adev->vcn.pause_state.fw_based = new_state->fw_based; + } + + /* pause/unpause if state is changed */ + if (adev->vcn.pause_state.jpeg != new_state->jpeg) { + DRM_DEBUG("dpg pause state changed %d:%d -> %d:%d", + adev->vcn.pause_state.fw_based, adev->vcn.pause_state.jpeg, + new_state->fw_based, new_state->jpeg); + + reg_data = RREG32_SOC15(UVD, 0, mmUVD_DPG_PAUSE) & + (~UVD_DPG_PAUSE__JPEG_PAUSE_DPG_ACK_MASK); + + if (new_state->jpeg == VCN_DPG_STATE__PAUSE) { + ret_code = 0; + + if (!(reg_data & UVD_DPG_PAUSE__NJ_PAUSE_DPG_ACK_MASK)) + SOC15_WAIT_ON_RREG(UVD, 0, mmUVD_POWER_STATUS, + UVD_POWER_STATUS__UVD_POWER_STATUS_TILES_OFF, + UVD_POWER_STATUS__UVD_POWER_STATUS_MASK, ret_code); + + if (!ret_code) { + /* Make sure JPRG Snoop is disabled before sending the pause */ + reg_data2 = RREG32_SOC15(UVD, 0, mmUVD_POWER_STATUS); + reg_data2 |= UVD_POWER_STATUS__JRBC_SNOOP_DIS_MASK; + WREG32_SOC15(UVD, 0, mmUVD_POWER_STATUS, reg_data2); + + /* pause DPG jpeg */ + reg_data |= UVD_DPG_PAUSE__JPEG_PAUSE_DPG_REQ_MASK; + WREG32_SOC15(UVD, 0, mmUVD_DPG_PAUSE, reg_data); + SOC15_WAIT_ON_RREG(UVD, 0, mmUVD_DPG_PAUSE, + UVD_DPG_PAUSE__JPEG_PAUSE_DPG_ACK_MASK, + UVD_DPG_PAUSE__JPEG_PAUSE_DPG_ACK_MASK, ret_code); + + /* Restore */ + ring = &adev->vcn.ring_jpeg; + WREG32_SOC15(UVD, 0, mmUVD_LMI_JRBC_RB_VMID, 0); + WREG32_SOC15(UVD, 0, mmUVD_JRBC_RB_CNTL, 0x00000001L | 0x00000002L); + WREG32_SOC15(UVD, 0, mmUVD_LMI_JRBC_RB_64BIT_BAR_LOW, + lower_32_bits(ring->gpu_addr)); + WREG32_SOC15(UVD, 0, mmUVD_LMI_JRBC_RB_64BIT_BAR_HIGH, + upper_32_bits(ring->gpu_addr)); + WREG32_SOC15(UVD, 0, mmUVD_JRBC_RB_RPTR, ring->wptr); + WREG32_SOC15(UVD, 0, mmUVD_JRBC_RB_WPTR, ring->wptr); + WREG32_SOC15(UVD, 0, mmUVD_JRBC_RB_CNTL, 0x00000002L); + + ring = &adev->vcn.ring_dec; + WREG32_SOC15(UVD, 0, mmUVD_RBC_RB_WPTR, + lower_32_bits(ring->wptr) | 0x80000000); + SOC15_WAIT_ON_RREG(UVD, 0, mmUVD_POWER_STATUS, + UVD_PGFSM_CONFIG__UVDM_UVDU_PWR_ON, + UVD_POWER_STATUS__UVD_POWER_STATUS_MASK, ret_code); + } + } else { + /* unpause dpg jpeg, no need to wait */ + reg_data &= ~UVD_DPG_PAUSE__JPEG_PAUSE_DPG_REQ_MASK; + WREG32_SOC15(UVD, 0, mmUVD_DPG_PAUSE, reg_data); + } + adev->vcn.pause_state.jpeg = new_state->jpeg; + } + + return 0; +} + static void amdgpu_vcn_idle_work_handler(struct work_struct *work) { struct amdgpu_device *adev = container_of(work, struct amdgpu_device, vcn.idle_work.work); - unsigned fences = amdgpu_fence_count_emitted(&adev->vcn.ring_dec); - unsigned i; + unsigned int fences = 0; + unsigned int i; for (i = 0; i < adev->vcn.num_enc_rings; ++i) { fences += amdgpu_fence_count_emitted(&adev->vcn.ring_enc[i]); } + if (adev->pg_flags & AMD_PG_SUPPORT_VCN_DPG) { + struct dpg_pause_state new_state; + + if (fences) + new_state.fw_based = VCN_DPG_STATE__PAUSE; + else + new_state.fw_based = VCN_DPG_STATE__UNPAUSE; + + if (amdgpu_fence_count_emitted(&adev->vcn.ring_jpeg)) + new_state.jpeg = VCN_DPG_STATE__PAUSE; + else + new_state.jpeg = VCN_DPG_STATE__UNPAUSE; + + amdgpu_vcn_pause_dpg_mode(adev, &new_state); + } + fences += amdgpu_fence_count_emitted(&adev->vcn.ring_jpeg); + fences += amdgpu_fence_count_emitted(&adev->vcn.ring_dec); if (fences == 0) { amdgpu_gfx_off_ctrl(adev, true); @@ -250,6 +391,22 @@ void amdgpu_vcn_ring_begin_use(struct amdgpu_ring *ring) amdgpu_device_ip_set_powergating_state(adev, AMD_IP_BLOCK_TYPE_VCN, AMD_PG_STATE_UNGATE); } + + if (adev->pg_flags & AMD_PG_SUPPORT_VCN_DPG) { + struct dpg_pause_state new_state; + + if (ring->funcs->type == AMDGPU_RING_TYPE_VCN_ENC) + new_state.fw_based = VCN_DPG_STATE__PAUSE; + else + new_state.fw_based = adev->vcn.pause_state.fw_based; + + if (ring->funcs->type == AMDGPU_RING_TYPE_VCN_JPEG) + new_state.jpeg = VCN_DPG_STATE__PAUSE; + else + new_state.jpeg = adev->vcn.pause_state.jpeg; + + amdgpu_vcn_pause_dpg_mode(adev, &new_state); + } } void amdgpu_vcn_ring_end_use(struct amdgpu_ring *ring) @@ -264,7 +421,7 @@ int amdgpu_vcn_dec_ring_test_ring(struct amdgpu_ring *ring) unsigned i; int r; - WREG32(SOC15_REG_OFFSET(UVD, 0, mmUVD_CONTEXT_ID), 0xCAFEDEAD); + WREG32(SOC15_REG_OFFSET(UVD, 0, mmUVD_SCRATCH9), 0xCAFEDEAD); r = amdgpu_ring_alloc(ring, 3); if (r) { DRM_ERROR("amdgpu: cp failed to lock ring %d (%d).\n", @@ -272,11 +429,11 @@ int amdgpu_vcn_dec_ring_test_ring(struct amdgpu_ring *ring) return r; } amdgpu_ring_write(ring, - PACKET0(SOC15_REG_OFFSET(UVD, 0, mmUVD_CONTEXT_ID), 0)); + PACKET0(SOC15_REG_OFFSET(UVD, 0, mmUVD_SCRATCH9), 0)); amdgpu_ring_write(ring, 0xDEADBEEF); amdgpu_ring_commit(ring); for (i = 0; i < adev->usec_timeout; i++) { - tmp = RREG32(SOC15_REG_OFFSET(UVD, 0, mmUVD_CONTEXT_ID)); + tmp = RREG32(SOC15_REG_OFFSET(UVD, 0, mmUVD_SCRATCH9)); if (tmp == 0xDEADBEEF) break; DRM_UDELAY(1); @@ -616,7 +773,7 @@ int amdgpu_vcn_jpeg_ring_test_ring(struct amdgpu_ring *ring) unsigned i; int r; - WREG32(SOC15_REG_OFFSET(UVD, 0, mmUVD_CONTEXT_ID), 0xCAFEDEAD); + WREG32(SOC15_REG_OFFSET(UVD, 0, mmUVD_SCRATCH9), 0xCAFEDEAD); r = amdgpu_ring_alloc(ring, 3); if (r) { @@ -626,12 +783,12 @@ int amdgpu_vcn_jpeg_ring_test_ring(struct amdgpu_ring *ring) } amdgpu_ring_write(ring, - PACKETJ(SOC15_REG_OFFSET(UVD, 0, mmUVD_CONTEXT_ID), 0, 0, 0)); + PACKETJ(SOC15_REG_OFFSET(UVD, 0, mmUVD_SCRATCH9), 0, 0, 0)); amdgpu_ring_write(ring, 0xDEADBEEF); amdgpu_ring_commit(ring); for (i = 0; i < adev->usec_timeout; i++) { - tmp = RREG32(SOC15_REG_OFFSET(UVD, 0, mmUVD_CONTEXT_ID)); + tmp = RREG32(SOC15_REG_OFFSET(UVD, 0, mmUVD_SCRATCH9)); if (tmp == 0xDEADBEEF) break; DRM_UDELAY(1); @@ -665,7 +822,7 @@ static int amdgpu_vcn_jpeg_set_reg(struct amdgpu_ring *ring, uint32_t handle, ib = &job->ibs[0]; - ib->ptr[0] = PACKETJ(SOC15_REG_OFFSET(UVD, 0, mmUVD_JPEG_PITCH), 0, 0, PACKETJ_TYPE0); + ib->ptr[0] = PACKETJ(SOC15_REG_OFFSET(UVD, 0, mmUVD_SCRATCH9), 0, 0, PACKETJ_TYPE0); ib->ptr[1] = 0xDEADBEEF; for (i = 2; i < 16; i += 2) { ib->ptr[i] = PACKETJ(0, 0, 0, PACKETJ_TYPE6); @@ -714,7 +871,7 @@ int amdgpu_vcn_jpeg_ring_test_ib(struct amdgpu_ring *ring, long timeout) r = 0; for (i = 0; i < adev->usec_timeout; i++) { - tmp = RREG32(SOC15_REG_OFFSET(UVD, 0, mmUVD_JPEG_PITCH)); + tmp = RREG32(SOC15_REG_OFFSET(UVD, 0, mmUVD_SCRATCH9)); if (tmp == 0xDEADBEEF) break; DRM_UDELAY(1); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.h index 0b0b8638d73f..0b88a4672da5 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.h @@ -56,6 +56,16 @@ enum engine_status_constants { UVD_STATUS__RBC_BUSY = 0x1, }; +enum internal_dpg_state { + VCN_DPG_STATE__UNPAUSE = 0, + VCN_DPG_STATE__PAUSE, +}; + +struct dpg_pause_state { + enum internal_dpg_state fw_based; + enum internal_dpg_state jpeg; +}; + struct amdgpu_vcn { struct amdgpu_bo *vcpu_bo; void *cpu_addr; @@ -69,6 +79,8 @@ struct amdgpu_vcn { struct amdgpu_ring ring_jpeg; struct amdgpu_irq_src irq; unsigned num_enc_rings; + enum amd_powergating_state cur_state; + struct dpg_pause_state pause_state; }; int amdgpu_vcn_sw_init(struct amdgpu_device *adev); diff --git a/drivers/gpu/drm/amd/amdgpu/ci_dpm.c b/drivers/gpu/drm/amd/amdgpu/ci_dpm.c index d2469453dca2..79220a91abe3 100644 --- a/drivers/gpu/drm/amd/amdgpu/ci_dpm.c +++ b/drivers/gpu/drm/amd/amdgpu/ci_dpm.c @@ -6277,12 +6277,12 @@ static int ci_dpm_sw_init(void *handle) int ret; struct amdgpu_device *adev = (struct amdgpu_device *)handle; - ret = amdgpu_irq_add_id(adev, AMDGPU_IH_CLIENTID_LEGACY, 230, + ret = amdgpu_irq_add_id(adev, AMDGPU_IRQ_CLIENTID_LEGACY, 230, &adev->pm.dpm.thermal.irq); if (ret) return ret; - ret = amdgpu_irq_add_id(adev, AMDGPU_IH_CLIENTID_LEGACY, 231, + ret = amdgpu_irq_add_id(adev, AMDGPU_IRQ_CLIENTID_LEGACY, 231, &adev->pm.dpm.thermal.irq); if (ret) return ret; diff --git a/drivers/gpu/drm/amd/amdgpu/cik_ih.c b/drivers/gpu/drm/amd/amdgpu/cik_ih.c index 44d10c2172f6..b5775c6a857b 100644 --- a/drivers/gpu/drm/amd/amdgpu/cik_ih.c +++ b/drivers/gpu/drm/amd/amdgpu/cik_ih.c @@ -276,7 +276,7 @@ static void cik_ih_decode_iv(struct amdgpu_device *adev, dw[2] = le32_to_cpu(adev->irq.ih.ring[ring_index + 2]); dw[3] = le32_to_cpu(adev->irq.ih.ring[ring_index + 3]); - entry->client_id = AMDGPU_IH_CLIENTID_LEGACY; + entry->client_id = AMDGPU_IRQ_CLIENTID_LEGACY; entry->src_id = dw[0] & 0xff; entry->src_data[0] = dw[1] & 0xfffffff; entry->ring_id = dw[2] & 0xff; @@ -318,7 +318,7 @@ static int cik_ih_sw_init(void *handle) int r; struct amdgpu_device *adev = (struct amdgpu_device *)handle; - r = amdgpu_ih_ring_init(adev, 64 * 1024, false); + r = amdgpu_ih_ring_init(adev, &adev->irq.ih, 64 * 1024, false); if (r) return r; @@ -332,7 +332,7 @@ static int cik_ih_sw_fini(void *handle) struct amdgpu_device *adev = (struct amdgpu_device *)handle; amdgpu_irq_fini(adev); - amdgpu_ih_ring_fini(adev); + amdgpu_ih_ring_fini(adev, &adev->irq.ih); amdgpu_irq_remove_domain(adev); return 0; @@ -468,8 +468,7 @@ static const struct amdgpu_ih_funcs cik_ih_funcs = { static void cik_ih_set_interrupt_funcs(struct amdgpu_device *adev) { - if (adev->irq.ih_funcs == NULL) - adev->irq.ih_funcs = &cik_ih_funcs; + adev->irq.ih_funcs = &cik_ih_funcs; } const struct amdgpu_ip_block_version cik_ih_ip_block = diff --git a/drivers/gpu/drm/amd/amdgpu/cik_sdma.c b/drivers/gpu/drm/amd/amdgpu/cik_sdma.c index 154b1499b07e..b918c8886b75 100644 --- a/drivers/gpu/drm/amd/amdgpu/cik_sdma.c +++ b/drivers/gpu/drm/amd/amdgpu/cik_sdma.c @@ -970,19 +970,19 @@ static int cik_sdma_sw_init(void *handle) } /* SDMA trap event */ - r = amdgpu_irq_add_id(adev, AMDGPU_IH_CLIENTID_LEGACY, 224, + r = amdgpu_irq_add_id(adev, AMDGPU_IRQ_CLIENTID_LEGACY, 224, &adev->sdma.trap_irq); if (r) return r; /* SDMA Privileged inst */ - r = amdgpu_irq_add_id(adev, AMDGPU_IH_CLIENTID_LEGACY, 241, + r = amdgpu_irq_add_id(adev, AMDGPU_IRQ_CLIENTID_LEGACY, 241, &adev->sdma.illegal_inst_irq); if (r) return r; /* SDMA Privileged inst */ - r = amdgpu_irq_add_id(adev, AMDGPU_IH_CLIENTID_LEGACY, 247, + r = amdgpu_irq_add_id(adev, AMDGPU_IRQ_CLIENTID_LEGACY, 247, &adev->sdma.illegal_inst_irq); if (r) return r; @@ -1370,10 +1370,8 @@ static const struct amdgpu_buffer_funcs cik_sdma_buffer_funcs = { static void cik_sdma_set_buffer_funcs(struct amdgpu_device *adev) { - if (adev->mman.buffer_funcs == NULL) { - adev->mman.buffer_funcs = &cik_sdma_buffer_funcs; - adev->mman.buffer_funcs_ring = &adev->sdma.instance[0].ring; - } + adev->mman.buffer_funcs = &cik_sdma_buffer_funcs; + adev->mman.buffer_funcs_ring = &adev->sdma.instance[0].ring; } static const struct amdgpu_vm_pte_funcs cik_sdma_vm_pte_funcs = { @@ -1389,15 +1387,13 @@ static void cik_sdma_set_vm_pte_funcs(struct amdgpu_device *adev) struct drm_gpu_scheduler *sched; unsigned i; - if (adev->vm_manager.vm_pte_funcs == NULL) { - adev->vm_manager.vm_pte_funcs = &cik_sdma_vm_pte_funcs; - for (i = 0; i < adev->sdma.num_instances; i++) { - sched = &adev->sdma.instance[i].ring.sched; - adev->vm_manager.vm_pte_rqs[i] = - &sched->sched_rq[DRM_SCHED_PRIORITY_KERNEL]; - } - adev->vm_manager.vm_pte_num_rqs = adev->sdma.num_instances; + adev->vm_manager.vm_pte_funcs = &cik_sdma_vm_pte_funcs; + for (i = 0; i < adev->sdma.num_instances; i++) { + sched = &adev->sdma.instance[i].ring.sched; + adev->vm_manager.vm_pte_rqs[i] = + &sched->sched_rq[DRM_SCHED_PRIORITY_KERNEL]; } + adev->vm_manager.vm_pte_num_rqs = adev->sdma.num_instances; } const struct amdgpu_ip_block_version cik_sdma_ip_block = diff --git a/drivers/gpu/drm/amd/amdgpu/cz_ih.c b/drivers/gpu/drm/amd/amdgpu/cz_ih.c index 960c29e17da6..df5ac4d85a00 100644 --- a/drivers/gpu/drm/amd/amdgpu/cz_ih.c +++ b/drivers/gpu/drm/amd/amdgpu/cz_ih.c @@ -255,7 +255,7 @@ static void cz_ih_decode_iv(struct amdgpu_device *adev, dw[2] = le32_to_cpu(adev->irq.ih.ring[ring_index + 2]); dw[3] = le32_to_cpu(adev->irq.ih.ring[ring_index + 3]); - entry->client_id = AMDGPU_IH_CLIENTID_LEGACY; + entry->client_id = AMDGPU_IRQ_CLIENTID_LEGACY; entry->src_id = dw[0] & 0xff; entry->src_data[0] = dw[1] & 0xfffffff; entry->ring_id = dw[2] & 0xff; @@ -297,7 +297,7 @@ static int cz_ih_sw_init(void *handle) int r; struct amdgpu_device *adev = (struct amdgpu_device *)handle; - r = amdgpu_ih_ring_init(adev, 64 * 1024, false); + r = amdgpu_ih_ring_init(adev, &adev->irq.ih, 64 * 1024, false); if (r) return r; @@ -311,7 +311,7 @@ static int cz_ih_sw_fini(void *handle) struct amdgpu_device *adev = (struct amdgpu_device *)handle; amdgpu_irq_fini(adev); - amdgpu_ih_ring_fini(adev); + amdgpu_ih_ring_fini(adev, &adev->irq.ih); amdgpu_irq_remove_domain(adev); return 0; @@ -449,8 +449,7 @@ static const struct amdgpu_ih_funcs cz_ih_funcs = { static void cz_ih_set_interrupt_funcs(struct amdgpu_device *adev) { - if (adev->irq.ih_funcs == NULL) - adev->irq.ih_funcs = &cz_ih_funcs; + adev->irq.ih_funcs = &cz_ih_funcs; } const struct amdgpu_ip_block_version cz_ih_ip_block = diff --git a/drivers/gpu/drm/amd/amdgpu/dce_v10_0.c b/drivers/gpu/drm/amd/amdgpu/dce_v10_0.c index 3916aa6cc4ec..4cfecdce29a3 100644 --- a/drivers/gpu/drm/amd/amdgpu/dce_v10_0.c +++ b/drivers/gpu/drm/amd/amdgpu/dce_v10_0.c @@ -2746,19 +2746,19 @@ static int dce_v10_0_sw_init(void *handle) struct amdgpu_device *adev = (struct amdgpu_device *)handle; for (i = 0; i < adev->mode_info.num_crtc; i++) { - r = amdgpu_irq_add_id(adev, AMDGPU_IH_CLIENTID_LEGACY, i + 1, &adev->crtc_irq); + r = amdgpu_irq_add_id(adev, AMDGPU_IRQ_CLIENTID_LEGACY, i + 1, &adev->crtc_irq); if (r) return r; } for (i = VISLANDS30_IV_SRCID_D1_GRPH_PFLIP; i < 20; i += 2) { - r = amdgpu_irq_add_id(adev, AMDGPU_IH_CLIENTID_LEGACY, i, &adev->pageflip_irq); + r = amdgpu_irq_add_id(adev, AMDGPU_IRQ_CLIENTID_LEGACY, i, &adev->pageflip_irq); if (r) return r; } /* HPD hotplug */ - r = amdgpu_irq_add_id(adev, AMDGPU_IH_CLIENTID_LEGACY, VISLANDS30_IV_SRCID_HOTPLUG_DETECT_A, &adev->hpd_irq); + r = amdgpu_irq_add_id(adev, AMDGPU_IRQ_CLIENTID_LEGACY, VISLANDS30_IV_SRCID_HOTPLUG_DETECT_A, &adev->hpd_irq); if (r) return r; @@ -3570,8 +3570,7 @@ static const struct amdgpu_display_funcs dce_v10_0_display_funcs = { static void dce_v10_0_set_display_funcs(struct amdgpu_device *adev) { - if (adev->mode_info.funcs == NULL) - adev->mode_info.funcs = &dce_v10_0_display_funcs; + adev->mode_info.funcs = &dce_v10_0_display_funcs; } static const struct amdgpu_irq_src_funcs dce_v10_0_crtc_irq_funcs = { diff --git a/drivers/gpu/drm/amd/amdgpu/dce_v11_0.c b/drivers/gpu/drm/amd/amdgpu/dce_v11_0.c index 4ffb612a4e53..7c868916d90f 100644 --- a/drivers/gpu/drm/amd/amdgpu/dce_v11_0.c +++ b/drivers/gpu/drm/amd/amdgpu/dce_v11_0.c @@ -2867,19 +2867,19 @@ static int dce_v11_0_sw_init(void *handle) struct amdgpu_device *adev = (struct amdgpu_device *)handle; for (i = 0; i < adev->mode_info.num_crtc; i++) { - r = amdgpu_irq_add_id(adev, AMDGPU_IH_CLIENTID_LEGACY, i + 1, &adev->crtc_irq); + r = amdgpu_irq_add_id(adev, AMDGPU_IRQ_CLIENTID_LEGACY, i + 1, &adev->crtc_irq); if (r) return r; } for (i = VISLANDS30_IV_SRCID_D1_GRPH_PFLIP; i < 20; i += 2) { - r = amdgpu_irq_add_id(adev, AMDGPU_IH_CLIENTID_LEGACY, i, &adev->pageflip_irq); + r = amdgpu_irq_add_id(adev, AMDGPU_IRQ_CLIENTID_LEGACY, i, &adev->pageflip_irq); if (r) return r; } /* HPD hotplug */ - r = amdgpu_irq_add_id(adev, AMDGPU_IH_CLIENTID_LEGACY, VISLANDS30_IV_SRCID_HOTPLUG_DETECT_A, &adev->hpd_irq); + r = amdgpu_irq_add_id(adev, AMDGPU_IRQ_CLIENTID_LEGACY, VISLANDS30_IV_SRCID_HOTPLUG_DETECT_A, &adev->hpd_irq); if (r) return r; @@ -3702,8 +3702,7 @@ static const struct amdgpu_display_funcs dce_v11_0_display_funcs = { static void dce_v11_0_set_display_funcs(struct amdgpu_device *adev) { - if (adev->mode_info.funcs == NULL) - adev->mode_info.funcs = &dce_v11_0_display_funcs; + adev->mode_info.funcs = &dce_v11_0_display_funcs; } static const struct amdgpu_irq_src_funcs dce_v11_0_crtc_irq_funcs = { diff --git a/drivers/gpu/drm/amd/amdgpu/dce_v6_0.c b/drivers/gpu/drm/amd/amdgpu/dce_v6_0.c index 480c5348a14f..17eaaba36017 100644 --- a/drivers/gpu/drm/amd/amdgpu/dce_v6_0.c +++ b/drivers/gpu/drm/amd/amdgpu/dce_v6_0.c @@ -2616,19 +2616,19 @@ static int dce_v6_0_sw_init(void *handle) struct amdgpu_device *adev = (struct amdgpu_device *)handle; for (i = 0; i < adev->mode_info.num_crtc; i++) { - r = amdgpu_irq_add_id(adev, AMDGPU_IH_CLIENTID_LEGACY, i + 1, &adev->crtc_irq); + r = amdgpu_irq_add_id(adev, AMDGPU_IRQ_CLIENTID_LEGACY, i + 1, &adev->crtc_irq); if (r) return r; } for (i = 8; i < 20; i += 2) { - r = amdgpu_irq_add_id(adev, AMDGPU_IH_CLIENTID_LEGACY, i, &adev->pageflip_irq); + r = amdgpu_irq_add_id(adev, AMDGPU_IRQ_CLIENTID_LEGACY, i, &adev->pageflip_irq); if (r) return r; } /* HPD hotplug */ - r = amdgpu_irq_add_id(adev, AMDGPU_IH_CLIENTID_LEGACY, 42, &adev->hpd_irq); + r = amdgpu_irq_add_id(adev, AMDGPU_IRQ_CLIENTID_LEGACY, 42, &adev->hpd_irq); if (r) return r; @@ -3376,8 +3376,7 @@ static const struct amdgpu_display_funcs dce_v6_0_display_funcs = { static void dce_v6_0_set_display_funcs(struct amdgpu_device *adev) { - if (adev->mode_info.funcs == NULL) - adev->mode_info.funcs = &dce_v6_0_display_funcs; + adev->mode_info.funcs = &dce_v6_0_display_funcs; } static const struct amdgpu_irq_src_funcs dce_v6_0_crtc_irq_funcs = { diff --git a/drivers/gpu/drm/amd/amdgpu/dce_v8_0.c b/drivers/gpu/drm/amd/amdgpu/dce_v8_0.c index 797196476c94..8c0576978d36 100644 --- a/drivers/gpu/drm/amd/amdgpu/dce_v8_0.c +++ b/drivers/gpu/drm/amd/amdgpu/dce_v8_0.c @@ -2643,19 +2643,19 @@ static int dce_v8_0_sw_init(void *handle) struct amdgpu_device *adev = (struct amdgpu_device *)handle; for (i = 0; i < adev->mode_info.num_crtc; i++) { - r = amdgpu_irq_add_id(adev, AMDGPU_IH_CLIENTID_LEGACY, i + 1, &adev->crtc_irq); + r = amdgpu_irq_add_id(adev, AMDGPU_IRQ_CLIENTID_LEGACY, i + 1, &adev->crtc_irq); if (r) return r; } for (i = 8; i < 20; i += 2) { - r = amdgpu_irq_add_id(adev, AMDGPU_IH_CLIENTID_LEGACY, i, &adev->pageflip_irq); + r = amdgpu_irq_add_id(adev, AMDGPU_IRQ_CLIENTID_LEGACY, i, &adev->pageflip_irq); if (r) return r; } /* HPD hotplug */ - r = amdgpu_irq_add_id(adev, AMDGPU_IH_CLIENTID_LEGACY, 42, &adev->hpd_irq); + r = amdgpu_irq_add_id(adev, AMDGPU_IRQ_CLIENTID_LEGACY, 42, &adev->hpd_irq); if (r) return r; @@ -3458,8 +3458,7 @@ static const struct amdgpu_display_funcs dce_v8_0_display_funcs = { static void dce_v8_0_set_display_funcs(struct amdgpu_device *adev) { - if (adev->mode_info.funcs == NULL) - adev->mode_info.funcs = &dce_v8_0_display_funcs; + adev->mode_info.funcs = &dce_v8_0_display_funcs; } static const struct amdgpu_irq_src_funcs dce_v8_0_crtc_irq_funcs = { diff --git a/drivers/gpu/drm/amd/amdgpu/dce_virtual.c b/drivers/gpu/drm/amd/amdgpu/dce_virtual.c index 15257634a53a..fdace004544d 100644 --- a/drivers/gpu/drm/amd/amdgpu/dce_virtual.c +++ b/drivers/gpu/drm/amd/amdgpu/dce_virtual.c @@ -372,7 +372,7 @@ static int dce_virtual_sw_init(void *handle) int r, i; struct amdgpu_device *adev = (struct amdgpu_device *)handle; - r = amdgpu_irq_add_id(adev, AMDGPU_IH_CLIENTID_LEGACY, VISLANDS30_IV_SRCID_SMU_DISP_TIMER2_TRIGGER, &adev->crtc_irq); + r = amdgpu_irq_add_id(adev, AMDGPU_IRQ_CLIENTID_LEGACY, VISLANDS30_IV_SRCID_SMU_DISP_TIMER2_TRIGGER, &adev->crtc_irq); if (r) return r; @@ -649,8 +649,7 @@ static const struct amdgpu_display_funcs dce_virtual_display_funcs = { static void dce_virtual_set_display_funcs(struct amdgpu_device *adev) { - if (adev->mode_info.funcs == NULL) - adev->mode_info.funcs = &dce_virtual_display_funcs; + adev->mode_info.funcs = &dce_virtual_display_funcs; } static int dce_virtual_pageflip(struct amdgpu_device *adev, diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v6_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v6_0.c index de184a886057..d76eb27945dc 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v6_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v6_0.c @@ -1552,7 +1552,7 @@ static void gfx_v6_0_config_init(struct amdgpu_device *adev) adev->gfx.config.double_offchip_lds_buf = 0; } -static void gfx_v6_0_gpu_init(struct amdgpu_device *adev) +static void gfx_v6_0_constants_init(struct amdgpu_device *adev) { u32 gb_addr_config = 0; u32 mc_shared_chmap, mc_arb_ramcfg; @@ -3094,15 +3094,15 @@ static int gfx_v6_0_sw_init(void *handle) struct amdgpu_device *adev = (struct amdgpu_device *)handle; int i, r; - r = amdgpu_irq_add_id(adev, AMDGPU_IH_CLIENTID_LEGACY, 181, &adev->gfx.eop_irq); + r = amdgpu_irq_add_id(adev, AMDGPU_IRQ_CLIENTID_LEGACY, 181, &adev->gfx.eop_irq); if (r) return r; - r = amdgpu_irq_add_id(adev, AMDGPU_IH_CLIENTID_LEGACY, 184, &adev->gfx.priv_reg_irq); + r = amdgpu_irq_add_id(adev, AMDGPU_IRQ_CLIENTID_LEGACY, 184, &adev->gfx.priv_reg_irq); if (r) return r; - r = amdgpu_irq_add_id(adev, AMDGPU_IH_CLIENTID_LEGACY, 185, &adev->gfx.priv_inst_irq); + r = amdgpu_irq_add_id(adev, AMDGPU_IRQ_CLIENTID_LEGACY, 185, &adev->gfx.priv_inst_irq); if (r) return r; @@ -3175,7 +3175,7 @@ static int gfx_v6_0_hw_init(void *handle) int r; struct amdgpu_device *adev = (struct amdgpu_device *)handle; - gfx_v6_0_gpu_init(adev); + gfx_v6_0_constants_init(adev); r = gfx_v6_0_rlc_resume(adev); if (r) diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c index fc39ebbc9d9f..0e72bc09939a 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c @@ -1886,14 +1886,14 @@ static void gfx_v7_0_config_init(struct amdgpu_device *adev) } /** - * gfx_v7_0_gpu_init - setup the 3D engine + * gfx_v7_0_constants_init - setup the 3D engine * * @adev: amdgpu_device pointer * - * Configures the 3D engine and tiling configuration - * registers so that the 3D engine is usable. + * init the gfx constants such as the 3D engine, tiling configuration + * registers, maximum number of quad pipes, render backends... */ -static void gfx_v7_0_gpu_init(struct amdgpu_device *adev) +static void gfx_v7_0_constants_init(struct amdgpu_device *adev) { u32 sh_mem_cfg, sh_static_mem_cfg, sh_mem_base; u32 tmp; @@ -4516,18 +4516,18 @@ static int gfx_v7_0_sw_init(void *handle) adev->gfx.mec.num_queue_per_pipe = 8; /* EOP Event */ - r = amdgpu_irq_add_id(adev, AMDGPU_IH_CLIENTID_LEGACY, 181, &adev->gfx.eop_irq); + r = amdgpu_irq_add_id(adev, AMDGPU_IRQ_CLIENTID_LEGACY, 181, &adev->gfx.eop_irq); if (r) return r; /* Privileged reg */ - r = amdgpu_irq_add_id(adev, AMDGPU_IH_CLIENTID_LEGACY, 184, + r = amdgpu_irq_add_id(adev, AMDGPU_IRQ_CLIENTID_LEGACY, 184, &adev->gfx.priv_reg_irq); if (r) return r; /* Privileged inst */ - r = amdgpu_irq_add_id(adev, AMDGPU_IH_CLIENTID_LEGACY, 185, + r = amdgpu_irq_add_id(adev, AMDGPU_IRQ_CLIENTID_LEGACY, 185, &adev->gfx.priv_inst_irq); if (r) return r; @@ -4624,7 +4624,7 @@ static int gfx_v7_0_hw_init(void *handle) int r; struct amdgpu_device *adev = (struct amdgpu_device *)handle; - gfx_v7_0_gpu_init(adev); + gfx_v7_0_constants_init(adev); /* init rlc */ r = gfx_v7_0_rlc_resume(adev); diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c index 470dc80f4fe7..2aeef2bb93a4 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c @@ -2049,35 +2049,35 @@ static int gfx_v8_0_sw_init(void *handle) adev->gfx.mec.num_queue_per_pipe = 8; /* KIQ event */ - r = amdgpu_irq_add_id(adev, AMDGPU_IH_CLIENTID_LEGACY, VISLANDS30_IV_SRCID_CP_INT_IB2, &adev->gfx.kiq.irq); + r = amdgpu_irq_add_id(adev, AMDGPU_IRQ_CLIENTID_LEGACY, VISLANDS30_IV_SRCID_CP_INT_IB2, &adev->gfx.kiq.irq); if (r) return r; /* EOP Event */ - r = amdgpu_irq_add_id(adev, AMDGPU_IH_CLIENTID_LEGACY, VISLANDS30_IV_SRCID_CP_END_OF_PIPE, &adev->gfx.eop_irq); + r = amdgpu_irq_add_id(adev, AMDGPU_IRQ_CLIENTID_LEGACY, VISLANDS30_IV_SRCID_CP_END_OF_PIPE, &adev->gfx.eop_irq); if (r) return r; /* Privileged reg */ - r = amdgpu_irq_add_id(adev, AMDGPU_IH_CLIENTID_LEGACY, VISLANDS30_IV_SRCID_CP_PRIV_REG_FAULT, + r = amdgpu_irq_add_id(adev, AMDGPU_IRQ_CLIENTID_LEGACY, VISLANDS30_IV_SRCID_CP_PRIV_REG_FAULT, &adev->gfx.priv_reg_irq); if (r) return r; /* Privileged inst */ - r = amdgpu_irq_add_id(adev, AMDGPU_IH_CLIENTID_LEGACY, VISLANDS30_IV_SRCID_CP_PRIV_INSTR_FAULT, + r = amdgpu_irq_add_id(adev, AMDGPU_IRQ_CLIENTID_LEGACY, VISLANDS30_IV_SRCID_CP_PRIV_INSTR_FAULT, &adev->gfx.priv_inst_irq); if (r) return r; /* Add CP EDC/ECC irq */ - r = amdgpu_irq_add_id(adev, AMDGPU_IH_CLIENTID_LEGACY, VISLANDS30_IV_SRCID_CP_ECC_ERROR, + r = amdgpu_irq_add_id(adev, AMDGPU_IRQ_CLIENTID_LEGACY, VISLANDS30_IV_SRCID_CP_ECC_ERROR, &adev->gfx.cp_ecc_error_irq); if (r) return r; /* SQ interrupts. */ - r = amdgpu_irq_add_id(adev, AMDGPU_IH_CLIENTID_LEGACY, VISLANDS30_IV_SRCID_SQ_INTERRUPT_MSG, + r = amdgpu_irq_add_id(adev, AMDGPU_IRQ_CLIENTID_LEGACY, VISLANDS30_IV_SRCID_SQ_INTERRUPT_MSG, &adev->gfx.sq_irq); if (r) { DRM_ERROR("amdgpu_irq_add() for SQ failed: %d\n", r); @@ -3835,7 +3835,7 @@ static void gfx_v8_0_config_init(struct amdgpu_device *adev) } } -static void gfx_v8_0_gpu_init(struct amdgpu_device *adev) +static void gfx_v8_0_constants_init(struct amdgpu_device *adev) { u32 tmp, sh_static_mem_cfg; int i; @@ -4208,31 +4208,11 @@ static int gfx_v8_0_rlc_load_microcode(struct amdgpu_device *adev) static int gfx_v8_0_rlc_resume(struct amdgpu_device *adev) { int r; - u32 tmp; gfx_v8_0_rlc_stop(adev); - - /* disable CG */ - tmp = RREG32(mmRLC_CGCG_CGLS_CTRL); - tmp &= ~(RLC_CGCG_CGLS_CTRL__CGCG_EN_MASK | - RLC_CGCG_CGLS_CTRL__CGLS_EN_MASK); - WREG32(mmRLC_CGCG_CGLS_CTRL, tmp); - if (adev->asic_type == CHIP_POLARIS11 || - adev->asic_type == CHIP_POLARIS10 || - adev->asic_type == CHIP_POLARIS12 || - adev->asic_type == CHIP_VEGAM) { - tmp = RREG32(mmRLC_CGCG_CGLS_CTRL_3D); - tmp &= ~0x3; - WREG32(mmRLC_CGCG_CGLS_CTRL_3D, tmp); - } - - /* disable PG */ - WREG32(mmRLC_PG_CNTL, 0); - gfx_v8_0_rlc_reset(adev); gfx_v8_0_init_pg(adev); - if (adev->firmware.load_type == AMDGPU_FW_LOAD_DIRECT) { /* legacy rlc firmware loading */ r = gfx_v8_0_rlc_load_microcode(adev); @@ -5039,7 +5019,7 @@ static int gfx_v8_0_hw_init(void *handle) struct amdgpu_device *adev = (struct amdgpu_device *)handle; gfx_v8_0_init_golden_registers(adev); - gfx_v8_0_gpu_init(adev); + gfx_v8_0_constants_init(adev); r = gfx_v8_0_rlc_resume(adev); if (r) @@ -5080,6 +5060,55 @@ static int gfx_v8_0_kcq_disable(struct amdgpu_device *adev) return r; } +static bool gfx_v8_0_is_idle(void *handle) +{ + struct amdgpu_device *adev = (struct amdgpu_device *)handle; + + if (REG_GET_FIELD(RREG32(mmGRBM_STATUS), GRBM_STATUS, GUI_ACTIVE) + || RREG32(mmGRBM_STATUS2) != 0x8) + return false; + else + return true; +} + +static bool gfx_v8_0_rlc_is_idle(void *handle) +{ + struct amdgpu_device *adev = (struct amdgpu_device *)handle; + + if (RREG32(mmGRBM_STATUS2) != 0x8) + return false; + else + return true; +} + +static int gfx_v8_0_wait_for_rlc_idle(void *handle) +{ + unsigned int i; + struct amdgpu_device *adev = (struct amdgpu_device *)handle; + + for (i = 0; i < adev->usec_timeout; i++) { + if (gfx_v8_0_rlc_is_idle(handle)) + return 0; + + udelay(1); + } + return -ETIMEDOUT; +} + +static int gfx_v8_0_wait_for_idle(void *handle) +{ + unsigned int i; + struct amdgpu_device *adev = (struct amdgpu_device *)handle; + + for (i = 0; i < adev->usec_timeout; i++) { + if (gfx_v8_0_is_idle(handle)) + return 0; + + udelay(1); + } + return -ETIMEDOUT; +} + static int gfx_v8_0_hw_fini(void *handle) { struct amdgpu_device *adev = (struct amdgpu_device *)handle; @@ -5098,9 +5127,16 @@ static int gfx_v8_0_hw_fini(void *handle) pr_debug("For SRIOV client, shouldn't do anything.\n"); return 0; } - gfx_v8_0_cp_enable(adev, false); - gfx_v8_0_rlc_stop(adev); - + adev->gfx.rlc.funcs->enter_safe_mode(adev); + if (!gfx_v8_0_wait_for_idle(adev)) + gfx_v8_0_cp_enable(adev, false); + else + pr_err("cp is busy, skip halt cp\n"); + if (!gfx_v8_0_wait_for_rlc_idle(adev)) + gfx_v8_0_rlc_stop(adev); + else + pr_err("rlc is busy, skip halt rlc\n"); + adev->gfx.rlc.funcs->exit_safe_mode(adev); return 0; } @@ -5121,30 +5157,6 @@ static int gfx_v8_0_resume(void *handle) return r; } -static bool gfx_v8_0_is_idle(void *handle) -{ - struct amdgpu_device *adev = (struct amdgpu_device *)handle; - - if (REG_GET_FIELD(RREG32(mmGRBM_STATUS), GRBM_STATUS, GUI_ACTIVE)) - return false; - else - return true; -} - -static int gfx_v8_0_wait_for_idle(void *handle) -{ - unsigned i; - struct amdgpu_device *adev = (struct amdgpu_device *)handle; - - for (i = 0; i < adev->usec_timeout; i++) { - if (gfx_v8_0_is_idle(handle)) - return 0; - - udelay(1); - } - return -ETIMEDOUT; -} - static bool gfx_v8_0_check_soft_reset(void *handle) { struct amdgpu_device *adev = (struct amdgpu_device *)handle; diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c index f369d9603435..261bb051b14d 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c @@ -1847,7 +1847,7 @@ static void gfx_v9_0_init_compute_vmid(struct amdgpu_device *adev) mutex_unlock(&adev->srbm_mutex); } -static void gfx_v9_0_gpu_init(struct amdgpu_device *adev) +static void gfx_v9_0_constants_init(struct amdgpu_device *adev) { u32 tmp; int i; @@ -3235,7 +3235,7 @@ static int gfx_v9_0_hw_init(void *handle) gfx_v9_0_init_golden_registers(adev); - gfx_v9_0_gpu_init(adev); + gfx_v9_0_constants_init(adev); r = gfx_v9_0_csb_vram_pin(adev); if (r) diff --git a/drivers/gpu/drm/amd/amdgpu/gfxhub_v1_0.c b/drivers/gpu/drm/amd/amdgpu/gfxhub_v1_0.c index 65f58ebcf835..ceb7847b504f 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfxhub_v1_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gfxhub_v1_0.c @@ -82,7 +82,8 @@ static void gfxhub_v1_0_init_system_aperture_regs(struct amdgpu_device *adev) * to get rid of the VM fault and hardware hang. */ WREG32_SOC15(GC, 0, mmMC_VM_SYSTEM_APERTURE_HIGH_ADDR, - (max(adev->gmc.vram_end, adev->gmc.agp_end) >> 18) + 0x1); + max((adev->gmc.vram_end >> 18) + 0x1, + adev->gmc.agp_end >> 18)); else WREG32_SOC15(GC, 0, mmMC_VM_SYSTEM_APERTURE_HIGH_ADDR, max(adev->gmc.vram_end, adev->gmc.agp_end) >> 18); diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v6_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v6_0.c index 4411463ca719..e1c2b4e9c7b2 100644 --- a/drivers/gpu/drm/amd/amdgpu/gmc_v6_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gmc_v6_0.c @@ -859,11 +859,11 @@ static int gmc_v6_0_sw_init(void *handle) adev->gmc.vram_type = gmc_v6_0_convert_vram_type(tmp); } - r = amdgpu_irq_add_id(adev, AMDGPU_IH_CLIENTID_LEGACY, 146, &adev->gmc.vm_fault); + r = amdgpu_irq_add_id(adev, AMDGPU_IRQ_CLIENTID_LEGACY, 146, &adev->gmc.vm_fault); if (r) return r; - r = amdgpu_irq_add_id(adev, AMDGPU_IH_CLIENTID_LEGACY, 147, &adev->gmc.vm_fault); + r = amdgpu_irq_add_id(adev, AMDGPU_IRQ_CLIENTID_LEGACY, 147, &adev->gmc.vm_fault); if (r) return r; @@ -1180,8 +1180,7 @@ static const struct amdgpu_irq_src_funcs gmc_v6_0_irq_funcs = { static void gmc_v6_0_set_gmc_funcs(struct amdgpu_device *adev) { - if (adev->gmc.gmc_funcs == NULL) - adev->gmc.gmc_funcs = &gmc_v6_0_gmc_funcs; + adev->gmc.gmc_funcs = &gmc_v6_0_gmc_funcs; } static void gmc_v6_0_set_irq_funcs(struct amdgpu_device *adev) diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v7_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v7_0.c index ae776ce9a415..910c4ce19cb3 100644 --- a/drivers/gpu/drm/amd/amdgpu/gmc_v7_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gmc_v7_0.c @@ -991,11 +991,11 @@ static int gmc_v7_0_sw_init(void *handle) adev->gmc.vram_type = gmc_v7_0_convert_vram_type(tmp); } - r = amdgpu_irq_add_id(adev, AMDGPU_IH_CLIENTID_LEGACY, VISLANDS30_IV_SRCID_GFX_PAGE_INV_FAULT, &adev->gmc.vm_fault); + r = amdgpu_irq_add_id(adev, AMDGPU_IRQ_CLIENTID_LEGACY, VISLANDS30_IV_SRCID_GFX_PAGE_INV_FAULT, &adev->gmc.vm_fault); if (r) return r; - r = amdgpu_irq_add_id(adev, AMDGPU_IH_CLIENTID_LEGACY, VISLANDS30_IV_SRCID_GFX_MEM_PROT_FAULT, &adev->gmc.vm_fault); + r = amdgpu_irq_add_id(adev, AMDGPU_IRQ_CLIENTID_LEGACY, VISLANDS30_IV_SRCID_GFX_MEM_PROT_FAULT, &adev->gmc.vm_fault); if (r) return r; @@ -1388,8 +1388,7 @@ static const struct amdgpu_irq_src_funcs gmc_v7_0_irq_funcs = { static void gmc_v7_0_set_gmc_funcs(struct amdgpu_device *adev) { - if (adev->gmc.gmc_funcs == NULL) - adev->gmc.gmc_funcs = &gmc_v7_0_gmc_funcs; + adev->gmc.gmc_funcs = &gmc_v7_0_gmc_funcs; } static void gmc_v7_0_set_irq_funcs(struct amdgpu_device *adev) diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v8_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v8_0.c index 53ae49b8bde8..1d3265c97b70 100644 --- a/drivers/gpu/drm/amd/amdgpu/gmc_v8_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gmc_v8_0.c @@ -1095,11 +1095,11 @@ static int gmc_v8_0_sw_init(void *handle) adev->gmc.vram_type = gmc_v8_0_convert_vram_type(tmp); } - r = amdgpu_irq_add_id(adev, AMDGPU_IH_CLIENTID_LEGACY, VISLANDS30_IV_SRCID_GFX_PAGE_INV_FAULT, &adev->gmc.vm_fault); + r = amdgpu_irq_add_id(adev, AMDGPU_IRQ_CLIENTID_LEGACY, VISLANDS30_IV_SRCID_GFX_PAGE_INV_FAULT, &adev->gmc.vm_fault); if (r) return r; - r = amdgpu_irq_add_id(adev, AMDGPU_IH_CLIENTID_LEGACY, VISLANDS30_IV_SRCID_GFX_MEM_PROT_FAULT, &adev->gmc.vm_fault); + r = amdgpu_irq_add_id(adev, AMDGPU_IRQ_CLIENTID_LEGACY, VISLANDS30_IV_SRCID_GFX_MEM_PROT_FAULT, &adev->gmc.vm_fault); if (r) return r; @@ -1733,8 +1733,7 @@ static const struct amdgpu_irq_src_funcs gmc_v8_0_irq_funcs = { static void gmc_v8_0_set_gmc_funcs(struct amdgpu_device *adev) { - if (adev->gmc.gmc_funcs == NULL) - adev->gmc.gmc_funcs = &gmc_v8_0_gmc_funcs; + adev->gmc.gmc_funcs = &gmc_v8_0_gmc_funcs; } static void gmc_v8_0_set_irq_funcs(struct amdgpu_device *adev) diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c index aad3c7c5fb3a..f35d7a554ad5 100644 --- a/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c @@ -593,8 +593,7 @@ static const struct amdgpu_gmc_funcs gmc_v9_0_gmc_funcs = { static void gmc_v9_0_set_gmc_funcs(struct amdgpu_device *adev) { - if (adev->gmc.gmc_funcs == NULL) - adev->gmc.gmc_funcs = &gmc_v9_0_gmc_funcs; + adev->gmc.gmc_funcs = &gmc_v9_0_gmc_funcs; } static int gmc_v9_0_early_init(void *handle) diff --git a/drivers/gpu/drm/amd/amdgpu/iceland_ih.c b/drivers/gpu/drm/amd/amdgpu/iceland_ih.c index 842c4b677b4d..cf0fc61aebe6 100644 --- a/drivers/gpu/drm/amd/amdgpu/iceland_ih.c +++ b/drivers/gpu/drm/amd/amdgpu/iceland_ih.c @@ -255,7 +255,7 @@ static void iceland_ih_decode_iv(struct amdgpu_device *adev, dw[2] = le32_to_cpu(adev->irq.ih.ring[ring_index + 2]); dw[3] = le32_to_cpu(adev->irq.ih.ring[ring_index + 3]); - entry->client_id = AMDGPU_IH_CLIENTID_LEGACY; + entry->client_id = AMDGPU_IRQ_CLIENTID_LEGACY; entry->src_id = dw[0] & 0xff; entry->src_data[0] = dw[1] & 0xfffffff; entry->ring_id = dw[2] & 0xff; @@ -297,7 +297,7 @@ static int iceland_ih_sw_init(void *handle) int r; struct amdgpu_device *adev = (struct amdgpu_device *)handle; - r = amdgpu_ih_ring_init(adev, 64 * 1024, false); + r = amdgpu_ih_ring_init(adev, &adev->irq.ih, 64 * 1024, false); if (r) return r; @@ -311,7 +311,7 @@ static int iceland_ih_sw_fini(void *handle) struct amdgpu_device *adev = (struct amdgpu_device *)handle; amdgpu_irq_fini(adev); - amdgpu_ih_ring_fini(adev); + amdgpu_ih_ring_fini(adev, &adev->irq.ih); amdgpu_irq_remove_domain(adev); return 0; @@ -447,8 +447,7 @@ static const struct amdgpu_ih_funcs iceland_ih_funcs = { static void iceland_ih_set_interrupt_funcs(struct amdgpu_device *adev) { - if (adev->irq.ih_funcs == NULL) - adev->irq.ih_funcs = &iceland_ih_funcs; + adev->irq.ih_funcs = &iceland_ih_funcs; } const struct amdgpu_ip_block_version iceland_ih_ip_block = diff --git a/drivers/gpu/drm/amd/amdgpu/kv_dpm.c b/drivers/gpu/drm/amd/amdgpu/kv_dpm.c index cb79a93c2eb7..d0e478f43443 100644 --- a/drivers/gpu/drm/amd/amdgpu/kv_dpm.c +++ b/drivers/gpu/drm/amd/amdgpu/kv_dpm.c @@ -2995,12 +2995,12 @@ static int kv_dpm_sw_init(void *handle) int ret; struct amdgpu_device *adev = (struct amdgpu_device *)handle; - ret = amdgpu_irq_add_id(adev, AMDGPU_IH_CLIENTID_LEGACY, 230, + ret = amdgpu_irq_add_id(adev, AMDGPU_IRQ_CLIENTID_LEGACY, 230, &adev->pm.dpm.thermal.irq); if (ret) return ret; - ret = amdgpu_irq_add_id(adev, AMDGPU_IH_CLIENTID_LEGACY, 231, + ret = amdgpu_irq_add_id(adev, AMDGPU_IRQ_CLIENTID_LEGACY, 231, &adev->pm.dpm.thermal.irq); if (ret) return ret; diff --git a/drivers/gpu/drm/amd/amdgpu/mmhub_v1_0.c b/drivers/gpu/drm/amd/amdgpu/mmhub_v1_0.c index 80698b5ffa4a..14649f8475f3 100644 --- a/drivers/gpu/drm/amd/amdgpu/mmhub_v1_0.c +++ b/drivers/gpu/drm/amd/amdgpu/mmhub_v1_0.c @@ -100,7 +100,8 @@ static void mmhub_v1_0_init_system_aperture_regs(struct amdgpu_device *adev) * to get rid of the VM fault and hardware hang. */ WREG32_SOC15(MMHUB, 0, mmMC_VM_SYSTEM_APERTURE_HIGH_ADDR, - (max(adev->gmc.vram_end, adev->gmc.agp_end) >> 18) + 0x1); + max((adev->gmc.vram_end >> 18) + 0x1, + adev->gmc.agp_end >> 18)); else WREG32_SOC15(MMHUB, 0, mmMC_VM_SYSTEM_APERTURE_HIGH_ADDR, max(adev->gmc.vram_end, adev->gmc.agp_end) >> 18); diff --git a/drivers/gpu/drm/amd/amdgpu/mxgpu_vi.c b/drivers/gpu/drm/amd/amdgpu/mxgpu_vi.c index 842567b53df5..64e875d528dd 100644 --- a/drivers/gpu/drm/amd/amdgpu/mxgpu_vi.c +++ b/drivers/gpu/drm/amd/amdgpu/mxgpu_vi.c @@ -580,11 +580,11 @@ int xgpu_vi_mailbox_add_irq_id(struct amdgpu_device *adev) { int r; - r = amdgpu_irq_add_id(adev, AMDGPU_IH_CLIENTID_LEGACY, 135, &adev->virt.rcv_irq); + r = amdgpu_irq_add_id(adev, AMDGPU_IRQ_CLIENTID_LEGACY, 135, &adev->virt.rcv_irq); if (r) return r; - r = amdgpu_irq_add_id(adev, AMDGPU_IH_CLIENTID_LEGACY, 138, &adev->virt.ack_irq); + r = amdgpu_irq_add_id(adev, AMDGPU_IRQ_CLIENTID_LEGACY, 138, &adev->virt.ack_irq); if (r) { amdgpu_irq_put(adev, &adev->virt.rcv_irq, 0); return r; diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v2_4.c b/drivers/gpu/drm/amd/amdgpu/sdma_v2_4.c index c403bdf8ad70..cd781abc4953 100644 --- a/drivers/gpu/drm/amd/amdgpu/sdma_v2_4.c +++ b/drivers/gpu/drm/amd/amdgpu/sdma_v2_4.c @@ -898,19 +898,19 @@ static int sdma_v2_4_sw_init(void *handle) struct amdgpu_device *adev = (struct amdgpu_device *)handle; /* SDMA trap event */ - r = amdgpu_irq_add_id(adev, AMDGPU_IH_CLIENTID_LEGACY, VISLANDS30_IV_SRCID_SDMA_TRAP, + r = amdgpu_irq_add_id(adev, AMDGPU_IRQ_CLIENTID_LEGACY, VISLANDS30_IV_SRCID_SDMA_TRAP, &adev->sdma.trap_irq); if (r) return r; /* SDMA Privileged inst */ - r = amdgpu_irq_add_id(adev, AMDGPU_IH_CLIENTID_LEGACY, 241, + r = amdgpu_irq_add_id(adev, AMDGPU_IRQ_CLIENTID_LEGACY, 241, &adev->sdma.illegal_inst_irq); if (r) return r; /* SDMA Privileged inst */ - r = amdgpu_irq_add_id(adev, AMDGPU_IH_CLIENTID_LEGACY, VISLANDS30_IV_SRCID_SDMA_SRBM_WRITE, + r = amdgpu_irq_add_id(adev, AMDGPU_IRQ_CLIENTID_LEGACY, VISLANDS30_IV_SRCID_SDMA_SRBM_WRITE, &adev->sdma.illegal_inst_irq); if (r) return r; @@ -1296,10 +1296,8 @@ static const struct amdgpu_buffer_funcs sdma_v2_4_buffer_funcs = { static void sdma_v2_4_set_buffer_funcs(struct amdgpu_device *adev) { - if (adev->mman.buffer_funcs == NULL) { - adev->mman.buffer_funcs = &sdma_v2_4_buffer_funcs; - adev->mman.buffer_funcs_ring = &adev->sdma.instance[0].ring; - } + adev->mman.buffer_funcs = &sdma_v2_4_buffer_funcs; + adev->mman.buffer_funcs_ring = &adev->sdma.instance[0].ring; } static const struct amdgpu_vm_pte_funcs sdma_v2_4_vm_pte_funcs = { @@ -1315,15 +1313,13 @@ static void sdma_v2_4_set_vm_pte_funcs(struct amdgpu_device *adev) struct drm_gpu_scheduler *sched; unsigned i; - if (adev->vm_manager.vm_pte_funcs == NULL) { - adev->vm_manager.vm_pte_funcs = &sdma_v2_4_vm_pte_funcs; - for (i = 0; i < adev->sdma.num_instances; i++) { - sched = &adev->sdma.instance[i].ring.sched; - adev->vm_manager.vm_pte_rqs[i] = - &sched->sched_rq[DRM_SCHED_PRIORITY_KERNEL]; - } - adev->vm_manager.vm_pte_num_rqs = adev->sdma.num_instances; + adev->vm_manager.vm_pte_funcs = &sdma_v2_4_vm_pte_funcs; + for (i = 0; i < adev->sdma.num_instances; i++) { + sched = &adev->sdma.instance[i].ring.sched; + adev->vm_manager.vm_pte_rqs[i] = + &sched->sched_rq[DRM_SCHED_PRIORITY_KERNEL]; } + adev->vm_manager.vm_pte_num_rqs = adev->sdma.num_instances; } const struct amdgpu_ip_block_version sdma_v2_4_ip_block = diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v3_0.c b/drivers/gpu/drm/amd/amdgpu/sdma_v3_0.c index 2677d6a1bf42..6d5c8ac64874 100644 --- a/drivers/gpu/drm/amd/amdgpu/sdma_v3_0.c +++ b/drivers/gpu/drm/amd/amdgpu/sdma_v3_0.c @@ -1177,19 +1177,19 @@ static int sdma_v3_0_sw_init(void *handle) struct amdgpu_device *adev = (struct amdgpu_device *)handle; /* SDMA trap event */ - r = amdgpu_irq_add_id(adev, AMDGPU_IH_CLIENTID_LEGACY, VISLANDS30_IV_SRCID_SDMA_TRAP, + r = amdgpu_irq_add_id(adev, AMDGPU_IRQ_CLIENTID_LEGACY, VISLANDS30_IV_SRCID_SDMA_TRAP, &adev->sdma.trap_irq); if (r) return r; /* SDMA Privileged inst */ - r = amdgpu_irq_add_id(adev, AMDGPU_IH_CLIENTID_LEGACY, 241, + r = amdgpu_irq_add_id(adev, AMDGPU_IRQ_CLIENTID_LEGACY, 241, &adev->sdma.illegal_inst_irq); if (r) return r; /* SDMA Privileged inst */ - r = amdgpu_irq_add_id(adev, AMDGPU_IH_CLIENTID_LEGACY, VISLANDS30_IV_SRCID_SDMA_SRBM_WRITE, + r = amdgpu_irq_add_id(adev, AMDGPU_IRQ_CLIENTID_LEGACY, VISLANDS30_IV_SRCID_SDMA_SRBM_WRITE, &adev->sdma.illegal_inst_irq); if (r) return r; @@ -1736,10 +1736,8 @@ static const struct amdgpu_buffer_funcs sdma_v3_0_buffer_funcs = { static void sdma_v3_0_set_buffer_funcs(struct amdgpu_device *adev) { - if (adev->mman.buffer_funcs == NULL) { - adev->mman.buffer_funcs = &sdma_v3_0_buffer_funcs; - adev->mman.buffer_funcs_ring = &adev->sdma.instance[0].ring; - } + adev->mman.buffer_funcs = &sdma_v3_0_buffer_funcs; + adev->mman.buffer_funcs_ring = &adev->sdma.instance[0].ring; } static const struct amdgpu_vm_pte_funcs sdma_v3_0_vm_pte_funcs = { @@ -1755,15 +1753,13 @@ static void sdma_v3_0_set_vm_pte_funcs(struct amdgpu_device *adev) struct drm_gpu_scheduler *sched; unsigned i; - if (adev->vm_manager.vm_pte_funcs == NULL) { - adev->vm_manager.vm_pte_funcs = &sdma_v3_0_vm_pte_funcs; - for (i = 0; i < adev->sdma.num_instances; i++) { - sched = &adev->sdma.instance[i].ring.sched; - adev->vm_manager.vm_pte_rqs[i] = - &sched->sched_rq[DRM_SCHED_PRIORITY_KERNEL]; - } - adev->vm_manager.vm_pte_num_rqs = adev->sdma.num_instances; + adev->vm_manager.vm_pte_funcs = &sdma_v3_0_vm_pte_funcs; + for (i = 0; i < adev->sdma.num_instances; i++) { + sched = &adev->sdma.instance[i].ring.sched; + adev->vm_manager.vm_pte_rqs[i] = + &sched->sched_rq[DRM_SCHED_PRIORITY_KERNEL]; } + adev->vm_manager.vm_pte_num_rqs = adev->sdma.num_instances; } const struct amdgpu_ip_block_version sdma_v3_0_ip_block = diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c b/drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c index 2ea1f0d8f5be..a3e2ed15fff2 100644 --- a/drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c +++ b/drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c @@ -1320,9 +1320,15 @@ static int sdma_v4_0_sw_init(void *handle) DRM_INFO("use_doorbell being set to: [%s]\n", ring->use_doorbell?"true":"false"); - ring->doorbell_index = (i == 0) ? - (AMDGPU_DOORBELL64_sDMA_ENGINE0 << 1) //get DWORD offset - : (AMDGPU_DOORBELL64_sDMA_ENGINE1 << 1); // get DWORD offset + if (adev->asic_type == CHIP_VEGA10) + ring->doorbell_index = (i == 0) ? + (AMDGPU_VEGA10_DOORBELL64_sDMA_ENGINE0 << 1) //get DWORD offset + : (AMDGPU_VEGA10_DOORBELL64_sDMA_ENGINE1 << 1); // get DWORD offset + else + ring->doorbell_index = (i == 0) ? + (AMDGPU_DOORBELL64_sDMA_ENGINE0 << 1) //get DWORD offset + : (AMDGPU_DOORBELL64_sDMA_ENGINE1 << 1); // get DWORD offset + sprintf(ring->name, "sdma%d", i); r = amdgpu_ring_init(adev, ring, 1024, @@ -1801,10 +1807,8 @@ static const struct amdgpu_buffer_funcs sdma_v4_0_buffer_funcs = { static void sdma_v4_0_set_buffer_funcs(struct amdgpu_device *adev) { - if (adev->mman.buffer_funcs == NULL) { - adev->mman.buffer_funcs = &sdma_v4_0_buffer_funcs; - adev->mman.buffer_funcs_ring = &adev->sdma.instance[0].ring; - } + adev->mman.buffer_funcs = &sdma_v4_0_buffer_funcs; + adev->mman.buffer_funcs_ring = &adev->sdma.instance[0].ring; } static const struct amdgpu_vm_pte_funcs sdma_v4_0_vm_pte_funcs = { @@ -1820,15 +1824,13 @@ static void sdma_v4_0_set_vm_pte_funcs(struct amdgpu_device *adev) struct drm_gpu_scheduler *sched; unsigned i; - if (adev->vm_manager.vm_pte_funcs == NULL) { - adev->vm_manager.vm_pte_funcs = &sdma_v4_0_vm_pte_funcs; - for (i = 0; i < adev->sdma.num_instances; i++) { - sched = &adev->sdma.instance[i].ring.sched; - adev->vm_manager.vm_pte_rqs[i] = - &sched->sched_rq[DRM_SCHED_PRIORITY_KERNEL]; - } - adev->vm_manager.vm_pte_num_rqs = adev->sdma.num_instances; + adev->vm_manager.vm_pte_funcs = &sdma_v4_0_vm_pte_funcs; + for (i = 0; i < adev->sdma.num_instances; i++) { + sched = &adev->sdma.instance[i].ring.sched; + adev->vm_manager.vm_pte_rqs[i] = + &sched->sched_rq[DRM_SCHED_PRIORITY_KERNEL]; } + adev->vm_manager.vm_pte_num_rqs = adev->sdma.num_instances; } const struct amdgpu_ip_block_version sdma_v4_0_ip_block = { diff --git a/drivers/gpu/drm/amd/amdgpu/si_dma.c b/drivers/gpu/drm/amd/amdgpu/si_dma.c index fafaf259b17b..d4ceaf440f26 100644 --- a/drivers/gpu/drm/amd/amdgpu/si_dma.c +++ b/drivers/gpu/drm/amd/amdgpu/si_dma.c @@ -502,12 +502,12 @@ static int si_dma_sw_init(void *handle) struct amdgpu_device *adev = (struct amdgpu_device *)handle; /* DMA0 trap event */ - r = amdgpu_irq_add_id(adev, AMDGPU_IH_CLIENTID_LEGACY, 224, &adev->sdma.trap_irq); + r = amdgpu_irq_add_id(adev, AMDGPU_IRQ_CLIENTID_LEGACY, 224, &adev->sdma.trap_irq); if (r) return r; /* DMA1 trap event */ - r = amdgpu_irq_add_id(adev, AMDGPU_IH_CLIENTID_LEGACY, 244, &adev->sdma.trap_irq_1); + r = amdgpu_irq_add_id(adev, AMDGPU_IRQ_CLIENTID_LEGACY, 244, &adev->sdma.trap_irq_1); if (r) return r; @@ -863,10 +863,8 @@ static const struct amdgpu_buffer_funcs si_dma_buffer_funcs = { static void si_dma_set_buffer_funcs(struct amdgpu_device *adev) { - if (adev->mman.buffer_funcs == NULL) { - adev->mman.buffer_funcs = &si_dma_buffer_funcs; - adev->mman.buffer_funcs_ring = &adev->sdma.instance[0].ring; - } + adev->mman.buffer_funcs = &si_dma_buffer_funcs; + adev->mman.buffer_funcs_ring = &adev->sdma.instance[0].ring; } static const struct amdgpu_vm_pte_funcs si_dma_vm_pte_funcs = { @@ -882,15 +880,13 @@ static void si_dma_set_vm_pte_funcs(struct amdgpu_device *adev) struct drm_gpu_scheduler *sched; unsigned i; - if (adev->vm_manager.vm_pte_funcs == NULL) { - adev->vm_manager.vm_pte_funcs = &si_dma_vm_pte_funcs; - for (i = 0; i < adev->sdma.num_instances; i++) { - sched = &adev->sdma.instance[i].ring.sched; - adev->vm_manager.vm_pte_rqs[i] = - &sched->sched_rq[DRM_SCHED_PRIORITY_KERNEL]; - } - adev->vm_manager.vm_pte_num_rqs = adev->sdma.num_instances; + adev->vm_manager.vm_pte_funcs = &si_dma_vm_pte_funcs; + for (i = 0; i < adev->sdma.num_instances; i++) { + sched = &adev->sdma.instance[i].ring.sched; + adev->vm_manager.vm_pte_rqs[i] = + &sched->sched_rq[DRM_SCHED_PRIORITY_KERNEL]; } + adev->vm_manager.vm_pte_num_rqs = adev->sdma.num_instances; } const struct amdgpu_ip_block_version si_dma_ip_block = diff --git a/drivers/gpu/drm/amd/amdgpu/si_dpm.c b/drivers/gpu/drm/amd/amdgpu/si_dpm.c index 1de96995e690..da58040fdbdc 100644 --- a/drivers/gpu/drm/amd/amdgpu/si_dpm.c +++ b/drivers/gpu/drm/amd/amdgpu/si_dpm.c @@ -7687,11 +7687,11 @@ static int si_dpm_sw_init(void *handle) int ret; struct amdgpu_device *adev = (struct amdgpu_device *)handle; - ret = amdgpu_irq_add_id(adev, AMDGPU_IH_CLIENTID_LEGACY, 230, &adev->pm.dpm.thermal.irq); + ret = amdgpu_irq_add_id(adev, AMDGPU_IRQ_CLIENTID_LEGACY, 230, &adev->pm.dpm.thermal.irq); if (ret) return ret; - ret = amdgpu_irq_add_id(adev, AMDGPU_IH_CLIENTID_LEGACY, 231, &adev->pm.dpm.thermal.irq); + ret = amdgpu_irq_add_id(adev, AMDGPU_IRQ_CLIENTID_LEGACY, 231, &adev->pm.dpm.thermal.irq); if (ret) return ret; diff --git a/drivers/gpu/drm/amd/amdgpu/si_ih.c b/drivers/gpu/drm/amd/amdgpu/si_ih.c index 60dad63098a2..b3d7d9f83202 100644 --- a/drivers/gpu/drm/amd/amdgpu/si_ih.c +++ b/drivers/gpu/drm/amd/amdgpu/si_ih.c @@ -142,7 +142,7 @@ static void si_ih_decode_iv(struct amdgpu_device *adev, dw[2] = le32_to_cpu(adev->irq.ih.ring[ring_index + 2]); dw[3] = le32_to_cpu(adev->irq.ih.ring[ring_index + 3]); - entry->client_id = AMDGPU_IH_CLIENTID_LEGACY; + entry->client_id = AMDGPU_IRQ_CLIENTID_LEGACY; entry->src_id = dw[0] & 0xff; entry->src_data[0] = dw[1] & 0xfffffff; entry->ring_id = dw[2] & 0xff; @@ -170,7 +170,7 @@ static int si_ih_sw_init(void *handle) int r; struct amdgpu_device *adev = (struct amdgpu_device *)handle; - r = amdgpu_ih_ring_init(adev, 64 * 1024, false); + r = amdgpu_ih_ring_init(adev, &adev->irq.ih, 64 * 1024, false); if (r) return r; @@ -182,7 +182,7 @@ static int si_ih_sw_fini(void *handle) struct amdgpu_device *adev = (struct amdgpu_device *)handle; amdgpu_irq_fini(adev); - amdgpu_ih_ring_fini(adev); + amdgpu_ih_ring_fini(adev, &adev->irq.ih); return 0; } @@ -308,8 +308,7 @@ static const struct amdgpu_ih_funcs si_ih_funcs = { static void si_ih_set_interrupt_funcs(struct amdgpu_device *adev) { - if (adev->irq.ih_funcs == NULL) - adev->irq.ih_funcs = &si_ih_funcs; + adev->irq.ih_funcs = &si_ih_funcs; } const struct amdgpu_ip_block_version si_ih_ip_block = diff --git a/drivers/gpu/drm/amd/amdgpu/soc15.c b/drivers/gpu/drm/amd/amdgpu/soc15.c index 138c4810a3de..fb26039beeba 100644 --- a/drivers/gpu/drm/amd/amdgpu/soc15.c +++ b/drivers/gpu/drm/amd/amdgpu/soc15.c @@ -739,7 +739,8 @@ static int soc15_common_early_init(void *handle) adev->pg_flags = AMD_PG_SUPPORT_SDMA | AMD_PG_SUPPORT_MMHUB | - AMD_PG_SUPPORT_VCN; + AMD_PG_SUPPORT_VCN | + AMD_PG_SUPPORT_VCN_DPG; } else { adev->cg_flags = AMD_CG_SUPPORT_GFX_MGCG | AMD_CG_SUPPORT_GFX_MGLS | diff --git a/drivers/gpu/drm/amd/amdgpu/soc15_common.h b/drivers/gpu/drm/amd/amdgpu/soc15_common.h index f5d602540673..958b10a57073 100644 --- a/drivers/gpu/drm/amd/amdgpu/soc15_common.h +++ b/drivers/gpu/drm/amd/amdgpu/soc15_common.h @@ -57,13 +57,33 @@ loop--; \ if (!loop) { \ DRM_ERROR("Register(%d) [%s] failed to reach value 0x%08x != 0x%08x\n", \ - inst, #reg, expected_value, (tmp_ & (mask))); \ + inst, #reg, (unsigned)expected_value, (unsigned)(tmp_ & (mask))); \ ret = -ETIMEDOUT; \ break; \ } \ } \ } while (0) +#define RREG32_SOC15_DPG_MODE(ip, inst, reg, mask, sram_sel) \ + ({ WREG32_SOC15(ip, inst, mmUVD_DPG_LMA_MASK, mask); \ + WREG32_SOC15(ip, inst, mmUVD_DPG_LMA_CTL, \ + UVD_DPG_LMA_CTL__MASK_EN_MASK | \ + ((adev->reg_offset[ip##_HWIP][inst][reg##_BASE_IDX] + reg) \ + << UVD_DPG_LMA_CTL__READ_WRITE_ADDR__SHIFT) | \ + (sram_sel << UVD_DPG_LMA_CTL__SRAM_SEL__SHIFT)); \ + RREG32_SOC15(ip, inst, mmUVD_DPG_LMA_DATA); }) + +#define WREG32_SOC15_DPG_MODE(ip, inst, reg, value, mask, sram_sel) \ + do { \ + WREG32_SOC15(ip, inst, mmUVD_DPG_LMA_DATA, value); \ + WREG32_SOC15(ip, inst, mmUVD_DPG_LMA_MASK, mask); \ + WREG32_SOC15(ip, inst, mmUVD_DPG_LMA_CTL, \ + UVD_DPG_LMA_CTL__READ_WRITE_MASK | \ + ((adev->reg_offset[ip##_HWIP][inst][reg##_BASE_IDX] + reg) \ + << UVD_DPG_LMA_CTL__READ_WRITE_ADDR__SHIFT) | \ + (sram_sel << UVD_DPG_LMA_CTL__SRAM_SEL__SHIFT)); \ + } while (0) + #endif diff --git a/drivers/gpu/drm/amd/amdgpu/tonga_ih.c b/drivers/gpu/drm/amd/amdgpu/tonga_ih.c index 52853d8a8fdd..3abffd06b5c7 100644 --- a/drivers/gpu/drm/amd/amdgpu/tonga_ih.c +++ b/drivers/gpu/drm/amd/amdgpu/tonga_ih.c @@ -266,7 +266,7 @@ static void tonga_ih_decode_iv(struct amdgpu_device *adev, dw[2] = le32_to_cpu(adev->irq.ih.ring[ring_index + 2]); dw[3] = le32_to_cpu(adev->irq.ih.ring[ring_index + 3]); - entry->client_id = AMDGPU_IH_CLIENTID_LEGACY; + entry->client_id = AMDGPU_IRQ_CLIENTID_LEGACY; entry->src_id = dw[0] & 0xff; entry->src_data[0] = dw[1] & 0xfffffff; entry->ring_id = dw[2] & 0xff; @@ -317,7 +317,7 @@ static int tonga_ih_sw_init(void *handle) int r; struct amdgpu_device *adev = (struct amdgpu_device *)handle; - r = amdgpu_ih_ring_init(adev, 64 * 1024, true); + r = amdgpu_ih_ring_init(adev, &adev->irq.ih, 64 * 1024, true); if (r) return r; @@ -334,7 +334,7 @@ static int tonga_ih_sw_fini(void *handle) struct amdgpu_device *adev = (struct amdgpu_device *)handle; amdgpu_irq_fini(adev); - amdgpu_ih_ring_fini(adev); + amdgpu_ih_ring_fini(adev, &adev->irq.ih); amdgpu_irq_remove_domain(adev); return 0; @@ -513,8 +513,7 @@ static const struct amdgpu_ih_funcs tonga_ih_funcs = { static void tonga_ih_set_interrupt_funcs(struct amdgpu_device *adev) { - if (adev->irq.ih_funcs == NULL) - adev->irq.ih_funcs = &tonga_ih_funcs; + adev->irq.ih_funcs = &tonga_ih_funcs; } const struct amdgpu_ip_block_version tonga_ih_ip_block = diff --git a/drivers/gpu/drm/amd/amdgpu/uvd_v4_2.c b/drivers/gpu/drm/amd/amdgpu/uvd_v4_2.c index 8a926d1df939..1fc17bf39fed 100644 --- a/drivers/gpu/drm/amd/amdgpu/uvd_v4_2.c +++ b/drivers/gpu/drm/amd/amdgpu/uvd_v4_2.c @@ -108,7 +108,7 @@ static int uvd_v4_2_sw_init(void *handle) int r; /* UVD TRAP */ - r = amdgpu_irq_add_id(adev, AMDGPU_IH_CLIENTID_LEGACY, 124, &adev->uvd.inst->irq); + r = amdgpu_irq_add_id(adev, AMDGPU_IRQ_CLIENTID_LEGACY, 124, &adev->uvd.inst->irq); if (r) return r; diff --git a/drivers/gpu/drm/amd/amdgpu/uvd_v5_0.c b/drivers/gpu/drm/amd/amdgpu/uvd_v5_0.c index 50248059412e..fde6ad5ac9ab 100644 --- a/drivers/gpu/drm/amd/amdgpu/uvd_v5_0.c +++ b/drivers/gpu/drm/amd/amdgpu/uvd_v5_0.c @@ -105,7 +105,7 @@ static int uvd_v5_0_sw_init(void *handle) int r; /* UVD TRAP */ - r = amdgpu_irq_add_id(adev, AMDGPU_IH_CLIENTID_LEGACY, VISLANDS30_IV_SRCID_UVD_SYSTEM_MESSAGE, &adev->uvd.inst->irq); + r = amdgpu_irq_add_id(adev, AMDGPU_IRQ_CLIENTID_LEGACY, VISLANDS30_IV_SRCID_UVD_SYSTEM_MESSAGE, &adev->uvd.inst->irq); if (r) return r; diff --git a/drivers/gpu/drm/amd/amdgpu/uvd_v6_0.c b/drivers/gpu/drm/amd/amdgpu/uvd_v6_0.c index 6ae82cc2e55e..8ef4a5392112 100644 --- a/drivers/gpu/drm/amd/amdgpu/uvd_v6_0.c +++ b/drivers/gpu/drm/amd/amdgpu/uvd_v6_0.c @@ -393,14 +393,14 @@ static int uvd_v6_0_sw_init(void *handle) struct amdgpu_device *adev = (struct amdgpu_device *)handle; /* UVD TRAP */ - r = amdgpu_irq_add_id(adev, AMDGPU_IH_CLIENTID_LEGACY, VISLANDS30_IV_SRCID_UVD_SYSTEM_MESSAGE, &adev->uvd.inst->irq); + r = amdgpu_irq_add_id(adev, AMDGPU_IRQ_CLIENTID_LEGACY, VISLANDS30_IV_SRCID_UVD_SYSTEM_MESSAGE, &adev->uvd.inst->irq); if (r) return r; /* UVD ENC TRAP */ if (uvd_v6_0_enc_support(adev)) { for (i = 0; i < adev->uvd.num_enc_rings; ++i) { - r = amdgpu_irq_add_id(adev, AMDGPU_IH_CLIENTID_LEGACY, i + VISLANDS30_IV_SRCID_UVD_ENC_GEN_PURP, &adev->uvd.inst->irq); + r = amdgpu_irq_add_id(adev, AMDGPU_IRQ_CLIENTID_LEGACY, i + VISLANDS30_IV_SRCID_UVD_ENC_GEN_PURP, &adev->uvd.inst->irq); if (r) return r; } diff --git a/drivers/gpu/drm/amd/amdgpu/vce_v2_0.c b/drivers/gpu/drm/amd/amdgpu/vce_v2_0.c index 7eaa54ba016b..ea28828360d3 100644 --- a/drivers/gpu/drm/amd/amdgpu/vce_v2_0.c +++ b/drivers/gpu/drm/amd/amdgpu/vce_v2_0.c @@ -417,7 +417,7 @@ static int vce_v2_0_sw_init(void *handle) struct amdgpu_device *adev = (struct amdgpu_device *)handle; /* VCE */ - r = amdgpu_irq_add_id(adev, AMDGPU_IH_CLIENTID_LEGACY, 167, &adev->vce.irq); + r = amdgpu_irq_add_id(adev, AMDGPU_IRQ_CLIENTID_LEGACY, 167, &adev->vce.irq); if (r) return r; diff --git a/drivers/gpu/drm/amd/amdgpu/vce_v3_0.c b/drivers/gpu/drm/amd/amdgpu/vce_v3_0.c index c8390f9adfd6..6dbd39730070 100644 --- a/drivers/gpu/drm/amd/amdgpu/vce_v3_0.c +++ b/drivers/gpu/drm/amd/amdgpu/vce_v3_0.c @@ -423,7 +423,7 @@ static int vce_v3_0_sw_init(void *handle) int r, i; /* VCE */ - r = amdgpu_irq_add_id(adev, AMDGPU_IH_CLIENTID_LEGACY, VISLANDS30_IV_SRCID_VCE_TRAP, &adev->vce.irq); + r = amdgpu_irq_add_id(adev, AMDGPU_IRQ_CLIENTID_LEGACY, VISLANDS30_IV_SRCID_VCE_TRAP, &adev->vce.irq); if (r) return r; diff --git a/drivers/gpu/drm/amd/amdgpu/vcn_v1_0.c b/drivers/gpu/drm/amd/amdgpu/vcn_v1_0.c index 2664bb2c47c3..63d7f97e81b7 100644 --- a/drivers/gpu/drm/amd/amdgpu/vcn_v1_0.c +++ b/drivers/gpu/drm/amd/amdgpu/vcn_v1_0.c @@ -198,7 +198,8 @@ static int vcn_v1_0_hw_init(void *handle) done: if (!r) - DRM_INFO("VCN decode and encode initialized successfully.\n"); + DRM_INFO("VCN decode and encode initialized successfully(under %s).\n", + (adev->pg_flags & AMD_PG_SUPPORT_VCN_DPG)?"DPG Mode":"SPG Mode"); return r; } @@ -266,13 +267,13 @@ static int vcn_v1_0_resume(void *handle) } /** - * vcn_v1_0_mc_resume - memory controller programming + * vcn_v1_0_mc_resume_spg_mode - memory controller programming * * @adev: amdgpu_device pointer * * Let the VCN memory controller know it's offsets */ -static void vcn_v1_0_mc_resume(struct amdgpu_device *adev) +static void vcn_v1_0_mc_resume_spg_mode(struct amdgpu_device *adev) { uint32_t size = AMDGPU_GPU_PAGE_ALIGN(adev->vcn.fw->size + 4); uint32_t offset; @@ -319,6 +320,65 @@ static void vcn_v1_0_mc_resume(struct amdgpu_device *adev) adev->gfx.config.gb_addr_config); } +static void vcn_v1_0_mc_resume_dpg_mode(struct amdgpu_device *adev) +{ + uint32_t size = AMDGPU_GPU_PAGE_ALIGN(adev->vcn.fw->size + 4); + uint32_t offset; + + if (adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) { + WREG32_SOC15_DPG_MODE(UVD, 0, mmUVD_LMI_VCPU_CACHE_64BIT_BAR_LOW, + (adev->firmware.ucode[AMDGPU_UCODE_ID_VCN].tmr_mc_addr_lo), + 0xFFFFFFFF, 0); + WREG32_SOC15_DPG_MODE(UVD, 0, mmUVD_LMI_VCPU_CACHE_64BIT_BAR_HIGH, + (adev->firmware.ucode[AMDGPU_UCODE_ID_VCN].tmr_mc_addr_hi), + 0xFFFFFFFF, 0); + WREG32_SOC15_DPG_MODE(UVD, 0, mmUVD_VCPU_CACHE_OFFSET0, 0, + 0xFFFFFFFF, 0); + offset = 0; + } else { + WREG32_SOC15_DPG_MODE(UVD, 0, mmUVD_LMI_VCPU_CACHE_64BIT_BAR_LOW, + lower_32_bits(adev->vcn.gpu_addr), 0xFFFFFFFF, 0); + WREG32_SOC15_DPG_MODE(UVD, 0, mmUVD_LMI_VCPU_CACHE_64BIT_BAR_HIGH, + upper_32_bits(adev->vcn.gpu_addr), 0xFFFFFFFF, 0); + offset = size; + WREG32_SOC15_DPG_MODE(UVD, 0, mmUVD_VCPU_CACHE_OFFSET0, + AMDGPU_UVD_FIRMWARE_OFFSET >> 3, 0xFFFFFFFF, 0); + } + + WREG32_SOC15_DPG_MODE(UVD, 0, mmUVD_VCPU_CACHE_SIZE0, size, 0xFFFFFFFF, 0); + + WREG32_SOC15_DPG_MODE(UVD, 0, mmUVD_LMI_VCPU_CACHE1_64BIT_BAR_LOW, + lower_32_bits(adev->vcn.gpu_addr + offset), 0xFFFFFFFF, 0); + WREG32_SOC15_DPG_MODE(UVD, 0, mmUVD_LMI_VCPU_CACHE1_64BIT_BAR_HIGH, + upper_32_bits(adev->vcn.gpu_addr + offset), 0xFFFFFFFF, 0); + WREG32_SOC15_DPG_MODE(UVD, 0, mmUVD_VCPU_CACHE_OFFSET1, 0, + 0xFFFFFFFF, 0); + WREG32_SOC15_DPG_MODE(UVD, 0, mmUVD_VCPU_CACHE_SIZE1, AMDGPU_VCN_HEAP_SIZE, + 0xFFFFFFFF, 0); + + WREG32_SOC15_DPG_MODE(UVD, 0, mmUVD_LMI_VCPU_CACHE2_64BIT_BAR_LOW, + lower_32_bits(adev->vcn.gpu_addr + offset + AMDGPU_VCN_HEAP_SIZE), + 0xFFFFFFFF, 0); + WREG32_SOC15_DPG_MODE(UVD, 0, mmUVD_LMI_VCPU_CACHE2_64BIT_BAR_HIGH, + upper_32_bits(adev->vcn.gpu_addr + offset + AMDGPU_VCN_HEAP_SIZE), + 0xFFFFFFFF, 0); + WREG32_SOC15_DPG_MODE(UVD, 0, mmUVD_VCPU_CACHE_OFFSET2, 0, 0xFFFFFFFF, 0); + WREG32_SOC15_DPG_MODE(UVD, 0, mmUVD_VCPU_CACHE_SIZE2, + AMDGPU_VCN_STACK_SIZE + (AMDGPU_VCN_SESSION_SIZE * 40), + 0xFFFFFFFF, 0); + + WREG32_SOC15_DPG_MODE(UVD, 0, mmUVD_UDEC_ADDR_CONFIG, + adev->gfx.config.gb_addr_config, 0xFFFFFFFF, 0); + WREG32_SOC15_DPG_MODE(UVD, 0, mmUVD_UDEC_DB_ADDR_CONFIG, + adev->gfx.config.gb_addr_config, 0xFFFFFFFF, 0); + WREG32_SOC15_DPG_MODE(UVD, 0, mmUVD_UDEC_DBW_ADDR_CONFIG, + adev->gfx.config.gb_addr_config, 0xFFFFFFFF, 0); + WREG32_SOC15_DPG_MODE(UVD, 0, mmUVD_JPEG_ADDR_CONFIG, + adev->gfx.config.gb_addr_config, 0xFFFFFFFF, 0); + WREG32_SOC15_DPG_MODE(UVD, 0, mmUVD_JPEG_UV_ADDR_CONFIG, + adev->gfx.config.gb_addr_config, 0xFFFFFFFF, 0); +} + /** * vcn_v1_0_disable_clock_gating - disable VCN clock gating * @@ -519,6 +579,62 @@ static void vcn_v1_0_enable_clock_gating(struct amdgpu_device *adev) WREG32_SOC15(VCN, 0, mmUVD_SUVD_CGC_CTRL, data); } +static void vcn_v1_0_clock_gating_dpg_mode(struct amdgpu_device *adev, uint8_t sram_sel) +{ + uint32_t reg_data = 0; + + /* disable JPEG CGC */ + if (adev->cg_flags & AMD_CG_SUPPORT_VCN_MGCG) + reg_data = 1 << JPEG_CGC_CTRL__DYN_CLOCK_MODE__SHIFT; + else + reg_data = 0 << JPEG_CGC_CTRL__DYN_CLOCK_MODE__SHIFT; + reg_data |= 1 << JPEG_CGC_CTRL__CLK_GATE_DLY_TIMER__SHIFT; + reg_data |= 4 << JPEG_CGC_CTRL__CLK_OFF_DELAY__SHIFT; + WREG32_SOC15_DPG_MODE(UVD, 0, mmJPEG_CGC_CTRL, reg_data, 0xFFFFFFFF, sram_sel); + + WREG32_SOC15_DPG_MODE(UVD, 0, mmJPEG_CGC_GATE, 0, 0xFFFFFFFF, sram_sel); + + /* enable sw clock gating control */ + if (adev->cg_flags & AMD_CG_SUPPORT_VCN_MGCG) + reg_data = 1 << UVD_CGC_CTRL__DYN_CLOCK_MODE__SHIFT; + else + reg_data = 0 << UVD_CGC_CTRL__DYN_CLOCK_MODE__SHIFT; + reg_data |= 1 << UVD_CGC_CTRL__CLK_GATE_DLY_TIMER__SHIFT; + reg_data |= 4 << UVD_CGC_CTRL__CLK_OFF_DELAY__SHIFT; + WREG32_SOC15_DPG_MODE(UVD, 0, mmUVD_CGC_CTRL, reg_data, 0xFFFFFFFF, sram_sel); + + reg_data &= ~(UVD_CGC_CTRL__UDEC_RE_MODE_MASK | + UVD_CGC_CTRL__UDEC_CM_MODE_MASK | + UVD_CGC_CTRL__UDEC_IT_MODE_MASK | + UVD_CGC_CTRL__UDEC_DB_MODE_MASK | + UVD_CGC_CTRL__UDEC_MP_MODE_MASK | + UVD_CGC_CTRL__SYS_MODE_MASK | + UVD_CGC_CTRL__UDEC_MODE_MASK | + UVD_CGC_CTRL__MPEG2_MODE_MASK | + UVD_CGC_CTRL__REGS_MODE_MASK | + UVD_CGC_CTRL__RBC_MODE_MASK | + UVD_CGC_CTRL__LMI_MC_MODE_MASK | + UVD_CGC_CTRL__LMI_UMC_MODE_MASK | + UVD_CGC_CTRL__IDCT_MODE_MASK | + UVD_CGC_CTRL__MPRD_MODE_MASK | + UVD_CGC_CTRL__MPC_MODE_MASK | + UVD_CGC_CTRL__LBSI_MODE_MASK | + UVD_CGC_CTRL__LRBBM_MODE_MASK | + UVD_CGC_CTRL__WCB_MODE_MASK | + UVD_CGC_CTRL__VCPU_MODE_MASK | + UVD_CGC_CTRL__SCPU_MODE_MASK); + WREG32_SOC15_DPG_MODE(UVD, 0, mmUVD_CGC_CTRL, reg_data, 0xFFFFFFFF, sram_sel); + + /* turn off clock gating */ + WREG32_SOC15_DPG_MODE(UVD, 0, mmUVD_CGC_GATE, 0, 0xFFFFFFFF, sram_sel); + + /* turn on SUVD clock gating */ + WREG32_SOC15_DPG_MODE(UVD, 0, mmUVD_SUVD_CGC_GATE, 1, 0xFFFFFFFF, sram_sel); + + /* turn on sw mode in UVD_SUVD_CGC_CTRL */ + WREG32_SOC15_DPG_MODE(UVD, 0, mmUVD_SUVD_CGC_CTRL, 0, 0xFFFFFFFF, sram_sel); +} + static void vcn_1_0_disable_static_power_gating(struct amdgpu_device *adev) { uint32_t data = 0; @@ -614,7 +730,7 @@ static void vcn_1_0_enable_static_power_gating(struct amdgpu_device *adev) * * Setup and start the VCN block */ -static int vcn_v1_0_start(struct amdgpu_device *adev) +static int vcn_v1_0_start_spg_mode(struct amdgpu_device *adev) { struct amdgpu_ring *ring = &adev->vcn.ring_dec; uint32_t rb_bufsz, tmp; @@ -628,7 +744,7 @@ static int vcn_v1_0_start(struct amdgpu_device *adev) /* disable clock gating */ vcn_v1_0_disable_clock_gating(adev); - vcn_v1_0_mc_resume(adev); + vcn_v1_0_mc_resume_spg_mode(adev); /* disable interupt */ WREG32_P(SOC15_REG_OFFSET(UVD, 0, mmUVD_MASTINT_EN), 0, @@ -799,6 +915,170 @@ static int vcn_v1_0_start(struct amdgpu_device *adev) return 0; } +static int vcn_v1_0_start_dpg_mode(struct amdgpu_device *adev) +{ + struct amdgpu_ring *ring = &adev->vcn.ring_dec; + uint32_t rb_bufsz, tmp, reg_data; + uint32_t lmi_swap_cntl; + + /* disable byte swapping */ + lmi_swap_cntl = 0; + + vcn_1_0_enable_static_power_gating(adev); + + /* enable dynamic power gating mode */ + reg_data = RREG32_SOC15(UVD, 0, mmUVD_POWER_STATUS); + reg_data |= UVD_POWER_STATUS__UVD_PG_MODE_MASK; + reg_data |= UVD_POWER_STATUS__UVD_PG_EN_MASK; + WREG32_SOC15(UVD, 0, mmUVD_POWER_STATUS, reg_data); + + /* enable clock gating */ + vcn_v1_0_clock_gating_dpg_mode(adev, 0); + + /* enable VCPU clock */ + reg_data = (0xFF << UVD_VCPU_CNTL__PRB_TIMEOUT_VAL__SHIFT); + reg_data |= UVD_VCPU_CNTL__CLK_EN_MASK; + reg_data |= UVD_VCPU_CNTL__MIF_WR_LOW_THRESHOLD_BP_MASK; + WREG32_SOC15_DPG_MODE(UVD, 0, mmUVD_VCPU_CNTL, reg_data, 0xFFFFFFFF, 0); + + /* disable interupt */ + WREG32_SOC15_DPG_MODE(UVD, 0, mmUVD_MASTINT_EN, + 0, UVD_MASTINT_EN__VCPU_EN_MASK, 0); + + /* stall UMC and register bus before resetting VCPU */ + WREG32_SOC15_DPG_MODE(UVD, 0, mmUVD_LMI_CTRL2, + UVD_LMI_CTRL2__STALL_ARB_UMC_MASK, UVD_LMI_CTRL2__STALL_ARB_UMC_MASK, 0); + + /* put LMI, VCPU, RBC etc... into reset */ + WREG32_SOC15_DPG_MODE(UVD, 0, mmUVD_SOFT_RESET, + UVD_SOFT_RESET__LMI_SOFT_RESET_MASK | + UVD_SOFT_RESET__VCPU_SOFT_RESET_MASK | + UVD_SOFT_RESET__LBSI_SOFT_RESET_MASK | + UVD_SOFT_RESET__RBC_SOFT_RESET_MASK | + UVD_SOFT_RESET__CSM_SOFT_RESET_MASK | + UVD_SOFT_RESET__CXW_SOFT_RESET_MASK | + UVD_SOFT_RESET__TAP_SOFT_RESET_MASK | + UVD_SOFT_RESET__LMI_UMC_SOFT_RESET_MASK, + 0xFFFFFFFF, 0); + + /* initialize VCN memory controller */ + WREG32_SOC15_DPG_MODE(UVD, 0, mmUVD_LMI_CTRL, + (0x40 << UVD_LMI_CTRL__WRITE_CLEAN_TIMER__SHIFT) | + UVD_LMI_CTRL__WRITE_CLEAN_TIMER_EN_MASK | + UVD_LMI_CTRL__DATA_COHERENCY_EN_MASK | + UVD_LMI_CTRL__VCPU_DATA_COHERENCY_EN_MASK | + UVD_LMI_CTRL__REQ_MODE_MASK | + 0x00100000L, 0xFFFFFFFF, 0); + +#ifdef __BIG_ENDIAN + /* swap (8 in 32) RB and IB */ + lmi_swap_cntl = 0xa; +#endif + WREG32_SOC15_DPG_MODE(UVD, 0, mmUVD_LMI_SWAP_CNTL, lmi_swap_cntl, 0xFFFFFFFF, 0); + + WREG32_SOC15_DPG_MODE(UVD, 0, mmUVD_MPC_SET_MUXA0, 0x40c2040, 0xFFFFFFFF, 0); + WREG32_SOC15_DPG_MODE(UVD, 0, mmUVD_MPC_SET_MUXA1, 0x0, 0xFFFFFFFF, 0); + WREG32_SOC15_DPG_MODE(UVD, 0, mmUVD_MPC_SET_MUXB0, 0x40c2040, 0xFFFFFFFF, 0); + WREG32_SOC15_DPG_MODE(UVD, 0, mmUVD_MPC_SET_MUXB1, 0x0, 0xFFFFFFFF, 0); + WREG32_SOC15_DPG_MODE(UVD, 0, mmUVD_MPC_SET_ALU, 0, 0xFFFFFFFF, 0); + WREG32_SOC15_DPG_MODE(UVD, 0, mmUVD_MPC_SET_MUX, 0x88, 0xFFFFFFFF, 0); + + vcn_v1_0_mc_resume_dpg_mode(adev); + + /* take all subblocks out of reset, except VCPU */ + WREG32_SOC15_DPG_MODE(UVD, 0, mmUVD_SOFT_RESET, + UVD_SOFT_RESET__VCPU_SOFT_RESET_MASK, 0xFFFFFFFF, 0); + + /* enable VCPU clock */ + WREG32_SOC15_DPG_MODE(UVD, 0, mmUVD_VCPU_CNTL, + UVD_VCPU_CNTL__CLK_EN_MASK, 0xFFFFFFFF, 0); + + /* enable UMC */ + WREG32_SOC15_DPG_MODE(UVD, 0, mmUVD_LMI_CTRL2, + 0, UVD_LMI_CTRL2__STALL_ARB_UMC_MASK, 0); + + /* boot up the VCPU */ + WREG32_SOC15_DPG_MODE(UVD, 0, mmUVD_SOFT_RESET, 0, 0xFFFFFFFF, 0); + + /* enable master interrupt */ + WREG32_SOC15_DPG_MODE(UVD, 0, mmUVD_MASTINT_EN, + (UVD_MASTINT_EN__VCPU_EN_MASK|UVD_MASTINT_EN__SYS_EN_MASK), + (UVD_MASTINT_EN__VCPU_EN_MASK|UVD_MASTINT_EN__SYS_EN_MASK), 0); + + vcn_v1_0_clock_gating_dpg_mode(adev, 1); + /* setup mmUVD_LMI_CTRL */ + WREG32_SOC15_DPG_MODE(UVD, 0, mmUVD_LMI_CTRL, + (UVD_LMI_CTRL__WRITE_CLEAN_TIMER_EN_MASK | + UVD_LMI_CTRL__CRC_RESET_MASK | + UVD_LMI_CTRL__MASK_MC_URGENT_MASK | + UVD_LMI_CTRL__DATA_COHERENCY_EN_MASK | + UVD_LMI_CTRL__VCPU_DATA_COHERENCY_EN_MASK | + (8 << UVD_LMI_CTRL__WRITE_CLEAN_TIMER__SHIFT) | + 0x00100000L), 0xFFFFFFFF, 1); + + tmp = adev->gfx.config.gb_addr_config; + /* setup VCN global tiling registers */ + WREG32_SOC15_DPG_MODE(UVD, 0, mmUVD_JPEG_ADDR_CONFIG, tmp, 0xFFFFFFFF, 1); + WREG32_SOC15_DPG_MODE(UVD, 0, mmUVD_JPEG_UV_ADDR_CONFIG, tmp, 0xFFFFFFFF, 1); + + /* enable System Interrupt for JRBC */ + WREG32_SOC15_DPG_MODE(UVD, 0, mmUVD_SYS_INT_EN, + UVD_SYS_INT_EN__UVD_JRBC_EN_MASK, 0xFFFFFFFF, 1); + + /* force RBC into idle state */ + rb_bufsz = order_base_2(ring->ring_size); + tmp = REG_SET_FIELD(0, UVD_RBC_RB_CNTL, RB_BUFSZ, rb_bufsz); + tmp = REG_SET_FIELD(tmp, UVD_RBC_RB_CNTL, RB_BLKSZ, 1); + tmp = REG_SET_FIELD(tmp, UVD_RBC_RB_CNTL, RB_NO_FETCH, 1); + tmp = REG_SET_FIELD(tmp, UVD_RBC_RB_CNTL, RB_WPTR_POLL_EN, 0); + tmp = REG_SET_FIELD(tmp, UVD_RBC_RB_CNTL, RB_NO_UPDATE, 1); + tmp = REG_SET_FIELD(tmp, UVD_RBC_RB_CNTL, RB_RPTR_WR_EN, 1); + WREG32_SOC15(UVD, 0, mmUVD_RBC_RB_CNTL, tmp); + + /* set the write pointer delay */ + WREG32_SOC15(UVD, 0, mmUVD_RBC_RB_WPTR_CNTL, 0); + + /* set the wb address */ + WREG32_SOC15(UVD, 0, mmUVD_RBC_RB_RPTR_ADDR, + (upper_32_bits(ring->gpu_addr) >> 2)); + + /* programm the RB_BASE for ring buffer */ + WREG32_SOC15(UVD, 0, mmUVD_LMI_RBC_RB_64BIT_BAR_LOW, + lower_32_bits(ring->gpu_addr)); + WREG32_SOC15(UVD, 0, mmUVD_LMI_RBC_RB_64BIT_BAR_HIGH, + upper_32_bits(ring->gpu_addr)); + + /* Initialize the ring buffer's read and write pointers */ + WREG32_SOC15(UVD, 0, mmUVD_RBC_RB_RPTR, 0); + + ring->wptr = RREG32_SOC15(UVD, 0, mmUVD_RBC_RB_RPTR); + WREG32_SOC15(UVD, 0, mmUVD_RBC_RB_WPTR, + lower_32_bits(ring->wptr)); + + WREG32_P(SOC15_REG_OFFSET(UVD, 0, mmUVD_RBC_RB_CNTL), 0, + ~UVD_RBC_RB_CNTL__RB_NO_FETCH_MASK); + + /* initialize wptr */ + ring->wptr = RREG32_SOC15(UVD, 0, mmUVD_JRBC_RB_WPTR); + + /* copy patch commands to the jpeg ring */ + vcn_v1_0_jpeg_ring_set_patch_ring(ring, + (ring->wptr + ring->max_dw * amdgpu_sched_hw_submission)); + + return 0; +} + +static int vcn_v1_0_start(struct amdgpu_device *adev) +{ + int r; + + if (adev->pg_flags & AMD_PG_SUPPORT_VCN_DPG) + r = vcn_v1_0_start_dpg_mode(adev); + else + r = vcn_v1_0_start_spg_mode(adev); + return r; +} + /** * vcn_v1_0_stop - stop VCN block * @@ -806,7 +1086,7 @@ static int vcn_v1_0_start(struct amdgpu_device *adev) * * stop the VCN block */ -static int vcn_v1_0_stop(struct amdgpu_device *adev) +static int vcn_v1_0_stop_spg_mode(struct amdgpu_device *adev) { /* force RBC into idle state */ WREG32_SOC15(UVD, 0, mmUVD_RBC_RB_CNTL, 0x11010101); @@ -836,6 +1116,33 @@ static int vcn_v1_0_stop(struct amdgpu_device *adev) return 0; } +static int vcn_v1_0_stop_dpg_mode(struct amdgpu_device *adev) +{ + int ret_code; + + /* Wait for power status to be 1 */ + SOC15_WAIT_ON_RREG(UVD, 0, mmUVD_POWER_STATUS, 0x1, + UVD_POWER_STATUS__UVD_POWER_STATUS_MASK, ret_code); + + /* disable dynamic power gating mode */ + WREG32_P(SOC15_REG_OFFSET(UVD, 0, mmUVD_POWER_STATUS), 0, + ~UVD_POWER_STATUS__UVD_PG_MODE_MASK); + + return 0; +} + +static int vcn_v1_0_stop(struct amdgpu_device *adev) +{ + int r; + + if (adev->pg_flags & AMD_PG_SUPPORT_VCN_DPG) + r = vcn_v1_0_stop_dpg_mode(adev); + else + r = vcn_v1_0_stop_spg_mode(adev); + + return r; +} + static bool vcn_v1_0_is_idle(void *handle) { struct amdgpu_device *adev = (struct amdgpu_device *)handle; @@ -1633,12 +1940,20 @@ static int vcn_v1_0_set_powergating_state(void *handle, * revisit this when there is a cleaner line between * the smc and the hw blocks */ + int ret; struct amdgpu_device *adev = (struct amdgpu_device *)handle; + if(state == adev->vcn.cur_state) + return 0; + if (state == AMD_PG_STATE_GATE) - return vcn_v1_0_stop(adev); + ret = vcn_v1_0_stop(adev); else - return vcn_v1_0_start(adev); + ret = vcn_v1_0_start(adev); + + if(!ret) + adev->vcn.cur_state = state; + return ret; } static const struct amd_ip_funcs vcn_v1_0_ip_funcs = { diff --git a/drivers/gpu/drm/amd/amdgpu/vega10_ih.c b/drivers/gpu/drm/amd/amdgpu/vega10_ih.c index acbe5a770207..a99f71797aa3 100644 --- a/drivers/gpu/drm/amd/amdgpu/vega10_ih.c +++ b/drivers/gpu/drm/amd/amdgpu/vega10_ih.c @@ -380,7 +380,7 @@ static int vega10_ih_sw_init(void *handle) int r; struct amdgpu_device *adev = (struct amdgpu_device *)handle; - r = amdgpu_ih_ring_init(adev, 256 * 1024, true); + r = amdgpu_ih_ring_init(adev, &adev->irq.ih, 256 * 1024, true); if (r) return r; @@ -397,7 +397,7 @@ static int vega10_ih_sw_fini(void *handle) struct amdgpu_device *adev = (struct amdgpu_device *)handle; amdgpu_irq_fini(adev); - amdgpu_ih_ring_fini(adev); + amdgpu_ih_ring_fini(adev, &adev->irq.ih); return 0; } @@ -494,8 +494,7 @@ static const struct amdgpu_ih_funcs vega10_ih_funcs = { static void vega10_ih_set_interrupt_funcs(struct amdgpu_device *adev) { - if (adev->irq.ih_funcs == NULL) - adev->irq.ih_funcs = &vega10_ih_funcs; + adev->irq.ih_funcs = &vega10_ih_funcs; } const struct amdgpu_ip_block_version vega10_ih_ip_block = diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c b/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c index 758398bdb39b..14d5b5fa822d 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c @@ -447,6 +447,24 @@ static int kfd_ioctl_set_cu_mask(struct file *filp, struct kfd_process *p, return retval; } +static int kfd_ioctl_get_queue_wave_state(struct file *filep, + struct kfd_process *p, void *data) +{ + struct kfd_ioctl_get_queue_wave_state_args *args = data; + int r; + + mutex_lock(&p->mutex); + + r = pqm_get_wave_state(&p->pqm, args->queue_id, + (void __user *)args->ctl_stack_address, + &args->ctl_stack_used_size, + &args->save_area_used_size); + + mutex_unlock(&p->mutex); + + return r; +} + static int kfd_ioctl_set_memory_policy(struct file *filep, struct kfd_process *p, void *data) { @@ -1615,6 +1633,9 @@ static const struct amdkfd_ioctl_desc amdkfd_ioctls[] = { AMDKFD_IOCTL_DEF(AMDKFD_IOC_SET_CU_MASK, kfd_ioctl_set_cu_mask, 0), + AMDKFD_IOCTL_DEF(AMDKFD_IOC_GET_QUEUE_WAVE_STATE, + kfd_ioctl_get_queue_wave_state, 0) + }; #define AMDKFD_CORE_IOCTL_COUNT ARRAY_SIZE(amdkfd_ioctls) diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_crat.c b/drivers/gpu/drm/amd/amdkfd/kfd_crat.c index d4560f1869bd..56412b0e7e1c 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_crat.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_crat.c @@ -647,6 +647,7 @@ static int kfd_fill_gpu_cache_info(struct kfd_dev *kdev, num_of_cache_types = ARRAY_SIZE(polaris11_cache_info); break; case CHIP_VEGA10: + case CHIP_VEGA20: pcache_info = vega10_cache_info; num_of_cache_types = ARRAY_SIZE(vega10_cache_info); break; diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device.c b/drivers/gpu/drm/amd/amdkfd/kfd_device.c index 9b4e6ad4a7df..a9f18ea7e354 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_device.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_device.c @@ -53,6 +53,7 @@ static const struct kfd_device_info kaveri_device_info = { .needs_iommu_device = true, .needs_pci_atomics = false, .num_sdma_engines = 2, + .num_sdma_queues_per_engine = 2, }; static const struct kfd_device_info carrizo_device_info = { @@ -69,6 +70,7 @@ static const struct kfd_device_info carrizo_device_info = { .needs_iommu_device = true, .needs_pci_atomics = false, .num_sdma_engines = 2, + .num_sdma_queues_per_engine = 2, }; static const struct kfd_device_info raven_device_info = { @@ -84,6 +86,7 @@ static const struct kfd_device_info raven_device_info = { .needs_iommu_device = true, .needs_pci_atomics = true, .num_sdma_engines = 1, + .num_sdma_queues_per_engine = 2, }; #endif @@ -101,6 +104,7 @@ static const struct kfd_device_info hawaii_device_info = { .needs_iommu_device = false, .needs_pci_atomics = false, .num_sdma_engines = 2, + .num_sdma_queues_per_engine = 2, }; static const struct kfd_device_info tonga_device_info = { @@ -116,21 +120,7 @@ static const struct kfd_device_info tonga_device_info = { .needs_iommu_device = false, .needs_pci_atomics = true, .num_sdma_engines = 2, -}; - -static const struct kfd_device_info tonga_vf_device_info = { - .asic_family = CHIP_TONGA, - .max_pasid_bits = 16, - .max_no_of_hqd = 24, - .doorbell_size = 4, - .ih_ring_entry_size = 4 * sizeof(uint32_t), - .event_interrupt_class = &event_interrupt_class_cik, - .num_of_watch_points = 4, - .mqd_size_aligned = MQD_SIZE_ALIGNED, - .supports_cwsr = false, - .needs_iommu_device = false, - .needs_pci_atomics = false, - .num_sdma_engines = 2, + .num_sdma_queues_per_engine = 2, }; static const struct kfd_device_info fiji_device_info = { @@ -146,6 +136,7 @@ static const struct kfd_device_info fiji_device_info = { .needs_iommu_device = false, .needs_pci_atomics = true, .num_sdma_engines = 2, + .num_sdma_queues_per_engine = 2, }; static const struct kfd_device_info fiji_vf_device_info = { @@ -161,6 +152,7 @@ static const struct kfd_device_info fiji_vf_device_info = { .needs_iommu_device = false, .needs_pci_atomics = false, .num_sdma_engines = 2, + .num_sdma_queues_per_engine = 2, }; @@ -177,6 +169,7 @@ static const struct kfd_device_info polaris10_device_info = { .needs_iommu_device = false, .needs_pci_atomics = true, .num_sdma_engines = 2, + .num_sdma_queues_per_engine = 2, }; static const struct kfd_device_info polaris10_vf_device_info = { @@ -192,6 +185,7 @@ static const struct kfd_device_info polaris10_vf_device_info = { .needs_iommu_device = false, .needs_pci_atomics = false, .num_sdma_engines = 2, + .num_sdma_queues_per_engine = 2, }; static const struct kfd_device_info polaris11_device_info = { @@ -207,6 +201,7 @@ static const struct kfd_device_info polaris11_device_info = { .needs_iommu_device = false, .needs_pci_atomics = true, .num_sdma_engines = 2, + .num_sdma_queues_per_engine = 2, }; static const struct kfd_device_info vega10_device_info = { @@ -222,6 +217,7 @@ static const struct kfd_device_info vega10_device_info = { .needs_iommu_device = false, .needs_pci_atomics = false, .num_sdma_engines = 2, + .num_sdma_queues_per_engine = 2, }; static const struct kfd_device_info vega10_vf_device_info = { @@ -237,8 +233,24 @@ static const struct kfd_device_info vega10_vf_device_info = { .needs_iommu_device = false, .needs_pci_atomics = false, .num_sdma_engines = 2, + .num_sdma_queues_per_engine = 2, }; +static const struct kfd_device_info vega20_device_info = { + .asic_family = CHIP_VEGA20, + .max_pasid_bits = 16, + .max_no_of_hqd = 24, + .doorbell_size = 8, + .ih_ring_entry_size = 8 * sizeof(uint32_t), + .event_interrupt_class = &event_interrupt_class_v9, + .num_of_watch_points = 4, + .mqd_size_aligned = MQD_SIZE_ALIGNED, + .supports_cwsr = true, + .needs_iommu_device = false, + .needs_pci_atomics = false, + .num_sdma_engines = 2, + .num_sdma_queues_per_engine = 8, +}; struct kfd_deviceid { unsigned short did; @@ -293,7 +305,6 @@ static const struct kfd_deviceid supported_devices[] = { { 0x6928, &tonga_device_info }, /* Tonga */ { 0x6929, &tonga_device_info }, /* Tonga */ { 0x692B, &tonga_device_info }, /* Tonga */ - { 0x692F, &tonga_vf_device_info }, /* Tonga vf */ { 0x6938, &tonga_device_info }, /* Tonga */ { 0x6939, &tonga_device_info }, /* Tonga */ { 0x7300, &fiji_device_info }, /* Fiji */ @@ -328,6 +339,12 @@ static const struct kfd_deviceid supported_devices[] = { { 0x6868, &vega10_device_info }, /* Vega10 */ { 0x686C, &vega10_vf_device_info }, /* Vega10 vf*/ { 0x687F, &vega10_device_info }, /* Vega10 */ + { 0x66a0, &vega20_device_info }, /* Vega20 */ + { 0x66a1, &vega20_device_info }, /* Vega20 */ + { 0x66a2, &vega20_device_info }, /* Vega20 */ + { 0x66a3, &vega20_device_info }, /* Vega20 */ + { 0x66a7, &vega20_device_info }, /* Vega20 */ + { 0x66af, &vega20_device_info } /* Vega20 */ }; static int kfd_gtt_sa_init(struct kfd_dev *kfd, unsigned int buf_size, @@ -366,6 +383,10 @@ struct kfd_dev *kgd2kfd_probe(struct kgd_dev *kgd, return NULL; } + kfd = kzalloc(sizeof(*kfd), GFP_KERNEL); + if (!kfd) + return NULL; + /* Allow BIF to recode atomics to PCIe 3.0 AtomicOps. * 32 and 64-bit requests are possible and must be * supported. @@ -377,12 +398,10 @@ struct kfd_dev *kgd2kfd_probe(struct kgd_dev *kgd, dev_info(kfd_device, "skipped device %x:%x, PCI rejects atomics\n", pdev->vendor, pdev->device); + kfree(kfd); return NULL; - } - - kfd = kzalloc(sizeof(*kfd), GFP_KERNEL); - if (!kfd) - return NULL; + } else if (!ret) + kfd->pci_atomic_requested = true; kfd->kgd = kgd; kfd->device_info = device_info; @@ -419,6 +438,10 @@ bool kgd2kfd_device_init(struct kfd_dev *kfd, { unsigned int size; + kfd->mec_fw_version = kfd->kfd2kgd->get_fw_version(kfd->kgd, + KGD_ENGINE_MEC1); + kfd->sdma_fw_version = kfd->kfd2kgd->get_fw_version(kfd->kgd, + KGD_ENGINE_SDMA1); kfd->shared_resources = *gpu_resources; kfd->vm_info.first_vmid_kfd = ffs(gpu_resources->compute_vmid_bitmap)-1; diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c index ec0d62a16e53..d6af31ccf0ee 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c @@ -109,7 +109,7 @@ static unsigned int get_num_sdma_engines(struct device_queue_manager *dqm) unsigned int get_num_sdma_queues(struct device_queue_manager *dqm) { return dqm->dev->device_info->num_sdma_engines - * KFD_SDMA_QUEUES_PER_ENGINE; + * dqm->dev->device_info->num_sdma_queues_per_engine; } void program_sh_mem_settings(struct device_queue_manager *dqm, @@ -656,7 +656,7 @@ static int restore_process_queues_nocpsch(struct device_queue_manager *dqm, struct queue *q; struct mqd_manager *mqd_mgr; struct kfd_process_device *pdd; - uint32_t pd_base; + uint64_t pd_base; int retval = 0; pdd = qpd_to_pdd(qpd); @@ -676,7 +676,7 @@ static int restore_process_queues_nocpsch(struct device_queue_manager *dqm, /* Update PD Base in QPD */ qpd->page_table_base = pd_base; - pr_debug("Updated PD address to 0x%08x\n", pd_base); + pr_debug("Updated PD address to 0x%llx\n", pd_base); if (!list_empty(&qpd->queues_list)) { dqm->dev->kfd2kgd->set_vm_context_page_table_base( @@ -717,7 +717,7 @@ static int restore_process_queues_cpsch(struct device_queue_manager *dqm, { struct queue *q; struct kfd_process_device *pdd; - uint32_t pd_base; + uint64_t pd_base; int retval = 0; pdd = qpd_to_pdd(qpd); @@ -737,7 +737,7 @@ static int restore_process_queues_cpsch(struct device_queue_manager *dqm, /* Update PD Base in QPD */ qpd->page_table_base = pd_base; - pr_debug("Updated PD address to 0x%08x\n", pd_base); + pr_debug("Updated PD address to 0x%llx\n", pd_base); /* activate all active queues on the qpd */ list_for_each_entry(q, &qpd->queues_list, list) { @@ -761,7 +761,7 @@ static int register_process(struct device_queue_manager *dqm, { struct device_process_node *n; struct kfd_process_device *pdd; - uint32_t pd_base; + uint64_t pd_base; int retval; n = kzalloc(sizeof(*n), GFP_KERNEL); @@ -779,6 +779,7 @@ static int register_process(struct device_queue_manager *dqm, /* Update PD Base in QPD */ qpd->page_table_base = pd_base; + pr_debug("Updated PD address to 0x%llx\n", pd_base); retval = dqm->asic_ops.update_qpd(dqm, qpd); @@ -1528,6 +1529,41 @@ static int process_termination_nocpsch(struct device_queue_manager *dqm, return retval; } +static int get_wave_state(struct device_queue_manager *dqm, + struct queue *q, + void __user *ctl_stack, + u32 *ctl_stack_used_size, + u32 *save_area_used_size) +{ + struct mqd_manager *mqd; + int r; + + dqm_lock(dqm); + + if (q->properties.type != KFD_QUEUE_TYPE_COMPUTE || + q->properties.is_active || !q->device->cwsr_enabled) { + r = -EINVAL; + goto dqm_unlock; + } + + mqd = dqm->ops.get_mqd_manager(dqm, KFD_MQD_TYPE_COMPUTE); + if (!mqd) { + r = -ENOMEM; + goto dqm_unlock; + } + + if (!mqd->get_wave_state) { + r = -EINVAL; + goto dqm_unlock; + } + + r = mqd->get_wave_state(mqd, q->mqd, ctl_stack, ctl_stack_used_size, + save_area_used_size); + +dqm_unlock: + dqm_unlock(dqm); + return r; +} static int process_termination_cpsch(struct device_queue_manager *dqm, struct qcm_process_device *qpd) @@ -1649,6 +1685,7 @@ struct device_queue_manager *device_queue_manager_init(struct kfd_dev *dev) dqm->ops.process_termination = process_termination_cpsch; dqm->ops.evict_process_queues = evict_process_queues_cpsch; dqm->ops.restore_process_queues = restore_process_queues_cpsch; + dqm->ops.get_wave_state = get_wave_state; break; case KFD_SCHED_POLICY_NO_HWS: /* initialize dqm for no cp scheduling */ @@ -1668,6 +1705,7 @@ struct device_queue_manager *device_queue_manager_init(struct kfd_dev *dev) dqm->ops.evict_process_queues = evict_process_queues_nocpsch; dqm->ops.restore_process_queues = restore_process_queues_nocpsch; + dqm->ops.get_wave_state = get_wave_state; break; default: pr_err("Invalid scheduling policy %d\n", dqm->sched_policy); @@ -1695,6 +1733,7 @@ struct device_queue_manager *device_queue_manager_init(struct kfd_dev *dev) break; case CHIP_VEGA10: + case CHIP_VEGA20: case CHIP_RAVEN: device_queue_manager_init_v9(&dqm->asic_ops); break; @@ -1806,7 +1845,9 @@ int dqm_debugfs_hqds(struct seq_file *m, void *data) } for (pipe = 0; pipe < get_num_sdma_engines(dqm); pipe++) { - for (queue = 0; queue < KFD_SDMA_QUEUES_PER_ENGINE; queue++) { + for (queue = 0; + queue < dqm->dev->device_info->num_sdma_queues_per_engine; + queue++) { r = dqm->dev->kfd2kgd->hqd_sdma_dump( dqm->dev->kgd, pipe, queue, &dump, &n_regs); if (r) diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.h b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.h index 00da3169a004..70e38a2e23b9 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.h +++ b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.h @@ -33,7 +33,6 @@ #define KFD_UNMAP_LATENCY_MS (4000) #define QUEUE_PREEMPT_DEFAULT_TIMEOUT_MS (2 * KFD_UNMAP_LATENCY_MS + 1000) -#define KFD_SDMA_QUEUES_PER_ENGINE (2) struct device_process_node { struct qcm_process_device *qpd; @@ -82,6 +81,8 @@ struct device_process_node { * * @restore_process_queues: Restore all evicted queues queues of a process * + * @get_wave_state: Retrieves context save state and optionally copies the + * control stack, if kept in the MQD, to the given userspace address. */ struct device_queue_manager_ops { @@ -137,6 +138,12 @@ struct device_queue_manager_ops { struct qcm_process_device *qpd); int (*restore_process_queues)(struct device_queue_manager *dqm, struct qcm_process_device *qpd); + + int (*get_wave_state)(struct device_queue_manager *dqm, + struct queue *q, + void __user *ctl_stack, + u32 *ctl_stack_used_size, + u32 *save_area_used_size); }; struct device_queue_manager_asic_ops { diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_flat_memory.c b/drivers/gpu/drm/amd/amdkfd/kfd_flat_memory.c index 97d5423c5673..3d66cec414af 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_flat_memory.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_flat_memory.c @@ -400,6 +400,7 @@ int kfd_init_apertures(struct kfd_process *process) kfd_init_apertures_vi(pdd, id); break; case CHIP_VEGA10: + case CHIP_VEGA20: case CHIP_RAVEN: kfd_init_apertures_v9(pdd, id); break; diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue.c b/drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue.c index 9f84b4d9fb88..6c31f7370193 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue.c @@ -322,6 +322,7 @@ struct kernel_queue *kernel_queue_init(struct kfd_dev *dev, break; case CHIP_VEGA10: + case CHIP_VEGA20: case CHIP_RAVEN: kernel_queue_init_v9(&kq->ops_asic_specific); break; diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue_v9.c b/drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue_v9.c index 684a3bf07efd..33830b1a5a54 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue_v9.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue_v9.c @@ -71,8 +71,7 @@ static int pm_map_process_v9(struct packet_manager *pm, uint32_t *buffer, struct qcm_process_device *qpd) { struct pm4_mes_map_process *packet; - uint64_t vm_page_table_base_addr = - (uint64_t)(qpd->page_table_base) << 12; + uint64_t vm_page_table_base_addr = qpd->page_table_base; packet = (struct pm4_mes_map_process *)buffer; memset(buffer, 0, sizeof(struct pm4_mes_map_process)); diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager.c b/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager.c index 3bc25ab84f34..e33019a7a883 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager.c @@ -39,6 +39,7 @@ struct mqd_manager *mqd_manager_init(enum KFD_MQD_TYPE type, case CHIP_POLARIS11: return mqd_manager_init_vi_tonga(type, dev); case CHIP_VEGA10: + case CHIP_VEGA20: case CHIP_RAVEN: return mqd_manager_init_v9(type, dev); default: diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager.h b/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager.h index 4e84052d4e21..f8261313ae7b 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager.h +++ b/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager.h @@ -43,6 +43,9 @@ * * @is_occupied: Checks if the relevant HQD slot is occupied. * + * @get_wave_state: Retrieves context save state and optionally copies the + * control stack, if kept in the MQD, to the given userspace address. + * * @mqd_mutex: Mqd manager mutex. * * @dev: The kfd device structure coupled with this module. @@ -85,6 +88,11 @@ struct mqd_manager { uint64_t queue_address, uint32_t pipe_id, uint32_t queue_id); + int (*get_wave_state)(struct mqd_manager *mm, void *mqd, + void __user *ctl_stack, + u32 *ctl_stack_used_size, + u32 *save_area_used_size); + #if defined(CONFIG_DEBUG_FS) int (*debugfs_show_mqd)(struct seq_file *m, void *data); #endif diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_v9.c b/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_v9.c index 0cedb37cf513..f381c1cb27bd 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_v9.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_v9.c @@ -266,6 +266,28 @@ static bool is_occupied(struct mqd_manager *mm, void *mqd, pipe_id, queue_id); } +static int get_wave_state(struct mqd_manager *mm, void *mqd, + void __user *ctl_stack, + u32 *ctl_stack_used_size, + u32 *save_area_used_size) +{ + struct v9_mqd *m; + + /* Control stack is located one page after MQD. */ + void *mqd_ctl_stack = (void *)((uintptr_t)mqd + PAGE_SIZE); + + m = get_mqd(mqd); + + *ctl_stack_used_size = m->cp_hqd_cntl_stack_size - + m->cp_hqd_cntl_stack_offset; + *save_area_used_size = m->cp_hqd_wg_state_offset; + + if (copy_to_user(ctl_stack, mqd_ctl_stack, m->cp_hqd_cntl_stack_size)) + return -EFAULT; + + return 0; +} + static int init_mqd_hiq(struct mqd_manager *mm, void **mqd, struct kfd_mem_obj **mqd_mem_obj, uint64_t *gart_addr, struct queue_properties *q) @@ -435,6 +457,7 @@ struct mqd_manager *mqd_manager_init_v9(enum KFD_MQD_TYPE type, mqd->update_mqd = update_mqd; mqd->destroy_mqd = destroy_mqd; mqd->is_occupied = is_occupied; + mqd->get_wave_state = get_wave_state; #if defined(CONFIG_DEBUG_FS) mqd->debugfs_show_mqd = debugfs_show_mqd; #endif diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_vi.c b/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_vi.c index b81fda3754da..6469b3456f00 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_vi.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_vi.c @@ -269,6 +269,28 @@ static bool is_occupied(struct mqd_manager *mm, void *mqd, pipe_id, queue_id); } +static int get_wave_state(struct mqd_manager *mm, void *mqd, + void __user *ctl_stack, + u32 *ctl_stack_used_size, + u32 *save_area_used_size) +{ + struct vi_mqd *m; + + m = get_mqd(mqd); + + *ctl_stack_used_size = m->cp_hqd_cntl_stack_size - + m->cp_hqd_cntl_stack_offset; + *save_area_used_size = m->cp_hqd_wg_state_offset - + m->cp_hqd_cntl_stack_size; + + /* Control stack is not copied to user mode for GFXv8 because + * it's part of the context save area that is already + * accessible to user mode + */ + + return 0; +} + static int init_mqd_hiq(struct mqd_manager *mm, void **mqd, struct kfd_mem_obj **mqd_mem_obj, uint64_t *gart_addr, struct queue_properties *q) @@ -436,6 +458,7 @@ struct mqd_manager *mqd_manager_init_vi(enum KFD_MQD_TYPE type, mqd->update_mqd = update_mqd; mqd->destroy_mqd = destroy_mqd; mqd->is_occupied = is_occupied; + mqd->get_wave_state = get_wave_state; #if defined(CONFIG_DEBUG_FS) mqd->debugfs_show_mqd = debugfs_show_mqd; #endif diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_packet_manager.c b/drivers/gpu/drm/amd/amdkfd/kfd_packet_manager.c index 1092631765cb..c6080ed3b6a7 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_packet_manager.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_packet_manager.c @@ -229,6 +229,7 @@ int pm_init(struct packet_manager *pm, struct device_queue_manager *dqm) pm->pmf = &kfd_vi_pm_funcs; break; case CHIP_VEGA10: + case CHIP_VEGA20: case CHIP_RAVEN: pm->pmf = &kfd_v9_pm_funcs; break; diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_priv.h b/drivers/gpu/drm/amd/amdkfd/kfd_priv.h index b0064b08aa11..53ff86d45d91 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_priv.h +++ b/drivers/gpu/drm/amd/amdkfd/kfd_priv.h @@ -176,6 +176,7 @@ struct kfd_device_info { bool needs_iommu_device; bool needs_pci_atomics; unsigned int num_sdma_engines; + unsigned int num_sdma_queues_per_engine; }; struct kfd_mem_obj { @@ -247,6 +248,10 @@ struct kfd_dev { /* Debug manager */ struct kfd_dbgmgr *dbgmgr; + /* Firmware versions */ + uint16_t mec_fw_version; + uint16_t sdma_fw_version; + /* Maximum process number mapped to HW scheduler */ unsigned int max_proc_per_quantum; @@ -257,6 +262,8 @@ struct kfd_dev { /* xGMI */ uint64_t hive_id; + + bool pci_atomic_requested; }; /* KGD2KFD callbacks */ @@ -500,11 +507,11 @@ struct qcm_process_device { * All the memory management data should be here too */ uint64_t gds_context_area; + uint64_t page_table_base; uint32_t sh_mem_config; uint32_t sh_mem_bases; uint32_t sh_mem_ape1_base; uint32_t sh_mem_ape1_limit; - uint32_t page_table_base; uint32_t gds_size; uint32_t num_gws; uint32_t num_oac; @@ -856,6 +863,11 @@ int pqm_set_cu_mask(struct process_queue_manager *pqm, unsigned int qid, struct queue_properties *p); struct kernel_queue *pqm_get_kernel_queue(struct process_queue_manager *pqm, unsigned int qid); +int pqm_get_wave_state(struct process_queue_manager *pqm, + unsigned int qid, + void __user *ctl_stack, + u32 *ctl_stack_used_size, + u32 *save_area_used_size); int amdkfd_fence_wait_timeout(unsigned int *fence_addr, unsigned int fence_value, diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_process_queue_manager.c b/drivers/gpu/drm/amd/amdkfd/kfd_process_queue_manager.c index c8cad9c078ae..fcaaf93681ac 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_process_queue_manager.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_process_queue_manager.c @@ -408,6 +408,28 @@ struct kernel_queue *pqm_get_kernel_queue( return NULL; } +int pqm_get_wave_state(struct process_queue_manager *pqm, + unsigned int qid, + void __user *ctl_stack, + u32 *ctl_stack_used_size, + u32 *save_area_used_size) +{ + struct process_queue_node *pqn; + + pqn = get_queue_by_qid(pqm, qid); + if (!pqn) { + pr_debug("amdkfd: No queue %d exists for operation\n", + qid); + return -EFAULT; + } + + return pqn->q->device->dqm->ops.get_wave_state(pqn->q->device->dqm, + pqn->q, + ctl_stack, + ctl_stack_used_size, + save_area_used_size); +} + #if defined(CONFIG_DEBUG_FS) int pqm_debugfs_mqds(struct seq_file *m, void *data) diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_topology.c b/drivers/gpu/drm/amd/amdkfd/kfd_topology.c index 0dff66be8d7a..e3843c5929ed 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_topology.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_topology.c @@ -482,11 +482,11 @@ static ssize_t node_show(struct kobject *kobj, struct attribute *attr, (unsigned long long int) 0); sysfs_show_32bit_prop(buffer, "fw_version", - dev->gpu->kfd2kgd->get_fw_version( - dev->gpu->kgd, - KGD_ENGINE_MEC1)); + dev->gpu->mec_fw_version); sysfs_show_32bit_prop(buffer, "capability", dev->node_props.capability); + sysfs_show_32bit_prop(buffer, "sdma_fw_version", + dev->gpu->sdma_fw_version); } return sysfs_show_32bit_prop(buffer, "max_engine_clk_ccompute", @@ -1127,17 +1127,40 @@ static void kfd_fill_mem_clk_max_info(struct kfd_topology_device *dev) static void kfd_fill_iolink_non_crat_info(struct kfd_topology_device *dev) { - struct kfd_iolink_properties *link; + struct kfd_iolink_properties *link, *cpu_link; + struct kfd_topology_device *cpu_dev; + uint32_t cap; + uint32_t cpu_flag = CRAT_IOLINK_FLAGS_ENABLED; + uint32_t flag = CRAT_IOLINK_FLAGS_ENABLED; if (!dev || !dev->gpu) return; - /* GPU only creates direck links so apply flags setting to all */ - if (dev->gpu->device_info->asic_family == CHIP_HAWAII) - list_for_each_entry(link, &dev->io_link_props, list) - link->flags = CRAT_IOLINK_FLAGS_ENABLED | - CRAT_IOLINK_FLAGS_NO_ATOMICS_32_BIT | - CRAT_IOLINK_FLAGS_NO_ATOMICS_64_BIT; + pcie_capability_read_dword(dev->gpu->pdev, + PCI_EXP_DEVCAP2, &cap); + + if (!(cap & (PCI_EXP_DEVCAP2_ATOMIC_COMP32 | + PCI_EXP_DEVCAP2_ATOMIC_COMP64))) + cpu_flag |= CRAT_IOLINK_FLAGS_NO_ATOMICS_32_BIT | + CRAT_IOLINK_FLAGS_NO_ATOMICS_64_BIT; + + if (!dev->gpu->pci_atomic_requested || + dev->gpu->device_info->asic_family == CHIP_HAWAII) + flag |= CRAT_IOLINK_FLAGS_NO_ATOMICS_32_BIT | + CRAT_IOLINK_FLAGS_NO_ATOMICS_64_BIT; + + /* GPU only creates direct links so apply flags setting to all */ + list_for_each_entry(link, &dev->io_link_props, list) { + link->flags = flag; + cpu_dev = kfd_topology_device_by_proximity_domain( + link->node_to); + if (cpu_dev) { + list_for_each_entry(cpu_link, + &cpu_dev->io_link_props, list) + if (cpu_link->node_to == link->node_from) + cpu_link->flags = cpu_flag; + } + } } int kfd_topology_add_device(struct kfd_dev *gpu) @@ -1255,6 +1278,7 @@ int kfd_topology_add_device(struct kfd_dev *gpu) HSA_CAP_DOORBELL_TYPE_TOTALBITS_MASK); break; case CHIP_VEGA10: + case CHIP_VEGA20: case CHIP_RAVEN: dev->node_props.capability |= ((HSA_CAP_DOORBELL_TYPE_2_0 << HSA_CAP_DOORBELL_TYPE_TOTALBITS_SHIFT) & diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c index 6c849e055038..715422bb30db 100644 --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c @@ -338,14 +338,6 @@ static int dm_set_powergating_state(void *handle, /* Prototypes of private functions */ static int dm_early_init(void* handle); -static void hotplug_notify_work_func(struct work_struct *work) -{ - struct amdgpu_display_manager *dm = container_of(work, struct amdgpu_display_manager, mst_hotplug_work); - struct drm_device *dev = dm->ddev; - - drm_kms_helper_hotplug_event(dev); -} - /* Allocate memory for FBC compressed data */ static void amdgpu_dm_fbc_init(struct drm_connector *connector) { @@ -447,8 +439,6 @@ static int amdgpu_dm_init(struct amdgpu_device *adev) goto error; } - INIT_WORK(&adev->dm.mst_hotplug_work, hotplug_notify_work_func); - adev->dm.freesync_module = mod_freesync_create(adev->dm.dc); if (!adev->dm.freesync_module) { DRM_ERROR( @@ -1214,7 +1204,7 @@ static int dce110_register_irq_handlers(struct amdgpu_device *adev) struct dc_interrupt_params int_params = {0}; int r; int i; - unsigned client_id = AMDGPU_IH_CLIENTID_LEGACY; + unsigned client_id = AMDGPU_IRQ_CLIENTID_LEGACY; if (adev->asic_type == CHIP_VEGA10 || adev->asic_type == CHIP_VEGA12 || @@ -4079,6 +4069,7 @@ static void amdgpu_dm_do_flip(struct drm_crtc *crtc, /* TODO eliminate or rename surface_update */ struct dc_surface_update surface_updates[1] = { {0} }; struct dm_crtc_state *acrtc_state = to_dm_crtc_state(crtc->state); + struct dc_stream_status *stream_status; /* Prepare wait for target vblank early - before the fence-waits */ @@ -4134,7 +4125,19 @@ static void amdgpu_dm_do_flip(struct drm_crtc *crtc, spin_unlock_irqrestore(&crtc->dev->event_lock, flags); - surface_updates->surface = dc_stream_get_status(acrtc_state->stream)->plane_states[0]; + stream_status = dc_stream_get_status(acrtc_state->stream); + if (!stream_status) { + DRM_ERROR("No stream status for CRTC: id=%d\n", + acrtc->crtc_id); + return; + } + + surface_updates->surface = stream_status->plane_states[0]; + if (!surface_updates->surface) { + DRM_ERROR("No surface for CRTC: id=%d\n", + acrtc->crtc_id); + return; + } surface_updates->flip_addr = &addr; dc_commit_updates_for_stream(adev->dm.dc, @@ -4797,6 +4800,8 @@ void set_freesync_on_stream(struct amdgpu_display_manager *dm, mod_freesync_build_vrr_infopacket(dm->freesync_module, new_stream, &vrr, + packet_type_fs1, + NULL, &vrr_infopacket); new_crtc_state->adjust = vrr.adjust; diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.h b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.h index d4f1bdf93207..978b34a5011c 100644 --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.h +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.h @@ -108,8 +108,6 @@ struct amdgpu_display_manager { const struct dc_link *backlight_link; - struct work_struct mst_hotplug_work; - struct mod_freesync *freesync_module; /** diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_pp_smu.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_pp_smu.c index 6d16b4a0353d..0fab64a2a915 100644 --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_pp_smu.c +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_pp_smu.c @@ -105,6 +105,8 @@ bool dm_pp_apply_display_requirements( adev->powerplay.pp_funcs->display_configuration_change( adev->powerplay.pp_handle, &adev->pm.pm_display_cfg); + + amdgpu_pm_compute_clocks(adev); } return true; diff --git a/drivers/gpu/drm/amd/display/dc/calcs/dcn_calc_auto.c b/drivers/gpu/drm/amd/display/dc/calcs/dcn_calc_auto.c index 5e2ea12fbb73..d0fc54f8fb1c 100644 --- a/drivers/gpu/drm/amd/display/dc/calcs/dcn_calc_auto.c +++ b/drivers/gpu/drm/amd/display/dc/calcs/dcn_calc_auto.c @@ -1625,11 +1625,11 @@ void dispclkdppclkdcfclk_deep_sleep_prefetch_parameters_watermarks_and_performan else { v->dsty_after_scaler = 0.0; } - v->v_update_offset_pix =dcn_bw_ceil2(v->htotal[k] / 4.0, 1.0); + v->v_update_offset_pix[k] = dcn_bw_ceil2(v->htotal[k] / 4.0, 1.0); v->total_repeater_delay_time = v->max_inter_dcn_tile_repeaters * (2.0 / v->dppclk + 3.0 / v->dispclk); - v->v_update_width_pix = (14.0 / v->dcf_clk_deep_sleep + 12.0 / v->dppclk + v->total_repeater_delay_time) * v->pixel_clock[k]; - v->v_ready_offset_pix =dcn_bw_max2(150.0 / v->dppclk, v->total_repeater_delay_time + 20.0 / v->dcf_clk_deep_sleep + 10.0 / v->dppclk) * v->pixel_clock[k]; - v->t_setup = (v->v_update_offset_pix + v->v_update_width_pix + v->v_ready_offset_pix) / v->pixel_clock[k]; + v->v_update_width_pix[k] = (14.0 / v->dcf_clk_deep_sleep + 12.0 / v->dppclk + v->total_repeater_delay_time) * v->pixel_clock[k]; + v->v_ready_offset_pix[k] = dcn_bw_max2(150.0 / v->dppclk, v->total_repeater_delay_time + 20.0 / v->dcf_clk_deep_sleep + 10.0 / v->dppclk) * v->pixel_clock[k]; + v->t_setup = (v->v_update_offset_pix[k] + v->v_update_width_pix[k] + v->v_ready_offset_pix[k]) / v->pixel_clock[k]; v->v_startup[k] =dcn_bw_min2(v->v_startup_lines, v->max_vstartup_lines[k]); if (v->prefetch_mode == 0.0) { v->t_wait =dcn_bw_max3(v->dram_clock_change_latency + v->urgent_latency, v->sr_enter_plus_exit_time, v->urgent_latency); diff --git a/drivers/gpu/drm/amd/display/dc/calcs/dcn_calcs.c b/drivers/gpu/drm/amd/display/dc/calcs/dcn_calcs.c index 80ec09eef44f..3208188b7ed4 100644 --- a/drivers/gpu/drm/amd/display/dc/calcs/dcn_calcs.c +++ b/drivers/gpu/drm/amd/display/dc/calcs/dcn_calcs.c @@ -1096,9 +1096,9 @@ bool dcn_validate_bandwidth( if (pipe->top_pipe && pipe->top_pipe->plane_state == pipe->plane_state) continue; - pipe->pipe_dlg_param.vupdate_width = v->v_update_width[input_idx][v->dpp_per_plane[input_idx] == 2 ? 1 : 0]; - pipe->pipe_dlg_param.vupdate_offset = v->v_update_offset[input_idx][v->dpp_per_plane[input_idx] == 2 ? 1 : 0]; - pipe->pipe_dlg_param.vready_offset = v->v_ready_offset[input_idx][v->dpp_per_plane[input_idx] == 2 ? 1 : 0]; + pipe->pipe_dlg_param.vupdate_width = v->v_update_width_pix[input_idx]; + pipe->pipe_dlg_param.vupdate_offset = v->v_update_offset_pix[input_idx]; + pipe->pipe_dlg_param.vready_offset = v->v_ready_offset_pix[input_idx]; pipe->pipe_dlg_param.vstartup_start = v->v_startup[input_idx]; pipe->pipe_dlg_param.htotal = pipe->stream->timing.h_total; @@ -1137,9 +1137,9 @@ bool dcn_validate_bandwidth( TIMING_3D_FORMAT_SIDE_BY_SIDE))) { if (hsplit_pipe && hsplit_pipe->plane_state == pipe->plane_state) { /* update previously split pipe */ - hsplit_pipe->pipe_dlg_param.vupdate_width = v->v_update_width[input_idx][v->dpp_per_plane[input_idx] == 2 ? 1 : 0]; - hsplit_pipe->pipe_dlg_param.vupdate_offset = v->v_update_offset[input_idx][v->dpp_per_plane[input_idx] == 2 ? 1 : 0]; - hsplit_pipe->pipe_dlg_param.vready_offset = v->v_ready_offset[input_idx][v->dpp_per_plane[input_idx] == 2 ? 1 : 0]; + hsplit_pipe->pipe_dlg_param.vupdate_width = v->v_update_width_pix[input_idx]; + hsplit_pipe->pipe_dlg_param.vupdate_offset = v->v_update_offset_pix[input_idx]; + hsplit_pipe->pipe_dlg_param.vready_offset = v->v_ready_offset_pix[input_idx]; hsplit_pipe->pipe_dlg_param.vstartup_start = v->v_startup[input_idx]; hsplit_pipe->pipe_dlg_param.htotal = pipe->stream->timing.h_total; diff --git a/drivers/gpu/drm/amd/display/dc/core/dc.c b/drivers/gpu/drm/amd/display/dc/core/dc.c index 1c438eedf77a..76fe5a9af3bf 100644 --- a/drivers/gpu/drm/amd/display/dc/core/dc.c +++ b/drivers/gpu/drm/amd/display/dc/core/dc.c @@ -460,9 +460,25 @@ void dc_link_set_preferred_link_settings(struct dc *dc, struct dc_link_settings *link_setting, struct dc_link *link) { + int i; + struct pipe_ctx *pipe; + struct dc_stream_state *link_stream; struct dc_link_settings store_settings = *link_setting; - struct dc_stream_state *link_stream = - link->dc->current_state->res_ctx.pipe_ctx[0].stream; + + for (i = 0; i < MAX_PIPES; i++) { + pipe = &dc->current_state->res_ctx.pipe_ctx[i]; + if (pipe->stream && pipe->stream->sink + && pipe->stream->sink->link) { + if (pipe->stream->sink->link == link) + break; + } + } + + /* Stream not found */ + if (i == MAX_PIPES) + return; + + link_stream = link->dc->current_state->res_ctx.pipe_ctx[i].stream; link->preferred_link_setting = store_settings; if (link_stream) @@ -1160,9 +1176,6 @@ static enum surface_update_type get_plane_info_update_type(const struct dc_surfa */ update_flags->bits.bpp_change = 1; - if (u->gamma && dce_use_lut(u->plane_info->format)) - update_flags->bits.gamma_change = 1; - if (memcmp(&u->plane_info->tiling_info, &u->surface->tiling_info, sizeof(union dc_tiling_info)) != 0) { update_flags->bits.swizzle_change = 1; @@ -1179,7 +1192,6 @@ static enum surface_update_type get_plane_info_update_type(const struct dc_surfa if (update_flags->bits.rotation_change || update_flags->bits.stereo_format_change || update_flags->bits.pixel_format_change - || update_flags->bits.gamma_change || update_flags->bits.bpp_change || update_flags->bits.bandwidth_change || update_flags->bits.output_tf_change) @@ -1269,13 +1281,26 @@ static enum surface_update_type det_surface_update(const struct dc *dc, if (u->coeff_reduction_factor) update_flags->bits.coeff_reduction_change = 1; + if (u->gamma) { + enum surface_pixel_format format = SURFACE_PIXEL_FORMAT_GRPH_BEGIN; + + if (u->plane_info) + format = u->plane_info->format; + else if (u->surface) + format = u->surface->format; + + if (dce_use_lut(format)) + update_flags->bits.gamma_change = 1; + } + if (update_flags->bits.in_transfer_func_change) { type = UPDATE_TYPE_MED; elevate_update_type(&overall_type, type); } if (update_flags->bits.input_csc_change - || update_flags->bits.coeff_reduction_change) { + || update_flags->bits.coeff_reduction_change + || update_flags->bits.gamma_change) { type = UPDATE_TYPE_FULL; elevate_update_type(&overall_type, type); } @@ -1379,7 +1404,7 @@ static void notify_display_count_to_smu( * sent as part of pplib_apply_display_requirements. * So just return. */ - if (!pp_smu->set_display_count) + if (!pp_smu || !pp_smu->set_display_count) return; display_count = 0; diff --git a/drivers/gpu/drm/amd/display/dc/core/dc_link.c b/drivers/gpu/drm/amd/display/dc/core/dc_link.c index bd58dbae7d3e..9f9503a9b9aa 100644 --- a/drivers/gpu/drm/amd/display/dc/core/dc_link.c +++ b/drivers/gpu/drm/amd/display/dc/core/dc_link.c @@ -2559,23 +2559,24 @@ void core_link_enable_stream( pipe_ctx->stream_res.stream_enc, &stream->timing); - resource_build_info_frame(pipe_ctx); - core_dc->hwss.update_info_frame(pipe_ctx); - - /* eDP lit up by bios already, no need to enable again. */ - if (pipe_ctx->stream->signal == SIGNAL_TYPE_EDP && - pipe_ctx->stream->apply_edp_fast_boot_optimization) { - pipe_ctx->stream->apply_edp_fast_boot_optimization = false; - pipe_ctx->stream->dpms_off = false; - return; - } + if (!IS_FPGA_MAXIMUS_DC(core_dc->ctx->dce_environment)) { + resource_build_info_frame(pipe_ctx); + core_dc->hwss.update_info_frame(pipe_ctx); + + /* eDP lit up by bios already, no need to enable again. */ + if (pipe_ctx->stream->signal == SIGNAL_TYPE_EDP && + pipe_ctx->stream->apply_edp_fast_boot_optimization) { + pipe_ctx->stream->apply_edp_fast_boot_optimization = false; + pipe_ctx->stream->dpms_off = false; + return; + } - if (pipe_ctx->stream->dpms_off) - return; + if (pipe_ctx->stream->dpms_off) + return; - status = enable_link(state, pipe_ctx); + status = enable_link(state, pipe_ctx); - if (status != DC_OK) { + if (status != DC_OK) { DC_LOG_WARNING("enabling link %u failed: %d\n", pipe_ctx->stream->sink->link->link_index, status); @@ -2590,23 +2591,26 @@ void core_link_enable_stream( BREAK_TO_DEBUGGER(); return; } - } + } - core_dc->hwss.enable_audio_stream(pipe_ctx); + core_dc->hwss.enable_audio_stream(pipe_ctx); - /* turn off otg test pattern if enable */ - if (pipe_ctx->stream_res.tg->funcs->set_test_pattern) - pipe_ctx->stream_res.tg->funcs->set_test_pattern(pipe_ctx->stream_res.tg, - CONTROLLER_DP_TEST_PATTERN_VIDEOMODE, - COLOR_DEPTH_UNDEFINED); + /* turn off otg test pattern if enable */ + if (pipe_ctx->stream_res.tg->funcs->set_test_pattern) + pipe_ctx->stream_res.tg->funcs->set_test_pattern(pipe_ctx->stream_res.tg, + CONTROLLER_DP_TEST_PATTERN_VIDEOMODE, + COLOR_DEPTH_UNDEFINED); - core_dc->hwss.enable_stream(pipe_ctx); + core_dc->hwss.enable_stream(pipe_ctx); - if (pipe_ctx->stream->signal == SIGNAL_TYPE_DISPLAY_PORT_MST) - allocate_mst_payload(pipe_ctx); + if (pipe_ctx->stream->signal == SIGNAL_TYPE_DISPLAY_PORT_MST) + allocate_mst_payload(pipe_ctx); + + core_dc->hwss.unblank_stream(pipe_ctx, + &pipe_ctx->stream->sink->link->cur_link_settings); + + } - core_dc->hwss.unblank_stream(pipe_ctx, - &pipe_ctx->stream->sink->link->cur_link_settings); } void core_link_disable_stream(struct pipe_ctx *pipe_ctx, int option) diff --git a/drivers/gpu/drm/amd/display/dc/dc.h b/drivers/gpu/drm/amd/display/dc/dc.h index 7691139363a9..11ea2a226952 100644 --- a/drivers/gpu/drm/amd/display/dc/dc.h +++ b/drivers/gpu/drm/amd/display/dc/dc.h @@ -38,7 +38,7 @@ #include "inc/compressor.h" #include "dml/display_mode_lib.h" -#define DC_VER "3.1.66" +#define DC_VER "3.1.67" #define MAX_SURFACES 3 #define MAX_STREAMS 6 diff --git a/drivers/gpu/drm/amd/display/dc/dce/dce_clocks.c b/drivers/gpu/drm/amd/display/dc/dce/dce_clocks.c index bf6261a1584b..d42afa081452 100644 --- a/drivers/gpu/drm/amd/display/dc/dce/dce_clocks.c +++ b/drivers/gpu/drm/amd/display/dc/dce/dce_clocks.c @@ -468,6 +468,9 @@ static void dce12_update_clocks(struct dccg *dccg, { struct dm_pp_clock_for_voltage_req clock_voltage_req = {0}; + /* TODO: Investigate why this is needed to fix display corruption. */ + new_clocks->dispclk_khz = new_clocks->dispclk_khz * 115 / 100; + if (should_set_clock(safe_to_lower, new_clocks->dispclk_khz, dccg->clks.dispclk_khz)) { clock_voltage_req.clk_type = DM_PP_CLOCK_TYPE_DISPLAY_CLK; clock_voltage_req.clocks_in_khz = new_clocks->dispclk_khz; diff --git a/drivers/gpu/drm/amd/display/dc/dce110/dce110_hw_sequencer.c b/drivers/gpu/drm/amd/display/dc/dce110/dce110_hw_sequencer.c index dc1eed5ba996..6b7cccc486d8 100644 --- a/drivers/gpu/drm/amd/display/dc/dce110/dce110_hw_sequencer.c +++ b/drivers/gpu/drm/amd/display/dc/dce110/dce110_hw_sequencer.c @@ -1377,26 +1377,13 @@ static enum dc_status apply_single_controller_ctx_to_hw( /* */ dc->hwss.enable_stream_timing(pipe_ctx, context, dc); - /* FPGA does not program backend */ - if (IS_FPGA_MAXIMUS_DC(dc->ctx->dce_environment)) { - pipe_ctx->stream_res.opp->funcs->opp_set_dyn_expansion( - pipe_ctx->stream_res.opp, - COLOR_SPACE_YCBCR601, - stream->timing.display_color_depth, - pipe_ctx->stream->signal); - - pipe_ctx->stream_res.opp->funcs->opp_program_fmt( - pipe_ctx->stream_res.opp, - &stream->bit_depth_params, - &stream->clamping); - return DC_OK; - } /* TODO: move to stream encoder */ if (pipe_ctx->stream->signal != SIGNAL_TYPE_VIRTUAL) if (DC_OK != bios_parser_crtc_source_select(pipe_ctx)) { BREAK_TO_DEBUGGER(); return DC_ERROR_UNEXPECTED; } + pipe_ctx->stream_res.opp->funcs->opp_set_dyn_expansion( pipe_ctx->stream_res.opp, COLOR_SPACE_YCBCR601, diff --git a/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_hw_sequencer.c b/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_hw_sequencer.c index 6bd4ec39f869..a881ff5559ec 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_hw_sequencer.c +++ b/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_hw_sequencer.c @@ -44,6 +44,7 @@ #include "dcn10_hubp.h" #include "dcn10_hubbub.h" #include "dcn10_cm_common.h" +#include "dc_link_dp.h" #define DC_LOGGER_INIT(logger) diff --git a/drivers/gpu/drm/amd/display/dc/inc/dcn_calcs.h b/drivers/gpu/drm/amd/display/dc/inc/dcn_calcs.h index ddbb673caa08..e688eb9b975c 100644 --- a/drivers/gpu/drm/amd/display/dc/inc/dcn_calcs.h +++ b/drivers/gpu/drm/amd/display/dc/inc/dcn_calcs.h @@ -504,10 +504,10 @@ struct dcn_bw_internal_vars { float prefetch_mode; float dstx_after_scaler; float dsty_after_scaler; - float v_update_offset_pix; + float v_update_offset_pix[number_of_planes_minus_one + 1]; float total_repeater_delay_time; - float v_update_width_pix; - float v_ready_offset_pix; + float v_update_width_pix[number_of_planes_minus_one + 1]; + float v_ready_offset_pix[number_of_planes_minus_one + 1]; float t_setup; float t_wait; float bandwidth_available_for_immediate_flip; diff --git a/drivers/gpu/drm/amd/display/modules/freesync/freesync.c b/drivers/gpu/drm/amd/display/modules/freesync/freesync.c index e1688902a1b0..4018c7180d00 100644 --- a/drivers/gpu/drm/amd/display/modules/freesync/freesync.c +++ b/drivers/gpu/drm/amd/display/modules/freesync/freesync.c @@ -480,22 +480,11 @@ bool mod_freesync_get_v_position(struct mod_freesync *mod_freesync, return false; } -void mod_freesync_build_vrr_infopacket(struct mod_freesync *mod_freesync, - const struct dc_stream_state *stream, - const struct mod_vrr_params *vrr, - struct dc_info_packet *infopacket) +static void build_vrr_infopacket_header_v1(enum signal_type signal, + struct dc_info_packet *infopacket, + unsigned int *payload_size) { - /* SPD info packet for FreeSync */ - unsigned char checksum = 0; - unsigned int idx, payload_size = 0; - - /* Check if Freesync is supported. Return if false. If true, - * set the corresponding bit in the info packet - */ - if (!vrr->supported || !vrr->send_vsif) - return; - - if (dc_is_hdmi_signal(stream->signal)) { + if (dc_is_hdmi_signal(signal)) { /* HEADER */ @@ -510,9 +499,9 @@ void mod_freesync_build_vrr_infopacket(struct mod_freesync *mod_freesync, /* HB2 = [Bits 7:5 = 0] [Bits 4:0 = Length = 0x08] */ infopacket->hb2 = 0x08; - payload_size = 0x08; + *payload_size = 0x08; - } else if (dc_is_dp_signal(stream->signal)) { + } else if (dc_is_dp_signal(signal)) { /* HEADER */ @@ -536,9 +525,62 @@ void mod_freesync_build_vrr_infopacket(struct mod_freesync *mod_freesync, */ infopacket->hb3 = 0x04; - payload_size = 0x1B; + *payload_size = 0x1B; } +} + +static void build_vrr_infopacket_header_v2(enum signal_type signal, + struct dc_info_packet *infopacket, + unsigned int *payload_size) +{ + if (dc_is_hdmi_signal(signal)) { + + /* HEADER */ + + /* HB0 = Packet Type = 0x83 (Source Product + * Descriptor InfoFrame) + */ + infopacket->hb0 = DC_HDMI_INFOFRAME_TYPE_SPD; + + /* HB1 = Version = 0x02 */ + infopacket->hb1 = 0x02; + + /* HB2 = [Bits 7:5 = 0] [Bits 4:0 = Length = 0x09] */ + infopacket->hb2 = 0x09; + + *payload_size = 0x0A; + } else if (dc_is_dp_signal(signal)) { + + /* HEADER */ + + /* HB0 = Secondary-data Packet ID = 0 - Only non-zero + * when used to associate audio related info packets + */ + infopacket->hb0 = 0x00; + + /* HB1 = Packet Type = 0x83 (Source Product + * Descriptor InfoFrame) + */ + infopacket->hb1 = DC_HDMI_INFOFRAME_TYPE_SPD; + + /* HB2 = [Bits 7:0 = Least significant eight bits - + * For INFOFRAME, the value must be 1Bh] + */ + infopacket->hb2 = 0x1B; + + /* HB3 = [Bits 7:2 = INFOFRAME SDP Version Number = 0x2] + * [Bits 1:0 = Most significant two bits = 0x00] + */ + infopacket->hb3 = 0x08; + + *payload_size = 0x1B; + } +} + +static void build_vrr_infopacket_data(const struct mod_vrr_params *vrr, + struct dc_info_packet *infopacket) +{ /* PB1 = 0x1A (24bit AMD IEEE OUI (0x00001A) - Byte 0) */ infopacket->sb[1] = 0x1A; @@ -576,15 +618,39 @@ void mod_freesync_build_vrr_infopacket(struct mod_freesync *mod_freesync, */ infopacket->sb[8] = (unsigned char)(vrr->max_refresh_in_uhz / 1000000); - /* PB9 - PB27 = Reserved */ + //FreeSync HDR + infopacket->sb[9] = 0; + infopacket->sb[10] = 0; +} + +static void build_vrr_infopacket_fs2_data(enum color_transfer_func app_tf, + struct dc_info_packet *infopacket) +{ + if (app_tf != transfer_func_unknown) { + infopacket->valid = true; + + infopacket->sb[6] |= 0x08; // PB6 = [Bit 3 = Native Color Active] + + if (app_tf == transfer_func_gamma_22) { + infopacket->sb[9] |= 0x04; // PB6 = [Bit 2 = Gamma 2.2 EOTF Active] + } + } +} + +static void build_vrr_infopacket_checksum(unsigned int *payload_size, + struct dc_info_packet *infopacket) +{ /* Calculate checksum */ + unsigned int idx = 0; + unsigned char checksum = 0; + checksum += infopacket->hb0; checksum += infopacket->hb1; checksum += infopacket->hb2; checksum += infopacket->hb3; - for (idx = 1; idx <= payload_size; idx++) + for (idx = 1; idx <= *payload_size; idx++) checksum += infopacket->sb[idx]; /* PB0 = Checksum (one byte complement) */ @@ -593,6 +659,64 @@ void mod_freesync_build_vrr_infopacket(struct mod_freesync *mod_freesync, infopacket->valid = true; } +static void build_vrr_infopacket_v1(enum signal_type signal, + const struct mod_vrr_params *vrr, + struct dc_info_packet *infopacket) +{ + /* SPD info packet for FreeSync */ + unsigned int payload_size = 0; + + build_vrr_infopacket_header_v1(signal, infopacket, &payload_size); + build_vrr_infopacket_data(vrr, infopacket); + build_vrr_infopacket_checksum(&payload_size, infopacket); + + infopacket->valid = true; +} + +static void build_vrr_infopacket_v2(enum signal_type signal, + const struct mod_vrr_params *vrr, + const enum color_transfer_func *app_tf, + struct dc_info_packet *infopacket) +{ + unsigned int payload_size = 0; + + build_vrr_infopacket_header_v2(signal, infopacket, &payload_size); + build_vrr_infopacket_data(vrr, infopacket); + + if (app_tf != NULL) + build_vrr_infopacket_fs2_data(*app_tf, infopacket); + + build_vrr_infopacket_checksum(&payload_size, infopacket); + + infopacket->valid = true; +} + +void mod_freesync_build_vrr_infopacket(struct mod_freesync *mod_freesync, + const struct dc_stream_state *stream, + const struct mod_vrr_params *vrr, + enum vrr_packet_type packet_type, + const enum color_transfer_func *app_tf, + struct dc_info_packet *infopacket) +{ + /* SPD info packet for FreeSync */ + + /* Check if Freesync is supported. Return if false. If true, + * set the corresponding bit in the info packet + */ + if (!vrr->supported || !vrr->send_vsif) + return; + + switch (packet_type) { + case packet_type_fs2: + build_vrr_infopacket_v2(stream->signal, vrr, app_tf, infopacket); + break; + case packet_type_vrr: + case packet_type_fs1: + default: + build_vrr_infopacket_v1(stream->signal, vrr, infopacket); + } +} + void mod_freesync_build_vrr_params(struct mod_freesync *mod_freesync, const struct dc_stream_state *stream, struct mod_freesync_config *in_config, diff --git a/drivers/gpu/drm/amd/display/modules/inc/mod_freesync.h b/drivers/gpu/drm/amd/display/modules/inc/mod_freesync.h index a0f32cde721c..949a8b62aa98 100644 --- a/drivers/gpu/drm/amd/display/modules/inc/mod_freesync.h +++ b/drivers/gpu/drm/amd/display/modules/inc/mod_freesync.h @@ -54,7 +54,7 @@ #ifndef MOD_FREESYNC_H_ #define MOD_FREESYNC_H_ -#include "dm_services.h" +#include "mod_shared.h" // Access structures struct mod_freesync { @@ -144,6 +144,8 @@ void mod_freesync_get_settings(struct mod_freesync *mod_freesync, void mod_freesync_build_vrr_infopacket(struct mod_freesync *mod_freesync, const struct dc_stream_state *stream, const struct mod_vrr_params *vrr, + enum vrr_packet_type packet_type, + const enum color_transfer_func *app_tf, struct dc_info_packet *infopacket); void mod_freesync_build_vrr_params(struct mod_freesync *mod_freesync, diff --git a/drivers/gpu/drm/amd/display/modules/inc/mod_shared.h b/drivers/gpu/drm/amd/display/modules/inc/mod_shared.h new file mode 100644 index 000000000000..238c431ae483 --- /dev/null +++ b/drivers/gpu/drm/amd/display/modules/inc/mod_shared.h @@ -0,0 +1,49 @@ +/* + * Copyright 2016 Advanced Micro Devices, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + * + * Authors: AMD + * + */ + + +#ifndef MOD_SHARED_H_ +#define MOD_SHARED_H_ + +enum color_transfer_func { + transfer_func_unknown, + transfer_func_srgb, + transfer_func_bt709, + transfer_func_pq2084, + transfer_func_pq2084_interim, + transfer_func_linear_0_1, + transfer_func_linear_0_125, + transfer_func_dolbyvision, + transfer_func_gamma_22, + transfer_func_gamma_26 +}; + +enum vrr_packet_type { + packet_type_vrr, + packet_type_fs1, + packet_type_fs2 +}; + +#endif /* MOD_SHARED_H_ */ diff --git a/drivers/gpu/drm/amd/display/modules/info_packet/info_packet.c b/drivers/gpu/drm/amd/display/modules/info_packet/info_packet.c index 52378fc69079..ff8bfb9b43b0 100644 --- a/drivers/gpu/drm/amd/display/modules/info_packet/info_packet.c +++ b/drivers/gpu/drm/amd/display/modules/info_packet/info_packet.c @@ -48,9 +48,12 @@ static void mod_build_vsc_infopacket(const struct dc_stream_state *stream, unsigned int i; unsigned int pixelEncoding = 0; unsigned int colorimetryFormat = 0; + bool stereo3dSupport = false; - if (stream->timing.timing_3d_format != TIMING_3D_FORMAT_NONE && stream->view_format != VIEW_3D_FORMAT_NONE) + if (stream->timing.timing_3d_format != TIMING_3D_FORMAT_NONE && stream->view_format != VIEW_3D_FORMAT_NONE) { vscPacketRevision = 1; + stereo3dSupport = true; + } /*VSC packet set to 2 when DP revision >= 1.2*/ if (stream->psr_version != 0) @@ -94,12 +97,59 @@ static void mod_build_vsc_infopacket(const struct dc_stream_state *stream, info_packet->hb2 = 0x01; // 01h = Revision number. VSC SDP supporting 3D stereo only info_packet->hb3 = 0x01; // 01h = VSC SDP supporting 3D stereo only (HB2 = 01h). - if (stream->timing.timing_3d_format == TIMING_3D_FORMAT_INBAND_FA) - info_packet->sb[0] = 0x1; - info_packet->valid = true; } + if (stereo3dSupport) { + /* ==============================================================================================================| + * A. STEREO 3D + * ==============================================================================================================| + * VSC Payload (1 byte) From DP1.2 spec + * + * Bits 3:0 (Stereo Interface Method Code) | Bits 7:4 (Stereo Interface Method Specific Parameter) + * ----------------------------------------------------------------------------------------------------- + * 0 = Non Stereo Video | Must be set to 0x0 + * ----------------------------------------------------------------------------------------------------- + * 1 = Frame/Field Sequential | 0x0: L + R view indication based on MISC1 bit 2:1 + * | 0x1: Right when Stereo Signal = 1 + * | 0x2: Left when Stereo Signal = 1 + * | (others reserved) + * ----------------------------------------------------------------------------------------------------- + * 2 = Stacked Frame | 0x0: Left view is on top and right view on bottom + * | (others reserved) + * ----------------------------------------------------------------------------------------------------- + * 3 = Pixel Interleaved | 0x0: horiz interleaved, right view pixels on even lines + * | 0x1: horiz interleaved, right view pixels on odd lines + * | 0x2: checker board, start with left view pixel + * | 0x3: vertical interleaved, start with left view pixels + * | 0x4: vertical interleaved, start with right view pixels + * | (others reserved) + * ----------------------------------------------------------------------------------------------------- + * 4 = Side-by-side | 0x0: left half represents left eye view + * | 0x1: left half represents right eye view + */ + switch (stream->timing.timing_3d_format) { + case TIMING_3D_FORMAT_HW_FRAME_PACKING: + case TIMING_3D_FORMAT_SW_FRAME_PACKING: + case TIMING_3D_FORMAT_TOP_AND_BOTTOM: + case TIMING_3D_FORMAT_TB_SW_PACKED: + info_packet->sb[0] = 0x02; // Stacked Frame, Left view is on top and right view on bottom. + break; + case TIMING_3D_FORMAT_DP_HDMI_INBAND_FA: + case TIMING_3D_FORMAT_INBAND_FA: + info_packet->sb[0] = 0x01; // Frame/Field Sequential, L + R view indication based on MISC1 bit 2:1 + break; + case TIMING_3D_FORMAT_SIDE_BY_SIDE: + case TIMING_3D_FORMAT_SBS_SW_PACKED: + info_packet->sb[0] = 0x04; // Side-by-side + break; + default: + info_packet->sb[0] = 0x00; // No Stereo Video, Shall be cleared to 0x0. + break; + } + + } + /* 05h = VSC SDP supporting 3D stereo, PSR2, and Pixel Encoding/Colorimetry Format indication. * Added in DP1.3, a DP Source device is allowed to indicate the pixel encoding/colorimetry * format to the DP Sink device with VSC SDP only when the DP Sink device supports it diff --git a/drivers/gpu/drm/amd/display/modules/stats/stats.c b/drivers/gpu/drm/amd/display/modules/stats/stats.c index 3d4c1b1ab8c4..03121ca64fe4 100644 --- a/drivers/gpu/drm/amd/display/modules/stats/stats.c +++ b/drivers/gpu/drm/amd/display/modules/stats/stats.c @@ -186,12 +186,8 @@ void mod_stats_destroy(struct mod_stats *mod_stats) if (mod_stats != NULL) { struct core_stats *core_stats = MOD_STATS_TO_CORE(mod_stats); - if (core_stats->time != NULL) - kfree(core_stats->time); - - if (core_stats->events != NULL) - kfree(core_stats->events); - + kfree(core_stats->time); + kfree(core_stats->events); kfree(core_stats); } } diff --git a/drivers/gpu/drm/amd/include/amd_shared.h b/drivers/gpu/drm/amd/include/amd_shared.h index 86b167ec9863..2083c308007c 100644 --- a/drivers/gpu/drm/amd/include/amd_shared.h +++ b/drivers/gpu/drm/amd/include/amd_shared.h @@ -109,6 +109,7 @@ enum amd_powergating_state { #define AMD_PG_SUPPORT_GFX_PIPELINE (1 << 12) #define AMD_PG_SUPPORT_MMHUB (1 << 13) #define AMD_PG_SUPPORT_VCN (1 << 14) +#define AMD_PG_SUPPORT_VCN_DPG (1 << 15) enum PP_FEATURE_MASK { PP_SCLK_DPM_MASK = 0x1, diff --git a/drivers/gpu/drm/amd/include/asic_reg/vcn/vcn_1_0_offset.h b/drivers/gpu/drm/amd/include/asic_reg/vcn/vcn_1_0_offset.h index 216a401028de..4b7da589e14a 100644 --- a/drivers/gpu/drm/amd/include/asic_reg/vcn/vcn_1_0_offset.h +++ b/drivers/gpu/drm/amd/include/asic_reg/vcn/vcn_1_0_offset.h @@ -33,6 +33,14 @@ #define mmUVD_POWER_STATUS_BASE_IDX 1 #define mmCC_UVD_HARVESTING 0x00c7 #define mmCC_UVD_HARVESTING_BASE_IDX 1 +#define mmUVD_DPG_LMA_CTL 0x00d1 +#define mmUVD_DPG_LMA_CTL_BASE_IDX 1 +#define mmUVD_DPG_LMA_DATA 0x00d2 +#define mmUVD_DPG_LMA_DATA_BASE_IDX 1 +#define mmUVD_DPG_LMA_MASK 0x00d3 +#define mmUVD_DPG_LMA_MASK_BASE_IDX 1 +#define mmUVD_DPG_PAUSE 0x00d4 +#define mmUVD_DPG_PAUSE_BASE_IDX 1 #define mmUVD_SCRATCH1 0x00d5 #define mmUVD_SCRATCH1_BASE_IDX 1 #define mmUVD_SCRATCH2 0x00d6 diff --git a/drivers/gpu/drm/amd/include/asic_reg/vcn/vcn_1_0_sh_mask.h b/drivers/gpu/drm/amd/include/asic_reg/vcn/vcn_1_0_sh_mask.h index 124383dac284..26382f5d5354 100644 --- a/drivers/gpu/drm/amd/include/asic_reg/vcn/vcn_1_0_sh_mask.h +++ b/drivers/gpu/drm/amd/include/asic_reg/vcn/vcn_1_0_sh_mask.h @@ -87,6 +87,26 @@ //CC_UVD_HARVESTING #define CC_UVD_HARVESTING__UVD_DISABLE__SHIFT 0x1 #define CC_UVD_HARVESTING__UVD_DISABLE_MASK 0x00000002L +//UVD_DPG_LMA_CTL +#define UVD_DPG_LMA_CTL__READ_WRITE__SHIFT 0x0 +#define UVD_DPG_LMA_CTL__MASK_EN__SHIFT 0x1 +#define UVD_DPG_LMA_CTL__ADDR_AUTO_INCREMENT__SHIFT 0x2 +#define UVD_DPG_LMA_CTL__SRAM_SEL__SHIFT 0x4 +#define UVD_DPG_LMA_CTL__READ_WRITE_ADDR__SHIFT 0x10 +#define UVD_DPG_LMA_CTL__READ_WRITE_MASK 0x00000001L +#define UVD_DPG_LMA_CTL__MASK_EN_MASK 0x00000002L +#define UVD_DPG_LMA_CTL__ADDR_AUTO_INCREMENT_MASK 0x00000004L +#define UVD_DPG_LMA_CTL__SRAM_SEL_MASK 0x00000010L +#define UVD_DPG_LMA_CTL__READ_WRITE_ADDR_MASK 0xFFFF0000L +//UVD_DPG_PAUSE +#define UVD_DPG_PAUSE__JPEG_PAUSE_DPG_REQ__SHIFT 0x0 +#define UVD_DPG_PAUSE__JPEG_PAUSE_DPG_ACK__SHIFT 0x1 +#define UVD_DPG_PAUSE__NJ_PAUSE_DPG_REQ__SHIFT 0x2 +#define UVD_DPG_PAUSE__NJ_PAUSE_DPG_ACK__SHIFT 0x3 +#define UVD_DPG_PAUSE__JPEG_PAUSE_DPG_REQ_MASK 0x00000001L +#define UVD_DPG_PAUSE__JPEG_PAUSE_DPG_ACK_MASK 0x00000002L +#define UVD_DPG_PAUSE__NJ_PAUSE_DPG_REQ_MASK 0x00000004L +#define UVD_DPG_PAUSE__NJ_PAUSE_DPG_ACK_MASK 0x00000008L //UVD_SCRATCH1 #define UVD_SCRATCH1__SCRATCH1_DATA__SHIFT 0x0 #define UVD_SCRATCH1__SCRATCH1_DATA_MASK 0xFFFFFFFFL @@ -983,6 +1003,7 @@ #define UVD_MASTINT_EN__SYS_EN_MASK 0x00000004L #define UVD_MASTINT_EN__INT_OVERRUN_MASK 0x007FFFF0L //UVD_SYS_INT_EN +#define UVD_SYS_INT_EN__UVD_JRBC_EN__SHIFT 0x4 #define UVD_SYS_INT_EN__UVD_JRBC_EN_MASK 0x00000010L //JPEG_CGC_CTRL #define JPEG_CGC_CTRL__DYN_CLOCK_MODE__SHIFT 0x0 @@ -1138,7 +1159,11 @@ #define UVD_VCPU_CACHE_SIZE2__CACHE_SIZE2_MASK 0x001FFFFFL //UVD_VCPU_CNTL #define UVD_VCPU_CNTL__CLK_EN__SHIFT 0x9 +#define UVD_VCPU_CNTL__MIF_WR_LOW_THRESHOLD_BP__SHIFT 0x11 +#define UVD_VCPU_CNTL__PRB_TIMEOUT_VAL__SHIFT 0x14 #define UVD_VCPU_CNTL__CLK_EN_MASK 0x00000200L +#define UVD_VCPU_CNTL__MIF_WR_LOW_THRESHOLD_BP_MASK 0x00020000L +#define UVD_VCPU_CNTL__PRB_TIMEOUT_VAL_MASK 0x0FF00000L //UVD_SOFT_RESET #define UVD_SOFT_RESET__RBC_SOFT_RESET__SHIFT 0x0 #define UVD_SOFT_RESET__LBSI_SOFT_RESET__SHIFT 0x1 diff --git a/drivers/gpu/drm/amd/include/kgd_kfd_interface.h b/drivers/gpu/drm/amd/include/kgd_kfd_interface.h index f43ed96cfa6c..64ecffd52126 100644 --- a/drivers/gpu/drm/amd/include/kgd_kfd_interface.h +++ b/drivers/gpu/drm/amd/include/kgd_kfd_interface.h @@ -146,10 +146,10 @@ struct kgd2kfd_shared_resources { * is reserved: (D & reserved_doorbell_mask) == reserved_doorbell_val * * KFD currently uses 1024 (= 0x3ff) doorbells per process. If - * doorbells 0x0f0-0x0f7 and 0x2f-0x2f7 are reserved, that means - * mask would be set to 0x1f8 and val set to 0x0f0. + * doorbells 0x0e0-0x0ff and 0x2e0-0x2ff are reserved, that means + * mask would be set to 0x1e0 and val set to 0x0e0. */ - unsigned int sdma_doorbell[2][2]; + unsigned int sdma_doorbell[2][8]; unsigned int reserved_doorbell_mask; unsigned int reserved_doorbell_val; @@ -409,9 +409,9 @@ struct kfd2kgd_calls { struct dma_fence **ef); void (*destroy_process_vm)(struct kgd_dev *kgd, void *vm); void (*release_process_vm)(struct kgd_dev *kgd, void *vm); - uint32_t (*get_process_page_dir)(void *vm); + uint64_t (*get_process_page_dir)(void *vm); void (*set_vm_context_page_table_base)(struct kgd_dev *kgd, - uint32_t vmid, uint32_t page_table_base); + uint32_t vmid, uint64_t page_table_base); int (*alloc_memory_of_gpu)(struct kgd_dev *kgd, uint64_t va, uint64_t size, void *vm, struct kgd_mem **mem, uint64_t *offset, diff --git a/drivers/gpu/drm/amd/include/kgd_pp_interface.h b/drivers/gpu/drm/amd/include/kgd_pp_interface.h index 448dee481a38..bd7404532029 100644 --- a/drivers/gpu/drm/amd/include/kgd_pp_interface.h +++ b/drivers/gpu/drm/amd/include/kgd_pp_interface.h @@ -113,6 +113,7 @@ enum amd_pp_sensors { AMDGPU_PP_SENSOR_GPU_POWER, AMDGPU_PP_SENSOR_STABLE_PSTATE_SCLK, AMDGPU_PP_SENSOR_STABLE_PSTATE_MCLK, + AMDGPU_PP_SENSOR_ENABLED_SMC_FEATURES_MASK, }; enum amd_pp_task { diff --git a/drivers/gpu/drm/amd/powerplay/hwmgr/ppatomfwctrl.c b/drivers/gpu/drm/amd/powerplay/hwmgr/ppatomfwctrl.c index d27c1c9df286..4588bddf8b33 100644 --- a/drivers/gpu/drm/amd/powerplay/hwmgr/ppatomfwctrl.c +++ b/drivers/gpu/drm/amd/powerplay/hwmgr/ppatomfwctrl.c @@ -488,7 +488,8 @@ int pp_atomfwctrl_get_gpio_information(struct pp_hwmgr *hwmgr, return 0; } -int pp_atomfwctrl_get_clk_information_by_clkid(struct pp_hwmgr *hwmgr, BIOS_CLKID id, uint32_t *frequency) +int pp_atomfwctrl_get_clk_information_by_clkid(struct pp_hwmgr *hwmgr, + uint8_t id, uint32_t *frequency) { struct amdgpu_device *adev = hwmgr->adev; struct atom_get_smu_clock_info_parameters_v3_1 parameters; diff --git a/drivers/gpu/drm/amd/powerplay/hwmgr/ppatomfwctrl.h b/drivers/gpu/drm/amd/powerplay/hwmgr/ppatomfwctrl.h index 22e21668c93a..fe9e8ceef50e 100644 --- a/drivers/gpu/drm/amd/powerplay/hwmgr/ppatomfwctrl.h +++ b/drivers/gpu/drm/amd/powerplay/hwmgr/ppatomfwctrl.h @@ -236,7 +236,7 @@ int pp_atomfwctrl_get_vbios_bootup_values(struct pp_hwmgr *hwmgr, int pp_atomfwctrl_get_smc_dpm_information(struct pp_hwmgr *hwmgr, struct pp_atomfwctrl_smc_dpm_parameters *param); int pp_atomfwctrl_get_clk_information_by_clkid(struct pp_hwmgr *hwmgr, - BIOS_CLKID id, uint32_t *frequency); + uint8_t id, uint32_t *frequency); #endif diff --git a/drivers/gpu/drm/amd/powerplay/hwmgr/smu10_hwmgr.c b/drivers/gpu/drm/amd/powerplay/hwmgr/smu10_hwmgr.c index 9808bd48b386..5d1dae25a466 100644 --- a/drivers/gpu/drm/amd/powerplay/hwmgr/smu10_hwmgr.c +++ b/drivers/gpu/drm/amd/powerplay/hwmgr/smu10_hwmgr.c @@ -552,6 +552,8 @@ static int smu10_dpm_force_dpm_level(struct pp_hwmgr *hwmgr, { struct smu10_hwmgr *data = hwmgr->backend; struct amdgpu_device *adev = hwmgr->adev; + uint32_t min_sclk = hwmgr->display_config->min_core_set_clock; + uint32_t min_mclk = hwmgr->display_config->min_mem_set_clock/100; if (hwmgr->smu_version < 0x1E3700) { pr_info("smu firmware version too old, can not set dpm level\n"); @@ -563,6 +565,13 @@ static int smu10_dpm_force_dpm_level(struct pp_hwmgr *hwmgr, (adev->rev_id >= 8)) return 0; + if (min_sclk < data->gfx_min_freq_limit) + min_sclk = data->gfx_min_freq_limit; + + min_sclk /= 100; /* transfer 10KHz to MHz */ + if (min_mclk < data->clock_table.FClocks[0].Freq) + min_mclk = data->clock_table.FClocks[0].Freq; + switch (level) { case AMD_DPM_FORCED_LEVEL_HIGH: case AMD_DPM_FORCED_LEVEL_PROFILE_PEAK: @@ -595,18 +604,18 @@ static int smu10_dpm_force_dpm_level(struct pp_hwmgr *hwmgr, case AMD_DPM_FORCED_LEVEL_PROFILE_MIN_SCLK: smum_send_msg_to_smc_with_parameter(hwmgr, PPSMC_MSG_SetHardMinGfxClk, - data->gfx_min_freq_limit/100); + min_sclk); smum_send_msg_to_smc_with_parameter(hwmgr, PPSMC_MSG_SetSoftMaxGfxClk, - data->gfx_min_freq_limit/100); + min_sclk); break; case AMD_DPM_FORCED_LEVEL_PROFILE_MIN_MCLK: smum_send_msg_to_smc_with_parameter(hwmgr, PPSMC_MSG_SetHardMinFclkByFreq, - SMU10_UMD_PSTATE_MIN_FCLK); + min_mclk); smum_send_msg_to_smc_with_parameter(hwmgr, PPSMC_MSG_SetSoftMaxFclkByFreq, - SMU10_UMD_PSTATE_MIN_FCLK); + min_mclk); break; case AMD_DPM_FORCED_LEVEL_PROFILE_STANDARD: smum_send_msg_to_smc_with_parameter(hwmgr, @@ -638,12 +647,12 @@ static int smu10_dpm_force_dpm_level(struct pp_hwmgr *hwmgr, case AMD_DPM_FORCED_LEVEL_AUTO: smum_send_msg_to_smc_with_parameter(hwmgr, PPSMC_MSG_SetHardMinGfxClk, - data->gfx_min_freq_limit/100); + min_sclk); smum_send_msg_to_smc_with_parameter(hwmgr, PPSMC_MSG_SetHardMinFclkByFreq, hwmgr->display_config->num_display > 3 ? SMU10_UMD_PSTATE_PEAK_FCLK : - SMU10_UMD_PSTATE_MIN_FCLK); + min_mclk); smum_send_msg_to_smc_with_parameter(hwmgr, PPSMC_MSG_SetHardMinSocclkByFreq, @@ -674,10 +683,10 @@ static int smu10_dpm_force_dpm_level(struct pp_hwmgr *hwmgr, data->gfx_min_freq_limit/100); smum_send_msg_to_smc_with_parameter(hwmgr, PPSMC_MSG_SetHardMinFclkByFreq, - SMU10_UMD_PSTATE_MIN_FCLK); + min_mclk); smum_send_msg_to_smc_with_parameter(hwmgr, PPSMC_MSG_SetSoftMaxFclkByFreq, - SMU10_UMD_PSTATE_MIN_FCLK); + min_mclk); break; case AMD_DPM_FORCED_LEVEL_MANUAL: case AMD_DPM_FORCED_LEVEL_PROFILE_EXIT: diff --git a/drivers/gpu/drm/amd/powerplay/hwmgr/smu7_hwmgr.c b/drivers/gpu/drm/amd/powerplay/hwmgr/smu7_hwmgr.c index 04b7da0e39a6..6c99cbf51c08 100644 --- a/drivers/gpu/drm/amd/powerplay/hwmgr/smu7_hwmgr.c +++ b/drivers/gpu/drm/amd/powerplay/hwmgr/smu7_hwmgr.c @@ -4106,17 +4106,17 @@ static int smu7_register_irq_handlers(struct pp_hwmgr *hwmgr) source->funcs = &smu7_irq_funcs; amdgpu_irq_add_id((struct amdgpu_device *)(hwmgr->adev), - AMDGPU_IH_CLIENTID_LEGACY, + AMDGPU_IRQ_CLIENTID_LEGACY, VISLANDS30_IV_SRCID_CG_TSS_THERMAL_LOW_TO_HIGH, source); amdgpu_irq_add_id((struct amdgpu_device *)(hwmgr->adev), - AMDGPU_IH_CLIENTID_LEGACY, + AMDGPU_IRQ_CLIENTID_LEGACY, VISLANDS30_IV_SRCID_CG_TSS_THERMAL_HIGH_TO_LOW, source); /* Register CTF(GPIO_19) interrupt */ amdgpu_irq_add_id((struct amdgpu_device *)(hwmgr->adev), - AMDGPU_IH_CLIENTID_LEGACY, + AMDGPU_IRQ_CLIENTID_LEGACY, VISLANDS30_IV_SRCID_GPIO_19, source); @@ -5035,6 +5035,18 @@ static int smu7_get_performance_level(struct pp_hwmgr *hwmgr, const struct pp_hw return 0; } +static int smu7_power_off_asic(struct pp_hwmgr *hwmgr) +{ + int result; + + result = smu7_disable_dpm_tasks(hwmgr); + PP_ASSERT_WITH_CODE((0 == result), + "[disable_dpm_tasks] Failed to disable DPM!", + ); + + return result; +} + static const struct pp_hwmgr_func smu7_hwmgr_funcs = { .backend_init = &smu7_hwmgr_backend_init, .backend_fini = &smu7_hwmgr_backend_fini, @@ -5092,6 +5104,7 @@ static const struct pp_hwmgr_func smu7_hwmgr_funcs = { .get_power_profile_mode = smu7_get_power_profile_mode, .set_power_profile_mode = smu7_set_power_profile_mode, .get_performance_level = smu7_get_performance_level, + .power_off_asic = smu7_power_off_asic, }; uint8_t smu7_get_sleep_divider_id_from_clock(uint32_t clock, diff --git a/drivers/gpu/drm/amd/powerplay/hwmgr/smu8_hwmgr.c b/drivers/gpu/drm/amd/powerplay/hwmgr/smu8_hwmgr.c index b8637049198d..53cf787560f7 100644 --- a/drivers/gpu/drm/amd/powerplay/hwmgr/smu8_hwmgr.c +++ b/drivers/gpu/drm/amd/powerplay/hwmgr/smu8_hwmgr.c @@ -880,7 +880,7 @@ static int smu8_set_power_state_tasks(struct pp_hwmgr *hwmgr, const void *input) smu8_update_low_mem_pstate(hwmgr, input); return 0; -}; +} static int smu8_setup_asic_task(struct pp_hwmgr *hwmgr) @@ -934,14 +934,6 @@ static void smu8_reset_cc6_data(struct pp_hwmgr *hwmgr) hw_data->cc6_settings.cpu_pstate_disable = false; } -static int smu8_power_off_asic(struct pp_hwmgr *hwmgr) -{ - smu8_power_up_display_clock_sys_pll(hwmgr); - smu8_clear_nb_dpm_flag(hwmgr); - smu8_reset_cc6_data(hwmgr); - return 0; -}; - static void smu8_program_voting_clients(struct pp_hwmgr *hwmgr) { cgs_write_ind_register(hwmgr->device, CGS_IND_REG__SMC, @@ -1011,6 +1003,17 @@ static void smu8_reset_acp_boot_level(struct pp_hwmgr *hwmgr) data->acp_boot_level = 0xff; } +static int smu8_enable_dpm_tasks(struct pp_hwmgr *hwmgr) +{ + smu8_program_voting_clients(hwmgr); + if (smu8_start_dpm(hwmgr)) + return -EINVAL; + smu8_program_bootup_state(hwmgr); + smu8_reset_acp_boot_level(hwmgr); + + return 0; +} + static int smu8_disable_dpm_tasks(struct pp_hwmgr *hwmgr) { smu8_disable_nb_dpm(hwmgr); @@ -1020,18 +1023,16 @@ static int smu8_disable_dpm_tasks(struct pp_hwmgr *hwmgr) return -EINVAL; return 0; -}; +} -static int smu8_enable_dpm_tasks(struct pp_hwmgr *hwmgr) +static int smu8_power_off_asic(struct pp_hwmgr *hwmgr) { - smu8_program_voting_clients(hwmgr); - if (smu8_start_dpm(hwmgr)) - return -EINVAL; - smu8_program_bootup_state(hwmgr); - smu8_reset_acp_boot_level(hwmgr); - + smu8_disable_dpm_tasks(hwmgr); + smu8_power_up_display_clock_sys_pll(hwmgr); + smu8_clear_nb_dpm_flag(hwmgr); + smu8_reset_cc6_data(hwmgr); return 0; -}; +} static int smu8_apply_state_adjust_rules(struct pp_hwmgr *hwmgr, struct pp_power_state *prequest_ps, diff --git a/drivers/gpu/drm/amd/powerplay/hwmgr/smu_helper.c b/drivers/gpu/drm/amd/powerplay/hwmgr/smu_helper.c index 2aab1b475945..8ad4e6960efd 100644 --- a/drivers/gpu/drm/amd/powerplay/hwmgr/smu_helper.c +++ b/drivers/gpu/drm/amd/powerplay/hwmgr/smu_helper.c @@ -545,7 +545,7 @@ int phm_irq_process(struct amdgpu_device *adev, uint32_t client_id = entry->client_id; uint32_t src_id = entry->src_id; - if (client_id == AMDGPU_IH_CLIENTID_LEGACY) { + if (client_id == AMDGPU_IRQ_CLIENTID_LEGACY) { if (src_id == VISLANDS30_IV_SRCID_CG_TSS_THERMAL_LOW_TO_HIGH) pr_warn("GPU over temperature range detected on PCIe %d:%d.%d!\n", PCI_BUS_NUM(adev->pdev->devfn), diff --git a/drivers/gpu/drm/amd/powerplay/hwmgr/vega10_hwmgr.c b/drivers/gpu/drm/amd/powerplay/hwmgr/vega10_hwmgr.c index ca9be583fb62..419a1d77d661 100644 --- a/drivers/gpu/drm/amd/powerplay/hwmgr/vega10_hwmgr.c +++ b/drivers/gpu/drm/amd/powerplay/hwmgr/vega10_hwmgr.c @@ -39,6 +39,7 @@ #include "soc15_common.h" #include "pppcielanes.h" #include "vega10_hwmgr.h" +#include "vega10_smumgr.h" #include "vega10_processpptables.h" #include "vega10_pptable.h" #include "vega10_thermal.h" @@ -3713,6 +3714,11 @@ static int vega10_read_sensor(struct pp_hwmgr *hwmgr, int idx, SMUSVI0_PLANE0_CURRENTVID__CURRENT_SVI0_PLANE0_VID__SHIFT; *((uint32_t *)value) = (uint32_t)convert_to_vddc((uint8_t)val_vid); return 0; + case AMDGPU_PP_SENSOR_ENABLED_SMC_FEATURES_MASK: + ret = vega10_get_enabled_smc_features(hwmgr, (uint64_t *)value); + if (!ret) + *size = 8; + break; default: ret = -EINVAL; break; @@ -4940,16 +4946,6 @@ static const struct pp_hwmgr_func vega10_hwmgr_funcs = { .get_performance_level = vega10_get_performance_level, }; -int vega10_enable_smc_features(struct pp_hwmgr *hwmgr, - bool enable, uint32_t feature_mask) -{ - int msg = enable ? PPSMC_MSG_EnableSmuFeatures : - PPSMC_MSG_DisableSmuFeatures; - - return smum_send_msg_to_smc_with_parameter(hwmgr, - msg, feature_mask); -} - int vega10_hwmgr_init(struct pp_hwmgr *hwmgr) { hwmgr->hwmgr_func = &vega10_hwmgr_funcs; diff --git a/drivers/gpu/drm/amd/powerplay/hwmgr/vega10_hwmgr.h b/drivers/gpu/drm/amd/powerplay/hwmgr/vega10_hwmgr.h index 339820da9e6a..89870556de1b 100644 --- a/drivers/gpu/drm/amd/powerplay/hwmgr/vega10_hwmgr.h +++ b/drivers/gpu/drm/amd/powerplay/hwmgr/vega10_hwmgr.h @@ -441,7 +441,5 @@ int vega10_update_uvd_dpm(struct pp_hwmgr *hwmgr, bool bgate); int vega10_update_samu_dpm(struct pp_hwmgr *hwmgr, bool bgate); int vega10_update_acp_dpm(struct pp_hwmgr *hwmgr, bool bgate); int vega10_enable_disable_vce_dpm(struct pp_hwmgr *hwmgr, bool enable); -int vega10_enable_smc_features(struct pp_hwmgr *hwmgr, - bool enable, uint32_t feature_mask); #endif /* _VEGA10_HWMGR_H_ */ diff --git a/drivers/gpu/drm/amd/powerplay/hwmgr/vega10_powertune.c b/drivers/gpu/drm/amd/powerplay/hwmgr/vega10_powertune.c index 22364875a943..2d88abf97e7b 100644 --- a/drivers/gpu/drm/amd/powerplay/hwmgr/vega10_powertune.c +++ b/drivers/gpu/drm/amd/powerplay/hwmgr/vega10_powertune.c @@ -23,6 +23,7 @@ #include "hwmgr.h" #include "vega10_hwmgr.h" +#include "vega10_smumgr.h" #include "vega10_powertune.h" #include "vega10_ppsmc.h" #include "vega10_inc.h" diff --git a/drivers/gpu/drm/amd/powerplay/hwmgr/vega10_thermal.c b/drivers/gpu/drm/amd/powerplay/hwmgr/vega10_thermal.c index aa044c1955fe..407762b36901 100644 --- a/drivers/gpu/drm/amd/powerplay/hwmgr/vega10_thermal.c +++ b/drivers/gpu/drm/amd/powerplay/hwmgr/vega10_thermal.c @@ -23,6 +23,7 @@ #include "vega10_thermal.h" #include "vega10_hwmgr.h" +#include "vega10_smumgr.h" #include "vega10_ppsmc.h" #include "vega10_inc.h" #include "soc15_common.h" diff --git a/drivers/gpu/drm/amd/powerplay/hwmgr/vega12_hwmgr.c b/drivers/gpu/drm/amd/powerplay/hwmgr/vega12_hwmgr.c index 0789d64246ca..9600e2f226e9 100644 --- a/drivers/gpu/drm/amd/powerplay/hwmgr/vega12_hwmgr.c +++ b/drivers/gpu/drm/amd/powerplay/hwmgr/vega12_hwmgr.c @@ -745,8 +745,8 @@ static int vega12_init_smc_table(struct pp_hwmgr *hwmgr) memcpy(pp_table, pptable_information->smc_pptable, sizeof(PPTable_t)); - result = vega12_copy_table_to_smc(hwmgr, - (uint8_t *)pp_table, TABLE_PPTABLE); + result = smum_smc_table_manager(hwmgr, + (uint8_t *)pp_table, TABLE_PPTABLE, false); PP_ASSERT_WITH_CODE(!result, "Failed to upload PPtable!", return result); @@ -1317,7 +1317,11 @@ static int vega12_read_sensor(struct pp_hwmgr *hwmgr, int idx, break; case AMDGPU_PP_SENSOR_GPU_POWER: ret = vega12_get_gpu_power(hwmgr, (uint32_t *)value); - + break; + case AMDGPU_PP_SENSOR_ENABLED_SMC_FEATURES_MASK: + ret = vega12_get_enabled_smc_features(hwmgr, (uint64_t *)value); + if (!ret) + *size = 8; break; default: ret = -EINVAL; @@ -2103,8 +2107,8 @@ static int vega12_display_configuration_changed_task(struct pp_hwmgr *hwmgr) if ((data->water_marks_bitmap & WaterMarksExist) && !(data->water_marks_bitmap & WaterMarksLoaded)) { - result = vega12_copy_table_to_smc(hwmgr, - (uint8_t *)wm_table, TABLE_WATERMARKS); + result = smum_smc_table_manager(hwmgr, + (uint8_t *)wm_table, TABLE_WATERMARKS, false); PP_ASSERT_WITH_CODE(result, "Failed to update WMTABLE!", return EINVAL); data->water_marks_bitmap |= WaterMarksLoaded; } diff --git a/drivers/gpu/drm/amd/powerplay/hwmgr/vega20_hwmgr.c b/drivers/gpu/drm/amd/powerplay/hwmgr/vega20_hwmgr.c index d45cbfe8e184..2a554f9edcda 100644 --- a/drivers/gpu/drm/amd/powerplay/hwmgr/vega20_hwmgr.c +++ b/drivers/gpu/drm/amd/powerplay/hwmgr/vega20_hwmgr.c @@ -461,7 +461,7 @@ static int vega20_get_number_of_dpm_level(struct pp_hwmgr *hwmgr, "[GetNumOfDpmLevel] failed to get dpm levels!", return ret); - vega20_read_arg_from_smc(hwmgr, num_of_levels); + *num_of_levels = smum_get_argument(hwmgr); PP_ASSERT_WITH_CODE(*num_of_levels > 0, "[GetNumOfDpmLevel] number of clk levels is invalid!", return -EINVAL); @@ -481,7 +481,7 @@ static int vega20_get_dpm_frequency_by_index(struct pp_hwmgr *hwmgr, "[GetDpmFreqByIndex] failed to get dpm freq by index!", return ret); - vega20_read_arg_from_smc(hwmgr, clk); + *clk = smum_get_argument(hwmgr); PP_ASSERT_WITH_CODE(*clk, "[GetDpmFreqByIndex] clk value is invalid!", return -EINVAL); @@ -743,8 +743,8 @@ static int vega20_init_smc_table(struct pp_hwmgr *hwmgr) memcpy(pp_table, pptable_information->smc_pptable, sizeof(PPTable_t)); - result = vega20_copy_table_to_smc(hwmgr, - (uint8_t *)pp_table, TABLE_PPTABLE); + result = smum_smc_table_manager(hwmgr, + (uint8_t *)pp_table, TABLE_PPTABLE, false); PP_ASSERT_WITH_CODE(!result, "[InitSMCTable] Failed to upload PPtable!", return result); @@ -1044,7 +1044,7 @@ static int vega20_od8_get_gfx_clock_base_voltage( "[GetBaseVoltage] failed to get GFXCLK AVFS voltage from SMU!", return ret); - vega20_read_arg_from_smc(hwmgr, voltage); + *voltage = smum_get_argument(hwmgr); *voltage = *voltage / VOLTAGE_SCALE; return 0; @@ -1067,7 +1067,7 @@ static int vega20_od8_initialize_default_settings( vega20_od8_set_feature_id(hwmgr); /* Set default values */ - ret = vega20_copy_table_from_smc(hwmgr, (uint8_t *)od_table, TABLE_OVERDRIVE); + ret = smum_smc_table_manager(hwmgr, (uint8_t *)od_table, TABLE_OVERDRIVE, true); PP_ASSERT_WITH_CODE(!ret, "Failed to export over drive table!", return ret); @@ -1195,7 +1195,7 @@ static int vega20_od8_initialize_default_settings( } } - ret = vega20_copy_table_to_smc(hwmgr, (uint8_t *)od_table, TABLE_OVERDRIVE); + ret = smum_smc_table_manager(hwmgr, (uint8_t *)od_table, TABLE_OVERDRIVE, false); PP_ASSERT_WITH_CODE(!ret, "Failed to import over drive table!", return ret); @@ -1214,7 +1214,7 @@ static int vega20_od8_set_settings( struct vega20_od8_single_setting *od8_settings = data->od8_settings.od8_settings_array; - ret = vega20_copy_table_from_smc(hwmgr, (uint8_t *)(&od_table), TABLE_OVERDRIVE); + ret = smum_smc_table_manager(hwmgr, (uint8_t *)(&od_table), TABLE_OVERDRIVE, true); PP_ASSERT_WITH_CODE(!ret, "Failed to export over drive table!", return ret); @@ -1271,7 +1271,7 @@ static int vega20_od8_set_settings( break; } - ret = vega20_copy_table_to_smc(hwmgr, (uint8_t *)(&od_table), TABLE_OVERDRIVE); + ret = smum_smc_table_manager(hwmgr, (uint8_t *)(&od_table), TABLE_OVERDRIVE, false); PP_ASSERT_WITH_CODE(!ret, "Failed to import over drive table!", return ret); @@ -1401,7 +1401,7 @@ static int vega20_get_max_sustainable_clock(struct pp_hwmgr *hwmgr, (clock_select << 16))) == 0, "[GetMaxSustainableClock] Failed to get max DC clock from SMC!", return ret); - vega20_read_arg_from_smc(hwmgr, clock); + *clock = smum_get_argument(hwmgr); /* if DC limit is zero, return AC limit */ if (*clock == 0) { @@ -1410,7 +1410,7 @@ static int vega20_get_max_sustainable_clock(struct pp_hwmgr *hwmgr, (clock_select << 16))) == 0, "[GetMaxSustainableClock] failed to get max AC clock from SMC!", return ret); - vega20_read_arg_from_smc(hwmgr, clock); + *clock = smum_get_argument(hwmgr); } return 0; @@ -1770,14 +1770,14 @@ static int vega20_get_clock_ranges(struct pp_hwmgr *hwmgr, PPSMC_MSG_GetMaxDpmFreq, (clock_select << 16))) == 0, "[GetClockRanges] Failed to get max clock from SMC!", return ret); - vega20_read_arg_from_smc(hwmgr, clock); + *clock = smum_get_argument(hwmgr); } else { PP_ASSERT_WITH_CODE((ret = smum_send_msg_to_smc_with_parameter(hwmgr, PPSMC_MSG_GetMinDpmFreq, (clock_select << 16))) == 0, "[GetClockRanges] Failed to get min clock from SMC!", return ret); - vega20_read_arg_from_smc(hwmgr, clock); + *clock = smum_get_argument(hwmgr); } return 0; @@ -1841,7 +1841,7 @@ static int vega20_get_gpu_power(struct pp_hwmgr *hwmgr, int ret = 0; SmuMetrics_t metrics_table; - ret = vega20_copy_table_from_smc(hwmgr, (uint8_t *)&metrics_table, TABLE_SMU_METRICS); + ret = smum_smc_table_manager(hwmgr, (uint8_t *)&metrics_table, TABLE_SMU_METRICS, true); PP_ASSERT_WITH_CODE(!ret, "Failed to export SMU METRICS table!", return ret); @@ -1862,7 +1862,7 @@ static int vega20_get_current_gfx_clk_freq(struct pp_hwmgr *hwmgr, uint32_t *gfx PPSMC_MSG_GetDpmClockFreq, (PPCLK_GFXCLK << 16))) == 0, "[GetCurrentGfxClkFreq] Attempt to get Current GFXCLK Frequency Failed!", return ret); - vega20_read_arg_from_smc(hwmgr, &gfx_clk); + gfx_clk = smum_get_argument(hwmgr); *gfx_freq = gfx_clk * 100; @@ -1880,7 +1880,7 @@ static int vega20_get_current_mclk_freq(struct pp_hwmgr *hwmgr, uint32_t *mclk_f PPSMC_MSG_GetDpmClockFreq, (PPCLK_UCLK << 16))) == 0, "[GetCurrentMClkFreq] Attempt to get Current MCLK Frequency Failed!", return ret); - vega20_read_arg_from_smc(hwmgr, &mem_clk); + mem_clk = smum_get_argument(hwmgr); *mclk_freq = mem_clk * 100; @@ -1893,7 +1893,7 @@ static int vega20_get_current_activity_percent(struct pp_hwmgr *hwmgr, int ret = 0; SmuMetrics_t metrics_table; - ret = vega20_copy_table_from_smc(hwmgr, (uint8_t *)&metrics_table, TABLE_SMU_METRICS); + ret = smum_smc_table_manager(hwmgr, (uint8_t *)&metrics_table, TABLE_SMU_METRICS, true); PP_ASSERT_WITH_CODE(!ret, "Failed to export SMU METRICS table!", return ret); @@ -1941,6 +1941,11 @@ static int vega20_read_sensor(struct pp_hwmgr *hwmgr, int idx, *size = 16; ret = vega20_get_gpu_power(hwmgr, (uint32_t *)value); break; + case AMDGPU_PP_SENSOR_ENABLED_SMC_FEATURES_MASK: + ret = vega20_get_enabled_smc_features(hwmgr, (uint64_t *)value); + if (!ret) + *size = 8; + break; default: ret = -EINVAL; break; @@ -2612,18 +2617,18 @@ static int vega20_odn_edit_dpm_table(struct pp_hwmgr *hwmgr, data->gfxclk_overdrive = false; data->memclk_overdrive = false; - ret = vega20_copy_table_from_smc(hwmgr, - (uint8_t *)od_table, - TABLE_OVERDRIVE); + ret = smum_smc_table_manager(hwmgr, + (uint8_t *)od_table, + TABLE_OVERDRIVE, true); PP_ASSERT_WITH_CODE(!ret, "Failed to export overdrive table!", return ret); break; case PP_OD_COMMIT_DPM_TABLE: - ret = vega20_copy_table_to_smc(hwmgr, - (uint8_t *)od_table, - TABLE_OVERDRIVE); + ret = smum_smc_table_manager(hwmgr, + (uint8_t *)od_table, + TABLE_OVERDRIVE, false); PP_ASSERT_WITH_CODE(!ret, "Failed to import overdrive table!", return ret); @@ -2847,8 +2852,8 @@ static int vega20_display_configuration_changed_task(struct pp_hwmgr *hwmgr) if ((data->water_marks_bitmap & WaterMarksExist) && !(data->water_marks_bitmap & WaterMarksLoaded)) { - result = vega20_copy_table_to_smc(hwmgr, - (uint8_t *)wm_table, TABLE_WATERMARKS); + result = smum_smc_table_manager(hwmgr, + (uint8_t *)wm_table, TABLE_WATERMARKS, false); PP_ASSERT_WITH_CODE(!result, "Failed to update WMTABLE!", return result); diff --git a/drivers/gpu/drm/amd/powerplay/hwmgr/vega20_thermal.c b/drivers/gpu/drm/amd/powerplay/hwmgr/vega20_thermal.c index 2984ddd5428c..1c951a5d827d 100644 --- a/drivers/gpu/drm/amd/powerplay/hwmgr/vega20_thermal.c +++ b/drivers/gpu/drm/amd/powerplay/hwmgr/vega20_thermal.c @@ -37,10 +37,7 @@ static int vega20_get_current_rpm(struct pp_hwmgr *hwmgr, uint32_t *current_rpm) PPSMC_MSG_GetCurrentRpm)) == 0, "Attempt to get current RPM from SMC Failed!", return ret); - PP_ASSERT_WITH_CODE((ret = vega20_read_arg_from_smc(hwmgr, - current_rpm)) == 0, - "Attempt to read current RPM from SMC Failed!", - return ret); + *current_rpm = smum_get_argument(hwmgr); return 0; } diff --git a/drivers/gpu/drm/amd/powerplay/smumgr/smu8_smumgr.c b/drivers/gpu/drm/amd/powerplay/smumgr/smu8_smumgr.c index f7e3bc22bb93..a74c5be1ec18 100644 --- a/drivers/gpu/drm/amd/powerplay/smumgr/smu8_smumgr.c +++ b/drivers/gpu/drm/amd/powerplay/smumgr/smu8_smumgr.c @@ -724,11 +724,13 @@ static int smu8_start_smu(struct pp_hwmgr *hwmgr) if (hwmgr->chip_id == CHIP_STONEY) fw_to_check &= ~(UCODE_ID_SDMA1_MASK | UCODE_ID_CP_MEC_JT2_MASK); - ret = smu8_request_smu_load_fw(hwmgr); - if (ret) - pr_err("SMU firmware load failed\n"); + smu8_request_smu_load_fw(hwmgr); - smu8_check_fw_load_finish(hwmgr, fw_to_check); + ret = smu8_check_fw_load_finish(hwmgr, fw_to_check); + if (ret) { + pr_err("SMU firmware load failed\n"); + return ret; + } ret = smu8_load_mec_firmware(hwmgr); if (ret) diff --git a/drivers/gpu/drm/amd/powerplay/smumgr/vega10_smumgr.c b/drivers/gpu/drm/amd/powerplay/smumgr/vega10_smumgr.c index 5d19115f410c..c81acc3192ad 100644 --- a/drivers/gpu/drm/amd/powerplay/smumgr/vega10_smumgr.c +++ b/drivers/gpu/drm/amd/powerplay/smumgr/vega10_smumgr.c @@ -88,8 +88,18 @@ static int vega10_copy_table_to_smc(struct pp_hwmgr *hwmgr, return 0; } -static int vega10_get_smc_features(struct pp_hwmgr *hwmgr, - uint32_t *features_enabled) +int vega10_enable_smc_features(struct pp_hwmgr *hwmgr, + bool enable, uint32_t feature_mask) +{ + int msg = enable ? PPSMC_MSG_EnableSmuFeatures : + PPSMC_MSG_DisableSmuFeatures; + + return smum_send_msg_to_smc_with_parameter(hwmgr, + msg, feature_mask); +} + +int vega10_get_enabled_smc_features(struct pp_hwmgr *hwmgr, + uint64_t *features_enabled) { if (features_enabled == NULL) return -EINVAL; @@ -102,9 +112,9 @@ static int vega10_get_smc_features(struct pp_hwmgr *hwmgr, static bool vega10_is_dpm_running(struct pp_hwmgr *hwmgr) { - uint32_t features_enabled = 0; + uint64_t features_enabled = 0; - vega10_get_smc_features(hwmgr, &features_enabled); + vega10_get_enabled_smc_features(hwmgr, &features_enabled); if (features_enabled & SMC_DPM_FEATURES) return true; diff --git a/drivers/gpu/drm/amd/powerplay/smumgr/vega10_smumgr.h b/drivers/gpu/drm/amd/powerplay/smumgr/vega10_smumgr.h index 424e868bc768..bad760f22624 100644 --- a/drivers/gpu/drm/amd/powerplay/smumgr/vega10_smumgr.h +++ b/drivers/gpu/drm/amd/powerplay/smumgr/vega10_smumgr.h @@ -42,6 +42,10 @@ struct vega10_smumgr { struct smu_table_array smu_tables; }; +int vega10_enable_smc_features(struct pp_hwmgr *hwmgr, + bool enable, uint32_t feature_mask); +int vega10_get_enabled_smc_features(struct pp_hwmgr *hwmgr, + uint64_t *features_enabled); #endif diff --git a/drivers/gpu/drm/amd/powerplay/smumgr/vega12_smumgr.c b/drivers/gpu/drm/amd/powerplay/smumgr/vega12_smumgr.c index 7f0e2109f40d..ddb801517667 100644 --- a/drivers/gpu/drm/amd/powerplay/smumgr/vega12_smumgr.c +++ b/drivers/gpu/drm/amd/powerplay/smumgr/vega12_smumgr.c @@ -37,8 +37,8 @@ * @param hwmgr the address of the HW manager * @param table_id the driver's table ID to copy from */ -int vega12_copy_table_from_smc(struct pp_hwmgr *hwmgr, - uint8_t *table, int16_t table_id) +static int vega12_copy_table_from_smc(struct pp_hwmgr *hwmgr, + uint8_t *table, int16_t table_id) { struct vega12_smumgr *priv = (struct vega12_smumgr *)(hwmgr->smu_backend); @@ -75,8 +75,8 @@ int vega12_copy_table_from_smc(struct pp_hwmgr *hwmgr, * @param hwmgr the address of the HW manager * @param table_id the table to copy from */ -int vega12_copy_table_to_smc(struct pp_hwmgr *hwmgr, - uint8_t *table, int16_t table_id) +static int vega12_copy_table_to_smc(struct pp_hwmgr *hwmgr, + uint8_t *table, int16_t table_id) { struct vega12_smumgr *priv = (struct vega12_smumgr *)(hwmgr->smu_backend); @@ -351,6 +351,19 @@ static int vega12_start_smu(struct pp_hwmgr *hwmgr) return 0; } +static int vega12_smc_table_manager(struct pp_hwmgr *hwmgr, uint8_t *table, + uint16_t table_id, bool rw) +{ + int ret; + + if (rw) + ret = vega12_copy_table_from_smc(hwmgr, table, table_id); + else + ret = vega12_copy_table_to_smc(hwmgr, table, table_id); + + return ret; +} + const struct pp_smumgr_func vega12_smu_funcs = { .smu_init = &vega12_smu_init, .smu_fini = &vega12_smu_fini, @@ -362,4 +375,5 @@ const struct pp_smumgr_func vega12_smu_funcs = { .upload_pptable_settings = NULL, .is_dpm_running = vega12_is_dpm_running, .get_argument = smu9_get_argument, + .smc_table_manager = vega12_smc_table_manager, }; diff --git a/drivers/gpu/drm/amd/powerplay/smumgr/vega12_smumgr.h b/drivers/gpu/drm/amd/powerplay/smumgr/vega12_smumgr.h index b285cbc04019..aeec965ce81f 100644 --- a/drivers/gpu/drm/amd/powerplay/smumgr/vega12_smumgr.h +++ b/drivers/gpu/drm/amd/powerplay/smumgr/vega12_smumgr.h @@ -48,10 +48,6 @@ struct vega12_smumgr { #define SMU_FEATURES_HIGH_MASK 0xFFFFFFFF00000000 #define SMU_FEATURES_HIGH_SHIFT 32 -int vega12_copy_table_from_smc(struct pp_hwmgr *hwmgr, - uint8_t *table, int16_t table_id); -int vega12_copy_table_to_smc(struct pp_hwmgr *hwmgr, - uint8_t *table, int16_t table_id); int vega12_enable_smc_features(struct pp_hwmgr *hwmgr, bool enable, uint64_t feature_mask); int vega12_get_enabled_smc_features(struct pp_hwmgr *hwmgr, diff --git a/drivers/gpu/drm/amd/powerplay/smumgr/vega20_smumgr.c b/drivers/gpu/drm/amd/powerplay/smumgr/vega20_smumgr.c index fe7f71079e0e..b7ff7d4d6f44 100644 --- a/drivers/gpu/drm/amd/powerplay/smumgr/vega20_smumgr.c +++ b/drivers/gpu/drm/amd/powerplay/smumgr/vega20_smumgr.c @@ -148,19 +148,11 @@ static int vega20_send_msg_to_smc_with_parameter(struct pp_hwmgr *hwmgr, return (ret == PPSMC_Result_OK) ? 0 : -EIO; } -/* - * Retrieve an argument from SMC. - * @param hwmgr the address of the powerplay hardware manager. - * @param arg pointer to store the argument from SMC. - * @return Always return 0. - */ -int vega20_read_arg_from_smc(struct pp_hwmgr *hwmgr, uint32_t *arg) +static uint32_t vega20_get_argument(struct pp_hwmgr *hwmgr) { struct amdgpu_device *adev = hwmgr->adev; - *arg = RREG32_SOC15(MP1, 0, mmMP1_SMN_C2PMSG_82); - - return 0; + return RREG32_SOC15(MP1, 0, mmMP1_SMN_C2PMSG_82); } /* @@ -168,8 +160,8 @@ int vega20_read_arg_from_smc(struct pp_hwmgr *hwmgr, uint32_t *arg) * @param hwmgr the address of the HW manager * @param table_id the driver's table ID to copy from */ -int vega20_copy_table_from_smc(struct pp_hwmgr *hwmgr, - uint8_t *table, int16_t table_id) +static int vega20_copy_table_from_smc(struct pp_hwmgr *hwmgr, + uint8_t *table, int16_t table_id) { struct vega20_smumgr *priv = (struct vega20_smumgr *)(hwmgr->smu_backend); @@ -208,8 +200,8 @@ int vega20_copy_table_from_smc(struct pp_hwmgr *hwmgr, * @param hwmgr the address of the HW manager * @param table_id the table to copy from */ -int vega20_copy_table_to_smc(struct pp_hwmgr *hwmgr, - uint8_t *table, int16_t table_id) +static int vega20_copy_table_to_smc(struct pp_hwmgr *hwmgr, + uint8_t *table, int16_t table_id) { struct vega20_smumgr *priv = (struct vega20_smumgr *)(hwmgr->smu_backend); @@ -345,18 +337,12 @@ int vega20_get_enabled_smc_features(struct pp_hwmgr *hwmgr, PPSMC_MSG_GetEnabledSmuFeaturesLow)) == 0, "[GetEnabledSMCFeatures] Attemp to get SMU features Low failed!", return ret); - PP_ASSERT_WITH_CODE((ret = vega20_read_arg_from_smc(hwmgr, - &smc_features_low)) == 0, - "[GetEnabledSMCFeatures] Attemp to read SMU features Low argument failed!", - return ret); + smc_features_low = vega20_get_argument(hwmgr); PP_ASSERT_WITH_CODE((ret = vega20_send_msg_to_smc(hwmgr, PPSMC_MSG_GetEnabledSmuFeaturesHigh)) == 0, "[GetEnabledSMCFeatures] Attemp to get SMU features High failed!", return ret); - PP_ASSERT_WITH_CODE((ret = vega20_read_arg_from_smc(hwmgr, - &smc_features_high)) == 0, - "[GetEnabledSMCFeatures] Attemp to read SMU features High argument failed!", - return ret); + smc_features_high = vega20_get_argument(hwmgr); *features_enabled = ((((uint64_t)smc_features_low << SMU_FEATURES_LOW_SHIFT) & SMU_FEATURES_LOW_MASK) | (((uint64_t)smc_features_high << SMU_FEATURES_HIGH_SHIFT) & SMU_FEATURES_HIGH_MASK)); @@ -574,6 +560,19 @@ static bool vega20_is_dpm_running(struct pp_hwmgr *hwmgr) return false; } +static int vega20_smc_table_manager(struct pp_hwmgr *hwmgr, uint8_t *table, + uint16_t table_id, bool rw) +{ + int ret; + + if (rw) + ret = vega20_copy_table_from_smc(hwmgr, table, table_id); + else + ret = vega20_copy_table_to_smc(hwmgr, table, table_id); + + return ret; +} + const struct pp_smumgr_func vega20_smu_funcs = { .smu_init = &vega20_smu_init, .smu_fini = &vega20_smu_fini, @@ -584,4 +583,6 @@ const struct pp_smumgr_func vega20_smu_funcs = { .download_pptable_settings = NULL, .upload_pptable_settings = NULL, .is_dpm_running = vega20_is_dpm_running, + .get_argument = vega20_get_argument, + .smc_table_manager = vega20_smc_table_manager, }; diff --git a/drivers/gpu/drm/amd/powerplay/smumgr/vega20_smumgr.h b/drivers/gpu/drm/amd/powerplay/smumgr/vega20_smumgr.h index 505eb0d82e3b..77349c3f0162 100644 --- a/drivers/gpu/drm/amd/powerplay/smumgr/vega20_smumgr.h +++ b/drivers/gpu/drm/amd/powerplay/smumgr/vega20_smumgr.h @@ -47,11 +47,6 @@ struct vega20_smumgr { #define SMU_FEATURES_HIGH_MASK 0xFFFFFFFF00000000 #define SMU_FEATURES_HIGH_SHIFT 32 -int vega20_read_arg_from_smc(struct pp_hwmgr *hwmgr, uint32_t *arg); -int vega20_copy_table_from_smc(struct pp_hwmgr *hwmgr, - uint8_t *table, int16_t table_id); -int vega20_copy_table_to_smc(struct pp_hwmgr *hwmgr, - uint8_t *table, int16_t table_id); int vega20_enable_smc_features(struct pp_hwmgr *hwmgr, bool enable, uint64_t feature_mask); int vega20_get_enabled_smc_features(struct pp_hwmgr *hwmgr, diff --git a/drivers/gpu/drm/etnaviv/etnaviv_sched.c b/drivers/gpu/drm/etnaviv/etnaviv_sched.c index 69e9b431bf1f..e7c3ed6c9a2e 100644 --- a/drivers/gpu/drm/etnaviv/etnaviv_sched.c +++ b/drivers/gpu/drm/etnaviv/etnaviv_sched.c @@ -105,7 +105,7 @@ static void etnaviv_sched_timedout_job(struct drm_sched_job *sched_job) change = dma_addr - gpu->hangcheck_dma_addr; if (change < 0 || change > 16) { gpu->hangcheck_dma_addr = dma_addr; - schedule_delayed_work(&sched_job->work_tdr, + schedule_delayed_work(&sched_job->sched->work_tdr, sched_job->sched->timeout); return; } diff --git a/drivers/gpu/drm/scheduler/sched_main.c b/drivers/gpu/drm/scheduler/sched_main.c index 9ca741f3a0bc..4e8505d51795 100644 --- a/drivers/gpu/drm/scheduler/sched_main.c +++ b/drivers/gpu/drm/scheduler/sched_main.c @@ -197,19 +197,15 @@ static void drm_sched_job_finish(struct work_struct *work) * manages to find this job as the next job in the list, the fence * signaled check below will prevent the timeout to be restarted. */ - cancel_delayed_work_sync(&s_job->work_tdr); + cancel_delayed_work_sync(&sched->work_tdr); spin_lock(&sched->job_list_lock); - /* queue TDR for next job */ - if (sched->timeout != MAX_SCHEDULE_TIMEOUT && - !list_is_last(&s_job->node, &sched->ring_mirror_list)) { - struct drm_sched_job *next = list_next_entry(s_job, node); - - if (!dma_fence_is_signaled(&next->s_fence->finished)) - schedule_delayed_work(&next->work_tdr, sched->timeout); - } /* remove job from ring_mirror_list */ list_del(&s_job->node); + /* queue TDR for next job */ + if (sched->timeout != MAX_SCHEDULE_TIMEOUT && + !list_empty(&sched->ring_mirror_list)) + schedule_delayed_work(&sched->work_tdr, sched->timeout); spin_unlock(&sched->job_list_lock); dma_fence_put(&s_job->s_fence->finished); @@ -236,16 +232,21 @@ static void drm_sched_job_begin(struct drm_sched_job *s_job) if (sched->timeout != MAX_SCHEDULE_TIMEOUT && list_first_entry_or_null(&sched->ring_mirror_list, struct drm_sched_job, node) == s_job) - schedule_delayed_work(&s_job->work_tdr, sched->timeout); + schedule_delayed_work(&sched->work_tdr, sched->timeout); spin_unlock(&sched->job_list_lock); } static void drm_sched_job_timedout(struct work_struct *work) { - struct drm_sched_job *job = container_of(work, struct drm_sched_job, - work_tdr.work); + struct drm_gpu_scheduler *sched; + struct drm_sched_job *job; + + sched = container_of(work, struct drm_gpu_scheduler, work_tdr.work); + job = list_first_entry_or_null(&sched->ring_mirror_list, + struct drm_sched_job, node); - job->sched->ops->timedout_job(job); + if (job) + job->sched->ops->timedout_job(job); } /** @@ -315,7 +316,7 @@ void drm_sched_job_recovery(struct drm_gpu_scheduler *sched) s_job = list_first_entry_or_null(&sched->ring_mirror_list, struct drm_sched_job, node); if (s_job && sched->timeout != MAX_SCHEDULE_TIMEOUT) - schedule_delayed_work(&s_job->work_tdr, sched->timeout); + schedule_delayed_work(&sched->work_tdr, sched->timeout); list_for_each_entry_safe(s_job, tmp, &sched->ring_mirror_list, node) { struct drm_sched_fence *s_fence = s_job->s_fence; @@ -384,7 +385,6 @@ int drm_sched_job_init(struct drm_sched_job *job, INIT_WORK(&job->finish_work, drm_sched_job_finish); INIT_LIST_HEAD(&job->node); - INIT_DELAYED_WORK(&job->work_tdr, drm_sched_job_timedout); return 0; } @@ -575,6 +575,7 @@ int drm_sched_init(struct drm_gpu_scheduler *sched, INIT_LIST_HEAD(&sched->ring_mirror_list); spin_lock_init(&sched->job_list_lock); atomic_set(&sched->hw_rq_count, 0); + INIT_DELAYED_WORK(&sched->work_tdr, drm_sched_job_timedout); atomic_set(&sched->num_jobs, 0); atomic64_set(&sched->job_id_count, 0); diff --git a/drivers/gpu/drm/v3d/v3d_sched.c b/drivers/gpu/drm/v3d/v3d_sched.c index a5501581d96b..9243dea6e6ad 100644 --- a/drivers/gpu/drm/v3d/v3d_sched.c +++ b/drivers/gpu/drm/v3d/v3d_sched.c @@ -168,7 +168,7 @@ v3d_job_timedout(struct drm_sched_job *sched_job) job->timedout_ctca = ctca; job->timedout_ctra = ctra; - schedule_delayed_work(&job->base.work_tdr, + schedule_delayed_work(&job->base.sched->work_tdr, job->base.sched->timeout); return; } diff --git a/include/drm/gpu_scheduler.h b/include/drm/gpu_scheduler.h index daec50f887b3..d87b268f1781 100644 --- a/include/drm/gpu_scheduler.h +++ b/include/drm/gpu_scheduler.h @@ -175,8 +175,6 @@ struct drm_sched_fence *to_drm_sched_fence(struct dma_fence *f); * finished to remove the job from the * @drm_gpu_scheduler.ring_mirror_list. * @node: used to append this struct to the @drm_gpu_scheduler.ring_mirror_list. - * @work_tdr: schedules a delayed call to @drm_sched_job_timedout after the timeout - * interval is over. * @id: a unique id assigned to each job scheduled on the scheduler. * @karma: increment on every hang caused by this job. If this exceeds the hang * limit of the scheduler then the job is marked guilty and will not @@ -195,7 +193,6 @@ struct drm_sched_job { struct dma_fence_cb finish_cb; struct work_struct finish_work; struct list_head node; - struct delayed_work work_tdr; uint64_t id; atomic_t karma; enum drm_sched_priority s_priority; @@ -259,6 +256,8 @@ struct drm_sched_backend_ops { * finished. * @hw_rq_count: the number of jobs currently in the hardware queue. * @job_id_count: used to assign unique id to the each job. + * @work_tdr: schedules a delayed call to @drm_sched_job_timedout after the + * timeout interval is over. * @thread: the kthread on which the scheduler which run. * @ring_mirror_list: the list of jobs which are currently in the job queue. * @job_list_lock: lock to protect the ring_mirror_list. @@ -278,6 +277,7 @@ struct drm_gpu_scheduler { wait_queue_head_t job_scheduled; atomic_t hw_rq_count; atomic64_t job_id_count; + struct delayed_work work_tdr; struct task_struct *thread; struct list_head ring_mirror_list; spinlock_t job_list_lock; diff --git a/include/uapi/linux/kfd_ioctl.h b/include/uapi/linux/kfd_ioctl.h index 01674b56e14f..f5ff8a76e208 100644 --- a/include/uapi/linux/kfd_ioctl.h +++ b/include/uapi/linux/kfd_ioctl.h @@ -82,6 +82,14 @@ struct kfd_ioctl_set_cu_mask_args { __u64 cu_mask_ptr; /* to KFD */ }; +struct kfd_ioctl_get_queue_wave_state_args { + uint64_t ctl_stack_address; /* to KFD */ + uint32_t ctl_stack_used_size; /* from KFD */ + uint32_t save_area_used_size; /* from KFD */ + uint32_t queue_id; /* to KFD */ + uint32_t pad; +}; + /* For kfd_ioctl_set_memory_policy_args.default_policy and alternate_policy */ #define KFD_IOC_CACHE_POLICY_COHERENT 0 #define KFD_IOC_CACHE_POLICY_NONCOHERENT 1 @@ -475,7 +483,10 @@ struct kfd_ioctl_unmap_memory_from_gpu_args { #define AMDKFD_IOC_SET_CU_MASK \ AMDKFD_IOW(0x1A, struct kfd_ioctl_set_cu_mask_args) +#define AMDKFD_IOC_GET_QUEUE_WAVE_STATE \ + AMDKFD_IOWR(0x1B, struct kfd_ioctl_get_queue_wave_state_args) + #define AMDKFD_COMMAND_START 0x01 -#define AMDKFD_COMMAND_END 0x1B +#define AMDKFD_COMMAND_END 0x1C #endif |