diff options
Diffstat (limited to 'drivers/gpu/drm/amd/amdgpu')
59 files changed, 1660 insertions, 1362 deletions
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h b/drivers/gpu/drm/amd/amdgpu/amdgpu.h index 6a8129949333..833c3c16501a 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h @@ -110,6 +110,7 @@ extern int amdgpu_pos_buf_per_se; extern int amdgpu_cntl_sb_buf_per_se; extern int amdgpu_param_buf_per_se; +#define AMDGPU_DEFAULT_GTT_SIZE_MB 3072ULL /* 3GB by default */ #define AMDGPU_WAIT_IDLE_TIMEOUT_IN_MS 3000 #define AMDGPU_MAX_USEC_TIMEOUT 100000 /* 100 ms */ #define AMDGPU_FENCE_JIFFIES_TIMEOUT (HZ / 2) @@ -966,6 +967,8 @@ struct amdgpu_gfx_config { unsigned mc_arb_ramcfg; unsigned gb_addr_config; unsigned num_rbs; + unsigned gs_vgt_table_depth; + unsigned gs_prim_buffer_depth; uint32_t tile_mode_array[32]; uint32_t macrotile_mode_array[16]; @@ -980,6 +983,7 @@ struct amdgpu_gfx_config { struct amdgpu_cu_info { uint32_t number; /* total active CU number */ uint32_t ao_cu_mask; + uint32_t wave_front_size; uint32_t bitmap[4][4]; }; @@ -1000,10 +1004,10 @@ struct amdgpu_ngg_buf { }; enum { - PRIM = 0, - POS, - CNTL, - PARAM, + NGG_PRIM = 0, + NGG_POS, + NGG_CNTL, + NGG_PARAM, NGG_BUF_MAX }; @@ -1125,6 +1129,7 @@ struct amdgpu_job { void *owner; uint64_t fence_ctx; /* the fence_context this job uses */ bool vm_needs_flush; + bool need_pipeline_sync; unsigned vm_id; uint64_t vm_pd_addr; uint32_t gds_base, gds_size; @@ -1704,9 +1709,6 @@ void amdgpu_mm_wdoorbell64(struct amdgpu_device *adev, u32 index, u64 v); #define WREG32_FIELD_OFFSET(reg, offset, field, val) \ WREG32(mm##reg + offset, (RREG32(mm##reg + offset) & ~REG_FIELD_MASK(reg, field)) | (val) << REG_FIELD_SHIFT(reg, field)) -#define WREG32_FIELD15(ip, idx, reg, field, val) \ - WREG32(SOC15_REG_OFFSET(ip, idx, mm##reg), (RREG32(SOC15_REG_OFFSET(ip, idx, mm##reg)) & ~REG_FIELD_MASK(reg, field)) | (val) << REG_FIELD_SHIFT(reg, field)) - /* * BIOS helpers. 
*/ diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_atombios.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_atombios.c index ad4329922f79..1cf78f4dd339 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_atombios.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_atombios.c @@ -1727,6 +1727,12 @@ void amdgpu_atombios_scratch_regs_restore(struct amdgpu_device *adev) { int i; + /* + * VBIOS will check ASIC_INIT_COMPLETE bit to decide if + * execute ASIC_Init posting via driver + */ + adev->bios_scratch[7] &= ~ATOM_S7_ASIC_INIT_COMPLETE_MASK; + for (i = 0; i < AMDGPU_BIOS_NUM_SCRATCH; i++) WREG32(mmBIOS_SCRATCH_0 + i, adev->bios_scratch[i]); } diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_atomfirmware.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_atomfirmware.c index 4b9abd68e04f..4bdda56fccee 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_atomfirmware.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_atomfirmware.c @@ -26,6 +26,7 @@ #include "atomfirmware.h" #include "amdgpu_atomfirmware.h" #include "atom.h" +#include "atombios.h" #define get_index_into_master_table(master_table, table_name) (offsetof(struct master_table, table_name) / sizeof(uint16_t)) @@ -77,10 +78,29 @@ void amdgpu_atomfirmware_scratch_regs_restore(struct amdgpu_device *adev) { int i; + /* + * VBIOS will check ASIC_INIT_COMPLETE bit to decide if + * execute ASIC_Init posting via driver + */ + adev->bios_scratch[7] &= ~ATOM_S7_ASIC_INIT_COMPLETE_MASK; + for (i = 0; i < AMDGPU_BIOS_NUM_SCRATCH; i++) WREG32(adev->bios_scratch_reg_offset + i, adev->bios_scratch[i]); } +void amdgpu_atomfirmware_scratch_regs_engine_hung(struct amdgpu_device *adev, + bool hung) +{ + u32 tmp = RREG32(adev->bios_scratch_reg_offset + 3); + + if (hung) + tmp |= ATOM_S3_ASIC_GUI_ENGINE_HUNG; + else + tmp &= ~ATOM_S3_ASIC_GUI_ENGINE_HUNG; + + WREG32(adev->bios_scratch_reg_offset + 3, tmp); +} + int amdgpu_atomfirmware_allocate_fb_scratch(struct amdgpu_device *adev) { struct atom_context *ctx = adev->mode_info.atom_context; diff --git 
a/drivers/gpu/drm/amd/amdgpu/amdgpu_atomfirmware.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_atomfirmware.h index d0c4dcd7fa96..a2c3ebe22c71 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_atomfirmware.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_atomfirmware.h @@ -28,6 +28,8 @@ bool amdgpu_atomfirmware_gpu_supports_virtualization(struct amdgpu_device *adev) void amdgpu_atomfirmware_scratch_regs_init(struct amdgpu_device *adev); void amdgpu_atomfirmware_scratch_regs_save(struct amdgpu_device *adev); void amdgpu_atomfirmware_scratch_regs_restore(struct amdgpu_device *adev); +void amdgpu_atomfirmware_scratch_regs_engine_hung(struct amdgpu_device *adev, + bool hung); int amdgpu_atomfirmware_allocate_fb_scratch(struct amdgpu_device *adev); #endif diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_benchmark.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_benchmark.c index cc97eee93226..1beae5b930d0 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_benchmark.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_benchmark.c @@ -117,8 +117,13 @@ static void amdgpu_benchmark_move(struct amdgpu_device *adev, unsigned size, } out_cleanup: + /* Check error value now. 
The value can be overwritten when clean up.*/ + if (r) { + DRM_ERROR("Error while benchmarking BO move.\n"); + } + if (sobj) { - r = amdgpu_bo_reserve(sobj, false); + r = amdgpu_bo_reserve(sobj, true); if (likely(r == 0)) { amdgpu_bo_unpin(sobj); amdgpu_bo_unreserve(sobj); @@ -126,17 +131,13 @@ out_cleanup: amdgpu_bo_unref(&sobj); } if (dobj) { - r = amdgpu_bo_reserve(dobj, false); + r = amdgpu_bo_reserve(dobj, true); if (likely(r == 0)) { amdgpu_bo_unpin(dobj); amdgpu_bo_unreserve(dobj); } amdgpu_bo_unref(&dobj); } - - if (r) { - DRM_ERROR("Error while benchmarking BO move.\n"); - } } void amdgpu_benchmark(struct amdgpu_device *adev, int test_number) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_cgs.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_cgs.c index 1c7e6c28f93a..c6dba1eaefbd 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_cgs.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_cgs.c @@ -42,82 +42,6 @@ struct amdgpu_cgs_device { struct amdgpu_device *adev = \ ((struct amdgpu_cgs_device *)cgs_device)->adev -static int amdgpu_cgs_gpu_mem_info(struct cgs_device *cgs_device, enum cgs_gpu_mem_type type, - uint64_t *mc_start, uint64_t *mc_size, - uint64_t *mem_size) -{ - CGS_FUNC_ADEV; - switch(type) { - case CGS_GPU_MEM_TYPE__VISIBLE_CONTIG_FB: - case CGS_GPU_MEM_TYPE__VISIBLE_FB: - *mc_start = 0; - *mc_size = adev->mc.visible_vram_size; - *mem_size = adev->mc.visible_vram_size - adev->vram_pin_size; - break; - case CGS_GPU_MEM_TYPE__INVISIBLE_CONTIG_FB: - case CGS_GPU_MEM_TYPE__INVISIBLE_FB: - *mc_start = adev->mc.visible_vram_size; - *mc_size = adev->mc.real_vram_size - adev->mc.visible_vram_size; - *mem_size = *mc_size; - break; - case CGS_GPU_MEM_TYPE__GART_CACHEABLE: - case CGS_GPU_MEM_TYPE__GART_WRITECOMBINE: - *mc_start = adev->mc.gtt_start; - *mc_size = adev->mc.gtt_size; - *mem_size = adev->mc.gtt_size - adev->gart_pin_size; - break; - default: - return -EINVAL; - } - - return 0; -} - -static int amdgpu_cgs_gmap_kmem(struct cgs_device *cgs_device, void *kmem, - uint64_t 
size, - uint64_t min_offset, uint64_t max_offset, - cgs_handle_t *kmem_handle, uint64_t *mcaddr) -{ - CGS_FUNC_ADEV; - int ret; - struct amdgpu_bo *bo; - struct page *kmem_page = vmalloc_to_page(kmem); - int npages = ALIGN(size, PAGE_SIZE) >> PAGE_SHIFT; - - struct sg_table *sg = drm_prime_pages_to_sg(&kmem_page, npages); - ret = amdgpu_bo_create(adev, size, PAGE_SIZE, false, - AMDGPU_GEM_DOMAIN_GTT, 0, sg, NULL, &bo); - if (ret) - return ret; - ret = amdgpu_bo_reserve(bo, false); - if (unlikely(ret != 0)) - return ret; - - /* pin buffer into GTT */ - ret = amdgpu_bo_pin_restricted(bo, AMDGPU_GEM_DOMAIN_GTT, - min_offset, max_offset, mcaddr); - amdgpu_bo_unreserve(bo); - - *kmem_handle = (cgs_handle_t)bo; - return ret; -} - -static int amdgpu_cgs_gunmap_kmem(struct cgs_device *cgs_device, cgs_handle_t kmem_handle) -{ - struct amdgpu_bo *obj = (struct amdgpu_bo *)kmem_handle; - - if (obj) { - int r = amdgpu_bo_reserve(obj, false); - if (likely(r == 0)) { - amdgpu_bo_unpin(obj); - amdgpu_bo_unreserve(obj); - } - amdgpu_bo_unref(&obj); - - } - return 0; -} - static int amdgpu_cgs_alloc_gpu_mem(struct cgs_device *cgs_device, enum cgs_gpu_mem_type type, uint64_t size, uint64_t align, @@ -215,7 +139,7 @@ static int amdgpu_cgs_free_gpu_mem(struct cgs_device *cgs_device, cgs_handle_t h struct amdgpu_bo *obj = (struct amdgpu_bo *)handle; if (obj) { - int r = amdgpu_bo_reserve(obj, false); + int r = amdgpu_bo_reserve(obj, true); if (likely(r == 0)) { amdgpu_bo_kunmap(obj); amdgpu_bo_unpin(obj); @@ -239,7 +163,7 @@ static int amdgpu_cgs_gmap_gpu_mem(struct cgs_device *cgs_device, cgs_handle_t h min_offset = obj->placements[0].fpfn << PAGE_SHIFT; max_offset = obj->placements[0].lpfn << PAGE_SHIFT; - r = amdgpu_bo_reserve(obj, false); + r = amdgpu_bo_reserve(obj, true); if (unlikely(r != 0)) return r; r = amdgpu_bo_pin_restricted(obj, obj->prefered_domains, @@ -252,7 +176,7 @@ static int amdgpu_cgs_gunmap_gpu_mem(struct cgs_device *cgs_device, cgs_handle_t { int r; struct 
amdgpu_bo *obj = (struct amdgpu_bo *)handle; - r = amdgpu_bo_reserve(obj, false); + r = amdgpu_bo_reserve(obj, true); if (unlikely(r != 0)) return r; r = amdgpu_bo_unpin(obj); @@ -265,7 +189,7 @@ static int amdgpu_cgs_kmap_gpu_mem(struct cgs_device *cgs_device, cgs_handle_t h { int r; struct amdgpu_bo *obj = (struct amdgpu_bo *)handle; - r = amdgpu_bo_reserve(obj, false); + r = amdgpu_bo_reserve(obj, true); if (unlikely(r != 0)) return r; r = amdgpu_bo_kmap(obj, map); @@ -277,7 +201,7 @@ static int amdgpu_cgs_kunmap_gpu_mem(struct cgs_device *cgs_device, cgs_handle_t { int r; struct amdgpu_bo *obj = (struct amdgpu_bo *)handle; - r = amdgpu_bo_reserve(obj, false); + r = amdgpu_bo_reserve(obj, true); if (unlikely(r != 0)) return r; amdgpu_bo_kunmap(obj); @@ -349,62 +273,6 @@ static void amdgpu_cgs_write_ind_register(struct cgs_device *cgs_device, WARN(1, "Invalid indirect register space"); } -static uint8_t amdgpu_cgs_read_pci_config_byte(struct cgs_device *cgs_device, unsigned addr) -{ - CGS_FUNC_ADEV; - uint8_t val; - int ret = pci_read_config_byte(adev->pdev, addr, &val); - if (WARN(ret, "pci_read_config_byte error")) - return 0; - return val; -} - -static uint16_t amdgpu_cgs_read_pci_config_word(struct cgs_device *cgs_device, unsigned addr) -{ - CGS_FUNC_ADEV; - uint16_t val; - int ret = pci_read_config_word(adev->pdev, addr, &val); - if (WARN(ret, "pci_read_config_word error")) - return 0; - return val; -} - -static uint32_t amdgpu_cgs_read_pci_config_dword(struct cgs_device *cgs_device, - unsigned addr) -{ - CGS_FUNC_ADEV; - uint32_t val; - int ret = pci_read_config_dword(adev->pdev, addr, &val); - if (WARN(ret, "pci_read_config_dword error")) - return 0; - return val; -} - -static void amdgpu_cgs_write_pci_config_byte(struct cgs_device *cgs_device, unsigned addr, - uint8_t value) -{ - CGS_FUNC_ADEV; - int ret = pci_write_config_byte(adev->pdev, addr, value); - WARN(ret, "pci_write_config_byte error"); -} - -static void amdgpu_cgs_write_pci_config_word(struct 
cgs_device *cgs_device, unsigned addr, - uint16_t value) -{ - CGS_FUNC_ADEV; - int ret = pci_write_config_word(adev->pdev, addr, value); - WARN(ret, "pci_write_config_word error"); -} - -static void amdgpu_cgs_write_pci_config_dword(struct cgs_device *cgs_device, unsigned addr, - uint32_t value) -{ - CGS_FUNC_ADEV; - int ret = pci_write_config_dword(adev->pdev, addr, value); - WARN(ret, "pci_write_config_dword error"); -} - - static int amdgpu_cgs_get_pci_resource(struct cgs_device *cgs_device, enum cgs_resource_type resource_type, uint64_t size, @@ -477,56 +345,6 @@ static int amdgpu_cgs_atom_exec_cmd_table(struct cgs_device *cgs_device, unsigne adev->mode_info.atom_context, table, args); } -static int amdgpu_cgs_create_pm_request(struct cgs_device *cgs_device, cgs_handle_t *request) -{ - /* TODO */ - return 0; -} - -static int amdgpu_cgs_destroy_pm_request(struct cgs_device *cgs_device, cgs_handle_t request) -{ - /* TODO */ - return 0; -} - -static int amdgpu_cgs_set_pm_request(struct cgs_device *cgs_device, cgs_handle_t request, - int active) -{ - /* TODO */ - return 0; -} - -static int amdgpu_cgs_pm_request_clock(struct cgs_device *cgs_device, cgs_handle_t request, - enum cgs_clock clock, unsigned freq) -{ - /* TODO */ - return 0; -} - -static int amdgpu_cgs_pm_request_engine(struct cgs_device *cgs_device, cgs_handle_t request, - enum cgs_engine engine, int powered) -{ - /* TODO */ - return 0; -} - - - -static int amdgpu_cgs_pm_query_clock_limits(struct cgs_device *cgs_device, - enum cgs_clock clock, - struct cgs_clock_limits *limits) -{ - /* TODO */ - return 0; -} - -static int amdgpu_cgs_set_camera_voltages(struct cgs_device *cgs_device, uint32_t mask, - const uint32_t *voltages) -{ - DRM_ERROR("not implemented"); - return -EPERM; -} - struct cgs_irq_params { unsigned src_id; cgs_irq_source_set_func_t set; @@ -1269,9 +1087,6 @@ static int amdgpu_cgs_call_acpi_method(struct cgs_device *cgs_device, } static const struct cgs_ops amdgpu_cgs_ops = { - 
.gpu_mem_info = amdgpu_cgs_gpu_mem_info, - .gmap_kmem = amdgpu_cgs_gmap_kmem, - .gunmap_kmem = amdgpu_cgs_gunmap_kmem, .alloc_gpu_mem = amdgpu_cgs_alloc_gpu_mem, .free_gpu_mem = amdgpu_cgs_free_gpu_mem, .gmap_gpu_mem = amdgpu_cgs_gmap_gpu_mem, @@ -1282,23 +1097,10 @@ static const struct cgs_ops amdgpu_cgs_ops = { .write_register = amdgpu_cgs_write_register, .read_ind_register = amdgpu_cgs_read_ind_register, .write_ind_register = amdgpu_cgs_write_ind_register, - .read_pci_config_byte = amdgpu_cgs_read_pci_config_byte, - .read_pci_config_word = amdgpu_cgs_read_pci_config_word, - .read_pci_config_dword = amdgpu_cgs_read_pci_config_dword, - .write_pci_config_byte = amdgpu_cgs_write_pci_config_byte, - .write_pci_config_word = amdgpu_cgs_write_pci_config_word, - .write_pci_config_dword = amdgpu_cgs_write_pci_config_dword, .get_pci_resource = amdgpu_cgs_get_pci_resource, .atom_get_data_table = amdgpu_cgs_atom_get_data_table, .atom_get_cmd_table_revs = amdgpu_cgs_atom_get_cmd_table_revs, .atom_exec_cmd_table = amdgpu_cgs_atom_exec_cmd_table, - .create_pm_request = amdgpu_cgs_create_pm_request, - .destroy_pm_request = amdgpu_cgs_destroy_pm_request, - .set_pm_request = amdgpu_cgs_set_pm_request, - .pm_request_clock = amdgpu_cgs_pm_request_clock, - .pm_request_engine = amdgpu_cgs_pm_request_engine, - .pm_query_clock_limits = amdgpu_cgs_pm_query_clock_limits, - .set_camera_voltages = amdgpu_cgs_set_camera_voltages, .get_firmware_info = amdgpu_cgs_get_firmware_info, .rel_firmware = amdgpu_cgs_rel_firmware, .set_powergating_state = amdgpu_cgs_set_powergating_state, diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c index ec71b9320561..4e6b9501ab0a 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c @@ -1074,6 +1074,7 @@ static int amdgpu_cs_submit(struct amdgpu_cs_parser *p, cs->out.handle = amdgpu_ctx_add_fence(p->ctx, ring, p->fence); job->uf_sequence = cs->out.handle; 
amdgpu_job_free_resources(job); + amdgpu_cs_parser_fini(p, 0, true); trace_amdgpu_cs_ioctl(job); amd_sched_entity_push_job(&job->base); @@ -1129,7 +1130,10 @@ int amdgpu_cs_ioctl(struct drm_device *dev, void *data, struct drm_file *filp) goto out; r = amdgpu_cs_submit(&parser, cs); + if (r) + goto out; + return 0; out: amdgpu_cs_parser_fini(&parser, r, reserved_buffers); return r; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c index cf0500671353..90d1ac8a80f8 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c @@ -273,6 +273,9 @@ struct dma_fence *amdgpu_ctx_get_fence(struct amdgpu_ctx *ctx, spin_lock(&ctx->ring_lock); + if (seq == ~0ull) + seq = ctx->rings[ring->idx].sequence - 1; + if (seq >= cring->sequence) { spin_unlock(&ctx->ring_lock); return ERR_PTR(-EINVAL); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c index 483660742f75..43ca16b6eee2 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c @@ -53,7 +53,6 @@ #include "bif/bif_4_1_d.h" #include <linux/pci.h> #include <linux/firmware.h> -#include "amdgpu_pm.h" static int amdgpu_debugfs_regs_init(struct amdgpu_device *adev); static void amdgpu_debugfs_regs_cleanup(struct amdgpu_device *adev); @@ -350,7 +349,7 @@ static void amdgpu_vram_scratch_fini(struct amdgpu_device *adev) if (adev->vram_scratch.robj == NULL) { return; } - r = amdgpu_bo_reserve(adev->vram_scratch.robj, false); + r = amdgpu_bo_reserve(adev->vram_scratch.robj, true); if (likely(r == 0)) { amdgpu_bo_kunmap(adev->vram_scratch.robj); amdgpu_bo_unpin(adev->vram_scratch.robj); @@ -422,12 +421,11 @@ static int amdgpu_doorbell_init(struct amdgpu_device *adev) if (adev->doorbell.num_doorbells == 0) return -EINVAL; - adev->doorbell.ptr = ioremap(adev->doorbell.base, adev->doorbell.num_doorbells * sizeof(u32)); - if (adev->doorbell.ptr == NULL) { + 
adev->doorbell.ptr = ioremap(adev->doorbell.base, + adev->doorbell.num_doorbells * + sizeof(u32)); + if (adev->doorbell.ptr == NULL) return -ENOMEM; - } - DRM_INFO("doorbell mmio base: 0x%08X\n", (uint32_t)adev->doorbell.base); - DRM_INFO("doorbell mmio size: %u\n", (unsigned)adev->doorbell.size); return 0; } @@ -1584,9 +1582,6 @@ static int amdgpu_late_init(struct amdgpu_device *adev) } } - amdgpu_dpm_enable_uvd(adev, false); - amdgpu_dpm_enable_vce(adev, false); - return 0; } @@ -1854,7 +1849,6 @@ int amdgpu_device_init(struct amdgpu_device *adev, /* mutex initialization are all done here so we * can recall function without having locking issues */ - mutex_init(&adev->vm_manager.lock); atomic_set(&adev->irq.ih.lock, 0); mutex_init(&adev->firmware.mutex); mutex_init(&adev->pm.mutex); @@ -2071,7 +2065,8 @@ void amdgpu_device_fini(struct amdgpu_device *adev) DRM_INFO("amdgpu: finishing device.\n"); adev->shutdown = true; - drm_crtc_force_disable_all(adev->ddev); + if (adev->mode_info.mode_config_initialized) + drm_crtc_force_disable_all(adev->ddev); /* evict vram memory */ amdgpu_bo_evict_vram(adev); amdgpu_ib_pool_fini(adev); @@ -2146,7 +2141,7 @@ int amdgpu_device_suspend(struct drm_device *dev, bool suspend, bool fbcon) if (amdgpu_crtc->cursor_bo) { struct amdgpu_bo *aobj = gem_to_amdgpu_bo(amdgpu_crtc->cursor_bo); - r = amdgpu_bo_reserve(aobj, false); + r = amdgpu_bo_reserve(aobj, true); if (r == 0) { amdgpu_bo_unpin(aobj); amdgpu_bo_unreserve(aobj); @@ -2159,7 +2154,7 @@ int amdgpu_device_suspend(struct drm_device *dev, bool suspend, bool fbcon) robj = gem_to_amdgpu_bo(rfb->obj); /* don't unpin kernel fb objects */ if (!amdgpu_fbdev_robj_is_fb(adev, robj)) { - r = amdgpu_bo_reserve(robj, false); + r = amdgpu_bo_reserve(robj, true); if (r == 0) { amdgpu_bo_unpin(robj); amdgpu_bo_unreserve(robj); @@ -2216,7 +2211,7 @@ int amdgpu_device_resume(struct drm_device *dev, bool resume, bool fbcon) struct drm_connector *connector; struct amdgpu_device *adev = 
dev->dev_private; struct drm_crtc *crtc; - int r; + int r = 0; if (dev->switch_power_state == DRM_SWITCH_POWER_OFF) return 0; @@ -2228,11 +2223,8 @@ int amdgpu_device_resume(struct drm_device *dev, bool resume, bool fbcon) pci_set_power_state(dev->pdev, PCI_D0); pci_restore_state(dev->pdev); r = pci_enable_device(dev->pdev); - if (r) { - if (fbcon) - console_unlock(); - return r; - } + if (r) + goto unlock; } if (adev->is_atom_fw) amdgpu_atomfirmware_scratch_regs_restore(adev); @@ -2249,7 +2241,7 @@ int amdgpu_device_resume(struct drm_device *dev, bool resume, bool fbcon) r = amdgpu_resume(adev); if (r) { DRM_ERROR("amdgpu_resume failed (%d).\n", r); - return r; + goto unlock; } amdgpu_fence_driver_resume(adev); @@ -2260,11 +2252,8 @@ int amdgpu_device_resume(struct drm_device *dev, bool resume, bool fbcon) } r = amdgpu_late_init(adev); - if (r) { - if (fbcon) - console_unlock(); - return r; - } + if (r) + goto unlock; /* pin cursors */ list_for_each_entry(crtc, &dev->mode_config.crtc_list, head) { @@ -2272,7 +2261,7 @@ int amdgpu_device_resume(struct drm_device *dev, bool resume, bool fbcon) if (amdgpu_crtc->cursor_bo) { struct amdgpu_bo *aobj = gem_to_amdgpu_bo(amdgpu_crtc->cursor_bo); - r = amdgpu_bo_reserve(aobj, false); + r = amdgpu_bo_reserve(aobj, true); if (r == 0) { r = amdgpu_bo_pin(aobj, AMDGPU_GEM_DOMAIN_VRAM, @@ -2314,12 +2303,14 @@ int amdgpu_device_resume(struct drm_device *dev, bool resume, bool fbcon) dev->dev->power.disable_depth--; #endif - if (fbcon) { + if (fbcon) amdgpu_fbdev_set_suspend(adev, 0); + +unlock: + if (fbcon) console_unlock(); - } - return 0; + return r; } static bool amdgpu_check_soft_reset(struct amdgpu_device *adev) @@ -2430,25 +2421,37 @@ static int amdgpu_recover_vram_from_shadow(struct amdgpu_device *adev, uint32_t domain; int r; - if (!bo->shadow) - return 0; + if (!bo->shadow) + return 0; + + r = amdgpu_bo_reserve(bo, true); + if (r) + return r; + domain = amdgpu_mem_type_to_domain(bo->tbo.mem.mem_type); + /* if bo has been 
evicted, then no need to recover */ + if (domain == AMDGPU_GEM_DOMAIN_VRAM) { + r = amdgpu_bo_validate(bo->shadow); + if (r) { + DRM_ERROR("bo validate failed!\n"); + goto err; + } - r = amdgpu_bo_reserve(bo, false); - if (r) - return r; - domain = amdgpu_mem_type_to_domain(bo->tbo.mem.mem_type); - /* if bo has been evicted, then no need to recover */ - if (domain == AMDGPU_GEM_DOMAIN_VRAM) { - r = amdgpu_bo_restore_from_shadow(adev, ring, bo, + r = amdgpu_ttm_bind(&bo->shadow->tbo, &bo->shadow->tbo.mem); + if (r) { + DRM_ERROR("%p bind failed\n", bo->shadow); + goto err; + } + + r = amdgpu_bo_restore_from_shadow(adev, ring, bo, NULL, fence, true); - if (r) { - DRM_ERROR("recover page table failed!\n"); - goto err; - } - } + if (r) { + DRM_ERROR("recover page table failed!\n"); + goto err; + } + } err: - amdgpu_bo_unreserve(bo); - return r; + amdgpu_bo_unreserve(bo); + return r; } /** @@ -2520,6 +2523,7 @@ int amdgpu_sriov_gpu_reset(struct amdgpu_device *adev, bool voluntary) ring = adev->mman.buffer_funcs_ring; mutex_lock(&adev->shadow_list_lock); list_for_each_entry_safe(bo, tmp, &adev->shadow_list, shadow_list) { + next = NULL; amdgpu_recover_vram_from_shadow(adev, ring, bo, &next); if (fence) { r = dma_fence_wait(fence, false); @@ -2593,7 +2597,7 @@ int amdgpu_gpu_reset(struct amdgpu_device *adev) for (i = 0; i < AMDGPU_MAX_RINGS; ++i) { struct amdgpu_ring *ring = adev->rings[i]; - if (!ring) + if (!ring || !ring->sched.thread) continue; kthread_park(ring->sched.thread); amd_sched_hw_job_reset(&ring->sched); @@ -2666,6 +2670,7 @@ retry: DRM_INFO("recover vram bo from shadow\n"); mutex_lock(&adev->shadow_list_lock); list_for_each_entry_safe(bo, tmp, &adev->shadow_list, shadow_list) { + next = NULL; amdgpu_recover_vram_from_shadow(adev, ring, bo, &next); if (fence) { r = dma_fence_wait(fence, false); @@ -2688,7 +2693,8 @@ retry: } for (i = 0; i < AMDGPU_MAX_RINGS; ++i) { struct amdgpu_ring *ring = adev->rings[i]; - if (!ring) + + if (!ring || !ring->sched.thread) 
continue; amd_sched_job_recovery(&ring->sched); @@ -2697,7 +2703,7 @@ retry: } else { dev_err(adev->dev, "asic resume failed (%d).\n", r); for (i = 0; i < AMDGPU_MAX_RINGS; ++i) { - if (adev->rings[i]) { + if (adev->rings[i] && adev->rings[i]->sched.thread) { kthread_unpark(adev->rings[i]->sched.thread); } } diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_display.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_display.c index 96926a221bd5..cdf2ab20166a 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_display.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_display.c @@ -123,7 +123,7 @@ static void amdgpu_unpin_work_func(struct work_struct *__work) int r; /* unpin of the old buffer */ - r = amdgpu_bo_reserve(work->old_abo, false); + r = amdgpu_bo_reserve(work->old_abo, true); if (likely(r == 0)) { r = amdgpu_bo_unpin(work->old_abo); if (unlikely(r != 0)) { @@ -138,52 +138,11 @@ static void amdgpu_unpin_work_func(struct work_struct *__work) kfree(work); } - -static void amdgpu_flip_work_cleanup(struct amdgpu_flip_work *work) -{ - int i; - - amdgpu_bo_unref(&work->old_abo); - dma_fence_put(work->excl); - for (i = 0; i < work->shared_count; ++i) - dma_fence_put(work->shared[i]); - kfree(work->shared); - kfree(work); -} - -static void amdgpu_flip_cleanup_unreserve(struct amdgpu_flip_work *work, - struct amdgpu_bo *new_abo) -{ - amdgpu_bo_unreserve(new_abo); - amdgpu_flip_work_cleanup(work); -} - -static void amdgpu_flip_cleanup_unpin(struct amdgpu_flip_work *work, - struct amdgpu_bo *new_abo) -{ - if (unlikely(amdgpu_bo_unpin(new_abo) != 0)) - DRM_ERROR("failed to unpin new abo in error path\n"); - amdgpu_flip_cleanup_unreserve(work, new_abo); -} - -void amdgpu_crtc_cleanup_flip_ctx(struct amdgpu_flip_work *work, - struct amdgpu_bo *new_abo) -{ - if (unlikely(amdgpu_bo_reserve(new_abo, false) != 0)) { - DRM_ERROR("failed to reserve new abo in error path\n"); - amdgpu_flip_work_cleanup(work); - return; - } - amdgpu_flip_cleanup_unpin(work, new_abo); -} - -int amdgpu_crtc_prepare_flip(struct 
drm_crtc *crtc, - struct drm_framebuffer *fb, - struct drm_pending_vblank_event *event, - uint32_t page_flip_flags, - uint32_t target, - struct amdgpu_flip_work **work_p, - struct amdgpu_bo **new_abo_p) +int amdgpu_crtc_page_flip_target(struct drm_crtc *crtc, + struct drm_framebuffer *fb, + struct drm_pending_vblank_event *event, + uint32_t page_flip_flags, uint32_t target, + struct drm_modeset_acquire_ctx *ctx) { struct drm_device *dev = crtc->dev; struct amdgpu_device *adev = dev->dev_private; @@ -196,7 +155,7 @@ int amdgpu_crtc_prepare_flip(struct drm_crtc *crtc, unsigned long flags; u64 tiling_flags; u64 base; - int r; + int i, r; work = kzalloc(sizeof *work, GFP_KERNEL); if (work == NULL) @@ -257,80 +216,41 @@ int amdgpu_crtc_prepare_flip(struct drm_crtc *crtc, spin_unlock_irqrestore(&crtc->dev->event_lock, flags); r = -EBUSY; goto pflip_cleanup; - } - spin_unlock_irqrestore(&crtc->dev->event_lock, flags); - - *work_p = work; - *new_abo_p = new_abo; - - return 0; - -pflip_cleanup: - amdgpu_crtc_cleanup_flip_ctx(work, new_abo); - return r; - -unpin: - amdgpu_flip_cleanup_unpin(work, new_abo); - return r; - -unreserve: - amdgpu_flip_cleanup_unreserve(work, new_abo); - return r; -cleanup: - amdgpu_flip_work_cleanup(work); - return r; - -} - -void amdgpu_crtc_submit_flip(struct drm_crtc *crtc, - struct drm_framebuffer *fb, - struct amdgpu_flip_work *work, - struct amdgpu_bo *new_abo) -{ - unsigned long flags; - struct amdgpu_crtc *amdgpu_crtc = to_amdgpu_crtc(crtc); - - spin_lock_irqsave(&crtc->dev->event_lock, flags); amdgpu_crtc->pflip_status = AMDGPU_FLIP_PENDING; amdgpu_crtc->pflip_works = work; + + DRM_DEBUG_DRIVER("crtc:%d[%p], pflip_stat:AMDGPU_FLIP_PENDING, work: %p,\n", + amdgpu_crtc->crtc_id, amdgpu_crtc, work); /* update crtc fb */ crtc->primary->fb = fb; spin_unlock_irqrestore(&crtc->dev->event_lock, flags); - - DRM_DEBUG_DRIVER( - "crtc:%d[%p], pflip_stat:AMDGPU_FLIP_PENDING, work: %p,\n", - amdgpu_crtc->crtc_id, amdgpu_crtc, work); - 
amdgpu_flip_work_func(&work->flip_work.work); -} - -int amdgpu_crtc_page_flip_target(struct drm_crtc *crtc, - struct drm_framebuffer *fb, - struct drm_pending_vblank_event *event, - uint32_t page_flip_flags, - uint32_t target, - struct drm_modeset_acquire_ctx *ctx) -{ - struct amdgpu_bo *new_abo; - struct amdgpu_flip_work *work; - int r; + return 0; - r = amdgpu_crtc_prepare_flip(crtc, - fb, - event, - page_flip_flags, - target, - &work, - &new_abo); - if (r) - return r; +pflip_cleanup: + if (unlikely(amdgpu_bo_reserve(new_abo, false) != 0)) { + DRM_ERROR("failed to reserve new abo in error path\n"); + goto cleanup; + } +unpin: + if (unlikely(amdgpu_bo_unpin(new_abo) != 0)) { + DRM_ERROR("failed to unpin new abo in error path\n"); + } +unreserve: + amdgpu_bo_unreserve(new_abo); - amdgpu_crtc_submit_flip(crtc, fb, work, new_abo); +cleanup: + amdgpu_bo_unref(&work->old_abo); + dma_fence_put(work->excl); + for (i = 0; i < work->shared_count; ++i) + dma_fence_put(work->shared[i]); + kfree(work->shared); + kfree(work); - return 0; + return r; } int amdgpu_crtc_set_config(struct drm_mode_set *set, diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c index 4e0f7d2d87f1..f2d705e6a75a 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c @@ -63,9 +63,11 @@ * - 3.11.0 - Add support for sensor query info (clocks, temp, etc). 
* - 3.12.0 - Add query for double offchip LDS buffers * - 3.13.0 - Add PRT support + * - 3.14.0 - Fix race in amdgpu_ctx_get_fence() and note new functionality + * - 3.15.0 - Export more gpu info for gfx9 */ #define KMS_DRIVER_MAJOR 3 -#define KMS_DRIVER_MINOR 13 +#define KMS_DRIVER_MINOR 15 #define KMS_DRIVER_PATCHLEVEL 0 int amdgpu_vram_limit = 0; @@ -453,7 +455,9 @@ static const struct pci_device_id pciidlist[] = { {0x1002, 0x6861, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_VEGA10|AMD_EXP_HW_SUPPORT}, {0x1002, 0x6862, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_VEGA10|AMD_EXP_HW_SUPPORT}, {0x1002, 0x6863, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_VEGA10|AMD_EXP_HW_SUPPORT}, + {0x1002, 0x6864, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_VEGA10|AMD_EXP_HW_SUPPORT}, {0x1002, 0x6867, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_VEGA10|AMD_EXP_HW_SUPPORT}, + {0x1002, 0x6868, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_VEGA10|AMD_EXP_HW_SUPPORT}, {0x1002, 0x686c, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_VEGA10|AMD_EXP_HW_SUPPORT}, {0x1002, 0x687f, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_VEGA10|AMD_EXP_HW_SUPPORT}, {0, 0, 0} diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_fb.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_fb.c index a48142d930c6..236d9950221b 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_fb.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_fb.c @@ -112,7 +112,7 @@ static void amdgpufb_destroy_pinned_object(struct drm_gem_object *gobj) struct amdgpu_bo *abo = gem_to_amdgpu_bo(gobj); int ret; - ret = amdgpu_bo_reserve(abo, false); + ret = amdgpu_bo_reserve(abo, true); if (likely(ret == 0)) { amdgpu_bo_kunmap(abo); amdgpu_bo_unpin(abo); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gart.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_gart.c index 2ee327d69775..902e6015abca 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gart.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gart.c @@ -186,7 +186,7 @@ void amdgpu_gart_table_vram_unpin(struct amdgpu_device *adev) if (adev->gart.robj == NULL) { return; } - r = amdgpu_bo_reserve(adev->gart.robj, false); + 
r = amdgpu_bo_reserve(adev->gart.robj, true); if (likely(r == 0)) { amdgpu_bo_kunmap(adev->gart.robj); amdgpu_bo_unpin(adev->gart.robj); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c index 03a9c5cad222..94cb91cf93eb 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c @@ -139,6 +139,35 @@ int amdgpu_gem_object_open(struct drm_gem_object *obj, return 0; } +static int amdgpu_gem_vm_check(void *param, struct amdgpu_bo *bo) +{ + /* if anything is swapped out don't swap it in here, + just abort and wait for the next CS */ + if (!amdgpu_bo_gpu_accessible(bo)) + return -ERESTARTSYS; + + if (bo->shadow && !amdgpu_bo_gpu_accessible(bo->shadow)) + return -ERESTARTSYS; + + return 0; +} + +static bool amdgpu_gem_vm_ready(struct amdgpu_device *adev, + struct amdgpu_vm *vm, + struct list_head *list) +{ + struct ttm_validate_buffer *entry; + + list_for_each_entry(entry, list, head) { + struct amdgpu_bo *bo = + container_of(entry->bo, struct amdgpu_bo, tbo); + if (amdgpu_gem_vm_check(NULL, bo)) + return false; + } + + return !amdgpu_vm_validate_pt_bos(adev, vm, amdgpu_gem_vm_check, NULL); +} + void amdgpu_gem_object_close(struct drm_gem_object *obj, struct drm_file *file_priv) { @@ -148,15 +177,13 @@ void amdgpu_gem_object_close(struct drm_gem_object *obj, struct amdgpu_vm *vm = &fpriv->vm; struct amdgpu_bo_list_entry vm_pd; - struct list_head list, duplicates; + struct list_head list; struct ttm_validate_buffer tv; struct ww_acquire_ctx ticket; struct amdgpu_bo_va *bo_va; - struct dma_fence *fence = NULL; int r; INIT_LIST_HEAD(&list); - INIT_LIST_HEAD(&duplicates); tv.bo = &bo->tbo; tv.shared = true; @@ -164,16 +191,18 @@ void amdgpu_gem_object_close(struct drm_gem_object *obj, amdgpu_vm_get_pd_bo(vm, &list, &vm_pd); - r = ttm_eu_reserve_buffers(&ticket, &list, false, &duplicates); + r = ttm_eu_reserve_buffers(&ticket, &list, false, NULL); if (r) { dev_err(adev->dev, "leaking bo va 
because " "we fail to reserve bo (%d)\n", r); return; } bo_va = amdgpu_vm_bo_find(vm, bo); - if (bo_va) { - if (--bo_va->ref_count == 0) { - amdgpu_vm_bo_rmv(adev, bo_va); + if (bo_va && --bo_va->ref_count == 0) { + amdgpu_vm_bo_rmv(adev, bo_va); + + if (amdgpu_gem_vm_ready(adev, vm, &list)) { + struct dma_fence *fence = NULL; r = amdgpu_vm_clear_freed(adev, vm, &fence); if (unlikely(r)) { @@ -502,19 +531,6 @@ out: return r; } -static int amdgpu_gem_va_check(void *param, struct amdgpu_bo *bo) -{ - /* if anything is swapped out don't swap it in here, - just abort and wait for the next CS */ - if (!amdgpu_bo_gpu_accessible(bo)) - return -ERESTARTSYS; - - if (bo->shadow && !amdgpu_bo_gpu_accessible(bo->shadow)) - return -ERESTARTSYS; - - return 0; -} - /** * amdgpu_gem_va_update_vm -update the bo_va in its VM * @@ -533,19 +549,9 @@ static void amdgpu_gem_va_update_vm(struct amdgpu_device *adev, struct list_head *list, uint32_t operation) { - struct ttm_validate_buffer *entry; int r = -ERESTARTSYS; - list_for_each_entry(entry, list, head) { - struct amdgpu_bo *bo = - container_of(entry->bo, struct amdgpu_bo, tbo); - if (amdgpu_gem_va_check(NULL, bo)) - goto error; - } - - r = amdgpu_vm_validate_pt_bos(adev, vm, amdgpu_gem_va_check, - NULL); - if (r) + if (!amdgpu_gem_vm_ready(adev, vm, list)) goto error; r = amdgpu_vm_update_directories(adev, vm); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gtt_mgr.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_gtt_mgr.c index 0335c2f331e9..f7d22c44034d 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gtt_mgr.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gtt_mgr.c @@ -134,6 +134,15 @@ int amdgpu_gtt_mgr_alloc(struct ttm_mem_type_manager *man, return r; } +void amdgpu_gtt_mgr_print(struct seq_file *m, struct ttm_mem_type_manager *man) +{ + struct amdgpu_device *adev = amdgpu_ttm_adev(man->bdev); + struct amdgpu_gtt_mgr *mgr = man->priv; + + seq_printf(m, "man size:%llu pages, gtt available:%llu pages, usage:%lluMB\n", + man->size, mgr->available, 
(u64)atomic64_read(&adev->gtt_usage) >> 20); + +} /** * amdgpu_gtt_mgr_new - allocate a new node * diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c index aab857d89d03..6e4ae0d983c2 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c @@ -160,6 +160,8 @@ int amdgpu_ib_schedule(struct amdgpu_ring *ring, unsigned num_ibs, dev_err(adev->dev, "scheduling IB failed (%d).\n", r); return r; } + if (ring->funcs->emit_pipeline_sync && job && job->need_pipeline_sync) + amdgpu_ring_emit_pipeline_sync(ring); if (vm) { r = amdgpu_vm_flush(ring, job); @@ -217,7 +219,8 @@ int amdgpu_ib_schedule(struct amdgpu_ring *ring, unsigned num_ibs, if (r) { dev_err(adev->dev, "failed to emit fence (%d)\n", r); if (job && job->vm_id) - amdgpu_vm_reset_id(adev, job->vm_id); + amdgpu_vm_reset_id(adev, ring->funcs->vmhub, + job->vm_id); amdgpu_ring_undo(ring); return r; } diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c index 86a12424c162..7570f2439a11 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c @@ -57,6 +57,7 @@ int amdgpu_job_alloc(struct amdgpu_device *adev, unsigned num_ibs, (*job)->vm = vm; (*job)->ibs = (void *)&(*job)[1]; (*job)->num_ibs = num_ibs; + (*job)->need_pipeline_sync = false; amdgpu_sync_create(&(*job)->sync); @@ -139,7 +140,7 @@ static struct dma_fence *amdgpu_job_dependency(struct amd_sched_job *sched_job) struct dma_fence *fence = amdgpu_sync_get_fence(&job->sync); - if (fence == NULL && vm && !job->vm_id) { + while (fence == NULL && vm && !job->vm_id) { struct amdgpu_ring *ring = job->ring; int r; @@ -152,6 +153,9 @@ static struct dma_fence *amdgpu_job_dependency(struct amd_sched_job *sched_job) fence = amdgpu_sync_get_fence(&job->sync); } + if (amd_sched_dependency_optimized(fence, sched_job->s_entity)) + job->need_pipeline_sync = true; + return fence; } diff --git 
a/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c index 832be632478f..96c341670782 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c @@ -545,11 +545,22 @@ static int amdgpu_info_ioctl(struct drm_device *dev, void *data, struct drm_file adev->gfx.config.double_offchip_lds_buf; if (amdgpu_ngg) { - dev_info.prim_buf_gpu_addr = adev->gfx.ngg.buf[PRIM].gpu_addr; - dev_info.pos_buf_gpu_addr = adev->gfx.ngg.buf[POS].gpu_addr; - dev_info.cntl_sb_buf_gpu_addr = adev->gfx.ngg.buf[CNTL].gpu_addr; - dev_info.param_buf_gpu_addr = adev->gfx.ngg.buf[PARAM].gpu_addr; + dev_info.prim_buf_gpu_addr = adev->gfx.ngg.buf[NGG_PRIM].gpu_addr; + dev_info.prim_buf_size = adev->gfx.ngg.buf[NGG_PRIM].size; + dev_info.pos_buf_gpu_addr = adev->gfx.ngg.buf[NGG_POS].gpu_addr; + dev_info.pos_buf_size = adev->gfx.ngg.buf[NGG_POS].size; + dev_info.cntl_sb_buf_gpu_addr = adev->gfx.ngg.buf[NGG_CNTL].gpu_addr; + dev_info.cntl_sb_buf_size = adev->gfx.ngg.buf[NGG_CNTL].size; + dev_info.param_buf_gpu_addr = adev->gfx.ngg.buf[NGG_PARAM].gpu_addr; + dev_info.param_buf_size = adev->gfx.ngg.buf[NGG_PARAM].size; } + dev_info.wave_front_size = adev->gfx.cu_info.wave_front_size; + dev_info.num_shader_visible_vgprs = adev->gfx.config.max_gprs; + dev_info.num_cu_per_sh = adev->gfx.config.max_cu_per_sh; + dev_info.num_tcc_blocks = adev->gfx.config.max_texture_channel_caches; + dev_info.gs_vgt_table_depth = adev->gfx.config.gs_vgt_table_depth; + dev_info.gs_prim_buffer_depth = adev->gfx.config.gs_prim_buffer_depth; + dev_info.max_gs_waves_per_vgt = adev->gfx.config.max_gs_threads; return copy_to_user(out, &dev_info, min((size_t)size, sizeof(dev_info))) ? 
-EFAULT : 0; @@ -810,7 +821,7 @@ void amdgpu_driver_postclose_kms(struct drm_device *dev, if (amdgpu_sriov_vf(adev)) { /* TODO: how to handle reserve failure */ - BUG_ON(amdgpu_bo_reserve(adev->virt.csa_obj, false)); + BUG_ON(amdgpu_bo_reserve(adev->virt.csa_obj, true)); amdgpu_vm_bo_rmv(adev, fpriv->vm.csa_bo_va); fpriv->vm.csa_bo_va = NULL; amdgpu_bo_unreserve(adev->virt.csa_obj); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_mode.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_mode.h index db8f8dda209c..dbd10618ec20 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_mode.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_mode.h @@ -597,21 +597,6 @@ int amdgpu_crtc_page_flip_target(struct drm_crtc *crtc, struct drm_pending_vblank_event *event, uint32_t page_flip_flags, uint32_t target, struct drm_modeset_acquire_ctx *ctx); -void amdgpu_crtc_cleanup_flip_ctx(struct amdgpu_flip_work *work, - struct amdgpu_bo *new_abo); -int amdgpu_crtc_prepare_flip(struct drm_crtc *crtc, - struct drm_framebuffer *fb, - struct drm_pending_vblank_event *event, - uint32_t page_flip_flags, - uint32_t target, - struct amdgpu_flip_work **work, - struct amdgpu_bo **new_abo); - -void amdgpu_crtc_submit_flip(struct drm_crtc *crtc, - struct drm_framebuffer *fb, - struct amdgpu_flip_work *work, - struct amdgpu_bo *new_abo); - extern const struct drm_mode_config_funcs amdgpu_mode_funcs; #endif diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c index cb89fff863c0..365883d7948d 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c @@ -295,7 +295,7 @@ void amdgpu_bo_free_kernel(struct amdgpu_bo **bo, u64 *gpu_addr, if (*bo == NULL) return; - if (likely(amdgpu_bo_reserve(*bo, false) == 0)) { + if (likely(amdgpu_bo_reserve(*bo, true) == 0)) { if (cpu_addr) amdgpu_bo_kunmap(*bo); @@ -543,6 +543,27 @@ err: return r; } +int amdgpu_bo_validate(struct amdgpu_bo *bo) +{ + uint32_t domain; + int r; + + if (bo->pin_count) + 
return 0; + + domain = bo->prefered_domains; + +retry: + amdgpu_ttm_placement_from_domain(bo, domain); + r = ttm_bo_validate(&bo->tbo, &bo->placement, false, false); + if (unlikely(r == -ENOMEM) && domain != bo->allowed_domains) { + domain = bo->allowed_domains; + goto retry; + } + + return r; +} + int amdgpu_bo_restore_from_shadow(struct amdgpu_device *adev, struct amdgpu_ring *ring, struct amdgpu_bo *bo, diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.h index 15a723adca76..382485115b06 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.h @@ -175,6 +175,7 @@ int amdgpu_bo_backup_to_shadow(struct amdgpu_device *adev, struct amdgpu_bo *bo, struct reservation_object *resv, struct dma_fence **fence, bool direct); +int amdgpu_bo_validate(struct amdgpu_bo *bo); int amdgpu_bo_restore_from_shadow(struct amdgpu_device *adev, struct amdgpu_ring *ring, struct amdgpu_bo *bo, diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_pm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_pm.c index 990fde2cf4fd..7df503aedb69 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_pm.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_pm.c @@ -867,8 +867,7 @@ static ssize_t amdgpu_hwmon_get_pwm1_enable(struct device *dev, pwm_mode = amdgpu_dpm_get_fan_control_mode(adev); - /* never 0 (full-speed), fuse or smc-controlled always */ - return sprintf(buf, "%i\n", pwm_mode == FDO_PWM_MODE_STATIC ? 
1 : 2); + return sprintf(buf, "%i\n", pwm_mode); } static ssize_t amdgpu_hwmon_set_pwm1_enable(struct device *dev, @@ -887,14 +886,7 @@ static ssize_t amdgpu_hwmon_set_pwm1_enable(struct device *dev, if (err) return err; - switch (value) { - case 1: /* manual, percent-based */ - amdgpu_dpm_set_fan_control_mode(adev, FDO_PWM_MODE_STATIC); - break; - default: /* disable */ - amdgpu_dpm_set_fan_control_mode(adev, 0); - break; - } + amdgpu_dpm_set_fan_control_mode(adev, value); return count; } diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_prime.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_prime.c index 3826d5aea0a6..6bdc866570ab 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_prime.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_prime.c @@ -113,7 +113,7 @@ void amdgpu_gem_prime_unpin(struct drm_gem_object *obj) struct amdgpu_bo *bo = gem_to_amdgpu_bo(obj); int ret = 0; - ret = amdgpu_bo_reserve(bo, false); + ret = amdgpu_bo_reserve(bo, true); if (unlikely(ret != 0)) return; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c index ed6e5799016e..ac5e92e5d59d 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c @@ -55,6 +55,8 @@ static int psp_sw_init(void *handle) psp->bootloader_load_sos = psp_v3_1_bootloader_load_sos; psp->prep_cmd_buf = psp_v3_1_prep_cmd_buf; psp->ring_init = psp_v3_1_ring_init; + psp->ring_create = psp_v3_1_ring_create; + psp->ring_destroy = psp_v3_1_ring_destroy; psp->cmd_submit = psp_v3_1_cmd_submit; psp->compare_sram_data = psp_v3_1_compare_sram_data; psp->smu_reload_quirk = psp_v3_1_smu_reload_quirk; @@ -152,11 +154,6 @@ static void psp_prep_tmr_cmd_buf(struct psp_gfx_cmd_resp *cmd, static int psp_tmr_init(struct psp_context *psp) { int ret; - struct psp_gfx_cmd_resp *cmd; - - cmd = kzalloc(sizeof(struct psp_gfx_cmd_resp), GFP_KERNEL); - if (!cmd) - return -ENOMEM; /* * Allocate 3M memory aligned to 1M from Frame Buffer (local @@ -168,22 +165,30 @@ static int 
psp_tmr_init(struct psp_context *psp) ret = amdgpu_bo_create_kernel(psp->adev, 0x300000, 0x100000, AMDGPU_GEM_DOMAIN_VRAM, &psp->tmr_bo, &psp->tmr_mc_addr, &psp->tmr_buf); - if (ret) - goto failed; + + return ret; +} + +static int psp_tmr_load(struct psp_context *psp) +{ + int ret; + struct psp_gfx_cmd_resp *cmd; + + cmd = kzalloc(sizeof(struct psp_gfx_cmd_resp), GFP_KERNEL); + if (!cmd) + return -ENOMEM; psp_prep_tmr_cmd_buf(cmd, psp->tmr_mc_addr, 0x300000); ret = psp_cmd_submit_buf(psp, NULL, cmd, psp->fence_buf_mc_addr, 1); if (ret) - goto failed_mem; + goto failed; kfree(cmd); return 0; -failed_mem: - amdgpu_bo_free_kernel(&psp->tmr_bo, &psp->tmr_mc_addr, &psp->tmr_buf); failed: kfree(cmd); return ret; @@ -203,104 +208,78 @@ static void psp_prep_asd_cmd_buf(struct psp_gfx_cmd_resp *cmd, cmd->cmd.cmd_load_ta.cmd_buf_len = shared_size; } -static int psp_asd_load(struct psp_context *psp) +static int psp_asd_init(struct psp_context *psp) { int ret; - struct amdgpu_bo *asd_bo, *asd_shared_bo; - uint64_t asd_mc_addr, asd_shared_mc_addr; - void *asd_buf, *asd_shared_buf; - struct psp_gfx_cmd_resp *cmd; - - cmd = kzalloc(sizeof(struct psp_gfx_cmd_resp), GFP_KERNEL); - if (!cmd) - return -ENOMEM; /* * Allocate 16k memory aligned to 4k from Frame Buffer (local * physical) for shared ASD <-> Driver */ - ret = amdgpu_bo_create_kernel(psp->adev, PSP_ASD_SHARED_MEM_SIZE, PAGE_SIZE, - AMDGPU_GEM_DOMAIN_VRAM, - &asd_shared_bo, &asd_shared_mc_addr, &asd_buf); - if (ret) - goto failed; + ret = amdgpu_bo_create_kernel(psp->adev, PSP_ASD_SHARED_MEM_SIZE, + PAGE_SIZE, AMDGPU_GEM_DOMAIN_VRAM, + &psp->asd_shared_bo, + &psp->asd_shared_mc_addr, + &psp->asd_shared_buf); - /* - * Allocate 256k memory aligned to 4k from Frame Buffer (local - * physical) for ASD firmware - */ - ret = amdgpu_bo_create_kernel(psp->adev, PSP_ASD_BIN_SIZE, PAGE_SIZE, - AMDGPU_GEM_DOMAIN_VRAM, - &asd_bo, &asd_mc_addr, &asd_buf); - if (ret) - goto failed_mem; + return ret; +} + +static int psp_asd_load(struct 
psp_context *psp) +{ + int ret; + struct psp_gfx_cmd_resp *cmd; + + cmd = kzalloc(sizeof(struct psp_gfx_cmd_resp), GFP_KERNEL); + if (!cmd) + return -ENOMEM; - memcpy(asd_buf, psp->asd_start_addr, psp->asd_ucode_size); + memset(psp->fw_pri_buf, 0, PSP_1_MEG); + memcpy(psp->fw_pri_buf, psp->asd_start_addr, psp->asd_ucode_size); - psp_prep_asd_cmd_buf(cmd, asd_mc_addr, asd_shared_mc_addr, + psp_prep_asd_cmd_buf(cmd, psp->fw_pri_mc_addr, psp->asd_shared_mc_addr, psp->asd_ucode_size, PSP_ASD_SHARED_MEM_SIZE); ret = psp_cmd_submit_buf(psp, NULL, cmd, psp->fence_buf_mc_addr, 2); - if (ret) - goto failed_mem1; - amdgpu_bo_free_kernel(&asd_bo, &asd_mc_addr, &asd_buf); - amdgpu_bo_free_kernel(&asd_shared_bo, &asd_shared_mc_addr, &asd_shared_buf); kfree(cmd); - return 0; - -failed_mem1: - amdgpu_bo_free_kernel(&asd_bo, &asd_mc_addr, &asd_buf); -failed_mem: - amdgpu_bo_free_kernel(&asd_shared_bo, &asd_shared_mc_addr, &asd_shared_buf); -failed: - kfree(cmd); return ret; } -static int psp_load_fw(struct amdgpu_device *adev) +static int psp_hw_start(struct psp_context *psp) { int ret; - struct psp_gfx_cmd_resp *cmd; - int i; - struct amdgpu_firmware_info *ucode; - struct psp_context *psp = &adev->psp; - - cmd = kzalloc(sizeof(struct psp_gfx_cmd_resp), GFP_KERNEL); - if (!cmd) - return -ENOMEM; ret = psp_bootloader_load_sysdrv(psp); if (ret) - goto failed; + return ret; ret = psp_bootloader_load_sos(psp); if (ret) - goto failed; - - ret = psp_ring_init(psp, PSP_RING_TYPE__KM); - if (ret) - goto failed; + return ret; - ret = amdgpu_bo_create_kernel(adev, PSP_FENCE_BUFFER_SIZE, PAGE_SIZE, - AMDGPU_GEM_DOMAIN_VRAM, - &psp->fence_buf_bo, - &psp->fence_buf_mc_addr, - &psp->fence_buf); + ret = psp_ring_create(psp, PSP_RING_TYPE__KM); if (ret) - goto failed; - - memset(psp->fence_buf, 0, PSP_FENCE_BUFFER_SIZE); + return ret; - ret = psp_tmr_init(psp); + ret = psp_tmr_load(psp); if (ret) - goto failed_mem; + return ret; ret = psp_asd_load(psp); if (ret) - goto failed_mem; + return ret; + 
+ return 0; +} + +static int psp_np_fw_load(struct psp_context *psp) +{ + int i, ret; + struct amdgpu_firmware_info *ucode; + struct amdgpu_device* adev = psp->adev; for (i = 0; i < adev->firmware.max_ucodes; i++) { ucode = &adev->firmware.ucode[i]; @@ -310,15 +289,21 @@ static int psp_load_fw(struct amdgpu_device *adev) if (ucode->ucode_id == AMDGPU_UCODE_ID_SMC && psp_smu_reload_quirk(psp)) continue; + if (amdgpu_sriov_vf(adev) && + (ucode->ucode_id == AMDGPU_UCODE_ID_SDMA0 + || ucode->ucode_id == AMDGPU_UCODE_ID_SDMA1 + || ucode->ucode_id == AMDGPU_UCODE_ID_RLC_G)) + /*skip ucode loading in SRIOV VF */ + continue; - ret = psp_prep_cmd_buf(ucode, cmd); + ret = psp_prep_cmd_buf(ucode, psp->cmd); if (ret) - goto failed_mem; + return ret; - ret = psp_cmd_submit_buf(psp, ucode, cmd, + ret = psp_cmd_submit_buf(psp, ucode, psp->cmd, psp->fence_buf_mc_addr, i + 3); if (ret) - goto failed_mem; + return ret; #if 0 /* check if firmware loaded sucessfully */ @@ -327,8 +312,59 @@ static int psp_load_fw(struct amdgpu_device *adev) #endif } - amdgpu_bo_free_kernel(&psp->fence_buf_bo, - &psp->fence_buf_mc_addr, &psp->fence_buf); + return 0; +} + +static int psp_load_fw(struct amdgpu_device *adev) +{ + int ret; + struct psp_context *psp = &adev->psp; + struct psp_gfx_cmd_resp *cmd; + + cmd = kzalloc(sizeof(struct psp_gfx_cmd_resp), GFP_KERNEL); + if (!cmd) + return -ENOMEM; + + psp->cmd = cmd; + + ret = amdgpu_bo_create_kernel(adev, PSP_1_MEG, PSP_1_MEG, + AMDGPU_GEM_DOMAIN_GTT, + &psp->fw_pri_bo, + &psp->fw_pri_mc_addr, + &psp->fw_pri_buf); + if (ret) + goto failed; + + ret = amdgpu_bo_create_kernel(adev, PSP_FENCE_BUFFER_SIZE, PAGE_SIZE, + AMDGPU_GEM_DOMAIN_VRAM, + &psp->fence_buf_bo, + &psp->fence_buf_mc_addr, + &psp->fence_buf); + if (ret) + goto failed_mem1; + + memset(psp->fence_buf, 0, PSP_FENCE_BUFFER_SIZE); + + ret = psp_ring_init(psp, PSP_RING_TYPE__KM); + if (ret) + goto failed_mem1; + + ret = psp_tmr_init(psp); + if (ret) + goto failed_mem; + + ret = 
psp_asd_init(psp); + if (ret) + goto failed_mem; + + ret = psp_hw_start(psp); + if (ret) + goto failed_mem; + + ret = psp_np_fw_load(psp); + if (ret) + goto failed_mem; + kfree(cmd); return 0; @@ -336,6 +372,9 @@ static int psp_load_fw(struct amdgpu_device *adev) failed_mem: amdgpu_bo_free_kernel(&psp->fence_buf_bo, &psp->fence_buf_mc_addr, &psp->fence_buf); +failed_mem1: + amdgpu_bo_free_kernel(&psp->fw_pri_bo, + &psp->fw_pri_mc_addr, &psp->fw_pri_buf); failed: kfree(cmd); return ret; @@ -379,12 +418,24 @@ static int psp_hw_fini(void *handle) struct amdgpu_device *adev = (struct amdgpu_device *)handle; struct psp_context *psp = &adev->psp; - if (adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) - amdgpu_ucode_fini_bo(adev); + if (adev->firmware.load_type != AMDGPU_FW_LOAD_PSP) + return 0; + + amdgpu_ucode_fini_bo(adev); + + psp_ring_destroy(psp, PSP_RING_TYPE__KM); if (psp->tmr_buf) amdgpu_bo_free_kernel(&psp->tmr_bo, &psp->tmr_mc_addr, &psp->tmr_buf); + if (psp->fw_pri_buf) + amdgpu_bo_free_kernel(&psp->fw_pri_bo, + &psp->fw_pri_mc_addr, &psp->fw_pri_buf); + + if (psp->fence_buf_bo) + amdgpu_bo_free_kernel(&psp->fence_buf_bo, + &psp->fence_buf_mc_addr, &psp->fence_buf); + return 0; } @@ -397,18 +448,30 @@ static int psp_resume(void *handle) { int ret; struct amdgpu_device *adev = (struct amdgpu_device *)handle; + struct psp_context *psp = &adev->psp; if (adev->firmware.load_type != AMDGPU_FW_LOAD_PSP) return 0; + DRM_INFO("PSP is resuming...\n"); + mutex_lock(&adev->firmware.mutex); - ret = psp_load_fw(adev); + ret = psp_hw_start(psp); if (ret) - DRM_ERROR("PSP resume failed\n"); + goto failed; + + ret = psp_np_fw_load(psp); + if (ret) + goto failed; mutex_unlock(&adev->firmware.mutex); + return 0; + +failed: + DRM_ERROR("PSP resume failed\n"); + mutex_unlock(&adev->firmware.mutex); return ret; } diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.h index e9f35e025b59..0301e4e0b297 100644 --- 
a/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.h @@ -30,8 +30,8 @@ #define PSP_FENCE_BUFFER_SIZE 0x1000 #define PSP_CMD_BUFFER_SIZE 0x1000 -#define PSP_ASD_BIN_SIZE 0x40000 #define PSP_ASD_SHARED_MEM_SIZE 0x4000 +#define PSP_1_MEG 0x100000 enum psp_ring_type { @@ -57,6 +57,7 @@ struct psp_context { struct amdgpu_device *adev; struct psp_ring km_ring; + struct psp_gfx_cmd_resp *cmd; int (*init_microcode)(struct psp_context *psp); int (*bootloader_load_sysdrv)(struct psp_context *psp); @@ -64,6 +65,9 @@ struct psp_context int (*prep_cmd_buf)(struct amdgpu_firmware_info *ucode, struct psp_gfx_cmd_resp *cmd); int (*ring_init)(struct psp_context *psp, enum psp_ring_type ring_type); + int (*ring_create)(struct psp_context *psp, enum psp_ring_type ring_type); + int (*ring_destroy)(struct psp_context *psp, + enum psp_ring_type ring_type); int (*cmd_submit)(struct psp_context *psp, struct amdgpu_firmware_info *ucode, uint64_t cmd_buf_mc_addr, uint64_t fence_mc_addr, int index); bool (*compare_sram_data)(struct psp_context *psp, @@ -71,6 +75,11 @@ struct psp_context enum AMDGPU_UCODE_ID ucode_type); bool (*smu_reload_quirk)(struct psp_context *psp); + /* firmware buffer */ + struct amdgpu_bo *fw_pri_bo; + uint64_t fw_pri_mc_addr; + void *fw_pri_buf; + /* sos firmware */ const struct firmware *sos_fw; uint32_t sos_fw_version; @@ -85,12 +94,15 @@ struct psp_context uint64_t tmr_mc_addr; void *tmr_buf; - /* asd firmware */ + /* asd firmware and buffer */ const struct firmware *asd_fw; uint32_t asd_fw_version; uint32_t asd_feature_version; uint32_t asd_ucode_size; uint8_t *asd_start_addr; + struct amdgpu_bo *asd_shared_bo; + uint64_t asd_shared_mc_addr; + void *asd_shared_buf; /* fence buffer */ struct amdgpu_bo *fence_buf_bo; @@ -105,6 +117,8 @@ struct amdgpu_psp_funcs { #define psp_prep_cmd_buf(ucode, type) (psp)->prep_cmd_buf((ucode), (type)) #define psp_ring_init(psp, type) (psp)->ring_init((psp), (type)) +#define psp_ring_create(psp, 
type) (psp)->ring_create((psp), (type)) +#define psp_ring_destroy(psp, type) ((psp)->ring_destroy((psp), (type))) #define psp_cmd_submit(psp, ucode, cmd_mc, fence_mc, index) \ (psp)->cmd_submit((psp), (ucode), (cmd_mc), (fence_mc), (index)) #define psp_compare_sram_data(psp, ucode, type) \ diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h index 63e56398ca9a..944443c5b90a 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h @@ -99,6 +99,7 @@ struct amdgpu_ring_funcs { uint32_t align_mask; u32 nop; bool support_64bit_ptrs; + unsigned vmhub; /* ring read/write ptr handling */ u64 (*get_rptr)(struct amdgpu_ring *ring); @@ -178,6 +179,7 @@ struct amdgpu_ring { unsigned cond_exe_offs; u64 cond_exe_gpu_addr; volatile u32 *cond_exe_cpu_addr; + unsigned vm_inv_eng; #if defined(CONFIG_DEBUG_FS) struct dentry *ent; #endif diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_sa.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_sa.c index de9f919ae336..5ca75a456ad2 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_sa.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_sa.c @@ -130,7 +130,7 @@ int amdgpu_sa_bo_manager_suspend(struct amdgpu_device *adev, return -EINVAL; } - r = amdgpu_bo_reserve(sa_manager->bo, false); + r = amdgpu_bo_reserve(sa_manager->bo, true); if (!r) { amdgpu_bo_kunmap(sa_manager->bo); amdgpu_bo_unpin(sa_manager->bo); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_trace.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_trace.h index ee9d0f346d75..8601904e670a 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_trace.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_trace.h @@ -190,26 +190,29 @@ TRACE_EVENT(amdgpu_sched_run_job, TRACE_EVENT(amdgpu_vm_grab_id, - TP_PROTO(struct amdgpu_vm *vm, int ring, struct amdgpu_job *job), + TP_PROTO(struct amdgpu_vm *vm, struct amdgpu_ring *ring, + struct amdgpu_job *job), TP_ARGS(vm, ring, job), TP_STRUCT__entry( __field(struct amdgpu_vm *, vm) __field(u32, ring) - __field(u32, 
vmid) + __field(u32, vm_id) + __field(u32, vm_hub) __field(u64, pd_addr) __field(u32, needs_flush) ), TP_fast_assign( __entry->vm = vm; - __entry->ring = ring; - __entry->vmid = job->vm_id; + __entry->ring = ring->idx; + __entry->vm_id = job->vm_id; + __entry->vm_hub = ring->funcs->vmhub, __entry->pd_addr = job->vm_pd_addr; __entry->needs_flush = job->vm_needs_flush; ), - TP_printk("vm=%p, ring=%u, id=%u, pd_addr=%010Lx needs_flush=%u", - __entry->vm, __entry->ring, __entry->vmid, - __entry->pd_addr, __entry->needs_flush) + TP_printk("vm=%p, ring=%u, id=%u, hub=%u, pd_addr=%010Lx needs_flush=%u", + __entry->vm, __entry->ring, __entry->vm_id, + __entry->vm_hub, __entry->pd_addr, __entry->needs_flush) ); TRACE_EVENT(amdgpu_vm_bo_map, @@ -331,21 +334,25 @@ TRACE_EVENT(amdgpu_vm_copy_ptes, ); TRACE_EVENT(amdgpu_vm_flush, - TP_PROTO(uint64_t pd_addr, unsigned ring, unsigned id), - TP_ARGS(pd_addr, ring, id), + TP_PROTO(struct amdgpu_ring *ring, unsigned vm_id, + uint64_t pd_addr), + TP_ARGS(ring, vm_id, pd_addr), TP_STRUCT__entry( - __field(u64, pd_addr) __field(u32, ring) - __field(u32, id) + __field(u32, vm_id) + __field(u32, vm_hub) + __field(u64, pd_addr) ), TP_fast_assign( + __entry->ring = ring->idx; + __entry->vm_id = vm_id; + __entry->vm_hub = ring->funcs->vmhub; __entry->pd_addr = pd_addr; - __entry->ring = ring; - __entry->id = id; ), - TP_printk("ring=%u, id=%u, pd_addr=%010Lx", - __entry->ring, __entry->id, __entry->pd_addr) + TP_printk("ring=%u, id=%u, hub=%u, pd_addr=%010Lx", + __entry->ring, __entry->vm_id, + __entry->vm_hub,__entry->pd_addr) ); TRACE_EVENT(amdgpu_bo_list_set, diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c index 35d53a0d9ba6..5db0230e45c6 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c @@ -203,7 +203,9 @@ static void amdgpu_evict_flags(struct ttm_buffer_object *bo, abo = container_of(bo, struct amdgpu_bo, tbo); switch (bo->mem.mem_type) { 
case TTM_PL_VRAM: - if (adev->mman.buffer_funcs_ring->ready == false) { + if (adev->mman.buffer_funcs && + adev->mman.buffer_funcs_ring && + adev->mman.buffer_funcs_ring->ready == false) { amdgpu_ttm_placement_from_domain(abo, AMDGPU_GEM_DOMAIN_CPU); } else { amdgpu_ttm_placement_from_domain(abo, AMDGPU_GEM_DOMAIN_GTT); @@ -763,7 +765,7 @@ int amdgpu_ttm_recover_gart(struct amdgpu_device *adev) { struct amdgpu_ttm_tt *gtt, *tmp; struct ttm_mem_reg bo_mem; - uint32_t flags; + uint64_t flags; int r; bo_mem.mem_type = TTM_PL_TT; @@ -1038,11 +1040,17 @@ uint64_t amdgpu_ttm_tt_pte_flags(struct amdgpu_device *adev, struct ttm_tt *ttm, static bool amdgpu_ttm_bo_eviction_valuable(struct ttm_buffer_object *bo, const struct ttm_place *place) { - if (bo->mem.mem_type == TTM_PL_VRAM && - bo->mem.start == AMDGPU_BO_INVALID_OFFSET) { - unsigned long num_pages = bo->mem.num_pages; - struct drm_mm_node *node = bo->mem.mm_node; + unsigned long num_pages = bo->mem.num_pages; + struct drm_mm_node *node = bo->mem.mm_node; + + if (bo->mem.start != AMDGPU_BO_INVALID_OFFSET) + return ttm_bo_eviction_valuable(bo, place); + + switch (bo->mem.mem_type) { + case TTM_PL_TT: + return true; + case TTM_PL_VRAM: /* Check each drm MM node individually */ while (num_pages) { if (place->fpfn < (node->start + node->size) && @@ -1052,8 +1060,10 @@ static bool amdgpu_ttm_bo_eviction_valuable(struct ttm_buffer_object *bo, num_pages -= node->size; ++node; } + break; - return false; + default: + break; } return ttm_bo_eviction_valuable(bo, place); @@ -1188,7 +1198,7 @@ void amdgpu_ttm_fini(struct amdgpu_device *adev) return; amdgpu_ttm_debugfs_fini(adev); if (adev->stollen_vga_memory) { - r = amdgpu_bo_reserve(adev->stollen_vga_memory, false); + r = amdgpu_bo_reserve(adev->stollen_vga_memory, true); if (r == 0) { amdgpu_bo_unpin(adev->stollen_vga_memory); amdgpu_bo_unreserve(adev->stollen_vga_memory); @@ -1401,6 +1411,8 @@ error_free: #if defined(CONFIG_DEBUG_FS) +extern void amdgpu_gtt_mgr_print(struct 
seq_file *m, struct ttm_mem_type_manager + *man); static int amdgpu_mm_dump_table(struct seq_file *m, void *data) { struct drm_info_node *node = (struct drm_info_node *)m->private; @@ -1414,11 +1426,17 @@ static int amdgpu_mm_dump_table(struct seq_file *m, void *data) spin_lock(&glob->lru_lock); drm_mm_print(mm, &p); spin_unlock(&glob->lru_lock); - if (ttm_pl == TTM_PL_VRAM) + switch (ttm_pl) { + case TTM_PL_VRAM: seq_printf(m, "man size:%llu pages, ram usage:%lluMB, vis usage:%lluMB\n", adev->mman.bdev.man[ttm_pl].size, (u64)atomic64_read(&adev->vram_usage) >> 20, (u64)atomic64_read(&adev->vram_vis_usage) >> 20); + break; + case TTM_PL_TT: + amdgpu_gtt_mgr_print(m, &adev->mman.bdev.man[TTM_PL_TT]); + break; + } return 0; } diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ucode.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ucode.c index a1891c93cdbf..dfd1c98efa7c 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ucode.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ucode.c @@ -382,10 +382,14 @@ int amdgpu_ucode_init_bo(struct amdgpu_device *adev) * if SMU loaded firmware, it needn't add SMC, UVD, and VCE * ucode info here */ - if (adev->firmware.load_type != AMDGPU_FW_LOAD_PSP) - adev->firmware.max_ucodes = AMDGPU_UCODE_ID_MAXIMUM - 4; - else + if (adev->firmware.load_type != AMDGPU_FW_LOAD_PSP) { + if (amdgpu_sriov_vf(adev)) + adev->firmware.max_ucodes = AMDGPU_UCODE_ID_MAXIMUM - 3; + else + adev->firmware.max_ucodes = AMDGPU_UCODE_ID_MAXIMUM - 4; + } else { adev->firmware.max_ucodes = AMDGPU_UCODE_ID_MAXIMUM; + } for (i = 0; i < adev->firmware.max_ucodes; i++) { ucode = &adev->firmware.ucode[i]; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vce.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vce.c index c853400805d1..735c38d7db0d 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vce.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vce.c @@ -955,11 +955,11 @@ int amdgpu_vce_ring_test_ring(struct amdgpu_ring *ring) struct amdgpu_device *adev = ring->adev; uint32_t rptr = amdgpu_ring_get_rptr(ring); 
unsigned i; - int r; + int r, timeout = adev->usec_timeout; - /* TODO: remove it if VCE can work for sriov */ + /* workaround VCE ring test slow issue for sriov */ if (amdgpu_sriov_vf(adev)) - return 0; + timeout *= 10; r = amdgpu_ring_alloc(ring, 16); if (r) { @@ -970,13 +970,13 @@ int amdgpu_vce_ring_test_ring(struct amdgpu_ring *ring) amdgpu_ring_write(ring, VCE_CMD_END); amdgpu_ring_commit(ring); - for (i = 0; i < adev->usec_timeout; i++) { + for (i = 0; i < timeout; i++) { if (amdgpu_ring_get_rptr(ring) != rptr) break; DRM_UDELAY(1); } - if (i < adev->usec_timeout) { + if (i < timeout) { DRM_INFO("ring test on %d succeeded in %d usecs\n", ring->idx, i); } else { @@ -999,10 +999,6 @@ int amdgpu_vce_ring_test_ib(struct amdgpu_ring *ring, long timeout) struct dma_fence *fence = NULL; long r; - /* TODO: remove it if VCE can work for sriov */ - if (amdgpu_sriov_vf(ring->adev)) - return 0; - /* skip vce ring1/2 ib test for now, since it's not reliable */ if (ring != &ring->adev->vce.ring[0]) return 0; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c index ba8b8ae6234f..6bf5cea294f2 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c @@ -225,3 +225,49 @@ int amdgpu_virt_reset_gpu(struct amdgpu_device *adev) return 0; } + +/** + * amdgpu_virt_alloc_mm_table() - alloc memory for mm table + * @adev: amdgpu device. + * MM table is used by UVD and VCE for their initialization + * Return: Zero on success or when no allocation is needed (not an SR-IOV VF, or table already allocated), otherwise the error returned by amdgpu_bo_create_kernel(). 
+ */ +int amdgpu_virt_alloc_mm_table(struct amdgpu_device *adev) +{ + int r; + + if (!amdgpu_sriov_vf(adev) || adev->virt.mm_table.gpu_addr) + return 0; + + r = amdgpu_bo_create_kernel(adev, PAGE_SIZE, PAGE_SIZE, + AMDGPU_GEM_DOMAIN_VRAM, + &adev->virt.mm_table.bo, + &adev->virt.mm_table.gpu_addr, + (void *)&adev->virt.mm_table.cpu_addr); + if (r) { + DRM_ERROR("failed to alloc mm table and error = %d.\n", r); + return r; + } + + memset((void *)adev->virt.mm_table.cpu_addr, 0, PAGE_SIZE); + DRM_INFO("MM table gpu addr = 0x%llx, cpu addr = %p.\n", + adev->virt.mm_table.gpu_addr, + adev->virt.mm_table.cpu_addr); + return 0; +} + +/** + * amdgpu_virt_free_mm_table() - free mm table memory + * @adev: amdgpu device. + * Free MM table memory; does nothing unless running as an SR-IOV VF with a table allocated + */ +void amdgpu_virt_free_mm_table(struct amdgpu_device *adev) +{ + if (!amdgpu_sriov_vf(adev) || !adev->virt.mm_table.gpu_addr) + return; + + amdgpu_bo_free_kernel(&adev->virt.mm_table.bo, + &adev->virt.mm_table.gpu_addr, + (void *)&adev->virt.mm_table.cpu_addr); + adev->virt.mm_table.gpu_addr = 0; +} diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.h index 1ee0a190b33b..a8ed162cc0bc 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.h @@ -98,5 +98,7 @@ int amdgpu_virt_request_full_gpu(struct amdgpu_device *adev, bool init); int amdgpu_virt_release_full_gpu(struct amdgpu_device *adev, bool init); int amdgpu_virt_reset_gpu(struct amdgpu_device *adev); int amdgpu_sriov_gpu_reset(struct amdgpu_device *adev, bool voluntary); +int amdgpu_virt_alloc_mm_table(struct amdgpu_device *adev); +void amdgpu_virt_free_mm_table(struct amdgpu_device *adev); #endif diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c index 7ed5302b511a..07ff3b1514f1 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c @@ -406,6 +406,8 @@ int amdgpu_vm_grab_id(struct amdgpu_vm *vm, struct 
amdgpu_ring *ring, struct amdgpu_job *job) { struct amdgpu_device *adev = ring->adev; + unsigned vmhub = ring->funcs->vmhub; + struct amdgpu_vm_id_manager *id_mgr = &adev->vm_manager.id_mgr[vmhub]; uint64_t fence_context = adev->fence_context + ring->idx; struct dma_fence *updates = sync->last_vm_update; struct amdgpu_vm_id *id, *idle; @@ -413,16 +415,15 @@ int amdgpu_vm_grab_id(struct amdgpu_vm *vm, struct amdgpu_ring *ring, unsigned i; int r = 0; - fences = kmalloc_array(sizeof(void *), adev->vm_manager.num_ids, - GFP_KERNEL); + fences = kmalloc_array(sizeof(void *), id_mgr->num_ids, GFP_KERNEL); if (!fences) return -ENOMEM; - mutex_lock(&adev->vm_manager.lock); + mutex_lock(&id_mgr->lock); /* Check if we have an idle VMID */ i = 0; - list_for_each_entry(idle, &adev->vm_manager.ids_lru, list) { + list_for_each_entry(idle, &id_mgr->ids_lru, list) { fences[i] = amdgpu_sync_peek_fence(&idle->active, ring); if (!fences[i]) break; @@ -430,7 +431,7 @@ int amdgpu_vm_grab_id(struct amdgpu_vm *vm, struct amdgpu_ring *ring, } /* If we can't find a idle VMID to use, wait till one becomes available */ - if (&idle->list == &adev->vm_manager.ids_lru) { + if (&idle->list == &id_mgr->ids_lru) { u64 fence_context = adev->vm_manager.fence_context + ring->idx; unsigned seqno = ++adev->vm_manager.seqno[ring->idx]; struct dma_fence_array *array; @@ -455,25 +456,19 @@ int amdgpu_vm_grab_id(struct amdgpu_vm *vm, struct amdgpu_ring *ring, if (r) goto error; - mutex_unlock(&adev->vm_manager.lock); + mutex_unlock(&id_mgr->lock); return 0; } kfree(fences); - job->vm_needs_flush = true; + job->vm_needs_flush = false; /* Check if we can use a VMID already assigned to this VM */ - i = ring->idx; - do { + list_for_each_entry_reverse(id, &id_mgr->ids_lru, list) { struct dma_fence *flushed; - - id = vm->ids[i++]; - if (i == AMDGPU_MAX_RINGS) - i = 0; + bool needs_flush = false; /* Check all the prerequisites to using this VMID */ - if (!id) - continue; if (amdgpu_vm_had_gpu_reset(adev, id)) 
continue; @@ -483,16 +478,17 @@ int amdgpu_vm_grab_id(struct amdgpu_vm *vm, struct amdgpu_ring *ring, if (job->vm_pd_addr != id->pd_gpu_addr) continue; - if (!id->last_flush) - continue; - - if (id->last_flush->context != fence_context && - !dma_fence_is_signaled(id->last_flush)) - continue; + if (!id->last_flush || + (id->last_flush->context != fence_context && + !dma_fence_is_signaled(id->last_flush))) + needs_flush = true; flushed = id->flushed_updates; - if (updates && - (!flushed || dma_fence_is_later(updates, flushed))) + if (updates && (!flushed || dma_fence_is_later(updates, flushed))) + needs_flush = true; + + /* Concurrent flushes are only possible starting with Vega10 */ + if (adev->asic_type < CHIP_VEGA10 && needs_flush) continue; /* Good we can use this VMID. Remember this submission as @@ -502,17 +498,17 @@ int amdgpu_vm_grab_id(struct amdgpu_vm *vm, struct amdgpu_ring *ring, if (r) goto error; - list_move_tail(&id->list, &adev->vm_manager.ids_lru); - vm->ids[ring->idx] = id; - - job->vm_id = id - adev->vm_manager.ids; - job->vm_needs_flush = false; - trace_amdgpu_vm_grab_id(vm, ring->idx, job); + if (updates && (!flushed || dma_fence_is_later(updates, flushed))) { + dma_fence_put(id->flushed_updates); + id->flushed_updates = dma_fence_get(updates); + } - mutex_unlock(&adev->vm_manager.lock); - return 0; + if (needs_flush) + goto needs_flush; + else + goto no_flush_needed; - } while (i != ring->idx); + }; /* Still no ID to use? 
Then use the idle one found earlier */ id = idle; @@ -522,23 +518,25 @@ int amdgpu_vm_grab_id(struct amdgpu_vm *vm, struct amdgpu_ring *ring, if (r) goto error; - dma_fence_put(id->last_flush); - id->last_flush = NULL; - + id->pd_gpu_addr = job->vm_pd_addr; dma_fence_put(id->flushed_updates); id->flushed_updates = dma_fence_get(updates); - - id->pd_gpu_addr = job->vm_pd_addr; id->current_gpu_reset_count = atomic_read(&adev->gpu_reset_counter); - list_move_tail(&id->list, &adev->vm_manager.ids_lru); atomic64_set(&id->owner, vm->client_id); - vm->ids[ring->idx] = id; - job->vm_id = id - adev->vm_manager.ids; - trace_amdgpu_vm_grab_id(vm, ring->idx, job); +needs_flush: + job->vm_needs_flush = true; + dma_fence_put(id->last_flush); + id->last_flush = NULL; + +no_flush_needed: + list_move_tail(&id->list, &id_mgr->ids_lru); + + job->vm_id = id - id_mgr->ids; + trace_amdgpu_vm_grab_id(vm, ring, job); error: - mutex_unlock(&adev->vm_manager.lock); + mutex_unlock(&id_mgr->lock); return r; } @@ -590,7 +588,9 @@ static u64 amdgpu_vm_adjust_mc_addr(struct amdgpu_device *adev, u64 mc_addr) int amdgpu_vm_flush(struct amdgpu_ring *ring, struct amdgpu_job *job) { struct amdgpu_device *adev = ring->adev; - struct amdgpu_vm_id *id = &adev->vm_manager.ids[job->vm_id]; + unsigned vmhub = ring->funcs->vmhub; + struct amdgpu_vm_id_manager *id_mgr = &adev->vm_manager.id_mgr[vmhub]; + struct amdgpu_vm_id *id = &id_mgr->ids[job->vm_id]; bool gds_switch_needed = ring->funcs->emit_gds_switch && ( id->gds_base != job->gds_base || id->gds_size != job->gds_size || @@ -614,24 +614,24 @@ int amdgpu_vm_flush(struct amdgpu_ring *ring, struct amdgpu_job *job) if (ring->funcs->init_cond_exec) patch_offset = amdgpu_ring_init_cond_exec(ring); - if (ring->funcs->emit_pipeline_sync) + if (ring->funcs->emit_pipeline_sync && !job->need_pipeline_sync) amdgpu_ring_emit_pipeline_sync(ring); if (ring->funcs->emit_vm_flush && vm_flush_needed) { u64 pd_addr = amdgpu_vm_adjust_mc_addr(adev, job->vm_pd_addr); 
struct dma_fence *fence; - trace_amdgpu_vm_flush(pd_addr, ring->idx, job->vm_id); + trace_amdgpu_vm_flush(ring, job->vm_id, pd_addr); amdgpu_ring_emit_vm_flush(ring, job->vm_id, pd_addr); r = amdgpu_fence_emit(ring, &fence); if (r) return r; - mutex_lock(&adev->vm_manager.lock); + mutex_lock(&id_mgr->lock); dma_fence_put(id->last_flush); id->last_flush = fence; - mutex_unlock(&adev->vm_manager.lock); + mutex_unlock(&id_mgr->lock); } if (gds_switch_needed) { @@ -666,9 +666,11 @@ int amdgpu_vm_flush(struct amdgpu_ring *ring, struct amdgpu_job *job) * * Reset saved GDW, GWS and OA to force switch on next flush. */ -void amdgpu_vm_reset_id(struct amdgpu_device *adev, unsigned vm_id) +void amdgpu_vm_reset_id(struct amdgpu_device *adev, unsigned vmhub, + unsigned vmid) { - struct amdgpu_vm_id *id = &adev->vm_manager.ids[vm_id]; + struct amdgpu_vm_id_manager *id_mgr = &adev->vm_manager.id_mgr[vmhub]; + struct amdgpu_vm_id *id = &id_mgr->ids[vmid]; id->gds_base = 0; id->gds_size = 0; @@ -1336,6 +1338,12 @@ static int amdgpu_vm_bo_split_mapping(struct amdgpu_device *adev, flags &= ~AMDGPU_PTE_MTYPE_MASK; flags |= (mapping->flags & AMDGPU_PTE_MTYPE_MASK); + if ((mapping->flags & AMDGPU_PTE_PRT) && + (adev->asic_type >= CHIP_VEGA10)) { + flags |= AMDGPU_PTE_PRT; + flags &= ~AMDGPU_PTE_VALID; + } + trace_amdgpu_vm_bo_update(mapping); pfn = mapping->offset >> PAGE_SHIFT; @@ -1629,8 +1637,9 @@ int amdgpu_vm_clear_freed(struct amdgpu_device *adev, struct amdgpu_bo_va_mapping, list); list_del(&mapping->list); - r = amdgpu_vm_bo_split_mapping(adev, NULL, 0, NULL, vm, mapping, - 0, 0, &f); + r = amdgpu_vm_bo_update_mapping(adev, NULL, 0, NULL, vm, + mapping->start, mapping->last, + 0, 0, &f); amdgpu_vm_free_mapping(adev, vm, mapping, f); if (r) { dma_fence_put(f); @@ -2117,10 +2126,8 @@ int amdgpu_vm_init(struct amdgpu_device *adev, struct amdgpu_vm *vm) unsigned ring_instance; struct amdgpu_ring *ring; struct amd_sched_rq *rq; - int i, r; + int r; - for (i = 0; i < 
AMDGPU_MAX_RINGS; ++i) - vm->ids[i] = NULL; vm->va = RB_ROOT; vm->client_id = atomic64_inc_return(&adev->vm_manager.client_counter); spin_lock_init(&vm->status_lock); @@ -2241,16 +2248,21 @@ void amdgpu_vm_fini(struct amdgpu_device *adev, struct amdgpu_vm *vm) */ void amdgpu_vm_manager_init(struct amdgpu_device *adev) { - unsigned i; + unsigned i, j; + + for (i = 0; i < AMDGPU_MAX_VMHUBS; ++i) { + struct amdgpu_vm_id_manager *id_mgr = + &adev->vm_manager.id_mgr[i]; - INIT_LIST_HEAD(&adev->vm_manager.ids_lru); + mutex_init(&id_mgr->lock); + INIT_LIST_HEAD(&id_mgr->ids_lru); - /* skip over VMID 0, since it is the system VM */ - for (i = 1; i < adev->vm_manager.num_ids; ++i) { - amdgpu_vm_reset_id(adev, i); - amdgpu_sync_create(&adev->vm_manager.ids[i].active); - list_add_tail(&adev->vm_manager.ids[i].list, - &adev->vm_manager.ids_lru); + /* skip over VMID 0, since it is the system VM */ + for (j = 1; j < id_mgr->num_ids; ++j) { + amdgpu_vm_reset_id(adev, i, j); + amdgpu_sync_create(&id_mgr->ids[j].active); + list_add_tail(&id_mgr->ids[j].list, &id_mgr->ids_lru); + } } adev->vm_manager.fence_context = @@ -2258,6 +2270,7 @@ void amdgpu_vm_manager_init(struct amdgpu_device *adev) for (i = 0; i < AMDGPU_MAX_RINGS; ++i) adev->vm_manager.seqno[i] = 0; + atomic_set(&adev->vm_manager.vm_pte_next_ring, 0); atomic64_set(&adev->vm_manager.client_counter, 0); spin_lock_init(&adev->vm_manager.prt_lock); @@ -2273,13 +2286,19 @@ void amdgpu_vm_manager_init(struct amdgpu_device *adev) */ void amdgpu_vm_manager_fini(struct amdgpu_device *adev) { - unsigned i; + unsigned i, j; - for (i = 0; i < AMDGPU_NUM_VM; ++i) { - struct amdgpu_vm_id *id = &adev->vm_manager.ids[i]; + for (i = 0; i < AMDGPU_MAX_VMHUBS; ++i) { + struct amdgpu_vm_id_manager *id_mgr = + &adev->vm_manager.id_mgr[i]; - amdgpu_sync_free(&adev->vm_manager.ids[i].active); - dma_fence_put(id->flushed_updates); - dma_fence_put(id->last_flush); + mutex_destroy(&id_mgr->lock); + for (j = 0; j < AMDGPU_NUM_VM; ++j) { + struct 
amdgpu_vm_id *id = &id_mgr->ids[j]; + + amdgpu_sync_free(&id->active); + dma_fence_put(id->flushed_updates); + dma_fence_put(id->last_flush); + } } } diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h index d9e57290dc71..d97e28b4bdc4 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h @@ -65,7 +65,8 @@ struct amdgpu_bo_list_entry; #define AMDGPU_PTE_FRAG(x) ((x & 0x1fULL) << 7) -#define AMDGPU_PTE_PRT (1ULL << 63) +/* TILED for VEGA10, reserved for older ASICs */ +#define AMDGPU_PTE_PRT (1ULL << 51) /* VEGA10 only */ #define AMDGPU_PTE_MTYPE(a) ((uint64_t)a << 57) @@ -114,9 +115,6 @@ struct amdgpu_vm { struct dma_fence *last_dir_update; uint64_t last_eviction_counter; - /* for id and flush management per ring */ - struct amdgpu_vm_id *ids[AMDGPU_MAX_RINGS]; - /* protecting freed */ spinlock_t freed_lock; @@ -149,12 +147,16 @@ struct amdgpu_vm_id { uint32_t oa_size; }; +struct amdgpu_vm_id_manager { + struct mutex lock; + unsigned num_ids; + struct list_head ids_lru; + struct amdgpu_vm_id ids[AMDGPU_NUM_VM]; +}; + struct amdgpu_vm_manager { /* Handling of VMIDs */ - struct mutex lock; - unsigned num_ids; - struct list_head ids_lru; - struct amdgpu_vm_id ids[AMDGPU_NUM_VM]; + struct amdgpu_vm_id_manager id_mgr[AMDGPU_MAX_VMHUBS]; /* Handling of VM fences */ u64 fence_context; @@ -200,7 +202,8 @@ int amdgpu_vm_grab_id(struct amdgpu_vm *vm, struct amdgpu_ring *ring, struct amdgpu_sync *sync, struct dma_fence *fence, struct amdgpu_job *job); int amdgpu_vm_flush(struct amdgpu_ring *ring, struct amdgpu_job *job); -void amdgpu_vm_reset_id(struct amdgpu_device *adev, unsigned vm_id); +void amdgpu_vm_reset_id(struct amdgpu_device *adev, unsigned vmhub, + unsigned vmid); int amdgpu_vm_update_directories(struct amdgpu_device *adev, struct amdgpu_vm *vm); int amdgpu_vm_clear_freed(struct amdgpu_device *adev, diff --git a/drivers/gpu/drm/amd/amdgpu/ci_dpm.c 
b/drivers/gpu/drm/amd/amdgpu/ci_dpm.c index 11ccda83d767..6dc1410b380f 100644 --- a/drivers/gpu/drm/amd/amdgpu/ci_dpm.c +++ b/drivers/gpu/drm/amd/amdgpu/ci_dpm.c @@ -1267,30 +1267,33 @@ static int ci_dpm_set_fan_speed_percent(struct amdgpu_device *adev, static void ci_dpm_set_fan_control_mode(struct amdgpu_device *adev, u32 mode) { - if (mode) { - /* stop auto-manage */ + switch (mode) { + case AMD_FAN_CTRL_NONE: if (adev->pm.dpm.fan.ucode_fan_control) ci_fan_ctrl_stop_smc_fan_control(adev); - ci_fan_ctrl_set_static_mode(adev, mode); - } else { - /* restart auto-manage */ + ci_dpm_set_fan_speed_percent(adev, 100); + break; + case AMD_FAN_CTRL_MANUAL: + if (adev->pm.dpm.fan.ucode_fan_control) + ci_fan_ctrl_stop_smc_fan_control(adev); + break; + case AMD_FAN_CTRL_AUTO: if (adev->pm.dpm.fan.ucode_fan_control) ci_thermal_start_smc_fan_control(adev); - else - ci_fan_ctrl_set_default_mode(adev); + break; + default: + break; } } static u32 ci_dpm_get_fan_control_mode(struct amdgpu_device *adev) { struct ci_power_info *pi = ci_get_pi(adev); - u32 tmp; if (pi->fan_is_controlled_by_smc) - return 0; - - tmp = RREG32_SMC(ixCG_FDO_CTRL2) & CG_FDO_CTRL2__FDO_PWM_MODE_MASK; - return (tmp >> CG_FDO_CTRL2__FDO_PWM_MODE__SHIFT); + return AMD_FAN_CTRL_AUTO; + else + return AMD_FAN_CTRL_MANUAL; } #if 0 @@ -3036,6 +3039,7 @@ static int ci_populate_single_memory_level(struct amdgpu_device *adev, memory_clock, &memory_level->MinVddcPhases); + memory_level->EnabledForActivity = 1; memory_level->EnabledForThrottle = 1; memory_level->UpH = 0; memory_level->DownH = 100; @@ -3468,8 +3472,6 @@ static int ci_populate_all_memory_levels(struct amdgpu_device *adev) return ret; } - pi->smc_state_table.MemoryLevel[0].EnabledForActivity = 1; - if ((dpm_table->mclk_table.count >= 2) && ((adev->pdev->device == 0x67B0) || (adev->pdev->device == 0x67B1))) { pi->smc_state_table.MemoryLevel[1].MinVddc = diff --git a/drivers/gpu/drm/amd/amdgpu/dce_v10_0.c b/drivers/gpu/drm/amd/amdgpu/dce_v10_0.c index 
ba98d35340a3..0cdeb6a2e4a0 100644 --- a/drivers/gpu/drm/amd/amdgpu/dce_v10_0.c +++ b/drivers/gpu/drm/amd/amdgpu/dce_v10_0.c @@ -2230,7 +2230,7 @@ static int dce_v10_0_crtc_do_set_base(struct drm_crtc *crtc, if (!atomic && fb && fb != crtc->primary->fb) { amdgpu_fb = to_amdgpu_framebuffer(fb); abo = gem_to_amdgpu_bo(amdgpu_fb->obj); - r = amdgpu_bo_reserve(abo, false); + r = amdgpu_bo_reserve(abo, true); if (unlikely(r != 0)) return r; amdgpu_bo_unpin(abo); @@ -2589,7 +2589,7 @@ static int dce_v10_0_crtc_cursor_set2(struct drm_crtc *crtc, unpin: if (amdgpu_crtc->cursor_bo) { struct amdgpu_bo *aobj = gem_to_amdgpu_bo(amdgpu_crtc->cursor_bo); - ret = amdgpu_bo_reserve(aobj, false); + ret = amdgpu_bo_reserve(aobj, true); if (likely(ret == 0)) { amdgpu_bo_unpin(aobj); amdgpu_bo_unreserve(aobj); @@ -2720,7 +2720,7 @@ static void dce_v10_0_crtc_disable(struct drm_crtc *crtc) amdgpu_fb = to_amdgpu_framebuffer(crtc->primary->fb); abo = gem_to_amdgpu_bo(amdgpu_fb->obj); - r = amdgpu_bo_reserve(abo, false); + r = amdgpu_bo_reserve(abo, true); if (unlikely(r)) DRM_ERROR("failed to reserve abo before unpin\n"); else { diff --git a/drivers/gpu/drm/amd/amdgpu/dce_v11_0.c b/drivers/gpu/drm/amd/amdgpu/dce_v11_0.c index e59bc42df18c..773654a19749 100644 --- a/drivers/gpu/drm/amd/amdgpu/dce_v11_0.c +++ b/drivers/gpu/drm/amd/amdgpu/dce_v11_0.c @@ -2214,7 +2214,7 @@ static int dce_v11_0_crtc_do_set_base(struct drm_crtc *crtc, if (!atomic && fb && fb != crtc->primary->fb) { amdgpu_fb = to_amdgpu_framebuffer(fb); abo = gem_to_amdgpu_bo(amdgpu_fb->obj); - r = amdgpu_bo_reserve(abo, false); + r = amdgpu_bo_reserve(abo, true); if (unlikely(r != 0)) return r; amdgpu_bo_unpin(abo); @@ -2609,7 +2609,7 @@ static int dce_v11_0_crtc_cursor_set2(struct drm_crtc *crtc, unpin: if (amdgpu_crtc->cursor_bo) { struct amdgpu_bo *aobj = gem_to_amdgpu_bo(amdgpu_crtc->cursor_bo); - ret = amdgpu_bo_reserve(aobj, false); + ret = amdgpu_bo_reserve(aobj, true); if (likely(ret == 0)) { amdgpu_bo_unpin(aobj); 
amdgpu_bo_unreserve(aobj); @@ -2740,7 +2740,7 @@ static void dce_v11_0_crtc_disable(struct drm_crtc *crtc) amdgpu_fb = to_amdgpu_framebuffer(crtc->primary->fb); abo = gem_to_amdgpu_bo(amdgpu_fb->obj); - r = amdgpu_bo_reserve(abo, false); + r = amdgpu_bo_reserve(abo, true); if (unlikely(r)) DRM_ERROR("failed to reserve abo before unpin\n"); else { diff --git a/drivers/gpu/drm/amd/amdgpu/dce_v6_0.c b/drivers/gpu/drm/amd/amdgpu/dce_v6_0.c index 307269bda4fa..1f3552967ba3 100644 --- a/drivers/gpu/drm/amd/amdgpu/dce_v6_0.c +++ b/drivers/gpu/drm/amd/amdgpu/dce_v6_0.c @@ -979,7 +979,7 @@ static void dce_v6_0_program_watermarks(struct amdgpu_device *adev, u32 priority_a_mark = 0, priority_b_mark = 0; u32 priority_a_cnt = PRIORITY_OFF; u32 priority_b_cnt = PRIORITY_OFF; - u32 tmp, arb_control3; + u32 tmp, arb_control3, lb_vblank_lead_lines = 0; fixed20_12 a, b, c; if (amdgpu_crtc->base.enabled && num_heads && mode) { @@ -1091,6 +1091,8 @@ static void dce_v6_0_program_watermarks(struct amdgpu_device *adev, c.full = dfixed_div(c, a); priority_b_mark = dfixed_trunc(c); priority_b_cnt |= priority_b_mark & PRIORITY_MARK_MASK; + + lb_vblank_lead_lines = DIV_ROUND_UP(lb_size, mode->crtc_hdisplay); } /* select wm A */ @@ -1120,6 +1122,9 @@ static void dce_v6_0_program_watermarks(struct amdgpu_device *adev, /* save values for DPM */ amdgpu_crtc->line_time = line_time; amdgpu_crtc->wm_high = latency_watermark_a; + + /* Save number of lines the linebuffer leads before the scanout */ + amdgpu_crtc->lb_vblank_lead_lines = lb_vblank_lead_lines; } /* watermark setup */ @@ -1640,7 +1645,7 @@ static int dce_v6_0_crtc_do_set_base(struct drm_crtc *crtc, if (!atomic && fb && fb != crtc->primary->fb) { amdgpu_fb = to_amdgpu_framebuffer(fb); abo = gem_to_amdgpu_bo(amdgpu_fb->obj); - r = amdgpu_bo_reserve(abo, false); + r = amdgpu_bo_reserve(abo, true); if (unlikely(r != 0)) return r; amdgpu_bo_unpin(abo); @@ -1957,7 +1962,7 @@ static int dce_v6_0_crtc_cursor_set2(struct drm_crtc *crtc, unpin: if 
(amdgpu_crtc->cursor_bo) { struct amdgpu_bo *aobj = gem_to_amdgpu_bo(amdgpu_crtc->cursor_bo); - ret = amdgpu_bo_reserve(aobj, false); + ret = amdgpu_bo_reserve(aobj, true); if (likely(ret == 0)) { amdgpu_bo_unpin(aobj); amdgpu_bo_unreserve(aobj); @@ -2083,7 +2088,7 @@ static void dce_v6_0_crtc_disable(struct drm_crtc *crtc) amdgpu_fb = to_amdgpu_framebuffer(crtc->primary->fb); abo = gem_to_amdgpu_bo(amdgpu_fb->obj); - r = amdgpu_bo_reserve(abo, false); + r = amdgpu_bo_reserve(abo, true); if (unlikely(r)) DRM_ERROR("failed to reserve abo before unpin\n"); else { diff --git a/drivers/gpu/drm/amd/amdgpu/dce_v8_0.c b/drivers/gpu/drm/amd/amdgpu/dce_v8_0.c index 6df7a28e8aac..3c558c170e5e 100644 --- a/drivers/gpu/drm/amd/amdgpu/dce_v8_0.c +++ b/drivers/gpu/drm/amd/amdgpu/dce_v8_0.c @@ -2089,7 +2089,7 @@ static int dce_v8_0_crtc_do_set_base(struct drm_crtc *crtc, if (!atomic && fb && fb != crtc->primary->fb) { amdgpu_fb = to_amdgpu_framebuffer(fb); abo = gem_to_amdgpu_bo(amdgpu_fb->obj); - r = amdgpu_bo_reserve(abo, false); + r = amdgpu_bo_reserve(abo, true); if (unlikely(r != 0)) return r; amdgpu_bo_unpin(abo); @@ -2440,7 +2440,7 @@ static int dce_v8_0_crtc_cursor_set2(struct drm_crtc *crtc, unpin: if (amdgpu_crtc->cursor_bo) { struct amdgpu_bo *aobj = gem_to_amdgpu_bo(amdgpu_crtc->cursor_bo); - ret = amdgpu_bo_reserve(aobj, false); + ret = amdgpu_bo_reserve(aobj, true); if (likely(ret == 0)) { amdgpu_bo_unpin(aobj); amdgpu_bo_unreserve(aobj); @@ -2571,7 +2571,7 @@ static void dce_v8_0_crtc_disable(struct drm_crtc *crtc) amdgpu_fb = to_amdgpu_framebuffer(crtc->primary->fb); abo = gem_to_amdgpu_bo(amdgpu_fb->obj); - r = amdgpu_bo_reserve(abo, false); + r = amdgpu_bo_reserve(abo, true); if (unlikely(r)) DRM_ERROR("failed to reserve abo before unpin\n"); else { diff --git a/drivers/gpu/drm/amd/amdgpu/dce_virtual.c b/drivers/gpu/drm/amd/amdgpu/dce_virtual.c index 81a24b6b4846..f1b479b6ac98 100644 --- a/drivers/gpu/drm/amd/amdgpu/dce_virtual.c +++ 
b/drivers/gpu/drm/amd/amdgpu/dce_virtual.c @@ -248,7 +248,7 @@ static void dce_virtual_crtc_disable(struct drm_crtc *crtc) amdgpu_fb = to_amdgpu_framebuffer(crtc->primary->fb); abo = gem_to_amdgpu_bo(amdgpu_fb->obj); - r = amdgpu_bo_reserve(abo, false); + r = amdgpu_bo_reserve(abo, true); if (unlikely(r)) DRM_ERROR("failed to reserve abo before unpin\n"); else { diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v6_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v6_0.c index 4c4874fdf59f..a125f9d44577 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v6_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v6_0.c @@ -1579,7 +1579,7 @@ static void gfx_v6_0_setup_spi(struct amdgpu_device *adev) static void gfx_v6_0_config_init(struct amdgpu_device *adev) { - adev->gfx.config.double_offchip_lds_buf = 1; + adev->gfx.config.double_offchip_lds_buf = 0; } static void gfx_v6_0_gpu_init(struct amdgpu_device *adev) @@ -2437,7 +2437,7 @@ static void gfx_v6_0_rlc_fini(struct amdgpu_device *adev) int r; if (adev->gfx.rlc.save_restore_obj) { - r = amdgpu_bo_reserve(adev->gfx.rlc.save_restore_obj, false); + r = amdgpu_bo_reserve(adev->gfx.rlc.save_restore_obj, true); if (unlikely(r != 0)) dev_warn(adev->dev, "(%d) reserve RLC sr bo failed\n", r); amdgpu_bo_unpin(adev->gfx.rlc.save_restore_obj); @@ -2448,7 +2448,7 @@ static void gfx_v6_0_rlc_fini(struct amdgpu_device *adev) } if (adev->gfx.rlc.clear_state_obj) { - r = amdgpu_bo_reserve(adev->gfx.rlc.clear_state_obj, false); + r = amdgpu_bo_reserve(adev->gfx.rlc.clear_state_obj, true); if (unlikely(r != 0)) dev_warn(adev->dev, "(%d) reserve RLC c bo failed\n", r); amdgpu_bo_unpin(adev->gfx.rlc.clear_state_obj); @@ -2459,7 +2459,7 @@ static void gfx_v6_0_rlc_fini(struct amdgpu_device *adev) } if (adev->gfx.rlc.cp_table_obj) { - r = amdgpu_bo_reserve(adev->gfx.rlc.cp_table_obj, false); + r = amdgpu_bo_reserve(adev->gfx.rlc.cp_table_obj, true); if (unlikely(r != 0)) dev_warn(adev->dev, "(%d) reserve RLC cp table bo failed\n", r); 
amdgpu_bo_unpin(adev->gfx.rlc.cp_table_obj); @@ -3292,7 +3292,7 @@ static int gfx_v6_0_sw_init(void *handle) ring->me = 1; ring->pipe = i; ring->queue = i; - sprintf(ring->name, "comp %d.%d.%d", ring->me, ring->pipe, ring->queue); + sprintf(ring->name, "comp_%d.%d.%d", ring->me, ring->pipe, ring->queue); irq_type = AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE0_EOP + ring->pipe; r = amdgpu_ring_init(adev, ring, 1024, &adev->gfx.eop_irq, irq_type); diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c index 8a8bc2fe6f2e..ee2f2139e2eb 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c @@ -1935,7 +1935,7 @@ static void gfx_v7_0_gpu_init(struct amdgpu_device *adev) INDEX_STRIDE, 3); mutex_lock(&adev->srbm_mutex); - for (i = 0; i < adev->vm_manager.num_ids; i++) { + for (i = 0; i < adev->vm_manager.id_mgr[0].num_ids; i++) { if (i == 0) sh_mem_base = 0; else @@ -2792,7 +2792,7 @@ static void gfx_v7_0_cp_compute_fini(struct amdgpu_device *adev) struct amdgpu_ring *ring = &adev->gfx.compute_ring[i]; if (ring->mqd_obj) { - r = amdgpu_bo_reserve(ring->mqd_obj, false); + r = amdgpu_bo_reserve(ring->mqd_obj, true); if (unlikely(r != 0)) dev_warn(adev->dev, "(%d) reserve MQD bo failed\n", r); @@ -2810,7 +2810,7 @@ static void gfx_v7_0_mec_fini(struct amdgpu_device *adev) int r; if (adev->gfx.mec.hpd_eop_obj) { - r = amdgpu_bo_reserve(adev->gfx.mec.hpd_eop_obj, false); + r = amdgpu_bo_reserve(adev->gfx.mec.hpd_eop_obj, true); if (unlikely(r != 0)) dev_warn(adev->dev, "(%d) reserve HPD EOP bo failed\n", r); amdgpu_bo_unpin(adev->gfx.mec.hpd_eop_obj); @@ -3359,7 +3359,7 @@ static void gfx_v7_0_rlc_fini(struct amdgpu_device *adev) /* save restore block */ if (adev->gfx.rlc.save_restore_obj) { - r = amdgpu_bo_reserve(adev->gfx.rlc.save_restore_obj, false); + r = amdgpu_bo_reserve(adev->gfx.rlc.save_restore_obj, true); if (unlikely(r != 0)) dev_warn(adev->dev, "(%d) reserve RLC sr bo failed\n", r); 
amdgpu_bo_unpin(adev->gfx.rlc.save_restore_obj); @@ -3371,7 +3371,7 @@ static void gfx_v7_0_rlc_fini(struct amdgpu_device *adev) /* clear state block */ if (adev->gfx.rlc.clear_state_obj) { - r = amdgpu_bo_reserve(adev->gfx.rlc.clear_state_obj, false); + r = amdgpu_bo_reserve(adev->gfx.rlc.clear_state_obj, true); if (unlikely(r != 0)) dev_warn(adev->dev, "(%d) reserve RLC c bo failed\n", r); amdgpu_bo_unpin(adev->gfx.rlc.clear_state_obj); @@ -3383,7 +3383,7 @@ static void gfx_v7_0_rlc_fini(struct amdgpu_device *adev) /* clear state block */ if (adev->gfx.rlc.cp_table_obj) { - r = amdgpu_bo_reserve(adev->gfx.rlc.cp_table_obj, false); + r = amdgpu_bo_reserve(adev->gfx.rlc.cp_table_obj, true); if (unlikely(r != 0)) dev_warn(adev->dev, "(%d) reserve RLC cp table bo failed\n", r); amdgpu_bo_unpin(adev->gfx.rlc.cp_table_obj); diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c index dad8a4cd1b37..758d636a6f52 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c @@ -1239,7 +1239,7 @@ static void gfx_v8_0_rlc_fini(struct amdgpu_device *adev) /* clear state block */ if (adev->gfx.rlc.clear_state_obj) { - r = amdgpu_bo_reserve(adev->gfx.rlc.clear_state_obj, false); + r = amdgpu_bo_reserve(adev->gfx.rlc.clear_state_obj, true); if (unlikely(r != 0)) dev_warn(adev->dev, "(%d) reserve RLC cbs bo failed\n", r); amdgpu_bo_unpin(adev->gfx.rlc.clear_state_obj); @@ -1250,7 +1250,7 @@ static void gfx_v8_0_rlc_fini(struct amdgpu_device *adev) /* jump table block */ if (adev->gfx.rlc.cp_table_obj) { - r = amdgpu_bo_reserve(adev->gfx.rlc.cp_table_obj, false); + r = amdgpu_bo_reserve(adev->gfx.rlc.cp_table_obj, true); if (unlikely(r != 0)) dev_warn(adev->dev, "(%d) reserve RLC cp table bo failed\n", r); amdgpu_bo_unpin(adev->gfx.rlc.cp_table_obj); @@ -1363,7 +1363,7 @@ static void gfx_v8_0_mec_fini(struct amdgpu_device *adev) int r; if (adev->gfx.mec.hpd_eop_obj) { - r = 
amdgpu_bo_reserve(adev->gfx.mec.hpd_eop_obj, false); + r = amdgpu_bo_reserve(adev->gfx.mec.hpd_eop_obj, true); if (unlikely(r != 0)) dev_warn(adev->dev, "(%d) reserve HPD EOP bo failed\n", r); amdgpu_bo_unpin(adev->gfx.mec.hpd_eop_obj); @@ -1490,7 +1490,7 @@ static int gfx_v8_0_kiq_init(struct amdgpu_device *adev) memset(hpd, 0, MEC_HPD_SIZE); - r = amdgpu_bo_reserve(kiq->eop_obj, false); + r = amdgpu_bo_reserve(kiq->eop_obj, true); if (unlikely(r != 0)) dev_warn(adev->dev, "(%d) reserve kiq eop bo failed\n", r); amdgpu_bo_kunmap(kiq->eop_obj); @@ -1932,6 +1932,7 @@ static int gfx_v8_0_gpu_early_init(struct amdgpu_device *adev) case 0xca: case 0xce: case 0x88: + case 0xe6: /* B6 */ adev->gfx.config.max_cu_per_sh = 6; break; @@ -1964,17 +1965,28 @@ static int gfx_v8_0_gpu_early_init(struct amdgpu_device *adev) adev->gfx.config.max_backends_per_se = 1; switch (adev->pdev->revision) { + case 0x80: + case 0x81: case 0xc0: case 0xc1: case 0xc2: case 0xc4: case 0xc8: case 0xc9: + case 0xd6: + case 0xda: + case 0xe9: + case 0xea: adev->gfx.config.max_cu_per_sh = 3; break; + case 0x83: case 0xd0: case 0xd1: case 0xd2: + case 0xd4: + case 0xdb: + case 0xe1: + case 0xe2: default: adev->gfx.config.max_cu_per_sh = 2; break; @@ -3890,7 +3902,7 @@ static void gfx_v8_0_gpu_init(struct amdgpu_device *adev) sh_static_mem_cfg = REG_SET_FIELD(sh_static_mem_cfg, SH_STATIC_MEM_CONFIG, INDEX_STRIDE, 3); mutex_lock(&adev->srbm_mutex); - for (i = 0; i < adev->vm_manager.num_ids; i++) { + for (i = 0; i < adev->vm_manager.id_mgr[0].num_ids; i++) { vi_srbm_select(adev, 0, 0, 0, i); /* CP and shaders */ if (i == 0) { diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c index a447b70841c9..0c16b7563b73 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c @@ -39,7 +39,6 @@ #define GFX9_NUM_GFX_RINGS 1 #define GFX9_NUM_COMPUTE_RINGS 8 -#define GFX9_NUM_SE 4 #define RLCG_UCODE_LOADING_START_ADDRESS 0x2000 
MODULE_FIRMWARE("amdgpu/vega10_ce.bin"); @@ -453,7 +452,7 @@ static void gfx_v9_0_mec_fini(struct amdgpu_device *adev) int r; if (adev->gfx.mec.hpd_eop_obj) { - r = amdgpu_bo_reserve(adev->gfx.mec.hpd_eop_obj, false); + r = amdgpu_bo_reserve(adev->gfx.mec.hpd_eop_obj, true); if (unlikely(r != 0)) dev_warn(adev->dev, "(%d) reserve HPD EOP bo failed\n", r); amdgpu_bo_unpin(adev->gfx.mec.hpd_eop_obj); @@ -463,7 +462,7 @@ static void gfx_v9_0_mec_fini(struct amdgpu_device *adev) adev->gfx.mec.hpd_eop_obj = NULL; } if (adev->gfx.mec.mec_fw_obj) { - r = amdgpu_bo_reserve(adev->gfx.mec.mec_fw_obj, false); + r = amdgpu_bo_reserve(adev->gfx.mec.mec_fw_obj, true); if (unlikely(r != 0)) dev_warn(adev->dev, "(%d) reserve mec firmware bo failed\n", r); amdgpu_bo_unpin(adev->gfx.mec.mec_fw_obj); @@ -599,7 +598,7 @@ static int gfx_v9_0_kiq_init(struct amdgpu_device *adev) memset(hpd, 0, MEC_HPD_SIZE); - r = amdgpu_bo_reserve(kiq->eop_obj, false); + r = amdgpu_bo_reserve(kiq->eop_obj, true); if (unlikely(r != 0)) dev_warn(adev->dev, "(%d) reserve kiq eop bo failed\n", r); amdgpu_bo_kunmap(kiq->eop_obj); @@ -631,7 +630,6 @@ static int gfx_v9_0_kiq_init_ring(struct amdgpu_device *adev, ring->pipe = 1; } - irq->data = ring; ring->queue = 0; ring->eop_gpu_addr = kiq->eop_gpu_addr; sprintf(ring->name, "kiq %d.%d.%d", ring->me, ring->pipe, ring->queue); @@ -647,7 +645,6 @@ static void gfx_v9_0_kiq_free_ring(struct amdgpu_ring *ring, { amdgpu_wb_free(ring->adev, ring->adev->virt.reg_val_offs); amdgpu_ring_fini(ring); - irq->data = NULL; } /* create MQD for each compute queue */ @@ -705,19 +702,19 @@ static void gfx_v9_0_compute_mqd_sw_fini(struct amdgpu_device *adev) static uint32_t wave_read_ind(struct amdgpu_device *adev, uint32_t simd, uint32_t wave, uint32_t address) { - WREG32(SOC15_REG_OFFSET(GC, 0, mmSQ_IND_INDEX), + WREG32_SOC15(GC, 0, mmSQ_IND_INDEX, (wave << SQ_IND_INDEX__WAVE_ID__SHIFT) | (simd << SQ_IND_INDEX__SIMD_ID__SHIFT) | (address << SQ_IND_INDEX__INDEX__SHIFT) | 
(SQ_IND_INDEX__FORCE_READ_MASK)); - return RREG32(SOC15_REG_OFFSET(GC, 0, mmSQ_IND_DATA)); + return RREG32_SOC15(GC, 0, mmSQ_IND_DATA); } static void wave_read_regs(struct amdgpu_device *adev, uint32_t simd, uint32_t wave, uint32_t thread, uint32_t regno, uint32_t num, uint32_t *out) { - WREG32(SOC15_REG_OFFSET(GC, 0, mmSQ_IND_INDEX), + WREG32_SOC15(GC, 0, mmSQ_IND_INDEX, (wave << SQ_IND_INDEX__WAVE_ID__SHIFT) | (simd << SQ_IND_INDEX__SIMD_ID__SHIFT) | (regno << SQ_IND_INDEX__INDEX__SHIFT) | @@ -725,7 +722,7 @@ static void wave_read_regs(struct amdgpu_device *adev, uint32_t simd, (SQ_IND_INDEX__FORCE_READ_MASK) | (SQ_IND_INDEX__AUTO_INCR_MASK)); while (num--) - *(out++) = RREG32(SOC15_REG_OFFSET(GC, 0, mmSQ_IND_DATA)); + *(out++) = RREG32_SOC15(GC, 0, mmSQ_IND_DATA); } static void gfx_v9_0_read_wave_data(struct amdgpu_device *adev, uint32_t simd, uint32_t wave, uint32_t *dst, int *no_fields) @@ -774,7 +771,6 @@ static void gfx_v9_0_gpu_early_init(struct amdgpu_device *adev) switch (adev->asic_type) { case CHIP_VEGA10: adev->gfx.config.max_shader_engines = 4; - adev->gfx.config.max_tile_pipes = 8; //?? 
adev->gfx.config.max_cu_per_sh = 16; adev->gfx.config.max_sh_per_se = 1; adev->gfx.config.max_backends_per_se = 4; @@ -787,6 +783,8 @@ static void gfx_v9_0_gpu_early_init(struct amdgpu_device *adev) adev->gfx.config.sc_prim_fifo_size_backend = 0x100; adev->gfx.config.sc_hiz_tile_fifo_size = 0x30; adev->gfx.config.sc_earlyz_tile_fifo_size = 0x4C0; + adev->gfx.config.gs_vgt_table_depth = 32; + adev->gfx.config.gs_prim_buffer_depth = 1792; gb_addr_config = VEGA10_GB_ADDR_CONFIG_GOLDEN; break; default: @@ -801,6 +799,10 @@ static void gfx_v9_0_gpu_early_init(struct amdgpu_device *adev) adev->gfx.config.gb_addr_config, GB_ADDR_CONFIG, NUM_PIPES); + + adev->gfx.config.max_tile_pipes = + adev->gfx.config.gb_addr_config_fields.num_pipes; + adev->gfx.config.gb_addr_config_fields.num_banks = 1 << REG_GET_FIELD( adev->gfx.config.gb_addr_config, @@ -841,7 +843,7 @@ static int gfx_v9_0_ngg_create_buf(struct amdgpu_device *adev, } size_se = size_se ? size_se : default_size_se; - ngg_buf->size = size_se * GFX9_NUM_SE; + ngg_buf->size = size_se * adev->gfx.config.max_shader_engines; r = amdgpu_bo_create_kernel(adev, ngg_buf->size, PAGE_SIZE, AMDGPU_GEM_DOMAIN_VRAM, &ngg_buf->bo, @@ -888,7 +890,7 @@ static int gfx_v9_0_ngg_init(struct amdgpu_device *adev) adev->gfx.ngg.gds_reserve_addr += adev->gds.mem.gfx_partition_size; /* Primitive Buffer */ - r = gfx_v9_0_ngg_create_buf(adev, &adev->gfx.ngg.buf[PRIM], + r = gfx_v9_0_ngg_create_buf(adev, &adev->gfx.ngg.buf[NGG_PRIM], amdgpu_prim_buf_per_se, 64 * 1024); if (r) { @@ -897,7 +899,7 @@ static int gfx_v9_0_ngg_init(struct amdgpu_device *adev) } /* Position Buffer */ - r = gfx_v9_0_ngg_create_buf(adev, &adev->gfx.ngg.buf[POS], + r = gfx_v9_0_ngg_create_buf(adev, &adev->gfx.ngg.buf[NGG_POS], amdgpu_pos_buf_per_se, 256 * 1024); if (r) { @@ -906,7 +908,7 @@ static int gfx_v9_0_ngg_init(struct amdgpu_device *adev) } /* Control Sideband */ - r = gfx_v9_0_ngg_create_buf(adev, &adev->gfx.ngg.buf[CNTL], + r = gfx_v9_0_ngg_create_buf(adev, 
&adev->gfx.ngg.buf[NGG_CNTL], amdgpu_cntl_sb_buf_per_se, 256); if (r) { @@ -918,7 +920,7 @@ static int gfx_v9_0_ngg_init(struct amdgpu_device *adev) if (amdgpu_param_buf_per_se <= 0) goto out; - r = gfx_v9_0_ngg_create_buf(adev, &adev->gfx.ngg.buf[PARAM], + r = gfx_v9_0_ngg_create_buf(adev, &adev->gfx.ngg.buf[NGG_PARAM], amdgpu_param_buf_per_se, 512 * 1024); if (r) { @@ -947,47 +949,47 @@ static int gfx_v9_0_ngg_en(struct amdgpu_device *adev) /* Program buffer size */ data = 0; - size = adev->gfx.ngg.buf[PRIM].size / 256; + size = adev->gfx.ngg.buf[NGG_PRIM].size / 256; data = REG_SET_FIELD(data, WD_BUF_RESOURCE_1, INDEX_BUF_SIZE, size); - size = adev->gfx.ngg.buf[POS].size / 256; + size = adev->gfx.ngg.buf[NGG_POS].size / 256; data = REG_SET_FIELD(data, WD_BUF_RESOURCE_1, POS_BUF_SIZE, size); - WREG32(SOC15_REG_OFFSET(GC, 0, mmWD_BUF_RESOURCE_1), data); + WREG32_SOC15(GC, 0, mmWD_BUF_RESOURCE_1, data); data = 0; - size = adev->gfx.ngg.buf[CNTL].size / 256; + size = adev->gfx.ngg.buf[NGG_CNTL].size / 256; data = REG_SET_FIELD(data, WD_BUF_RESOURCE_2, CNTL_SB_BUF_SIZE, size); - size = adev->gfx.ngg.buf[PARAM].size / 1024; + size = adev->gfx.ngg.buf[NGG_PARAM].size / 1024; data = REG_SET_FIELD(data, WD_BUF_RESOURCE_2, PARAM_BUF_SIZE, size); - WREG32(SOC15_REG_OFFSET(GC, 0, mmWD_BUF_RESOURCE_2), data); + WREG32_SOC15(GC, 0, mmWD_BUF_RESOURCE_2, data); /* Program buffer base address */ - base = lower_32_bits(adev->gfx.ngg.buf[PRIM].gpu_addr); + base = lower_32_bits(adev->gfx.ngg.buf[NGG_PRIM].gpu_addr); data = REG_SET_FIELD(0, WD_INDEX_BUF_BASE, BASE, base); - WREG32(SOC15_REG_OFFSET(GC, 0, mmWD_INDEX_BUF_BASE), data); + WREG32_SOC15(GC, 0, mmWD_INDEX_BUF_BASE, data); - base = upper_32_bits(adev->gfx.ngg.buf[PRIM].gpu_addr); + base = upper_32_bits(adev->gfx.ngg.buf[NGG_PRIM].gpu_addr); data = REG_SET_FIELD(0, WD_INDEX_BUF_BASE_HI, BASE_HI, base); - WREG32(SOC15_REG_OFFSET(GC, 0, mmWD_INDEX_BUF_BASE_HI), data); + WREG32_SOC15(GC, 0, mmWD_INDEX_BUF_BASE_HI, data); - base 
= lower_32_bits(adev->gfx.ngg.buf[POS].gpu_addr); + base = lower_32_bits(adev->gfx.ngg.buf[NGG_POS].gpu_addr); data = REG_SET_FIELD(0, WD_POS_BUF_BASE, BASE, base); - WREG32(SOC15_REG_OFFSET(GC, 0, mmWD_POS_BUF_BASE), data); + WREG32_SOC15(GC, 0, mmWD_POS_BUF_BASE, data); - base = upper_32_bits(adev->gfx.ngg.buf[POS].gpu_addr); + base = upper_32_bits(adev->gfx.ngg.buf[NGG_POS].gpu_addr); data = REG_SET_FIELD(0, WD_POS_BUF_BASE_HI, BASE_HI, base); - WREG32(SOC15_REG_OFFSET(GC, 0, mmWD_POS_BUF_BASE_HI), data); + WREG32_SOC15(GC, 0, mmWD_POS_BUF_BASE_HI, data); - base = lower_32_bits(adev->gfx.ngg.buf[CNTL].gpu_addr); + base = lower_32_bits(adev->gfx.ngg.buf[NGG_CNTL].gpu_addr); data = REG_SET_FIELD(0, WD_CNTL_SB_BUF_BASE, BASE, base); - WREG32(SOC15_REG_OFFSET(GC, 0, mmWD_CNTL_SB_BUF_BASE), data); + WREG32_SOC15(GC, 0, mmWD_CNTL_SB_BUF_BASE, data); - base = upper_32_bits(adev->gfx.ngg.buf[CNTL].gpu_addr); + base = upper_32_bits(adev->gfx.ngg.buf[NGG_CNTL].gpu_addr); data = REG_SET_FIELD(0, WD_CNTL_SB_BUF_BASE_HI, BASE_HI, base); - WREG32(SOC15_REG_OFFSET(GC, 0, mmWD_CNTL_SB_BUF_BASE_HI), data); + WREG32_SOC15(GC, 0, mmWD_CNTL_SB_BUF_BASE_HI, data); /* Clear GDS reserved memory */ r = amdgpu_ring_alloc(ring, 17); @@ -1096,7 +1098,7 @@ static int gfx_v9_0_sw_init(void *handle) ring->pipe = i / 8; ring->queue = i % 8; ring->eop_gpu_addr = adev->gfx.mec.hpd_eop_gpu_addr + (i * MEC_HPD_SIZE); - sprintf(ring->name, "comp %d.%d.%d", ring->me, ring->pipe, ring->queue); + sprintf(ring->name, "comp_%d.%d.%d", ring->me, ring->pipe, ring->queue); irq_type = AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE0_EOP + ring->pipe; /* type-2 packets are deprecated on MEC, use type-3 instead */ r = amdgpu_ring_init(adev, ring, 1024, @@ -1203,7 +1205,7 @@ static void gfx_v9_0_select_se_sh(struct amdgpu_device *adev, u32 se_num, u32 sh data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SH_INDEX, sh_num); data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SE_INDEX, se_num); } - WREG32( SOC15_REG_OFFSET(GC, 0, 
mmGRBM_GFX_INDEX), data); + WREG32_SOC15(GC, 0, mmGRBM_GFX_INDEX, data); } static u32 gfx_v9_0_create_bitmask(u32 bit_width) @@ -1215,8 +1217,8 @@ static u32 gfx_v9_0_get_rb_active_bitmap(struct amdgpu_device *adev) { u32 data, mask; - data = RREG32(SOC15_REG_OFFSET(GC, 0, mmCC_RB_BACKEND_DISABLE)); - data |= RREG32(SOC15_REG_OFFSET(GC, 0, mmGC_USER_RB_BACKEND_DISABLE)); + data = RREG32_SOC15(GC, 0, mmCC_RB_BACKEND_DISABLE); + data |= RREG32_SOC15(GC, 0, mmGC_USER_RB_BACKEND_DISABLE); data &= CC_RB_BACKEND_DISABLE__BACKEND_DISABLE_MASK; data >>= GC_USER_RB_BACKEND_DISABLE__BACKEND_DISABLE__SHIFT; @@ -1276,8 +1278,8 @@ static void gfx_v9_0_init_compute_vmid(struct amdgpu_device *adev) for (i = FIRST_COMPUTE_VMID; i < LAST_COMPUTE_VMID; i++) { soc15_grbm_select(adev, 0, 0, 0, i); /* CP and shaders */ - WREG32(SOC15_REG_OFFSET(GC, 0, mmSH_MEM_CONFIG), sh_mem_config); - WREG32(SOC15_REG_OFFSET(GC, 0, mmSH_MEM_BASES), sh_mem_bases); + WREG32_SOC15(GC, 0, mmSH_MEM_CONFIG, sh_mem_config); + WREG32_SOC15(GC, 0, mmSH_MEM_BASES, sh_mem_bases); } soc15_grbm_select(adev, 0, 0, 0, 0); mutex_unlock(&adev->srbm_mutex); @@ -1304,8 +1306,8 @@ static void gfx_v9_0_gpu_init(struct amdgpu_device *adev) tmp = 0; tmp = REG_SET_FIELD(tmp, SH_MEM_CONFIG, ALIGNMENT_MODE, SH_MEM_ALIGNMENT_MODE_UNALIGNED); - WREG32(SOC15_REG_OFFSET(GC, 0, mmSH_MEM_CONFIG), tmp); - WREG32(SOC15_REG_OFFSET(GC, 0, mmSH_MEM_BASES), 0); + WREG32_SOC15(GC, 0, mmSH_MEM_CONFIG, tmp); + WREG32_SOC15(GC, 0, mmSH_MEM_BASES, 0); } soc15_grbm_select(adev, 0, 0, 0, 0); @@ -1320,7 +1322,7 @@ static void gfx_v9_0_gpu_init(struct amdgpu_device *adev) */ gfx_v9_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff); - WREG32(SOC15_REG_OFFSET(GC, 0, mmPA_SC_FIFO_SIZE), + WREG32_SOC15(GC, 0, mmPA_SC_FIFO_SIZE, (adev->gfx.config.sc_prim_fifo_size_frontend << PA_SC_FIFO_SIZE__SC_FRONTEND_PRIM_FIFO_SIZE__SHIFT) | (adev->gfx.config.sc_prim_fifo_size_backend << @@ -1343,7 +1345,7 @@ static void gfx_v9_0_wait_for_rlc_serdes(struct 
amdgpu_device *adev) for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) { gfx_v9_0_select_se_sh(adev, i, j, 0xffffffff); for (k = 0; k < adev->usec_timeout; k++) { - if (RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_SERDES_CU_MASTER_BUSY)) == 0) + if (RREG32_SOC15(GC, 0, mmRLC_SERDES_CU_MASTER_BUSY) == 0) break; udelay(1); } @@ -1357,7 +1359,7 @@ static void gfx_v9_0_wait_for_rlc_serdes(struct amdgpu_device *adev) RLC_SERDES_NONCU_MASTER_BUSY__TC0_MASTER_BUSY_MASK | RLC_SERDES_NONCU_MASTER_BUSY__TC1_MASTER_BUSY_MASK; for (k = 0; k < adev->usec_timeout; k++) { - if ((RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_SERDES_NONCU_MASTER_BUSY)) & mask) == 0) + if ((RREG32_SOC15(GC, 0, mmRLC_SERDES_NONCU_MASTER_BUSY) & mask) == 0) break; udelay(1); } @@ -1366,7 +1368,7 @@ static void gfx_v9_0_wait_for_rlc_serdes(struct amdgpu_device *adev) static void gfx_v9_0_enable_gui_idle_interrupt(struct amdgpu_device *adev, bool enable) { - u32 tmp = RREG32(SOC15_REG_OFFSET(GC, 0, mmCP_INT_CNTL_RING0)); + u32 tmp = RREG32_SOC15(GC, 0, mmCP_INT_CNTL_RING0); if (enable) return; @@ -1376,15 +1378,15 @@ static void gfx_v9_0_enable_gui_idle_interrupt(struct amdgpu_device *adev, tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, CMP_BUSY_INT_ENABLE, enable ? 1 : 0); tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, GFX_IDLE_INT_ENABLE, enable ? 
1 : 0); - WREG32(SOC15_REG_OFFSET(GC, 0, mmCP_INT_CNTL_RING0), tmp); + WREG32_SOC15(GC, 0, mmCP_INT_CNTL_RING0, tmp); } void gfx_v9_0_rlc_stop(struct amdgpu_device *adev) { - u32 tmp = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_CNTL)); + u32 tmp = RREG32_SOC15(GC, 0, mmRLC_CNTL); tmp = REG_SET_FIELD(tmp, RLC_CNTL, RLC_ENABLE_F32, 0); - WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_CNTL), tmp); + WREG32_SOC15(GC, 0, mmRLC_CNTL, tmp); gfx_v9_0_enable_gui_idle_interrupt(adev, false); @@ -1415,17 +1417,17 @@ static void gfx_v9_0_rlc_start(struct amdgpu_device *adev) #ifdef AMDGPU_RLC_DEBUG_RETRY /* RLC_GPM_GENERAL_6 : RLC Ucode version */ - rlc_ucode_ver = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_GPM_GENERAL_6)); + rlc_ucode_ver = RREG32_SOC15(GC, 0, mmRLC_GPM_GENERAL_6); if(rlc_ucode_ver == 0x108) { DRM_INFO("Using rlc debug ucode. mmRLC_GPM_GENERAL_6 ==0x08%x / fw_ver == %i \n", rlc_ucode_ver, adev->gfx.rlc_fw_version); /* RLC_GPM_TIMER_INT_3 : Timer interval in RefCLK cycles, * default is 0x9C4 to create a 100us interval */ - WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_GPM_TIMER_INT_3), 0x9C4); + WREG32_SOC15(GC, 0, mmRLC_GPM_TIMER_INT_3, 0x9C4); /* RLC_GPM_GENERAL_12 : Minimum gap between wptr and rptr * to disable the page fault retry interrupts, default is * 0x100 (256) */ - WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_GPM_GENERAL_12), 0x100); + WREG32_SOC15(GC, 0, mmRLC_GPM_GENERAL_12, 0x100); } #endif } @@ -1446,11 +1448,11 @@ static int gfx_v9_0_rlc_load_microcode(struct amdgpu_device *adev) le32_to_cpu(hdr->header.ucode_array_offset_bytes)); fw_size = le32_to_cpu(hdr->header.ucode_size_bytes) / 4; - WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_GPM_UCODE_ADDR), + WREG32_SOC15(GC, 0, mmRLC_GPM_UCODE_ADDR, RLCG_UCODE_LOADING_START_ADDRESS); for (i = 0; i < fw_size; i++) - WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_GPM_UCODE_DATA), le32_to_cpup(fw_data++)); - WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_GPM_UCODE_ADDR), adev->gfx.rlc_fw_version); + WREG32_SOC15(GC, 0, mmRLC_GPM_UCODE_DATA, le32_to_cpup(fw_data++)); + 
WREG32_SOC15(GC, 0, mmRLC_GPM_UCODE_ADDR, adev->gfx.rlc_fw_version); return 0; } @@ -1465,10 +1467,10 @@ static int gfx_v9_0_rlc_resume(struct amdgpu_device *adev) gfx_v9_0_rlc_stop(adev); /* disable CG */ - WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_CGCG_CGLS_CTRL), 0); + WREG32_SOC15(GC, 0, mmRLC_CGCG_CGLS_CTRL, 0); /* disable PG */ - WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL), 0); + WREG32_SOC15(GC, 0, mmRLC_PG_CNTL, 0); gfx_v9_0_rlc_reset(adev); @@ -1487,7 +1489,7 @@ static int gfx_v9_0_rlc_resume(struct amdgpu_device *adev) static void gfx_v9_0_cp_gfx_enable(struct amdgpu_device *adev, bool enable) { int i; - u32 tmp = RREG32(SOC15_REG_OFFSET(GC, 0, mmCP_ME_CNTL)); + u32 tmp = RREG32_SOC15(GC, 0, mmCP_ME_CNTL); tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, ME_HALT, enable ? 0 : 1); tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, PFP_HALT, enable ? 0 : 1); @@ -1496,7 +1498,7 @@ static void gfx_v9_0_cp_gfx_enable(struct amdgpu_device *adev, bool enable) for (i = 0; i < adev->gfx.num_gfx_rings; i++) adev->gfx.gfx_ring[i].ready = false; } - WREG32(SOC15_REG_OFFSET(GC, 0, mmCP_ME_CNTL), tmp); + WREG32_SOC15(GC, 0, mmCP_ME_CNTL, tmp); udelay(50); } @@ -1529,30 +1531,30 @@ static int gfx_v9_0_cp_gfx_load_microcode(struct amdgpu_device *adev) (adev->gfx.pfp_fw->data + le32_to_cpu(pfp_hdr->header.ucode_array_offset_bytes)); fw_size = le32_to_cpu(pfp_hdr->header.ucode_size_bytes) / 4; - WREG32(SOC15_REG_OFFSET(GC, 0, mmCP_PFP_UCODE_ADDR), 0); + WREG32_SOC15(GC, 0, mmCP_PFP_UCODE_ADDR, 0); for (i = 0; i < fw_size; i++) - WREG32(SOC15_REG_OFFSET(GC, 0, mmCP_PFP_UCODE_DATA), le32_to_cpup(fw_data++)); - WREG32(SOC15_REG_OFFSET(GC, 0, mmCP_PFP_UCODE_ADDR), adev->gfx.pfp_fw_version); + WREG32_SOC15(GC, 0, mmCP_PFP_UCODE_DATA, le32_to_cpup(fw_data++)); + WREG32_SOC15(GC, 0, mmCP_PFP_UCODE_ADDR, adev->gfx.pfp_fw_version); /* CE */ fw_data = (const __le32 *) (adev->gfx.ce_fw->data + le32_to_cpu(ce_hdr->header.ucode_array_offset_bytes)); fw_size = le32_to_cpu(ce_hdr->header.ucode_size_bytes) / 4; - 
WREG32(SOC15_REG_OFFSET(GC, 0, mmCP_CE_UCODE_ADDR), 0); + WREG32_SOC15(GC, 0, mmCP_CE_UCODE_ADDR, 0); for (i = 0; i < fw_size; i++) - WREG32(SOC15_REG_OFFSET(GC, 0, mmCP_CE_UCODE_DATA), le32_to_cpup(fw_data++)); - WREG32(SOC15_REG_OFFSET(GC, 0, mmCP_CE_UCODE_ADDR), adev->gfx.ce_fw_version); + WREG32_SOC15(GC, 0, mmCP_CE_UCODE_DATA, le32_to_cpup(fw_data++)); + WREG32_SOC15(GC, 0, mmCP_CE_UCODE_ADDR, adev->gfx.ce_fw_version); /* ME */ fw_data = (const __le32 *) (adev->gfx.me_fw->data + le32_to_cpu(me_hdr->header.ucode_array_offset_bytes)); fw_size = le32_to_cpu(me_hdr->header.ucode_size_bytes) / 4; - WREG32(SOC15_REG_OFFSET(GC, 0, mmCP_ME_RAM_WADDR), 0); + WREG32_SOC15(GC, 0, mmCP_ME_RAM_WADDR, 0); for (i = 0; i < fw_size; i++) - WREG32(SOC15_REG_OFFSET(GC, 0, mmCP_ME_RAM_DATA), le32_to_cpup(fw_data++)); - WREG32(SOC15_REG_OFFSET(GC, 0, mmCP_ME_RAM_WADDR), adev->gfx.me_fw_version); + WREG32_SOC15(GC, 0, mmCP_ME_RAM_DATA, le32_to_cpup(fw_data++)); + WREG32_SOC15(GC, 0, mmCP_ME_RAM_WADDR, adev->gfx.me_fw_version); return 0; } @@ -1594,8 +1596,8 @@ static int gfx_v9_0_cp_gfx_start(struct amdgpu_device *adev) int r, i; /* init the CP */ - WREG32(SOC15_REG_OFFSET(GC, 0, mmCP_MAX_CONTEXT), adev->gfx.config.max_hw_contexts - 1); - WREG32(SOC15_REG_OFFSET(GC, 0, mmCP_DEVICE_ID), 1); + WREG32_SOC15(GC, 0, mmCP_MAX_CONTEXT, adev->gfx.config.max_hw_contexts - 1); + WREG32_SOC15(GC, 0, mmCP_DEVICE_ID, 1); gfx_v9_0_cp_gfx_enable(adev, true); @@ -1650,10 +1652,10 @@ static int gfx_v9_0_cp_gfx_resume(struct amdgpu_device *adev) u64 rb_addr, rptr_addr, wptr_gpu_addr; /* Set the write pointer delay */ - WREG32(SOC15_REG_OFFSET(GC, 0, mmCP_RB_WPTR_DELAY), 0); + WREG32_SOC15(GC, 0, mmCP_RB_WPTR_DELAY, 0); /* set the RB to use vmid 0 */ - WREG32(SOC15_REG_OFFSET(GC, 0, mmCP_RB_VMID), 0); + WREG32_SOC15(GC, 0, mmCP_RB_VMID, 0); /* Set ring buffer size */ ring = &adev->gfx.gfx_ring[0]; @@ -1663,30 +1665,30 @@ static int gfx_v9_0_cp_gfx_resume(struct amdgpu_device *adev) #ifdef 
__BIG_ENDIAN tmp = REG_SET_FIELD(tmp, CP_RB0_CNTL, BUF_SWAP, 1); #endif - WREG32(SOC15_REG_OFFSET(GC, 0, mmCP_RB0_CNTL), tmp); + WREG32_SOC15(GC, 0, mmCP_RB0_CNTL, tmp); /* Initialize the ring buffer's write pointers */ ring->wptr = 0; - WREG32(SOC15_REG_OFFSET(GC, 0, mmCP_RB0_WPTR), lower_32_bits(ring->wptr)); - WREG32(SOC15_REG_OFFSET(GC, 0, mmCP_RB0_WPTR_HI), upper_32_bits(ring->wptr)); + WREG32_SOC15(GC, 0, mmCP_RB0_WPTR, lower_32_bits(ring->wptr)); + WREG32_SOC15(GC, 0, mmCP_RB0_WPTR_HI, upper_32_bits(ring->wptr)); /* set the wb address wether it's enabled or not */ rptr_addr = adev->wb.gpu_addr + (ring->rptr_offs * 4); - WREG32(SOC15_REG_OFFSET(GC, 0, mmCP_RB0_RPTR_ADDR), lower_32_bits(rptr_addr)); - WREG32(SOC15_REG_OFFSET(GC, 0, mmCP_RB0_RPTR_ADDR_HI), upper_32_bits(rptr_addr) & CP_RB_RPTR_ADDR_HI__RB_RPTR_ADDR_HI_MASK); + WREG32_SOC15(GC, 0, mmCP_RB0_RPTR_ADDR, lower_32_bits(rptr_addr)); + WREG32_SOC15(GC, 0, mmCP_RB0_RPTR_ADDR_HI, upper_32_bits(rptr_addr) & CP_RB_RPTR_ADDR_HI__RB_RPTR_ADDR_HI_MASK); wptr_gpu_addr = adev->wb.gpu_addr + (ring->wptr_offs * 4); - WREG32(SOC15_REG_OFFSET(GC, 0, mmCP_RB_WPTR_POLL_ADDR_LO), lower_32_bits(wptr_gpu_addr)); - WREG32(SOC15_REG_OFFSET(GC, 0, mmCP_RB_WPTR_POLL_ADDR_HI), upper_32_bits(wptr_gpu_addr)); + WREG32_SOC15(GC, 0, mmCP_RB_WPTR_POLL_ADDR_LO, lower_32_bits(wptr_gpu_addr)); + WREG32_SOC15(GC, 0, mmCP_RB_WPTR_POLL_ADDR_HI, upper_32_bits(wptr_gpu_addr)); mdelay(1); - WREG32(SOC15_REG_OFFSET(GC, 0, mmCP_RB0_CNTL), tmp); + WREG32_SOC15(GC, 0, mmCP_RB0_CNTL, tmp); rb_addr = ring->gpu_addr >> 8; - WREG32(SOC15_REG_OFFSET(GC, 0, mmCP_RB0_BASE), rb_addr); - WREG32(SOC15_REG_OFFSET(GC, 0, mmCP_RB0_BASE_HI), upper_32_bits(rb_addr)); + WREG32_SOC15(GC, 0, mmCP_RB0_BASE, rb_addr); + WREG32_SOC15(GC, 0, mmCP_RB0_BASE_HI, upper_32_bits(rb_addr)); - tmp = RREG32(SOC15_REG_OFFSET(GC, 0, mmCP_RB_DOORBELL_CONTROL)); + tmp = RREG32_SOC15(GC, 0, mmCP_RB_DOORBELL_CONTROL); if (ring->use_doorbell) { tmp = REG_SET_FIELD(tmp, 
CP_RB_DOORBELL_CONTROL, DOORBELL_OFFSET, ring->doorbell_index); @@ -1695,13 +1697,13 @@ static int gfx_v9_0_cp_gfx_resume(struct amdgpu_device *adev) } else { tmp = REG_SET_FIELD(tmp, CP_RB_DOORBELL_CONTROL, DOORBELL_EN, 0); } - WREG32(SOC15_REG_OFFSET(GC, 0, mmCP_RB_DOORBELL_CONTROL), tmp); + WREG32_SOC15(GC, 0, mmCP_RB_DOORBELL_CONTROL, tmp); tmp = REG_SET_FIELD(0, CP_RB_DOORBELL_RANGE_LOWER, DOORBELL_RANGE_LOWER, ring->doorbell_index); - WREG32(SOC15_REG_OFFSET(GC, 0, mmCP_RB_DOORBELL_RANGE_LOWER), tmp); + WREG32_SOC15(GC, 0, mmCP_RB_DOORBELL_RANGE_LOWER, tmp); - WREG32(SOC15_REG_OFFSET(GC, 0, mmCP_RB_DOORBELL_RANGE_UPPER), + WREG32_SOC15(GC, 0, mmCP_RB_DOORBELL_RANGE_UPPER, CP_RB_DOORBELL_RANGE_UPPER__DOORBELL_RANGE_UPPER_MASK); @@ -1717,9 +1719,9 @@ static void gfx_v9_0_cp_compute_enable(struct amdgpu_device *adev, bool enable) int i; if (enable) { - WREG32(SOC15_REG_OFFSET(GC, 0, mmCP_MEC_CNTL), 0); + WREG32_SOC15(GC, 0, mmCP_MEC_CNTL, 0); } else { - WREG32(SOC15_REG_OFFSET(GC, 0, mmCP_MEC_CNTL), + WREG32_SOC15(GC, 0, mmCP_MEC_CNTL, (CP_MEC_CNTL__MEC_ME1_HALT_MASK | CP_MEC_CNTL__MEC_ME2_HALT_MASK)); for (i = 0; i < adev->gfx.num_compute_rings; i++) adev->gfx.compute_ring[i].ready = false; @@ -1756,21 +1758,21 @@ static int gfx_v9_0_cp_compute_load_microcode(struct amdgpu_device *adev) tmp = 0; tmp = REG_SET_FIELD(tmp, CP_CPC_IC_BASE_CNTL, VMID, 0); tmp = REG_SET_FIELD(tmp, CP_CPC_IC_BASE_CNTL, CACHE_POLICY, 0); - WREG32(SOC15_REG_OFFSET(GC, 0, mmCP_CPC_IC_BASE_CNTL), tmp); + WREG32_SOC15(GC, 0, mmCP_CPC_IC_BASE_CNTL, tmp); - WREG32(SOC15_REG_OFFSET(GC, 0, mmCP_CPC_IC_BASE_LO), + WREG32_SOC15(GC, 0, mmCP_CPC_IC_BASE_LO, adev->gfx.mec.mec_fw_gpu_addr & 0xFFFFF000); - WREG32(SOC15_REG_OFFSET(GC, 0, mmCP_CPC_IC_BASE_HI), + WREG32_SOC15(GC, 0, mmCP_CPC_IC_BASE_HI, upper_32_bits(adev->gfx.mec.mec_fw_gpu_addr)); /* MEC1 */ - WREG32(SOC15_REG_OFFSET(GC, 0, mmCP_MEC_ME1_UCODE_ADDR), + WREG32_SOC15(GC, 0, mmCP_MEC_ME1_UCODE_ADDR, mec_hdr->jt_offset); for (i = 0; i < 
mec_hdr->jt_size; i++) - WREG32(SOC15_REG_OFFSET(GC, 0, mmCP_MEC_ME1_UCODE_DATA), + WREG32_SOC15(GC, 0, mmCP_MEC_ME1_UCODE_DATA, le32_to_cpup(fw_data + mec_hdr->jt_offset + i)); - WREG32(SOC15_REG_OFFSET(GC, 0, mmCP_MEC_ME1_UCODE_ADDR), + WREG32_SOC15(GC, 0, mmCP_MEC_ME1_UCODE_ADDR, adev->gfx.mec_fw_version); /* Todo : Loading MEC2 firmware is only necessary if MEC2 should run different microcode than MEC1. */ @@ -1785,7 +1787,7 @@ static void gfx_v9_0_cp_compute_fini(struct amdgpu_device *adev) struct amdgpu_ring *ring = &adev->gfx.compute_ring[i]; if (ring->mqd_obj) { - r = amdgpu_bo_reserve(ring->mqd_obj, false); + r = amdgpu_bo_reserve(ring->mqd_obj, true); if (unlikely(r != 0)) dev_warn(adev->dev, "(%d) reserve MQD bo failed\n", r); @@ -1823,12 +1825,12 @@ static void gfx_v9_0_kiq_setting(struct amdgpu_ring *ring) struct amdgpu_device *adev = ring->adev; /* tell RLC which is KIQ queue */ - tmp = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_CP_SCHEDULERS)); + tmp = RREG32_SOC15(GC, 0, mmRLC_CP_SCHEDULERS); tmp &= 0xffffff00; tmp |= (ring->me << 5) | (ring->pipe << 3) | (ring->queue); - WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_CP_SCHEDULERS), tmp); + WREG32_SOC15(GC, 0, mmRLC_CP_SCHEDULERS, tmp); tmp |= 0x80; - WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_CP_SCHEDULERS), tmp); + WREG32_SOC15(GC, 0, mmRLC_CP_SCHEDULERS, tmp); } static void gfx_v9_0_kiq_enable(struct amdgpu_ring *ring) @@ -1898,14 +1900,14 @@ static int gfx_v9_0_mqd_init(struct amdgpu_ring *ring) mqd->cp_hqd_eop_base_addr_hi = upper_32_bits(eop_base_addr); /* set the EOP size, register value is 2^(EOP_SIZE+1) dwords */ - tmp = RREG32(SOC15_REG_OFFSET(GC, 0, mmCP_HQD_EOP_CONTROL)); + tmp = RREG32_SOC15(GC, 0, mmCP_HQD_EOP_CONTROL); tmp = REG_SET_FIELD(tmp, CP_HQD_EOP_CONTROL, EOP_SIZE, (order_base_2(MEC_HPD_SIZE / 4) - 1)); mqd->cp_hqd_eop_control = tmp; /* enable doorbell? 
*/ - tmp = RREG32(SOC15_REG_OFFSET(GC, 0, mmCP_HQD_PQ_DOORBELL_CONTROL)); + tmp = RREG32_SOC15(GC, 0, mmCP_HQD_PQ_DOORBELL_CONTROL); if (ring->use_doorbell) { tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL, @@ -1935,7 +1937,7 @@ static int gfx_v9_0_mqd_init(struct amdgpu_ring *ring) mqd->cp_mqd_base_addr_hi = upper_32_bits(ring->mqd_gpu_addr); /* set MQD vmid to 0 */ - tmp = RREG32(SOC15_REG_OFFSET(GC, 0, mmCP_MQD_CONTROL)); + tmp = RREG32_SOC15(GC, 0, mmCP_MQD_CONTROL); tmp = REG_SET_FIELD(tmp, CP_MQD_CONTROL, VMID, 0); mqd->cp_mqd_control = tmp; @@ -1945,7 +1947,7 @@ static int gfx_v9_0_mqd_init(struct amdgpu_ring *ring) mqd->cp_hqd_pq_base_hi = upper_32_bits(hqd_gpu_addr); /* set up the HQD, this is similar to CP_RB0_CNTL */ - tmp = RREG32(SOC15_REG_OFFSET(GC, 0, mmCP_HQD_PQ_CONTROL)); + tmp = RREG32_SOC15(GC, 0, mmCP_HQD_PQ_CONTROL); tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, QUEUE_SIZE, (order_base_2(ring->ring_size / 4) - 1)); tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, RPTR_BLOCK_SIZE, @@ -1973,7 +1975,7 @@ static int gfx_v9_0_mqd_init(struct amdgpu_ring *ring) tmp = 0; /* enable the doorbell if requested */ if (ring->use_doorbell) { - tmp = RREG32(SOC15_REG_OFFSET(GC, 0, mmCP_HQD_PQ_DOORBELL_CONTROL)); + tmp = RREG32_SOC15(GC, 0, mmCP_HQD_PQ_DOORBELL_CONTROL); tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL, DOORBELL_OFFSET, ring->doorbell_index); @@ -1989,15 +1991,20 @@ static int gfx_v9_0_mqd_init(struct amdgpu_ring *ring) /* reset read and write pointers, similar to CP_RB0_WPTR/_RPTR */ ring->wptr = 0; - mqd->cp_hqd_pq_rptr = RREG32(mmCP_HQD_PQ_RPTR); + mqd->cp_hqd_pq_rptr = RREG32_SOC15(GC, 0, mmCP_HQD_PQ_RPTR); /* set the vmid for the queue */ mqd->cp_hqd_vmid = 0; - tmp = RREG32(mmCP_HQD_PERSISTENT_STATE); + tmp = RREG32_SOC15(GC, 0, mmCP_HQD_PERSISTENT_STATE); tmp = REG_SET_FIELD(tmp, CP_HQD_PERSISTENT_STATE, PRELOAD_SIZE, 0x53); mqd->cp_hqd_persistent_state = tmp; + /* set MIN_IB_AVAIL_SIZE */ + tmp = RREG32_SOC15(GC, 0, 
mmCP_HQD_IB_CONTROL); + tmp = REG_SET_FIELD(tmp, CP_HQD_IB_CONTROL, MIN_IB_AVAIL_SIZE, 3); + mqd->cp_hqd_ib_control = tmp; + /* activate the queue */ mqd->cp_hqd_active = 1; @@ -2013,94 +2020,94 @@ static int gfx_v9_0_kiq_init_register(struct amdgpu_ring *ring) /* disable wptr polling */ WREG32_FIELD15(GC, 0, CP_PQ_WPTR_POLL_CNTL, EN, 0); - WREG32(SOC15_REG_OFFSET(GC, 0, mmCP_HQD_EOP_BASE_ADDR), + WREG32_SOC15(GC, 0, mmCP_HQD_EOP_BASE_ADDR, mqd->cp_hqd_eop_base_addr_lo); - WREG32(SOC15_REG_OFFSET(GC, 0, mmCP_HQD_EOP_BASE_ADDR_HI), + WREG32_SOC15(GC, 0, mmCP_HQD_EOP_BASE_ADDR_HI, mqd->cp_hqd_eop_base_addr_hi); /* set the EOP size, register value is 2^(EOP_SIZE+1) dwords */ - WREG32(SOC15_REG_OFFSET(GC, 0, mmCP_HQD_EOP_CONTROL), + WREG32_SOC15(GC, 0, mmCP_HQD_EOP_CONTROL, mqd->cp_hqd_eop_control); /* enable doorbell? */ - WREG32(SOC15_REG_OFFSET(GC, 0, mmCP_HQD_PQ_DOORBELL_CONTROL), + WREG32_SOC15(GC, 0, mmCP_HQD_PQ_DOORBELL_CONTROL, mqd->cp_hqd_pq_doorbell_control); /* disable the queue if it's active */ - if (RREG32(SOC15_REG_OFFSET(GC, 0, mmCP_HQD_ACTIVE)) & 1) { - WREG32(SOC15_REG_OFFSET(GC, 0, mmCP_HQD_DEQUEUE_REQUEST), 1); + if (RREG32_SOC15(GC, 0, mmCP_HQD_ACTIVE) & 1) { + WREG32_SOC15(GC, 0, mmCP_HQD_DEQUEUE_REQUEST, 1); for (j = 0; j < adev->usec_timeout; j++) { - if (!(RREG32(SOC15_REG_OFFSET(GC, 0, mmCP_HQD_ACTIVE)) & 1)) + if (!(RREG32_SOC15(GC, 0, mmCP_HQD_ACTIVE) & 1)) break; udelay(1); } - WREG32(SOC15_REG_OFFSET(GC, 0, mmCP_HQD_DEQUEUE_REQUEST), + WREG32_SOC15(GC, 0, mmCP_HQD_DEQUEUE_REQUEST, mqd->cp_hqd_dequeue_request); - WREG32(SOC15_REG_OFFSET(GC, 0, mmCP_HQD_PQ_RPTR), + WREG32_SOC15(GC, 0, mmCP_HQD_PQ_RPTR, mqd->cp_hqd_pq_rptr); - WREG32(SOC15_REG_OFFSET(GC, 0, mmCP_HQD_PQ_WPTR_LO), + WREG32_SOC15(GC, 0, mmCP_HQD_PQ_WPTR_LO, mqd->cp_hqd_pq_wptr_lo); - WREG32(SOC15_REG_OFFSET(GC, 0, mmCP_HQD_PQ_WPTR_HI), + WREG32_SOC15(GC, 0, mmCP_HQD_PQ_WPTR_HI, mqd->cp_hqd_pq_wptr_hi); } /* set the pointer to the MQD */ - WREG32(SOC15_REG_OFFSET(GC, 0, 
mmCP_MQD_BASE_ADDR), + WREG32_SOC15(GC, 0, mmCP_MQD_BASE_ADDR, mqd->cp_mqd_base_addr_lo); - WREG32(SOC15_REG_OFFSET(GC, 0, mmCP_MQD_BASE_ADDR_HI), + WREG32_SOC15(GC, 0, mmCP_MQD_BASE_ADDR_HI, mqd->cp_mqd_base_addr_hi); /* set MQD vmid to 0 */ - WREG32(SOC15_REG_OFFSET(GC, 0, mmCP_MQD_CONTROL), + WREG32_SOC15(GC, 0, mmCP_MQD_CONTROL, mqd->cp_mqd_control); /* set the pointer to the HQD, this is similar CP_RB0_BASE/_HI */ - WREG32(SOC15_REG_OFFSET(GC, 0, mmCP_HQD_PQ_BASE), + WREG32_SOC15(GC, 0, mmCP_HQD_PQ_BASE, mqd->cp_hqd_pq_base_lo); - WREG32(SOC15_REG_OFFSET(GC, 0, mmCP_HQD_PQ_BASE_HI), + WREG32_SOC15(GC, 0, mmCP_HQD_PQ_BASE_HI, mqd->cp_hqd_pq_base_hi); /* set up the HQD, this is similar to CP_RB0_CNTL */ - WREG32(SOC15_REG_OFFSET(GC, 0, mmCP_HQD_PQ_CONTROL), + WREG32_SOC15(GC, 0, mmCP_HQD_PQ_CONTROL, mqd->cp_hqd_pq_control); /* set the wb address whether it's enabled or not */ - WREG32(SOC15_REG_OFFSET(GC, 0, mmCP_HQD_PQ_RPTR_REPORT_ADDR), + WREG32_SOC15(GC, 0, mmCP_HQD_PQ_RPTR_REPORT_ADDR, mqd->cp_hqd_pq_rptr_report_addr_lo); - WREG32(SOC15_REG_OFFSET(GC, 0, mmCP_HQD_PQ_RPTR_REPORT_ADDR_HI), + WREG32_SOC15(GC, 0, mmCP_HQD_PQ_RPTR_REPORT_ADDR_HI, mqd->cp_hqd_pq_rptr_report_addr_hi); /* only used if CP_PQ_WPTR_POLL_CNTL.CP_PQ_WPTR_POLL_CNTL__EN_MASK=1 */ - WREG32(SOC15_REG_OFFSET(GC, 0, mmCP_HQD_PQ_WPTR_POLL_ADDR), + WREG32_SOC15(GC, 0, mmCP_HQD_PQ_WPTR_POLL_ADDR, mqd->cp_hqd_pq_wptr_poll_addr_lo); - WREG32(SOC15_REG_OFFSET(GC, 0, mmCP_HQD_PQ_WPTR_POLL_ADDR_HI), + WREG32_SOC15(GC, 0, mmCP_HQD_PQ_WPTR_POLL_ADDR_HI, mqd->cp_hqd_pq_wptr_poll_addr_hi); /* enable the doorbell if requested */ if (ring->use_doorbell) { - WREG32(SOC15_REG_OFFSET(GC, 0, mmCP_MEC_DOORBELL_RANGE_LOWER), + WREG32_SOC15(GC, 0, mmCP_MEC_DOORBELL_RANGE_LOWER, (AMDGPU_DOORBELL64_KIQ *2) << 2); - WREG32(SOC15_REG_OFFSET(GC, 0, mmCP_MEC_DOORBELL_RANGE_UPPER), + WREG32_SOC15(GC, 0, mmCP_MEC_DOORBELL_RANGE_UPPER, (AMDGPU_DOORBELL64_USERQUEUE_END * 2) << 2); } - WREG32(SOC15_REG_OFFSET(GC, 0, 
mmCP_HQD_PQ_DOORBELL_CONTROL), + WREG32_SOC15(GC, 0, mmCP_HQD_PQ_DOORBELL_CONTROL, mqd->cp_hqd_pq_doorbell_control); /* reset read and write pointers, similar to CP_RB0_WPTR/_RPTR */ - WREG32(SOC15_REG_OFFSET(GC, 0, mmCP_HQD_PQ_WPTR_LO), + WREG32_SOC15(GC, 0, mmCP_HQD_PQ_WPTR_LO, mqd->cp_hqd_pq_wptr_lo); - WREG32(SOC15_REG_OFFSET(GC, 0, mmCP_HQD_PQ_WPTR_HI), + WREG32_SOC15(GC, 0, mmCP_HQD_PQ_WPTR_HI, mqd->cp_hqd_pq_wptr_hi); /* set the vmid for the queue */ - WREG32(SOC15_REG_OFFSET(GC, 0, mmCP_HQD_VMID), mqd->cp_hqd_vmid); + WREG32_SOC15(GC, 0, mmCP_HQD_VMID, mqd->cp_hqd_vmid); - WREG32(SOC15_REG_OFFSET(GC, 0, mmCP_HQD_PERSISTENT_STATE), + WREG32_SOC15(GC, 0, mmCP_HQD_PERSISTENT_STATE, mqd->cp_hqd_persistent_state); /* activate the queue */ - WREG32(SOC15_REG_OFFSET(GC, 0, mmCP_HQD_ACTIVE), + WREG32_SOC15(GC, 0, mmCP_HQD_ACTIVE, mqd->cp_hqd_active); if (ring->use_doorbell) @@ -2323,7 +2330,7 @@ static bool gfx_v9_0_is_idle(void *handle) { struct amdgpu_device *adev = (struct amdgpu_device *)handle; - if (REG_GET_FIELD(RREG32(SOC15_REG_OFFSET(GC, 0, mmGRBM_STATUS)), + if (REG_GET_FIELD(RREG32_SOC15(GC, 0, mmGRBM_STATUS), GRBM_STATUS, GUI_ACTIVE)) return false; else @@ -2338,7 +2345,7 @@ static int gfx_v9_0_wait_for_idle(void *handle) for (i = 0; i < adev->usec_timeout; i++) { /* read MC_STATUS */ - tmp = RREG32(SOC15_REG_OFFSET(GC, 0, mmGRBM_STATUS)) & + tmp = RREG32_SOC15(GC, 0, mmGRBM_STATUS) & GRBM_STATUS__GUI_ACTIVE_MASK; if (!REG_GET_FIELD(tmp, GRBM_STATUS, GUI_ACTIVE)) @@ -2355,7 +2362,7 @@ static int gfx_v9_0_soft_reset(void *handle) struct amdgpu_device *adev = (struct amdgpu_device *)handle; /* GRBM_STATUS */ - tmp = RREG32(SOC15_REG_OFFSET(GC, 0, mmGRBM_STATUS)); + tmp = RREG32_SOC15(GC, 0, mmGRBM_STATUS); if (tmp & (GRBM_STATUS__PA_BUSY_MASK | GRBM_STATUS__SC_BUSY_MASK | GRBM_STATUS__BCI_BUSY_MASK | GRBM_STATUS__SX_BUSY_MASK | GRBM_STATUS__TA_BUSY_MASK | GRBM_STATUS__VGT_BUSY_MASK | @@ -2374,7 +2381,7 @@ static int gfx_v9_0_soft_reset(void *handle) } /* 
GRBM_STATUS2 */ - tmp = RREG32(SOC15_REG_OFFSET(GC, 0, mmGRBM_STATUS2)); + tmp = RREG32_SOC15(GC, 0, mmGRBM_STATUS2); if (REG_GET_FIELD(tmp, GRBM_STATUS2, RLC_BUSY)) grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_RLC, 1); @@ -2391,17 +2398,17 @@ static int gfx_v9_0_soft_reset(void *handle) gfx_v9_0_cp_compute_enable(adev, false); if (grbm_soft_reset) { - tmp = RREG32(SOC15_REG_OFFSET(GC, 0, mmGRBM_SOFT_RESET)); + tmp = RREG32_SOC15(GC, 0, mmGRBM_SOFT_RESET); tmp |= grbm_soft_reset; dev_info(adev->dev, "GRBM_SOFT_RESET=0x%08X\n", tmp); - WREG32(SOC15_REG_OFFSET(GC, 0, mmGRBM_SOFT_RESET), tmp); - tmp = RREG32(SOC15_REG_OFFSET(GC, 0, mmGRBM_SOFT_RESET)); + WREG32_SOC15(GC, 0, mmGRBM_SOFT_RESET, tmp); + tmp = RREG32_SOC15(GC, 0, mmGRBM_SOFT_RESET); udelay(50); tmp &= ~grbm_soft_reset; - WREG32(SOC15_REG_OFFSET(GC, 0, mmGRBM_SOFT_RESET), tmp); - tmp = RREG32(SOC15_REG_OFFSET(GC, 0, mmGRBM_SOFT_RESET)); + WREG32_SOC15(GC, 0, mmGRBM_SOFT_RESET, tmp); + tmp = RREG32_SOC15(GC, 0, mmGRBM_SOFT_RESET); } /* Wait a little for things to settle down */ @@ -2415,9 +2422,9 @@ static uint64_t gfx_v9_0_get_gpu_clock_counter(struct amdgpu_device *adev) uint64_t clock; mutex_lock(&adev->gfx.gpu_clock_mutex); - WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_CAPTURE_GPU_CLOCK_COUNT), 1); - clock = (uint64_t)RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_GPU_CLOCK_COUNT_LSB)) | - ((uint64_t)RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_GPU_CLOCK_COUNT_MSB)) << 32ULL); + WREG32_SOC15(GC, 0, mmRLC_CAPTURE_GPU_CLOCK_COUNT, 1); + clock = (uint64_t)RREG32_SOC15(GC, 0, mmRLC_GPU_CLOCK_COUNT_LSB) | + ((uint64_t)RREG32_SOC15(GC, 0, mmRLC_GPU_CLOCK_COUNT_MSB) << 32ULL); mutex_unlock(&adev->gfx.gpu_clock_mutex); return clock; } @@ -2497,7 +2504,7 @@ static void gfx_v9_0_enter_rlc_safe_mode(struct amdgpu_device *adev) return; /* if RLC is not enabled, do nothing */ - rlc_setting = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_CNTL)); + rlc_setting = RREG32_SOC15(GC, 0, mmRLC_CNTL); if (!(rlc_setting & 
RLC_CNTL__RLC_ENABLE_F32_MASK)) return; @@ -2506,7 +2513,7 @@ static void gfx_v9_0_enter_rlc_safe_mode(struct amdgpu_device *adev) AMD_CG_SUPPORT_GFX_3D_CGCG)) { data = RLC_SAFE_MODE__CMD_MASK; data |= (1 << RLC_SAFE_MODE__MESSAGE__SHIFT); - WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_SAFE_MODE), data); + WREG32_SOC15(GC, 0, mmRLC_SAFE_MODE, data); /* wait for RLC_SAFE_MODE */ for (i = 0; i < adev->usec_timeout; i++) { @@ -2526,7 +2533,7 @@ static void gfx_v9_0_exit_rlc_safe_mode(struct amdgpu_device *adev) return; /* if RLC is not enabled, do nothing */ - rlc_setting = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_CNTL)); + rlc_setting = RREG32_SOC15(GC, 0, mmRLC_CNTL); if (!(rlc_setting & RLC_CNTL__RLC_ENABLE_F32_MASK)) return; @@ -2537,7 +2544,7 @@ static void gfx_v9_0_exit_rlc_safe_mode(struct amdgpu_device *adev) * mode. */ data = RLC_SAFE_MODE__CMD_MASK; - WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_SAFE_MODE), data); + WREG32_SOC15(GC, 0, mmRLC_SAFE_MODE, data); adev->gfx.rlc.in_safe_mode = false; } } @@ -2550,7 +2557,7 @@ static void gfx_v9_0_update_medium_grain_clock_gating(struct amdgpu_device *adev /* It is disabled by HW by default */ if (enable && (adev->cg_flags & AMD_CG_SUPPORT_GFX_MGCG)) { /* 1 - RLC_CGTT_MGCG_OVERRIDE */ - def = data = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_CGTT_MGCG_OVERRIDE)); + def = data = RREG32_SOC15(GC, 0, mmRLC_CGTT_MGCG_OVERRIDE); data &= ~(RLC_CGTT_MGCG_OVERRIDE__CPF_CGTT_SCLK_OVERRIDE_MASK | RLC_CGTT_MGCG_OVERRIDE__GRBM_CGTT_SCLK_OVERRIDE_MASK | RLC_CGTT_MGCG_OVERRIDE__GFXIP_MGCG_OVERRIDE_MASK | @@ -2560,48 +2567,48 @@ static void gfx_v9_0_update_medium_grain_clock_gating(struct amdgpu_device *adev data |= RLC_CGTT_MGCG_OVERRIDE__RLC_CGTT_SCLK_OVERRIDE_MASK; if (def != data) - WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_CGTT_MGCG_OVERRIDE), data); + WREG32_SOC15(GC, 0, mmRLC_CGTT_MGCG_OVERRIDE, data); /* MGLS is a global flag to control all MGLS in GFX */ if (adev->cg_flags & AMD_CG_SUPPORT_GFX_MGLS) { /* 2 - RLC memory Light sleep */ if 
(adev->cg_flags & AMD_CG_SUPPORT_GFX_RLC_LS) { - def = data = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_MEM_SLP_CNTL)); + def = data = RREG32_SOC15(GC, 0, mmRLC_MEM_SLP_CNTL); data |= RLC_MEM_SLP_CNTL__RLC_MEM_LS_EN_MASK; if (def != data) - WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_MEM_SLP_CNTL), data); + WREG32_SOC15(GC, 0, mmRLC_MEM_SLP_CNTL, data); } /* 3 - CP memory Light sleep */ if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CP_LS) { - def = data = RREG32(SOC15_REG_OFFSET(GC, 0, mmCP_MEM_SLP_CNTL)); + def = data = RREG32_SOC15(GC, 0, mmCP_MEM_SLP_CNTL); data |= CP_MEM_SLP_CNTL__CP_MEM_LS_EN_MASK; if (def != data) - WREG32(SOC15_REG_OFFSET(GC, 0, mmCP_MEM_SLP_CNTL), data); + WREG32_SOC15(GC, 0, mmCP_MEM_SLP_CNTL, data); } } } else { /* 1 - MGCG_OVERRIDE */ - def = data = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_CGTT_MGCG_OVERRIDE)); + def = data = RREG32_SOC15(GC, 0, mmRLC_CGTT_MGCG_OVERRIDE); data |= (RLC_CGTT_MGCG_OVERRIDE__CPF_CGTT_SCLK_OVERRIDE_MASK | RLC_CGTT_MGCG_OVERRIDE__RLC_CGTT_SCLK_OVERRIDE_MASK | RLC_CGTT_MGCG_OVERRIDE__GRBM_CGTT_SCLK_OVERRIDE_MASK | RLC_CGTT_MGCG_OVERRIDE__GFXIP_MGCG_OVERRIDE_MASK | RLC_CGTT_MGCG_OVERRIDE__GFXIP_MGLS_OVERRIDE_MASK); if (def != data) - WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_CGTT_MGCG_OVERRIDE), data); + WREG32_SOC15(GC, 0, mmRLC_CGTT_MGCG_OVERRIDE, data); /* 2 - disable MGLS in RLC */ - data = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_MEM_SLP_CNTL)); + data = RREG32_SOC15(GC, 0, mmRLC_MEM_SLP_CNTL); if (data & RLC_MEM_SLP_CNTL__RLC_MEM_LS_EN_MASK) { data &= ~RLC_MEM_SLP_CNTL__RLC_MEM_LS_EN_MASK; - WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_MEM_SLP_CNTL), data); + WREG32_SOC15(GC, 0, mmRLC_MEM_SLP_CNTL, data); } /* 3 - disable MGLS in CP */ - data = RREG32(SOC15_REG_OFFSET(GC, 0, mmCP_MEM_SLP_CNTL)); + data = RREG32_SOC15(GC, 0, mmCP_MEM_SLP_CNTL); if (data & CP_MEM_SLP_CNTL__CP_MEM_LS_EN_MASK) { data &= ~CP_MEM_SLP_CNTL__CP_MEM_LS_EN_MASK; - WREG32(SOC15_REG_OFFSET(GC, 0, mmCP_MEM_SLP_CNTL), data); + WREG32_SOC15(GC, 0, mmCP_MEM_SLP_CNTL, data); } 
} } @@ -2616,37 +2623,37 @@ static void gfx_v9_0_update_3d_clock_gating(struct amdgpu_device *adev, /* Enable 3D CGCG/CGLS */ if (enable && (adev->cg_flags & AMD_CG_SUPPORT_GFX_3D_CGCG)) { /* write cmd to clear cgcg/cgls ov */ - def = data = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_CGTT_MGCG_OVERRIDE)); + def = data = RREG32_SOC15(GC, 0, mmRLC_CGTT_MGCG_OVERRIDE); /* unset CGCG override */ data &= ~RLC_CGTT_MGCG_OVERRIDE__GFXIP_GFX3D_CG_OVERRIDE_MASK; /* update CGCG and CGLS override bits */ if (def != data) - WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_CGTT_MGCG_OVERRIDE), data); + WREG32_SOC15(GC, 0, mmRLC_CGTT_MGCG_OVERRIDE, data); /* enable 3Dcgcg FSM(0x0020003f) */ - def = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_CGCG_CGLS_CTRL_3D)); + def = RREG32_SOC15(GC, 0, mmRLC_CGCG_CGLS_CTRL_3D); data = (0x2000 << RLC_CGCG_CGLS_CTRL_3D__CGCG_GFX_IDLE_THRESHOLD__SHIFT) | RLC_CGCG_CGLS_CTRL_3D__CGCG_EN_MASK; if (adev->cg_flags & AMD_CG_SUPPORT_GFX_3D_CGLS) data |= (0x000F << RLC_CGCG_CGLS_CTRL_3D__CGLS_REP_COMPANSAT_DELAY__SHIFT) | RLC_CGCG_CGLS_CTRL_3D__CGLS_EN_MASK; if (def != data) - WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_CGCG_CGLS_CTRL_3D), data); + WREG32_SOC15(GC, 0, mmRLC_CGCG_CGLS_CTRL_3D, data); /* set IDLE_POLL_COUNT(0x00900100) */ - def = RREG32(SOC15_REG_OFFSET(GC, 0, mmCP_RB_WPTR_POLL_CNTL)); + def = RREG32_SOC15(GC, 0, mmCP_RB_WPTR_POLL_CNTL); data = (0x0100 << CP_RB_WPTR_POLL_CNTL__POLL_FREQUENCY__SHIFT) | (0x0090 << CP_RB_WPTR_POLL_CNTL__IDLE_POLL_COUNT__SHIFT); if (def != data) - WREG32(SOC15_REG_OFFSET(GC, 0, mmCP_RB_WPTR_POLL_CNTL), data); + WREG32_SOC15(GC, 0, mmCP_RB_WPTR_POLL_CNTL, data); } else { /* Disable CGCG/CGLS */ - def = data = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_CGCG_CGLS_CTRL_3D)); + def = data = RREG32_SOC15(GC, 0, mmRLC_CGCG_CGLS_CTRL_3D); /* disable cgcg, cgls should be disabled */ data &= ~(RLC_CGCG_CGLS_CTRL_3D__CGCG_EN_MASK | RLC_CGCG_CGLS_CTRL_3D__CGLS_EN_MASK); /* disable cgcg and cgls in FSM */ if (def != data) - WREG32(SOC15_REG_OFFSET(GC, 0, 
mmRLC_CGCG_CGLS_CTRL_3D), data); + WREG32_SOC15(GC, 0, mmRLC_CGCG_CGLS_CTRL_3D, data); } adev->gfx.rlc.funcs->exit_safe_mode(adev); @@ -2660,7 +2667,7 @@ static void gfx_v9_0_update_coarse_grain_clock_gating(struct amdgpu_device *adev adev->gfx.rlc.funcs->enter_safe_mode(adev); if (enable && (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGCG)) { - def = data = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_CGTT_MGCG_OVERRIDE)); + def = data = RREG32_SOC15(GC, 0, mmRLC_CGTT_MGCG_OVERRIDE); /* unset CGCG override */ data &= ~RLC_CGTT_MGCG_OVERRIDE__GFXIP_CGCG_OVERRIDE_MASK; if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGLS) @@ -2669,31 +2676,31 @@ static void gfx_v9_0_update_coarse_grain_clock_gating(struct amdgpu_device *adev data |= RLC_CGTT_MGCG_OVERRIDE__GFXIP_CGLS_OVERRIDE_MASK; /* update CGCG and CGLS override bits */ if (def != data) - WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_CGTT_MGCG_OVERRIDE), data); + WREG32_SOC15(GC, 0, mmRLC_CGTT_MGCG_OVERRIDE, data); /* enable cgcg FSM(0x0020003F) */ - def = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_CGCG_CGLS_CTRL)); + def = RREG32_SOC15(GC, 0, mmRLC_CGCG_CGLS_CTRL); data = (0x2000 << RLC_CGCG_CGLS_CTRL__CGCG_GFX_IDLE_THRESHOLD__SHIFT) | RLC_CGCG_CGLS_CTRL__CGCG_EN_MASK; if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGLS) data |= (0x000F << RLC_CGCG_CGLS_CTRL__CGLS_REP_COMPANSAT_DELAY__SHIFT) | RLC_CGCG_CGLS_CTRL__CGLS_EN_MASK; if (def != data) - WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_CGCG_CGLS_CTRL), data); + WREG32_SOC15(GC, 0, mmRLC_CGCG_CGLS_CTRL, data); /* set IDLE_POLL_COUNT(0x00900100) */ - def = RREG32(SOC15_REG_OFFSET(GC, 0, mmCP_RB_WPTR_POLL_CNTL)); + def = RREG32_SOC15(GC, 0, mmCP_RB_WPTR_POLL_CNTL); data = (0x0100 << CP_RB_WPTR_POLL_CNTL__POLL_FREQUENCY__SHIFT) | (0x0090 << CP_RB_WPTR_POLL_CNTL__IDLE_POLL_COUNT__SHIFT); if (def != data) - WREG32(SOC15_REG_OFFSET(GC, 0, mmCP_RB_WPTR_POLL_CNTL), data); + WREG32_SOC15(GC, 0, mmCP_RB_WPTR_POLL_CNTL, data); } else { - def = data = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_CGCG_CGLS_CTRL)); + def = data = 
RREG32_SOC15(GC, 0, mmRLC_CGCG_CGLS_CTRL); /* reset CGCG/CGLS bits */ data &= ~(RLC_CGCG_CGLS_CTRL__CGCG_EN_MASK | RLC_CGCG_CGLS_CTRL__CGLS_EN_MASK); /* disable cgcg and cgls in FSM */ if (def != data) - WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_CGCG_CGLS_CTRL), data); + WREG32_SOC15(GC, 0, mmRLC_CGCG_CGLS_CTRL, data); } adev->gfx.rlc.funcs->exit_safe_mode(adev); @@ -2740,6 +2747,9 @@ static int gfx_v9_0_set_clockgating_state(void *handle, { struct amdgpu_device *adev = (struct amdgpu_device *)handle; + if (amdgpu_sriov_vf(adev)) + return 0; + switch (adev->asic_type) { case CHIP_VEGA10: gfx_v9_0_update_gfx_clock_gating(adev, @@ -2760,12 +2770,12 @@ static void gfx_v9_0_get_clockgating_state(void *handle, u32 *flags) *flags = 0; /* AMD_CG_SUPPORT_GFX_MGCG */ - data = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_CGTT_MGCG_OVERRIDE)); + data = RREG32_SOC15(GC, 0, mmRLC_CGTT_MGCG_OVERRIDE); if (!(data & RLC_CGTT_MGCG_OVERRIDE__GFXIP_MGCG_OVERRIDE_MASK)) *flags |= AMD_CG_SUPPORT_GFX_MGCG; /* AMD_CG_SUPPORT_GFX_CGCG */ - data = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_CGCG_CGLS_CTRL)); + data = RREG32_SOC15(GC, 0, mmRLC_CGCG_CGLS_CTRL); if (data & RLC_CGCG_CGLS_CTRL__CGCG_EN_MASK) *flags |= AMD_CG_SUPPORT_GFX_CGCG; @@ -2774,17 +2784,17 @@ static void gfx_v9_0_get_clockgating_state(void *handle, u32 *flags) *flags |= AMD_CG_SUPPORT_GFX_CGLS; /* AMD_CG_SUPPORT_GFX_RLC_LS */ - data = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_MEM_SLP_CNTL)); + data = RREG32_SOC15(GC, 0, mmRLC_MEM_SLP_CNTL); if (data & RLC_MEM_SLP_CNTL__RLC_MEM_LS_EN_MASK) *flags |= AMD_CG_SUPPORT_GFX_RLC_LS | AMD_CG_SUPPORT_GFX_MGLS; /* AMD_CG_SUPPORT_GFX_CP_LS */ - data = RREG32(SOC15_REG_OFFSET(GC, 0, mmCP_MEM_SLP_CNTL)); + data = RREG32_SOC15(GC, 0, mmCP_MEM_SLP_CNTL); if (data & CP_MEM_SLP_CNTL__CP_MEM_LS_EN_MASK) *flags |= AMD_CG_SUPPORT_GFX_CP_LS | AMD_CG_SUPPORT_GFX_MGLS; /* AMD_CG_SUPPORT_GFX_3D_CGCG */ - data = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_CGCG_CGLS_CTRL_3D)); + data = RREG32_SOC15(GC, 0, 
mmRLC_CGCG_CGLS_CTRL_3D); if (data & RLC_CGCG_CGLS_CTRL_3D__CGCG_EN_MASK) *flags |= AMD_CG_SUPPORT_GFX_3D_CGCG; @@ -2807,8 +2817,8 @@ static u64 gfx_v9_0_ring_get_wptr_gfx(struct amdgpu_ring *ring) if (ring->use_doorbell) { wptr = atomic64_read((atomic64_t *)&adev->wb.wb[ring->wptr_offs]); } else { - wptr = RREG32(SOC15_REG_OFFSET(GC, 0, mmCP_RB0_WPTR)); - wptr += (u64)RREG32(SOC15_REG_OFFSET(GC, 0, mmCP_RB0_WPTR_HI)) << 32; + wptr = RREG32_SOC15(GC, 0, mmCP_RB0_WPTR); + wptr += (u64)RREG32_SOC15(GC, 0, mmCP_RB0_WPTR_HI) << 32; } return wptr; @@ -2823,8 +2833,8 @@ static void gfx_v9_0_ring_set_wptr_gfx(struct amdgpu_ring *ring) atomic64_set((atomic64_t*)&adev->wb.wb[ring->wptr_offs], ring->wptr); WDOORBELL64(ring->doorbell_index, ring->wptr); } else { - WREG32(SOC15_REG_OFFSET(GC, 0, mmCP_RB0_WPTR), lower_32_bits(ring->wptr)); - WREG32(SOC15_REG_OFFSET(GC, 0, mmCP_RB0_WPTR_HI), upper_32_bits(ring->wptr)); + WREG32_SOC15(GC, 0, mmCP_RB0_WPTR, lower_32_bits(ring->wptr)); + WREG32_SOC15(GC, 0, mmCP_RB0_WPTR_HI, upper_32_bits(ring->wptr)); } } @@ -2956,35 +2966,29 @@ static void gfx_v9_0_ring_emit_pipeline_sync(struct amdgpu_ring *ring) static void gfx_v9_0_ring_emit_vm_flush(struct amdgpu_ring *ring, unsigned vm_id, uint64_t pd_addr) { + struct amdgpu_vmhub *hub = &ring->adev->vmhub[ring->funcs->vmhub]; int usepfp = (ring->funcs->type == AMDGPU_RING_TYPE_GFX); uint32_t req = ring->adev->gart.gart_funcs->get_invalidate_req(vm_id); - unsigned eng = ring->idx; - unsigned i; + unsigned eng = ring->vm_inv_eng; pd_addr = pd_addr | 0x1; /* valid bit */ /* now only use physical base address of PDE and valid */ BUG_ON(pd_addr & 0xFFFF00000000003EULL); - for (i = 0; i < AMDGPU_MAX_VMHUBS; ++i) { - struct amdgpu_vmhub *hub = &ring->adev->vmhub[i]; - - gfx_v9_0_write_data_to_reg(ring, usepfp, true, - hub->ctx0_ptb_addr_lo32 - + (2 * vm_id), - lower_32_bits(pd_addr)); + gfx_v9_0_write_data_to_reg(ring, usepfp, true, + hub->ctx0_ptb_addr_lo32 + (2 * vm_id), + 
lower_32_bits(pd_addr)); - gfx_v9_0_write_data_to_reg(ring, usepfp, true, - hub->ctx0_ptb_addr_hi32 - + (2 * vm_id), - upper_32_bits(pd_addr)); + gfx_v9_0_write_data_to_reg(ring, usepfp, true, + hub->ctx0_ptb_addr_hi32 + (2 * vm_id), + upper_32_bits(pd_addr)); - gfx_v9_0_write_data_to_reg(ring, usepfp, true, - hub->vm_inv_eng0_req + eng, req); + gfx_v9_0_write_data_to_reg(ring, usepfp, true, + hub->vm_inv_eng0_req + eng, req); - /* wait for the invalidate to complete */ - gfx_v9_0_wait_reg_mem(ring, 0, 0, 0, hub->vm_inv_eng0_ack + - eng, 0, 1 << vm_id, 1 << vm_id, 0x20); - } + /* wait for the invalidate to complete */ + gfx_v9_0_wait_reg_mem(ring, 0, 0, 0, hub->vm_inv_eng0_ack + + eng, 0, 1 << vm_id, 1 << vm_id, 0x20); /* compute doesn't have PFP */ if (usepfp) { @@ -3373,9 +3377,7 @@ static int gfx_v9_0_kiq_set_interrupt_state(struct amdgpu_device *adev, enum amdgpu_interrupt_state state) { uint32_t tmp, target; - struct amdgpu_ring *ring = (struct amdgpu_ring *)src->data; - - BUG_ON(!ring || (ring->funcs->type != AMDGPU_RING_TYPE_KIQ)); + struct amdgpu_ring *ring = &(adev->gfx.kiq.ring); if (ring->me == 1) target = SOC15_REG_OFFSET(GC, 0, mmCP_ME1_PIPE0_INT_CNTL); @@ -3386,20 +3388,20 @@ static int gfx_v9_0_kiq_set_interrupt_state(struct amdgpu_device *adev, switch (type) { case AMDGPU_CP_KIQ_IRQ_DRIVER0: if (state == AMDGPU_IRQ_STATE_DISABLE) { - tmp = RREG32(SOC15_REG_OFFSET(GC, 0, mmCPC_INT_CNTL)); + tmp = RREG32_SOC15(GC, 0, mmCPC_INT_CNTL); tmp = REG_SET_FIELD(tmp, CPC_INT_CNTL, GENERIC2_INT_ENABLE, 0); - WREG32(SOC15_REG_OFFSET(GC, 0, mmCPC_INT_CNTL), tmp); + WREG32_SOC15(GC, 0, mmCPC_INT_CNTL, tmp); tmp = RREG32(target); tmp = REG_SET_FIELD(tmp, CP_ME2_PIPE0_INT_CNTL, GENERIC2_INT_ENABLE, 0); WREG32(target, tmp); } else { - tmp = RREG32(SOC15_REG_OFFSET(GC, 0, mmCPC_INT_CNTL)); + tmp = RREG32_SOC15(GC, 0, mmCPC_INT_CNTL); tmp = REG_SET_FIELD(tmp, CPC_INT_CNTL, GENERIC2_INT_ENABLE, 1); - WREG32(SOC15_REG_OFFSET(GC, 0, mmCPC_INT_CNTL), tmp); + 
WREG32_SOC15(GC, 0, mmCPC_INT_CNTL, tmp); tmp = RREG32(target); tmp = REG_SET_FIELD(tmp, CP_ME2_PIPE0_INT_CNTL, @@ -3419,9 +3421,7 @@ static int gfx_v9_0_kiq_irq(struct amdgpu_device *adev, struct amdgpu_iv_entry *entry) { u8 me_id, pipe_id, queue_id; - struct amdgpu_ring *ring = (struct amdgpu_ring *)source->data; - - BUG_ON(!ring || (ring->funcs->type != AMDGPU_RING_TYPE_KIQ)); + struct amdgpu_ring *ring = &(adev->gfx.kiq.ring); me_id = (entry->ring_id & 0x0c) >> 2; pipe_id = (entry->ring_id & 0x03) >> 0; @@ -3456,13 +3456,14 @@ static const struct amdgpu_ring_funcs gfx_v9_0_ring_funcs_gfx = { .align_mask = 0xff, .nop = PACKET3(PACKET3_NOP, 0x3FFF), .support_64bit_ptrs = true, + .vmhub = AMDGPU_GFXHUB, .get_rptr = gfx_v9_0_ring_get_rptr_gfx, .get_wptr = gfx_v9_0_ring_get_wptr_gfx, .set_wptr = gfx_v9_0_ring_set_wptr_gfx, .emit_frame_size = /* totally 242 maximum if 16 IBs */ 5 + /* COND_EXEC */ 7 + /* PIPELINE_SYNC */ - 46 + /* VM_FLUSH */ + 24 + /* VM_FLUSH */ 8 + /* FENCE for VM_FLUSH */ 20 + /* GDS switch */ 4 + /* double SWITCH_BUFFER, @@ -3500,6 +3501,7 @@ static const struct amdgpu_ring_funcs gfx_v9_0_ring_funcs_compute = { .align_mask = 0xff, .nop = PACKET3(PACKET3_NOP, 0x3FFF), .support_64bit_ptrs = true, + .vmhub = AMDGPU_GFXHUB, .get_rptr = gfx_v9_0_ring_get_rptr_compute, .get_wptr = gfx_v9_0_ring_get_wptr_compute, .set_wptr = gfx_v9_0_ring_set_wptr_compute, @@ -3508,7 +3510,7 @@ static const struct amdgpu_ring_funcs gfx_v9_0_ring_funcs_compute = { 7 + /* gfx_v9_0_ring_emit_hdp_flush */ 5 + /* gfx_v9_0_ring_emit_hdp_invalidate */ 7 + /* gfx_v9_0_ring_emit_pipeline_sync */ - 64 + /* gfx_v9_0_ring_emit_vm_flush */ + 24 + /* gfx_v9_0_ring_emit_vm_flush */ 8 + 8 + 8, /* gfx_v9_0_ring_emit_fence x3 for user fence, vm fence */ .emit_ib_size = 4, /* gfx_v9_0_ring_emit_ib_compute */ .emit_ib = gfx_v9_0_ring_emit_ib_compute, @@ -3529,6 +3531,7 @@ static const struct amdgpu_ring_funcs gfx_v9_0_ring_funcs_kiq = { .align_mask = 0xff, .nop = PACKET3(PACKET3_NOP, 
0x3FFF), .support_64bit_ptrs = true, + .vmhub = AMDGPU_GFXHUB, .get_rptr = gfx_v9_0_ring_get_rptr_compute, .get_wptr = gfx_v9_0_ring_get_wptr_compute, .set_wptr = gfx_v9_0_ring_set_wptr_compute, @@ -3537,7 +3540,7 @@ static const struct amdgpu_ring_funcs gfx_v9_0_ring_funcs_kiq = { 7 + /* gfx_v9_0_ring_emit_hdp_flush */ 5 + /* gfx_v9_0_ring_emit_hdp_invalidate */ 7 + /* gfx_v9_0_ring_emit_pipeline_sync */ - 64 + /* gfx_v9_0_ring_emit_vm_flush */ + 24 + /* gfx_v9_0_ring_emit_vm_flush */ 8 + 8 + 8, /* gfx_v9_0_ring_emit_fence_kiq x3 for user fence, vm fence */ .emit_ib_size = 4, /* gfx_v9_0_ring_emit_ib_compute */ .emit_ib = gfx_v9_0_ring_emit_ib_compute, @@ -3612,7 +3615,7 @@ static void gfx_v9_0_set_rlc_funcs(struct amdgpu_device *adev) static void gfx_v9_0_set_gds_init(struct amdgpu_device *adev) { /* init asci gds info */ - adev->gds.mem.total_size = RREG32(SOC15_REG_OFFSET(GC, 0, mmGDS_VMID0_SIZE)); + adev->gds.mem.total_size = RREG32_SOC15(GC, 0, mmGDS_VMID0_SIZE); adev->gds.gws.total_size = 64; adev->gds.oa.total_size = 16; @@ -3641,8 +3644,8 @@ static u32 gfx_v9_0_get_cu_active_bitmap(struct amdgpu_device *adev) { u32 data, mask; - data = RREG32(SOC15_REG_OFFSET(GC, 0, mmCC_GC_SHADER_ARRAY_CONFIG)); - data |= RREG32(SOC15_REG_OFFSET(GC, 0, mmGC_USER_SHADER_ARRAY_CONFIG)); + data = RREG32_SOC15(GC, 0, mmCC_GC_SHADER_ARRAY_CONFIG); + data |= RREG32_SOC15(GC, 0, mmGC_USER_SHADER_ARRAY_CONFIG); data &= CC_GC_SHADER_ARRAY_CONFIG__INACTIVE_CUS_MASK; data >>= CC_GC_SHADER_ARRAY_CONFIG__INACTIVE_CUS__SHIFT; @@ -3763,25 +3766,25 @@ static int gfx_v9_0_init_queue(struct amdgpu_ring *ring) eop_gpu_addr = adev->gfx.mec.hpd_eop_gpu_addr + (ring->queue * MEC_HPD_SIZE); eop_gpu_addr >>= 8; - WREG32(SOC15_REG_OFFSET(GC, 0, mmCP_HQD_EOP_BASE_ADDR), lower_32_bits(eop_gpu_addr)); - WREG32(SOC15_REG_OFFSET(GC, 0, mmCP_HQD_EOP_BASE_ADDR_HI), upper_32_bits(eop_gpu_addr)); + WREG32_SOC15(GC, 0, mmCP_HQD_EOP_BASE_ADDR, lower_32_bits(eop_gpu_addr)); + WREG32_SOC15(GC, 0, 
mmCP_HQD_EOP_BASE_ADDR_HI, upper_32_bits(eop_gpu_addr)); mqd->cp_hqd_eop_base_addr_lo = lower_32_bits(eop_gpu_addr); mqd->cp_hqd_eop_base_addr_hi = upper_32_bits(eop_gpu_addr); /* set the EOP size, register value is 2^(EOP_SIZE+1) dwords */ - tmp = RREG32(SOC15_REG_OFFSET(GC, 0, mmCP_HQD_EOP_CONTROL)); + tmp = RREG32_SOC15(GC, 0, mmCP_HQD_EOP_CONTROL); tmp = REG_SET_FIELD(tmp, CP_HQD_EOP_CONTROL, EOP_SIZE, (order_base_2(MEC_HPD_SIZE / 4) - 1)); - WREG32(SOC15_REG_OFFSET(GC, 0, mmCP_HQD_EOP_CONTROL), tmp); + WREG32_SOC15(GC, 0, mmCP_HQD_EOP_CONTROL, tmp); /* enable doorbell? */ - tmp = RREG32(SOC15_REG_OFFSET(GC, 0, mmCP_HQD_PQ_DOORBELL_CONTROL)); + tmp = RREG32_SOC15(GC, 0, mmCP_HQD_PQ_DOORBELL_CONTROL); if (use_doorbell) tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL, DOORBELL_EN, 1); else tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL, DOORBELL_EN, 0); - WREG32(SOC15_REG_OFFSET(GC, 0, mmCP_HQD_PQ_DOORBELL_CONTROL), tmp); + WREG32_SOC15(GC, 0, mmCP_HQD_PQ_DOORBELL_CONTROL, tmp); mqd->cp_hqd_pq_doorbell_control = tmp; /* disable the queue if it's active */ @@ -3790,40 +3793,40 @@ static int gfx_v9_0_init_queue(struct amdgpu_ring *ring) mqd->cp_hqd_pq_rptr = 0; mqd->cp_hqd_pq_wptr_lo = 0; mqd->cp_hqd_pq_wptr_hi = 0; - if (RREG32(SOC15_REG_OFFSET(GC, 0, mmCP_HQD_ACTIVE)) & 1) { - WREG32(SOC15_REG_OFFSET(GC, 0, mmCP_HQD_DEQUEUE_REQUEST), 1); + if (RREG32_SOC15(GC, 0, mmCP_HQD_ACTIVE) & 1) { + WREG32_SOC15(GC, 0, mmCP_HQD_DEQUEUE_REQUEST, 1); for (j = 0; j < adev->usec_timeout; j++) { - if (!(RREG32(SOC15_REG_OFFSET(GC, 0, mmCP_HQD_ACTIVE)) & 1)) + if (!(RREG32_SOC15(GC, 0, mmCP_HQD_ACTIVE) & 1)) break; udelay(1); } - WREG32(SOC15_REG_OFFSET(GC, 0, mmCP_HQD_DEQUEUE_REQUEST), mqd->cp_hqd_dequeue_request); - WREG32(SOC15_REG_OFFSET(GC, 0, mmCP_HQD_PQ_RPTR), mqd->cp_hqd_pq_rptr); - WREG32(SOC15_REG_OFFSET(GC, 0, mmCP_HQD_PQ_WPTR_LO), mqd->cp_hqd_pq_wptr_lo); - WREG32(SOC15_REG_OFFSET(GC, 0, mmCP_HQD_PQ_WPTR_HI), mqd->cp_hqd_pq_wptr_hi); + WREG32_SOC15(GC, 0, 
mmCP_HQD_DEQUEUE_REQUEST, mqd->cp_hqd_dequeue_request); + WREG32_SOC15(GC, 0, mmCP_HQD_PQ_RPTR, mqd->cp_hqd_pq_rptr); + WREG32_SOC15(GC, 0, mmCP_HQD_PQ_WPTR_LO, mqd->cp_hqd_pq_wptr_lo); + WREG32_SOC15(GC, 0, mmCP_HQD_PQ_WPTR_HI, mqd->cp_hqd_pq_wptr_hi); } /* set the pointer to the MQD */ mqd->cp_mqd_base_addr_lo = mqd_gpu_addr & 0xfffffffc; mqd->cp_mqd_base_addr_hi = upper_32_bits(mqd_gpu_addr); - WREG32(SOC15_REG_OFFSET(GC, 0, mmCP_MQD_BASE_ADDR), mqd->cp_mqd_base_addr_lo); - WREG32(SOC15_REG_OFFSET(GC, 0, mmCP_MQD_BASE_ADDR_HI), mqd->cp_mqd_base_addr_hi); + WREG32_SOC15(GC, 0, mmCP_MQD_BASE_ADDR, mqd->cp_mqd_base_addr_lo); + WREG32_SOC15(GC, 0, mmCP_MQD_BASE_ADDR_HI, mqd->cp_mqd_base_addr_hi); /* set MQD vmid to 0 */ - tmp = RREG32(SOC15_REG_OFFSET(GC, 0, mmCP_MQD_CONTROL)); + tmp = RREG32_SOC15(GC, 0, mmCP_MQD_CONTROL); tmp = REG_SET_FIELD(tmp, CP_MQD_CONTROL, VMID, 0); - WREG32(SOC15_REG_OFFSET(GC, 0, mmCP_MQD_CONTROL), tmp); + WREG32_SOC15(GC, 0, mmCP_MQD_CONTROL, tmp); mqd->cp_mqd_control = tmp; /* set the pointer to the HQD, this is similar CP_RB0_BASE/_HI */ hqd_gpu_addr = ring->gpu_addr >> 8; mqd->cp_hqd_pq_base_lo = hqd_gpu_addr; mqd->cp_hqd_pq_base_hi = upper_32_bits(hqd_gpu_addr); - WREG32(SOC15_REG_OFFSET(GC, 0, mmCP_HQD_PQ_BASE), mqd->cp_hqd_pq_base_lo); - WREG32(SOC15_REG_OFFSET(GC, 0, mmCP_HQD_PQ_BASE_HI), mqd->cp_hqd_pq_base_hi); + WREG32_SOC15(GC, 0, mmCP_HQD_PQ_BASE, mqd->cp_hqd_pq_base_lo); + WREG32_SOC15(GC, 0, mmCP_HQD_PQ_BASE_HI, mqd->cp_hqd_pq_base_hi); /* set up the HQD, this is similar to CP_RB0_CNTL */ - tmp = RREG32(SOC15_REG_OFFSET(GC, 0, mmCP_HQD_PQ_CONTROL)); + tmp = RREG32_SOC15(GC, 0, mmCP_HQD_PQ_CONTROL); tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, QUEUE_SIZE, (order_base_2(ring->ring_size / 4) - 1)); tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, RPTR_BLOCK_SIZE, @@ -3835,7 +3838,7 @@ static int gfx_v9_0_init_queue(struct amdgpu_ring *ring) tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, ROQ_PQ_IB_FLIP, 0); tmp = REG_SET_FIELD(tmp, 
CP_HQD_PQ_CONTROL, PRIV_STATE, 1); tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, KMD_QUEUE, 1); - WREG32(SOC15_REG_OFFSET(GC, 0, mmCP_HQD_PQ_CONTROL), tmp); + WREG32_SOC15(GC, 0, mmCP_HQD_PQ_CONTROL, tmp); mqd->cp_hqd_pq_control = tmp; /* set the wb address wether it's enabled or not */ @@ -3843,27 +3846,27 @@ static int gfx_v9_0_init_queue(struct amdgpu_ring *ring) mqd->cp_hqd_pq_rptr_report_addr_lo = wb_gpu_addr & 0xfffffffc; mqd->cp_hqd_pq_rptr_report_addr_hi = upper_32_bits(wb_gpu_addr) & 0xffff; - WREG32(SOC15_REG_OFFSET(GC, 0, mmCP_HQD_PQ_RPTR_REPORT_ADDR), + WREG32_SOC15(GC, 0, mmCP_HQD_PQ_RPTR_REPORT_ADDR, mqd->cp_hqd_pq_rptr_report_addr_lo); - WREG32(SOC15_REG_OFFSET(GC, 0, mmCP_HQD_PQ_RPTR_REPORT_ADDR_HI), + WREG32_SOC15(GC, 0, mmCP_HQD_PQ_RPTR_REPORT_ADDR_HI, mqd->cp_hqd_pq_rptr_report_addr_hi); /* only used if CP_PQ_WPTR_POLL_CNTL.CP_PQ_WPTR_POLL_CNTL__EN_MASK=1 */ wb_gpu_addr = adev->wb.gpu_addr + (ring->wptr_offs * 4); mqd->cp_hqd_pq_wptr_poll_addr_lo = wb_gpu_addr & 0xfffffffc; mqd->cp_hqd_pq_wptr_poll_addr_hi = upper_32_bits(wb_gpu_addr) & 0xffff; - WREG32(SOC15_REG_OFFSET(GC, 0, mmCP_HQD_PQ_WPTR_POLL_ADDR), + WREG32_SOC15(GC, 0, mmCP_HQD_PQ_WPTR_POLL_ADDR, mqd->cp_hqd_pq_wptr_poll_addr_lo); - WREG32(SOC15_REG_OFFSET(GC, 0, mmCP_HQD_PQ_WPTR_POLL_ADDR_HI), + WREG32_SOC15(GC, 0, mmCP_HQD_PQ_WPTR_POLL_ADDR_HI, mqd->cp_hqd_pq_wptr_poll_addr_hi); /* enable the doorbell if requested */ if (use_doorbell) { - WREG32(SOC15_REG_OFFSET(GC, 0, mmCP_MEC_DOORBELL_RANGE_LOWER), + WREG32_SOC15(GC, 0, mmCP_MEC_DOORBELL_RANGE_LOWER, (AMDGPU_DOORBELL64_KIQ * 2) << 2); - WREG32(SOC15_REG_OFFSET(GC, 0, mmCP_MEC_DOORBELL_RANGE_UPPER), + WREG32_SOC15(GC, 0, mmCP_MEC_DOORBELL_RANGE_UPPER, (AMDGPU_DOORBELL64_MEC_RING7 * 2) << 2); - tmp = RREG32(SOC15_REG_OFFSET(GC, 0, mmCP_HQD_PQ_DOORBELL_CONTROL)); + tmp = RREG32_SOC15(GC, 0, mmCP_HQD_PQ_DOORBELL_CONTROL); tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL, DOORBELL_OFFSET, ring->doorbell_index); tmp = REG_SET_FIELD(tmp, 
CP_HQD_PQ_DOORBELL_CONTROL, DOORBELL_EN, 1); @@ -3874,25 +3877,25 @@ static int gfx_v9_0_init_queue(struct amdgpu_ring *ring) } else { mqd->cp_hqd_pq_doorbell_control = 0; } - WREG32(SOC15_REG_OFFSET(GC, 0, mmCP_HQD_PQ_DOORBELL_CONTROL), + WREG32_SOC15(GC, 0, mmCP_HQD_PQ_DOORBELL_CONTROL, mqd->cp_hqd_pq_doorbell_control); /* reset read and write pointers, similar to CP_RB0_WPTR/_RPTR */ - WREG32(SOC15_REG_OFFSET(GC, 0, mmCP_HQD_PQ_WPTR_LO), mqd->cp_hqd_pq_wptr_lo); - WREG32(SOC15_REG_OFFSET(GC, 0, mmCP_HQD_PQ_WPTR_HI), mqd->cp_hqd_pq_wptr_hi); + WREG32_SOC15(GC, 0, mmCP_HQD_PQ_WPTR_LO, mqd->cp_hqd_pq_wptr_lo); + WREG32_SOC15(GC, 0, mmCP_HQD_PQ_WPTR_HI, mqd->cp_hqd_pq_wptr_hi); /* set the vmid for the queue */ mqd->cp_hqd_vmid = 0; - WREG32(SOC15_REG_OFFSET(GC, 0, mmCP_HQD_VMID), mqd->cp_hqd_vmid); + WREG32_SOC15(GC, 0, mmCP_HQD_VMID, mqd->cp_hqd_vmid); - tmp = RREG32(SOC15_REG_OFFSET(GC, 0, mmCP_HQD_PERSISTENT_STATE)); + tmp = RREG32_SOC15(GC, 0, mmCP_HQD_PERSISTENT_STATE); tmp = REG_SET_FIELD(tmp, CP_HQD_PERSISTENT_STATE, PRELOAD_SIZE, 0x53); - WREG32(SOC15_REG_OFFSET(GC, 0, mmCP_HQD_PERSISTENT_STATE), tmp); + WREG32_SOC15(GC, 0, mmCP_HQD_PERSISTENT_STATE, tmp); mqd->cp_hqd_persistent_state = tmp; /* activate the queue */ mqd->cp_hqd_active = 1; - WREG32(SOC15_REG_OFFSET(GC, 0, mmCP_HQD_ACTIVE), mqd->cp_hqd_active); + WREG32_SOC15(GC, 0, mmCP_HQD_ACTIVE, mqd->cp_hqd_active); soc15_grbm_select(adev, 0, 0, 0, 0); mutex_unlock(&adev->srbm_mutex); diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v6_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v6_0.c index 631aef38126d..a572979f186c 100644 --- a/drivers/gpu/drm/amd/amdgpu/gmc_v6_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gmc_v6_0.c @@ -346,7 +346,8 @@ static int gmc_v6_0_mc_init(struct amdgpu_device *adev) * size equal to the 1024 or vram, whichever is larger. 
*/ if (amdgpu_gart_size == -1) - adev->mc.gtt_size = max((1024ULL << 20), adev->mc.mc_vram_size); + adev->mc.gtt_size = max((AMDGPU_DEFAULT_GTT_SIZE_MB << 20), + adev->mc.mc_vram_size); else adev->mc.gtt_size = (uint64_t)amdgpu_gart_size << 20; @@ -621,7 +622,7 @@ static int gmc_v6_0_vm_init(struct amdgpu_device *adev) * amdgpu graphics/compute will use VMIDs 1-7 * amdkfd will use VMIDs 8-15 */ - adev->vm_manager.num_ids = AMDGPU_NUM_OF_VMIDS; + adev->vm_manager.id_mgr[0].num_ids = AMDGPU_NUM_OF_VMIDS; adev->vm_manager.num_level = 1; amdgpu_vm_manager_init(adev); diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v7_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v7_0.c index 92abe12d92bb..a9083a16a250 100644 --- a/drivers/gpu/drm/amd/amdgpu/gmc_v7_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gmc_v7_0.c @@ -395,7 +395,8 @@ static int gmc_v7_0_mc_init(struct amdgpu_device *adev) * size equal to the 1024 or vram, whichever is larger. */ if (amdgpu_gart_size == -1) - adev->mc.gtt_size = max((1024ULL << 20), adev->mc.mc_vram_size); + adev->mc.gtt_size = max((AMDGPU_DEFAULT_GTT_SIZE_MB << 20), + adev->mc.mc_vram_size); else adev->mc.gtt_size = (uint64_t)amdgpu_gart_size << 20; @@ -746,7 +747,7 @@ static int gmc_v7_0_vm_init(struct amdgpu_device *adev) * amdgpu graphics/compute will use VMIDs 1-7 * amdkfd will use VMIDs 8-15 */ - adev->vm_manager.num_ids = AMDGPU_NUM_OF_VMIDS; + adev->vm_manager.id_mgr[0].num_ids = AMDGPU_NUM_OF_VMIDS; adev->vm_manager.num_level = 1; amdgpu_vm_manager_init(adev); diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v8_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v8_0.c index f2ccefc66fd4..4ac99784160a 100644 --- a/drivers/gpu/drm/amd/amdgpu/gmc_v8_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gmc_v8_0.c @@ -557,7 +557,8 @@ static int gmc_v8_0_mc_init(struct amdgpu_device *adev) * size equal to the 1024 or vram, whichever is larger. 
*/ if (amdgpu_gart_size == -1) - adev->mc.gtt_size = max((1024ULL << 20), adev->mc.mc_vram_size); + adev->mc.gtt_size = max((AMDGPU_DEFAULT_GTT_SIZE_MB << 20), + adev->mc.mc_vram_size); else adev->mc.gtt_size = (uint64_t)amdgpu_gart_size << 20; @@ -949,7 +950,7 @@ static int gmc_v8_0_vm_init(struct amdgpu_device *adev) * amdgpu graphics/compute will use VMIDs 1-7 * amdkfd will use VMIDs 8-15 */ - adev->vm_manager.num_ids = AMDGPU_NUM_OF_VMIDS; + adev->vm_manager.id_mgr[0].num_ids = AMDGPU_NUM_OF_VMIDS; adev->vm_manager.num_level = 1; amdgpu_vm_manager_init(adev); diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c index 3b045e0b114e..dc1e1c1d6b24 100644 --- a/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c @@ -386,6 +386,23 @@ static int gmc_v9_0_early_init(void *handle) static int gmc_v9_0_late_init(void *handle) { struct amdgpu_device *adev = (struct amdgpu_device *)handle; + unsigned vm_inv_eng[AMDGPU_MAX_VMHUBS] = { 3, 3 }; + unsigned i; + + for(i = 0; i < adev->num_rings; ++i) { + struct amdgpu_ring *ring = adev->rings[i]; + unsigned vmhub = ring->funcs->vmhub; + + ring->vm_inv_eng = vm_inv_eng[vmhub]++; + dev_info(adev->dev, "ring %u(%s) uses VM inv eng %u on hub %u\n", + ring->idx, ring->name, ring->vm_inv_eng, + ring->funcs->vmhub); + } + + /* Engine 17 is used for GART flushes */ + for(i = 0; i < AMDGPU_MAX_VMHUBS; ++i) + BUG_ON(vm_inv_eng[i] > 17); + return amdgpu_irq_get(adev, &adev->mc.vm_fault, 0); } @@ -469,7 +486,8 @@ static int gmc_v9_0_mc_init(struct amdgpu_device *adev) * size equal to the 1024 or vram, whichever is larger. 
*/ if (amdgpu_gart_size == -1) - adev->mc.gtt_size = max((1024ULL << 20), adev->mc.mc_vram_size); + adev->mc.gtt_size = max((AMDGPU_DEFAULT_GTT_SIZE_MB << 20), + adev->mc.mc_vram_size); else adev->mc.gtt_size = (uint64_t)amdgpu_gart_size << 20; @@ -519,7 +537,8 @@ static int gmc_v9_0_vm_init(struct amdgpu_device *adev) * amdgpu graphics/compute will use VMIDs 1-7 * amdkfd will use VMIDs 8-15 */ - adev->vm_manager.num_ids = AMDGPU_NUM_OF_VMIDS; + adev->vm_manager.id_mgr[AMDGPU_GFXHUB].num_ids = AMDGPU_NUM_OF_VMIDS; + adev->vm_manager.id_mgr[AMDGPU_MMHUB].num_ids = AMDGPU_NUM_OF_VMIDS; /* TODO: fix num_level for APU when updating vm size and block size */ if (adev->flags & AMD_IS_APU) diff --git a/drivers/gpu/drm/amd/amdgpu/mmhub_v1_0.c b/drivers/gpu/drm/amd/amdgpu/mmhub_v1_0.c index 62684510ddcd..dbfe48d1207a 100644 --- a/drivers/gpu/drm/amd/amdgpu/mmhub_v1_0.c +++ b/drivers/gpu/drm/amd/amdgpu/mmhub_v1_0.c @@ -511,6 +511,9 @@ static int mmhub_v1_0_set_clockgating_state(void *handle, { struct amdgpu_device *adev = (struct amdgpu_device *)handle; + if (amdgpu_sriov_vf(adev)) + return 0; + switch (adev->asic_type) { case CHIP_VEGA10: mmhub_v1_0_update_medium_grain_clock_gating(adev, diff --git a/drivers/gpu/drm/amd/amdgpu/mmsch_v1_0.h b/drivers/gpu/drm/amd/amdgpu/mmsch_v1_0.h index 5f0fc8bf16a9..8af0bddf85e4 100644 --- a/drivers/gpu/drm/amd/amdgpu/mmsch_v1_0.h +++ b/drivers/gpu/drm/amd/amdgpu/mmsch_v1_0.h @@ -84,4 +84,61 @@ struct mmsch_v1_0_cmd_indirect_write { uint32_t reg_value; }; +static inline void mmsch_v1_0_insert_direct_wt(struct mmsch_v1_0_cmd_direct_write *direct_wt, + uint32_t *init_table, + uint32_t reg_offset, + uint32_t value) +{ + direct_wt->cmd_header.reg_offset = reg_offset; + direct_wt->reg_value = value; + memcpy((void *)init_table, direct_wt, sizeof(struct mmsch_v1_0_cmd_direct_write)); +} + +static inline void mmsch_v1_0_insert_direct_rd_mod_wt(struct mmsch_v1_0_cmd_direct_read_modify_write *direct_rd_mod_wt, + uint32_t *init_table, + uint32_t 
reg_offset, + uint32_t mask, uint32_t data) +{ + direct_rd_mod_wt->cmd_header.reg_offset = reg_offset; + direct_rd_mod_wt->mask_value = mask; + direct_rd_mod_wt->write_data = data; + memcpy((void *)init_table, direct_rd_mod_wt, + sizeof(struct mmsch_v1_0_cmd_direct_read_modify_write)); +} + +static inline void mmsch_v1_0_insert_direct_poll(struct mmsch_v1_0_cmd_direct_polling *direct_poll, + uint32_t *init_table, + uint32_t reg_offset, + uint32_t mask, uint32_t wait) +{ + direct_poll->cmd_header.reg_offset = reg_offset; + direct_poll->mask_value = mask; + direct_poll->wait_value = wait; + memcpy((void *)init_table, direct_poll, sizeof(struct mmsch_v1_0_cmd_direct_polling)); +} + +#define MMSCH_V1_0_INSERT_DIRECT_RD_MOD_WT(reg, mask, data) { \ + mmsch_v1_0_insert_direct_rd_mod_wt(&direct_rd_mod_wt, \ + init_table, (reg), \ + (mask), (data)); \ + init_table += sizeof(struct mmsch_v1_0_cmd_direct_read_modify_write)/4; \ + table_size += sizeof(struct mmsch_v1_0_cmd_direct_read_modify_write)/4; \ +} + +#define MMSCH_V1_0_INSERT_DIRECT_WT(reg, value) { \ + mmsch_v1_0_insert_direct_wt(&direct_wt, \ + init_table, (reg), \ + (value)); \ + init_table += sizeof(struct mmsch_v1_0_cmd_direct_write)/4; \ + table_size += sizeof(struct mmsch_v1_0_cmd_direct_write)/4; \ +} + +#define MMSCH_V1_0_INSERT_DIRECT_POLL(reg, mask, wait) { \ + mmsch_v1_0_insert_direct_poll(&direct_poll, \ + init_table, (reg), \ + (mask), (wait)); \ + init_table += sizeof(struct mmsch_v1_0_cmd_direct_polling)/4; \ + table_size += sizeof(struct mmsch_v1_0_cmd_direct_polling)/4; \ +} + #endif diff --git a/drivers/gpu/drm/amd/amdgpu/mxgpu_vi.c b/drivers/gpu/drm/amd/amdgpu/mxgpu_vi.c index 70a3dd13cb02..7bdc51b02326 100644 --- a/drivers/gpu/drm/amd/amdgpu/mxgpu_vi.c +++ b/drivers/gpu/drm/amd/amdgpu/mxgpu_vi.c @@ -368,9 +368,12 @@ static int xgpu_vi_mailbox_rcv_msg(struct amdgpu_device *adev, u32 reg; u32 mask = REG_FIELD_MASK(MAILBOX_CONTROL, RCV_MSG_VALID); - reg = RREG32_NO_KIQ(mmMAILBOX_CONTROL); - if (!(reg 
& mask)) - return -ENOENT; + /* workaround: host driver doesn't set VALID for CMPL now */ + if (event != IDH_FLR_NOTIFICATION_CMPL) { + reg = RREG32_NO_KIQ(mmMAILBOX_CONTROL); + if (!(reg & mask)) + return -ENOENT; + } reg = RREG32_NO_KIQ(mmMAILBOX_MSGBUF_RCV_DW0); if (reg != event) diff --git a/drivers/gpu/drm/amd/amdgpu/psp_v3_1.c b/drivers/gpu/drm/amd/amdgpu/psp_v3_1.c index c3588d1c7cb0..60a6407ba267 100644 --- a/drivers/gpu/drm/amd/amdgpu/psp_v3_1.c +++ b/drivers/gpu/drm/amd/amdgpu/psp_v3_1.c @@ -166,11 +166,8 @@ int psp_v3_1_bootloader_load_sysdrv(struct psp_context *psp) { int ret; uint32_t psp_gfxdrv_command_reg = 0; - struct amdgpu_bo *psp_sysdrv; - void *psp_sysdrv_virt = NULL; - uint64_t psp_sysdrv_mem; struct amdgpu_device *adev = psp->adev; - uint32_t size, sol_reg; + uint32_t sol_reg; /* Check sOS sign of life register to confirm sys driver and sOS * are already been loaded. @@ -185,27 +182,14 @@ int psp_v3_1_bootloader_load_sysdrv(struct psp_context *psp) if (ret) return ret; - /* - * Create a 1 meg GART memory to store the psp sys driver - * binary with a 1 meg aligned address - */ - size = (psp->sys_bin_size + (PSP_BOOTLOADER_1_MEG_ALIGNMENT - 1)) & - (~(PSP_BOOTLOADER_1_MEG_ALIGNMENT - 1)); - - ret = amdgpu_bo_create_kernel(adev, size, PSP_BOOTLOADER_1_MEG_ALIGNMENT, - AMDGPU_GEM_DOMAIN_GTT, - &psp_sysdrv, - &psp_sysdrv_mem, - &psp_sysdrv_virt); - if (ret) - return ret; + memset(psp->fw_pri_buf, 0, PSP_1_MEG); /* Copy PSP System Driver binary to memory */ - memcpy(psp_sysdrv_virt, psp->sys_start_addr, psp->sys_bin_size); + memcpy(psp->fw_pri_buf, psp->sys_start_addr, psp->sys_bin_size); /* Provide the sys driver to bootrom */ WREG32(SOC15_REG_OFFSET(MP0, 0, mmMP0_SMN_C2PMSG_36), - (uint32_t)(psp_sysdrv_mem >> 20)); + (uint32_t)(psp->fw_pri_mc_addr >> 20)); psp_gfxdrv_command_reg = 1 << 16; WREG32(SOC15_REG_OFFSET(MP0, 0, mmMP0_SMN_C2PMSG_35), psp_gfxdrv_command_reg); @@ -216,8 +200,6 @@ int psp_v3_1_bootloader_load_sysdrv(struct psp_context *psp) 
ret = psp_wait_for(psp, SOC15_REG_OFFSET(MP0, 0, mmMP0_SMN_C2PMSG_35), 0x80000000, 0x80000000, false); - amdgpu_bo_free_kernel(&psp_sysdrv, &psp_sysdrv_mem, &psp_sysdrv_virt); - return ret; } @@ -225,11 +207,8 @@ int psp_v3_1_bootloader_load_sos(struct psp_context *psp) { int ret; unsigned int psp_gfxdrv_command_reg = 0; - struct amdgpu_bo *psp_sos; - void *psp_sos_virt = NULL; - uint64_t psp_sos_mem; struct amdgpu_device *adev = psp->adev; - uint32_t size, sol_reg; + uint32_t sol_reg; /* Check sOS sign of life register to confirm sys driver and sOS * are already been loaded. @@ -244,23 +223,14 @@ int psp_v3_1_bootloader_load_sos(struct psp_context *psp) if (ret) return ret; - size = (psp->sos_bin_size + (PSP_BOOTLOADER_1_MEG_ALIGNMENT - 1)) & - (~((uint64_t)PSP_BOOTLOADER_1_MEG_ALIGNMENT - 1)); - - ret = amdgpu_bo_create_kernel(adev, size, PSP_BOOTLOADER_1_MEG_ALIGNMENT, - AMDGPU_GEM_DOMAIN_GTT, - &psp_sos, - &psp_sos_mem, - &psp_sos_virt); - if (ret) - return ret; + memset(psp->fw_pri_buf, 0, PSP_1_MEG); /* Copy Secure OS binary to PSP memory */ - memcpy(psp_sos_virt, psp->sos_start_addr, psp->sos_bin_size); + memcpy(psp->fw_pri_buf, psp->sos_start_addr, psp->sos_bin_size); /* Provide the PSP secure OS to bootrom */ WREG32(SOC15_REG_OFFSET(MP0, 0, mmMP0_SMN_C2PMSG_36), - (uint32_t)(psp_sos_mem >> 20)); + (uint32_t)(psp->fw_pri_mc_addr >> 20)); psp_gfxdrv_command_reg = 2 << 16; WREG32(SOC15_REG_OFFSET(MP0, 0, mmMP0_SMN_C2PMSG_35), psp_gfxdrv_command_reg); @@ -273,8 +243,6 @@ int psp_v3_1_bootloader_load_sos(struct psp_context *psp) 0, true); #endif - amdgpu_bo_free_kernel(&psp_sos, &psp_sos_mem, &psp_sos_virt); - return ret; } @@ -300,7 +268,6 @@ int psp_v3_1_prep_cmd_buf(struct amdgpu_firmware_info *ucode, struct psp_gfx_cmd int psp_v3_1_ring_init(struct psp_context *psp, enum psp_ring_type ring_type) { int ret = 0; - unsigned int psp_ring_reg = 0; struct psp_ring *ring; struct amdgpu_device *adev = psp->adev; @@ -320,6 +287,16 @@ int psp_v3_1_ring_init(struct 
psp_context *psp, enum psp_ring_type ring_type) return ret; } + return 0; +} + +int psp_v3_1_ring_create(struct psp_context *psp, enum psp_ring_type ring_type) +{ + int ret = 0; + unsigned int psp_ring_reg = 0; + struct psp_ring *ring = &psp->km_ring; + struct amdgpu_device *adev = psp->adev; + /* Write low address of the ring to C2PMSG_69 */ psp_ring_reg = lower_32_bits(ring->ring_mem_mc_addr); WREG32(SOC15_REG_OFFSET(MP0, 0, mmMP0_SMN_C2PMSG_69), psp_ring_reg); @@ -344,6 +321,33 @@ int psp_v3_1_ring_init(struct psp_context *psp, enum psp_ring_type ring_type) return ret; } +int psp_v3_1_ring_destroy(struct psp_context *psp, enum psp_ring_type ring_type) +{ + int ret = 0; + struct psp_ring *ring; + unsigned int psp_ring_reg = 0; + struct amdgpu_device *adev = psp->adev; + + ring = &psp->km_ring; + + /* Write the ring destroy command to C2PMSG_64 */ + psp_ring_reg = 3 << 16; + WREG32(SOC15_REG_OFFSET(MP0, 0, mmMP0_SMN_C2PMSG_64), psp_ring_reg); + + /* there might be handshake issue with hardware which needs delay */ + mdelay(20); + + /* Wait for response flag (bit 31) in C2PMSG_64 */ + ret = psp_wait_for(psp, SOC15_REG_OFFSET(MP0, 0, mmMP0_SMN_C2PMSG_64), + 0x80000000, 0x80000000, false); + + if (ring->ring_mem) + amdgpu_bo_free_kernel(&adev->firmware.rbuf, + &ring->ring_mem_mc_addr, + (void **)&ring->ring_mem); + return ret; +} + int psp_v3_1_cmd_submit(struct psp_context *psp, struct amdgpu_firmware_info *ucode, uint64_t cmd_buf_mc_addr, uint64_t fence_mc_addr, diff --git a/drivers/gpu/drm/amd/amdgpu/psp_v3_1.h b/drivers/gpu/drm/amd/amdgpu/psp_v3_1.h index e82eff741a08..9dcd0b25c4c6 100644 --- a/drivers/gpu/drm/amd/amdgpu/psp_v3_1.h +++ b/drivers/gpu/drm/amd/amdgpu/psp_v3_1.h @@ -39,6 +39,10 @@ extern int psp_v3_1_prep_cmd_buf(struct amdgpu_firmware_info *ucode, struct psp_gfx_cmd_resp *cmd); extern int psp_v3_1_ring_init(struct psp_context *psp, enum psp_ring_type ring_type); +extern int psp_v3_1_ring_create(struct psp_context *psp, + enum psp_ring_type 
ring_type); +extern int psp_v3_1_ring_destroy(struct psp_context *psp, + enum psp_ring_type ring_type); extern int psp_v3_1_cmd_submit(struct psp_context *psp, struct amdgpu_firmware_info *ucode, uint64_t cmd_buf_mc_addr, uint64_t fence_mc_addr, diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c b/drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c index 21f38d882335..ecc70a730a54 100644 --- a/drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c +++ b/drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c @@ -48,8 +48,7 @@ static void sdma_v4_0_set_buffer_funcs(struct amdgpu_device *adev); static void sdma_v4_0_set_vm_pte_funcs(struct amdgpu_device *adev); static void sdma_v4_0_set_irq_funcs(struct amdgpu_device *adev); -static const u32 golden_settings_sdma_4[] = -{ +static const u32 golden_settings_sdma_4[] = { SOC15_REG_OFFSET(SDMA0, 0, mmSDMA0_CHICKEN_BITS), 0xfe931f07, 0x02831f07, SOC15_REG_OFFSET(SDMA0, 0, mmSDMA0_CLK_CTRL), 0xff000ff0, 0x3f000100, SOC15_REG_OFFSET(SDMA0, 0, mmSDMA0_GFX_IB_CNTL), 0x800f0100, 0x00000100, @@ -76,8 +75,7 @@ static const u32 golden_settings_sdma_4[] = SOC15_REG_OFFSET(SDMA1, 0, mmSDMA1_UTCL1_PAGE), 0x000003ff, 0x000003c0 }; -static const u32 golden_settings_sdma_vg10[] = -{ +static const u32 golden_settings_sdma_vg10[] = { SOC15_REG_OFFSET(SDMA0, 0, mmSDMA0_GB_ADDR_CONFIG), 0x0018773f, 0x00104002, SOC15_REG_OFFSET(SDMA0, 0, mmSDMA0_GB_ADDR_CONFIG_READ), 0x0018773f, 0x00104002, SOC15_REG_OFFSET(SDMA1, 0, mmSDMA1_GB_ADDR_CONFIG), 0x0018773f, 0x00104002, @@ -87,16 +85,17 @@ static const u32 golden_settings_sdma_vg10[] = static u32 sdma_v4_0_get_reg_offset(u32 instance, u32 internal_offset) { u32 base = 0; + switch (instance) { - case 0: - base = SDMA0_BASE.instance[0].segment[0]; - break; - case 1: - base = SDMA1_BASE.instance[0].segment[0]; - break; - default: - BUG(); - break; + case 0: + base = SDMA0_BASE.instance[0].segment[0]; + break; + case 1: + base = SDMA1_BASE.instance[0].segment[0]; + break; + default: + BUG(); + break; } return base + internal_offset; @@ -159,7 
+158,8 @@ static int sdma_v4_0_init_microcode(struct amdgpu_device *adev) case CHIP_VEGA10: chip_name = "vega10"; break; - default: BUG(); + default: + BUG(); } for (i = 0; i < adev->sdma.num_instances; i++) { @@ -179,7 +179,7 @@ static int sdma_v4_0_init_microcode(struct amdgpu_device *adev) if (adev->sdma.instance[i].feature_version >= 20) adev->sdma.instance[i].burst_nop = true; DRM_DEBUG("psp_load == '%s'\n", - adev->firmware.load_type == AMDGPU_FW_LOAD_PSP? "true": "false"); + adev->firmware.load_type == AMDGPU_FW_LOAD_PSP ? "true" : "false"); if (adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) { info = &adev->firmware.ucode[AMDGPU_UCODE_ID_SDMA0 + i]; @@ -192,9 +192,7 @@ static int sdma_v4_0_init_microcode(struct amdgpu_device *adev) } out: if (err) { - printk(KERN_ERR - "sdma_v4_0: Failed to load firmware \"%s\"\n", - fw_name); + DRM_ERROR("sdma_v4_0: Failed to load firmware \"%s\"\n", fw_name); for (i = 0; i < adev->sdma.num_instances; i++) { release_firmware(adev->sdma.instance[i].fw); adev->sdma.instance[i].fw = NULL; @@ -212,10 +210,10 @@ out: */ static uint64_t sdma_v4_0_ring_get_rptr(struct amdgpu_ring *ring) { - u64* rptr; + u64 *rptr; /* XXX check if swapping is necessary on BE */ - rptr =((u64*)&ring->adev->wb.wb[ring->rptr_offs]); + rptr = ((u64 *)&ring->adev->wb.wb[ring->rptr_offs]); DRM_DEBUG("rptr before shift == 0x%016llx\n", *rptr); return ((*rptr) >> 2); @@ -231,19 +229,20 @@ static uint64_t sdma_v4_0_ring_get_rptr(struct amdgpu_ring *ring) static uint64_t sdma_v4_0_ring_get_wptr(struct amdgpu_ring *ring) { struct amdgpu_device *adev = ring->adev; - u64* wptr = NULL; - uint64_t local_wptr=0; + u64 *wptr = NULL; + uint64_t local_wptr = 0; if (ring->use_doorbell) { /* XXX check if swapping is necessary on BE */ - wptr = ((u64*)&adev->wb.wb[ring->wptr_offs]); + wptr = ((u64 *)&adev->wb.wb[ring->wptr_offs]); DRM_DEBUG("wptr/doorbell before shift == 0x%016llx\n", *wptr); *wptr = (*wptr) >> 2; DRM_DEBUG("wptr/doorbell after shift == 0x%016llx\n", 
*wptr); } else { u32 lowbit, highbit; int me = (ring == &adev->sdma.instance[0].ring) ? 0 : 1; - wptr=&local_wptr; + + wptr = &local_wptr; lowbit = RREG32(sdma_v4_0_get_reg_offset(me, mmSDMA0_GFX_RB_WPTR)) >> 2; highbit = RREG32(sdma_v4_0_get_reg_offset(me, mmSDMA0_GFX_RB_WPTR_HI)) >> 2; @@ -285,12 +284,13 @@ static void sdma_v4_0_ring_set_wptr(struct amdgpu_ring *ring) WDOORBELL64(ring->doorbell_index, ring->wptr << 2); } else { int me = (ring == &ring->adev->sdma.instance[0].ring) ? 0 : 1; + DRM_DEBUG("Not using doorbell -- " "mmSDMA%i_GFX_RB_WPTR == 0x%08x " - "mmSDMA%i_GFX_RB_WPTR_HI == 0x%08x \n", - me, + "mmSDMA%i_GFX_RB_WPTR_HI == 0x%08x\n", me, lower_32_bits(ring->wptr << 2), + me, upper_32_bits(ring->wptr << 2)); WREG32(sdma_v4_0_get_reg_offset(me, mmSDMA0_GFX_RB_WPTR), lower_32_bits(ring->wptr << 2)); WREG32(sdma_v4_0_get_reg_offset(me, mmSDMA0_GFX_RB_WPTR_HI), upper_32_bits(ring->wptr << 2)); @@ -319,22 +319,22 @@ static void sdma_v4_0_ring_insert_nop(struct amdgpu_ring *ring, uint32_t count) * Schedule an IB in the DMA ring (VEGA10). 
*/ static void sdma_v4_0_ring_emit_ib(struct amdgpu_ring *ring, - struct amdgpu_ib *ib, - unsigned vm_id, bool ctx_switch) + struct amdgpu_ib *ib, + unsigned vm_id, bool ctx_switch) { - u32 vmid = vm_id & 0xf; + u32 vmid = vm_id & 0xf; - /* IB packet must end on a 8 DW boundary */ - sdma_v4_0_ring_insert_nop(ring, (10 - (lower_32_bits(ring->wptr) & 7)) % 8); + /* IB packet must end on a 8 DW boundary */ + sdma_v4_0_ring_insert_nop(ring, (10 - (lower_32_bits(ring->wptr) & 7)) % 8); - amdgpu_ring_write(ring, SDMA_PKT_HEADER_OP(SDMA_OP_INDIRECT) | - SDMA_PKT_INDIRECT_HEADER_VMID(vmid)); - /* base must be 32 byte aligned */ - amdgpu_ring_write(ring, lower_32_bits(ib->gpu_addr) & 0xffffffe0); - amdgpu_ring_write(ring, upper_32_bits(ib->gpu_addr)); - amdgpu_ring_write(ring, ib->length_dw); - amdgpu_ring_write(ring, 0); - amdgpu_ring_write(ring, 0); + amdgpu_ring_write(ring, SDMA_PKT_HEADER_OP(SDMA_OP_INDIRECT) | + SDMA_PKT_INDIRECT_HEADER_VMID(vmid)); + /* base must be 32 byte aligned */ + amdgpu_ring_write(ring, lower_32_bits(ib->gpu_addr) & 0xffffffe0); + amdgpu_ring_write(ring, upper_32_bits(ib->gpu_addr)); + amdgpu_ring_write(ring, ib->length_dw); + amdgpu_ring_write(ring, 0); + amdgpu_ring_write(ring, 0); } @@ -523,7 +523,7 @@ static int sdma_v4_0_gfx_resume(struct amdgpu_device *adev) u32 doorbell; u32 doorbell_offset; u32 temp; - int i,r; + int i, r; for (i = 0; i < adev->sdma.num_instances; i++) { ring = &adev->sdma.instance[i].ring; @@ -572,7 +572,7 @@ static int sdma_v4_0_gfx_resume(struct amdgpu_device *adev) doorbell = RREG32(sdma_v4_0_get_reg_offset(i, mmSDMA0_GFX_DOORBELL)); doorbell_offset = RREG32(sdma_v4_0_get_reg_offset(i, mmSDMA0_GFX_DOORBELL_OFFSET)); - if (ring->use_doorbell){ + if (ring->use_doorbell) { doorbell = REG_SET_FIELD(doorbell, SDMA0_GFX_DOORBELL, ENABLE, 1); doorbell_offset = REG_SET_FIELD(doorbell_offset, SDMA0_GFX_DOORBELL_OFFSET, OFFSET, ring->doorbell_index); @@ -694,9 +694,7 @@ static int sdma_v4_0_load_microcode(struct amdgpu_device 
*adev) for (j = 0; j < fw_size; j++) - { WREG32(sdma_v4_0_get_reg_offset(i, mmSDMA0_UCODE_DATA), le32_to_cpup(fw_data++)); - } WREG32(sdma_v4_0_get_reg_offset(i, mmSDMA0_UCODE_ADDR), adev->sdma.instance[i].fw_version); } @@ -744,10 +742,8 @@ static int sdma_v4_0_start(struct amdgpu_device *adev) if (r) return r; r = sdma_v4_0_rlc_resume(adev); - if (r) - return r; - return 0; + return r; } /** @@ -797,9 +793,8 @@ static int sdma_v4_0_ring_test_ring(struct amdgpu_ring *ring) for (i = 0; i < adev->usec_timeout; i++) { tmp = le32_to_cpu(adev->wb.wb[index]); - if (tmp == 0xDEADBEEF) { + if (tmp == 0xDEADBEEF) break; - } DRM_UDELAY(1); } @@ -864,29 +859,29 @@ static int sdma_v4_0_ring_test_ib(struct amdgpu_ring *ring, long timeout) if (r) goto err1; - r = dma_fence_wait_timeout(f, false, timeout); - if (r == 0) { - DRM_ERROR("amdgpu: IB test timed out\n"); - r = -ETIMEDOUT; - goto err1; - } else if (r < 0) { - DRM_ERROR("amdgpu: fence wait failed (%ld).\n", r); - goto err1; - } - tmp = le32_to_cpu(adev->wb.wb[index]); - if (tmp == 0xDEADBEEF) { - DRM_INFO("ib test on ring %d succeeded\n", ring->idx); - r = 0; - } else { - DRM_ERROR("amdgpu: ib test failed (0x%08X)\n", tmp); - r = -EINVAL; - } + r = dma_fence_wait_timeout(f, false, timeout); + if (r == 0) { + DRM_ERROR("amdgpu: IB test timed out\n"); + r = -ETIMEDOUT; + goto err1; + } else if (r < 0) { + DRM_ERROR("amdgpu: fence wait failed (%ld).\n", r); + goto err1; + } + tmp = le32_to_cpu(adev->wb.wb[index]); + if (tmp == 0xDEADBEEF) { + DRM_INFO("ib test on ring %d succeeded\n", ring->idx); + r = 0; + } else { + DRM_ERROR("amdgpu: ib test failed (0x%08X)\n", tmp); + r = -EINVAL; + } err1: - amdgpu_ib_free(adev, &ib, NULL); - dma_fence_put(f); + amdgpu_ib_free(adev, &ib, NULL); + dma_fence_put(f); err0: - amdgpu_wb_free(adev, index); - return r; + amdgpu_wb_free(adev, index); + return r; } @@ -1039,44 +1034,40 @@ static void sdma_v4_0_ring_emit_pipeline_sync(struct amdgpu_ring *ring) static void 
sdma_v4_0_ring_emit_vm_flush(struct amdgpu_ring *ring, unsigned vm_id, uint64_t pd_addr) { + struct amdgpu_vmhub *hub = &ring->adev->vmhub[ring->funcs->vmhub]; uint32_t req = ring->adev->gart.gart_funcs->get_invalidate_req(vm_id); - unsigned eng = ring->idx; - unsigned i; + unsigned eng = ring->vm_inv_eng; pd_addr = pd_addr | 0x1; /* valid bit */ /* now only use physical base address of PDE and valid */ BUG_ON(pd_addr & 0xFFFF00000000003EULL); - for (i = 0; i < AMDGPU_MAX_VMHUBS; ++i) { - struct amdgpu_vmhub *hub = &ring->adev->vmhub[i]; - - amdgpu_ring_write(ring, SDMA_PKT_HEADER_OP(SDMA_OP_SRBM_WRITE) | - SDMA_PKT_SRBM_WRITE_HEADER_BYTE_EN(0xf)); - amdgpu_ring_write(ring, hub->ctx0_ptb_addr_lo32 + vm_id * 2); - amdgpu_ring_write(ring, lower_32_bits(pd_addr)); - - amdgpu_ring_write(ring, SDMA_PKT_HEADER_OP(SDMA_OP_SRBM_WRITE) | - SDMA_PKT_SRBM_WRITE_HEADER_BYTE_EN(0xf)); - amdgpu_ring_write(ring, hub->ctx0_ptb_addr_hi32 + vm_id * 2); - amdgpu_ring_write(ring, upper_32_bits(pd_addr)); - - /* flush TLB */ - amdgpu_ring_write(ring, SDMA_PKT_HEADER_OP(SDMA_OP_SRBM_WRITE) | - SDMA_PKT_SRBM_WRITE_HEADER_BYTE_EN(0xf)); - amdgpu_ring_write(ring, hub->vm_inv_eng0_req + eng); - amdgpu_ring_write(ring, req); - - /* wait for flush */ - amdgpu_ring_write(ring, SDMA_PKT_HEADER_OP(SDMA_OP_POLL_REGMEM) | - SDMA_PKT_POLL_REGMEM_HEADER_HDP_FLUSH(0) | - SDMA_PKT_POLL_REGMEM_HEADER_FUNC(3)); /* equal */ - amdgpu_ring_write(ring, (hub->vm_inv_eng0_ack + eng) << 2); - amdgpu_ring_write(ring, 0); - amdgpu_ring_write(ring, 1 << vm_id); /* reference */ - amdgpu_ring_write(ring, 1 << vm_id); /* mask */ - amdgpu_ring_write(ring, SDMA_PKT_POLL_REGMEM_DW5_RETRY_COUNT(0xfff) | - SDMA_PKT_POLL_REGMEM_DW5_INTERVAL(10)); - } + amdgpu_ring_write(ring, SDMA_PKT_HEADER_OP(SDMA_OP_SRBM_WRITE) | + SDMA_PKT_SRBM_WRITE_HEADER_BYTE_EN(0xf)); + amdgpu_ring_write(ring, hub->ctx0_ptb_addr_lo32 + vm_id * 2); + amdgpu_ring_write(ring, lower_32_bits(pd_addr)); + + amdgpu_ring_write(ring, 
SDMA_PKT_HEADER_OP(SDMA_OP_SRBM_WRITE) | + SDMA_PKT_SRBM_WRITE_HEADER_BYTE_EN(0xf)); + amdgpu_ring_write(ring, hub->ctx0_ptb_addr_hi32 + vm_id * 2); + amdgpu_ring_write(ring, upper_32_bits(pd_addr)); + + /* flush TLB */ + amdgpu_ring_write(ring, SDMA_PKT_HEADER_OP(SDMA_OP_SRBM_WRITE) | + SDMA_PKT_SRBM_WRITE_HEADER_BYTE_EN(0xf)); + amdgpu_ring_write(ring, hub->vm_inv_eng0_req + eng); + amdgpu_ring_write(ring, req); + + /* wait for flush */ + amdgpu_ring_write(ring, SDMA_PKT_HEADER_OP(SDMA_OP_POLL_REGMEM) | + SDMA_PKT_POLL_REGMEM_HEADER_HDP_FLUSH(0) | + SDMA_PKT_POLL_REGMEM_HEADER_FUNC(3)); /* equal */ + amdgpu_ring_write(ring, (hub->vm_inv_eng0_ack + eng) << 2); + amdgpu_ring_write(ring, 0); + amdgpu_ring_write(ring, 1 << vm_id); /* reference */ + amdgpu_ring_write(ring, 1 << vm_id); /* mask */ + amdgpu_ring_write(ring, SDMA_PKT_POLL_REGMEM_DW5_RETRY_COUNT(0xfff) | + SDMA_PKT_POLL_REGMEM_DW5_INTERVAL(10)); } static int sdma_v4_0_early_init(void *handle) @@ -1162,8 +1153,6 @@ static int sdma_v4_0_hw_init(void *handle) sdma_v4_0_init_golden_registers(adev); r = sdma_v4_0_start(adev); - if (r) - return r; return r; } @@ -1199,10 +1188,12 @@ static bool sdma_v4_0_is_idle(void *handle) { struct amdgpu_device *adev = (struct amdgpu_device *)handle; u32 i; + for (i = 0; i < adev->sdma.num_instances; i++) { u32 tmp = RREG32(sdma_v4_0_get_reg_offset(i, mmSDMA0_STATUS_REG)); + if (!(tmp & SDMA0_STATUS_REG__IDLE_MASK)) - return false; + return false; } return true; @@ -1211,8 +1202,9 @@ static bool sdma_v4_0_is_idle(void *handle) static int sdma_v4_0_wait_for_idle(void *handle) { unsigned i; - u32 sdma0,sdma1; + u32 sdma0, sdma1; struct amdgpu_device *adev = (struct amdgpu_device *)handle; + for (i = 0; i < adev->usec_timeout; i++) { sdma0 = RREG32(sdma_v4_0_get_reg_offset(0, mmSDMA0_STATUS_REG)); sdma1 = RREG32(sdma_v4_0_get_reg_offset(1, mmSDMA0_STATUS_REG)); @@ -1240,7 +1232,7 @@ static int sdma_v4_0_set_trap_irq_state(struct amdgpu_device *adev, u32 reg_offset = (type == 
AMDGPU_SDMA_IRQ_TRAP0) ? sdma_v4_0_get_reg_offset(0, mmSDMA0_CNTL) : - sdma_v4_0_get_reg_offset(1, mmSDMA0_CNTL); + sdma_v4_0_get_reg_offset(1, mmSDMA0_CNTL); sdma_cntl = RREG32(reg_offset); sdma_cntl = REG_SET_FIELD(sdma_cntl, SDMA0_CNTL, TRAP_ENABLE, @@ -1332,7 +1324,7 @@ static void sdma_v4_0_update_medium_grain_clock_gating( SDMA1_CLK_CTRL__SOFT_OVERRIDE2_MASK | SDMA1_CLK_CTRL__SOFT_OVERRIDE1_MASK | SDMA1_CLK_CTRL__SOFT_OVERRIDE0_MASK); - if(def != data) + if (def != data) WREG32(SOC15_REG_OFFSET(SDMA1, 0, mmSDMA1_CLK_CTRL), data); } } else { @@ -1382,17 +1374,17 @@ static void sdma_v4_0_update_medium_grain_light_sleep( /* 1-not override: enable sdma1 mem light sleep */ if (adev->asic_type == CHIP_VEGA10) { - def = data = RREG32(SOC15_REG_OFFSET(SDMA1, 0, mmSDMA1_POWER_CNTL)); - data |= SDMA1_POWER_CNTL__MEM_POWER_OVERRIDE_MASK; - if (def != data) - WREG32(SOC15_REG_OFFSET(SDMA1, 0, mmSDMA1_POWER_CNTL), data); + def = data = RREG32(SOC15_REG_OFFSET(SDMA1, 0, mmSDMA1_POWER_CNTL)); + data |= SDMA1_POWER_CNTL__MEM_POWER_OVERRIDE_MASK; + if (def != data) + WREG32(SOC15_REG_OFFSET(SDMA1, 0, mmSDMA1_POWER_CNTL), data); } } else { /* 0-override:disable sdma0 mem light sleep */ def = data = RREG32(SOC15_REG_OFFSET(SDMA0, 0, mmSDMA0_POWER_CNTL)); data &= ~SDMA0_POWER_CNTL__MEM_POWER_OVERRIDE_MASK; if (def != data) - WREG32(SOC15_REG_OFFSET(SDMA0, 0, mmSDMA0_POWER_CNTL), data); + WREG32(SOC15_REG_OFFSET(SDMA0, 0, mmSDMA0_POWER_CNTL), data); /* 0-override:disable sdma1 mem light sleep */ if (adev->asic_type == CHIP_VEGA10) { @@ -1473,6 +1465,7 @@ static const struct amdgpu_ring_funcs sdma_v4_0_ring_funcs = { .align_mask = 0xf, .nop = SDMA_PKT_NOP_HEADER_OP(SDMA_OP_NOP), .support_64bit_ptrs = true, + .vmhub = AMDGPU_MMHUB, .get_rptr = sdma_v4_0_ring_get_rptr, .get_wptr = sdma_v4_0_ring_get_wptr, .set_wptr = sdma_v4_0_ring_set_wptr, @@ -1480,7 +1473,7 @@ static const struct amdgpu_ring_funcs sdma_v4_0_ring_funcs = { 6 + /* sdma_v4_0_ring_emit_hdp_flush */ 3 + /* 
sdma_v4_0_ring_emit_hdp_invalidate */ 6 + /* sdma_v4_0_ring_emit_pipeline_sync */ - 36 + /* sdma_v4_0_ring_emit_vm_flush */ + 18 + /* sdma_v4_0_ring_emit_vm_flush */ 10 + 10 + 10, /* sdma_v4_0_ring_emit_fence x3 for user fence, vm fence */ .emit_ib_size = 7 + 6, /* sdma_v4_0_ring_emit_ib */ .emit_ib = sdma_v4_0_ring_emit_ib, @@ -1606,8 +1599,7 @@ static void sdma_v4_0_set_vm_pte_funcs(struct amdgpu_device *adev) } } -const struct amdgpu_ip_block_version sdma_v4_0_ip_block = -{ +const struct amdgpu_ip_block_version sdma_v4_0_ip_block = { .type = AMD_IP_BLOCK_TYPE_SDMA, .major = 4, .minor = 0, diff --git a/drivers/gpu/drm/amd/amdgpu/soc15.c b/drivers/gpu/drm/amd/amdgpu/soc15.c index 385de8617075..6b55d451ae7f 100644 --- a/drivers/gpu/drm/amd/amdgpu/soc15.c +++ b/drivers/gpu/drm/amd/amdgpu/soc15.c @@ -25,7 +25,7 @@ #include <linux/module.h> #include "drmP.h" #include "amdgpu.h" -#include "amdgpu_atombios.h" +#include "amdgpu_atomfirmware.h" #include "amdgpu_ih.h" #include "amdgpu_uvd.h" #include "amdgpu_vce.h" @@ -405,11 +405,11 @@ static void soc15_gpu_pci_config_reset(struct amdgpu_device *adev) static int soc15_asic_reset(struct amdgpu_device *adev) { - amdgpu_atombios_scratch_regs_engine_hung(adev, true); + amdgpu_atomfirmware_scratch_regs_engine_hung(adev, true); soc15_gpu_pci_config_reset(adev); - amdgpu_atombios_scratch_regs_engine_hung(adev, false); + amdgpu_atomfirmware_scratch_regs_engine_hung(adev, false); return 0; } @@ -505,8 +505,7 @@ int soc15_set_ip_blocks(struct amdgpu_device *adev) amdgpu_ip_block_add(adev, &dce_virtual_ip_block); amdgpu_ip_block_add(adev, &gfx_v9_0_ip_block); amdgpu_ip_block_add(adev, &sdma_v4_0_ip_block); - if (!amdgpu_sriov_vf(adev)) - amdgpu_ip_block_add(adev, &uvd_v7_0_ip_block); + amdgpu_ip_block_add(adev, &uvd_v7_0_ip_block); amdgpu_ip_block_add(adev, &vce_v4_0_ip_block); break; default: diff --git a/drivers/gpu/drm/amd/amdgpu/soc15_common.h b/drivers/gpu/drm/amd/amdgpu/soc15_common.h index 2b96c806baa1..e8df6d820dbe 100644 
--- a/drivers/gpu/drm/amd/amdgpu/soc15_common.h +++ b/drivers/gpu/drm/amd/amdgpu/soc15_common.h @@ -45,13 +45,31 @@ struct nbio_pcie_index_data { u32 index_offset; u32 data_offset; }; -// Register Access Macro + +/* Register Access Macros */ #define SOC15_REG_OFFSET(ip, inst, reg) (0 == reg##_BASE_IDX ? ip##_BASE__INST##inst##_SEG0 + reg : \ (1 == reg##_BASE_IDX ? ip##_BASE__INST##inst##_SEG1 + reg : \ (2 == reg##_BASE_IDX ? ip##_BASE__INST##inst##_SEG2 + reg : \ (3 == reg##_BASE_IDX ? ip##_BASE__INST##inst##_SEG3 + reg : \ (ip##_BASE__INST##inst##_SEG4 + reg))))) +#define WREG32_FIELD15(ip, idx, reg, field, val) \ + WREG32(SOC15_REG_OFFSET(ip, idx, mm##reg), (RREG32(SOC15_REG_OFFSET(ip, idx, mm##reg)) & ~REG_FIELD_MASK(reg, field)) | (val) << REG_FIELD_SHIFT(reg, field)) + +#define RREG32_SOC15(ip, inst, reg) \ + RREG32( (0 == reg##_BASE_IDX ? ip##_BASE__INST##inst##_SEG0 + reg : \ + (1 == reg##_BASE_IDX ? ip##_BASE__INST##inst##_SEG1 + reg : \ + (2 == reg##_BASE_IDX ? ip##_BASE__INST##inst##_SEG2 + reg : \ + (3 == reg##_BASE_IDX ? ip##_BASE__INST##inst##_SEG3 + reg : \ + (ip##_BASE__INST##inst##_SEG4 + reg)))))) + +#define WREG32_SOC15(ip, inst, reg, value) \ + WREG32( (0 == reg##_BASE_IDX ? ip##_BASE__INST##inst##_SEG0 + reg : \ + (1 == reg##_BASE_IDX ? ip##_BASE__INST##inst##_SEG1 + reg : \ + (2 == reg##_BASE_IDX ? ip##_BASE__INST##inst##_SEG2 + reg : \ + (3 == reg##_BASE_IDX ? 
ip##_BASE__INST##inst##_SEG3 + reg : \ + (ip##_BASE__INST##inst##_SEG4 + reg))))), value) + #endif diff --git a/drivers/gpu/drm/amd/amdgpu/uvd_v7_0.c b/drivers/gpu/drm/amd/amdgpu/uvd_v7_0.c index 9bcf01469282..eca8f6e01e97 100644 --- a/drivers/gpu/drm/amd/amdgpu/uvd_v7_0.c +++ b/drivers/gpu/drm/amd/amdgpu/uvd_v7_0.c @@ -27,10 +27,14 @@ #include "amdgpu_uvd.h" #include "soc15d.h" #include "soc15_common.h" +#include "mmsch_v1_0.h" #include "vega10/soc15ip.h" #include "vega10/UVD/uvd_7_0_offset.h" #include "vega10/UVD/uvd_7_0_sh_mask.h" +#include "vega10/VCE/vce_4_0_offset.h" +#include "vega10/VCE/vce_4_0_default.h" +#include "vega10/VCE/vce_4_0_sh_mask.h" #include "vega10/NBIF/nbif_6_1_offset.h" #include "vega10/HDP/hdp_4_0_offset.h" #include "vega10/MMHUB/mmhub_1_0_offset.h" @@ -41,6 +45,7 @@ static void uvd_v7_0_set_enc_ring_funcs(struct amdgpu_device *adev); static void uvd_v7_0_set_irq_funcs(struct amdgpu_device *adev); static int uvd_v7_0_start(struct amdgpu_device *adev); static void uvd_v7_0_stop(struct amdgpu_device *adev); +static int uvd_v7_0_sriov_start(struct amdgpu_device *adev); /** * uvd_v7_0_ring_get_rptr - get read pointer @@ -98,6 +103,9 @@ static uint64_t uvd_v7_0_enc_ring_get_wptr(struct amdgpu_ring *ring) { struct amdgpu_device *adev = ring->adev; + if (ring->use_doorbell) + return adev->wb.wb[ring->wptr_offs]; + if (ring == &adev->uvd.ring_enc[0]) return RREG32(SOC15_REG_OFFSET(UVD, 0, mmUVD_RB_WPTR)); else @@ -129,6 +137,13 @@ static void uvd_v7_0_enc_ring_set_wptr(struct amdgpu_ring *ring) { struct amdgpu_device *adev = ring->adev; + if (ring->use_doorbell) { + /* XXX check if swapping is necessary on BE */ + adev->wb.wb[ring->wptr_offs] = lower_32_bits(ring->wptr); + WDOORBELL32(ring->doorbell_index, lower_32_bits(ring->wptr)); + return; + } + if (ring == &adev->uvd.ring_enc[0]) WREG32(SOC15_REG_OFFSET(UVD, 0, mmUVD_RB_WPTR), lower_32_bits(ring->wptr)); @@ -353,7 +368,10 @@ static int uvd_v7_0_early_init(void *handle) { struct amdgpu_device 
*adev = (struct amdgpu_device *)handle; - adev->uvd.num_enc_rings = 2; + if (amdgpu_sriov_vf(adev)) + adev->uvd.num_enc_rings = 1; + else + adev->uvd.num_enc_rings = 2; uvd_v7_0_set_ring_funcs(adev); uvd_v7_0_set_enc_ring_funcs(adev); uvd_v7_0_set_irq_funcs(adev); @@ -406,21 +424,31 @@ static int uvd_v7_0_sw_init(void *handle) r = amdgpu_uvd_resume(adev); if (r) return r; + if (!amdgpu_sriov_vf(adev)) { + ring = &adev->uvd.ring; + sprintf(ring->name, "uvd"); + r = amdgpu_ring_init(adev, ring, 512, &adev->uvd.irq, 0); + if (r) + return r; + } - ring = &adev->uvd.ring; - sprintf(ring->name, "uvd"); - r = amdgpu_ring_init(adev, ring, 512, &adev->uvd.irq, 0); - if (r) - return r; for (i = 0; i < adev->uvd.num_enc_rings; ++i) { ring = &adev->uvd.ring_enc[i]; sprintf(ring->name, "uvd_enc%d", i); + if (amdgpu_sriov_vf(adev)) { + ring->use_doorbell = true; + ring->doorbell_index = AMDGPU_DOORBELL64_UVD_RING0_1 * 2; + } r = amdgpu_ring_init(adev, ring, 512, &adev->uvd.irq, 0); if (r) return r; } + r = amdgpu_virt_alloc_mm_table(adev); + if (r) + return r; + return r; } @@ -429,6 +457,8 @@ static int uvd_v7_0_sw_fini(void *handle) int i, r; struct amdgpu_device *adev = (struct amdgpu_device *)handle; + amdgpu_virt_free_mm_table(adev); + r = amdgpu_uvd_suspend(adev); if (r) return r; @@ -455,48 +485,53 @@ static int uvd_v7_0_hw_init(void *handle) uint32_t tmp; int i, r; - r = uvd_v7_0_start(adev); + if (amdgpu_sriov_vf(adev)) + r = uvd_v7_0_sriov_start(adev); + else + r = uvd_v7_0_start(adev); if (r) goto done; - ring->ready = true; - r = amdgpu_ring_test_ring(ring); - if (r) { - ring->ready = false; - goto done; - } + if (!amdgpu_sriov_vf(adev)) { + ring->ready = true; + r = amdgpu_ring_test_ring(ring); + if (r) { + ring->ready = false; + goto done; + } - r = amdgpu_ring_alloc(ring, 10); - if (r) { - DRM_ERROR("amdgpu: ring failed to lock UVD ring (%d).\n", r); - goto done; - } + r = amdgpu_ring_alloc(ring, 10); + if (r) { + DRM_ERROR("amdgpu: ring failed to lock UVD ring 
(%d).\n", r); + goto done; + } - tmp = PACKET0(SOC15_REG_OFFSET(UVD, 0, - mmUVD_SEMA_WAIT_FAULT_TIMEOUT_CNTL), 0); - amdgpu_ring_write(ring, tmp); - amdgpu_ring_write(ring, 0xFFFFF); + tmp = PACKET0(SOC15_REG_OFFSET(UVD, 0, + mmUVD_SEMA_WAIT_FAULT_TIMEOUT_CNTL), 0); + amdgpu_ring_write(ring, tmp); + amdgpu_ring_write(ring, 0xFFFFF); - tmp = PACKET0(SOC15_REG_OFFSET(UVD, 0, - mmUVD_SEMA_WAIT_INCOMPLETE_TIMEOUT_CNTL), 0); - amdgpu_ring_write(ring, tmp); - amdgpu_ring_write(ring, 0xFFFFF); + tmp = PACKET0(SOC15_REG_OFFSET(UVD, 0, + mmUVD_SEMA_WAIT_INCOMPLETE_TIMEOUT_CNTL), 0); + amdgpu_ring_write(ring, tmp); + amdgpu_ring_write(ring, 0xFFFFF); - tmp = PACKET0(SOC15_REG_OFFSET(UVD, 0, - mmUVD_SEMA_SIGNAL_INCOMPLETE_TIMEOUT_CNTL), 0); - amdgpu_ring_write(ring, tmp); - amdgpu_ring_write(ring, 0xFFFFF); + tmp = PACKET0(SOC15_REG_OFFSET(UVD, 0, + mmUVD_SEMA_SIGNAL_INCOMPLETE_TIMEOUT_CNTL), 0); + amdgpu_ring_write(ring, tmp); + amdgpu_ring_write(ring, 0xFFFFF); - /* Clear timeout status bits */ - amdgpu_ring_write(ring, PACKET0(SOC15_REG_OFFSET(UVD, 0, - mmUVD_SEMA_TIMEOUT_STATUS), 0)); - amdgpu_ring_write(ring, 0x8); + /* Clear timeout status bits */ + amdgpu_ring_write(ring, PACKET0(SOC15_REG_OFFSET(UVD, 0, + mmUVD_SEMA_TIMEOUT_STATUS), 0)); + amdgpu_ring_write(ring, 0x8); - amdgpu_ring_write(ring, PACKET0(SOC15_REG_OFFSET(UVD, 0, - mmUVD_SEMA_CNTL), 0)); - amdgpu_ring_write(ring, 3); + amdgpu_ring_write(ring, PACKET0(SOC15_REG_OFFSET(UVD, 0, + mmUVD_SEMA_CNTL), 0)); + amdgpu_ring_write(ring, 3); - amdgpu_ring_commit(ring); + amdgpu_ring_commit(ring); + } for (i = 0; i < adev->uvd.num_enc_rings; ++i) { ring = &adev->uvd.ring_enc[i]; @@ -618,6 +653,241 @@ static void uvd_v7_0_mc_resume(struct amdgpu_device *adev) WREG32(SOC15_REG_OFFSET(UVD, 0, mmUVD_GP_SCRATCH4), adev->uvd.max_handles); } +static int uvd_v7_0_mmsch_start(struct amdgpu_device *adev, + struct amdgpu_mm_table *table) +{ + uint32_t data = 0, loop; + uint64_t addr = table->gpu_addr; + struct 
mmsch_v1_0_init_header *header = (struct mmsch_v1_0_init_header *)table->cpu_addr; + uint32_t size; + + size = header->header_size + header->vce_table_size + header->uvd_table_size; + + /* 1, write to vce_mmsch_vf_ctx_addr_lo/hi register with GPU mc addr of memory descriptor location */ + WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_MMSCH_VF_CTX_ADDR_LO), lower_32_bits(addr)); + WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_MMSCH_VF_CTX_ADDR_HI), upper_32_bits(addr)); + + /* 2, update vmid of descriptor */ + data = RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_MMSCH_VF_VMID)); + data &= ~VCE_MMSCH_VF_VMID__VF_CTX_VMID_MASK; + data |= (0 << VCE_MMSCH_VF_VMID__VF_CTX_VMID__SHIFT); /* use domain0 for MM scheduler */ + WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_MMSCH_VF_VMID), data); + + /* 3, notify mmsch about the size of this descriptor */ + WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_MMSCH_VF_CTX_SIZE), size); + + /* 4, set resp to zero */ + WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_MMSCH_VF_MAILBOX_RESP), 0); + + /* 5, kick off the initialization and wait until VCE_MMSCH_VF_MAILBOX_RESP becomes non-zero */ + WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_MMSCH_VF_MAILBOX_HOST), 0x10000001); + + data = RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_MMSCH_VF_MAILBOX_RESP)); + loop = 1000; + while ((data & 0x10000002) != 0x10000002) { + udelay(10); + data = RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_MMSCH_VF_MAILBOX_RESP)); + loop--; + if (!loop) + break; + } + + if (!loop) { + dev_err(adev->dev, "failed to init MMSCH, mmVCE_MMSCH_VF_MAILBOX_RESP = %x\n", data); + return -EBUSY; + } + + return 0; +} + +static int uvd_v7_0_sriov_start(struct amdgpu_device *adev) +{ + struct amdgpu_ring *ring; + uint32_t offset, size, tmp; + uint32_t table_size = 0; + struct mmsch_v1_0_cmd_direct_write direct_wt = { {0} }; + struct mmsch_v1_0_cmd_direct_read_modify_write direct_rd_mod_wt = { {0} }; + struct mmsch_v1_0_cmd_direct_polling direct_poll = { {0} }; + struct mmsch_v1_0_cmd_end end = { {0} }; + uint32_t *init_table = 
adev->virt.mm_table.cpu_addr; + struct mmsch_v1_0_init_header *header = (struct mmsch_v1_0_init_header *)init_table; + + direct_wt.cmd_header.command_type = MMSCH_COMMAND__DIRECT_REG_WRITE; + direct_rd_mod_wt.cmd_header.command_type = MMSCH_COMMAND__DIRECT_REG_READ_MODIFY_WRITE; + direct_poll.cmd_header.command_type = MMSCH_COMMAND__DIRECT_REG_POLLING; + end.cmd_header.command_type = MMSCH_COMMAND__END; + + if (header->uvd_table_offset == 0 && header->uvd_table_size == 0) { + header->version = MMSCH_VERSION; + header->header_size = sizeof(struct mmsch_v1_0_init_header) >> 2; + + if (header->vce_table_offset == 0 && header->vce_table_size == 0) + header->uvd_table_offset = header->header_size; + else + header->uvd_table_offset = header->vce_table_size + header->vce_table_offset; + + init_table += header->uvd_table_offset; + + ring = &adev->uvd.ring; + size = AMDGPU_GPU_PAGE_ALIGN(adev->uvd.fw->size + 4); + + /* disable clock gating */ + MMSCH_V1_0_INSERT_DIRECT_RD_MOD_WT(SOC15_REG_OFFSET(UVD, 0, mmUVD_POWER_STATUS), + ~UVD_POWER_STATUS__UVD_PG_MODE_MASK, 0); + MMSCH_V1_0_INSERT_DIRECT_RD_MOD_WT(SOC15_REG_OFFSET(UVD, 0, mmUVD_STATUS), + 0xFFFFFFFF, 0x00000004); + /* mc resume*/ + if (adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) { + MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(UVD, 0, mmUVD_LMI_VCPU_CACHE_64BIT_BAR_LOW), + lower_32_bits(adev->firmware.ucode[AMDGPU_UCODE_ID_UVD].mc_addr)); + MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(UVD, 0, mmUVD_LMI_VCPU_CACHE_64BIT_BAR_HIGH), + upper_32_bits(adev->firmware.ucode[AMDGPU_UCODE_ID_UVD].mc_addr)); + offset = 0; + } else { + MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(UVD, 0, mmUVD_LMI_VCPU_CACHE_64BIT_BAR_LOW), + lower_32_bits(adev->uvd.gpu_addr)); + MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(UVD, 0, mmUVD_LMI_VCPU_CACHE_64BIT_BAR_HIGH), + upper_32_bits(adev->uvd.gpu_addr)); + offset = size; + } + + MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(UVD, 0, mmUVD_VCPU_CACHE_OFFSET0), + AMDGPU_UVD_FIRMWARE_OFFSET >> 
3); + MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(UVD, 0, mmUVD_VCPU_CACHE_SIZE0), size); + + MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(UVD, 0, mmUVD_LMI_VCPU_CACHE1_64BIT_BAR_LOW), + lower_32_bits(adev->uvd.gpu_addr + offset)); + MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(UVD, 0, mmUVD_LMI_VCPU_CACHE1_64BIT_BAR_HIGH), + upper_32_bits(adev->uvd.gpu_addr + offset)); + MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(UVD, 0, mmUVD_VCPU_CACHE_OFFSET1), (1 << 21)); + MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(UVD, 0, mmUVD_VCPU_CACHE_SIZE1), AMDGPU_UVD_HEAP_SIZE); + + MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(UVD, 0, mmUVD_LMI_VCPU_CACHE2_64BIT_BAR_LOW), + lower_32_bits(adev->uvd.gpu_addr + offset + AMDGPU_UVD_HEAP_SIZE)); + MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(UVD, 0, mmUVD_LMI_VCPU_CACHE2_64BIT_BAR_HIGH), + upper_32_bits(adev->uvd.gpu_addr + offset + AMDGPU_UVD_HEAP_SIZE)); + MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(UVD, 0, mmUVD_VCPU_CACHE_OFFSET2), (2 << 21)); + MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(UVD, 0, mmUVD_VCPU_CACHE_SIZE2), + AMDGPU_UVD_STACK_SIZE + (AMDGPU_UVD_SESSION_SIZE * 40)); + + MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(UVD, 0, mmUVD_UDEC_ADDR_CONFIG), + adev->gfx.config.gb_addr_config); + MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(UVD, 0, mmUVD_UDEC_DB_ADDR_CONFIG), + adev->gfx.config.gb_addr_config); + MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(UVD, 0, mmUVD_UDEC_DBW_ADDR_CONFIG), + adev->gfx.config.gb_addr_config); + MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(UVD, 0, mmUVD_GP_SCRATCH4), adev->uvd.max_handles); + /* mc resume end*/ + + /* disable clock gating */ + MMSCH_V1_0_INSERT_DIRECT_RD_MOD_WT(SOC15_REG_OFFSET(UVD, 0, mmUVD_CGC_CTRL), + ~UVD_CGC_CTRL__DYN_CLOCK_MODE_MASK, 0); + + /* disable interupt */ + MMSCH_V1_0_INSERT_DIRECT_RD_MOD_WT(SOC15_REG_OFFSET(UVD, 0, mmUVD_MASTINT_EN), + ~UVD_MASTINT_EN__VCPU_EN_MASK, 0); + + /* stall UMC and register bus before resetting VCPU */ + 
MMSCH_V1_0_INSERT_DIRECT_RD_MOD_WT(SOC15_REG_OFFSET(UVD, 0, mmUVD_LMI_CTRL2), + ~UVD_LMI_CTRL2__STALL_ARB_UMC_MASK, + UVD_LMI_CTRL2__STALL_ARB_UMC_MASK); + + /* put LMI, VCPU, RBC etc... into reset */ + MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(UVD, 0, mmUVD_SOFT_RESET), + (uint32_t)(UVD_SOFT_RESET__LMI_SOFT_RESET_MASK | + UVD_SOFT_RESET__VCPU_SOFT_RESET_MASK | + UVD_SOFT_RESET__LBSI_SOFT_RESET_MASK | + UVD_SOFT_RESET__RBC_SOFT_RESET_MASK | + UVD_SOFT_RESET__CSM_SOFT_RESET_MASK | + UVD_SOFT_RESET__CXW_SOFT_RESET_MASK | + UVD_SOFT_RESET__TAP_SOFT_RESET_MASK | + UVD_SOFT_RESET__LMI_UMC_SOFT_RESET_MASK)); + + /* initialize UVD memory controller */ + MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(UVD, 0, mmUVD_LMI_CTRL), + (uint32_t)((0x40 << UVD_LMI_CTRL__WRITE_CLEAN_TIMER__SHIFT) | + UVD_LMI_CTRL__WRITE_CLEAN_TIMER_EN_MASK | + UVD_LMI_CTRL__DATA_COHERENCY_EN_MASK | + UVD_LMI_CTRL__VCPU_DATA_COHERENCY_EN_MASK | + UVD_LMI_CTRL__REQ_MODE_MASK | + 0x00100000L)); + + /* disable byte swapping */ + MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(UVD, 0, mmUVD_LMI_SWAP_CNTL), 0); + MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(UVD, 0, mmUVD_MP_SWAP_CNTL), 0); + + MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(UVD, 0, mmUVD_MPC_SET_MUXA0), 0x40c2040); + MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(UVD, 0, mmUVD_MPC_SET_MUXA1), 0x0); + MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(UVD, 0, mmUVD_MPC_SET_MUXB0), 0x40c2040); + MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(UVD, 0, mmUVD_MPC_SET_MUXB1), 0x0); + MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(UVD, 0, mmUVD_MPC_SET_ALU), 0); + MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(UVD, 0, mmUVD_MPC_SET_MUX), 0x88); + + /* take all subblocks out of reset, except VCPU */ + MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(UVD, 0, mmUVD_SOFT_RESET), + UVD_SOFT_RESET__VCPU_SOFT_RESET_MASK); + + /* enable VCPU clock */ + MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(UVD, 0, mmUVD_VCPU_CNTL), + UVD_VCPU_CNTL__CLK_EN_MASK); + + /* enable UMC */ + 
MMSCH_V1_0_INSERT_DIRECT_RD_MOD_WT(SOC15_REG_OFFSET(UVD, 0, mmUVD_LMI_CTRL2), + ~UVD_LMI_CTRL2__STALL_ARB_UMC_MASK, 0); + + /* boot up the VCPU */ + MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(UVD, 0, mmUVD_SOFT_RESET), 0); + + MMSCH_V1_0_INSERT_DIRECT_POLL(SOC15_REG_OFFSET(UVD, 0, mmUVD_STATUS), 0x02, 0x02); + + /* enable master interrupt */ + MMSCH_V1_0_INSERT_DIRECT_RD_MOD_WT(SOC15_REG_OFFSET(UVD, 0, mmUVD_MASTINT_EN), + ~(UVD_MASTINT_EN__VCPU_EN_MASK|UVD_MASTINT_EN__SYS_EN_MASK), + (UVD_MASTINT_EN__VCPU_EN_MASK|UVD_MASTINT_EN__SYS_EN_MASK)); + + /* clear the bit 4 of UVD_STATUS */ + MMSCH_V1_0_INSERT_DIRECT_RD_MOD_WT(SOC15_REG_OFFSET(UVD, 0, mmUVD_STATUS), + ~(2 << UVD_STATUS__VCPU_REPORT__SHIFT), 0); + + /* force RBC into idle state */ + size = order_base_2(ring->ring_size); + tmp = REG_SET_FIELD(0, UVD_RBC_RB_CNTL, RB_BUFSZ, size); + tmp = REG_SET_FIELD(tmp, UVD_RBC_RB_CNTL, RB_BLKSZ, 1); + tmp = REG_SET_FIELD(tmp, UVD_RBC_RB_CNTL, RB_NO_FETCH, 1); + tmp = REG_SET_FIELD(tmp, UVD_RBC_RB_CNTL, RB_WPTR_POLL_EN, 0); + tmp = REG_SET_FIELD(tmp, UVD_RBC_RB_CNTL, RB_NO_UPDATE, 1); + tmp = REG_SET_FIELD(tmp, UVD_RBC_RB_CNTL, RB_RPTR_WR_EN, 1); + MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(UVD, 0, mmUVD_RBC_RB_CNTL), tmp); + + /* set the write pointer delay */ + MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(UVD, 0, mmUVD_RBC_RB_WPTR_CNTL), 0); + + /* set the wb address */ + MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(UVD, 0, mmUVD_RBC_RB_RPTR_ADDR), + (upper_32_bits(ring->gpu_addr) >> 2)); + + /* programm the RB_BASE for ring buffer */ + MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(UVD, 0, mmUVD_LMI_RBC_RB_64BIT_BAR_LOW), + lower_32_bits(ring->gpu_addr)); + MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(UVD, 0, mmUVD_LMI_RBC_RB_64BIT_BAR_HIGH), + upper_32_bits(ring->gpu_addr)); + + ring->wptr = 0; + ring = &adev->uvd.ring_enc[0]; + MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(UVD, 0, mmUVD_RB_BASE_LO), ring->gpu_addr); + 
MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(UVD, 0, mmUVD_RB_BASE_HI), upper_32_bits(ring->gpu_addr)); + MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(UVD, 0, mmUVD_RB_SIZE), ring->ring_size / 4); + + /* add end packet */ + memcpy((void *)init_table, &end, sizeof(struct mmsch_v1_0_cmd_end)); + table_size += sizeof(struct mmsch_v1_0_cmd_end) / 4; + header->uvd_table_size = table_size; + + return uvd_v7_0_mmsch_start(adev, &adev->virt.mm_table); + } + return -EINVAL; /* already initializaed ? */ +} + /** * uvd_v7_0_start - start UVD block * @@ -1034,42 +1304,38 @@ static void uvd_v7_0_vm_reg_wait(struct amdgpu_ring *ring, static void uvd_v7_0_ring_emit_vm_flush(struct amdgpu_ring *ring, unsigned vm_id, uint64_t pd_addr) { + struct amdgpu_vmhub *hub = &ring->adev->vmhub[ring->funcs->vmhub]; uint32_t req = ring->adev->gart.gart_funcs->get_invalidate_req(vm_id); uint32_t data0, data1, mask; - unsigned eng = ring->idx; - unsigned i; + unsigned eng = ring->vm_inv_eng; pd_addr = pd_addr | 0x1; /* valid bit */ /* now only use physical base address of PDE and valid */ BUG_ON(pd_addr & 0xFFFF00000000003EULL); - for (i = 0; i < AMDGPU_MAX_VMHUBS; ++i) { - struct amdgpu_vmhub *hub = &ring->adev->vmhub[i]; - - data0 = (hub->ctx0_ptb_addr_hi32 + vm_id * 2) << 2; - data1 = upper_32_bits(pd_addr); - uvd_v7_0_vm_reg_write(ring, data0, data1); - - data0 = (hub->ctx0_ptb_addr_lo32 + vm_id * 2) << 2; - data1 = lower_32_bits(pd_addr); - uvd_v7_0_vm_reg_write(ring, data0, data1); - - data0 = (hub->ctx0_ptb_addr_lo32 + vm_id * 2) << 2; - data1 = lower_32_bits(pd_addr); - mask = 0xffffffff; - uvd_v7_0_vm_reg_wait(ring, data0, data1, mask); - - /* flush TLB */ - data0 = (hub->vm_inv_eng0_req + eng) << 2; - data1 = req; - uvd_v7_0_vm_reg_write(ring, data0, data1); - - /* wait for flush */ - data0 = (hub->vm_inv_eng0_ack + eng) << 2; - data1 = 1 << vm_id; - mask = 1 << vm_id; - uvd_v7_0_vm_reg_wait(ring, data0, data1, mask); - } + data0 = (hub->ctx0_ptb_addr_hi32 + vm_id * 2) << 2; + data1 = 
upper_32_bits(pd_addr); + uvd_v7_0_vm_reg_write(ring, data0, data1); + + data0 = (hub->ctx0_ptb_addr_lo32 + vm_id * 2) << 2; + data1 = lower_32_bits(pd_addr); + uvd_v7_0_vm_reg_write(ring, data0, data1); + + data0 = (hub->ctx0_ptb_addr_lo32 + vm_id * 2) << 2; + data1 = lower_32_bits(pd_addr); + mask = 0xffffffff; + uvd_v7_0_vm_reg_wait(ring, data0, data1, mask); + + /* flush TLB */ + data0 = (hub->vm_inv_eng0_req + eng) << 2; + data1 = req; + uvd_v7_0_vm_reg_write(ring, data0, data1); + + /* wait for flush */ + data0 = (hub->vm_inv_eng0_ack + eng) << 2; + data1 = 1 << vm_id; + mask = 1 << vm_id; + uvd_v7_0_vm_reg_wait(ring, data0, data1, mask); } static void uvd_v7_0_enc_ring_insert_end(struct amdgpu_ring *ring) @@ -1080,44 +1346,37 @@ static void uvd_v7_0_enc_ring_insert_end(struct amdgpu_ring *ring) static void uvd_v7_0_enc_ring_emit_vm_flush(struct amdgpu_ring *ring, unsigned int vm_id, uint64_t pd_addr) { + struct amdgpu_vmhub *hub = &ring->adev->vmhub[ring->funcs->vmhub]; uint32_t req = ring->adev->gart.gart_funcs->get_invalidate_req(vm_id); - unsigned eng = ring->idx; - unsigned i; + unsigned eng = ring->vm_inv_eng; pd_addr = pd_addr | 0x1; /* valid bit */ /* now only use physical base address of PDE and valid */ BUG_ON(pd_addr & 0xFFFF00000000003EULL); - for (i = 0; i < AMDGPU_MAX_VMHUBS; ++i) { - struct amdgpu_vmhub *hub = &ring->adev->vmhub[i]; - - amdgpu_ring_write(ring, HEVC_ENC_CMD_REG_WRITE); - amdgpu_ring_write(ring, - (hub->ctx0_ptb_addr_hi32 + vm_id * 2) << 2); - amdgpu_ring_write(ring, upper_32_bits(pd_addr)); - - amdgpu_ring_write(ring, HEVC_ENC_CMD_REG_WRITE); - amdgpu_ring_write(ring, - (hub->ctx0_ptb_addr_lo32 + vm_id * 2) << 2); - amdgpu_ring_write(ring, lower_32_bits(pd_addr)); - - amdgpu_ring_write(ring, HEVC_ENC_CMD_REG_WAIT); - amdgpu_ring_write(ring, - (hub->ctx0_ptb_addr_lo32 + vm_id * 2) << 2); - amdgpu_ring_write(ring, 0xffffffff); - amdgpu_ring_write(ring, lower_32_bits(pd_addr)); - - /* flush TLB */ - amdgpu_ring_write(ring, 
HEVC_ENC_CMD_REG_WRITE); - amdgpu_ring_write(ring, (hub->vm_inv_eng0_req + eng) << 2); - amdgpu_ring_write(ring, req); - - /* wait for flush */ - amdgpu_ring_write(ring, HEVC_ENC_CMD_REG_WAIT); - amdgpu_ring_write(ring, (hub->vm_inv_eng0_ack + eng) << 2); - amdgpu_ring_write(ring, 1 << vm_id); - amdgpu_ring_write(ring, 1 << vm_id); - } + amdgpu_ring_write(ring, HEVC_ENC_CMD_REG_WRITE); + amdgpu_ring_write(ring, (hub->ctx0_ptb_addr_hi32 + vm_id * 2) << 2); + amdgpu_ring_write(ring, upper_32_bits(pd_addr)); + + amdgpu_ring_write(ring, HEVC_ENC_CMD_REG_WRITE); + amdgpu_ring_write(ring, (hub->ctx0_ptb_addr_lo32 + vm_id * 2) << 2); + amdgpu_ring_write(ring, lower_32_bits(pd_addr)); + + amdgpu_ring_write(ring, HEVC_ENC_CMD_REG_WAIT); + amdgpu_ring_write(ring, (hub->ctx0_ptb_addr_lo32 + vm_id * 2) << 2); + amdgpu_ring_write(ring, 0xffffffff); + amdgpu_ring_write(ring, lower_32_bits(pd_addr)); + + /* flush TLB */ + amdgpu_ring_write(ring, HEVC_ENC_CMD_REG_WRITE); + amdgpu_ring_write(ring, (hub->vm_inv_eng0_req + eng) << 2); + amdgpu_ring_write(ring, req); + + /* wait for flush */ + amdgpu_ring_write(ring, HEVC_ENC_CMD_REG_WAIT); + amdgpu_ring_write(ring, (hub->vm_inv_eng0_ack + eng) << 2); + amdgpu_ring_write(ring, 1 << vm_id); + amdgpu_ring_write(ring, 1 << vm_id); } #if 0 @@ -1240,7 +1499,8 @@ static int uvd_v7_0_process_interrupt(struct amdgpu_device *adev, amdgpu_fence_process(&adev->uvd.ring_enc[0]); break; case 120: - amdgpu_fence_process(&adev->uvd.ring_enc[1]); + if (!amdgpu_sriov_vf(adev)) + amdgpu_fence_process(&adev->uvd.ring_enc[1]); break; default: DRM_ERROR("Unhandled interrupt: %d %d\n", @@ -1448,13 +1708,14 @@ static const struct amdgpu_ring_funcs uvd_v7_0_ring_vm_funcs = { .align_mask = 0xf, .nop = PACKET0(SOC15_REG_OFFSET(UVD, 0, mmUVD_NO_OP), 0), .support_64bit_ptrs = false, + .vmhub = AMDGPU_MMHUB, .get_rptr = uvd_v7_0_ring_get_rptr, .get_wptr = uvd_v7_0_ring_get_wptr, .set_wptr = uvd_v7_0_ring_set_wptr, .emit_frame_size = 2 + /* 
uvd_v7_0_ring_emit_hdp_flush */ 2 + /* uvd_v7_0_ring_emit_hdp_invalidate */ - 34 * AMDGPU_MAX_VMHUBS + /* uvd_v7_0_ring_emit_vm_flush */ + 34 + /* uvd_v7_0_ring_emit_vm_flush */ 14 + 14, /* uvd_v7_0_ring_emit_fence x2 vm fence */ .emit_ib_size = 8, /* uvd_v7_0_ring_emit_ib */ .emit_ib = uvd_v7_0_ring_emit_ib, @@ -1475,11 +1736,12 @@ static const struct amdgpu_ring_funcs uvd_v7_0_enc_ring_vm_funcs = { .align_mask = 0x3f, .nop = HEVC_ENC_CMD_NO_OP, .support_64bit_ptrs = false, + .vmhub = AMDGPU_MMHUB, .get_rptr = uvd_v7_0_enc_ring_get_rptr, .get_wptr = uvd_v7_0_enc_ring_get_wptr, .set_wptr = uvd_v7_0_enc_ring_set_wptr, .emit_frame_size = - 17 * AMDGPU_MAX_VMHUBS + /* uvd_v7_0_enc_ring_emit_vm_flush */ + 17 + /* uvd_v7_0_enc_ring_emit_vm_flush */ 5 + 5 + /* uvd_v7_0_enc_ring_emit_fence x2 vm fence */ 1, /* uvd_v7_0_enc_ring_insert_end */ .emit_ib_size = 5, /* uvd_v7_0_enc_ring_emit_ib */ diff --git a/drivers/gpu/drm/amd/amdgpu/vce_v4_0.c b/drivers/gpu/drm/amd/amdgpu/vce_v4_0.c index edde5fe938d6..139f964196b4 100644 --- a/drivers/gpu/drm/amd/amdgpu/vce_v4_0.c +++ b/drivers/gpu/drm/amd/amdgpu/vce_v4_0.c @@ -49,63 +49,6 @@ static void vce_v4_0_mc_resume(struct amdgpu_device *adev); static void vce_v4_0_set_ring_funcs(struct amdgpu_device *adev); static void vce_v4_0_set_irq_funcs(struct amdgpu_device *adev); -static inline void mmsch_insert_direct_wt(struct mmsch_v1_0_cmd_direct_write *direct_wt, - uint32_t *init_table, - uint32_t reg_offset, - uint32_t value) -{ - direct_wt->cmd_header.reg_offset = reg_offset; - direct_wt->reg_value = value; - memcpy((void *)init_table, direct_wt, sizeof(struct mmsch_v1_0_cmd_direct_write)); -} - -static inline void mmsch_insert_direct_rd_mod_wt(struct mmsch_v1_0_cmd_direct_read_modify_write *direct_rd_mod_wt, - uint32_t *init_table, - uint32_t reg_offset, - uint32_t mask, uint32_t data) -{ - direct_rd_mod_wt->cmd_header.reg_offset = reg_offset; - direct_rd_mod_wt->mask_value = mask; - direct_rd_mod_wt->write_data = data; - 
memcpy((void *)init_table, direct_rd_mod_wt, - sizeof(struct mmsch_v1_0_cmd_direct_read_modify_write)); -} - -static inline void mmsch_insert_direct_poll(struct mmsch_v1_0_cmd_direct_polling *direct_poll, - uint32_t *init_table, - uint32_t reg_offset, - uint32_t mask, uint32_t wait) -{ - direct_poll->cmd_header.reg_offset = reg_offset; - direct_poll->mask_value = mask; - direct_poll->wait_value = wait; - memcpy((void *)init_table, direct_poll, sizeof(struct mmsch_v1_0_cmd_direct_polling)); -} - -#define INSERT_DIRECT_RD_MOD_WT(reg, mask, data) { \ - mmsch_insert_direct_rd_mod_wt(&direct_rd_mod_wt, \ - init_table, (reg), \ - (mask), (data)); \ - init_table += sizeof(struct mmsch_v1_0_cmd_direct_read_modify_write)/4; \ - table_size += sizeof(struct mmsch_v1_0_cmd_direct_read_modify_write)/4; \ -} - -#define INSERT_DIRECT_WT(reg, value) { \ - mmsch_insert_direct_wt(&direct_wt, \ - init_table, (reg), \ - (value)); \ - init_table += sizeof(struct mmsch_v1_0_cmd_direct_write)/4; \ - table_size += sizeof(struct mmsch_v1_0_cmd_direct_write)/4; \ -} - -#define INSERT_DIRECT_POLL(reg, mask, wait) { \ - mmsch_insert_direct_poll(&direct_poll, \ - init_table, (reg), \ - (mask), (wait)); \ - init_table += sizeof(struct mmsch_v1_0_cmd_direct_polling)/4; \ - table_size += sizeof(struct mmsch_v1_0_cmd_direct_polling)/4; \ -} - /** * vce_v4_0_ring_get_rptr - get read pointer * @@ -280,60 +223,73 @@ static int vce_v4_0_sriov_start(struct amdgpu_device *adev) init_table += header->vce_table_offset; ring = &adev->vce.ring[0]; - INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_RPTR), ring->wptr); - INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_WPTR), ring->wptr); - INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_BASE_LO), lower_32_bits(ring->gpu_addr)); - INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_BASE_HI), upper_32_bits(ring->gpu_addr)); - INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_SIZE), ring->ring_size / 4); + 
MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_BASE_LO), + lower_32_bits(ring->gpu_addr)); + MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_BASE_HI), + upper_32_bits(ring->gpu_addr)); + MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_SIZE), + ring->ring_size / 4); /* BEGING OF MC_RESUME */ - INSERT_DIRECT_RD_MOD_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_CLOCK_GATING_A), ~(1 << 16), 0); - INSERT_DIRECT_RD_MOD_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_UENC_CLOCK_GATING), ~0xFF9FF000, 0x1FF000); - INSERT_DIRECT_RD_MOD_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_UENC_REG_CLOCK_GATING), ~0x3F, 0x3F); - INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_CLOCK_GATING_B), 0x1FF); - - INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_LMI_CTRL), 0x398000); - INSERT_DIRECT_RD_MOD_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_LMI_CACHE_CTRL), ~0x1, 0); - INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_LMI_SWAP_CNTL), 0); - INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_LMI_SWAP_CNTL1), 0); - INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_LMI_VM_CTRL), 0); - - INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_LMI_VCPU_CACHE_40BIT_BAR0), adev->vce.gpu_addr >> 8); - INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_LMI_VCPU_CACHE_40BIT_BAR1), adev->vce.gpu_addr >> 8); - INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_LMI_VCPU_CACHE_40BIT_BAR2), adev->vce.gpu_addr >> 8); + MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_LMI_CTRL), 0x398000); + MMSCH_V1_0_INSERT_DIRECT_RD_MOD_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_LMI_CACHE_CTRL), ~0x1, 0); + MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_LMI_SWAP_CNTL), 0); + MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_LMI_SWAP_CNTL1), 0); + MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_LMI_VM_CTRL), 0); + + if (adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) { + MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_LMI_VCPU_CACHE_40BIT_BAR0), + adev->firmware.ucode[AMDGPU_UCODE_ID_VCE].mc_addr >> 
8); + MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_LMI_VCPU_CACHE_40BIT_BAR1), + adev->firmware.ucode[AMDGPU_UCODE_ID_VCE].mc_addr >> 8); + MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_LMI_VCPU_CACHE_40BIT_BAR2), + adev->firmware.ucode[AMDGPU_UCODE_ID_VCE].mc_addr >> 8); + } else { + MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_LMI_VCPU_CACHE_40BIT_BAR0), + adev->vce.gpu_addr >> 8); + MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_LMI_VCPU_CACHE_40BIT_BAR1), + adev->vce.gpu_addr >> 8); + MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_LMI_VCPU_CACHE_40BIT_BAR2), + adev->vce.gpu_addr >> 8); + } offset = AMDGPU_VCE_FIRMWARE_OFFSET; size = VCE_V4_0_FW_SIZE; - INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_VCPU_CACHE_OFFSET0), offset & 0x7FFFFFFF); - INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_VCPU_CACHE_SIZE0), size); + MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_VCPU_CACHE_OFFSET0), + offset & 0x7FFFFFFF); + MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_VCPU_CACHE_SIZE0), size); offset += size; size = VCE_V4_0_STACK_SIZE; - INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_VCPU_CACHE_OFFSET1), offset & 0x7FFFFFFF); - INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_VCPU_CACHE_SIZE1), size); + MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_VCPU_CACHE_OFFSET1), + offset & 0x7FFFFFFF); + MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_VCPU_CACHE_SIZE1), size); offset += size; size = VCE_V4_0_DATA_SIZE; - INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_VCPU_CACHE_OFFSET2), offset & 0x7FFFFFFF); - INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_VCPU_CACHE_SIZE2), size); + MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_VCPU_CACHE_OFFSET2), + offset & 0x7FFFFFFF); + MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_VCPU_CACHE_SIZE2), size); - INSERT_DIRECT_RD_MOD_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_LMI_CTRL2), ~0x100, 0); - 
INSERT_DIRECT_RD_MOD_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_SYS_INT_EN), - 0xffffffff, VCE_SYS_INT_EN__VCE_SYS_INT_TRAP_INTERRUPT_EN_MASK); + MMSCH_V1_0_INSERT_DIRECT_RD_MOD_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_LMI_CTRL2), ~0x100, 0); + MMSCH_V1_0_INSERT_DIRECT_RD_MOD_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_SYS_INT_EN), + 0xffffffff, VCE_SYS_INT_EN__VCE_SYS_INT_TRAP_INTERRUPT_EN_MASK); /* end of MC_RESUME */ - INSERT_DIRECT_RD_MOD_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_VCPU_CNTL), - ~0x200001, VCE_VCPU_CNTL__CLK_EN_MASK); - INSERT_DIRECT_RD_MOD_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_SOFT_RESET), - ~VCE_SOFT_RESET__ECPU_SOFT_RESET_MASK, 0); + MMSCH_V1_0_INSERT_DIRECT_RD_MOD_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_STATUS), + VCE_STATUS__JOB_BUSY_MASK, ~VCE_STATUS__JOB_BUSY_MASK); + MMSCH_V1_0_INSERT_DIRECT_RD_MOD_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_VCPU_CNTL), + ~0x200001, VCE_VCPU_CNTL__CLK_EN_MASK); + MMSCH_V1_0_INSERT_DIRECT_RD_MOD_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_SOFT_RESET), + ~VCE_SOFT_RESET__ECPU_SOFT_RESET_MASK, 0); - INSERT_DIRECT_POLL(SOC15_REG_OFFSET(VCE, 0, mmVCE_STATUS), - VCE_STATUS_VCPU_REPORT_FW_LOADED_MASK, - VCE_STATUS_VCPU_REPORT_FW_LOADED_MASK); + MMSCH_V1_0_INSERT_DIRECT_POLL(SOC15_REG_OFFSET(VCE, 0, mmVCE_STATUS), + VCE_STATUS_VCPU_REPORT_FW_LOADED_MASK, + VCE_STATUS_VCPU_REPORT_FW_LOADED_MASK); /* clear BUSY flag */ - INSERT_DIRECT_RD_MOD_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_STATUS), - ~VCE_STATUS__JOB_BUSY_MASK, 0); + MMSCH_V1_0_INSERT_DIRECT_RD_MOD_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_STATUS), + ~VCE_STATUS__JOB_BUSY_MASK, 0); /* add end packet */ memcpy((void *)init_table, &end, sizeof(struct mmsch_v1_0_cmd_end)); @@ -494,20 +450,9 @@ static int vce_v4_0_sw_init(void *handle) return r; } - if (amdgpu_sriov_vf(adev)) { - r = amdgpu_bo_create_kernel(adev, PAGE_SIZE, PAGE_SIZE, - AMDGPU_GEM_DOMAIN_VRAM, - &adev->virt.mm_table.bo, - &adev->virt.mm_table.gpu_addr, - (void *)&adev->virt.mm_table.cpu_addr); - if (!r) { - memset((void *)adev->virt.mm_table.cpu_addr, 0, PAGE_SIZE); - 
printk("mm table gpu addr = 0x%llx, cpu addr = %p. \n", - adev->virt.mm_table.gpu_addr, - adev->virt.mm_table.cpu_addr); - } + r = amdgpu_virt_alloc_mm_table(adev); + if (r) return r; - } return r; } @@ -518,10 +463,7 @@ static int vce_v4_0_sw_fini(void *handle) struct amdgpu_device *adev = (struct amdgpu_device *)handle; /* free MM table */ - if (amdgpu_sriov_vf(adev)) - amdgpu_bo_free_kernel(&adev->virt.mm_table.bo, - &adev->virt.mm_table.gpu_addr, - (void *)&adev->virt.mm_table.cpu_addr); + amdgpu_virt_free_mm_table(adev); r = amdgpu_vce_suspend(adev); if (r) @@ -973,44 +915,37 @@ static void vce_v4_0_ring_insert_end(struct amdgpu_ring *ring) static void vce_v4_0_emit_vm_flush(struct amdgpu_ring *ring, unsigned int vm_id, uint64_t pd_addr) { + struct amdgpu_vmhub *hub = &ring->adev->vmhub[ring->funcs->vmhub]; uint32_t req = ring->adev->gart.gart_funcs->get_invalidate_req(vm_id); - unsigned eng = ring->idx; - unsigned i; + unsigned eng = ring->vm_inv_eng; pd_addr = pd_addr | 0x1; /* valid bit */ /* now only use physical base address of PDE and valid */ BUG_ON(pd_addr & 0xFFFF00000000003EULL); - for (i = 0; i < AMDGPU_MAX_VMHUBS; ++i) { - struct amdgpu_vmhub *hub = &ring->adev->vmhub[i]; - - amdgpu_ring_write(ring, VCE_CMD_REG_WRITE); - amdgpu_ring_write(ring, - (hub->ctx0_ptb_addr_hi32 + vm_id * 2) << 2); - amdgpu_ring_write(ring, upper_32_bits(pd_addr)); - - amdgpu_ring_write(ring, VCE_CMD_REG_WRITE); - amdgpu_ring_write(ring, - (hub->ctx0_ptb_addr_lo32 + vm_id * 2) << 2); - amdgpu_ring_write(ring, lower_32_bits(pd_addr)); - - amdgpu_ring_write(ring, VCE_CMD_REG_WAIT); - amdgpu_ring_write(ring, - (hub->ctx0_ptb_addr_lo32 + vm_id * 2) << 2); - amdgpu_ring_write(ring, 0xffffffff); - amdgpu_ring_write(ring, lower_32_bits(pd_addr)); - - /* flush TLB */ - amdgpu_ring_write(ring, VCE_CMD_REG_WRITE); - amdgpu_ring_write(ring, (hub->vm_inv_eng0_req + eng) << 2); - amdgpu_ring_write(ring, req); - - /* wait for flush */ - amdgpu_ring_write(ring, VCE_CMD_REG_WAIT); - 
amdgpu_ring_write(ring, (hub->vm_inv_eng0_ack + eng) << 2); - amdgpu_ring_write(ring, 1 << vm_id); - amdgpu_ring_write(ring, 1 << vm_id); - } + amdgpu_ring_write(ring, VCE_CMD_REG_WRITE); + amdgpu_ring_write(ring, (hub->ctx0_ptb_addr_hi32 + vm_id * 2) << 2); + amdgpu_ring_write(ring, upper_32_bits(pd_addr)); + + amdgpu_ring_write(ring, VCE_CMD_REG_WRITE); + amdgpu_ring_write(ring, (hub->ctx0_ptb_addr_lo32 + vm_id * 2) << 2); + amdgpu_ring_write(ring, lower_32_bits(pd_addr)); + + amdgpu_ring_write(ring, VCE_CMD_REG_WAIT); + amdgpu_ring_write(ring, (hub->ctx0_ptb_addr_lo32 + vm_id * 2) << 2); + amdgpu_ring_write(ring, 0xffffffff); + amdgpu_ring_write(ring, lower_32_bits(pd_addr)); + + /* flush TLB */ + amdgpu_ring_write(ring, VCE_CMD_REG_WRITE); + amdgpu_ring_write(ring, (hub->vm_inv_eng0_req + eng) << 2); + amdgpu_ring_write(ring, req); + + /* wait for flush */ + amdgpu_ring_write(ring, VCE_CMD_REG_WAIT); + amdgpu_ring_write(ring, (hub->vm_inv_eng0_ack + eng) << 2); + amdgpu_ring_write(ring, 1 << vm_id); + amdgpu_ring_write(ring, 1 << vm_id); } static int vce_v4_0_set_interrupt_state(struct amdgpu_device *adev, @@ -1078,12 +1013,13 @@ static const struct amdgpu_ring_funcs vce_v4_0_ring_vm_funcs = { .align_mask = 0x3f, .nop = VCE_CMD_NO_OP, .support_64bit_ptrs = false, + .vmhub = AMDGPU_MMHUB, .get_rptr = vce_v4_0_ring_get_rptr, .get_wptr = vce_v4_0_ring_get_wptr, .set_wptr = vce_v4_0_ring_set_wptr, .parse_cs = amdgpu_vce_ring_parse_cs_vm, .emit_frame_size = - 17 * AMDGPU_MAX_VMHUBS + /* vce_v4_0_emit_vm_flush */ + 17 + /* vce_v4_0_emit_vm_flush */ 5 + 5 + /* amdgpu_vce_ring_emit_fence x2 vm fence */ 1, /* vce_v4_0_ring_insert_end */ .emit_ib_size = 5, /* vce_v4_0_ring_emit_ib */ |