From ba2472eaf7255dfba27cea0b674ffcc0ee348293 Mon Sep 17 00:00:00 2001 From: Nirmoy Das Date: Tue, 29 Jun 2021 13:44:13 +0200 Subject: drm/amdgpu: return early for non-TTM_PL_TT type BOs MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Return early for non-TTM_PL_TT BOs so that we don't pass wrong pointer to amdgpu_gtt_mgr_has_gart_addr() which assumes ttm_resource argument to be TTM_PL_TT type BO's. v3: remove extra braces. v2: merge if-conditions. Signed-off-by: Nirmoy Das Reviewed-by: Christian König Signed-off-by: Christian König Link: https://patchwork.freedesktop.org/patch/msgid/20210629114413.3371-1-nirmoy.das@amd.com --- drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c index 80dff29f2bc7..ac35aebdf9f1 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c @@ -924,7 +924,8 @@ static int amdgpu_ttm_backend_bind(struct ttm_device *bdev, bo_mem->mem_type == AMDGPU_PL_OA) return -EINVAL; - if (!amdgpu_gtt_mgr_has_gart_addr(bo_mem)) { + if (bo_mem->mem_type != TTM_PL_TT || + !amdgpu_gtt_mgr_has_gart_addr(bo_mem)) { gtt->offset = AMDGPU_BO_INVALID_OFFSET; return 0; } -- cgit v1.2.3 From 82c850c12fc250bdba25e7e66f54adab2ffcfcd6 Mon Sep 17 00:00:00 2001 From: Randy Dunlap Date: Sun, 27 Jun 2021 17:40:12 -0700 Subject: : correct a function name in kernel-doc MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Fix kernel-doc function name warning: ../include/linux/dma-resv.h:227: warning: expecting prototype for dma_resv_exclusive(). 
Prototype was for dma_resv_excl_fence() instead Fixes: 6edbd6abb783d ("dma-buf: rename and cleanup dma_resv_get_excl v3") Signed-off-by: Randy Dunlap Cc: Sumit Semwal Cc: Christian König Cc: linux-media@vger.kernel.org Cc: dri-devel@lists.freedesktop.org Cc: linaro-mm-sig@lists.linaro.org Signed-off-by: Daniel Vetter Link: https://patchwork.freedesktop.org/patch/msgid/20210628004012.6792-1-rdunlap@infradead.org --- include/linux/dma-resv.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/include/linux/dma-resv.h b/include/linux/dma-resv.h index 562b885cf9c3..e1ca2080a1ff 100644 --- a/include/linux/dma-resv.h +++ b/include/linux/dma-resv.h @@ -212,7 +212,7 @@ static inline void dma_resv_unlock(struct dma_resv *obj) } /** - * dma_resv_exclusive - return the object's exclusive fence + * dma_resv_excl_fence - return the object's exclusive fence * @obj: the reservation object * * Returns the exclusive fence (if any). Caller must either hold the objects -- cgit v1.2.3 From cd8f318fbd266b127ffc93cc4c1eaf9a5196fafb Mon Sep 17 00:00:00 2001 From: Jing Xiangfeng Date: Tue, 29 Jun 2021 19:59:56 +0800 Subject: drm/gma500: Add the missed drm_gem_object_put() in psb_user_framebuffer_create() psb_user_framebuffer_create() misses to call drm_gem_object_put() in an error path. Add the missed function call to fix it. 
Signed-off-by: Jing Xiangfeng Signed-off-by: Daniel Vetter Link: https://patchwork.freedesktop.org/patch/msgid/20210629115956.15160-1-jingxiangfeng@huawei.com --- drivers/gpu/drm/gma500/framebuffer.c | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/gma500/framebuffer.c b/drivers/gpu/drm/gma500/framebuffer.c index ebe9dccf2d83..0b8648396fb2 100644 --- a/drivers/gpu/drm/gma500/framebuffer.c +++ b/drivers/gpu/drm/gma500/framebuffer.c @@ -352,6 +352,7 @@ static struct drm_framebuffer *psb_user_framebuffer_create const struct drm_mode_fb_cmd2 *cmd) { struct drm_gem_object *obj; + struct drm_framebuffer *fb; /* * Find the GEM object and thus the gtt range object that is @@ -362,7 +363,11 @@ static struct drm_framebuffer *psb_user_framebuffer_create return ERR_PTR(-ENOENT); /* Let the core code do all the work */ - return psb_framebuffer_create(dev, cmd, obj); + fb = psb_framebuffer_create(dev, cmd, obj); + if (IS_ERR(fb)) + drm_gem_object_put(obj); + + return fb; } static int psbfb_probe(struct drm_fb_helper *fb_helper, -- cgit v1.2.3 From a51482458dafb836dbf7c8ae3200ca8db7348201 Mon Sep 17 00:00:00 2001 From: Zhan Liu Date: Mon, 14 Jun 2021 14:54:14 -0400 Subject: drm/amd/display: Enabling eDP no power sequencing with DAL feature mask [Why] Sometimes, DP receiver chip power-controlled externally by an Embedded Controller could be treated and used as eDP, if it drives mobile display. In this case, we shouldn't be doing power-sequencing, hence we can skip waiting for T7-ready and T9-ready. [How] Added a feature mask to enable eDP no power sequencing feature. To enable this, set 0x10 flag in amdgpu.dcfeaturemask on Linux command line. 
Signed-off-by: Zhan Liu Reviewed-by: Nikola Cornij Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c | 1 + drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c | 3 +++ drivers/gpu/drm/amd/display/dc/dc.h | 1 + .../amd/display/dc/dce110/dce110_hw_sequencer.c | 31 +++++++++++++++++++--- drivers/gpu/drm/amd/include/amd_shared.h | 10 ++++--- 5 files changed, 38 insertions(+), 8 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c index 6f30c525caac..975be7d78d6d 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c @@ -160,6 +160,7 @@ int amdgpu_smu_pptable_id = -1; * highest. That helps saving some idle power. * DISABLE_FRACTIONAL_PWM (bit 2) disabled by default * PSR (bit 3) disabled by default + * EDP NO POWER SEQUENCING (bit 4) disabled by default */ uint amdgpu_dc_feature_mask = 2; uint amdgpu_dc_debug_mask; diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c index b5b5ccf0ed71..2688a2e759de 100644 --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c @@ -1160,6 +1160,9 @@ static int amdgpu_dm_init(struct amdgpu_device *adev) if (amdgpu_dc_feature_mask & DC_DISABLE_FRACTIONAL_PWM_MASK) init_data.flags.disable_fractional_pwm = true; + if (amdgpu_dc_feature_mask & DC_EDP_NO_POWER_SEQUENCING) + init_data.flags.edp_no_power_sequencing = true; + init_data.flags.power_down_display_on_boot = true; INIT_LIST_HEAD(&adev->dm.da_list); diff --git a/drivers/gpu/drm/amd/display/dc/dc.h b/drivers/gpu/drm/amd/display/dc/dc.h index 5101a4f8f69f..45640f1c26c4 100644 --- a/drivers/gpu/drm/amd/display/dc/dc.h +++ b/drivers/gpu/drm/amd/display/dc/dc.h @@ -297,6 +297,7 @@ struct dc_config { bool allow_seamless_boot_optimization; bool power_down_display_on_boot; bool edp_not_connected; + bool edp_no_power_sequencing; bool force_enum_edp; bool 
forced_clocks; bool allow_lttpr_non_transparent_mode; diff --git a/drivers/gpu/drm/amd/display/dc/dce110/dce110_hw_sequencer.c b/drivers/gpu/drm/amd/display/dc/dce110/dce110_hw_sequencer.c index 2938caaa2299..62d595ded866 100644 --- a/drivers/gpu/drm/amd/display/dc/dce110/dce110_hw_sequencer.c +++ b/drivers/gpu/drm/amd/display/dc/dce110/dce110_hw_sequencer.c @@ -1022,8 +1022,20 @@ void dce110_edp_backlight_control( /* dc_service_sleep_in_milliseconds(50); */ /*edp 1.2*/ panel_instance = link->panel_cntl->inst; - if (cntl.action == TRANSMITTER_CONTROL_BACKLIGHT_ON) - edp_receiver_ready_T7(link); + + if (cntl.action == TRANSMITTER_CONTROL_BACKLIGHT_ON) { + if (!link->dc->config.edp_no_power_sequencing) + /* + * Sometimes, DP receiver chip power-controlled externally by an + * Embedded Controller could be treated and used as eDP, + * if it drives mobile display. In this case, + * we shouldn't be doing power-sequencing, hence we can skip + * waiting for T7-ready. + */ + edp_receiver_ready_T7(link); + else + DC_LOG_DC("edp_receiver_ready_T7 skipped\n"); + } if (ctx->dc->ctx->dmub_srv && ctx->dc->debug.dmub_command_table) { @@ -1048,8 +1060,19 @@ void dce110_edp_backlight_control( dc_link_backlight_enable_aux(link, enable); /*edp 1.2*/ - if (cntl.action == TRANSMITTER_CONTROL_BACKLIGHT_OFF) - edp_add_delay_for_T9(link); + if (cntl.action == TRANSMITTER_CONTROL_BACKLIGHT_OFF) { + if (!link->dc->config.edp_no_power_sequencing) + /* + * Sometimes, DP receiver chip power-controlled externally by an + * Embedded Controller could be treated and used as eDP, + * if it drives mobile display. In this case, + * we shouldn't be doing power-sequencing, hence we can skip + * waiting for T9-ready. 
+ */ + edp_add_delay_for_T9(link); + else + DC_LOG_DC("edp_receiver_ready_T9 skipped\n"); + } if (!enable && link->dpcd_sink_ext_caps.bits.oled) msleep(OLED_PRE_T11_DELAY); diff --git a/drivers/gpu/drm/amd/include/amd_shared.h b/drivers/gpu/drm/amd/include/amd_shared.h index 332b0df53e52..ff1d3d4a6488 100644 --- a/drivers/gpu/drm/amd/include/amd_shared.h +++ b/drivers/gpu/drm/amd/include/amd_shared.h @@ -223,10 +223,12 @@ enum amd_harvest_ip_mask { }; enum DC_FEATURE_MASK { - DC_FBC_MASK = 0x1, - DC_MULTI_MON_PP_MCLK_SWITCH_MASK = 0x2, - DC_DISABLE_FRACTIONAL_PWM_MASK = 0x4, - DC_PSR_MASK = 0x8, + //Default value can be found at "uint amdgpu_dc_feature_mask" + DC_FBC_MASK = (1 << 0), //0x1, disabled by default + DC_MULTI_MON_PP_MCLK_SWITCH_MASK = (1 << 1), //0x2, enabled by default + DC_DISABLE_FRACTIONAL_PWM_MASK = (1 << 2), //0x4, disabled by default + DC_PSR_MASK = (1 << 3), //0x8, disabled by default + DC_EDP_NO_POWER_SEQUENCING = (1 << 4), //0x10, disabled by default }; enum DC_DEBUG_MASK { -- cgit v1.2.3 From dafff0476d6554a5b84f0d48b99368333de58d5b Mon Sep 17 00:00:00 2001 From: Chengzhe Liu Date: Fri, 18 Jun 2021 17:02:55 +0800 Subject: drm/amdgpu: Power down VCN and JPEG before disabling SMU features When unloading driver, if VCN is powered on, sending message DisableAllSmuFeatures to SMU will cause SMU hang. We need to power down VCN and JPEG before clean up SMU. 
Signed-off-by: Chengzhe Liu Reviewed-by: Alex Deucher Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/pm/swsmu/amdgpu_smu.c | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/amd/pm/swsmu/amdgpu_smu.c b/drivers/gpu/drm/amd/pm/swsmu/amdgpu_smu.c index cb375f1beebd..ebe672142808 100644 --- a/drivers/gpu/drm/amd/pm/swsmu/amdgpu_smu.c +++ b/drivers/gpu/drm/amd/pm/swsmu/amdgpu_smu.c @@ -1453,10 +1453,14 @@ static int smu_hw_fini(void *handle) if (smu->is_apu) { smu_powergate_sdma(&adev->smu, true); - smu_dpm_set_vcn_enable(smu, false); - smu_dpm_set_jpeg_enable(smu, false); } + smu_dpm_set_vcn_enable(smu, false); + smu_dpm_set_jpeg_enable(smu, false); + + adev->vcn.cur_state = AMD_PG_STATE_GATE; + adev->jpeg.cur_state = AMD_PG_STATE_GATE; + if (!smu->pm_enabled) return 0; -- cgit v1.2.3 From 0dbc2c81a1ab7dadfc534d89d79d4e8c394123af Mon Sep 17 00:00:00 2001 From: Evan Quan Date: Tue, 25 May 2021 11:15:55 +0800 Subject: drm/amdgpu: correct tcp harvest setting Add missing settings for SQC bits. And correct some confusing logics around active wgp bitmap calculation. 
Signed-off-by: Evan Quan Reviewed-by: Lijo Lazar Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c | 97 ++++++++++++++++++---------------- 1 file changed, 51 insertions(+), 46 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c index 2d56b60bc058..ef08f2986475 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c @@ -5086,47 +5086,44 @@ static void gfx_v10_0_tcp_harvest(struct amdgpu_device *adev) 4 + /* RMI */ 1); /* SQG */ - if (adev->asic_type == CHIP_NAVI10 || - adev->asic_type == CHIP_NAVI14 || - adev->asic_type == CHIP_NAVI12) { - mutex_lock(&adev->grbm_idx_mutex); - for (i = 0; i < adev->gfx.config.max_shader_engines; i++) { - for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) { - gfx_v10_0_select_se_sh(adev, i, j, 0xffffffff); - wgp_active_bitmap = gfx_v10_0_get_wgp_active_bitmap_per_sh(adev); - /* - * Set corresponding TCP bits for the inactive WGPs in - * GCRD_SA_TARGETS_DISABLE - */ - gcrd_targets_disable_tcp = 0; - /* Set TCP & SQC bits in UTCL1_UTCL0_INVREQ_DISABLE */ - utcl_invreq_disable = 0; - - for (k = 0; k < max_wgp_per_sh; k++) { - if (!(wgp_active_bitmap & (1 << k))) { - gcrd_targets_disable_tcp |= 3 << (2 * k); - utcl_invreq_disable |= (3 << (2 * k)) | - (3 << (2 * (max_wgp_per_sh + k))); - } + mutex_lock(&adev->grbm_idx_mutex); + for (i = 0; i < adev->gfx.config.max_shader_engines; i++) { + for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) { + gfx_v10_0_select_se_sh(adev, i, j, 0xffffffff); + wgp_active_bitmap = gfx_v10_0_get_wgp_active_bitmap_per_sh(adev); + /* + * Set corresponding TCP bits for the inactive WGPs in + * GCRD_SA_TARGETS_DISABLE + */ + gcrd_targets_disable_tcp = 0; + /* Set TCP & SQC bits in UTCL1_UTCL0_INVREQ_DISABLE */ + utcl_invreq_disable = 0; + + for (k = 0; k < max_wgp_per_sh; k++) { + if (!(wgp_active_bitmap & (1 << k))) { + gcrd_targets_disable_tcp |= 3 << (2 * k); + gcrd_targets_disable_tcp |= 1 
<< (k + (max_wgp_per_sh * 2)); + utcl_invreq_disable |= (3 << (2 * k)) | + (3 << (2 * (max_wgp_per_sh + k))); } - - tmp = RREG32_SOC15(GC, 0, mmUTCL1_UTCL0_INVREQ_DISABLE); - /* only override TCP & SQC bits */ - tmp &= 0xffffffff << (4 * max_wgp_per_sh); - tmp |= (utcl_invreq_disable & utcl_invreq_disable_mask); - WREG32_SOC15(GC, 0, mmUTCL1_UTCL0_INVREQ_DISABLE, tmp); - - tmp = RREG32_SOC15(GC, 0, mmGCRD_SA_TARGETS_DISABLE); - /* only override TCP bits */ - tmp &= 0xffffffff << (2 * max_wgp_per_sh); - tmp |= (gcrd_targets_disable_tcp & gcrd_targets_disable_mask); - WREG32_SOC15(GC, 0, mmGCRD_SA_TARGETS_DISABLE, tmp); } - } - gfx_v10_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff); - mutex_unlock(&adev->grbm_idx_mutex); + tmp = RREG32_SOC15(GC, 0, mmUTCL1_UTCL0_INVREQ_DISABLE); + /* only override TCP & SQC bits */ + tmp &= (0xffffffffU << (4 * max_wgp_per_sh)); + tmp |= (utcl_invreq_disable & utcl_invreq_disable_mask); + WREG32_SOC15(GC, 0, mmUTCL1_UTCL0_INVREQ_DISABLE, tmp); + + tmp = RREG32_SOC15(GC, 0, mmGCRD_SA_TARGETS_DISABLE); + /* only override TCP & SQC bits */ + tmp &= (0xffffffffU << (3 * max_wgp_per_sh)); + tmp |= (gcrd_targets_disable_tcp & gcrd_targets_disable_mask); + WREG32_SOC15(GC, 0, mmGCRD_SA_TARGETS_DISABLE, tmp); + } } + + gfx_v10_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff); + mutex_unlock(&adev->grbm_idx_mutex); } static void gfx_v10_0_get_tcc_info(struct amdgpu_device *adev) @@ -7404,7 +7401,10 @@ static int gfx_v10_0_hw_init(void *handle) * init golden registers and rlc resume may override some registers, * reconfig them here */ - gfx_v10_0_tcp_harvest(adev); + if (adev->asic_type == CHIP_NAVI10 || + adev->asic_type == CHIP_NAVI14 || + adev->asic_type == CHIP_NAVI12) + gfx_v10_0_tcp_harvest(adev); r = gfx_v10_0_cp_resume(adev); if (r) @@ -9324,17 +9324,22 @@ static void gfx_v10_0_set_user_wgp_inactive_bitmap_per_sh(struct amdgpu_device * static u32 gfx_v10_0_get_wgp_active_bitmap_per_sh(struct amdgpu_device *adev) { - 
u32 data, wgp_bitmask; - data = RREG32_SOC15(GC, 0, mmCC_GC_SHADER_ARRAY_CONFIG); - data |= RREG32_SOC15(GC, 0, mmGC_USER_SHADER_ARRAY_CONFIG); + u32 disabled_mask = + ~amdgpu_gfx_create_bitmask(adev->gfx.config.max_cu_per_sh >> 1); + u32 efuse_setting = 0; + u32 vbios_setting = 0; + + efuse_setting = RREG32_SOC15(GC, 0, mmCC_GC_SHADER_ARRAY_CONFIG); + efuse_setting &= CC_GC_SHADER_ARRAY_CONFIG__INACTIVE_WGPS_MASK; + efuse_setting >>= CC_GC_SHADER_ARRAY_CONFIG__INACTIVE_WGPS__SHIFT; - data &= CC_GC_SHADER_ARRAY_CONFIG__INACTIVE_WGPS_MASK; - data >>= CC_GC_SHADER_ARRAY_CONFIG__INACTIVE_WGPS__SHIFT; + vbios_setting = RREG32_SOC15(GC, 0, mmGC_USER_SHADER_ARRAY_CONFIG); + vbios_setting &= GC_USER_SHADER_ARRAY_CONFIG__INACTIVE_WGPS_MASK; + vbios_setting >>= GC_USER_SHADER_ARRAY_CONFIG__INACTIVE_WGPS__SHIFT; - wgp_bitmask = - amdgpu_gfx_create_bitmask(adev->gfx.config.max_cu_per_sh >> 1); + disabled_mask |= efuse_setting | vbios_setting; - return (~data) & wgp_bitmask; + return (~disabled_mask); } static u32 gfx_v10_0_get_cu_active_bitmap_per_sh(struct amdgpu_device *adev) -- cgit v1.2.3 From 9c26ddb1c5b6e30c6bca48b8ad9205d96efe93d0 Mon Sep 17 00:00:00 2001 From: Evan Quan Date: Tue, 25 May 2021 11:43:38 +0800 Subject: drm/amdgpu: fix Navi1x tcp power gating hang when issuing lightweight invalidation Fix TCP hang when a lightweight invalidation happens on Navi1x. 
Signed-off-by: Evan Quan Reviewed-by: Lijo Lazar Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c | 95 ++++++++++++++++++++++++++++++++++ 1 file changed, 95 insertions(+) diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c index ef08f2986475..d08a823827a3 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c @@ -7961,6 +7961,97 @@ static void gfx_v10_0_update_fine_grain_clock_gating(struct amdgpu_device *adev, } } +static void gfx_v10_0_apply_medium_grain_clock_gating_workaround(struct amdgpu_device *adev) +{ + uint32_t reg_data = 0; + uint32_t reg_idx = 0; + uint32_t i; + + const uint32_t tcp_ctrl_regs[] = { + mmCGTS_SA0_WGP00_CU0_TCP_CTRL_REG, + mmCGTS_SA0_WGP00_CU1_TCP_CTRL_REG, + mmCGTS_SA0_WGP01_CU0_TCP_CTRL_REG, + mmCGTS_SA0_WGP01_CU1_TCP_CTRL_REG, + mmCGTS_SA0_WGP02_CU0_TCP_CTRL_REG, + mmCGTS_SA0_WGP02_CU1_TCP_CTRL_REG, + mmCGTS_SA0_WGP10_CU0_TCP_CTRL_REG, + mmCGTS_SA0_WGP10_CU1_TCP_CTRL_REG, + mmCGTS_SA0_WGP11_CU0_TCP_CTRL_REG, + mmCGTS_SA0_WGP11_CU1_TCP_CTRL_REG, + mmCGTS_SA0_WGP12_CU0_TCP_CTRL_REG, + mmCGTS_SA0_WGP12_CU1_TCP_CTRL_REG, + mmCGTS_SA1_WGP00_CU0_TCP_CTRL_REG, + mmCGTS_SA1_WGP00_CU1_TCP_CTRL_REG, + mmCGTS_SA1_WGP01_CU0_TCP_CTRL_REG, + mmCGTS_SA1_WGP01_CU1_TCP_CTRL_REG, + mmCGTS_SA1_WGP02_CU0_TCP_CTRL_REG, + mmCGTS_SA1_WGP02_CU1_TCP_CTRL_REG, + mmCGTS_SA1_WGP10_CU0_TCP_CTRL_REG, + mmCGTS_SA1_WGP10_CU1_TCP_CTRL_REG, + mmCGTS_SA1_WGP11_CU0_TCP_CTRL_REG, + mmCGTS_SA1_WGP11_CU1_TCP_CTRL_REG, + mmCGTS_SA1_WGP12_CU0_TCP_CTRL_REG, + mmCGTS_SA1_WGP12_CU1_TCP_CTRL_REG + }; + + const uint32_t tcp_ctrl_regs_nv12[] = { + mmCGTS_SA0_WGP00_CU0_TCP_CTRL_REG, + mmCGTS_SA0_WGP00_CU1_TCP_CTRL_REG, + mmCGTS_SA0_WGP01_CU0_TCP_CTRL_REG, + mmCGTS_SA0_WGP01_CU1_TCP_CTRL_REG, + mmCGTS_SA0_WGP02_CU0_TCP_CTRL_REG, + mmCGTS_SA0_WGP02_CU1_TCP_CTRL_REG, + mmCGTS_SA0_WGP10_CU0_TCP_CTRL_REG, + mmCGTS_SA0_WGP10_CU1_TCP_CTRL_REG, + mmCGTS_SA0_WGP11_CU0_TCP_CTRL_REG, + 
mmCGTS_SA0_WGP11_CU1_TCP_CTRL_REG, + mmCGTS_SA1_WGP00_CU0_TCP_CTRL_REG, + mmCGTS_SA1_WGP00_CU1_TCP_CTRL_REG, + mmCGTS_SA1_WGP01_CU0_TCP_CTRL_REG, + mmCGTS_SA1_WGP01_CU1_TCP_CTRL_REG, + mmCGTS_SA1_WGP02_CU0_TCP_CTRL_REG, + mmCGTS_SA1_WGP02_CU1_TCP_CTRL_REG, + mmCGTS_SA1_WGP10_CU0_TCP_CTRL_REG, + mmCGTS_SA1_WGP10_CU1_TCP_CTRL_REG, + mmCGTS_SA1_WGP11_CU0_TCP_CTRL_REG, + mmCGTS_SA1_WGP11_CU1_TCP_CTRL_REG, + }; + + const uint32_t sm_ctlr_regs[] = { + mmCGTS_SA0_QUAD0_SM_CTRL_REG, + mmCGTS_SA0_QUAD1_SM_CTRL_REG, + mmCGTS_SA1_QUAD0_SM_CTRL_REG, + mmCGTS_SA1_QUAD1_SM_CTRL_REG + }; + + if (adev->asic_type == CHIP_NAVI12) { + for (i = 0; i < ARRAY_SIZE(tcp_ctrl_regs_nv12); i++) { + reg_idx = adev->reg_offset[GC_HWIP][0][mmCGTS_SA0_WGP00_CU0_TCP_CTRL_REG_BASE_IDX] + + tcp_ctrl_regs_nv12[i]; + reg_data = RREG32(reg_idx); + reg_data |= CGTS_SA0_WGP00_CU0_TCP_CTRL_REG__TCPI_LS_OVERRIDE_MASK; + WREG32(reg_idx, reg_data); + } + } else { + for (i = 0; i < ARRAY_SIZE(tcp_ctrl_regs); i++) { + reg_idx = adev->reg_offset[GC_HWIP][0][mmCGTS_SA0_WGP00_CU0_TCP_CTRL_REG_BASE_IDX] + + tcp_ctrl_regs[i]; + reg_data = RREG32(reg_idx); + reg_data |= CGTS_SA0_WGP00_CU0_TCP_CTRL_REG__TCPI_LS_OVERRIDE_MASK; + WREG32(reg_idx, reg_data); + } + } + + for (i = 0; i < ARRAY_SIZE(sm_ctlr_regs); i++) { + reg_idx = adev->reg_offset[GC_HWIP][0][mmCGTS_SA0_QUAD0_SM_CTRL_REG_BASE_IDX] + + sm_ctlr_regs[i]; + reg_data = RREG32(reg_idx); + reg_data &= ~CGTS_SA0_QUAD0_SM_CTRL_REG__SM_MODE_MASK; + reg_data |= 2 << CGTS_SA0_QUAD0_SM_CTRL_REG__SM_MODE__SHIFT; + WREG32(reg_idx, reg_data); + } +} + static int gfx_v10_0_update_gfx_clock_gating(struct amdgpu_device *adev, bool enable) { @@ -7977,6 +8068,10 @@ static int gfx_v10_0_update_gfx_clock_gating(struct amdgpu_device *adev, gfx_v10_0_update_3d_clock_gating(adev, enable); /* === CGCG + CGLS === */ gfx_v10_0_update_coarse_grain_clock_gating(adev, enable); + + if ((adev->asic_type >= CHIP_NAVI10) && + (adev->asic_type <= CHIP_NAVI12)) + 
gfx_v10_0_apply_medium_grain_clock_gating_workaround(adev); } else { /* CGCG/CGLS should be disabled before MGCG/MGLS * === CGCG + CGLS === -- cgit v1.2.3 From 5a5da8ae9546031e43efd4fa5aa8baa481e83dfb Mon Sep 17 00:00:00 2001 From: Evan Quan Date: Tue, 25 May 2021 12:08:53 +0800 Subject: drm/amdgpu: fix NAK-G generation during PCI-e link width switch A lot of NAK-G are being generated when link width switching is happening. The WA for this issue is to program the SPC to 4 symbols per clock during bootup when the native PCIE width is x4. Signed-off-by: Evan Quan Reviewed-by: Lijo Lazar Signed-off-by: Alex Deucher Cc: stable@vger.kernel.org --- drivers/gpu/drm/amd/amdgpu/amdgpu_nbio.h | 1 + drivers/gpu/drm/amd/amdgpu/nbio_v2_3.c | 28 ++++++++++++++++++++++++++++ drivers/gpu/drm/amd/amdgpu/nv.c | 3 +++ 3 files changed, 32 insertions(+) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_nbio.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_nbio.h index 25ee53545837..43d074bb00a1 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_nbio.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_nbio.h @@ -93,6 +93,7 @@ struct amdgpu_nbio_funcs { void (*enable_aspm)(struct amdgpu_device *adev, bool enable); void (*program_aspm)(struct amdgpu_device *adev); + void (*apply_lc_spc_mode_wa)(struct amdgpu_device *adev); }; struct amdgpu_nbio { diff --git a/drivers/gpu/drm/amd/amdgpu/nbio_v2_3.c b/drivers/gpu/drm/amd/amdgpu/nbio_v2_3.c index 05ddec7ba7e2..315d57bb373d 100644 --- a/drivers/gpu/drm/amd/amdgpu/nbio_v2_3.c +++ b/drivers/gpu/drm/amd/amdgpu/nbio_v2_3.c @@ -51,6 +51,8 @@ #define mmBIF_MMSCH1_DOORBELL_RANGE 0x01d8 #define mmBIF_MMSCH1_DOORBELL_RANGE_BASE_IDX 2 +#define smnPCIE_LC_LINK_WIDTH_CNTL 0x11140288 + static void nbio_v2_3_remap_hdp_registers(struct amdgpu_device *adev) { WREG32_SOC15(NBIO, 0, mmREMAP_HDP_MEM_FLUSH_CNTL, @@ -463,6 +465,31 @@ static void nbio_v2_3_program_aspm(struct amdgpu_device *adev) WREG32_PCIE(smnPCIE_LC_CNTL3, data); } +static void nbio_v2_3_apply_lc_spc_mode_wa(struct amdgpu_device 
*adev) +{ + uint32_t reg_data = 0; + uint32_t link_width = 0; + + if (!((adev->asic_type >= CHIP_NAVI10) && + (adev->asic_type <= CHIP_NAVI12))) + return; + + reg_data = RREG32_PCIE(smnPCIE_LC_LINK_WIDTH_CNTL); + link_width = (reg_data & PCIE_LC_LINK_WIDTH_CNTL__LC_LINK_WIDTH_RD_MASK) + >> PCIE_LC_LINK_WIDTH_CNTL__LC_LINK_WIDTH_RD__SHIFT; + + /* + * Program PCIE_LC_CNTL6.LC_SPC_MODE_8GT to 0x2 (4 symbols per clock data) + * if link_width is 0x3 (x4) + */ + if (0x3 == link_width) { + reg_data = RREG32_PCIE(smnPCIE_LC_CNTL6); + reg_data &= ~PCIE_LC_CNTL6__LC_SPC_MODE_8GT_MASK; + reg_data |= (0x2 << PCIE_LC_CNTL6__LC_SPC_MODE_8GT__SHIFT); + WREG32_PCIE(smnPCIE_LC_CNTL6, reg_data); + } +} + const struct amdgpu_nbio_funcs nbio_v2_3_funcs = { .get_hdp_flush_req_offset = nbio_v2_3_get_hdp_flush_req_offset, .get_hdp_flush_done_offset = nbio_v2_3_get_hdp_flush_done_offset, @@ -484,4 +511,5 @@ const struct amdgpu_nbio_funcs nbio_v2_3_funcs = { .remap_hdp_registers = nbio_v2_3_remap_hdp_registers, .enable_aspm = nbio_v2_3_enable_aspm, .program_aspm = nbio_v2_3_program_aspm, + .apply_lc_spc_mode_wa = nbio_v2_3_apply_lc_spc_mode_wa, }; diff --git a/drivers/gpu/drm/amd/amdgpu/nv.c b/drivers/gpu/drm/amd/amdgpu/nv.c index 455d0425787c..63c96ca8d2a2 100644 --- a/drivers/gpu/drm/amd/amdgpu/nv.c +++ b/drivers/gpu/drm/amd/amdgpu/nv.c @@ -1411,6 +1411,9 @@ static int nv_common_hw_init(void *handle) { struct amdgpu_device *adev = (struct amdgpu_device *)handle; + if (adev->nbio.funcs->apply_lc_spc_mode_wa) + adev->nbio.funcs->apply_lc_spc_mode_wa(adev); + /* enable pcie gen2/3 link */ nv_pcie_gen3_enable(adev); /* enable aspm */ -- cgit v1.2.3 From adcf949e664a8b04df2fb8aa916892e58561653c Mon Sep 17 00:00:00 2001 From: Evan Quan Date: Tue, 25 May 2021 14:36:29 +0800 Subject: drm/amdgpu: fix the hang caused by PCIe link width switch SMU had set all the necessary fields for a link width switch but the width switch wasn't occurring because the link was idle in the L1 state. 
Setting LC_L1_RECONFIG_EN=0x1 will allow width switches to also be initiated while in L1 instead of waiting until the link is back in L0. Signed-off-by: Evan Quan Reviewed-by: Lijo Lazar Signed-off-by: Alex Deucher Cc: stable@vger.kernel.org --- drivers/gpu/drm/amd/amdgpu/amdgpu_nbio.h | 1 + drivers/gpu/drm/amd/amdgpu/nbio_v2_3.c | 13 +++++++++++++ drivers/gpu/drm/amd/amdgpu/nv.c | 3 +++ 3 files changed, 17 insertions(+) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_nbio.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_nbio.h index 43d074bb00a1..45295dce5c3e 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_nbio.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_nbio.h @@ -94,6 +94,7 @@ struct amdgpu_nbio_funcs { bool enable); void (*program_aspm)(struct amdgpu_device *adev); void (*apply_lc_spc_mode_wa)(struct amdgpu_device *adev); + void (*apply_l1_link_width_reconfig_wa)(struct amdgpu_device *adev); }; struct amdgpu_nbio { diff --git a/drivers/gpu/drm/amd/amdgpu/nbio_v2_3.c b/drivers/gpu/drm/amd/amdgpu/nbio_v2_3.c index 315d57bb373d..754b11dea6f0 100644 --- a/drivers/gpu/drm/amd/amdgpu/nbio_v2_3.c +++ b/drivers/gpu/drm/amd/amdgpu/nbio_v2_3.c @@ -490,6 +490,18 @@ static void nbio_v2_3_apply_lc_spc_mode_wa(struct amdgpu_device *adev) } } +static void nbio_v2_3_apply_l1_link_width_reconfig_wa(struct amdgpu_device *adev) +{ + uint32_t reg_data = 0; + + if (adev->asic_type != CHIP_NAVI10) + return; + + reg_data = RREG32_PCIE(smnPCIE_LC_LINK_WIDTH_CNTL); + reg_data |= PCIE_LC_LINK_WIDTH_CNTL__LC_L1_RECONFIG_EN_MASK; + WREG32_PCIE(smnPCIE_LC_LINK_WIDTH_CNTL, reg_data); +} + const struct amdgpu_nbio_funcs nbio_v2_3_funcs = { .get_hdp_flush_req_offset = nbio_v2_3_get_hdp_flush_req_offset, .get_hdp_flush_done_offset = nbio_v2_3_get_hdp_flush_done_offset, @@ -512,4 +524,5 @@ const struct amdgpu_nbio_funcs nbio_v2_3_funcs = { .enable_aspm = nbio_v2_3_enable_aspm, .program_aspm = nbio_v2_3_program_aspm, .apply_lc_spc_mode_wa = nbio_v2_3_apply_lc_spc_mode_wa, + .apply_l1_link_width_reconfig_wa = 
nbio_v2_3_apply_l1_link_width_reconfig_wa, }; diff --git a/drivers/gpu/drm/amd/amdgpu/nv.c b/drivers/gpu/drm/amd/amdgpu/nv.c index 63c96ca8d2a2..5231b3402990 100644 --- a/drivers/gpu/drm/amd/amdgpu/nv.c +++ b/drivers/gpu/drm/amd/amdgpu/nv.c @@ -1414,6 +1414,9 @@ static int nv_common_hw_init(void *handle) if (adev->nbio.funcs->apply_lc_spc_mode_wa) adev->nbio.funcs->apply_lc_spc_mode_wa(adev); + if (adev->nbio.funcs->apply_l1_link_width_reconfig_wa) + adev->nbio.funcs->apply_l1_link_width_reconfig_wa(adev); + /* enable pcie gen2/3 link */ nv_pcie_gen3_enable(adev); /* enable aspm */ -- cgit v1.2.3 From 754e9883d48c2e3a50c4d53c42905e25df3862c7 Mon Sep 17 00:00:00 2001 From: Evan Quan Date: Tue, 25 May 2021 18:24:47 +0800 Subject: drm/amdgpu: correct clock gating settings on feature unsupported Clock gating setting is still performed even when the corresponding CG feature is not supported. And the tricky part is disablement is actually performed no matter for enablement or disablement request. That seems not logically right. Considering HW should already properly take care of the CG state, we will just skip the corresponding clock gating setting when the feature is not supported. 
Signed-off-by: Evan Quan Reviewed-by: Lijo Lazar Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/athub_v2_0.c | 12 ++++-- drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c | 69 +++++++++++++++++++++++++------- drivers/gpu/drm/amd/amdgpu/mmhub_v2_0.c | 10 ++++- drivers/gpu/drm/amd/amdgpu/nbio_v2_3.c | 10 ++++- drivers/gpu/drm/amd/amdgpu/smuio_v11_0.c | 5 ++- 5 files changed, 83 insertions(+), 23 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/athub_v2_0.c b/drivers/gpu/drm/amd/amdgpu/athub_v2_0.c index 5b90efd6f6d0..3ac505d954c4 100644 --- a/drivers/gpu/drm/amd/amdgpu/athub_v2_0.c +++ b/drivers/gpu/drm/amd/amdgpu/athub_v2_0.c @@ -36,9 +36,12 @@ athub_v2_0_update_medium_grain_clock_gating(struct amdgpu_device *adev, { uint32_t def, data; + if (!(adev->cg_flags & AMD_CG_SUPPORT_MC_MGCG)) + return; + def = data = RREG32_SOC15(ATHUB, 0, mmATHUB_MISC_CNTL); - if (enable && (adev->cg_flags & AMD_CG_SUPPORT_MC_MGCG)) + if (enable) data |= ATHUB_MISC_CNTL__CG_ENABLE_MASK; else data &= ~ATHUB_MISC_CNTL__CG_ENABLE_MASK; @@ -53,10 +56,13 @@ athub_v2_0_update_medium_grain_light_sleep(struct amdgpu_device *adev, { uint32_t def, data; + if (!((adev->cg_flags & AMD_CG_SUPPORT_MC_LS) && + (adev->cg_flags & AMD_CG_SUPPORT_HDP_LS))) + return; + def = data = RREG32_SOC15(ATHUB, 0, mmATHUB_MISC_CNTL); - if (enable && (adev->cg_flags & AMD_CG_SUPPORT_MC_LS) && - (adev->cg_flags & AMD_CG_SUPPORT_HDP_LS)) + if (enable) data |= ATHUB_MISC_CNTL__CG_MEM_LS_ENABLE_MASK; else data &= ~ATHUB_MISC_CNTL__CG_MEM_LS_ENABLE_MASK; diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c index d08a823827a3..c19436570f42 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c @@ -7777,8 +7777,11 @@ static void gfx_v10_0_update_medium_grain_clock_gating(struct amdgpu_device *ade { uint32_t data, def; + if (!(adev->cg_flags & AMD_CG_SUPPORT_GFX_MGCG)) + return; + /* It is disabled by HW by default */ - if (enable && 
(adev->cg_flags & AMD_CG_SUPPORT_GFX_MGCG)) { + if (enable) { /* 0 - Disable some blocks' MGCG */ WREG32_SOC15(GC, 0, mmGRBM_GFX_INDEX, 0xe0000000); WREG32_SOC15(GC, 0, mmCGTT_WD_CLK_CTRL, 0xff000000); @@ -7845,22 +7848,34 @@ static void gfx_v10_0_update_3d_clock_gating(struct amdgpu_device *adev, { uint32_t data, def; + if (!(adev->cg_flags & (AMD_CG_SUPPORT_GFX_3D_CGCG | AMD_CG_SUPPORT_GFX_3D_CGLS))) + return; + /* Enable 3D CGCG/CGLS */ - if (enable && (adev->cg_flags & AMD_CG_SUPPORT_GFX_3D_CGCG)) { + if (enable) { /* write cmd to clear cgcg/cgls ov */ def = data = RREG32_SOC15(GC, 0, mmRLC_CGTT_MGCG_OVERRIDE); + /* unset CGCG override */ - data &= ~RLC_CGTT_MGCG_OVERRIDE__GFXIP_GFX3D_CG_OVERRIDE_MASK; + if (adev->cg_flags & AMD_CG_SUPPORT_GFX_3D_CGCG) + data &= ~RLC_CGTT_MGCG_OVERRIDE__GFXIP_GFX3D_CG_OVERRIDE_MASK; + /* update CGCG and CGLS override bits */ if (def != data) WREG32_SOC15(GC, 0, mmRLC_CGTT_MGCG_OVERRIDE, data); + /* enable 3Dcgcg FSM(0x0000363f) */ def = RREG32_SOC15(GC, 0, mmRLC_CGCG_CGLS_CTRL_3D); - data = (0x36 << RLC_CGCG_CGLS_CTRL_3D__CGCG_GFX_IDLE_THRESHOLD__SHIFT) | - RLC_CGCG_CGLS_CTRL_3D__CGCG_EN_MASK; + data = 0; + + if (adev->cg_flags & AMD_CG_SUPPORT_GFX_3D_CGCG) + data = (0x36 << RLC_CGCG_CGLS_CTRL_3D__CGCG_GFX_IDLE_THRESHOLD__SHIFT) | + RLC_CGCG_CGLS_CTRL_3D__CGCG_EN_MASK; + if (adev->cg_flags & AMD_CG_SUPPORT_GFX_3D_CGLS) data |= (0x000F << RLC_CGCG_CGLS_CTRL_3D__CGLS_REP_COMPANSAT_DELAY__SHIFT) | RLC_CGCG_CGLS_CTRL_3D__CGLS_EN_MASK; + if (def != data) WREG32_SOC15(GC, 0, mmRLC_CGCG_CGLS_CTRL_3D, data); @@ -7873,9 +7888,14 @@ static void gfx_v10_0_update_3d_clock_gating(struct amdgpu_device *adev, } else { /* Disable CGCG/CGLS */ def = data = RREG32_SOC15(GC, 0, mmRLC_CGCG_CGLS_CTRL_3D); + /* disable cgcg, cgls should be disabled */ - data &= ~(RLC_CGCG_CGLS_CTRL_3D__CGCG_EN_MASK | - RLC_CGCG_CGLS_CTRL_3D__CGLS_EN_MASK); + if (adev->cg_flags & AMD_CG_SUPPORT_GFX_3D_CGCG) + data &= ~RLC_CGCG_CGLS_CTRL_3D__CGCG_EN_MASK; + + if 
(adev->cg_flags & AMD_CG_SUPPORT_GFX_3D_CGLS) + data &= ~RLC_CGCG_CGLS_CTRL_3D__CGLS_EN_MASK; + /* disable cgcg and cgls in FSM */ if (def != data) WREG32_SOC15(GC, 0, mmRLC_CGCG_CGLS_CTRL_3D, data); @@ -7887,25 +7907,35 @@ static void gfx_v10_0_update_coarse_grain_clock_gating(struct amdgpu_device *ade { uint32_t def, data; - if (enable && (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGCG)) { + if (!(adev->cg_flags & (AMD_CG_SUPPORT_GFX_CGCG | AMD_CG_SUPPORT_GFX_CGLS))) + return; + + if (enable) { def = data = RREG32_SOC15(GC, 0, mmRLC_CGTT_MGCG_OVERRIDE); + /* unset CGCG override */ - data &= ~RLC_CGTT_MGCG_OVERRIDE__GFXIP_CGCG_OVERRIDE_MASK; + if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGCG) + data &= ~RLC_CGTT_MGCG_OVERRIDE__GFXIP_CGCG_OVERRIDE_MASK; + if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGLS) data &= ~RLC_CGTT_MGCG_OVERRIDE__GFXIP_CGLS_OVERRIDE_MASK; - else - data |= RLC_CGTT_MGCG_OVERRIDE__GFXIP_CGLS_OVERRIDE_MASK; + /* update CGCG and CGLS override bits */ if (def != data) WREG32_SOC15(GC, 0, mmRLC_CGTT_MGCG_OVERRIDE, data); /* enable cgcg FSM(0x0000363F) */ def = RREG32_SOC15(GC, 0, mmRLC_CGCG_CGLS_CTRL); - data = (0x36 << RLC_CGCG_CGLS_CTRL__CGCG_GFX_IDLE_THRESHOLD__SHIFT) | - RLC_CGCG_CGLS_CTRL__CGCG_EN_MASK; + data = 0; + + if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGCG) + data = (0x36 << RLC_CGCG_CGLS_CTRL__CGCG_GFX_IDLE_THRESHOLD__SHIFT) | + RLC_CGCG_CGLS_CTRL__CGCG_EN_MASK; + if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGLS) data |= (0x000F << RLC_CGCG_CGLS_CTRL__CGLS_REP_COMPANSAT_DELAY__SHIFT) | RLC_CGCG_CGLS_CTRL__CGLS_EN_MASK; + if (def != data) WREG32_SOC15(GC, 0, mmRLC_CGCG_CGLS_CTRL, data); @@ -7917,8 +7947,14 @@ static void gfx_v10_0_update_coarse_grain_clock_gating(struct amdgpu_device *ade WREG32_SOC15(GC, 0, mmCP_RB_WPTR_POLL_CNTL, data); } else { def = data = RREG32_SOC15(GC, 0, mmRLC_CGCG_CGLS_CTRL); + /* reset CGCG/CGLS bits */ - data &= ~(RLC_CGCG_CGLS_CTRL__CGCG_EN_MASK | RLC_CGCG_CGLS_CTRL__CGLS_EN_MASK); + if (adev->cg_flags & 
AMD_CG_SUPPORT_GFX_CGCG) + data &= ~RLC_CGCG_CGLS_CTRL__CGCG_EN_MASK; + + if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGLS) + data &= ~RLC_CGCG_CGLS_CTRL__CGLS_EN_MASK; + /* disable cgcg and cgls in FSM */ if (def != data) WREG32_SOC15(GC, 0, mmRLC_CGCG_CGLS_CTRL, data); @@ -7930,7 +7966,10 @@ static void gfx_v10_0_update_fine_grain_clock_gating(struct amdgpu_device *adev, { uint32_t def, data; - if (enable && (adev->cg_flags & AMD_CG_SUPPORT_GFX_FGCG)) { + if (!(adev->cg_flags & AMD_CG_SUPPORT_GFX_FGCG)) + return; + + if (enable) { def = data = RREG32_SOC15(GC, 0, mmRLC_CGTT_MGCG_OVERRIDE); /* unset FGCG override */ data &= ~RLC_CGTT_MGCG_OVERRIDE__GFXIP_FGCG_OVERRIDE_MASK; diff --git a/drivers/gpu/drm/amd/amdgpu/mmhub_v2_0.c b/drivers/gpu/drm/amd/amdgpu/mmhub_v2_0.c index f7e93bbc4e15..7ded6b2f058e 100644 --- a/drivers/gpu/drm/amd/amdgpu/mmhub_v2_0.c +++ b/drivers/gpu/drm/amd/amdgpu/mmhub_v2_0.c @@ -568,6 +568,9 @@ static void mmhub_v2_0_update_medium_grain_clock_gating(struct amdgpu_device *ad { uint32_t def, data, def1, data1; + if (!(adev->cg_flags & AMD_CG_SUPPORT_MC_MGCG)) + return; + switch (adev->asic_type) { case CHIP_SIENNA_CICHLID: case CHIP_NAVY_FLOUNDER: @@ -582,7 +585,7 @@ static void mmhub_v2_0_update_medium_grain_clock_gating(struct amdgpu_device *ad break; } - if (enable && (adev->cg_flags & AMD_CG_SUPPORT_MC_MGCG)) { + if (enable) { data |= MM_ATC_L2_MISC_CG__ENABLE_MASK; data1 &= ~(DAGB0_CNTL_MISC2__DISABLE_WRREQ_CG_MASK | @@ -627,6 +630,9 @@ static void mmhub_v2_0_update_medium_grain_light_sleep(struct amdgpu_device *ade { uint32_t def, data; + if (!(adev->cg_flags & AMD_CG_SUPPORT_MC_LS)) + return; + switch (adev->asic_type) { case CHIP_SIENNA_CICHLID: case CHIP_NAVY_FLOUNDER: @@ -639,7 +645,7 @@ static void mmhub_v2_0_update_medium_grain_light_sleep(struct amdgpu_device *ade break; } - if (enable && (adev->cg_flags & AMD_CG_SUPPORT_MC_LS)) + if (enable) data |= MM_ATC_L2_MISC_CG__MEM_LS_ENABLE_MASK; else data &= 
~MM_ATC_L2_MISC_CG__MEM_LS_ENABLE_MASK; diff --git a/drivers/gpu/drm/amd/amdgpu/nbio_v2_3.c b/drivers/gpu/drm/amd/amdgpu/nbio_v2_3.c index 754b11dea6f0..7b79eeaa88aa 100644 --- a/drivers/gpu/drm/amd/amdgpu/nbio_v2_3.c +++ b/drivers/gpu/drm/amd/amdgpu/nbio_v2_3.c @@ -220,8 +220,11 @@ static void nbio_v2_3_update_medium_grain_clock_gating(struct amdgpu_device *ade { uint32_t def, data; + if (!(adev->cg_flags & AMD_CG_SUPPORT_BIF_MGCG)) + return; + def = data = RREG32_PCIE(smnCPM_CONTROL); - if (enable && (adev->cg_flags & AMD_CG_SUPPORT_BIF_MGCG)) { + if (enable) { data |= (CPM_CONTROL__LCLK_DYN_GATE_ENABLE_MASK | CPM_CONTROL__TXCLK_DYN_GATE_ENABLE_MASK | CPM_CONTROL__TXCLK_LCNT_GATE_ENABLE_MASK | @@ -246,8 +249,11 @@ static void nbio_v2_3_update_medium_grain_light_sleep(struct amdgpu_device *adev { uint32_t def, data; + if (!(adev->cg_flags & AMD_CG_SUPPORT_BIF_LS)) + return; + def = data = RREG32_PCIE(smnPCIE_CNTL2); - if (enable && (adev->cg_flags & AMD_CG_SUPPORT_BIF_LS)) { + if (enable) { data |= (PCIE_CNTL2__SLV_MEM_LS_EN_MASK | PCIE_CNTL2__MST_MEM_LS_EN_MASK | PCIE_CNTL2__REPLAY_MEM_LS_EN_MASK); diff --git a/drivers/gpu/drm/amd/amdgpu/smuio_v11_0.c b/drivers/gpu/drm/amd/amdgpu/smuio_v11_0.c index e9c474c217ec..b6f1322f908c 100644 --- a/drivers/gpu/drm/amd/amdgpu/smuio_v11_0.c +++ b/drivers/gpu/drm/amd/amdgpu/smuio_v11_0.c @@ -43,9 +43,12 @@ static void smuio_v11_0_update_rom_clock_gating(struct amdgpu_device *adev, bool if (adev->flags & AMD_IS_APU) return; + if (!(adev->cg_flags & AMD_CG_SUPPORT_ROM_MGCG)) + return; + def = data = RREG32_SOC15(SMUIO, 0, mmCGTT_ROM_CLK_CTRL0); - if (enable && (adev->cg_flags & AMD_CG_SUPPORT_ROM_MGCG)) + if (enable) data &= ~(CGTT_ROM_CLK_CTRL0__SOFT_OVERRIDE0_MASK | CGTT_ROM_CLK_CTRL0__SOFT_OVERRIDE1_MASK); else -- cgit v1.2.3 From 3e7fbfb40fd83a18d5e29fd35ea59dfbdcce6327 Mon Sep 17 00:00:00 2001 From: Evan Quan Date: Tue, 25 May 2021 18:34:25 +0800 Subject: drm/amdgpu: update GFX MGCG settings Update GFX MGCG related 
settings. Signed-off-by: Evan Quan Reviewed-by: Lijo Lazar Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c | 11 +++++++---- 1 file changed, 7 insertions(+), 4 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c index c19436570f42..f5e9c022960b 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c @@ -7777,11 +7777,11 @@ static void gfx_v10_0_update_medium_grain_clock_gating(struct amdgpu_device *ade { uint32_t data, def; - if (!(adev->cg_flags & AMD_CG_SUPPORT_GFX_MGCG)) + if (!(adev->cg_flags & (AMD_CG_SUPPORT_GFX_MGCG | AMD_CG_SUPPORT_GFX_MGLS))) return; /* It is disabled by HW by default */ - if (enable) { + if (enable && (adev->cg_flags & AMD_CG_SUPPORT_GFX_MGCG)) { /* 0 - Disable some blocks' MGCG */ WREG32_SOC15(GC, 0, mmGRBM_GFX_INDEX, 0xe0000000); WREG32_SOC15(GC, 0, mmCGTT_WD_CLK_CTRL, 0xff000000); @@ -7794,6 +7794,7 @@ static void gfx_v10_0_update_medium_grain_clock_gating(struct amdgpu_device *ade RLC_CGTT_MGCG_OVERRIDE__RLC_CGTT_SCLK_OVERRIDE_MASK | RLC_CGTT_MGCG_OVERRIDE__GFXIP_MGCG_OVERRIDE_MASK | RLC_CGTT_MGCG_OVERRIDE__GFXIP_MGLS_OVERRIDE_MASK | + RLC_CGTT_MGCG_OVERRIDE__GFXIP_FGCG_OVERRIDE_MASK | RLC_CGTT_MGCG_OVERRIDE__ENABLE_CGTS_LEGACY_MASK); if (def != data) @@ -7816,13 +7817,15 @@ static void gfx_v10_0_update_medium_grain_clock_gating(struct amdgpu_device *ade WREG32_SOC15(GC, 0, mmCP_MEM_SLP_CNTL, data); } } - } else { + } else if (!enable || !(adev->cg_flags & AMD_CG_SUPPORT_GFX_MGCG)) { /* 1 - MGCG_OVERRIDE */ def = data = RREG32_SOC15(GC, 0, mmRLC_CGTT_MGCG_OVERRIDE); data |= (RLC_CGTT_MGCG_OVERRIDE__RLC_CGTT_SCLK_OVERRIDE_MASK | RLC_CGTT_MGCG_OVERRIDE__GRBM_CGTT_SCLK_OVERRIDE_MASK | RLC_CGTT_MGCG_OVERRIDE__GFXIP_MGCG_OVERRIDE_MASK | - RLC_CGTT_MGCG_OVERRIDE__GFXIP_MGLS_OVERRIDE_MASK); + RLC_CGTT_MGCG_OVERRIDE__GFXIP_MGLS_OVERRIDE_MASK | + RLC_CGTT_MGCG_OVERRIDE__GFXIP_FGCG_OVERRIDE_MASK | + 
RLC_CGTT_MGCG_OVERRIDE__ENABLE_CGTS_LEGACY_MASK); if (def != data) WREG32_SOC15(GC, 0, mmRLC_CGTT_MGCG_OVERRIDE, data); -- cgit v1.2.3 From ff4b601a0541ad539947a135205b8125880ac3b4 Mon Sep 17 00:00:00 2001 From: Evan Quan Date: Tue, 25 May 2021 18:35:36 +0800 Subject: drm/amdgpu: update HDP LS settings Avoid unnecessary register programming on feature disablement. Signed-off-by: Evan Quan Reviewed-by: Lijo Lazar Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/hdp_v5_0.c | 85 ++++++++++++++++++++--------------- 1 file changed, 48 insertions(+), 37 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/hdp_v5_0.c b/drivers/gpu/drm/amd/amdgpu/hdp_v5_0.c index 7a15e669b68d..5793977953cc 100644 --- a/drivers/gpu/drm/amd/amdgpu/hdp_v5_0.c +++ b/drivers/gpu/drm/amd/amdgpu/hdp_v5_0.c @@ -90,45 +90,56 @@ static void hdp_v5_0_update_mem_power_gating(struct amdgpu_device *adev, RC_MEM_POWER_SD_EN, 0); WREG32_SOC15(HDP, 0, mmHDP_MEM_POWER_CTRL, hdp_mem_pwr_cntl); - /* only one clock gating mode (LS/DS/SD) can be enabled */ - if (adev->cg_flags & AMD_CG_SUPPORT_HDP_LS) { - hdp_mem_pwr_cntl = REG_SET_FIELD(hdp_mem_pwr_cntl, - HDP_MEM_POWER_CTRL, - IPH_MEM_POWER_LS_EN, enable); - hdp_mem_pwr_cntl = REG_SET_FIELD(hdp_mem_pwr_cntl, - HDP_MEM_POWER_CTRL, - RC_MEM_POWER_LS_EN, enable); - } else if (adev->cg_flags & AMD_CG_SUPPORT_HDP_DS) { - hdp_mem_pwr_cntl = REG_SET_FIELD(hdp_mem_pwr_cntl, - HDP_MEM_POWER_CTRL, - IPH_MEM_POWER_DS_EN, enable); - hdp_mem_pwr_cntl = REG_SET_FIELD(hdp_mem_pwr_cntl, - HDP_MEM_POWER_CTRL, - RC_MEM_POWER_DS_EN, enable); - } else if (adev->cg_flags & AMD_CG_SUPPORT_HDP_SD) { - hdp_mem_pwr_cntl = REG_SET_FIELD(hdp_mem_pwr_cntl, - HDP_MEM_POWER_CTRL, - IPH_MEM_POWER_SD_EN, enable); - /* RC should not use shut down mode, fallback to ds */ - hdp_mem_pwr_cntl = REG_SET_FIELD(hdp_mem_pwr_cntl, - HDP_MEM_POWER_CTRL, - RC_MEM_POWER_DS_EN, enable); - } - - /* confirmed that IPH_MEM_POWER_CTRL_EN and RC_MEM_POWER_CTRL_EN have to - * be set for SRAM LS/DS/SD 
*/ - if (adev->cg_flags & (AMD_CG_SUPPORT_HDP_LS | AMD_CG_SUPPORT_HDP_DS | - AMD_CG_SUPPORT_HDP_SD)) { - hdp_mem_pwr_cntl = REG_SET_FIELD(hdp_mem_pwr_cntl, HDP_MEM_POWER_CTRL, - IPH_MEM_POWER_CTRL_EN, 1); - hdp_mem_pwr_cntl = REG_SET_FIELD(hdp_mem_pwr_cntl, HDP_MEM_POWER_CTRL, - RC_MEM_POWER_CTRL_EN, 1); + /* Already disabled above. The actions below are for "enabled" only */ + if (enable) { + /* only one clock gating mode (LS/DS/SD) can be enabled */ + if (adev->cg_flags & AMD_CG_SUPPORT_HDP_LS) { + hdp_mem_pwr_cntl = REG_SET_FIELD(hdp_mem_pwr_cntl, + HDP_MEM_POWER_CTRL, + IPH_MEM_POWER_LS_EN, 1); + hdp_mem_pwr_cntl = REG_SET_FIELD(hdp_mem_pwr_cntl, + HDP_MEM_POWER_CTRL, + RC_MEM_POWER_LS_EN, 1); + } else if (adev->cg_flags & AMD_CG_SUPPORT_HDP_DS) { + hdp_mem_pwr_cntl = REG_SET_FIELD(hdp_mem_pwr_cntl, + HDP_MEM_POWER_CTRL, + IPH_MEM_POWER_DS_EN, 1); + hdp_mem_pwr_cntl = REG_SET_FIELD(hdp_mem_pwr_cntl, + HDP_MEM_POWER_CTRL, + RC_MEM_POWER_DS_EN, 1); + } else if (adev->cg_flags & AMD_CG_SUPPORT_HDP_SD) { + hdp_mem_pwr_cntl = REG_SET_FIELD(hdp_mem_pwr_cntl, + HDP_MEM_POWER_CTRL, + IPH_MEM_POWER_SD_EN, 1); + /* RC should not use shut down mode, fallback to ds or ls if allowed */ + if (adev->cg_flags & AMD_CG_SUPPORT_HDP_DS) + hdp_mem_pwr_cntl = REG_SET_FIELD(hdp_mem_pwr_cntl, + HDP_MEM_POWER_CTRL, + RC_MEM_POWER_DS_EN, 1); + else if (adev->cg_flags & AMD_CG_SUPPORT_HDP_LS) + hdp_mem_pwr_cntl = REG_SET_FIELD(hdp_mem_pwr_cntl, + HDP_MEM_POWER_CTRL, + RC_MEM_POWER_LS_EN, 1); + } + + /* confirmed that IPH_MEM_POWER_CTRL_EN and RC_MEM_POWER_CTRL_EN have to + * be set for SRAM LS/DS/SD */ + if (adev->cg_flags & (AMD_CG_SUPPORT_HDP_LS | AMD_CG_SUPPORT_HDP_DS | + AMD_CG_SUPPORT_HDP_SD)) { + hdp_mem_pwr_cntl = REG_SET_FIELD(hdp_mem_pwr_cntl, HDP_MEM_POWER_CTRL, + IPH_MEM_POWER_CTRL_EN, 1); + hdp_mem_pwr_cntl = REG_SET_FIELD(hdp_mem_pwr_cntl, HDP_MEM_POWER_CTRL, + RC_MEM_POWER_CTRL_EN, 1); + WREG32_SOC15(HDP, 0, mmHDP_MEM_POWER_CTRL, hdp_mem_pwr_cntl); + } } - 
WREG32_SOC15(HDP, 0, mmHDP_MEM_POWER_CTRL, hdp_mem_pwr_cntl); - - /* restore IPH & RC clock override after clock/power mode changing */ - WREG32_SOC15(HDP, 0, mmHDP_CLK_CNTL, hdp_clk_cntl1); + /* disable IPH & RC clock override after clock/power mode changing */ + hdp_clk_cntl = REG_SET_FIELD(hdp_clk_cntl, HDP_CLK_CNTL, + IPH_MEM_CLK_SOFT_OVERRIDE, 0); + hdp_clk_cntl = REG_SET_FIELD(hdp_clk_cntl, HDP_CLK_CNTL, + RC_MEM_CLK_SOFT_OVERRIDE, 0); + WREG32_SOC15(HDP, 0, mmHDP_CLK_CNTL, hdp_clk_cntl); } static void hdp_v5_0_update_medium_grain_clock_gating(struct amdgpu_device *adev, -- cgit v1.2.3 From 75ae84c89b136a5c0193ab7064b03cddfcebba39 Mon Sep 17 00:00:00 2001 From: Philip Yang Date: Wed, 16 Jun 2021 10:02:18 -0400 Subject: drm/amdkfd: add helper function for kfd sysfs create No functionality change. Modify kfd_sysfs_create_file to use kobject as parameter, so it becomes common helper function to remove duplicate code and will simplify new kfd sysfs file create in future. Move pr_warn to helper function if sysfs file create failed. Set helper function as void return because caller doesn't use the helper function return value. 
Signed-off-by: Philip Yang Reviewed-by: Felix Kuehling Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdkfd/kfd_process.c | 119 ++++++++++--------------------- 1 file changed, 39 insertions(+), 80 deletions(-) diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_process.c b/drivers/gpu/drm/amd/amdkfd/kfd_process.c index 09b98a83f670..3147dc8bb051 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_process.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_process.c @@ -484,34 +484,31 @@ int kfd_procfs_add_queue(struct queue *q) return 0; } -static int kfd_sysfs_create_file(struct kfd_process *p, struct attribute *attr, +static void kfd_sysfs_create_file(struct kobject *kobj, struct attribute *attr, char *name) { - int ret = 0; + int ret; - if (!p || !attr || !name) - return -EINVAL; + if (!kobj || !attr || !name) + return; attr->name = name; attr->mode = KFD_SYSFS_FILE_MODE; sysfs_attr_init(attr); - ret = sysfs_create_file(p->kobj, attr); - - return ret; + ret = sysfs_create_file(kobj, attr); + if (ret) + pr_warn("Create sysfs %s/%s failed %d", kobj->name, name, ret); } -static int kfd_procfs_add_sysfs_stats(struct kfd_process *p) +static void kfd_procfs_add_sysfs_stats(struct kfd_process *p) { - int ret = 0; + int ret; int i; char stats_dir_filename[MAX_SYSFS_FILENAME_LEN]; - if (!p) - return -EINVAL; - - if (!p->kobj) - return -EFAULT; + if (!p || !p->kobj) + return; /* * Create sysfs files for each GPU: @@ -521,63 +518,43 @@ static int kfd_procfs_add_sysfs_stats(struct kfd_process *p) */ for (i = 0; i < p->n_pdds; i++) { struct kfd_process_device *pdd = p->pdds[i]; - struct kobject *kobj_stats; snprintf(stats_dir_filename, MAX_SYSFS_FILENAME_LEN, "stats_%u", pdd->dev->id); - kobj_stats = kfd_alloc_struct(kobj_stats); - if (!kobj_stats) - return -ENOMEM; + pdd->kobj_stats = kfd_alloc_struct(pdd->kobj_stats); + if (!pdd->kobj_stats) + return; - ret = kobject_init_and_add(kobj_stats, - &procfs_stats_type, - p->kobj, - stats_dir_filename); + ret = kobject_init_and_add(pdd->kobj_stats, + 
&procfs_stats_type, + p->kobj, + stats_dir_filename); if (ret) { pr_warn("Creating KFD proc/stats_%s folder failed", - stats_dir_filename); - kobject_put(kobj_stats); - goto err; + stats_dir_filename); + kobject_put(pdd->kobj_stats); + pdd->kobj_stats = NULL; + return; } - pdd->kobj_stats = kobj_stats; - pdd->attr_evict.name = "evicted_ms"; - pdd->attr_evict.mode = KFD_SYSFS_FILE_MODE; - sysfs_attr_init(&pdd->attr_evict); - ret = sysfs_create_file(kobj_stats, &pdd->attr_evict); - if (ret) - pr_warn("Creating eviction stats for gpuid %d failed", - (int)pdd->dev->id); - + kfd_sysfs_create_file(pdd->kobj_stats, &pdd->attr_evict, + "evicted_ms"); /* Add sysfs file to report compute unit occupancy */ - if (pdd->dev->kfd2kgd->get_cu_occupancy != NULL) { - pdd->attr_cu_occupancy.name = "cu_occupancy"; - pdd->attr_cu_occupancy.mode = KFD_SYSFS_FILE_MODE; - sysfs_attr_init(&pdd->attr_cu_occupancy); - ret = sysfs_create_file(kobj_stats, - &pdd->attr_cu_occupancy); - if (ret) - pr_warn("Creating %s failed for gpuid: %d", - pdd->attr_cu_occupancy.name, - (int)pdd->dev->id); - } + if (pdd->dev->kfd2kgd->get_cu_occupancy) + kfd_sysfs_create_file(pdd->kobj_stats, + &pdd->attr_cu_occupancy, + "cu_occupancy"); } -err: - return ret; } -static int kfd_procfs_add_sysfs_files(struct kfd_process *p) +static void kfd_procfs_add_sysfs_files(struct kfd_process *p) { - int ret = 0; int i; - if (!p) - return -EINVAL; - - if (!p->kobj) - return -EFAULT; + if (!p || !p->kobj) + return; /* * Create sysfs files for each GPU: @@ -589,20 +566,14 @@ static int kfd_procfs_add_sysfs_files(struct kfd_process *p) snprintf(pdd->vram_filename, MAX_SYSFS_FILENAME_LEN, "vram_%u", pdd->dev->id); - ret = kfd_sysfs_create_file(p, &pdd->attr_vram, pdd->vram_filename); - if (ret) - pr_warn("Creating vram usage for gpu id %d failed", - (int)pdd->dev->id); + kfd_sysfs_create_file(p->kobj, &pdd->attr_vram, + pdd->vram_filename); snprintf(pdd->sdma_filename, MAX_SYSFS_FILENAME_LEN, "sdma_%u", pdd->dev->id); - ret = 
kfd_sysfs_create_file(p, &pdd->attr_sdma, pdd->sdma_filename); - if (ret) - pr_warn("Creating sdma usage for gpu id %d failed", - (int)pdd->dev->id); + kfd_sysfs_create_file(p->kobj, &pdd->attr_sdma, + pdd->sdma_filename); } - - return ret; } void kfd_procfs_del_queue(struct queue *q) @@ -800,28 +771,16 @@ struct kfd_process *kfd_create_process(struct file *filep) goto out; } - process->attr_pasid.name = "pasid"; - process->attr_pasid.mode = KFD_SYSFS_FILE_MODE; - sysfs_attr_init(&process->attr_pasid); - ret = sysfs_create_file(process->kobj, &process->attr_pasid); - if (ret) - pr_warn("Creating pasid for pid %d failed", - (int)process->lead_thread->pid); + kfd_sysfs_create_file(process->kobj, &process->attr_pasid, + "pasid"); process->kobj_queues = kobject_create_and_add("queues", process->kobj); if (!process->kobj_queues) pr_warn("Creating KFD proc/queues folder failed"); - ret = kfd_procfs_add_sysfs_stats(process); - if (ret) - pr_warn("Creating sysfs stats dir for pid %d failed", - (int)process->lead_thread->pid); - - ret = kfd_procfs_add_sysfs_files(process); - if (ret) - pr_warn("Creating sysfs usage file for pid %d failed", - (int)process->lead_thread->pid); + kfd_procfs_add_sysfs_stats(process); + kfd_procfs_add_sysfs_files(process); } out: if (!IS_ERR(process)) -- cgit v1.2.3 From dcdb4d904b4bd3078fe8d4d24b1658560d6078ef Mon Sep 17 00:00:00 2001 From: Philip Yang Date: Mon, 21 Jun 2021 18:51:26 -0400 Subject: drm/amdkfd: fix sysfs kobj leak 3 cases of kobj leak, which causes memory leak: kobj_type must have release() method to free memory from release callback. Don't need NULL default_attrs to init kobj. sysfs files created under kobj_stats should be removed with kobj_stats as parent kobject. Remove queue sysfs files when releasing queue from process MMU notifier release callback.
Signed-off-by: Philip Yang Reviewed-by: Felix Kuehling Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdkfd/kfd_process.c | 14 ++++++-------- drivers/gpu/drm/amd/amdkfd/kfd_process_queue_manager.c | 1 + 2 files changed, 7 insertions(+), 8 deletions(-) diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_process.c b/drivers/gpu/drm/amd/amdkfd/kfd_process.c index 3147dc8bb051..cfc36fceac8a 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_process.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_process.c @@ -451,13 +451,9 @@ static const struct sysfs_ops procfs_stats_ops = { .show = kfd_procfs_stats_show, }; -static struct attribute *procfs_stats_attrs[] = { - NULL -}; - static struct kobj_type procfs_stats_type = { .sysfs_ops = &procfs_stats_ops, - .default_attrs = procfs_stats_attrs, + .release = kfd_procfs_kobj_release, }; int kfd_procfs_add_queue(struct queue *q) @@ -946,9 +942,11 @@ static void kfd_process_wq_release(struct work_struct *work) sysfs_remove_file(p->kobj, &pdd->attr_vram); sysfs_remove_file(p->kobj, &pdd->attr_sdma); - sysfs_remove_file(p->kobj, &pdd->attr_evict); - if (pdd->dev->kfd2kgd->get_cu_occupancy != NULL) - sysfs_remove_file(p->kobj, &pdd->attr_cu_occupancy); + + sysfs_remove_file(pdd->kobj_stats, &pdd->attr_evict); + if (pdd->dev->kfd2kgd->get_cu_occupancy) + sysfs_remove_file(pdd->kobj_stats, + &pdd->attr_cu_occupancy); kobject_del(pdd->kobj_stats); kobject_put(pdd->kobj_stats); pdd->kobj_stats = NULL; diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_process_queue_manager.c b/drivers/gpu/drm/amd/amdkfd/kfd_process_queue_manager.c index 95a6c36cea4c..243dd1efcdbf 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_process_queue_manager.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_process_queue_manager.c @@ -153,6 +153,7 @@ void pqm_uninit(struct process_queue_manager *pqm) if (pqn->q && pqn->q->gws) amdgpu_amdkfd_remove_gws_from_process(pqm->process->kgd_process_info, pqn->q->gws); + kfd_procfs_del_queue(pqn->q); uninit_queue(pqn->q); list_del(&pqn->process_queue_list); 
kfree(pqn); -- cgit v1.2.3 From 751580b3ff9ac6bf39da8586e132dbebee2409ef Mon Sep 17 00:00:00 2001 From: Philip Yang Date: Wed, 16 Jun 2021 09:51:47 -0400 Subject: drm/amdkfd: add sysfs counters for vm fault and migration This is part of SVM profiling API, export sysfs counters for per-process, per-GPU vm retry fault, pages migrated in and out of GPU vram. counters will not be updated in parallel in GPU retry fault handler and migration to vram/ram path, use READ_ONCE to avoid compiler optimization. Signed-off-by: Philip Yang Reviewed-by: Felix Kuehling Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdkfd/kfd_priv.h | 9 ++ drivers/gpu/drm/amd/amdkfd/kfd_process.c | 151 +++++++++++++++++++++++++------ 2 files changed, 131 insertions(+), 29 deletions(-) diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_priv.h b/drivers/gpu/drm/amd/amdkfd/kfd_priv.h index 6dc22fa1e555..3426743ed228 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_priv.h +++ b/drivers/gpu/drm/amd/amdkfd/kfd_priv.h @@ -730,6 +730,15 @@ struct kfd_process_device { * number of CU's a device has along with number of other competing processes */ struct attribute attr_cu_occupancy; + + /* sysfs counters for GPU retry fault and page migration tracking */ + struct kobject *kobj_counters; + struct attribute attr_faults; + struct attribute attr_page_in; + struct attribute attr_page_out; + uint64_t faults; + uint64_t page_in; + uint64_t page_out; }; #define qpd_to_pdd(x) container_of(x, struct kfd_process_device, qpd) diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_process.c b/drivers/gpu/drm/amd/amdkfd/kfd_process.c index cfc36fceac8a..21ec8a18cad2 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_process.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_process.c @@ -416,6 +416,29 @@ static ssize_t kfd_procfs_stats_show(struct kobject *kobj, return 0; } +static ssize_t kfd_sysfs_counters_show(struct kobject *kobj, + struct attribute *attr, char *buf) +{ + struct kfd_process_device *pdd; + + if (!strcmp(attr->name, "faults")) { + pdd = 
container_of(attr, struct kfd_process_device, + attr_faults); + return sysfs_emit(buf, "%llu\n", READ_ONCE(pdd->faults)); + } + if (!strcmp(attr->name, "page_in")) { + pdd = container_of(attr, struct kfd_process_device, + attr_page_in); + return sysfs_emit(buf, "%llu\n", READ_ONCE(pdd->page_in)); + } + if (!strcmp(attr->name, "page_out")) { + pdd = container_of(attr, struct kfd_process_device, + attr_page_out); + return sysfs_emit(buf, "%llu\n", READ_ONCE(pdd->page_out)); + } + return 0; +} + static struct attribute attr_queue_size = { .name = "size", .mode = KFD_SYSFS_FILE_MODE @@ -456,6 +479,15 @@ static struct kobj_type procfs_stats_type = { .release = kfd_procfs_kobj_release, }; +static const struct sysfs_ops sysfs_counters_ops = { + .show = kfd_sysfs_counters_show, +}; + +static struct kobj_type sysfs_counters_type = { + .sysfs_ops = &sysfs_counters_ops, + .release = kfd_procfs_kobj_release, +}; + int kfd_procfs_add_queue(struct queue *q) { struct kfd_process *proc; @@ -544,6 +576,50 @@ static void kfd_procfs_add_sysfs_stats(struct kfd_process *p) } } +static void kfd_procfs_add_sysfs_counters(struct kfd_process *p) +{ + int ret = 0; + int i; + char counters_dir_filename[MAX_SYSFS_FILENAME_LEN]; + + if (!p || !p->kobj) + return; + + /* + * Create sysfs files for each GPU which supports SVM + * - proc//counters_/ + * - proc//counters_/faults + * - proc//counters_/page_in + * - proc//counters_/page_out + */ + for_each_set_bit(i, p->svms.bitmap_supported, p->n_pdds) { + struct kfd_process_device *pdd = p->pdds[i]; + struct kobject *kobj_counters; + + snprintf(counters_dir_filename, MAX_SYSFS_FILENAME_LEN, + "counters_%u", pdd->dev->id); + kobj_counters = kfd_alloc_struct(kobj_counters); + if (!kobj_counters) + return; + + ret = kobject_init_and_add(kobj_counters, &sysfs_counters_type, + p->kobj, counters_dir_filename); + if (ret) { + pr_warn("Creating KFD proc/%s folder failed", + counters_dir_filename); + kobject_put(kobj_counters); + return; + } + + 
pdd->kobj_counters = kobj_counters; + kfd_sysfs_create_file(kobj_counters, &pdd->attr_faults, + "faults"); + kfd_sysfs_create_file(kobj_counters, &pdd->attr_page_in, + "page_in"); + kfd_sysfs_create_file(kobj_counters, &pdd->attr_page_out, + "page_out"); + } +} static void kfd_procfs_add_sysfs_files(struct kfd_process *p) { @@ -777,6 +853,7 @@ struct kfd_process *kfd_create_process(struct file *filep) kfd_procfs_add_sysfs_stats(process); kfd_procfs_add_sysfs_files(process); + kfd_procfs_add_sysfs_counters(process); } out: if (!IS_ERR(process)) @@ -919,44 +996,60 @@ static void kfd_process_destroy_pdds(struct kfd_process *p) p->n_pdds = 0; } -/* No process locking is needed in this function, because the process - * is not findable any more. We must assume that no other thread is - * using it any more, otherwise we couldn't safely free the process - * structure in the end. - */ -static void kfd_process_wq_release(struct work_struct *work) +static void kfd_process_remove_sysfs(struct kfd_process *p) { - struct kfd_process *p = container_of(work, struct kfd_process, - release_work); + struct kfd_process_device *pdd; int i; - /* Remove the procfs files */ - if (p->kobj) { - sysfs_remove_file(p->kobj, &p->attr_pasid); - kobject_del(p->kobj_queues); - kobject_put(p->kobj_queues); - p->kobj_queues = NULL; + if (!p->kobj) + return; - for (i = 0; i < p->n_pdds; i++) { - struct kfd_process_device *pdd = p->pdds[i]; + sysfs_remove_file(p->kobj, &p->attr_pasid); + kobject_del(p->kobj_queues); + kobject_put(p->kobj_queues); + p->kobj_queues = NULL; - sysfs_remove_file(p->kobj, &pdd->attr_vram); - sysfs_remove_file(p->kobj, &pdd->attr_sdma); + for (i = 0; i < p->n_pdds; i++) { + pdd = p->pdds[i]; - sysfs_remove_file(pdd->kobj_stats, &pdd->attr_evict); - if (pdd->dev->kfd2kgd->get_cu_occupancy) - sysfs_remove_file(pdd->kobj_stats, - &pdd->attr_cu_occupancy); - kobject_del(pdd->kobj_stats); - kobject_put(pdd->kobj_stats); - pdd->kobj_stats = NULL; - } + sysfs_remove_file(p->kobj, 
&pdd->attr_vram); + sysfs_remove_file(p->kobj, &pdd->attr_sdma); - kobject_del(p->kobj); - kobject_put(p->kobj); - p->kobj = NULL; + sysfs_remove_file(pdd->kobj_stats, &pdd->attr_evict); + if (pdd->dev->kfd2kgd->get_cu_occupancy) + sysfs_remove_file(pdd->kobj_stats, + &pdd->attr_cu_occupancy); + kobject_del(pdd->kobj_stats); + kobject_put(pdd->kobj_stats); + pdd->kobj_stats = NULL; + } + + for_each_set_bit(i, p->svms.bitmap_supported, p->n_pdds) { + pdd = p->pdds[i]; + + sysfs_remove_file(pdd->kobj_counters, &pdd->attr_faults); + sysfs_remove_file(pdd->kobj_counters, &pdd->attr_page_in); + sysfs_remove_file(pdd->kobj_counters, &pdd->attr_page_out); + kobject_del(pdd->kobj_counters); + kobject_put(pdd->kobj_counters); + pdd->kobj_counters = NULL; } + kobject_del(p->kobj); + kobject_put(p->kobj); + p->kobj = NULL; +} + +/* No process locking is needed in this function, because the process + * is not findable any more. We must assume that no other thread is + * using it any more, otherwise we couldn't safely free the process + * structure in the end. + */ +static void kfd_process_wq_release(struct work_struct *work) +{ + struct kfd_process *p = container_of(work, struct kfd_process, + release_work); + kfd_process_remove_sysfs(p); kfd_iommu_unbind_process(p); kfd_process_free_outstanding_kfd_bos(p); -- cgit v1.2.3 From d4ebc2007040a0aff01bfe1b194085d3867328fd Mon Sep 17 00:00:00 2001 From: Philip Yang Date: Tue, 22 Jun 2021 00:12:32 -0400 Subject: drm/amdkfd: implement counters for vm fault and migration Add helper function to get process device data structure from adev to update counters. Updates of the vm faults, page_in and page_out counters will not be executed in parallel, so use WRITE_ONCE to avoid any form of compiler optimizations.
Signed-off-by: Philip Yang Reviewed-by: Felix Kuehling Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdkfd/kfd_migrate.c | 14 ++++++++++ drivers/gpu/drm/amd/amdkfd/kfd_svm.c | 45 +++++++++++++++++++++++++++++++- drivers/gpu/drm/amd/amdkfd/kfd_svm.h | 2 ++ 3 files changed, 60 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_migrate.c b/drivers/gpu/drm/amd/amdkfd/kfd_migrate.c index 2660f03e63a7..d8092a84e0a4 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_migrate.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_migrate.c @@ -365,6 +365,7 @@ svm_migrate_vma_to_vram(struct amdgpu_device *adev, struct svm_range *prange, uint64_t end) { uint64_t npages = (end - start) >> PAGE_SHIFT; + struct kfd_process_device *pdd; struct dma_fence *mfence = NULL; struct migrate_vma migrate; dma_addr_t *scratch; @@ -425,6 +426,12 @@ retry: out_free: kvfree(buf); out: + if (!r) { + pdd = svm_range_get_pdd_by_adev(prange, adev); + if (pdd) + WRITE_ONCE(pdd->page_in, pdd->page_in + migrate.cpages); + } + return r; } @@ -581,6 +588,7 @@ svm_migrate_vma_to_ram(struct amdgpu_device *adev, struct svm_range *prange, struct vm_area_struct *vma, uint64_t start, uint64_t end) { uint64_t npages = (end - start) >> PAGE_SHIFT; + struct kfd_process_device *pdd; struct dma_fence *mfence = NULL; struct migrate_vma migrate; dma_addr_t *scratch; @@ -630,6 +638,12 @@ svm_migrate_vma_to_ram(struct amdgpu_device *adev, struct svm_range *prange, out_free: kvfree(buf); out: + if (!r) { + pdd = svm_range_get_pdd_by_adev(prange, adev); + if (pdd) + WRITE_ONCE(pdd->page_out, + pdd->page_out + migrate.cpages); + } return r; } diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_svm.c b/drivers/gpu/drm/amd/amdkfd/kfd_svm.c index dff1011dd7ee..90e2eacb8ba0 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_svm.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_svm.c @@ -564,6 +564,24 @@ svm_range_get_adev_by_id(struct svm_range *prange, uint32_t gpu_id) return (struct amdgpu_device *)pdd->dev->kgd; } +struct 
kfd_process_device * +svm_range_get_pdd_by_adev(struct svm_range *prange, struct amdgpu_device *adev) +{ + struct kfd_process *p; + int32_t gpu_idx, gpuid; + int r; + + p = container_of(prange->svms, struct kfd_process, svms); + + r = kfd_process_gpuid_from_kgd(p, adev, &gpuid, &gpu_idx); + if (r) { + pr_debug("failed to get device id by adev %p\n", adev); + return NULL; + } + + return kfd_process_device_from_gpuidx(p, gpu_idx); +} + static int svm_range_bo_validate(void *param, struct amdgpu_bo *bo) { struct ttm_operation_ctx ctx = { false, false }; @@ -2311,6 +2329,27 @@ static bool svm_range_skip_recover(struct svm_range *prange) return false; } +static void +svm_range_count_fault(struct amdgpu_device *adev, struct kfd_process *p, + struct svm_range *prange, int32_t gpuidx) +{ + struct kfd_process_device *pdd; + + if (gpuidx == MAX_GPU_INSTANCE) + /* fault is on different page of same range + * or fault is skipped to recover later + */ + pdd = svm_range_get_pdd_by_adev(prange, adev); + else + /* fault recovered + * or fault cannot recover because GPU no access on the range + */ + pdd = kfd_process_device_from_gpuidx(p, gpuidx); + + if (pdd) + WRITE_ONCE(pdd->faults, pdd->faults + 1); +} + int svm_range_restore_pages(struct amdgpu_device *adev, unsigned int pasid, uint64_t addr) @@ -2320,7 +2359,8 @@ svm_range_restore_pages(struct amdgpu_device *adev, unsigned int pasid, struct svm_range *prange; struct kfd_process *p; uint64_t timestamp; - int32_t best_loc, gpuidx; + int32_t best_loc; + int32_t gpuidx = MAX_GPU_INSTANCE; bool write_locked = false; int r = 0; @@ -2440,6 +2480,9 @@ out_unlock_range: out_unlock_svms: mutex_unlock(&svms->lock); mmap_read_unlock(mm); + + svm_range_count_fault(adev, p, prange, gpuidx); + mmput(mm); out: kfd_unref_process(p); diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_svm.h b/drivers/gpu/drm/amd/amdkfd/kfd_svm.h index 0c0fc399395e..a9af03994d1a 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_svm.h +++ 
b/drivers/gpu/drm/amd/amdkfd/kfd_svm.h @@ -174,6 +174,8 @@ void svm_range_dma_unmap(struct device *dev, dma_addr_t *dma_addr, unsigned long offset, unsigned long npages); void svm_range_free_dma_mappings(struct svm_range *prange); void svm_range_prefault(struct svm_range *prange, struct mm_struct *mm); +struct kfd_process_device * +svm_range_get_pdd_by_adev(struct svm_range *prange, struct amdgpu_device *adev); /* SVM API and HMM page migration work together, device memory type * is initialized to not 0 when page migration register device memory. -- cgit v1.2.3 From c8af9390e5c01b83600a700bf287087d0eabb387 Mon Sep 17 00:00:00 2001 From: Aaron Liu Date: Mon, 21 Jun 2021 13:17:10 +0800 Subject: drm/amdgpu: enable tmz on yellow carp The tmz functions are verified on yellow carp. So enable it by default. Signed-off-by: Aaron Liu Reviewed-by: Huang Rui Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c index 0174f7817ce2..d0b8d415b63b 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c @@ -562,6 +562,7 @@ void amdgpu_gmc_tmz_set(struct amdgpu_device *adev) case CHIP_NAVI14: case CHIP_NAVI12: case CHIP_VANGOGH: + case CHIP_YELLOW_CARP: /* Don't enable it by default yet. */ if (amdgpu_tmz < 1) { -- cgit v1.2.3 From b3a24461f9fb1579c3335c63d1e039bc5a6eda53 Mon Sep 17 00:00:00 2001 From: Veerabadhran Gopalakrishnan Date: Sat, 19 Jun 2021 00:10:46 +0530 Subject: amdgpu/nv.c - Added codec query for Beige Goby Added the Beige Goby capabilities in codec query. 
v2: fix build error and indent (James) Signed-off-by: Veerabadhran Gopalakrishnan Reviewed-by: James Zhu Reviewed-by: Leo Liu Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/nv.c | 30 ++++++++++++++++++++++++++++++ 1 file changed, 30 insertions(+) diff --git a/drivers/gpu/drm/amd/amdgpu/nv.c b/drivers/gpu/drm/amd/amdgpu/nv.c index 5231b3402990..859e761c612c 100644 --- a/drivers/gpu/drm/amd/amdgpu/nv.c +++ b/drivers/gpu/drm/amd/amdgpu/nv.c @@ -64,6 +64,13 @@ #include "smuio_v11_0.h" #include "smuio_v11_0_6.h" +#define codec_info_build(type, width, height, level) \ + .codec_type = type,\ + .max_width = width,\ + .max_height = height,\ + .max_pixels_per_frame = height * width,\ + .max_level = level, + static const struct amd_ip_funcs nv_common_ip_funcs; /* Navi */ @@ -309,6 +316,23 @@ static struct amdgpu_video_codecs sriov_sc_video_codecs_decode = .codec_array = sriov_sc_video_codecs_decode_array, }; +/* Beige Goby*/ +static const struct amdgpu_video_codec_info bg_video_codecs_decode_array[] = { + {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_MPEG4_AVC, 4096, 4906, 52)}, + {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_HEVC, 8192, 4352, 186)}, + {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_VP9, 8192, 4352, 0)}, +}; + +static const struct amdgpu_video_codecs bg_video_codecs_decode = { + .codec_count = ARRAY_SIZE(bg_video_codecs_decode_array), + .codec_array = bg_video_codecs_decode_array, +}; + +static const struct amdgpu_video_codecs bg_video_codecs_encode = { + .codec_count = 0, + .codec_array = NULL, +}; + static int nv_query_video_codecs(struct amdgpu_device *adev, bool encode, const struct amdgpu_video_codecs **codecs) { @@ -335,6 +359,12 @@ static int nv_query_video_codecs(struct amdgpu_device *adev, bool encode, else *codecs = &sc_video_codecs_decode; return 0; + case CHIP_BEIGE_GOBY: + if (encode) + *codecs = &bg_video_codecs_encode; + else + *codecs = &bg_video_codecs_decode; + return 0; case CHIP_NAVI10: case CHIP_NAVI14: case 
CHIP_NAVI12: -- cgit v1.2.3 From e7d9560aeae51415f6c9bc343feb783a441ff4c5 Mon Sep 17 00:00:00 2001 From: Rodrigo Siqueira Date: Wed, 16 Jun 2021 12:21:30 -0400 Subject: Revert "drm/amd/display: Fix overlay validation by considering cursors" This reverts commit 33f409e60eb0c59a4d0d06a62ab4642a988e17f7. The patch that we are reverting here was originally applied because it fixes multiple IGT issues and flickering in Android. However, after a discussion with Sean Paul and Mark, it looks like that this patch might cause problems on ChromeOS. For this reason, we decided to revert this patch. Cc: Nicholas Kazlauskas Cc: Harry Wentland Cc: Hersen Wu Cc: Sean Paul Cc: Mark Yacoub Cc: Greg Kroah-Hartman Signed-off-by: Rodrigo Siqueira Reviewed-by: Sean Paul Reviewed-by: Harry Wentland Signed-off-by: Alex Deucher Cc: stable@vger.kernel.org --- drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c | 12 ++---------- 1 file changed, 2 insertions(+), 10 deletions(-) diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c index 2688a2e759de..cfb2f9e43661 100644 --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c @@ -10120,8 +10120,8 @@ static int validate_overlay(struct drm_atomic_state *state) { int i; struct drm_plane *plane; - struct drm_plane_state *new_plane_state; - struct drm_plane_state *primary_state, *cursor_state, *overlay_state = NULL; + struct drm_plane_state *old_plane_state, *new_plane_state; + struct drm_plane_state *primary_state, *overlay_state = NULL; /* Check if primary plane is contained inside overlay */ for_each_new_plane_in_state_reverse(state, plane, new_plane_state, i) { @@ -10151,14 +10151,6 @@ static int validate_overlay(struct drm_atomic_state *state) if (!primary_state->crtc) return 0; - /* check if cursor plane is enabled */ - cursor_state = drm_atomic_get_plane_state(state, overlay_state->crtc->cursor); - if (IS_ERR(cursor_state)) - 
return PTR_ERR(cursor_state); - - if (drm_atomic_plane_disabling(plane->state, cursor_state)) - return 0; - /* Perform the bounds check to ensure the overlay plane covers the primary */ if (primary_state->crtc_x < overlay_state->crtc_x || primary_state->crtc_y < overlay_state->crtc_y || -- cgit v1.2.3 From ed50995514d319ff27dafe3747f04abab9e87bdf Mon Sep 17 00:00:00 2001 From: Alex Deucher Date: Tue, 29 Jun 2021 16:55:47 -0400 Subject: drm/amdgpu/display: drop unused variable Remove unused variable. Fixes: e7d9560aeae514 ("Revert "drm/amd/display: Fix overlay validation by considering cursors"") Reviewed-by: Harry Wentland Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c index cfb2f9e43661..01e1062dc235 100644 --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c @@ -10120,7 +10120,7 @@ static int validate_overlay(struct drm_atomic_state *state) { int i; struct drm_plane *plane; - struct drm_plane_state *old_plane_state, *new_plane_state; + struct drm_plane_state *new_plane_state; struct drm_plane_state *primary_state, *overlay_state = NULL; /* Check if primary plane is contained inside overlay */ -- cgit v1.2.3 From 91161b06be5729050a11e36366d2db8a679f35b6 Mon Sep 17 00:00:00 2001 From: Darren Powell Date: Thu, 24 Jun 2021 22:54:35 -0400 Subject: amdgpu/pm: remove code duplication in show_power_cap calls v3: updated patch to apply to latest code v2: reorder to check pointers before calling pm_runtime_* functions created generic function and call with enum from * amdgpu_hwmon_show_power_cap_max * amdgpu_hwmon_show_power_cap * amdgpu_hwmon_show_power_cap_default === Test === AMDGPU_PCI_ADDR=`lspci -nn | grep "VGA\|Display" | cut -d " " -f 1` AMDGPU_HWMON=`ls -la /sys/class/hwmon | grep $AMDGPU_PCI_ADDR | cut -d 
" " -f 10` HWMON_DIR=/sys/class/hwmon/${AMDGPU_HWMON} cp pp_show_power_cap.txt{,.old} lspci -nn | grep "VGA\|Display" > pp_show_power_cap.test.log FILES=" power1_cap power1_cap_max power1_cap_default " for f in $FILES do echo $f = `cat $HWMON_DIR/$f` >> pp_show_power_cap.test.log done Signed-off-by: Darren Powell Reviewed-by: Kevin Wang Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/pm/amdgpu_pm.c | 95 +++++++++----------------------------- 1 file changed, 21 insertions(+), 74 deletions(-) diff --git a/drivers/gpu/drm/amd/pm/amdgpu_pm.c b/drivers/gpu/drm/amd/pm/amdgpu_pm.c index a276ebad47e6..769f58d5ae1a 100644 --- a/drivers/gpu/drm/amd/pm/amdgpu_pm.c +++ b/drivers/gpu/drm/amd/pm/amdgpu_pm.c @@ -2902,14 +2902,15 @@ static ssize_t amdgpu_hwmon_show_power_cap_min(struct device *dev, return sprintf(buf, "%i\n", 0); } -static ssize_t amdgpu_hwmon_show_power_cap_max(struct device *dev, - struct device_attribute *attr, - char *buf) + +static ssize_t amdgpu_hwmon_show_power_cap_generic(struct device *dev, + struct device_attribute *attr, + char *buf, + enum pp_power_limit_level pp_limit_level) { struct amdgpu_device *adev = dev_get_drvdata(dev); const struct amd_pm_funcs *pp_funcs = adev->powerplay.pp_funcs; enum pp_power_type power_type = to_sensor_dev_attr(attr)->index; - enum pp_power_limit_level pp_limit_level = PP_PWR_LIMIT_MAX; uint32_t limit; ssize_t size; int r; @@ -2919,17 +2920,17 @@ static ssize_t amdgpu_hwmon_show_power_cap_max(struct device *dev, if (adev->in_suspend && !adev->in_runpm) return -EPERM; + if ( !(pp_funcs && pp_funcs->get_power_limit)) + return -ENODATA; + r = pm_runtime_get_sync(adev_to_drm(adev)->dev); if (r < 0) { pm_runtime_put_autosuspend(adev_to_drm(adev)->dev); return r; } - if (pp_funcs && pp_funcs->get_power_limit) - r = pp_funcs->get_power_limit(adev->powerplay.pp_handle, &limit, - pp_limit_level, power_type); - else - r = -ENODATA; + r = pp_funcs->get_power_limit(adev->powerplay.pp_handle, &limit, + pp_limit_level, power_type); 
if (!r) size = sysfs_emit(buf, "%u\n", limit * 1000000); @@ -2942,85 +2943,31 @@ static ssize_t amdgpu_hwmon_show_power_cap_max(struct device *dev, return size; } -static ssize_t amdgpu_hwmon_show_power_cap(struct device *dev, + +static ssize_t amdgpu_hwmon_show_power_cap_max(struct device *dev, struct device_attribute *attr, char *buf) { - struct amdgpu_device *adev = dev_get_drvdata(dev); - const struct amd_pm_funcs *pp_funcs = adev->powerplay.pp_funcs; - enum pp_power_type power_type = to_sensor_dev_attr(attr)->index; - enum pp_power_limit_level pp_limit_level = PP_PWR_LIMIT_CURRENT; - uint32_t limit; - ssize_t size; - int r; - - if (amdgpu_in_reset(adev)) - return -EPERM; - if (adev->in_suspend && !adev->in_runpm) - return -EPERM; - - r = pm_runtime_get_sync(adev_to_drm(adev)->dev); - if (r < 0) { - pm_runtime_put_autosuspend(adev_to_drm(adev)->dev); - return r; - } - - if (pp_funcs && pp_funcs->get_power_limit) - r = pp_funcs->get_power_limit(adev->powerplay.pp_handle, &limit, - pp_limit_level, power_type); - else - r = -ENODATA; + return amdgpu_hwmon_show_power_cap_generic(dev, attr, buf, PP_PWR_LIMIT_MAX); - if (!r) - size = sysfs_emit(buf, "%u\n", limit * 1000000); - else - size = sysfs_emit(buf, "\n"); +} - pm_runtime_mark_last_busy(adev_to_drm(adev)->dev); - pm_runtime_put_autosuspend(adev_to_drm(adev)->dev); +static ssize_t amdgpu_hwmon_show_power_cap(struct device *dev, + struct device_attribute *attr, + char *buf) +{ + return amdgpu_hwmon_show_power_cap_generic(dev, attr, buf, PP_PWR_LIMIT_CURRENT); - return size; } static ssize_t amdgpu_hwmon_show_power_cap_default(struct device *dev, struct device_attribute *attr, char *buf) { - struct amdgpu_device *adev = dev_get_drvdata(dev); - const struct amd_pm_funcs *pp_funcs = adev->powerplay.pp_funcs; - enum pp_power_type power_type = to_sensor_dev_attr(attr)->index; - enum pp_power_limit_level pp_limit_level = PP_PWR_LIMIT_DEFAULT; - uint32_t limit; - ssize_t size; - int r; + return 
amdgpu_hwmon_show_power_cap_generic(dev, attr, buf, PP_PWR_LIMIT_DEFAULT); - if (amdgpu_in_reset(adev)) - return -EPERM; - if (adev->in_suspend && !adev->in_runpm) - return -EPERM; - - r = pm_runtime_get_sync(adev_to_drm(adev)->dev); - if (r < 0) { - pm_runtime_put_autosuspend(adev_to_drm(adev)->dev); - return r; - } - - if (pp_funcs && pp_funcs->get_power_limit) - r = pp_funcs->get_power_limit(adev->powerplay.pp_handle, &limit, - pp_limit_level, power_type); - else - r = -ENODATA; - - if (!r) - size = sysfs_emit(buf, "%u\n", limit * 1000000); - else - size = sysfs_emit(buf, "\n"); - - pm_runtime_mark_last_busy(adev_to_drm(adev)->dev); - pm_runtime_put_autosuspend(adev_to_drm(adev)->dev); - - return size; } + static ssize_t amdgpu_hwmon_show_power_label(struct device *dev, struct device_attribute *attr, char *buf) -- cgit v1.2.3 From a2f55040cfbe7b208dc079232326fab86f2790c2 Mon Sep 17 00:00:00 2001 From: Chengming Gui Date: Mon, 26 Apr 2021 16:18:10 +0800 Subject: drm/amd/amdgpu: enable gpu recovery for beige_goby Enable gpu recovery for beige_goby. 
Signed-off-by: Chengming Gui Reviewed-by: Hawking Zhang Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_device.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c index 130a9adf09ef..b5cce56a2a33 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c @@ -4304,6 +4304,7 @@ bool amdgpu_device_should_recover_gpu(struct amdgpu_device *adev) case CHIP_SIENNA_CICHLID: case CHIP_NAVY_FLOUNDER: case CHIP_DIMGREY_CAVEFISH: + case CHIP_BEIGE_GOBY: case CHIP_VANGOGH: case CHIP_ALDEBARAN: break; -- cgit v1.2.3 From f18f58012ee894039cd59ee8c889bf499d7a3943 Mon Sep 17 00:00:00 2001 From: Mikel Rychliski Date: Thu, 24 Jun 2021 00:51:20 -0400 Subject: drm/radeon: Fix NULL dereference when updating memory stats MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit radeon_ttm_bo_destroy() is attempting to access the resource object to update memory counters. However, the resource object is already freed when ttm calls this function via the destroy callback. This causes an oops when a bo is freed: BUG: kernel NULL pointer dereference, address: 0000000000000010 RIP: 0010:radeon_ttm_bo_destroy+0x2c/0x100 [radeon] Call Trace: radeon_bo_unref+0x1a/0x30 [radeon] radeon_gem_object_free+0x33/0x50 [radeon] drm_gem_object_release_handle+0x69/0x70 [drm] drm_gem_handle_delete+0x62/0xa0 [drm] ? drm_mode_destroy_dumb+0x40/0x40 [drm] drm_ioctl_kernel+0xb2/0xf0 [drm] drm_ioctl+0x30a/0x3c0 [drm] ? drm_mode_destroy_dumb+0x40/0x40 [drm] radeon_drm_ioctl+0x49/0x80 [radeon] __x64_sys_ioctl+0x8e/0xd0 Avoid the issue by updating the counters in the delete_mem_notify callback instead. Also, fix memory statistic updating in radeon_bo_move() to identify the source type correctly. The source type needs to be saved before the move, because the moved from object may be altered by the move. 
Fixes: bfa3357ef9ab ("drm/ttm: allocate resource object instead of embedding it v2") Signed-off-by: Mikel Rychliski Reviewed-by: Christian König Signed-off-by: Christian König Link: https://patchwork.freedesktop.org/patch/msgid/20210624045121.15643-1-mikel@mikelr.com --- drivers/gpu/drm/radeon/radeon_object.c | 29 ++++++++++++----------------- drivers/gpu/drm/radeon/radeon_object.h | 2 +- drivers/gpu/drm/radeon/radeon_ttm.c | 13 ++++++++++--- 3 files changed, 23 insertions(+), 21 deletions(-) diff --git a/drivers/gpu/drm/radeon/radeon_object.c b/drivers/gpu/drm/radeon/radeon_object.c index bfaaa3c969a3..56ede9d63b12 100644 --- a/drivers/gpu/drm/radeon/radeon_object.c +++ b/drivers/gpu/drm/radeon/radeon_object.c @@ -49,23 +49,23 @@ static void radeon_bo_clear_surface_reg(struct radeon_bo *bo); * function are calling it. */ -static void radeon_update_memory_usage(struct radeon_bo *bo, - unsigned mem_type, int sign) +static void radeon_update_memory_usage(struct ttm_buffer_object *bo, + unsigned int mem_type, int sign) { - struct radeon_device *rdev = bo->rdev; + struct radeon_device *rdev = radeon_get_rdev(bo->bdev); switch (mem_type) { case TTM_PL_TT: if (sign > 0) - atomic64_add(bo->tbo.base.size, &rdev->gtt_usage); + atomic64_add(bo->base.size, &rdev->gtt_usage); else - atomic64_sub(bo->tbo.base.size, &rdev->gtt_usage); + atomic64_sub(bo->base.size, &rdev->gtt_usage); break; case TTM_PL_VRAM: if (sign > 0) - atomic64_add(bo->tbo.base.size, &rdev->vram_usage); + atomic64_add(bo->base.size, &rdev->vram_usage); else - atomic64_sub(bo->tbo.base.size, &rdev->vram_usage); + atomic64_sub(bo->base.size, &rdev->vram_usage); break; } } @@ -76,8 +76,6 @@ static void radeon_ttm_bo_destroy(struct ttm_buffer_object *tbo) bo = container_of(tbo, struct radeon_bo, tbo); - radeon_update_memory_usage(bo, bo->tbo.resource->mem_type, -1); - mutex_lock(&bo->rdev->gem.mutex); list_del_init(&bo->list); mutex_unlock(&bo->rdev->gem.mutex); @@ -727,24 +725,21 @@ int 
radeon_bo_check_tiling(struct radeon_bo *bo, bool has_moved, } void radeon_bo_move_notify(struct ttm_buffer_object *bo, - bool evict, + unsigned int old_type, struct ttm_resource *new_mem) { struct radeon_bo *rbo; + radeon_update_memory_usage(bo, old_type, -1); + if (new_mem) + radeon_update_memory_usage(bo, new_mem->mem_type, 1); + if (!radeon_ttm_bo_is_radeon_bo(bo)) return; rbo = container_of(bo, struct radeon_bo, tbo); radeon_bo_check_tiling(rbo, 0, 1); radeon_vm_bo_invalidate(rbo->rdev, rbo); - - /* update statistics */ - if (!new_mem) - return; - - radeon_update_memory_usage(rbo, bo->resource->mem_type, -1); - radeon_update_memory_usage(rbo, new_mem->mem_type, 1); } vm_fault_t radeon_bo_fault_reserve_notify(struct ttm_buffer_object *bo) diff --git a/drivers/gpu/drm/radeon/radeon_object.h b/drivers/gpu/drm/radeon/radeon_object.h index 1739c6a142cd..1afc7992ef91 100644 --- a/drivers/gpu/drm/radeon/radeon_object.h +++ b/drivers/gpu/drm/radeon/radeon_object.h @@ -161,7 +161,7 @@ extern void radeon_bo_get_tiling_flags(struct radeon_bo *bo, extern int radeon_bo_check_tiling(struct radeon_bo *bo, bool has_moved, bool force_drop); extern void radeon_bo_move_notify(struct ttm_buffer_object *bo, - bool evict, + unsigned int old_type, struct ttm_resource *new_mem); extern vm_fault_t radeon_bo_fault_reserve_notify(struct ttm_buffer_object *bo); extern int radeon_bo_get_surface_reg(struct radeon_bo *bo); diff --git a/drivers/gpu/drm/radeon/radeon_ttm.c b/drivers/gpu/drm/radeon/radeon_ttm.c index ad2a5a791bba..a06d4cc2fb1c 100644 --- a/drivers/gpu/drm/radeon/radeon_ttm.c +++ b/drivers/gpu/drm/radeon/radeon_ttm.c @@ -199,7 +199,7 @@ static int radeon_bo_move(struct ttm_buffer_object *bo, bool evict, struct ttm_resource *old_mem = bo->resource; struct radeon_device *rdev; struct radeon_bo *rbo; - int r; + int r, old_type; if (new_mem->mem_type == TTM_PL_TT) { r = radeon_ttm_tt_bind(bo->bdev, bo->ttm, new_mem); @@ -216,6 +216,9 @@ static int radeon_bo_move(struct 
ttm_buffer_object *bo, bool evict, if (WARN_ON_ONCE(rbo->tbo.pin_count > 0)) return -EINVAL; + /* Save old type for statistics update */ + old_type = old_mem->mem_type; + rdev = radeon_get_rdev(bo->bdev); if (old_mem->mem_type == TTM_PL_SYSTEM && bo->ttm == NULL) { ttm_bo_move_null(bo, new_mem); @@ -261,7 +264,7 @@ static int radeon_bo_move(struct ttm_buffer_object *bo, bool evict, out: /* update statistics */ atomic64_add(bo->base.size, &rdev->num_bytes_moved); - radeon_bo_move_notify(bo, evict, new_mem); + radeon_bo_move_notify(bo, old_type, new_mem); return 0; } @@ -682,7 +685,11 @@ bool radeon_ttm_tt_is_readonly(struct radeon_device *rdev, static void radeon_bo_delete_mem_notify(struct ttm_buffer_object *bo) { - radeon_bo_move_notify(bo, false, NULL); + unsigned int old_type = TTM_PL_SYSTEM; + + if (bo->resource) + old_type = bo->resource->mem_type; + radeon_bo_move_notify(bo, old_type, NULL); } static struct ttm_device_funcs radeon_bo_driver = { -- cgit v1.2.3 From e2329e74a615cc58b25c42b7aa1477a5e3f6a435 Mon Sep 17 00:00:00 2001 From: Aaron Liu Date: Fri, 25 Jun 2021 13:50:19 +0800 Subject: drm/amdgpu: enable sdma0 tmz for Raven/Renoir(V2) Without driver loaded, SDMA0_UTCL1_PAGE.TMZ_ENABLE is set to 1 by default for all asic. On Raven/Renoir, the sdma goldsetting changes SDMA0_UTCL1_PAGE.TMZ_ENABLE to 0. This patch restores SDMA0_UTCL1_PAGE.TMZ_ENABLE to 1. 
Signed-off-by: Aaron Liu Acked-by: Luben Tuikov Acked-by: Alex Deucher Signed-off-by: Alex Deucher Cc: stable@vger.kernel.org --- drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c b/drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c index ae5464e2535a..f6881d99609b 100644 --- a/drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c +++ b/drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c @@ -144,7 +144,7 @@ static const struct soc15_reg_golden golden_settings_sdma_4_1[] = { SOC15_REG_GOLDEN_VALUE(SDMA0, 0, mmSDMA0_RLC0_RB_WPTR_POLL_CNTL, 0xfffffff7, 0x00403000), SOC15_REG_GOLDEN_VALUE(SDMA0, 0, mmSDMA0_RLC1_IB_CNTL, 0x800f0111, 0x00000100), SOC15_REG_GOLDEN_VALUE(SDMA0, 0, mmSDMA0_RLC1_RB_WPTR_POLL_CNTL, 0xfffffff7, 0x00403000), - SOC15_REG_GOLDEN_VALUE(SDMA0, 0, mmSDMA0_UTCL1_PAGE, 0x000003ff, 0x000003c0), + SOC15_REG_GOLDEN_VALUE(SDMA0, 0, mmSDMA0_UTCL1_PAGE, 0x000003ff, 0x000003e0), SOC15_REG_GOLDEN_VALUE(SDMA0, 0, mmSDMA0_UTCL1_WATERMK, 0xfc000000, 0x00000000) }; @@ -288,7 +288,7 @@ static const struct soc15_reg_golden golden_settings_sdma_4_3[] = { SOC15_REG_GOLDEN_VALUE(SDMA0, 0, mmSDMA0_POWER_CNTL, 0x003fff07, 0x40000051), SOC15_REG_GOLDEN_VALUE(SDMA0, 0, mmSDMA0_RLC0_RB_WPTR_POLL_CNTL, 0xfffffff7, 0x00403000), SOC15_REG_GOLDEN_VALUE(SDMA0, 0, mmSDMA0_RLC1_RB_WPTR_POLL_CNTL, 0xfffffff7, 0x00403000), - SOC15_REG_GOLDEN_VALUE(SDMA0, 0, mmSDMA0_UTCL1_PAGE, 0x000003ff, 0x000003c0), + SOC15_REG_GOLDEN_VALUE(SDMA0, 0, mmSDMA0_UTCL1_PAGE, 0x000003ff, 0x000003e0), SOC15_REG_GOLDEN_VALUE(SDMA0, 0, mmSDMA0_UTCL1_WATERMK, 0xfc000000, 0x03fbe1fe) }; -- cgit v1.2.3 From e38ca7e422791a4d1c01e56dbf7f9982db0ed365 Mon Sep 17 00:00:00 2001 From: Guchun Chen Date: Mon, 28 Jun 2021 17:03:48 +0800 Subject: drm/amd/display: fix incorrect valid irq check valid DAL irq should be < DAL_IRQ_SOURCES_NUMBER.
Signed-off-by: Guchun Chen Reviewed-and-tested-by: Evan Quan Reviewed-by: Harry Wentland Signed-off-by: Alex Deucher Cc: stable@vger.kernel.org --- drivers/gpu/drm/amd/display/dc/irq_types.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/display/dc/irq_types.h b/drivers/gpu/drm/amd/display/dc/irq_types.h index 5f9346622301..1139b9eb9f6f 100644 --- a/drivers/gpu/drm/amd/display/dc/irq_types.h +++ b/drivers/gpu/drm/amd/display/dc/irq_types.h @@ -165,7 +165,7 @@ enum irq_type }; #define DAL_VALID_IRQ_SRC_NUM(src) \ - ((src) <= DAL_IRQ_SOURCES_NUMBER && (src) > DC_IRQ_SOURCE_INVALID) + ((src) < DAL_IRQ_SOURCES_NUMBER && (src) > DC_IRQ_SOURCE_INVALID) /* Number of Page Flip IRQ Sources. */ #define DAL_PFLIP_IRQ_SRC_NUM \ -- cgit v1.2.3 From b66596f62665e8ef9a26508f2dc938289eac6d0c Mon Sep 17 00:00:00 2001 From: Guchun Chen Date: Mon, 28 Jun 2021 17:08:00 +0800 Subject: drm/amd/display: fix null pointer access in gpu reset During GPU reset, when receiving a DMCUB OUTBOX0 interrupt, DAL code treats it as an OUTBOX interrupt and sets the hw interrupt. However, the OUTBOX interrupt is not registered yet, so a NULL pointer access will be executed. Call Trace: dal_irq_service_set+0x30/0x90 [amdgpu] dc_interrupt_set+0x24/0x30 [amdgpu] amdgpu_dm_set_dmub_outbox_irq_state+0x22/0x30 [amdgpu] amdgpu_irq_update+0x77/0xa0 [amdgpu] amdgpu_irq_gpu_reset_resume_helper+0x67/0xa0 [amdgpu] amdgpu_do_asic_reset+0x219/0x260 [amdgpu] amdgpu_device_gpu_recover.cold+0x8c5/0xb64 [amdgpu] amdgpu_debugfs_gpu_recover_show+0x2c/0x60 [amdgpu] seq_read_iter+0xc2/0x450 ?
do_anonymous_page+0x22c/0x3b0 seq_read+0xf9/0x140 full_proxy_read+0x5c/0x90 vfs_read+0xaa/0x190 ksys_read+0x67/0xe0 __x64_sys_read+0x1a/0x20 Fixes: effbf6ca7eafda ("drm/amdgpu/display: remove an old DCN3 guard") Signed-off-by: Guchun Chen Reviewed-and-tested-by: Evan Quan Reviewed-by: Harry Wentland Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/display/dc/irq/irq_service.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/amd/display/dc/irq/irq_service.c b/drivers/gpu/drm/amd/display/dc/irq/irq_service.c index 5f245bde54ff..a2a4fbeb83f8 100644 --- a/drivers/gpu/drm/amd/display/dc/irq/irq_service.c +++ b/drivers/gpu/drm/amd/display/dc/irq/irq_service.c @@ -119,7 +119,7 @@ bool dal_irq_service_set( dal_irq_service_ack(irq_service, source); - if (info->funcs->set) + if (info->funcs && info->funcs->set) return info->funcs->set(irq_service, info, enable); dal_irq_service_set_generic(irq_service, info, enable); @@ -153,7 +153,7 @@ bool dal_irq_service_ack( return false; } - if (info->funcs->ack) + if (info->funcs && info->funcs->ack) return info->funcs->ack(irq_service, info); dal_irq_service_ack_generic(irq_service, info); -- cgit v1.2.3 From 8dbe43e99f0f62fc4f829b4fedc5d628a329fc38 Mon Sep 17 00:00:00 2001 From: Oak Zeng Date: Mon, 28 Jun 2021 17:53:38 -0500 Subject: drm/amdgpu: Set ttm caching flags during bo allocation The ttm caching flags (ttm_cached, ttm_write_combined etc) are used to determine a buffer object's mapping attributes in both CPU page table and GPU page table (when that buffer is also accessed by GPU). Currently the ttm caching flags are set in function amdgpu_ttm_io_mem_reserve which is called during DRM_AMDGPU_GEM_MMAP ioctl. This has a problem since the GPU mapping of the buffer object (ioctl DRM_AMDGPU_GEM_VA) can happen earlier than the mmap time, thus the GPU page table update code can't pick up the right ttm caching flags to decide the right GPU page table attributes. 
This patch moves the ttm caching flags setting to function amdgpu_vram_mgr_new - this function is called during the first step of a buffer object create (eg, DRM_AMDGPU_GEM_CREATE) so the later both CPU and GPU mapping function calls will pick up this flag for CPU/GPU page table set up. v2: rebase (Alex) Signed-off-by: Oak Zeng Suggested-by: Christian Koenig Reviewed-by: Christian Koenig Reviewed-by: Feifei Xu Tested-by: Po Huang Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c | 4 ---- drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.c | 5 +++++ 2 files changed, 5 insertions(+), 4 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c index 6a214a4dfe04..a2d1ab192457 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c @@ -590,10 +590,6 @@ static int amdgpu_ttm_io_mem_reserve(struct ttm_device *bdev, mem->bus.offset += adev->gmc.aper_base; mem->bus.is_iomem = true; - if (adev->gmc.xgmi.connected_to_cpu) - mem->bus.caching = ttm_cached; - else - mem->bus.caching = ttm_write_combined; break; default: return -EINVAL; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.c index 436ec246a7da..2fd77c36a1ff 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.c @@ -463,6 +463,11 @@ static int amdgpu_vram_mgr_new(struct ttm_resource_manager *man, if (i == 1) node->base.placement |= TTM_PL_FLAG_CONTIGUOUS; + if (adev->gmc.xgmi.connected_to_cpu) + node->base.bus.caching = ttm_cached; + else + node->base.bus.caching = ttm_write_combined; + atomic64_add(vis_usage, &mgr->vis_usage); *res = &node->base; return 0; -- cgit v1.2.3 From c1bfd74bfef77bcefc88d12eaf8996c0dfd51331 Mon Sep 17 00:00:00 2001 From: Tiezhu Yang Date: Mon, 28 Jun 2021 10:53:06 -0400 Subject: drm/radeon: Call radeon_suspend_kms() in radeon_pci_shutdown() for Loongson64 On the Loongson64 
platform used with Radeon GPU, shutdown or reboot failed when console=tty is in the boot cmdline. radeon_suspend_kms() puts the hw in the suspend state, especially set fb state as FBINFO_STATE_SUSPENDED: if (fbcon) { console_lock(); radeon_fbdev_set_suspend(rdev, 1); console_unlock(); } Then avoid to do any more fb operations in the related functions: if (p->state != FBINFO_STATE_RUNNING) return; So call radeon_suspend_kms() in radeon_pci_shutdown() for Loongson64 to fix this issue, it looks like some kind of workaround like powerpc. Co-developed-by: Jianmin Lv Signed-off-by: Jianmin Lv Signed-off-by: Tiezhu Yang Signed-off-by: Alex Deucher Cc: stable@vger.kernel.org --- drivers/gpu/drm/radeon/radeon_drv.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/drivers/gpu/drm/radeon/radeon_drv.c b/drivers/gpu/drm/radeon/radeon_drv.c index 8cd135fa6dcd..5c23b77cb81a 100644 --- a/drivers/gpu/drm/radeon/radeon_drv.c +++ b/drivers/gpu/drm/radeon/radeon_drv.c @@ -374,13 +374,13 @@ radeon_pci_shutdown(struct pci_dev *pdev) if (radeon_device_is_virtual()) radeon_pci_remove(pdev); -#ifdef CONFIG_PPC64 +#if defined(CONFIG_PPC64) || defined(CONFIG_MACH_LOONGSON64) /* * Some adapters need to be suspended before a * shutdown occurs in order to prevent an error - * during kexec. - * Make this power specific becauase it breaks - * some non-power boards. + * during kexec, shutdown or reboot. + * Make this power and Loongson specific because + * it breaks some other boards. */ radeon_suspend_kms(pci_get_drvdata(pdev), true, true, false); #endif -- cgit v1.2.3 From c339a80d3a5a580a1aaefc2c9eeeb2a1feee4763 Mon Sep 17 00:00:00 2001 From: Michal Suchanek Date: Wed, 23 Jun 2021 12:30:39 +0200 Subject: drm/amdgpu/dc: Really fix DCN3.1 Makefile for PPC64 Also copy over the part that makes old gcc handling cross-platform. 
Fixes: df7a1658f257 ("drm/amdgpu/dc: fix DCN3.1 Makefile for PPC64") Fixes: 926d6972efb6 ("drm/amd/display: Add DCN3.1 blocks to the DC Makefile") Reviewed-by: Harry Wentland Signed-off-by: Michal Suchanek Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/display/dc/dcn31/Makefile | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/gpu/drm/amd/display/dc/dcn31/Makefile b/drivers/gpu/drm/amd/display/dc/dcn31/Makefile index 5dcdc5a858fe..4bab97acb155 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn31/Makefile +++ b/drivers/gpu/drm/amd/display/dc/dcn31/Makefile @@ -28,6 +28,7 @@ endif CFLAGS_$(AMDDALPATH)/dc/dcn31/dcn31_resource.o += -mhard-float endif +ifdef CONFIG_X86 ifdef IS_OLD_GCC # Stack alignment mismatch, proceed with caution. # GCC < 7.1 cannot compile code using `double` and -mpreferred-stack-boundary=3 @@ -36,6 +37,7 @@ CFLAGS_$(AMDDALPATH)/dc/dcn31/dcn31_resource.o += -mpreferred-stack-boundary=4 else CFLAGS_$(AMDDALPATH)/dc/dcn31/dcn31_resource.o += -msse2 endif +endif AMD_DAL_DCN31 = $(addprefix $(AMDDALPATH)/dc/dcn31/,$(DCN31)) -- cgit v1.2.3 From 9ba85914c36c8fed9bf3e8b69c0782908c1247b7 Mon Sep 17 00:00:00 2001 From: Jing Xiangfeng Date: Tue, 29 Jun 2021 19:44:55 +0800 Subject: drm/radeon: Add the missed drm_gem_object_put() in radeon_user_framebuffer_create() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit radeon_user_framebuffer_create() misses to call drm_gem_object_put() in an error path. Add the missed function call to fix it. 
Reviewed-by: Christian König Signed-off-by: Jing Xiangfeng Signed-off-by: Alex Deucher Cc: stable@vger.kernel.org --- drivers/gpu/drm/radeon/radeon_display.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/gpu/drm/radeon/radeon_display.c b/drivers/gpu/drm/radeon/radeon_display.c index 406681317419..573154268d43 100644 --- a/drivers/gpu/drm/radeon/radeon_display.c +++ b/drivers/gpu/drm/radeon/radeon_display.c @@ -1325,6 +1325,7 @@ radeon_user_framebuffer_create(struct drm_device *dev, /* Handle is imported dma-buf, so cannot be migrated to VRAM for scanout */ if (obj->import_attach) { DRM_DEBUG_KMS("Cannot create framebuffer from imported dma_buf\n"); + drm_gem_object_put(obj); return ERR_PTR(-EINVAL); } -- cgit v1.2.3 From 25f178bbd07817acd43643c97d7e6232daf3c323 Mon Sep 17 00:00:00 2001 From: Reka Norman Date: Tue, 29 Jun 2021 11:27:18 +1000 Subject: drm/amd/display: Respect CONFIG_FRAME_WARN=0 in dml Makefile Setting CONFIG_FRAME_WARN=0 should disable 'stack frame larger than' warnings. This is useful for example in KASAN builds. Make the dml Makefile respect this config. 
Fixes the following build warnings with CONFIG_KASAN=y and CONFIG_FRAME_WARN=0: drivers/gpu/drm/amd/amdgpu/../display/dc/dml/dcn30/display_mode_vba_30.c:3642:6: warning: stack frame size of 2216 bytes in function 'dml30_ModeSupportAndSystemConfigurationFull' [-Wframe-larger-than=] drivers/gpu/drm/amd/amdgpu/../display/dc/dml/dcn31/display_mode_vba_31.c:3957:6: warning: stack frame size of 2568 bytes in function 'dml31_ModeSupportAndSystemConfigurationFull' [-Wframe-larger-than=] Reviewed-by: Harry Wentland Signed-off-by: Reka Norman Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/display/dc/dml/Makefile | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/amd/display/dc/dml/Makefile b/drivers/gpu/drm/amd/display/dc/dml/Makefile index d34024fd798a..45862167e6ce 100644 --- a/drivers/gpu/drm/amd/display/dc/dml/Makefile +++ b/drivers/gpu/drm/amd/display/dc/dml/Makefile @@ -50,6 +50,10 @@ dml_ccflags += -msse2 endif endif +ifneq ($(CONFIG_FRAME_WARN),0) +frame_warn_flag := -Wframe-larger-than=2048 +endif + CFLAGS_$(AMDDALPATH)/dc/dml/display_mode_lib.o := $(dml_ccflags) ifdef CONFIG_DRM_AMD_DC_DCN @@ -60,9 +64,9 @@ CFLAGS_$(AMDDALPATH)/dc/dml/dcn20/display_mode_vba_20v2.o := $(dml_ccflags) CFLAGS_$(AMDDALPATH)/dc/dml/dcn20/display_rq_dlg_calc_20v2.o := $(dml_ccflags) CFLAGS_$(AMDDALPATH)/dc/dml/dcn21/display_mode_vba_21.o := $(dml_ccflags) CFLAGS_$(AMDDALPATH)/dc/dml/dcn21/display_rq_dlg_calc_21.o := $(dml_ccflags) -CFLAGS_$(AMDDALPATH)/dc/dml/dcn30/display_mode_vba_30.o := $(dml_ccflags) -Wframe-larger-than=2048 +CFLAGS_$(AMDDALPATH)/dc/dml/dcn30/display_mode_vba_30.o := $(dml_ccflags) $(frame_warn_flag) CFLAGS_$(AMDDALPATH)/dc/dml/dcn30/display_rq_dlg_calc_30.o := $(dml_ccflags) -CFLAGS_$(AMDDALPATH)/dc/dml/dcn31/display_mode_vba_31.o := $(dml_ccflags) -Wframe-larger-than=2048 +CFLAGS_$(AMDDALPATH)/dc/dml/dcn31/display_mode_vba_31.o := $(dml_ccflags) $(frame_warn_flag) CFLAGS_$(AMDDALPATH)/dc/dml/dcn31/display_rq_dlg_calc_31.o 
:= $(dml_ccflags) CFLAGS_$(AMDDALPATH)/dc/dml/display_mode_lib.o := $(dml_ccflags) CFLAGS_REMOVE_$(AMDDALPATH)/dc/dml/display_mode_vba.o := $(dml_rcflags) -- cgit v1.2.3 From 9f6a78572033b9c35268387630353fa1bdee0320 Mon Sep 17 00:00:00 2001 From: Huang Rui Date: Tue, 22 Jun 2021 17:33:54 +0800 Subject: drm/amdgpu: move apu flags initialization to the start of device init In some asics, we need to adjust the behavior according to the apu flags at very early stage. Signed-off-by: Huang Rui Reviewed-by: Aaron Liu Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_device.c | 36 ++++++++++++++++++++++++++++++ drivers/gpu/drm/amd/amdgpu/nv.c | 1 - drivers/gpu/drm/amd/amdgpu/soc15.c | 10 +-------- 3 files changed, 37 insertions(+), 10 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c index b5cce56a2a33..d303e88e3c23 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c @@ -1369,6 +1369,38 @@ def_value: adev->pm.smu_prv_buffer_size = 0; } +static int amdgpu_device_init_apu_flags(struct amdgpu_device *adev) +{ + if (!(adev->flags & AMD_IS_APU) || + adev->asic_type < CHIP_RAVEN) + return 0; + + switch (adev->asic_type) { + case CHIP_RAVEN: + if (adev->pdev->device == 0x15dd) + adev->apu_flags |= AMD_APU_IS_RAVEN; + if (adev->pdev->device == 0x15d8) + adev->apu_flags |= AMD_APU_IS_PICASSO; + break; + case CHIP_RENOIR: + if ((adev->pdev->device == 0x1636) || + (adev->pdev->device == 0x164c)) + adev->apu_flags |= AMD_APU_IS_RENOIR; + else + adev->apu_flags |= AMD_APU_IS_GREEN_SARDINE; + break; + case CHIP_VANGOGH: + adev->apu_flags |= AMD_APU_IS_VANGOGH; + break; + case CHIP_YELLOW_CARP: + break; + default: + return -EINVAL; + } + + return 0; +} + /** * amdgpu_device_check_arguments - validate module params * @@ -3386,6 +3418,10 @@ int amdgpu_device_init(struct amdgpu_device *adev, mutex_init(&adev->psp.mutex); mutex_init(&adev->notifier_lock); + r = 
amdgpu_device_init_apu_flags(adev); + if (r) + return r; + r = amdgpu_device_check_arguments(adev); if (r) return r; diff --git a/drivers/gpu/drm/amd/amdgpu/nv.c b/drivers/gpu/drm/amd/amdgpu/nv.c index 859e761c612c..94a2c0742ee5 100644 --- a/drivers/gpu/drm/amd/amdgpu/nv.c +++ b/drivers/gpu/drm/amd/amdgpu/nv.c @@ -1305,7 +1305,6 @@ static int nv_common_early_init(void *handle) break; case CHIP_VANGOGH: - adev->apu_flags |= AMD_APU_IS_VANGOGH; adev->cg_flags = AMD_CG_SUPPORT_GFX_MGCG | AMD_CG_SUPPORT_GFX_MGLS | AMD_CG_SUPPORT_GFX_CP_LS | diff --git a/drivers/gpu/drm/amd/amdgpu/soc15.c b/drivers/gpu/drm/amd/amdgpu/soc15.c index de85577c9cfd..b02436401d46 100644 --- a/drivers/gpu/drm/amd/amdgpu/soc15.c +++ b/drivers/gpu/drm/amd/amdgpu/soc15.c @@ -1360,10 +1360,7 @@ static int soc15_common_early_init(void *handle) break; case CHIP_RAVEN: adev->asic_funcs = &soc15_asic_funcs; - if (adev->pdev->device == 0x15dd) - adev->apu_flags |= AMD_APU_IS_RAVEN; - if (adev->pdev->device == 0x15d8) - adev->apu_flags |= AMD_APU_IS_PICASSO; + if (adev->rev_id >= 0x8) adev->apu_flags |= AMD_APU_IS_RAVEN2; @@ -1455,11 +1452,6 @@ static int soc15_common_early_init(void *handle) break; case CHIP_RENOIR: adev->asic_funcs = &soc15_asic_funcs; - if ((adev->pdev->device == 0x1636) || - (adev->pdev->device == 0x164c)) - adev->apu_flags |= AMD_APU_IS_RENOIR; - else - adev->apu_flags |= AMD_APU_IS_GREEN_SARDINE; if (adev->apu_flags & AMD_APU_IS_RENOIR) adev->external_rev_id = adev->rev_id + 0x91; -- cgit v1.2.3 From 0e2125227eeef98dbd1cb93c5dd7158981b3355e Mon Sep 17 00:00:00 2001 From: Shyam Sundar S K Date: Mon, 28 Jun 2021 13:24:40 +0530 Subject: drm/amd/pm: skip PrepareMp1ForUnload message in s0ix The documentation around PrepareMp1ForUnload message says that anything sent to SMU after this command would be stalled as the PMFW would not be in a state to take further job requests. Technically this is right in case of S3 scenario. 
But this might not be the case during s0ix, as the PMC driver would be the last to send the OS_HINT to the SMU. If the SMU gets a PrepareMp1ForUnload message before the OS_HINT, this would stall the entire S0ix process. Results show that this message to the SMU is not required during S0ix, hence skip it. Reviewed-by: Prike Liang Signed-off-by: Shyam Sundar S K Acked-by: Huang Rui Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/pm/swsmu/smu13/yellow_carp_ppt.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu13/yellow_carp_ppt.c b/drivers/gpu/drm/amd/pm/swsmu/smu13/yellow_carp_ppt.c index 7664334d8144..18a1ffdca227 100644 --- a/drivers/gpu/drm/amd/pm/swsmu/smu13/yellow_carp_ppt.c +++ b/drivers/gpu/drm/amd/pm/swsmu/smu13/yellow_carp_ppt.c @@ -189,10 +189,11 @@ err0_out: static int yellow_carp_system_features_control(struct smu_context *smu, bool en) { struct smu_feature *feature = &smu->smu_feature; + struct amdgpu_device *adev = smu->adev; uint32_t feature_mask[2]; int ret = 0; - if (!en) + if (!en && !adev->in_s0ix) ret = smu_cmn_send_smc_msg(smu, SMU_MSG_PrepareMp1ForUnload, NULL); bitmap_zero(feature->enabled, feature->feature_num); -- cgit v1.2.3 From 06ac9b6c736ac9da600b1782d7ac6d6e746286c4 Mon Sep 17 00:00:00 2001 From: Alex Deucher Date: Mon, 28 Jun 2021 15:56:01 -0400 Subject: drm/amdgpu: add new dimgrey cavefish DID Add new PCI device id. 
Reviewed-by: Guchun Chen Signed-off-by: Alex Deucher Cc: stable@vger.kernel.org --- drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c index 975be7d78d6d..71beb0db0125 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c @@ -1199,6 +1199,7 @@ static const struct pci_device_id pciidlist[] = { {0x1002, 0x73E0, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_DIMGREY_CAVEFISH}, {0x1002, 0x73E1, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_DIMGREY_CAVEFISH}, {0x1002, 0x73E2, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_DIMGREY_CAVEFISH}, + {0x1002, 0x73E3, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_DIMGREY_CAVEFISH}, {0x1002, 0x73FF, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_DIMGREY_CAVEFISH}, /* Aldebaran */ -- cgit v1.2.3 From aa6158112645aae514982ad8d56df64428fcf203 Mon Sep 17 00:00:00 2001 From: Joseph Greathouse Date: Tue, 29 Jun 2021 21:08:52 -0500 Subject: drm/amdgpu: Update NV SIMD-per-CU to 2 Navi series GPUs have 2 SIMDs per CU (and then 2 CUs per WGP). The NV enum headers incorrectly listed this as 4, which later meant we were incorrectly reporting the number of SIMDs in the HSA topology. This could cause problems down the line for user-space applications that want to launch a fixed amount of work to each SIMD. 
Signed-off-by: Joseph Greathouse Reviewed-by: Alex Deucher Reviewed-by: Felix Kuehling Signed-off-by: Alex Deucher Cc: stable@vger.kernel.org --- drivers/gpu/drm/amd/include/navi10_enum.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/include/navi10_enum.h b/drivers/gpu/drm/amd/include/navi10_enum.h index d5ead9680c6e..84bcb96f76ea 100644 --- a/drivers/gpu/drm/amd/include/navi10_enum.h +++ b/drivers/gpu/drm/amd/include/navi10_enum.h @@ -430,7 +430,7 @@ ARRAY_2D_DEPTH = 0x00000001, */ typedef enum ENUM_NUM_SIMD_PER_CU { -NUM_SIMD_PER_CU = 0x00000004, +NUM_SIMD_PER_CU = 0x00000002, } ENUM_NUM_SIMD_PER_CU; /* -- cgit v1.2.3 From 1d40ef902d4c82f830ba2ee5fd389b33a5374675 Mon Sep 17 00:00:00 2001 From: Nicholas Kazlauskas Date: Wed, 30 Jun 2021 10:07:29 -0400 Subject: drm/amd/display: Extend DMUB diagnostic logging to DCN3.1 [Why & How] Extend existing support for DCN2.1 DMUB diagnostic logging to DCN3.1 so we can collect useful information if the DMUB hangs. 
Signed-off-by: Nicholas Kazlauskas Reviewed-by: Harry Wentland Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/display/dmub/src/dmub_dcn31.c | 60 +++++++++++++++++++++++ drivers/gpu/drm/amd/display/dmub/src/dmub_dcn31.h | 16 +++++- drivers/gpu/drm/amd/display/dmub/src/dmub_srv.c | 5 +- 3 files changed, 76 insertions(+), 5 deletions(-) diff --git a/drivers/gpu/drm/amd/display/dmub/src/dmub_dcn31.c b/drivers/gpu/drm/amd/display/dmub/src/dmub_dcn31.c index 8c886ece71f6..973de346410d 100644 --- a/drivers/gpu/drm/amd/display/dmub/src/dmub_dcn31.c +++ b/drivers/gpu/drm/amd/display/dmub/src/dmub_dcn31.c @@ -352,3 +352,63 @@ uint32_t dmub_dcn31_get_current_time(struct dmub_srv *dmub) { return REG_READ(DMCUB_TIMER_CURRENT); } + +void dmub_dcn31_get_diagnostic_data(struct dmub_srv *dmub, struct dmub_diagnostic_data *diag_data) +{ + uint32_t is_dmub_enabled, is_soft_reset, is_sec_reset; + uint32_t is_traceport_enabled, is_cw0_enabled, is_cw6_enabled; + + if (!dmub || !diag_data) + return; + + memset(diag_data, 0, sizeof(*diag_data)); + + diag_data->dmcub_version = dmub->fw_version; + + diag_data->scratch[0] = REG_READ(DMCUB_SCRATCH0); + diag_data->scratch[1] = REG_READ(DMCUB_SCRATCH1); + diag_data->scratch[2] = REG_READ(DMCUB_SCRATCH2); + diag_data->scratch[3] = REG_READ(DMCUB_SCRATCH3); + diag_data->scratch[4] = REG_READ(DMCUB_SCRATCH4); + diag_data->scratch[5] = REG_READ(DMCUB_SCRATCH5); + diag_data->scratch[6] = REG_READ(DMCUB_SCRATCH6); + diag_data->scratch[7] = REG_READ(DMCUB_SCRATCH7); + diag_data->scratch[8] = REG_READ(DMCUB_SCRATCH8); + diag_data->scratch[9] = REG_READ(DMCUB_SCRATCH9); + diag_data->scratch[10] = REG_READ(DMCUB_SCRATCH10); + diag_data->scratch[11] = REG_READ(DMCUB_SCRATCH11); + diag_data->scratch[12] = REG_READ(DMCUB_SCRATCH12); + diag_data->scratch[13] = REG_READ(DMCUB_SCRATCH13); + diag_data->scratch[14] = REG_READ(DMCUB_SCRATCH14); + diag_data->scratch[15] = REG_READ(DMCUB_SCRATCH15); + + diag_data->undefined_address_fault_addr = 
REG_READ(DMCUB_UNDEFINED_ADDRESS_FAULT_ADDR); + diag_data->inst_fetch_fault_addr = REG_READ(DMCUB_INST_FETCH_FAULT_ADDR); + diag_data->data_write_fault_addr = REG_READ(DMCUB_DATA_WRITE_FAULT_ADDR); + + diag_data->inbox1_rptr = REG_READ(DMCUB_INBOX1_RPTR); + diag_data->inbox1_wptr = REG_READ(DMCUB_INBOX1_WPTR); + diag_data->inbox1_size = REG_READ(DMCUB_INBOX1_SIZE); + + diag_data->inbox0_rptr = REG_READ(DMCUB_INBOX0_RPTR); + diag_data->inbox0_wptr = REG_READ(DMCUB_INBOX0_WPTR); + diag_data->inbox0_size = REG_READ(DMCUB_INBOX0_SIZE); + + REG_GET(DMCUB_CNTL, DMCUB_ENABLE, &is_dmub_enabled); + diag_data->is_dmcub_enabled = is_dmub_enabled; + + REG_GET(DMCUB_CNTL2, DMCUB_SOFT_RESET, &is_soft_reset); + diag_data->is_dmcub_soft_reset = is_soft_reset; + + REG_GET(DMCUB_SEC_CNTL, DMCUB_SEC_RESET_STATUS, &is_sec_reset); + diag_data->is_dmcub_secure_reset = is_sec_reset; + + REG_GET(DMCUB_CNTL, DMCUB_TRACEPORT_EN, &is_traceport_enabled); + diag_data->is_traceport_en = is_traceport_enabled; + + REG_GET(DMCUB_REGION3_CW0_TOP_ADDRESS, DMCUB_REGION3_CW0_ENABLE, &is_cw0_enabled); + diag_data->is_cw0_enabled = is_cw0_enabled; + + REG_GET(DMCUB_REGION3_CW6_TOP_ADDRESS, DMCUB_REGION3_CW6_ENABLE, &is_cw6_enabled); + diag_data->is_cw6_enabled = is_cw6_enabled; +} diff --git a/drivers/gpu/drm/amd/display/dmub/src/dmub_dcn31.h b/drivers/gpu/drm/amd/display/dmub/src/dmub_dcn31.h index 2829c3e9a310..9456a6a2d518 100644 --- a/drivers/gpu/drm/amd/display/dmub/src/dmub_dcn31.h +++ b/drivers/gpu/drm/amd/display/dmub/src/dmub_dcn31.h @@ -36,6 +36,9 @@ struct dmub_srv; DMUB_SR(DMCUB_CNTL) \ DMUB_SR(DMCUB_CNTL2) \ DMUB_SR(DMCUB_SEC_CNTL) \ + DMUB_SR(DMCUB_INBOX0_SIZE) \ + DMUB_SR(DMCUB_INBOX0_RPTR) \ + DMUB_SR(DMCUB_INBOX0_WPTR) \ DMUB_SR(DMCUB_INBOX1_BASE_ADDRESS) \ DMUB_SR(DMCUB_INBOX1_SIZE) \ DMUB_SR(DMCUB_INBOX1_RPTR) \ @@ -103,11 +106,15 @@ struct dmub_srv; DMUB_SR(DMCUB_SCRATCH14) \ DMUB_SR(DMCUB_SCRATCH15) \ DMUB_SR(DMCUB_GPINT_DATAIN1) \ + DMUB_SR(DMCUB_GPINT_DATAOUT) \ 
DMUB_SR(CC_DC_PIPE_DIS) \ DMUB_SR(MMHUBBUB_SOFT_RESET) \ DMUB_SR(DCN_VM_FB_LOCATION_BASE) \ DMUB_SR(DCN_VM_FB_OFFSET) \ - DMUB_SR(DMCUB_TIMER_CURRENT) + DMUB_SR(DMCUB_TIMER_CURRENT) \ + DMUB_SR(DMCUB_INST_FETCH_FAULT_ADDR) \ + DMUB_SR(DMCUB_UNDEFINED_ADDRESS_FAULT_ADDR) \ + DMUB_SR(DMCUB_DATA_WRITE_FAULT_ADDR) #define DMUB_DCN31_FIELDS() \ DMUB_SF(DMCUB_CNTL, DMCUB_ENABLE) \ @@ -115,6 +122,7 @@ struct dmub_srv; DMUB_SF(DMCUB_CNTL2, DMCUB_SOFT_RESET) \ DMUB_SF(DMCUB_SEC_CNTL, DMCUB_SEC_RESET) \ DMUB_SF(DMCUB_SEC_CNTL, DMCUB_MEM_UNIT_ID) \ + DMUB_SF(DMCUB_SEC_CNTL, DMCUB_SEC_RESET_STATUS) \ DMUB_SF(DMCUB_REGION3_CW0_TOP_ADDRESS, DMCUB_REGION3_CW0_TOP_ADDRESS) \ DMUB_SF(DMCUB_REGION3_CW0_TOP_ADDRESS, DMCUB_REGION3_CW0_ENABLE) \ DMUB_SF(DMCUB_REGION3_CW1_TOP_ADDRESS, DMCUB_REGION3_CW1_TOP_ADDRESS) \ @@ -138,11 +146,13 @@ struct dmub_srv; DMUB_SF(CC_DC_PIPE_DIS, DC_DMCUB_ENABLE) \ DMUB_SF(MMHUBBUB_SOFT_RESET, DMUIF_SOFT_RESET) \ DMUB_SF(DCN_VM_FB_LOCATION_BASE, FB_BASE) \ - DMUB_SF(DCN_VM_FB_OFFSET, FB_OFFSET) + DMUB_SF(DCN_VM_FB_OFFSET, FB_OFFSET) \ + DMUB_SF(DMCUB_INBOX0_WPTR, DMCUB_INBOX0_WPTR) struct dmub_srv_dcn31_reg_offset { #define DMUB_SR(reg) uint32_t reg; DMUB_DCN31_REGS() + DMCUB_INTERNAL_REGS() #undef DMUB_SR }; @@ -227,4 +237,6 @@ void dmub_dcn31_set_outbox0_rptr(struct dmub_srv *dmub, uint32_t rptr_offset); uint32_t dmub_dcn31_get_current_time(struct dmub_srv *dmub); +void dmub_dcn31_get_diagnostic_data(struct dmub_srv *dmub, struct dmub_diagnostic_data *diag_data); + #endif /* _DMUB_DCN31_H_ */ diff --git a/drivers/gpu/drm/amd/display/dmub/src/dmub_srv.c b/drivers/gpu/drm/amd/display/dmub/src/dmub_srv.c index fd7e996ab1d7..2bdbd7406f56 100644 --- a/drivers/gpu/drm/amd/display/dmub/src/dmub_srv.c +++ b/drivers/gpu/drm/amd/display/dmub/src/dmub_srv.c @@ -208,6 +208,7 @@ static bool dmub_srv_hw_setup(struct dmub_srv *dmub, enum dmub_asic asic) break; case DMUB_ASIC_DCN31: + dmub->regs_dcn31 = &dmub_srv_dcn31_regs; funcs->reset = dmub_dcn31_reset; 
funcs->reset_release = dmub_dcn31_reset_release; funcs->backdoor_load = dmub_dcn31_backdoor_load; @@ -231,9 +232,7 @@ static bool dmub_srv_hw_setup(struct dmub_srv *dmub, enum dmub_asic asic) funcs->get_outbox0_wptr = dmub_dcn31_get_outbox0_wptr; funcs->set_outbox0_rptr = dmub_dcn31_set_outbox0_rptr; - if (asic == DMUB_ASIC_DCN31) { - dmub->regs_dcn31 = &dmub_srv_dcn31_regs; - } + funcs->get_diagnostic_data = dmub_dcn31_get_diagnostic_data; funcs->get_current_time = dmub_dcn31_get_current_time; -- cgit v1.2.3 From 9e4a91cd9ea9b9d6218f6c64c57c5ff412de919f Mon Sep 17 00:00:00 2001 From: Alex Sierra Date: Tue, 29 Jun 2021 11:40:47 -0500 Subject: drm/amdkfd: inc counter on child ranges with xnack off During GPU page table invalidation with xnack off, new ranges split may occur concurrently in the same prange. Creating a new child per split. Each child should also increment its invalid counter, to assure GPU page table updates in these ranges. Signed-off-by: Alex Sierra Reviewed-by: Felix Kuehling Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdkfd/kfd_svm.c | 16 ++++++++++++++-- 1 file changed, 14 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_svm.c b/drivers/gpu/drm/amd/amdkfd/kfd_svm.c index 90e2eacb8ba0..34abf6460585 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_svm.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_svm.c @@ -1590,6 +1590,7 @@ svm_range_evict(struct svm_range *prange, struct mm_struct *mm, unsigned long start, unsigned long last) { struct svm_range_list *svms = prange->svms; + struct svm_range *pchild; struct kfd_process *p; int r = 0; @@ -1601,7 +1602,19 @@ svm_range_evict(struct svm_range *prange, struct mm_struct *mm, if (!p->xnack_enabled) { int evicted_ranges; - atomic_inc(&prange->invalid); + list_for_each_entry(pchild, &prange->child_list, child_list) { + mutex_lock_nested(&pchild->lock, 1); + if (pchild->start <= last && pchild->last >= start) { + pr_debug("increment pchild invalid [0x%lx 0x%lx]\n", + pchild->start, 
pchild->last); + atomic_inc(&pchild->invalid); + } + mutex_unlock(&pchild->lock); + } + + if (prange->start <= last && prange->last >= start) + atomic_inc(&prange->invalid); + evicted_ranges = atomic_inc_return(&svms->evicted_ranges); if (evicted_ranges != 1) return r; @@ -1618,7 +1631,6 @@ svm_range_evict(struct svm_range *prange, struct mm_struct *mm, schedule_delayed_work(&svms->restore_work, msecs_to_jiffies(AMDGPU_SVM_RANGE_RESTORE_DELAY_MS)); } else { - struct svm_range *pchild; unsigned long s, l; pr_debug("invalidate unmap svms 0x%p [0x%lx 0x%lx] from GPUs\n", -- cgit v1.2.3 From 3a61dae854cccb32eb9f31b3b4efda8ea414e647 Mon Sep 17 00:00:00 2001 From: Alex Sierra Date: Wed, 5 May 2021 12:43:10 -0500 Subject: drm/amdkfd: device pgmap owner at the svm migrate init GPUs in the same XGMI hive have direct access to all members' VRAM. When mapping memory to a GPU, we don't need hmm_range_fault to fault device-private pages in the same hive back to the host. Identifying the page owner as the hive, rather than the individual GPU, accomplishes this. 
Signed-off-by: Alex Sierra Reviewed-by: Felix Kuehling Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdkfd/kfd_migrate.c | 6 +++--- drivers/gpu/drm/amd/amdkfd/kfd_svm.h | 3 +++ 2 files changed, 6 insertions(+), 3 deletions(-) diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_migrate.c b/drivers/gpu/drm/amd/amdkfd/kfd_migrate.c index d8092a84e0a4..f1be090f7798 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_migrate.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_migrate.c @@ -379,7 +379,7 @@ svm_migrate_vma_to_vram(struct amdgpu_device *adev, struct svm_range *prange, migrate.start = start; migrate.end = end; migrate.flags = MIGRATE_VMA_SELECT_SYSTEM; - migrate.pgmap_owner = adev; + migrate.pgmap_owner = SVM_ADEV_PGMAP_OWNER(adev); size = 2 * sizeof(*migrate.src) + sizeof(uint64_t) + sizeof(dma_addr_t); size *= npages; @@ -601,7 +601,7 @@ svm_migrate_vma_to_ram(struct amdgpu_device *adev, struct svm_range *prange, migrate.start = start; migrate.end = end; migrate.flags = MIGRATE_VMA_SELECT_DEVICE_PRIVATE; - migrate.pgmap_owner = adev; + migrate.pgmap_owner = SVM_ADEV_PGMAP_OWNER(adev); size = 2 * sizeof(*migrate.src) + sizeof(uint64_t) + sizeof(dma_addr_t); size *= npages; @@ -873,7 +873,7 @@ int svm_migrate_init(struct amdgpu_device *adev) pgmap->range.start = res->start; pgmap->range.end = res->end; pgmap->ops = &svm_migrate_pgmap_ops; - pgmap->owner = adev; + pgmap->owner = SVM_ADEV_PGMAP_OWNER(adev); pgmap->flags = MIGRATE_VMA_SELECT_DEVICE_PRIVATE; r = devm_memremap_pages(adev->dev, pgmap); if (IS_ERR(r)) { diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_svm.h b/drivers/gpu/drm/amd/amdkfd/kfd_svm.h index a9af03994d1a..1f88bdfdbcc2 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_svm.h +++ b/drivers/gpu/drm/amd/amdkfd/kfd_svm.h @@ -35,6 +35,9 @@ #include "amdgpu.h" #include "kfd_priv.h" +#define SVM_ADEV_PGMAP_OWNER(adev)\ + ((adev)->hive ? 
(void *)(adev)->hive : (void *)(adev)) + struct svm_range_bo { struct amdgpu_bo *bo; struct kref kref; -- cgit v1.2.3 From 8c21fc49a8e637bee5c868dafbd7e3c885a926bd Mon Sep 17 00:00:00 2001 From: Alex Sierra Date: Thu, 6 May 2021 12:23:07 -0500 Subject: drm/amdkfd: add owner ref param to get hmm pages The parameter is used in the dev_private_owner to decide if device pages in the range require to be migrated back to system memory, based if they are or not in the same memory domain. In this case, this reference could come from the same memory domain with devices connected to the same hive. Signed-off-by: Alex Sierra Reviewed-by: Felix Kuehling Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_mn.c | 3 ++- drivers/gpu/drm/amd/amdgpu/amdgpu_mn.h | 2 +- drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c | 2 +- drivers/gpu/drm/amd/amdkfd/kfd_svm.c | 4 ++-- 4 files changed, 6 insertions(+), 5 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_mn.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_mn.c index d6c54c7f7679..4b153daf283d 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_mn.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_mn.c @@ -160,7 +160,7 @@ int amdgpu_hmm_range_get_pages(struct mmu_interval_notifier *notifier, struct mm_struct *mm, struct page **pages, uint64_t start, uint64_t npages, struct hmm_range **phmm_range, bool readonly, - bool mmap_locked) + bool mmap_locked, void *owner) { struct hmm_range *hmm_range; unsigned long timeout; @@ -185,6 +185,7 @@ int amdgpu_hmm_range_get_pages(struct mmu_interval_notifier *notifier, hmm_range->hmm_pfns = pfns; hmm_range->start = start; hmm_range->end = start + npages * PAGE_SIZE; + hmm_range->dev_private_owner = owner; /* Assuming 512MB takes maxmium 1 second to fault page address */ timeout = max(npages >> 17, 1ULL) * HMM_RANGE_DEFAULT_TIMEOUT; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_mn.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_mn.h index 7f7d37a457c3..14a3c1864085 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_mn.h 
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_mn.h @@ -34,7 +34,7 @@ int amdgpu_hmm_range_get_pages(struct mmu_interval_notifier *notifier, struct mm_struct *mm, struct page **pages, uint64_t start, uint64_t npages, struct hmm_range **phmm_range, bool readonly, - bool mmap_locked); + bool mmap_locked, void *owner); int amdgpu_hmm_range_get_pages_done(struct hmm_range *hmm_range); #if defined(CONFIG_HMM_MIRROR) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c index a2d1ab192457..2e9ad6e0dfbb 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c @@ -692,7 +692,7 @@ int amdgpu_ttm_tt_get_user_pages(struct amdgpu_bo *bo, struct page **pages) readonly = amdgpu_ttm_tt_is_readonly(ttm); r = amdgpu_hmm_range_get_pages(&bo->notifier, mm, pages, start, ttm->num_pages, &gtt->range, readonly, - false); + false, NULL); out_putmm: mmput(mm); diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_svm.c b/drivers/gpu/drm/amd/amdkfd/kfd_svm.c index 34abf6460585..e64427c31373 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_svm.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_svm.c @@ -1416,7 +1416,7 @@ static int svm_range_validate_and_map(struct mm_struct *mm, r = amdgpu_hmm_range_get_pages(&prange->notifier, mm, NULL, prange->start << PAGE_SHIFT, prange->npages, &hmm_range, - false, true); + false, true, NULL); if (r) { pr_debug("failed %d to get svm range pages\n", r); goto unreserve_out; @@ -2728,7 +2728,7 @@ void svm_range_prefault(struct svm_range *prange, struct mm_struct *mm) r = amdgpu_hmm_range_get_pages(&prange->notifier, mm, NULL, prange->start << PAGE_SHIFT, prange->npages, &hmm_range, - false, true); + false, true, NULL); if (!r) { amdgpu_hmm_range_get_pages_done(hmm_range); prange->validated_once = true; -- cgit v1.2.3 From a010d98a78c05652f9f2860f537efdcd65507c6d Mon Sep 17 00:00:00 2001 From: Alex Sierra Date: Thu, 6 May 2021 13:06:54 -0500 Subject: drm/amdkfd: set owner ref to svm range prefault 
svm_range_prefault is called right before migrations to VRAM, to make sure pages are resident in system memory before the migration. With partial migrations, this reference is used by hmm range get pages to avoid migrating pages that are already in the same VRAM domain. Signed-off-by: Alex Sierra Reviewed-by: Felix Kuehling Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdkfd/kfd_migrate.c | 2 +- drivers/gpu/drm/amd/amdkfd/kfd_svm.c | 5 +++-- drivers/gpu/drm/amd/amdkfd/kfd_svm.h | 3 ++- 3 files changed, 6 insertions(+), 4 deletions(-) diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_migrate.c b/drivers/gpu/drm/amd/amdkfd/kfd_migrate.c index f1be090f7798..90645017258a 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_migrate.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_migrate.c @@ -471,7 +471,7 @@ svm_migrate_ram_to_vram(struct svm_range *prange, uint32_t best_loc, prange->start, prange->last, best_loc); /* FIXME: workaround for page locking bug with invalid pages */ - svm_range_prefault(prange, mm); + svm_range_prefault(prange, mm, SVM_ADEV_PGMAP_OWNER(adev)); start = prange->start << PAGE_SHIFT; end = (prange->last + 1) << PAGE_SHIFT; diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_svm.c b/drivers/gpu/drm/amd/amdkfd/kfd_svm.c index e64427c31373..51d89433ca1b 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_svm.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_svm.c @@ -2717,7 +2717,8 @@ out: /* FIXME: This is a workaround for page locking bug when some pages are * invalid during migration to VRAM */ -void svm_range_prefault(struct svm_range *prange, struct mm_struct *mm) +void svm_range_prefault(struct svm_range *prange, struct mm_struct *mm, + void *owner) { struct hmm_range *hmm_range; int r; @@ -2728,7 +2729,7 @@ void svm_range_prefault(struct svm_range *prange, struct mm_struct *mm) r = amdgpu_hmm_range_get_pages(&prange->notifier, mm, NULL, prange->start << PAGE_SHIFT, prange->npages, &hmm_range, - false, true, NULL); + false, true, owner); if (!r) { 
amdgpu_hmm_range_get_pages_done(hmm_range); prange->validated_once = true; diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_svm.h b/drivers/gpu/drm/amd/amdkfd/kfd_svm.h index 1f88bdfdbcc2..9a7744a12dab 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_svm.h +++ b/drivers/gpu/drm/amd/amdkfd/kfd_svm.h @@ -176,7 +176,8 @@ void schedule_deferred_list_work(struct svm_range_list *svms); void svm_range_dma_unmap(struct device *dev, dma_addr_t *dma_addr, unsigned long offset, unsigned long npages); void svm_range_free_dma_mappings(struct svm_range *prange); -void svm_range_prefault(struct svm_range *prange, struct mm_struct *mm); +void svm_range_prefault(struct svm_range *prange, struct mm_struct *mm, + void *owner); struct kfd_process_device * svm_range_get_pdd_by_adev(struct svm_range *prange, struct amdgpu_device *adev); -- cgit v1.2.3 From 1fc160cfe17ad741157ba8bf38ea5867f4d9fe53 Mon Sep 17 00:00:00 2001 From: Alex Sierra Date: Thu, 6 May 2021 13:18:40 -0500 Subject: drm/amdgpu: get owner ref in validate and map Get the proper owner reference for amdgpu_hmm_range_get_pages function. This is useful for partial migrations. To avoid migrating back to system memory, VRAM pages, that are accessible by all devices in the same memory domain. Ex. multiple devices in the same hive. 
Signed-off-by: Alex Sierra Reviewed-by: Felix Kuehling Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdkfd/kfd_svm.c | 25 ++++++++++++++++++++++++- 1 file changed, 24 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_svm.c b/drivers/gpu/drm/amd/amdkfd/kfd_svm.c index 51d89433ca1b..a782af09b9b9 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_svm.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_svm.c @@ -1337,6 +1337,17 @@ static void svm_range_unreserve_bos(struct svm_validate_context *ctx) ttm_eu_backoff_reservation(&ctx->ticket, &ctx->validate_list); } +static void *kfd_svm_page_owner(struct kfd_process *p, int32_t gpuidx) +{ + struct kfd_process_device *pdd; + struct amdgpu_device *adev; + + pdd = kfd_process_device_from_gpuidx(p, gpuidx); + adev = (struct amdgpu_device *)pdd->dev->kgd; + + return SVM_ADEV_PGMAP_OWNER(adev); +} + /* * Validation+GPU mapping with concurrent invalidation (MMU notifiers) * @@ -1367,6 +1378,9 @@ static int svm_range_validate_and_map(struct mm_struct *mm, { struct svm_validate_context ctx; struct hmm_range *hmm_range; + struct kfd_process *p; + void *owner; + int32_t idx; int r = 0; ctx.process = container_of(prange->svms, struct kfd_process, svms); @@ -1413,10 +1427,19 @@ static int svm_range_validate_and_map(struct mm_struct *mm, svm_range_reserve_bos(&ctx); if (!prange->actual_loc) { + p = container_of(prange->svms, struct kfd_process, svms); + owner = kfd_svm_page_owner(p, find_first_bit(ctx.bitmap, + MAX_GPU_INSTANCE)); + for_each_set_bit(idx, ctx.bitmap, MAX_GPU_INSTANCE) { + if (kfd_svm_page_owner(p, idx) != owner) { + owner = NULL; + break; + } + } r = amdgpu_hmm_range_get_pages(&prange->notifier, mm, NULL, prange->start << PAGE_SHIFT, prange->npages, &hmm_range, - false, true, NULL); + false, true, owner); if (r) { pr_debug("failed %d to get svm range pages\n", r); goto unreserve_out; -- cgit v1.2.3 From 278a708758b5fc6d3101776b0e3846a8cd37e188 Mon Sep 17 00:00:00 2001 From: Alex Sierra Date: Wed, 23 Jun 2021 
17:06:22 -0500 Subject: drm/amdkfd: use hmm range fault to get both domain pfns Now that prange could have mixed domains (VRAM or SYSRAM), neither actual_loc nor svm_bo can be used to check its current domain and eventually get its pfns to map them in GPU. Instead, pfns from both domains are now obtained from hmm_range_fault through the amdgpu_hmm_range_get_pages call. This is done every time a GPU map occurs. Signed-off-by: Alex Sierra Reviewed-by: Felix Kuehling Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdkfd/kfd_svm.c | 68 ++++++++++++++---------------------- 1 file changed, 27 insertions(+), 41 deletions(-) diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_svm.c b/drivers/gpu/drm/amd/amdkfd/kfd_svm.c index a782af09b9b9..3e782572073f 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_svm.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_svm.c @@ -1426,42 +1426,38 @@ static int svm_range_validate_and_map(struct mm_struct *mm, svm_range_reserve_bos(&ctx); - if (!prange->actual_loc) { - p = container_of(prange->svms, struct kfd_process, svms); - owner = kfd_svm_page_owner(p, find_first_bit(ctx.bitmap, - MAX_GPU_INSTANCE)); - for_each_set_bit(idx, ctx.bitmap, MAX_GPU_INSTANCE) { - if (kfd_svm_page_owner(p, idx) != owner) { - owner = NULL; - break; - } - } - r = amdgpu_hmm_range_get_pages(&prange->notifier, mm, NULL, - prange->start << PAGE_SHIFT, - prange->npages, &hmm_range, - false, true, owner); - if (r) { - pr_debug("failed %d to get svm range pages\n", r); - goto unreserve_out; - } - - r = svm_range_dma_map(prange, ctx.bitmap, - hmm_range->hmm_pfns); - if (r) { - pr_debug("failed %d to dma map range\n", r); - goto unreserve_out; + p = container_of(prange->svms, struct kfd_process, svms); + owner = kfd_svm_page_owner(p, find_first_bit(ctx.bitmap, + MAX_GPU_INSTANCE)); + for_each_set_bit(idx, ctx.bitmap, MAX_GPU_INSTANCE) { + if (kfd_svm_page_owner(p, idx) != owner) { + owner = NULL; + break; } + } + r = amdgpu_hmm_range_get_pages(&prange->notifier, mm, NULL, + prange->start << 
PAGE_SHIFT, + prange->npages, &hmm_range, + false, true, owner); + if (r) { + pr_debug("failed %d to get svm range pages\n", r); + goto unreserve_out; + } - prange->validated_once = true; + r = svm_range_dma_map(prange, ctx.bitmap, + hmm_range->hmm_pfns); + if (r) { + pr_debug("failed %d to dma map range\n", r); + goto unreserve_out; } + prange->validated_once = true; + svm_range_lock(prange); - if (!prange->actual_loc) { - if (amdgpu_hmm_range_get_pages_done(hmm_range)) { - pr_debug("hmm update the range, need validate again\n"); - r = -EAGAIN; - goto unlock_out; - } + if (amdgpu_hmm_range_get_pages_done(hmm_range)) { + pr_debug("hmm update the range, need validate again\n"); + r = -EAGAIN; + goto unlock_out; } if (!list_empty(&prange->child_list)) { pr_debug("range split by unmap in parallel, validate again\n"); @@ -2797,16 +2793,6 @@ svm_range_trigger_migration(struct mm_struct *mm, struct svm_range *prange, best_loc == prange->actual_loc) return 0; - /* - * Prefetch to GPU without host access flag, set actual_loc to gpu, then - * validate on gpu and map to gpus will be handled afterwards. - */ - if (best_loc && !prange->actual_loc && - !(prange->flags & KFD_IOCTL_SVM_FLAG_HOST_ACCESS)) { - prange->actual_loc = best_loc; - return 0; - } - if (!best_loc) { r = svm_migrate_vram_to_ram(prange, mm); *migrated = !r; -- cgit v1.2.3 From 1d5dbfe6c06a5269b535f8e6b13569f32c42ea60 Mon Sep 17 00:00:00 2001 From: Alex Sierra Date: Wed, 5 May 2021 14:15:50 -0500 Subject: drm/amdkfd: classify and map mixed svm range pages in GPU [Why] svm ranges can have mixed pages from device or system memory. A good example is after a prange has been allocated in VRAM and a copy-on-write is triggered by a fork. This invalidates some pages inside the prange, ending up with mixed pages. [How] By classifying each page inside a prange based on its type, device or system memory, during the dma mapping call. If a page corresponds to the VRAM domain, a flag is set in its dma_addr entry for each GPU. 
Then, at the GPU page table mapping, all groups of contiguous pages of the same type are mapped with their proper pte flags. v2: Instead of using ttm_res to calculate vram pfns in the svm_range, it is now done by setting the vram real physical address into the dma_addr array. This makes VRAM management more flexible, plus removes the need to have a BO reference in the svm_range. v3: Remove mapping member from svm_range Signed-off-by: Alex Sierra Reviewed-by: Felix Kuehling Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdkfd/kfd_svm.c | 87 ++++++++++++++++++++++-------------- drivers/gpu/drm/amd/amdkfd/kfd_svm.h | 2 +- 2 files changed, 55 insertions(+), 34 deletions(-) diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_svm.c b/drivers/gpu/drm/amd/amdkfd/kfd_svm.c index 3e782572073f..f9a1a47482c6 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_svm.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_svm.c @@ -119,28 +119,40 @@ static void svm_range_remove_notifier(struct svm_range *prange) } static int -svm_range_dma_map_dev(struct device *dev, dma_addr_t **dma_addr, - unsigned long *hmm_pfns, uint64_t npages) +svm_range_dma_map_dev(struct amdgpu_device *adev, struct svm_range *prange, + unsigned long *hmm_pfns, uint32_t gpuidx) { enum dma_data_direction dir = DMA_BIDIRECTIONAL; - dma_addr_t *addr = *dma_addr; + dma_addr_t *addr = prange->dma_addr[gpuidx]; + struct device *dev = adev->dev; struct page *page; int i, r; if (!addr) { - addr = kvmalloc_array(npages, sizeof(*addr), + addr = kvmalloc_array(prange->npages, sizeof(*addr), GFP_KERNEL | __GFP_ZERO); if (!addr) return -ENOMEM; - *dma_addr = addr; + prange->dma_addr[gpuidx] = addr; } - for (i = 0; i < npages; i++) { + for (i = 0; i < prange->npages; i++) { if (WARN_ONCE(addr[i] && !dma_mapping_error(dev, addr[i]), "leaking dma mapping\n")) dma_unmap_page(dev, addr[i], PAGE_SIZE, dir); page = hmm_pfn_to_page(hmm_pfns[i]); + if (is_zone_device_page(page)) { + struct amdgpu_device *bo_adev = + 
amdgpu_ttm_adev(prange->svm_bo->bo->tbo.bdev); + + addr[i] = (hmm_pfns[i] << PAGE_SHIFT) + + bo_adev->vm_manager.vram_base_offset - + bo_adev->kfd.dev->pgmap.range.start; + addr[i] |= SVM_RANGE_VRAM_DOMAIN; + pr_debug("vram address detected: 0x%llx\n", addr[i]); + continue; + } addr[i] = dma_map_page(dev, page, 0, PAGE_SIZE, dir); r = dma_mapping_error(dev, addr[i]); if (r) { @@ -175,8 +187,7 @@ svm_range_dma_map(struct svm_range *prange, unsigned long *bitmap, } adev = (struct amdgpu_device *)pdd->dev->kgd; - r = svm_range_dma_map_dev(adev->dev, &prange->dma_addr[gpuidx], - hmm_pfns, prange->npages); + r = svm_range_dma_map_dev(adev, prange, hmm_pfns, gpuidx); if (r) break; } @@ -1020,21 +1031,22 @@ svm_range_split_by_granularity(struct kfd_process *p, struct mm_struct *mm, } static uint64_t -svm_range_get_pte_flags(struct amdgpu_device *adev, struct svm_range *prange) +svm_range_get_pte_flags(struct amdgpu_device *adev, struct svm_range *prange, + int domain) { struct amdgpu_device *bo_adev; uint32_t flags = prange->flags; uint32_t mapping_flags = 0; uint64_t pte_flags; - bool snoop = !prange->ttm_res; + bool snoop = (domain != SVM_RANGE_VRAM_DOMAIN); bool coherent = flags & KFD_IOCTL_SVM_FLAG_COHERENT; - if (prange->svm_bo && prange->ttm_res) + if (domain == SVM_RANGE_VRAM_DOMAIN) bo_adev = amdgpu_ttm_adev(prange->svm_bo->bo->tbo.bdev); switch (adev->asic_type) { case CHIP_ARCTURUS: - if (prange->svm_bo && prange->ttm_res) { + if (domain == SVM_RANGE_VRAM_DOMAIN) { if (bo_adev == adev) { mapping_flags |= coherent ? AMDGPU_VM_MTYPE_CC : AMDGPU_VM_MTYPE_RW; @@ -1050,7 +1062,7 @@ svm_range_get_pte_flags(struct amdgpu_device *adev, struct svm_range *prange) } break; case CHIP_ALDEBARAN: - if (prange->svm_bo && prange->ttm_res) { + if (domain == SVM_RANGE_VRAM_DOMAIN) { if (bo_adev == adev) { mapping_flags |= coherent ? 
AMDGPU_VM_MTYPE_CC : AMDGPU_VM_MTYPE_RW; @@ -1080,14 +1092,14 @@ svm_range_get_pte_flags(struct amdgpu_device *adev, struct svm_range *prange) mapping_flags |= AMDGPU_VM_PAGE_EXECUTABLE; pte_flags = AMDGPU_PTE_VALID; - pte_flags |= prange->ttm_res ? 0 : AMDGPU_PTE_SYSTEM; + pte_flags |= (domain == SVM_RANGE_VRAM_DOMAIN) ? 0 : AMDGPU_PTE_SYSTEM; pte_flags |= snoop ? AMDGPU_PTE_SNOOPED : 0; pte_flags |= amdgpu_gem_va_map_flags(adev, mapping_flags); pr_debug("svms 0x%p [0x%lx 0x%lx] vram %d PTE 0x%llx mapping 0x%x\n", prange->svms, prange->start, prange->last, - prange->ttm_res ? 1:0, pte_flags, mapping_flags); + (domain == SVM_RANGE_VRAM_DOMAIN) ? 1:0, pte_flags, mapping_flags); return pte_flags; } @@ -1158,31 +1170,41 @@ svm_range_map_to_gpu(struct amdgpu_device *adev, struct amdgpu_vm *vm, struct amdgpu_bo_va bo_va; bool table_freed = false; uint64_t pte_flags; + unsigned long last_start; + int last_domain; int r = 0; + int64_t i; pr_debug("svms 0x%p [0x%lx 0x%lx]\n", prange->svms, prange->start, prange->last); - if (prange->svm_bo && prange->ttm_res) { + if (prange->svm_bo && prange->ttm_res) bo_va.is_xgmi = amdgpu_xgmi_same_hive(adev, bo_adev); - prange->mapping.bo_va = &bo_va; - } - prange->mapping.start = prange->start; - prange->mapping.last = prange->last; - prange->mapping.offset = prange->ttm_res ? 
prange->offset : 0; - pte_flags = svm_range_get_pte_flags(adev, prange); + last_start = prange->start; + for (i = 0; i < prange->npages; i++) { + last_domain = dma_addr[i] & SVM_RANGE_VRAM_DOMAIN; + dma_addr[i] &= ~SVM_RANGE_VRAM_DOMAIN; + if ((prange->start + i) < prange->last && + last_domain == (dma_addr[i + 1] & SVM_RANGE_VRAM_DOMAIN)) + continue; - r = amdgpu_vm_bo_update_mapping(adev, bo_adev, vm, false, false, NULL, - prange->mapping.start, - prange->mapping.last, pte_flags, - prange->mapping.offset, - prange->ttm_res, - dma_addr, &vm->last_update, - &table_freed); - if (r) { - pr_debug("failed %d to map to gpu 0x%lx\n", r, prange->start); - goto out; + pr_debug("Mapping range [0x%lx 0x%llx] on domain: %s\n", + last_start, prange->start + i, last_domain ? "GPU" : "CPU"); + pte_flags = svm_range_get_pte_flags(adev, prange, last_domain); + r = amdgpu_vm_bo_update_mapping(adev, bo_adev, vm, false, false, NULL, + last_start, + prange->start + i, pte_flags, + last_start - prange->start, + NULL, + dma_addr, + &vm->last_update, + &table_freed); + if (r) { + pr_debug("failed %d to map to gpu 0x%lx\n", r, prange->start); + goto out; + } + last_start = prange->start + i + 1; } r = amdgpu_vm_update_pdes(adev, vm, false); @@ -1203,7 +1225,6 @@ svm_range_map_to_gpu(struct amdgpu_device *adev, struct amdgpu_vm *vm, p->pasid, TLB_FLUSH_LEGACY); } out: - prange->mapping.bo_va = NULL; return r; } diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_svm.h b/drivers/gpu/drm/amd/amdkfd/kfd_svm.h index 9a7744a12dab..41115a220c2b 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_svm.h +++ b/drivers/gpu/drm/amd/amdkfd/kfd_svm.h @@ -35,6 +35,7 @@ #include "amdgpu.h" #include "kfd_priv.h" +#define SVM_RANGE_VRAM_DOMAIN (1UL << 0) #define SVM_ADEV_PGMAP_OWNER(adev)\ ((adev)->hive ? 
(void *)(adev)->hive : (void *)(adev)) @@ -113,7 +114,6 @@ struct svm_range { struct list_head update_list; struct list_head remove_list; struct list_head insert_list; - struct amdgpu_bo_va_mapping mapping; uint64_t npages; dma_addr_t *dma_addr[MAX_GPU_INSTANCE]; struct ttm_resource *ttm_res; -- cgit v1.2.3 From 1ade5f84cc25ddd02161859b59345dca9aabc2e8 Mon Sep 17 00:00:00 2001 From: Alex Sierra Date: Wed, 12 May 2021 10:54:58 -0500 Subject: drm/amdkfd: skip invalid pages during migrations Invalid pages can be the result of pages that have been migrated already due to copy-on-write procedure or pages that were never migrated to VRAM in first place. This is not an issue anymore, as pranges now support mixed memory domains (CPU/GPU). Signed-off-by: Alex Sierra Reviewed-by: Felix Kuehling Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdkfd/kfd_migrate.c | 38 +++++++++++++++----------------- 1 file changed, 18 insertions(+), 20 deletions(-) diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_migrate.c b/drivers/gpu/drm/amd/amdkfd/kfd_migrate.c index 90645017258a..d93abe5fe66e 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_migrate.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_migrate.c @@ -372,7 +372,6 @@ svm_migrate_vma_to_vram(struct amdgpu_device *adev, struct svm_range *prange, size_t size; void *buf; int r = -ENOMEM; - int retry = 0; memset(&migrate, 0, sizeof(migrate)); migrate.vma = vma; @@ -391,7 +390,6 @@ svm_migrate_vma_to_vram(struct amdgpu_device *adev, struct svm_range *prange, migrate.dst = migrate.src + npages; scratch = (dma_addr_t *)(migrate.dst + npages); -retry: r = migrate_vma_setup(&migrate); if (r) { pr_debug("failed %d prepare migrate svms 0x%p [0x%lx 0x%lx]\n", @@ -399,17 +397,9 @@ retry: goto out_free; } if (migrate.cpages != npages) { - pr_debug("collect 0x%lx/0x%llx pages, retry\n", migrate.cpages, + pr_debug("Partial migration. 
0x%lx/0x%llx pages can be migrated\n", + migrate.cpages, npages); - migrate_vma_finalize(&migrate); - if (retry++ >= 3) { - r = -ENOMEM; - pr_debug("failed %d migrate svms 0x%p [0x%lx 0x%lx]\n", - r, prange->svms, prange->start, prange->last); - goto out_free; - } - - goto retry; } if (migrate.cpages) { @@ -506,9 +496,8 @@ static void svm_migrate_page_free(struct page *page) static int svm_migrate_copy_to_ram(struct amdgpu_device *adev, struct svm_range *prange, struct migrate_vma *migrate, struct dma_fence **mfence, - dma_addr_t *scratch) + dma_addr_t *scratch, uint64_t npages) { - uint64_t npages = migrate->cpages; struct device *dev = adev->dev; uint64_t *src; dma_addr_t *dst; @@ -525,15 +514,23 @@ svm_migrate_copy_to_ram(struct amdgpu_device *adev, struct svm_range *prange, src = (uint64_t *)(scratch + npages); dst = scratch; - for (i = 0, j = 0; i < npages; i++, j++, addr += PAGE_SIZE) { + for (i = 0, j = 0; i < npages; i++, addr += PAGE_SIZE) { struct page *spage; spage = migrate_pfn_to_page(migrate->src[i]); - if (!spage) { - pr_debug("failed get spage svms 0x%p [0x%lx 0x%lx]\n", + if (!spage || !is_zone_device_page(spage)) { + pr_debug("invalid page. 
Could be in CPU already svms 0x%p [0x%lx 0x%lx]\n", prange->svms, prange->start, prange->last); - r = -ENOMEM; - goto out_oom; + if (j) { + r = svm_migrate_copy_memory_gart(adev, dst + i - j, + src + i - j, j, + FROM_VRAM_TO_RAM, + mfence); + if (r) + goto out_oom; + j = 0; + } + continue; } src[i] = svm_migrate_addr(adev, spage); if (i > 0 && src[i] != src[i - 1] + PAGE_SIZE) { @@ -566,6 +563,7 @@ svm_migrate_copy_to_ram(struct amdgpu_device *adev, struct svm_range *prange, migrate->dst[i] = migrate_pfn(page_to_pfn(dpage)); migrate->dst[i] |= MIGRATE_PFN_LOCKED; + j++; } r = svm_migrate_copy_memory_gart(adev, dst + i - j, src + i - j, j, @@ -624,7 +622,7 @@ svm_migrate_vma_to_ram(struct amdgpu_device *adev, struct svm_range *prange, if (migrate.cpages) { r = svm_migrate_copy_to_ram(adev, prange, &migrate, &mfence, - scratch); + scratch, npages); migrate_vma_pages(&migrate); svm_migrate_copy_done(adev, mfence); migrate_vma_finalize(&migrate); -- cgit v1.2.3 From 6ffecc946fada1c39455e89488dba57941e81659 Mon Sep 17 00:00:00 2001 From: Alex Sierra Date: Wed, 30 Jun 2021 15:09:10 -0400 Subject: drm/amdkfd: skip migration for pages already in VRAM Migration skipped for pages that are already in VRAM domain. These could be the result of previous partial migrations to SYS RAM, and prefetch back to VRAM. Ex. Coherent pages in VRAM that were not written/invalidated after a copy-on-write. 
Signed-off-by: Alex Sierra Reviewed-by: Felix Kuehling Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdkfd/kfd_migrate.c | 16 +++++++--------- 1 file changed, 7 insertions(+), 9 deletions(-) diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_migrate.c b/drivers/gpu/drm/amd/amdkfd/kfd_migrate.c index d93abe5fe66e..610f33f7c2af 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_migrate.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_migrate.c @@ -293,15 +293,13 @@ svm_migrate_copy_to_vram(struct amdgpu_device *adev, struct svm_range *prange, for (i = j = 0; i < npages; i++) { struct page *spage; - dst[i] = cursor.start + (j << PAGE_SHIFT); - migrate->dst[i] = svm_migrate_addr_to_pfn(adev, dst[i]); - svm_migrate_get_vram_page(prange, migrate->dst[i]); - - migrate->dst[i] = migrate_pfn(migrate->dst[i]); - migrate->dst[i] |= MIGRATE_PFN_LOCKED; - - if (migrate->src[i] & MIGRATE_PFN_VALID) { - spage = migrate_pfn_to_page(migrate->src[i]); + spage = migrate_pfn_to_page(migrate->src[i]); + if (spage && !is_zone_device_page(spage)) { + dst[i] = cursor.start + (j << PAGE_SHIFT); + migrate->dst[i] = svm_migrate_addr_to_pfn(adev, dst[i]); + svm_migrate_get_vram_page(prange, migrate->dst[i]); + migrate->dst[i] = migrate_pfn(migrate->dst[i]); + migrate->dst[i] |= MIGRATE_PFN_LOCKED; src[i] = dma_map_page(dev, spage, 0, PAGE_SIZE, DMA_TO_DEVICE); r = dma_mapping_error(dev, src[i]); -- cgit v1.2.3 From 3bf8282c6b9454422a24b1c443ece80ab325c389 Mon Sep 17 00:00:00 2001 From: Alex Sierra Date: Wed, 12 May 2021 11:02:33 -0500 Subject: drm/amdkfd: add invalid pages debug at vram migration This is for debug purposes only. It conditionally generates partial migrations to test mixed CPU/GPU memory domain pages in a prange easily. 
Signed-off-by: Alex Sierra Reviewed-by: Felix Kuehling Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdkfd/kfd_migrate.c | 14 ++++++++++++++ 1 file changed, 14 insertions(+) diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_migrate.c b/drivers/gpu/drm/amd/amdkfd/kfd_migrate.c index 610f33f7c2af..3a51384cd0a1 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_migrate.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_migrate.c @@ -353,6 +353,20 @@ out_free_vram_pages: } } +#ifdef DEBUG_FORCE_MIXED_DOMAINS + for (i = 0, j = 0; i < npages; i += 4, j++) { + if (j & 1) + continue; + svm_migrate_put_vram_page(adev, dst[i]); + migrate->dst[i] = 0; + svm_migrate_put_vram_page(adev, dst[i + 1]); + migrate->dst[i + 1] = 0; + svm_migrate_put_vram_page(adev, dst[i + 2]); + migrate->dst[i + 2] = 0; + svm_migrate_put_vram_page(adev, dst[i + 3]); + migrate->dst[i + 3] = 0; + } +#endif out: return r; } -- cgit v1.2.3 From 7981ec65497a42ab83dfe43192ed7a98c542f001 Mon Sep 17 00:00:00 2001 From: Alex Sierra Date: Tue, 9 Mar 2021 22:12:15 -0600 Subject: drm/amdkfd: Maintain svm_bo reference in page->zone_device_data Each zone-device page holds a reference to the SVM BO that manages its backing storage. This is necessary to correctly hold on to the BO in case zone_device pages are shared with a child-process. 
Signed-off-by: Alex Sierra Reviewed-by: Felix Kuehling Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdkfd/kfd_migrate.c | 10 ++++++++-- drivers/gpu/drm/amd/amdkfd/kfd_svm.c | 10 +--------- drivers/gpu/drm/amd/amdkfd/kfd_svm.h | 9 +++++++++ 3 files changed, 18 insertions(+), 11 deletions(-) diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_migrate.c b/drivers/gpu/drm/amd/amdkfd/kfd_migrate.c index 3a51384cd0a1..dab290a4d19d 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_migrate.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_migrate.c @@ -218,7 +218,8 @@ svm_migrate_get_vram_page(struct svm_range *prange, unsigned long pfn) struct page *page; page = pfn_to_page(pfn); - page->zone_device_data = prange; + svm_range_bo_ref(prange->svm_bo); + page->zone_device_data = prange->svm_bo; get_page(page); lock_page(page); } @@ -502,7 +503,12 @@ svm_migrate_ram_to_vram(struct svm_range *prange, uint32_t best_loc, static void svm_migrate_page_free(struct page *page) { - /* Keep this function to avoid warning */ + struct svm_range_bo *svm_bo = page->zone_device_data; + + if (svm_bo) { + pr_debug("svm_bo ref left: %d\n", kref_read(&svm_bo->kref)); + svm_range_bo_unref(svm_bo); + } } static int diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_svm.c b/drivers/gpu/drm/amd/amdkfd/kfd_svm.c index f9a1a47482c6..9a71d8919bd6 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_svm.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_svm.c @@ -312,14 +312,6 @@ static bool svm_bo_ref_unless_zero(struct svm_range_bo *svm_bo) return true; } -static struct svm_range_bo *svm_range_bo_ref(struct svm_range_bo *svm_bo) -{ - if (svm_bo) - kref_get(&svm_bo->kref); - - return svm_bo; -} - static void svm_range_bo_release(struct kref *kref) { struct svm_range_bo *svm_bo; @@ -358,7 +350,7 @@ static void svm_range_bo_release(struct kref *kref) kfree(svm_bo); } -static void svm_range_bo_unref(struct svm_range_bo *svm_bo) +void svm_range_bo_unref(struct svm_range_bo *svm_bo) { if (!svm_bo) return; diff --git 
a/drivers/gpu/drm/amd/amdkfd/kfd_svm.h b/drivers/gpu/drm/amd/amdkfd/kfd_svm.h index 41115a220c2b..3fc1fd8b4fbc 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_svm.h +++ b/drivers/gpu/drm/amd/amdkfd/kfd_svm.h @@ -150,6 +150,14 @@ static inline void svm_range_unlock(struct svm_range *prange) mutex_unlock(&prange->lock); } +static inline struct svm_range_bo *svm_range_bo_ref(struct svm_range_bo *svm_bo) +{ + if (svm_bo) + kref_get(&svm_bo->kref); + + return svm_bo; +} + int svm_range_list_init(struct kfd_process *p); void svm_range_list_fini(struct kfd_process *p); int svm_ioctl(struct kfd_process *p, enum kfd_ioctl_svm_op op, uint64_t start, @@ -186,6 +194,7 @@ svm_range_get_pdd_by_adev(struct svm_range *prange, struct amdgpu_device *adev); */ #define KFD_IS_SVM_API_SUPPORTED(dev) ((dev)->pgmap.type != 0) +void svm_range_bo_unref(struct svm_range_bo *svm_bo); #else struct kfd_process; -- cgit v1.2.3 From 93c5bcd4eaaafd7c25c062089806c86d9b7890dd Mon Sep 17 00:00:00 2001 From: Mukul Joshi Date: Tue, 29 Jun 2021 16:34:10 -0400 Subject: drm/amdgpu: Conditionally reset SDMA RAS error counts Reset SDMA RAS error counts during init only if persistent EDC harvesting is not supported. 
Signed-off-by: Mukul Joshi Reviewed-by: John Clements Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c b/drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c index f6881d99609b..8931000dcd41 100644 --- a/drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c +++ b/drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c @@ -1896,8 +1896,11 @@ static int sdma_v4_0_late_init(void *handle) sdma_v4_0_setup_ulv(adev); - if (adev->sdma.funcs && adev->sdma.funcs->reset_ras_error_count) - adev->sdma.funcs->reset_ras_error_count(adev); + if (!amdgpu_persistent_edc_harvesting_supported(adev)) { + if (adev->sdma.funcs && + adev->sdma.funcs->reset_ras_error_count) + adev->sdma.funcs->reset_ras_error_count(adev); + } if (adev->sdma.funcs && adev->sdma.funcs->ras_late_init) return adev->sdma.funcs->ras_late_init(adev, &ih_info); -- cgit v1.2.3 From 07b72960d2b4a087ff2445e286159e69742069cc Mon Sep 17 00:00:00 2001 From: Kees Cook Date: Thu, 17 Jun 2021 14:33:01 -0700 Subject: drm/i915/display: Do not zero past infoframes.vsc MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit intel_dp_vsc_sdp_unpack() was using a memset() size (36, struct dp_sdp) larger than the destination (24, struct drm_dp_vsc_sdp), clobbering fields in struct intel_crtc_state after infoframes.vsc. Use the actual target size for the memset(). 
Fixes: 1b404b7dbb10 ("drm/i915/dp: Read out DP SDPs") Cc: stable@vger.kernel.org Signed-off-by: Kees Cook Reviewed-by: José Roberto de Souza Signed-off-by: José Roberto de Souza Link: https://patchwork.freedesktop.org/patch/msgid/20210617213301.1824728-1-keescook@chromium.org (cherry picked from commit c88e2647c5bb45d04dc4302018ebe6ebbf331823) Signed-off-by: Rodrigo Vivi --- drivers/gpu/drm/i915/display/intel_dp.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/i915/display/intel_dp.c b/drivers/gpu/drm/i915/display/intel_dp.c index 5c9222283044..6cc03b9e4321 100644 --- a/drivers/gpu/drm/i915/display/intel_dp.c +++ b/drivers/gpu/drm/i915/display/intel_dp.c @@ -2868,7 +2868,7 @@ static int intel_dp_vsc_sdp_unpack(struct drm_dp_vsc_sdp *vsc, if (size < sizeof(struct dp_sdp)) return -EINVAL; - memset(vsc, 0, size); + memset(vsc, 0, sizeof(*vsc)); if (sdp->sdp_header.HB0 != 0) return -EINVAL; -- cgit v1.2.3 From 995e9bcb42f99b68a45400f51bbbf41bb871ba2f Mon Sep 17 00:00:00 2001 From: José Roberto de Souza Date: Wed, 30 Jun 2021 14:05:22 -0700 Subject: drm/i915/display/dg1: Correctly map DPLLs during state readout MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit _DG1_DPCLKA0_CFGCR0 maps between DPLL 0 and 1 with one bit for phy A and B while _DG1_DPCLKA1_CFGCR0 maps between DPLL 2 and 3 with one bit for phy C and D. Reusing _cnl_ddi_get_pll() doesn't take that into consideration, returning DPLL 0 and 1 for phy C and D. That is a regression introduced in the refactor done in commit 351221ffc5e5 ("drm/i915: Move DDI clock readout to encoder->get_config()"). While at it, also drop the macros previously used instead of reusing them, to improve readability. 
BSpec: 50286 Fixes: 351221ffc5e5 ("drm/i915: Move DDI clock readout to encoder->get_config()") Cc: Lucas De Marchi Cc: Ville Syrjälä Signed-off-by: José Roberto de Souza Reviewed-by: Lucas De Marchi Link: https://patchwork.freedesktop.org/patch/msgid/20210630210522.162674-1-jose.souza@intel.com (cherry picked from commit 3352d86dcd3336a117630f0c1cfbc6bb8c93e1cf) Signed-off-by: Rodrigo Vivi --- drivers/gpu/drm/i915/display/intel_ddi.c | 19 ++++++++++++++++--- drivers/gpu/drm/i915/i915_reg.h | 3 --- 2 files changed, 16 insertions(+), 6 deletions(-) diff --git a/drivers/gpu/drm/i915/display/intel_ddi.c b/drivers/gpu/drm/i915/display/intel_ddi.c index 390869bd6b63..be716b56e8e0 100644 --- a/drivers/gpu/drm/i915/display/intel_ddi.c +++ b/drivers/gpu/drm/i915/display/intel_ddi.c @@ -1791,10 +1791,23 @@ static struct intel_shared_dpll *dg1_ddi_get_pll(struct intel_encoder *encoder) { struct drm_i915_private *i915 = to_i915(encoder->base.dev); enum phy phy = intel_port_to_phy(i915, encoder->port); + enum intel_dpll_id id; + u32 val; - return _cnl_ddi_get_pll(i915, DG1_DPCLKA_CFGCR0(phy), - DG1_DPCLKA_CFGCR0_DDI_CLK_SEL_MASK(phy), - DG1_DPCLKA_CFGCR0_DDI_CLK_SEL_SHIFT(phy)); + val = intel_de_read(i915, DG1_DPCLKA_CFGCR0(phy)); + val &= DG1_DPCLKA_CFGCR0_DDI_CLK_SEL_MASK(phy); + val >>= DG1_DPCLKA_CFGCR0_DDI_CLK_SEL_SHIFT(phy); + id = val; + + /* + * _DG1_DPCLKA0_CFGCR0 maps between DPLL 0 and 1 with one bit for phy A + * and B while _DG1_DPCLKA1_CFGCR0 maps between DPLL 2 and 3 with one + * bit for phy C and D. 
+ */ + if (phy >= PHY_C) + id += DPLL_ID_DG1_DPLL2; + + return intel_get_shared_dpll_by_id(i915, id); } static void icl_ddi_combo_enable_clock(struct intel_encoder *encoder, diff --git a/drivers/gpu/drm/i915/i915_reg.h b/drivers/gpu/drm/i915/i915_reg.h index e915ec034c98..94fde5ca26ae 100644 --- a/drivers/gpu/drm/i915/i915_reg.h +++ b/drivers/gpu/drm/i915/i915_reg.h @@ -10513,7 +10513,6 @@ enum skl_power_gate { #define _DG1_DPCLKA1_CFGCR0 0x16C280 #define _DG1_DPCLKA_PHY_IDX(phy) ((phy) % 2) #define _DG1_DPCLKA_PLL_IDX(pll) ((pll) % 2) -#define _DG1_PHY_DPLL_MAP(phy) ((phy) >= PHY_C ? DPLL_ID_DG1_DPLL2 : DPLL_ID_DG1_DPLL0) #define DG1_DPCLKA_CFGCR0(phy) _MMIO_PHY((phy) / 2, \ _DG1_DPCLKA_CFGCR0, \ _DG1_DPCLKA1_CFGCR0) @@ -10521,8 +10520,6 @@ enum skl_power_gate { #define DG1_DPCLKA_CFGCR0_DDI_CLK_SEL_SHIFT(phy) (_DG1_DPCLKA_PHY_IDX(phy) * 2) #define DG1_DPCLKA_CFGCR0_DDI_CLK_SEL(pll, phy) (_DG1_DPCLKA_PLL_IDX(pll) << DG1_DPCLKA_CFGCR0_DDI_CLK_SEL_SHIFT(phy)) #define DG1_DPCLKA_CFGCR0_DDI_CLK_SEL_MASK(phy) (0x3 << DG1_DPCLKA_CFGCR0_DDI_CLK_SEL_SHIFT(phy)) -#define DG1_DPCLKA_CFGCR0_DDI_CLK_SEL_DPLL_MAP(clk_sel, phy) \ - (((clk_sel) >> DG1_DPCLKA_CFGCR0_DDI_CLK_SEL_SHIFT(phy)) + _DG1_PHY_DPLL_MAP(phy)) /* ADLS Clocks */ #define _ADLS_DPCLKA_CFGCR0 0x164280 -- cgit v1.2.3 From 320ad343ea3023b4035dc4b5091eb19060391dbc Mon Sep 17 00:00:00 2001 From: Thomas Zimmermann Date: Thu, 1 Jul 2021 19:36:17 +0200 Subject: drm/i915: Use the correct IRQ during resume The code in xcs_resume() probably didn't work as intended. It uses struct drm_device.irq, which is allocated to 0, but never initialized by i915 to the device's interrupt number. Change all calls to synchronize_hardirq() to intel_synchronize_irq(), which uses the correct interrupt. _hardirq() functions are not needed in this context. 
v5: * go back to _hardirq() after PCI probe reported wrong context; add rsp comment v4: * switch everything to intel_synchronize_irq() (Daniel) v3: * also use intel_synchronize_hardirq() at another callsite v2: * wrap irq code in intel_synchronize_hardirq() (Ville) Signed-off-by: Thomas Zimmermann Fixes: 536f77b1caa0 ("drm/i915/gt: Call stop_ring() from ring resume, again") Cc: Chris Wilson Cc: Mika Kuoppala Cc: Daniel Vetter Cc: Rodrigo Vivi Cc: Joonas Lahtinen Cc: Maarten Lankhorst Cc: Lucas De Marchi Signed-off-by: Daniel Vetter Link: https://patchwork.freedesktop.org/patch/msgid/20210701173618.10718-2-tzimmermann@suse.de (cherry picked from commit 27e4b467d94e216b365da388358c9407af818662) Signed-off-by: Rodrigo Vivi --- drivers/gpu/drm/i915/gt/intel_engine_cs.c | 2 +- drivers/gpu/drm/i915/gt/intel_ring_submission.c | 7 +++++-- drivers/gpu/drm/i915/i915_irq.c | 5 +++++ drivers/gpu/drm/i915/i915_irq.h | 1 + 4 files changed, 12 insertions(+), 3 deletions(-) diff --git a/drivers/gpu/drm/i915/gt/intel_engine_cs.c b/drivers/gpu/drm/i915/gt/intel_engine_cs.c index 9ceddfbb1687..7f03df236613 100644 --- a/drivers/gpu/drm/i915/gt/intel_engine_cs.c +++ b/drivers/gpu/drm/i915/gt/intel_engine_cs.c @@ -1279,7 +1279,7 @@ bool intel_engine_is_idle(struct intel_engine_cs *engine) return true; /* Waiting to drain ELSP? */ - synchronize_hardirq(to_pci_dev(engine->i915->drm.dev)->irq); + intel_synchronize_hardirq(engine->i915); intel_engine_flush_submission(engine); /* ELSP is empty, but there are ready requests? E.g. 
after reset */ diff --git a/drivers/gpu/drm/i915/gt/intel_ring_submission.c b/drivers/gpu/drm/i915/gt/intel_ring_submission.c index 0c423f096e2b..37d74d4ed59b 100644 --- a/drivers/gpu/drm/i915/gt/intel_ring_submission.c +++ b/drivers/gpu/drm/i915/gt/intel_ring_submission.c @@ -184,8 +184,11 @@ static int xcs_resume(struct intel_engine_cs *engine) ENGINE_TRACE(engine, "ring:{HEAD:%04x, TAIL:%04x}\n", ring->head, ring->tail); - /* Double check the ring is empty & disabled before we resume */ - synchronize_hardirq(engine->i915->drm.irq); + /* + * Double check the ring is empty & disabled before we resume. Called + * from atomic context during PCI probe, so _hardirq(). + */ + intel_synchronize_hardirq(engine->i915); if (!stop_ring(engine)) goto err; diff --git a/drivers/gpu/drm/i915/i915_irq.c b/drivers/gpu/drm/i915/i915_irq.c index a11bdb667241..7d13d2147054 100644 --- a/drivers/gpu/drm/i915/i915_irq.c +++ b/drivers/gpu/drm/i915/i915_irq.c @@ -4575,3 +4575,8 @@ void intel_synchronize_irq(struct drm_i915_private *i915) { synchronize_irq(to_pci_dev(i915->drm.dev)->irq); } + +void intel_synchronize_hardirq(struct drm_i915_private *i915) +{ + synchronize_hardirq(to_pci_dev(i915->drm.dev)->irq); +} diff --git a/drivers/gpu/drm/i915/i915_irq.h b/drivers/gpu/drm/i915/i915_irq.h index db34d5dbe402..e43b6734f21b 100644 --- a/drivers/gpu/drm/i915/i915_irq.h +++ b/drivers/gpu/drm/i915/i915_irq.h @@ -94,6 +94,7 @@ void intel_runtime_pm_disable_interrupts(struct drm_i915_private *dev_priv); void intel_runtime_pm_enable_interrupts(struct drm_i915_private *dev_priv); bool intel_irqs_enabled(struct drm_i915_private *dev_priv); void intel_synchronize_irq(struct drm_i915_private *i915); +void intel_synchronize_hardirq(struct drm_i915_private *i915); int intel_get_crtc_scanline(struct intel_crtc *crtc); void gen8_irq_power_well_post_enable(struct drm_i915_private *dev_priv, -- cgit v1.2.3 From 3dd6c11b60d2f1e4082221a8831f91093c4494aa Mon Sep 17 00:00:00 2001 From: Thomas Zimmermann 
Date: Thu, 1 Jul 2021 19:36:18 +0200 Subject: drm/i915: Drop all references to DRM IRQ midlayer MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Remove all references to DRM's IRQ midlayer. i915 uses Linux' interrupt functions directly. v2: * also remove an outdated comment * move IRQ fix into separate patch * update Fixes tag (Daniel) Signed-off-by: Thomas Zimmermann Fixes: b318b82455bd ("drm/i915: Nuke drm_driver irq vfuncs") Cc: Ville Syrjälä Cc: Chris Wilson Cc: Jani Nikula Cc: Joonas Lahtinen Cc: Rodrigo Vivi Cc: intel-gfx@lists.freedesktop.org Signed-off-by: Daniel Vetter Link: https://patchwork.freedesktop.org/patch/msgid/20210701173618.10718-3-tzimmermann@suse.de (cherry picked from commit 91b96f0008a2d66d76b525556e4818f5a4a089e4) Signed-off-by: Rodrigo Vivi --- drivers/gpu/drm/i915/i915_drv.c | 1 - drivers/gpu/drm/i915/i915_irq.c | 5 ----- 2 files changed, 6 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_drv.c b/drivers/gpu/drm/i915/i915_drv.c index 850b499c71c8..73de45472f60 100644 --- a/drivers/gpu/drm/i915/i915_drv.c +++ b/drivers/gpu/drm/i915/i915_drv.c @@ -42,7 +42,6 @@ #include #include #include -#include #include #include diff --git a/drivers/gpu/drm/i915/i915_irq.c b/drivers/gpu/drm/i915/i915_irq.c index 7d13d2147054..c03943198089 100644 --- a/drivers/gpu/drm/i915/i915_irq.c +++ b/drivers/gpu/drm/i915/i915_irq.c @@ -33,7 +33,6 @@ #include #include -#include #include "display/intel_de.h" #include "display/intel_display_types.h" @@ -4564,10 +4563,6 @@ void intel_runtime_pm_enable_interrupts(struct drm_i915_private *dev_priv) bool intel_irqs_enabled(struct drm_i915_private *dev_priv) { - /* - * We only use drm_irq_uninstall() at unload and VT switch, so - * this is the only thing we need to check. - */ return dev_priv->runtime_pm.irqs_enabled; } -- cgit v1.2.3