summaryrefslogtreecommitdiffstats
path: root/drivers/gpu
diff options
context:
space:
mode:
Diffstat (limited to 'drivers/gpu')
-rw-r--r--drivers/gpu/drm/amd/amdgpu/aldebaran.c45
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu.h2
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h1
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c23
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_atomfirmware.c2
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c8
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c5
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_device.c23
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_display.c2
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_fru_eeprom.c9
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_job.c9
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c14
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_psp.h1
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c3
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_reset.h1
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c34
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_ucode.h1
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_vkms.c3
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.c2
-rw-r--r--drivers/gpu/drm/amd/amdgpu/athub_v3_0.c42
-rw-r--r--drivers/gpu/drm/amd/amdgpu/dce_v10_0.c3
-rw-r--r--drivers/gpu/drm/amd/amdgpu/dce_v11_0.c3
-rw-r--r--drivers/gpu/drm/amd/amdgpu/dce_v6_0.c3
-rw-r--r--drivers/gpu/drm/amd/amdgpu/dce_v8_0.c7
-rw-r--r--drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c62
-rw-r--r--drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c59
-rw-r--r--drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c3
-rw-r--r--drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c3
-rw-r--r--drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c6
-rw-r--r--drivers/gpu/drm/amd/amdgpu/hdp_v5_2.c150
-rw-r--r--drivers/gpu/drm/amd/amdgpu/ih_v6_0.c1
-rw-r--r--drivers/gpu/drm/amd/amdgpu/mes_v11_0.c1
-rw-r--r--drivers/gpu/drm/amd/amdgpu/mmhub_v1_0.c1
-rw-r--r--drivers/gpu/drm/amd/amdgpu/mmhub_v3_0_1.c42
-rw-r--r--drivers/gpu/drm/amd/amdgpu/mmhub_v9_4.c12
-rw-r--r--drivers/gpu/drm/amd/amdgpu/navi10_ih.c7
-rw-r--r--drivers/gpu/drm/amd/amdgpu/nbio_v2_3.c9
-rw-r--r--drivers/gpu/drm/amd/amdgpu/nbio_v6_1.c9
-rw-r--r--drivers/gpu/drm/amd/amdgpu/nbio_v7_4.c9
-rw-r--r--drivers/gpu/drm/amd/amdgpu/nbio_v7_7.c93
-rw-r--r--drivers/gpu/drm/amd/amdgpu/psp_v12_0.c10
-rw-r--r--drivers/gpu/drm/amd/amdgpu/psp_v13_0.c1
-rw-r--r--drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c5
-rw-r--r--drivers/gpu/drm/amd/amdgpu/soc15.c25
-rw-r--r--drivers/gpu/drm/amd/amdgpu/soc21.c40
-rw-r--r--drivers/gpu/drm/amd/amdgpu/vcn_v4_0.c5
-rw-r--r--drivers/gpu/drm/amd/amdgpu/vega10_ih.c11
-rw-r--r--drivers/gpu/drm/amd/amdgpu/vega20_ih.c11
-rw-r--r--drivers/gpu/drm/amd/amdkfd/kfd_chardev.c2
-rw-r--r--drivers/gpu/drm/amd/amdkfd/kfd_device.c13
-rw-r--r--drivers/gpu/drm/amd/amdkfd/kfd_events.c24
-rw-r--r--drivers/gpu/drm/amd/amdkfd/kfd_priv.h2
-rw-r--r--drivers/gpu/drm/amd/amdkfd/kfd_svm.c17
-rw-r--r--drivers/gpu/drm/amd/amdkfd/kfd_svm.h1
-rw-r--r--drivers/gpu/drm/amd/amdkfd/kfd_topology.c11
-rw-r--r--drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c15
-rw-r--r--drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_debugfs.c1
-rw-r--r--drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_plane.c9
-rw-r--r--drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_plane.h8
-rw-r--r--drivers/gpu/drm/amd/display/dc/basics/conversion.c21
-rw-r--r--drivers/gpu/drm/amd/display/dc/basics/conversion.h3
-rw-r--r--drivers/gpu/drm/amd/display/dc/clk_mgr/clk_mgr.c4
-rw-r--r--drivers/gpu/drm/amd/display/dc/clk_mgr/dcn21/rn_clk_mgr.c3
-rw-r--r--drivers/gpu/drm/amd/display/dc/clk_mgr/dcn21/rn_clk_mgr.h1
-rw-r--r--drivers/gpu/drm/amd/display/dc/clk_mgr/dcn314/dcn314_clk_mgr.c202
-rw-r--r--drivers/gpu/drm/amd/display/dc/clk_mgr/dcn314/dcn314_clk_mgr.h2
-rw-r--r--drivers/gpu/drm/amd/display/dc/clk_mgr/dcn314/dcn314_smu.h33
-rw-r--r--drivers/gpu/drm/amd/display/dc/core/dc.c44
-rw-r--r--drivers/gpu/drm/amd/display/dc/core/dc_link.c2
-rw-r--r--drivers/gpu/drm/amd/display/dc/core/dc_resource.c2
-rw-r--r--drivers/gpu/drm/amd/display/dc/core/dc_stream.c2
-rw-r--r--drivers/gpu/drm/amd/display/dc/dc.h7
-rw-r--r--drivers/gpu/drm/amd/display/dc/dc_dmub_srv.c104
-rw-r--r--drivers/gpu/drm/amd/display/dc/dc_link.h1
-rw-r--r--drivers/gpu/drm/amd/display/dc/dce/dce_clock_source.c2
-rw-r--r--drivers/gpu/drm/amd/display/dc/dcn10/dcn10_dpp.c2
-rw-r--r--drivers/gpu/drm/amd/display/dc/dcn10/dcn10_hubp.c3
-rw-r--r--drivers/gpu/drm/amd/display/dc/dcn10/dcn10_hw_sequencer.c1
-rw-r--r--drivers/gpu/drm/amd/display/dc/dcn10/dcn10_mpc.c6
-rw-r--r--drivers/gpu/drm/amd/display/dc/dcn10/dcn10_optc.c5
-rw-r--r--drivers/gpu/drm/amd/display/dc/dcn20/dcn20_dpp.c2
-rw-r--r--drivers/gpu/drm/amd/display/dc/dcn20/dcn20_hubp.c3
-rw-r--r--drivers/gpu/drm/amd/display/dc/dcn20/dcn20_mpc.c6
-rw-r--r--drivers/gpu/drm/amd/display/dc/dcn21/dcn21_hubbub.c8
-rw-r--r--drivers/gpu/drm/amd/display/dc/dcn30/dcn30_dpp.c2
-rw-r--r--drivers/gpu/drm/amd/display/dc/dcn30/dcn30_hubp.c2
-rw-r--r--drivers/gpu/drm/amd/display/dc/dcn303/dcn303_resource.c2
-rw-r--r--drivers/gpu/drm/amd/display/dc/dcn31/dcn31_dio_link_encoder.c3
-rw-r--r--drivers/gpu/drm/amd/display/dc/dcn31/dcn31_hpo_dp_stream_encoder.h3
-rw-r--r--drivers/gpu/drm/amd/display/dc/dcn31/dcn31_resource.c2
-rw-r--r--drivers/gpu/drm/amd/display/dc/dcn31/dcn31_resource.h1
-rw-r--r--drivers/gpu/drm/amd/display/dc/dcn314/Makefile25
-rw-r--r--drivers/gpu/drm/amd/display/dc/dcn314/dcn314_dio_stream_encoder.c4
-rw-r--r--drivers/gpu/drm/amd/display/dc/dcn314/dcn314_hwseq.c42
-rw-r--r--drivers/gpu/drm/amd/display/dc/dcn314/dcn314_hwseq.h4
-rw-r--r--drivers/gpu/drm/amd/display/dc/dcn314/dcn314_init.c2
-rw-r--r--drivers/gpu/drm/amd/display/dc/dcn314/dcn314_optc.c3
-rw-r--r--drivers/gpu/drm/amd/display/dc/dcn314/dcn314_resource.c343
-rw-r--r--drivers/gpu/drm/amd/display/dc/dcn314/dcn314_resource.h3
-rw-r--r--drivers/gpu/drm/amd/display/dc/dcn315/dcn315_resource.h1
-rw-r--r--drivers/gpu/drm/amd/display/dc/dcn316/dcn316_resource.h1
-rw-r--r--drivers/gpu/drm/amd/display/dc/dcn32/dcn32_dccg.c8
-rw-r--r--drivers/gpu/drm/amd/display/dc/dcn32/dcn32_dio_stream_encoder.c5
-rw-r--r--drivers/gpu/drm/amd/display/dc/dcn32/dcn32_hubp.c24
-rw-r--r--drivers/gpu/drm/amd/display/dc/dcn32/dcn32_hwseq.c132
-rw-r--r--drivers/gpu/drm/amd/display/dc/dcn32/dcn32_optc.c2
-rw-r--r--drivers/gpu/drm/amd/display/dc/dcn32/dcn32_resource.c7
-rw-r--r--drivers/gpu/drm/amd/display/dc/dcn32/dcn32_resource.h3
-rw-r--r--drivers/gpu/drm/amd/display/dc/dcn32/dcn32_resource_helpers.c61
-rw-r--r--drivers/gpu/drm/amd/display/dc/dcn321/dcn321_resource.c9
-rw-r--r--drivers/gpu/drm/amd/display/dc/dml/Makefile7
-rw-r--r--drivers/gpu/drm/amd/display/dc/dml/dcn20/dcn20_fpu.c1
-rw-r--r--drivers/gpu/drm/amd/display/dc/dml/dcn30/display_mode_vba_30.c3
-rw-r--r--drivers/gpu/drm/amd/display/dc/dml/dcn301/dcn301_fpu.c1
-rw-r--r--drivers/gpu/drm/amd/display/dc/dml/dcn31/dcn31_fpu.c9
-rw-r--r--drivers/gpu/drm/amd/display/dc/dml/dcn31/display_mode_vba_31.c422
-rw-r--r--drivers/gpu/drm/amd/display/dc/dml/dcn31/display_rq_dlg_calc_31.c2
-rw-r--r--drivers/gpu/drm/amd/display/dc/dml/dcn314/dcn314_fpu.c379
-rw-r--r--drivers/gpu/drm/amd/display/dc/dml/dcn314/dcn314_fpu.h40
-rw-r--r--drivers/gpu/drm/amd/display/dc/dml/dcn314/display_mode_vba_314.c13
-rw-r--r--drivers/gpu/drm/amd/display/dc/dml/dcn32/dcn32_fpu.c56
-rw-r--r--drivers/gpu/drm/amd/display/dc/dml/dcn32/display_mode_vba_32.c266
-rw-r--r--drivers/gpu/drm/amd/display/dc/dml/dcn32/display_mode_vba_util_32.c1105
-rw-r--r--drivers/gpu/drm/amd/display/dc/dml/dcn32/display_mode_vba_util_32.h54
-rw-r--r--drivers/gpu/drm/amd/display/dc/dml/dcn321/dcn321_fpu.c7
-rw-r--r--drivers/gpu/drm/amd/display/dc/dml/display_mode_lib.c12
-rw-r--r--drivers/gpu/drm/amd/display/dc/dml/display_mode_lib.h1
-rw-r--r--drivers/gpu/drm/amd/display/dc/dml/display_mode_vba.h116
-rw-r--r--drivers/gpu/drm/amd/display/dc/inc/hw/clk_mgr.h1
-rw-r--r--drivers/gpu/drm/amd/display/dc/link/link_hwss_hpo_dp.c4
-rw-r--r--drivers/gpu/drm/amd/display/include/dal_asic_id.h6
-rw-r--r--drivers/gpu/drm/amd/display/include/logger_types.h4
-rw-r--r--drivers/gpu/drm/amd/display/modules/color/color_gamma.c4
-rw-r--r--drivers/gpu/drm/amd/display/modules/freesync/freesync.c15
-rw-r--r--drivers/gpu/drm/amd/include/asic_reg/nbio/nbio_7_7_0_offset.h2
-rw-r--r--drivers/gpu/drm/amd/include/asic_reg/nbio/nbio_7_7_0_sh_mask.h13
-rw-r--r--drivers/gpu/drm/amd/include/mes_v11_api_def.h3
-rw-r--r--drivers/gpu/drm/amd/pm/swsmu/inc/pmfw_if/smu13_driver_if_v13_0_0.h31
-rw-r--r--drivers/gpu/drm/amd/pm/swsmu/inc/pmfw_if/smu13_driver_if_v13_0_4.h4
-rw-r--r--drivers/gpu/drm/amd/pm/swsmu/inc/smu_v13_0.h10
-rw-r--r--drivers/gpu/drm/amd/pm/swsmu/smu11/sienna_cichlid_ppt.c12
-rw-r--r--drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0.c36
-rw-r--r--drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_0_ppt.c74
-rw-r--r--drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_4_ppt.c17
-rw-r--r--drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_5_ppt.c3
-rw-r--r--drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_7_ppt.c50
-rw-r--r--drivers/gpu/drm/bridge/lvds-codec.c2
-rw-r--r--drivers/gpu/drm/drm_debugfs.c4
-rw-r--r--drivers/gpu/drm/drm_edid.c24
-rw-r--r--drivers/gpu/drm/drm_gem.c17
-rw-r--r--drivers/gpu/drm/drm_internal.h4
-rw-r--r--drivers/gpu/drm/drm_prime.c20
-rw-r--r--drivers/gpu/drm/gma500/cdv_device.c4
-rw-r--r--drivers/gpu/drm/gma500/gem.c4
-rw-r--r--drivers/gpu/drm/gma500/gma_display.c11
-rw-r--r--drivers/gpu/drm/gma500/oaktrail_device.c5
-rw-r--r--drivers/gpu/drm/gma500/power.c8
-rw-r--r--drivers/gpu/drm/gma500/psb_drv.c2
-rw-r--r--drivers/gpu/drm/gma500/psb_drv.h5
-rw-r--r--drivers/gpu/drm/gma500/psb_irq.c15
-rw-r--r--drivers/gpu/drm/gma500/psb_irq.h2
-rw-r--r--drivers/gpu/drm/hyperv/hyperv_drm_drv.c10
-rw-r--r--drivers/gpu/drm/i915/display/icl_dsi.c9
-rw-r--r--drivers/gpu/drm/i915/display/intel_backlight.c37
-rw-r--r--drivers/gpu/drm/i915/display/intel_bios.c17
-rw-r--r--drivers/gpu/drm/i915/display/intel_bw.c16
-rw-r--r--drivers/gpu/drm/i915/display/intel_dp.c35
-rw-r--r--drivers/gpu/drm/i915/display/intel_dp_link_training.c22
-rw-r--r--drivers/gpu/drm/i915/display/intel_quirks.c3
-rw-r--r--drivers/gpu/drm/i915/display/intel_vdsc.c1
-rw-r--r--drivers/gpu/drm/i915/display/vlv_dsi.c7
-rw-r--r--drivers/gpu/drm/i915/gem/i915_gem_object.c19
-rw-r--r--drivers/gpu/drm/i915/gem/i915_gem_object_types.h3
-rw-r--r--drivers/gpu/drm/i915/gem/i915_gem_pages.c25
-rw-r--r--drivers/gpu/drm/i915/gem/i915_gem_ttm.c2
-rw-r--r--drivers/gpu/drm/i915/gt/intel_gt.c77
-rw-r--r--drivers/gpu/drm/i915/gt/intel_gt.h12
-rw-r--r--drivers/gpu/drm/i915/gt/intel_gt_pm.h3
-rw-r--r--drivers/gpu/drm/i915/gt/intel_gt_types.h18
-rw-r--r--drivers/gpu/drm/i915/gt/intel_llc.c19
-rw-r--r--drivers/gpu/drm/i915/gt/intel_migrate.c67
-rw-r--r--drivers/gpu/drm/i915/gt/intel_ppgtt.c8
-rw-r--r--drivers/gpu/drm/i915/gt/intel_region_lmem.c4
-rw-r--r--drivers/gpu/drm/i915/gt/intel_rps.c50
-rw-r--r--drivers/gpu/drm/i915/gt/intel_rps.h2
-rw-r--r--drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c14
-rw-r--r--drivers/gpu/drm/i915/gvt/aperture_gm.c4
-rw-r--r--drivers/gpu/drm/i915/gvt/gtt.c2
-rw-r--r--drivers/gpu/drm/i915/gvt/handlers.c4
-rw-r--r--drivers/gpu/drm/i915/gvt/mmio_context.c2
-rw-r--r--drivers/gpu/drm/i915/i915_drv.h4
-rw-r--r--drivers/gpu/drm/i915/i915_reg.h16
-rw-r--r--drivers/gpu/drm/i915/i915_vma.c36
-rw-r--r--drivers/gpu/drm/i915/i915_vma.h1
-rw-r--r--drivers/gpu/drm/i915/i915_vma_resource.c5
-rw-r--r--drivers/gpu/drm/i915/i915_vma_resource.h6
-rw-r--r--drivers/gpu/drm/i915/intel_gvt_mmio_table.c3
-rw-r--r--drivers/gpu/drm/i915/intel_pm.c8
-rw-r--r--drivers/gpu/drm/imx/dcss/dcss-kms.c2
-rw-r--r--drivers/gpu/drm/meson/meson_drv.c5
-rw-r--r--drivers/gpu/drm/meson/meson_plane.c2
-rw-r--r--drivers/gpu/drm/meson/meson_viu.c2
-rw-r--r--drivers/gpu/drm/msm/disp/dpu1/dpu_encoder.c6
-rw-r--r--drivers/gpu/drm/msm/dp/dp_ctrl.c2
-rw-r--r--drivers/gpu/drm/msm/dsi/dsi_cfg.c4
-rw-r--r--drivers/gpu/drm/msm/dsi/phy/dsi_phy.c2
-rw-r--r--drivers/gpu/drm/msm/msm_drv.c2
-rw-r--r--drivers/gpu/drm/msm/msm_gpu_devfreq.c2
-rw-r--r--drivers/gpu/drm/msm/msm_rd.c3
-rw-r--r--drivers/gpu/drm/nouveau/nouveau_bo.c9
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/engine/device/base.c22
-rw-r--r--drivers/gpu/drm/panel/panel-edp.c3
-rw-r--r--drivers/gpu/drm/panfrost/panfrost_devfreq.c11
-rw-r--r--drivers/gpu/drm/radeon/radeon_device.c3
-rw-r--r--drivers/gpu/drm/rockchip/cdn-dp-core.c5
-rw-r--r--drivers/gpu/drm/rockchip/rockchip_drm_vop2.c4
-rw-r--r--drivers/gpu/drm/sun4i/sun6i_mipi_dsi.c10
-rw-r--r--drivers/gpu/drm/ttm/ttm_bo.c2
-rw-r--r--drivers/gpu/drm/ttm/ttm_bo_util.c13
-rw-r--r--drivers/gpu/drm/vc4/Kconfig1
-rw-r--r--drivers/gpu/drm/vc4/vc4_hdmi.c17
221 files changed, 3294 insertions, 2406 deletions
diff --git a/drivers/gpu/drm/amd/amdgpu/aldebaran.c b/drivers/gpu/drm/amd/amdgpu/aldebaran.c
index c6cc493a5486..2b97b8a96fb4 100644
--- a/drivers/gpu/drm/amd/amdgpu/aldebaran.c
+++ b/drivers/gpu/drm/amd/amdgpu/aldebaran.c
@@ -148,30 +148,22 @@ aldebaran_mode2_perform_reset(struct amdgpu_reset_control *reset_ctl,
struct amdgpu_reset_context *reset_context)
{
struct amdgpu_device *adev = (struct amdgpu_device *)reset_ctl->handle;
+ struct list_head *reset_device_list = reset_context->reset_device_list;
struct amdgpu_device *tmp_adev = NULL;
- struct list_head reset_device_list;
int r = 0;
dev_dbg(adev->dev, "aldebaran perform hw reset\n");
+
+ if (reset_device_list == NULL)
+ return -EINVAL;
+
if (adev->ip_versions[MP1_HWIP][0] == IP_VERSION(13, 0, 2) &&
reset_context->hive == NULL) {
/* Wrong context, return error */
return -EINVAL;
}
- INIT_LIST_HEAD(&reset_device_list);
- if (reset_context->hive) {
- list_for_each_entry (tmp_adev,
- &reset_context->hive->device_list,
- gmc.xgmi.head)
- list_add_tail(&tmp_adev->reset_list,
- &reset_device_list);
- } else {
- list_add_tail(&reset_context->reset_req_dev->reset_list,
- &reset_device_list);
- }
-
- list_for_each_entry (tmp_adev, &reset_device_list, reset_list) {
+ list_for_each_entry(tmp_adev, reset_device_list, reset_list) {
mutex_lock(&tmp_adev->reset_cntl->reset_lock);
tmp_adev->reset_cntl->active_reset = AMD_RESET_METHOD_MODE2;
}
@@ -179,7 +171,7 @@ aldebaran_mode2_perform_reset(struct amdgpu_reset_control *reset_ctl,
* Mode2 reset doesn't need any sync between nodes in XGMI hive, instead launch
* them together so that they can be completed asynchronously on multiple nodes
*/
- list_for_each_entry (tmp_adev, &reset_device_list, reset_list) {
+ list_for_each_entry(tmp_adev, reset_device_list, reset_list) {
/* For XGMI run all resets in parallel to speed up the process */
if (tmp_adev->gmc.xgmi.num_physical_nodes > 1) {
if (!queue_work(system_unbound_wq,
@@ -197,7 +189,7 @@ aldebaran_mode2_perform_reset(struct amdgpu_reset_control *reset_ctl,
/* For XGMI wait for all resets to complete before proceed */
if (!r) {
- list_for_each_entry (tmp_adev, &reset_device_list, reset_list) {
+ list_for_each_entry(tmp_adev, reset_device_list, reset_list) {
if (tmp_adev->gmc.xgmi.num_physical_nodes > 1) {
flush_work(&tmp_adev->reset_cntl->reset_work);
r = tmp_adev->asic_reset_res;
@@ -207,7 +199,7 @@ aldebaran_mode2_perform_reset(struct amdgpu_reset_control *reset_ctl,
}
}
- list_for_each_entry (tmp_adev, &reset_device_list, reset_list) {
+ list_for_each_entry(tmp_adev, reset_device_list, reset_list) {
mutex_unlock(&tmp_adev->reset_cntl->reset_lock);
tmp_adev->reset_cntl->active_reset = AMD_RESET_METHOD_NONE;
}
@@ -339,10 +331,13 @@ static int
aldebaran_mode2_restore_hwcontext(struct amdgpu_reset_control *reset_ctl,
struct amdgpu_reset_context *reset_context)
{
+ struct list_head *reset_device_list = reset_context->reset_device_list;
struct amdgpu_device *tmp_adev = NULL;
- struct list_head reset_device_list;
int r;
+ if (reset_device_list == NULL)
+ return -EINVAL;
+
if (reset_context->reset_req_dev->ip_versions[MP1_HWIP][0] ==
IP_VERSION(13, 0, 2) &&
reset_context->hive == NULL) {
@@ -350,19 +345,7 @@ aldebaran_mode2_restore_hwcontext(struct amdgpu_reset_control *reset_ctl,
return -EINVAL;
}
- INIT_LIST_HEAD(&reset_device_list);
- if (reset_context->hive) {
- list_for_each_entry (tmp_adev,
- &reset_context->hive->device_list,
- gmc.xgmi.head)
- list_add_tail(&tmp_adev->reset_list,
- &reset_device_list);
- } else {
- list_add_tail(&reset_context->reset_req_dev->reset_list,
- &reset_device_list);
- }
-
- list_for_each_entry (tmp_adev, &reset_device_list, reset_list) {
+ list_for_each_entry(tmp_adev, reset_device_list, reset_list) {
dev_info(tmp_adev->dev,
"GPU reset succeeded, trying to resume\n");
r = aldebaran_mode2_restore_ip(tmp_adev);
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
index e146810c700b..d597e2656c47 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
@@ -317,7 +317,7 @@ enum amdgpu_kiq_irq {
AMDGPU_CP_KIQ_IRQ_DRIVER0 = 0,
AMDGPU_CP_KIQ_IRQ_LAST
};
-
+#define SRIOV_USEC_TIMEOUT 1200000 /* wait 12 * 100ms for SRIOV */
#define MAX_KIQ_REG_WAIT 5000 /* in usecs, 5ms */
#define MAX_KIQ_REG_BAILOUT_INTERVAL 5 /* in msecs, 5ms */
#define MAX_KIQ_REG_TRY 1000
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h
index 3c09dcc0986e..647220a8762d 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h
@@ -96,6 +96,7 @@ struct amdgpu_amdkfd_fence {
struct amdgpu_kfd_dev {
struct kfd_dev *dev;
uint64_t vram_used;
+ uint64_t vram_used_aligned;
bool init_complete;
struct work_struct reset_work;
};
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
index a699134a1e8c..2170db83e41d 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
@@ -40,10 +40,10 @@
#define AMDGPU_USERPTR_RESTORE_DELAY_MS 1
/*
- * Align VRAM allocations to 2MB to avoid fragmentation caused by 4K allocations in the tail 2MB
+ * Align VRAM availability to 2MB to avoid fragmentation caused by 4K allocations in the tail 2MB
* BO chunk
*/
-#define VRAM_ALLOCATION_ALIGN (1 << 21)
+#define VRAM_AVAILABLITY_ALIGN (1 << 21)
/* Impose limit on how much memory KFD can use */
static struct {
@@ -149,7 +149,7 @@ int amdgpu_amdkfd_reserve_mem_limit(struct amdgpu_device *adev,
* to avoid fragmentation caused by 4K allocations in the tail
* 2M BO chunk.
*/
- vram_needed = ALIGN(size, VRAM_ALLOCATION_ALIGN);
+ vram_needed = size;
} else if (alloc_flag & KFD_IOC_ALLOC_MEM_FLAGS_USERPTR) {
system_mem_needed = size;
} else if (!(alloc_flag &
@@ -182,8 +182,10 @@ int amdgpu_amdkfd_reserve_mem_limit(struct amdgpu_device *adev,
*/
WARN_ONCE(vram_needed && !adev,
"adev reference can't be null when vram is used");
- if (adev)
+ if (adev) {
adev->kfd.vram_used += vram_needed;
+ adev->kfd.vram_used_aligned += ALIGN(vram_needed, VRAM_AVAILABLITY_ALIGN);
+ }
kfd_mem_limit.system_mem_used += system_mem_needed;
kfd_mem_limit.ttm_mem_used += ttm_mem_needed;
@@ -203,8 +205,10 @@ void amdgpu_amdkfd_unreserve_mem_limit(struct amdgpu_device *adev,
} else if (alloc_flag & KFD_IOC_ALLOC_MEM_FLAGS_VRAM) {
WARN_ONCE(!adev,
"adev reference can't be null when alloc mem flags vram is set");
- if (adev)
- adev->kfd.vram_used -= ALIGN(size, VRAM_ALLOCATION_ALIGN);
+ if (adev) {
+ adev->kfd.vram_used -= size;
+ adev->kfd.vram_used_aligned -= ALIGN(size, VRAM_AVAILABLITY_ALIGN);
+ }
} else if (alloc_flag & KFD_IOC_ALLOC_MEM_FLAGS_USERPTR) {
kfd_mem_limit.system_mem_used -= size;
} else if (!(alloc_flag &
@@ -1608,15 +1612,14 @@ size_t amdgpu_amdkfd_get_available_memory(struct amdgpu_device *adev)
uint64_t reserved_for_pt =
ESTIMATE_PT_SIZE(amdgpu_amdkfd_total_mem_size);
size_t available;
-
spin_lock(&kfd_mem_limit.mem_limit_lock);
available = adev->gmc.real_vram_size
- - adev->kfd.vram_used
+ - adev->kfd.vram_used_aligned
- atomic64_read(&adev->vram_pin_size)
- reserved_for_pt;
spin_unlock(&kfd_mem_limit.mem_limit_lock);
- return ALIGN_DOWN(available, VRAM_ALLOCATION_ALIGN);
+ return ALIGN_DOWN(available, VRAM_AVAILABLITY_ALIGN);
}
int amdgpu_amdkfd_gpuvm_alloc_memory_of_gpu(
@@ -1725,7 +1728,7 @@ int amdgpu_amdkfd_gpuvm_alloc_memory_of_gpu(
add_kgd_mem_to_kfd_bo_list(*mem, avm->process_info, user_addr);
if (user_addr) {
- pr_debug("creating userptr BO for user_addr = %llu\n", user_addr);
+ pr_debug("creating userptr BO for user_addr = %llx\n", user_addr);
ret = init_user_pages(*mem, user_addr, criu_resume);
if (ret)
goto allocate_init_user_pages_failed;
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_atomfirmware.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_atomfirmware.c
index fd8f3731758e..b81b77a9efa6 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_atomfirmware.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_atomfirmware.c
@@ -314,7 +314,7 @@ amdgpu_atomfirmware_get_vram_info(struct amdgpu_device *adev,
mem_channel_number = vram_info->v30.channel_num;
mem_channel_width = vram_info->v30.channel_width;
if (vram_width)
- *vram_width = mem_channel_number * mem_channel_width;
+ *vram_width = mem_channel_number * (1 << mem_channel_width);
break;
default:
return -EINVAL;
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
index d8f1335bc68f..b7bae833c804 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
@@ -837,16 +837,12 @@ static int amdgpu_cs_vm_handling(struct amdgpu_cs_parser *p)
continue;
r = amdgpu_vm_bo_update(adev, bo_va, false);
- if (r) {
- mutex_unlock(&p->bo_list->bo_list_mutex);
+ if (r)
return r;
- }
r = amdgpu_sync_fence(&p->job->sync, bo_va->last_pt_update);
- if (r) {
- mutex_unlock(&p->bo_list->bo_list_mutex);
+ if (r)
return r;
- }
}
r = amdgpu_vm_handle_moved(adev, vm);
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c
index e2eec985adb3..cb00c7d6f50b 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c
@@ -1705,7 +1705,7 @@ static ssize_t amdgpu_reset_dump_register_list_write(struct file *f,
{
struct amdgpu_device *adev = (struct amdgpu_device *)file_inode(f)->i_private;
char reg_offset[11];
- uint32_t *new, *tmp = NULL;
+ uint32_t *new = NULL, *tmp = NULL;
int ret, i = 0, len = 0;
do {
@@ -1747,7 +1747,8 @@ static ssize_t amdgpu_reset_dump_register_list_write(struct file *f,
ret = size;
error_free:
- kfree(tmp);
+ if (tmp != new)
+ kfree(tmp);
kfree(new);
return ret;
}
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
index c4a6fe3070b6..be7aff2d4a57 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
@@ -2365,8 +2365,16 @@ static int amdgpu_device_ip_init(struct amdgpu_device *adev)
}
adev->ip_blocks[i].status.sw = true;
- /* need to do gmc hw init early so we can allocate gpu mem */
- if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_GMC) {
+ if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_COMMON) {
+ /* need to do common hw init early so everything is set up for gmc */
+ r = adev->ip_blocks[i].version->funcs->hw_init((void *)adev);
+ if (r) {
+ DRM_ERROR("hw_init %d failed %d\n", i, r);
+ goto init_failed;
+ }
+ adev->ip_blocks[i].status.hw = true;
+ } else if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_GMC) {
+ /* need to do gmc hw init early so we can allocate gpu mem */
/* Try to reserve bad pages early */
if (amdgpu_sriov_vf(adev))
amdgpu_virt_exchange_data(adev);
@@ -2456,12 +2464,14 @@ static int amdgpu_device_ip_init(struct amdgpu_device *adev)
if (!hive->reset_domain ||
!amdgpu_reset_get_reset_domain(hive->reset_domain)) {
r = -ENOENT;
+ amdgpu_put_xgmi_hive(hive);
goto init_failed;
}
/* Drop the early temporary reset domain we created for device */
amdgpu_reset_put_reset_domain(adev->reset_domain);
adev->reset_domain = hive->reset_domain;
+ amdgpu_put_xgmi_hive(hive);
}
}
@@ -3050,8 +3060,8 @@ static int amdgpu_device_ip_reinit_early_sriov(struct amdgpu_device *adev)
int i, r;
static enum amd_ip_block_type ip_order[] = {
- AMD_IP_BLOCK_TYPE_GMC,
AMD_IP_BLOCK_TYPE_COMMON,
+ AMD_IP_BLOCK_TYPE_GMC,
AMD_IP_BLOCK_TYPE_PSP,
AMD_IP_BLOCK_TYPE_IH,
};
@@ -4413,8 +4423,6 @@ static int amdgpu_device_reset_sriov(struct amdgpu_device *adev,
retry:
amdgpu_amdkfd_pre_reset(adev);
- amdgpu_amdkfd_pre_reset(adev);
-
if (from_hypervisor)
r = amdgpu_virt_request_full_gpu(adev, true);
else
@@ -4742,6 +4750,8 @@ int amdgpu_do_asic_reset(struct list_head *device_list_handle,
tmp_adev = list_first_entry(device_list_handle, struct amdgpu_device,
reset_list);
amdgpu_reset_reg_dumps(tmp_adev);
+
+ reset_context->reset_device_list = device_list_handle;
r = amdgpu_reset_perform_reset(tmp_adev, reset_context);
/* If reset handler not implemented, continue; otherwise return */
if (r == -ENOSYS)
@@ -5522,7 +5532,8 @@ bool amdgpu_device_is_peer_accessible(struct amdgpu_device *adev,
~*peer_adev->dev->dma_mask : ~((1ULL << 32) - 1);
resource_size_t aper_limit =
adev->gmc.aper_base + adev->gmc.aper_size - 1;
- bool p2p_access = !(pci_p2pdma_distance_many(adev->pdev,
+ bool p2p_access = !adev->gmc.xgmi.connected_to_cpu &&
+ !(pci_p2pdma_distance_many(adev->pdev,
&peer_adev->dev, 1, true) < 0);
return pcie_p2p && p2p_access && (adev->gmc.visible_vram_size &&
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_display.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_display.c
index c20922a5af9f..5b09c8f4fe95 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_display.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_display.c
@@ -38,6 +38,7 @@
#include <linux/pci.h>
#include <linux/pm_runtime.h>
#include <drm/drm_crtc_helper.h>
+#include <drm/drm_damage_helper.h>
#include <drm/drm_edid.h>
#include <drm/drm_gem_framebuffer_helper.h>
#include <drm/drm_fb_helper.h>
@@ -496,6 +497,7 @@ bool amdgpu_display_ddc_probe(struct amdgpu_connector *amdgpu_connector,
static const struct drm_framebuffer_funcs amdgpu_fb_funcs = {
.destroy = drm_gem_fb_destroy,
.create_handle = drm_gem_fb_create_handle,
+ .dirty = drm_atomic_helper_dirtyfb,
};
uint32_t amdgpu_display_supported_domains(struct amdgpu_device *adev,
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_fru_eeprom.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_fru_eeprom.c
index ecada5eadfe3..e325150879df 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_fru_eeprom.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_fru_eeprom.c
@@ -66,10 +66,15 @@ static bool is_fru_eeprom_supported(struct amdgpu_device *adev)
return true;
case CHIP_SIENNA_CICHLID:
if (strnstr(atom_ctx->vbios_version, "D603",
+ sizeof(atom_ctx->vbios_version))) {
+ if (strnstr(atom_ctx->vbios_version, "D603GLXE",
sizeof(atom_ctx->vbios_version)))
- return true;
- else
+ return false;
+ else
+ return true;
+ } else {
return false;
+ }
default:
return false;
}
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c
index 5071b96be982..c2fd6f3076a6 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c
@@ -159,7 +159,10 @@ void amdgpu_job_free(struct amdgpu_job *job)
amdgpu_sync_free(&job->sync);
amdgpu_sync_free(&job->sched_sync);
- dma_fence_put(&job->hw_fence);
+ if (!job->hw_fence.ops)
+ kfree(job);
+ else
+ dma_fence_put(&job->hw_fence);
}
int amdgpu_job_submit(struct amdgpu_job *job, struct drm_sched_entity *entity,
@@ -272,10 +275,6 @@ void amdgpu_job_stop_all_jobs_on_sched(struct drm_gpu_scheduler *sched)
/* Signal all jobs not yet scheduled */
for (i = DRM_SCHED_PRIORITY_COUNT - 1; i >= DRM_SCHED_PRIORITY_MIN; i--) {
struct drm_sched_rq *rq = &sched->sched_rq[i];
-
- if (!rq)
- continue;
-
spin_lock(&rq->lock);
list_for_each_entry(s_entity, &rq->entities, list) {
while ((s_job = to_drm_sched_job(spsc_queue_pop(&s_entity->job_queue)))) {
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c
index b067ce45d226..c9dec2434f37 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c
@@ -486,11 +486,14 @@ static int psp_sw_fini(void *handle)
release_firmware(psp->ta_fw);
psp->ta_fw = NULL;
}
- if (adev->psp.cap_fw) {
+ if (psp->cap_fw) {
release_firmware(psp->cap_fw);
psp->cap_fw = NULL;
}
-
+ if (psp->toc_fw) {
+ release_firmware(psp->toc_fw);
+ psp->toc_fw = NULL;
+ }
if (adev->ip_versions[MP0_HWIP][0] == IP_VERSION(11, 0, 0) ||
adev->ip_versions[MP0_HWIP][0] == IP_VERSION(11, 0, 7))
psp_sysfs_fini(adev);
@@ -753,7 +756,7 @@ static int psp_tmr_init(struct psp_context *psp)
}
pptr = amdgpu_sriov_vf(psp->adev) ? &tmr_buf : NULL;
- ret = amdgpu_bo_create_kernel(psp->adev, tmr_size, PSP_TMR_SIZE(psp->adev),
+ ret = amdgpu_bo_create_kernel(psp->adev, tmr_size, PSP_TMR_ALIGNMENT,
AMDGPU_GEM_DOMAIN_VRAM,
&psp->tmr_bo, &psp->tmr_mc_addr, pptr);
@@ -2401,7 +2404,7 @@ static int psp_load_smu_fw(struct psp_context *psp)
static bool fw_load_skip_check(struct psp_context *psp,
struct amdgpu_firmware_info *ucode)
{
- if (!ucode->fw)
+ if (!ucode->fw || !ucode->ucode_size)
return true;
if (ucode->ucode_id == AMDGPU_UCODE_ID_SMC &&
@@ -2641,6 +2644,9 @@ static int psp_hw_fini(void *handle)
psp_rap_terminate(psp);
psp_dtm_terminate(psp);
psp_hdcp_terminate(psp);
+
+ if (adev->gmc.xgmi.num_physical_nodes > 1)
+ psp_xgmi_terminate(psp);
}
psp_asd_terminate(psp);
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.h
index c32b74bd970f..e593e8c2a54d 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.h
@@ -36,6 +36,7 @@
#define PSP_CMD_BUFFER_SIZE 0x1000
#define PSP_1_MEG 0x100000
#define PSP_TMR_SIZE(adev) ((adev)->asic_type == CHIP_ALDEBARAN ? 0x800000 : 0x400000)
+#define PSP_TMR_ALIGNMENT 0x100000
#define PSP_FW_NAME_LEN 0x24
enum psp_shared_mem_size {
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c
index ff5361f5c2d4..12c6f97945a5 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c
@@ -1811,7 +1811,8 @@ static void amdgpu_ras_log_on_err_counter(struct amdgpu_device *adev)
amdgpu_ras_query_error_status(adev, &info);
if (adev->ip_versions[MP0_HWIP][0] != IP_VERSION(11, 0, 2) &&
- adev->ip_versions[MP0_HWIP][0] != IP_VERSION(11, 0, 4)) {
+ adev->ip_versions[MP0_HWIP][0] != IP_VERSION(11, 0, 4) &&
+ adev->ip_versions[MP0_HWIP][0] != IP_VERSION(13, 0, 0)) {
if (amdgpu_ras_reset_error_status(adev, info.head.block))
dev_warn(adev->dev, "Failed to reset error counter and error status");
}
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_reset.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_reset.h
index 9e55a5d7a825..ffda1560c648 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_reset.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_reset.h
@@ -37,6 +37,7 @@ struct amdgpu_reset_context {
struct amdgpu_device *reset_req_dev;
struct amdgpu_job *job;
struct amdgpu_hive_info *hive;
+ struct list_head *reset_device_list;
unsigned long flags;
};
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
index 3b4c19412625..134575a3893c 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
@@ -637,6 +637,8 @@ struct amdgpu_ttm_tt {
#endif
};
+#define ttm_to_amdgpu_ttm_tt(ptr) container_of(ptr, struct amdgpu_ttm_tt, ttm)
+
#ifdef CONFIG_DRM_AMDGPU_USERPTR
/*
* amdgpu_ttm_tt_get_user_pages - get device accessible pages that back user
@@ -648,7 +650,7 @@ struct amdgpu_ttm_tt {
int amdgpu_ttm_tt_get_user_pages(struct amdgpu_bo *bo, struct page **pages)
{
struct ttm_tt *ttm = bo->tbo.ttm;
- struct amdgpu_ttm_tt *gtt = (void *)ttm;
+ struct amdgpu_ttm_tt *gtt = ttm_to_amdgpu_ttm_tt(ttm);
unsigned long start = gtt->userptr;
struct vm_area_struct *vma;
struct mm_struct *mm;
@@ -702,7 +704,7 @@ out_unlock:
*/
bool amdgpu_ttm_tt_get_user_pages_done(struct ttm_tt *ttm)
{
- struct amdgpu_ttm_tt *gtt = (void *)ttm;
+ struct amdgpu_ttm_tt *gtt = ttm_to_amdgpu_ttm_tt(ttm);
bool r = false;
if (!gtt || !gtt->userptr)
@@ -751,7 +753,7 @@ static int amdgpu_ttm_tt_pin_userptr(struct ttm_device *bdev,
struct ttm_tt *ttm)
{
struct amdgpu_device *adev = amdgpu_ttm_adev(bdev);
- struct amdgpu_ttm_tt *gtt = (void *)ttm;
+ struct amdgpu_ttm_tt *gtt = ttm_to_amdgpu_ttm_tt(ttm);
int write = !(gtt->userflags & AMDGPU_GEM_USERPTR_READONLY);
enum dma_data_direction direction = write ?
DMA_BIDIRECTIONAL : DMA_TO_DEVICE;
@@ -788,7 +790,7 @@ static void amdgpu_ttm_tt_unpin_userptr(struct ttm_device *bdev,
struct ttm_tt *ttm)
{
struct amdgpu_device *adev = amdgpu_ttm_adev(bdev);
- struct amdgpu_ttm_tt *gtt = (void *)ttm;
+ struct amdgpu_ttm_tt *gtt = ttm_to_amdgpu_ttm_tt(ttm);
int write = !(gtt->userflags & AMDGPU_GEM_USERPTR_READONLY);
enum dma_data_direction direction = write ?
DMA_BIDIRECTIONAL : DMA_TO_DEVICE;
@@ -822,7 +824,7 @@ static void amdgpu_ttm_gart_bind(struct amdgpu_device *adev,
{
struct amdgpu_bo *abo = ttm_to_amdgpu_bo(tbo);
struct ttm_tt *ttm = tbo->ttm;
- struct amdgpu_ttm_tt *gtt = (void *)ttm;
+ struct amdgpu_ttm_tt *gtt = ttm_to_amdgpu_ttm_tt(ttm);
if (amdgpu_bo_encrypted(abo))
flags |= AMDGPU_PTE_TMZ;
@@ -860,7 +862,7 @@ static int amdgpu_ttm_backend_bind(struct ttm_device *bdev,
struct ttm_resource *bo_mem)
{
struct amdgpu_device *adev = amdgpu_ttm_adev(bdev);
- struct amdgpu_ttm_tt *gtt = (void*)ttm;
+ struct amdgpu_ttm_tt *gtt = ttm_to_amdgpu_ttm_tt(ttm);
uint64_t flags;
int r;
@@ -927,7 +929,7 @@ int amdgpu_ttm_alloc_gart(struct ttm_buffer_object *bo)
{
struct amdgpu_device *adev = amdgpu_ttm_adev(bo->bdev);
struct ttm_operation_ctx ctx = { false, false };
- struct amdgpu_ttm_tt *gtt = (void *)bo->ttm;
+ struct amdgpu_ttm_tt *gtt = ttm_to_amdgpu_ttm_tt(bo->ttm);
struct ttm_placement placement;
struct ttm_place placements;
struct ttm_resource *tmp;
@@ -998,7 +1000,7 @@ static void amdgpu_ttm_backend_unbind(struct ttm_device *bdev,
struct ttm_tt *ttm)
{
struct amdgpu_device *adev = amdgpu_ttm_adev(bdev);
- struct amdgpu_ttm_tt *gtt = (void *)ttm;
+ struct amdgpu_ttm_tt *gtt = ttm_to_amdgpu_ttm_tt(ttm);
/* if the pages have userptr pinning then clear that first */
if (gtt->userptr) {
@@ -1025,7 +1027,7 @@ static void amdgpu_ttm_backend_unbind(struct ttm_device *bdev,
static void amdgpu_ttm_backend_destroy(struct ttm_device *bdev,
struct ttm_tt *ttm)
{
- struct amdgpu_ttm_tt *gtt = (void *)ttm;
+ struct amdgpu_ttm_tt *gtt = ttm_to_amdgpu_ttm_tt(ttm);
if (gtt->usertask)
put_task_struct(gtt->usertask);
@@ -1079,7 +1081,7 @@ static int amdgpu_ttm_tt_populate(struct ttm_device *bdev,
struct ttm_operation_ctx *ctx)
{
struct amdgpu_device *adev = amdgpu_ttm_adev(bdev);
- struct amdgpu_ttm_tt *gtt = (void *)ttm;
+ struct amdgpu_ttm_tt *gtt = ttm_to_amdgpu_ttm_tt(ttm);
pgoff_t i;
int ret;
@@ -1113,7 +1115,7 @@ static int amdgpu_ttm_tt_populate(struct ttm_device *bdev,
static void amdgpu_ttm_tt_unpopulate(struct ttm_device *bdev,
struct ttm_tt *ttm)
{
- struct amdgpu_ttm_tt *gtt = (void *)ttm;
+ struct amdgpu_ttm_tt *gtt = ttm_to_amdgpu_ttm_tt(ttm);
struct amdgpu_device *adev;
pgoff_t i;
@@ -1182,7 +1184,7 @@ int amdgpu_ttm_tt_set_userptr(struct ttm_buffer_object *bo,
/* Set TTM_TT_FLAG_EXTERNAL before populate but after create. */
bo->ttm->page_flags |= TTM_TT_FLAG_EXTERNAL;
- gtt = (void *)bo->ttm;
+ gtt = ttm_to_amdgpu_ttm_tt(bo->ttm);
gtt->userptr = addr;
gtt->userflags = flags;
@@ -1199,7 +1201,7 @@ int amdgpu_ttm_tt_set_userptr(struct ttm_buffer_object *bo,
*/
struct mm_struct *amdgpu_ttm_tt_get_usermm(struct ttm_tt *ttm)
{
- struct amdgpu_ttm_tt *gtt = (void *)ttm;
+ struct amdgpu_ttm_tt *gtt = ttm_to_amdgpu_ttm_tt(ttm);
if (gtt == NULL)
return NULL;
@@ -1218,7 +1220,7 @@ struct mm_struct *amdgpu_ttm_tt_get_usermm(struct ttm_tt *ttm)
bool amdgpu_ttm_tt_affect_userptr(struct ttm_tt *ttm, unsigned long start,
unsigned long end, unsigned long *userptr)
{
- struct amdgpu_ttm_tt *gtt = (void *)ttm;
+ struct amdgpu_ttm_tt *gtt = ttm_to_amdgpu_ttm_tt(ttm);
unsigned long size;
if (gtt == NULL || !gtt->userptr)
@@ -1241,7 +1243,7 @@ bool amdgpu_ttm_tt_affect_userptr(struct ttm_tt *ttm, unsigned long start,
*/
bool amdgpu_ttm_tt_is_userptr(struct ttm_tt *ttm)
{
- struct amdgpu_ttm_tt *gtt = (void *)ttm;
+ struct amdgpu_ttm_tt *gtt = ttm_to_amdgpu_ttm_tt(ttm);
if (gtt == NULL || !gtt->userptr)
return false;
@@ -1254,7 +1256,7 @@ bool amdgpu_ttm_tt_is_userptr(struct ttm_tt *ttm)
*/
bool amdgpu_ttm_tt_is_readonly(struct ttm_tt *ttm)
{
- struct amdgpu_ttm_tt *gtt = (void *)ttm;
+ struct amdgpu_ttm_tt *gtt = ttm_to_amdgpu_ttm_tt(ttm);
if (gtt == NULL)
return false;
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ucode.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_ucode.h
index ebed3f5226db..96b6cf4c4d54 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ucode.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ucode.h
@@ -390,6 +390,7 @@ union amdgpu_firmware_header {
struct rlc_firmware_header_v2_1 rlc_v2_1;
struct rlc_firmware_header_v2_2 rlc_v2_2;
struct rlc_firmware_header_v2_3 rlc_v2_3;
+ struct rlc_firmware_header_v2_4 rlc_v2_4;
struct sdma_firmware_header_v1_0 sdma;
struct sdma_firmware_header_v1_1 sdma_v1_1;
struct sdma_firmware_header_v2_0 sdma_v2_0;
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vkms.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vkms.c
index 108e8e8a1a36..576849e95296 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vkms.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vkms.c
@@ -496,8 +496,7 @@ static int amdgpu_vkms_sw_init(void *handle)
adev_to_drm(adev)->mode_config.max_height = YRES_MAX;
adev_to_drm(adev)->mode_config.preferred_depth = 24;
- /* disable prefer shadow for now due to hibernation issues */
- adev_to_drm(adev)->mode_config.prefer_shadow = 0;
+ adev_to_drm(adev)->mode_config.prefer_shadow = 1;
adev_to_drm(adev)->mode_config.fb_base = adev->gmc.aper_base;
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.c
index 1b108d03e785..f2aebbf3fbe3 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.c
@@ -742,7 +742,7 @@ int amdgpu_xgmi_remove_device(struct amdgpu_device *adev)
amdgpu_put_xgmi_hive(hive);
}
- return psp_xgmi_terminate(&adev->psp);
+ return 0;
}
static int amdgpu_xgmi_ras_late_init(struct amdgpu_device *adev, struct ras_common_if *ras_block)
diff --git a/drivers/gpu/drm/amd/amdgpu/athub_v3_0.c b/drivers/gpu/drm/amd/amdgpu/athub_v3_0.c
index 33a8a7365aef..f0e235f98afb 100644
--- a/drivers/gpu/drm/amd/amdgpu/athub_v3_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/athub_v3_0.c
@@ -28,13 +28,44 @@
#include "navi10_enum.h"
#include "soc15_common.h"
+#define regATHUB_MISC_CNTL_V3_0_1 0x00d7
+#define regATHUB_MISC_CNTL_V3_0_1_BASE_IDX 0
+
+
+static uint32_t athub_v3_0_get_cg_cntl(struct amdgpu_device *adev)
+{
+ uint32_t data;
+
+ switch (adev->ip_versions[ATHUB_HWIP][0]) {
+ case IP_VERSION(3, 0, 1):
+ data = RREG32_SOC15(ATHUB, 0, regATHUB_MISC_CNTL_V3_0_1);
+ break;
+ default:
+ data = RREG32_SOC15(ATHUB, 0, regATHUB_MISC_CNTL);
+ break;
+ }
+ return data;
+}
+
+static void athub_v3_0_set_cg_cntl(struct amdgpu_device *adev, uint32_t data)
+{
+ switch (adev->ip_versions[ATHUB_HWIP][0]) {
+ case IP_VERSION(3, 0, 1):
+ WREG32_SOC15(ATHUB, 0, regATHUB_MISC_CNTL_V3_0_1, data);
+ break;
+ default:
+ WREG32_SOC15(ATHUB, 0, regATHUB_MISC_CNTL, data);
+ break;
+ }
+}
+
static void
athub_v3_0_update_medium_grain_clock_gating(struct amdgpu_device *adev,
bool enable)
{
uint32_t def, data;
- def = data = RREG32_SOC15(ATHUB, 0, regATHUB_MISC_CNTL);
+ def = data = athub_v3_0_get_cg_cntl(adev);
if (enable && (adev->cg_flags & AMD_CG_SUPPORT_ATHUB_MGCG))
data |= ATHUB_MISC_CNTL__CG_ENABLE_MASK;
@@ -42,7 +73,7 @@ athub_v3_0_update_medium_grain_clock_gating(struct amdgpu_device *adev,
data &= ~ATHUB_MISC_CNTL__CG_ENABLE_MASK;
if (def != data)
- WREG32_SOC15(ATHUB, 0, regATHUB_MISC_CNTL, data);
+ athub_v3_0_set_cg_cntl(adev, data);
}
static void
@@ -51,7 +82,7 @@ athub_v3_0_update_medium_grain_light_sleep(struct amdgpu_device *adev,
{
uint32_t def, data;
- def = data = RREG32_SOC15(ATHUB, 0, regATHUB_MISC_CNTL);
+ def = data = athub_v3_0_get_cg_cntl(adev);
if (enable && (adev->cg_flags & AMD_CG_SUPPORT_ATHUB_LS))
data |= ATHUB_MISC_CNTL__CG_MEM_LS_ENABLE_MASK;
@@ -59,7 +90,7 @@ athub_v3_0_update_medium_grain_light_sleep(struct amdgpu_device *adev,
data &= ~ATHUB_MISC_CNTL__CG_MEM_LS_ENABLE_MASK;
if (def != data)
- WREG32_SOC15(ATHUB, 0, regATHUB_MISC_CNTL, data);
+ athub_v3_0_set_cg_cntl(adev, data);
}
int athub_v3_0_set_clockgating(struct amdgpu_device *adev,
@@ -70,6 +101,7 @@ int athub_v3_0_set_clockgating(struct amdgpu_device *adev,
switch (adev->ip_versions[ATHUB_HWIP][0]) {
case IP_VERSION(3, 0, 0):
+ case IP_VERSION(3, 0, 1):
case IP_VERSION(3, 0, 2):
athub_v3_0_update_medium_grain_clock_gating(adev,
state == AMD_CG_STATE_GATE);
@@ -88,7 +120,7 @@ void athub_v3_0_get_clockgating(struct amdgpu_device *adev, u64 *flags)
int data;
/* AMD_CG_SUPPORT_ATHUB_MGCG */
- data = RREG32_SOC15(ATHUB, 0, regATHUB_MISC_CNTL);
+ data = athub_v3_0_get_cg_cntl(adev);
if (data & ATHUB_MISC_CNTL__CG_ENABLE_MASK)
*flags |= AMD_CG_SUPPORT_ATHUB_MGCG;
diff --git a/drivers/gpu/drm/amd/amdgpu/dce_v10_0.c b/drivers/gpu/drm/amd/amdgpu/dce_v10_0.c
index 9c964cd3b5d4..288fce7dc0ed 100644
--- a/drivers/gpu/drm/amd/amdgpu/dce_v10_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/dce_v10_0.c
@@ -2796,8 +2796,7 @@ static int dce_v10_0_sw_init(void *handle)
adev_to_drm(adev)->mode_config.max_height = 16384;
adev_to_drm(adev)->mode_config.preferred_depth = 24;
- /* disable prefer shadow for now due to hibernation issues */
- adev_to_drm(adev)->mode_config.prefer_shadow = 0;
+ adev_to_drm(adev)->mode_config.prefer_shadow = 1;
adev_to_drm(adev)->mode_config.fb_modifiers_not_supported = true;
diff --git a/drivers/gpu/drm/amd/amdgpu/dce_v11_0.c b/drivers/gpu/drm/amd/amdgpu/dce_v11_0.c
index e0ad9f27dc3f..cbe5250b31cb 100644
--- a/drivers/gpu/drm/amd/amdgpu/dce_v11_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/dce_v11_0.c
@@ -2914,8 +2914,7 @@ static int dce_v11_0_sw_init(void *handle)
adev_to_drm(adev)->mode_config.max_height = 16384;
adev_to_drm(adev)->mode_config.preferred_depth = 24;
- /* disable prefer shadow for now due to hibernation issues */
- adev_to_drm(adev)->mode_config.prefer_shadow = 0;
+ adev_to_drm(adev)->mode_config.prefer_shadow = 1;
adev_to_drm(adev)->mode_config.fb_modifiers_not_supported = true;
diff --git a/drivers/gpu/drm/amd/amdgpu/dce_v6_0.c b/drivers/gpu/drm/amd/amdgpu/dce_v6_0.c
index 77f5e998a120..b1c44fab074f 100644
--- a/drivers/gpu/drm/amd/amdgpu/dce_v6_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/dce_v6_0.c
@@ -2673,8 +2673,7 @@ static int dce_v6_0_sw_init(void *handle)
adev_to_drm(adev)->mode_config.max_width = 16384;
adev_to_drm(adev)->mode_config.max_height = 16384;
adev_to_drm(adev)->mode_config.preferred_depth = 24;
- /* disable prefer shadow for now due to hibernation issues */
- adev_to_drm(adev)->mode_config.prefer_shadow = 0;
+ adev_to_drm(adev)->mode_config.prefer_shadow = 1;
adev_to_drm(adev)->mode_config.fb_modifiers_not_supported = true;
adev_to_drm(adev)->mode_config.fb_base = adev->gmc.aper_base;
diff --git a/drivers/gpu/drm/amd/amdgpu/dce_v8_0.c b/drivers/gpu/drm/amd/amdgpu/dce_v8_0.c
index 802e5c753271..a22b45c92792 100644
--- a/drivers/gpu/drm/amd/amdgpu/dce_v8_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/dce_v8_0.c
@@ -2693,8 +2693,11 @@ static int dce_v8_0_sw_init(void *handle)
adev_to_drm(adev)->mode_config.max_height = 16384;
adev_to_drm(adev)->mode_config.preferred_depth = 24;
- /* disable prefer shadow for now due to hibernation issues */
- adev_to_drm(adev)->mode_config.prefer_shadow = 0;
+ if (adev->asic_type == CHIP_HAWAII)
+ /* disable prefer shadow for now due to hibernation issues */
+ adev_to_drm(adev)->mode_config.prefer_shadow = 0;
+ else
+ adev_to_drm(adev)->mode_config.prefer_shadow = 1;
adev_to_drm(adev)->mode_config.fb_modifiers_not_supported = true;
diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c
index fafbad3cf08d..a3cd5c1e8529 100644
--- a/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c
@@ -4274,35 +4274,45 @@ static int gfx_v10_0_init_microcode(struct amdgpu_device *adev)
}
- info = &adev->firmware.ucode[AMDGPU_UCODE_ID_GLOBAL_TAP_DELAYS];
- info->ucode_id = AMDGPU_UCODE_ID_GLOBAL_TAP_DELAYS;
- info->fw = adev->gfx.rlc_fw;
- adev->firmware.fw_size +=
- ALIGN(adev->gfx.rlc.global_tap_delays_ucode_size_bytes, PAGE_SIZE);
+ if (adev->gfx.rlc.global_tap_delays_ucode_size_bytes) {
+ info = &adev->firmware.ucode[AMDGPU_UCODE_ID_GLOBAL_TAP_DELAYS];
+ info->ucode_id = AMDGPU_UCODE_ID_GLOBAL_TAP_DELAYS;
+ info->fw = adev->gfx.rlc_fw;
+ adev->firmware.fw_size +=
+ ALIGN(adev->gfx.rlc.global_tap_delays_ucode_size_bytes, PAGE_SIZE);
+ }
- info = &adev->firmware.ucode[AMDGPU_UCODE_ID_SE0_TAP_DELAYS];
- info->ucode_id = AMDGPU_UCODE_ID_SE0_TAP_DELAYS;
- info->fw = adev->gfx.rlc_fw;
- adev->firmware.fw_size +=
- ALIGN(adev->gfx.rlc.se0_tap_delays_ucode_size_bytes, PAGE_SIZE);
+ if (adev->gfx.rlc.se0_tap_delays_ucode_size_bytes) {
+ info = &adev->firmware.ucode[AMDGPU_UCODE_ID_SE0_TAP_DELAYS];
+ info->ucode_id = AMDGPU_UCODE_ID_SE0_TAP_DELAYS;
+ info->fw = adev->gfx.rlc_fw;
+ adev->firmware.fw_size +=
+ ALIGN(adev->gfx.rlc.se0_tap_delays_ucode_size_bytes, PAGE_SIZE);
+ }
- info = &adev->firmware.ucode[AMDGPU_UCODE_ID_SE1_TAP_DELAYS];
- info->ucode_id = AMDGPU_UCODE_ID_SE1_TAP_DELAYS;
- info->fw = adev->gfx.rlc_fw;
- adev->firmware.fw_size +=
- ALIGN(adev->gfx.rlc.se1_tap_delays_ucode_size_bytes, PAGE_SIZE);
+ if (adev->gfx.rlc.se1_tap_delays_ucode_size_bytes) {
+ info = &adev->firmware.ucode[AMDGPU_UCODE_ID_SE1_TAP_DELAYS];
+ info->ucode_id = AMDGPU_UCODE_ID_SE1_TAP_DELAYS;
+ info->fw = adev->gfx.rlc_fw;
+ adev->firmware.fw_size +=
+ ALIGN(adev->gfx.rlc.se1_tap_delays_ucode_size_bytes, PAGE_SIZE);
+ }
- info = &adev->firmware.ucode[AMDGPU_UCODE_ID_SE2_TAP_DELAYS];
- info->ucode_id = AMDGPU_UCODE_ID_SE2_TAP_DELAYS;
- info->fw = adev->gfx.rlc_fw;
- adev->firmware.fw_size +=
- ALIGN(adev->gfx.rlc.se2_tap_delays_ucode_size_bytes, PAGE_SIZE);
+ if (adev->gfx.rlc.se2_tap_delays_ucode_size_bytes) {
+ info = &adev->firmware.ucode[AMDGPU_UCODE_ID_SE2_TAP_DELAYS];
+ info->ucode_id = AMDGPU_UCODE_ID_SE2_TAP_DELAYS;
+ info->fw = adev->gfx.rlc_fw;
+ adev->firmware.fw_size +=
+ ALIGN(adev->gfx.rlc.se2_tap_delays_ucode_size_bytes, PAGE_SIZE);
+ }
- info = &adev->firmware.ucode[AMDGPU_UCODE_ID_SE3_TAP_DELAYS];
- info->ucode_id = AMDGPU_UCODE_ID_SE3_TAP_DELAYS;
- info->fw = adev->gfx.rlc_fw;
- adev->firmware.fw_size +=
- ALIGN(adev->gfx.rlc.se3_tap_delays_ucode_size_bytes, PAGE_SIZE);
+ if (adev->gfx.rlc.se3_tap_delays_ucode_size_bytes) {
+ info = &adev->firmware.ucode[AMDGPU_UCODE_ID_SE3_TAP_DELAYS];
+ info->ucode_id = AMDGPU_UCODE_ID_SE3_TAP_DELAYS;
+ info->fw = adev->gfx.rlc_fw;
+ adev->firmware.fw_size +=
+ ALIGN(adev->gfx.rlc.se3_tap_delays_ucode_size_bytes, PAGE_SIZE);
+ }
info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_MEC1];
info->ucode_id = AMDGPU_UCODE_ID_CP_MEC1;
@@ -4846,7 +4856,7 @@ static int gfx_v10_0_sw_init(void *handle)
case IP_VERSION(10, 3, 3):
case IP_VERSION(10, 3, 7):
adev->gfx.me.num_me = 1;
- adev->gfx.me.num_pipe_per_me = 2;
+ adev->gfx.me.num_pipe_per_me = 1;
adev->gfx.me.num_queue_per_pipe = 1;
adev->gfx.mec.num_mec = 2;
adev->gfx.mec.num_pipe_per_mec = 4;
diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c
index 6fd71cb10e54..f6b1bb40e503 100644
--- a/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c
@@ -53,6 +53,7 @@
#define GFX11_MEC_HPD_SIZE 2048
#define RLCG_UCODE_LOADING_START_ADDRESS 0x00002000L
+#define RLC_PG_DELAY_3_DEFAULT_GC_11_0_1 0x1388
#define regCGTT_WD_CLK_CTRL 0x5086
#define regCGTT_WD_CLK_CTRL_BASE_IDX 1
@@ -130,6 +131,8 @@ static void gfx_v11_0_ring_invalidate_tlbs(struct amdgpu_ring *ring,
bool all_hub, uint8_t dst_sel);
static void gfx_v11_0_set_safe_mode(struct amdgpu_device *adev);
static void gfx_v11_0_unset_safe_mode(struct amdgpu_device *adev);
+static void gfx_v11_0_update_perf_clk(struct amdgpu_device *adev,
+ bool enable);
static void gfx11_kiq_set_resources(struct amdgpu_ring *kiq_ring, uint64_t queue_mask)
{
@@ -1138,6 +1141,7 @@ static const struct amdgpu_gfx_funcs gfx_v11_0_gfx_funcs = {
.read_wave_vgprs = &gfx_v11_0_read_wave_vgprs,
.select_me_pipe_q = &gfx_v11_0_select_me_pipe_q,
.init_spm_golden = &gfx_v11_0_init_spm_golden_registers,
+ .update_perfmon_mgcg = &gfx_v11_0_update_perf_clk,
};
static int gfx_v11_0_gpu_early_init(struct amdgpu_device *adev)
@@ -5181,9 +5185,12 @@ static void gfx_v11_0_update_coarse_grain_clock_gating(struct amdgpu_device *ade
data = REG_SET_FIELD(data, SDMA0_RLC_CGCG_CTRL, CGCG_INT_ENABLE, 1);
WREG32_SOC15(GC, 0, regSDMA0_RLC_CGCG_CTRL, data);
- data = RREG32_SOC15(GC, 0, regSDMA1_RLC_CGCG_CTRL);
- data = REG_SET_FIELD(data, SDMA1_RLC_CGCG_CTRL, CGCG_INT_ENABLE, 1);
- WREG32_SOC15(GC, 0, regSDMA1_RLC_CGCG_CTRL, data);
+ /* Some ASICs only have one SDMA instance, not need to configure SDMA1 */
+ if (adev->sdma.num_instances > 1) {
+ data = RREG32_SOC15(GC, 0, regSDMA1_RLC_CGCG_CTRL);
+ data = REG_SET_FIELD(data, SDMA1_RLC_CGCG_CTRL, CGCG_INT_ENABLE, 1);
+ WREG32_SOC15(GC, 0, regSDMA1_RLC_CGCG_CTRL, data);
+ }
} else {
/* Program RLC_CGCG_CGLS_CTRL */
def = data = RREG32_SOC15(GC, 0, regRLC_CGCG_CGLS_CTRL);
@@ -5212,9 +5219,12 @@ static void gfx_v11_0_update_coarse_grain_clock_gating(struct amdgpu_device *ade
data &= ~SDMA0_RLC_CGCG_CTRL__CGCG_INT_ENABLE_MASK;
WREG32_SOC15(GC, 0, regSDMA0_RLC_CGCG_CTRL, data);
- data = RREG32_SOC15(GC, 0, regSDMA1_RLC_CGCG_CTRL);
- data &= ~SDMA1_RLC_CGCG_CTRL__CGCG_INT_ENABLE_MASK;
- WREG32_SOC15(GC, 0, regSDMA1_RLC_CGCG_CTRL, data);
+ /* Some ASICs only have one SDMA instance, not need to configure SDMA1 */
+ if (adev->sdma.num_instances > 1) {
+ data = RREG32_SOC15(GC, 0, regSDMA1_RLC_CGCG_CTRL);
+ data &= ~SDMA1_RLC_CGCG_CTRL__CGCG_INT_ENABLE_MASK;
+ WREG32_SOC15(GC, 0, regSDMA1_RLC_CGCG_CTRL, data);
+ }
}
}
@@ -5279,6 +5289,38 @@ static const struct amdgpu_rlc_funcs gfx_v11_0_rlc_funcs = {
.update_spm_vmid = gfx_v11_0_update_spm_vmid,
};
+static void gfx_v11_cntl_power_gating(struct amdgpu_device *adev, bool enable)
+{
+ u32 data = RREG32_SOC15(GC, 0, regRLC_PG_CNTL);
+
+ if (enable && (adev->pg_flags & AMD_PG_SUPPORT_GFX_PG))
+ data |= RLC_PG_CNTL__GFX_POWER_GATING_ENABLE_MASK;
+ else
+ data &= ~RLC_PG_CNTL__GFX_POWER_GATING_ENABLE_MASK;
+
+ WREG32_SOC15(GC, 0, regRLC_PG_CNTL, data);
+
+ // Program RLC_PG_DELAY3 for CGPG hysteresis
+ if (enable && (adev->pg_flags & AMD_PG_SUPPORT_GFX_PG)) {
+ switch (adev->ip_versions[GC_HWIP][0]) {
+ case IP_VERSION(11, 0, 1):
+ WREG32_SOC15(GC, 0, regRLC_PG_DELAY_3, RLC_PG_DELAY_3_DEFAULT_GC_11_0_1);
+ break;
+ default:
+ break;
+ }
+ }
+}
+
+static void gfx_v11_cntl_pg(struct amdgpu_device *adev, bool enable)
+{
+ amdgpu_gfx_rlc_enter_safe_mode(adev);
+
+ gfx_v11_cntl_power_gating(adev, enable);
+
+ amdgpu_gfx_rlc_exit_safe_mode(adev);
+}
+
static int gfx_v11_0_set_powergating_state(void *handle,
enum amd_powergating_state state)
{
@@ -5293,6 +5335,10 @@ static int gfx_v11_0_set_powergating_state(void *handle,
case IP_VERSION(11, 0, 2):
amdgpu_gfx_off_ctrl(adev, enable);
break;
+ case IP_VERSION(11, 0, 1):
+ gfx_v11_cntl_pg(adev, enable);
+ amdgpu_gfx_off_ctrl(adev, enable);
+ break;
default:
break;
}
@@ -5310,6 +5356,7 @@ static int gfx_v11_0_set_clockgating_state(void *handle,
switch (adev->ip_versions[GC_HWIP][0]) {
case IP_VERSION(11, 0, 0):
+ case IP_VERSION(11, 0, 1):
case IP_VERSION(11, 0, 2):
gfx_v11_0_update_gfx_clock_gating(adev,
state == AMD_CG_STATE_GATE);
diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c
index c6e0f9313a7f..fc9c1043244c 100644
--- a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c
@@ -2587,7 +2587,8 @@ static void gfx_v9_0_constants_init(struct amdgpu_device *adev)
gfx_v9_0_tiling_mode_table_init(adev);
- gfx_v9_0_setup_rb(adev);
+ if (adev->gfx.num_gfx_rings)
+ gfx_v9_0_setup_rb(adev);
gfx_v9_0_get_cu_info(adev, &adev->gfx.cu_info);
adev->gfx.config.db_debug2 = RREG32_SOC15(GC, 0, mmDB_DEBUG2);
diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c
index 9ae8cdaa033e..f513e2c2e964 100644
--- a/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c
@@ -419,6 +419,7 @@ static int gmc_v10_0_flush_gpu_tlb_pasid(struct amdgpu_device *adev,
uint32_t seq;
uint16_t queried_pasid;
bool ret;
+ u32 usec_timeout = amdgpu_sriov_vf(adev) ? SRIOV_USEC_TIMEOUT : adev->usec_timeout;
struct amdgpu_ring *ring = &adev->gfx.kiq.ring;
struct amdgpu_kiq *kiq = &adev->gfx.kiq;
@@ -437,7 +438,7 @@ static int gmc_v10_0_flush_gpu_tlb_pasid(struct amdgpu_device *adev,
amdgpu_ring_commit(ring);
spin_unlock(&adev->gfx.kiq.ring_lock);
- r = amdgpu_fence_wait_polling(ring, seq, adev->usec_timeout);
+ r = amdgpu_fence_wait_polling(ring, seq, usec_timeout);
if (r < 1) {
dev_err(adev->dev, "wait for kiq fence error: %ld.\n", r);
return -ETIME;
diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c
index 22761a3bb818..4603653916f5 100644
--- a/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c
@@ -896,6 +896,7 @@ static int gmc_v9_0_flush_gpu_tlb_pasid(struct amdgpu_device *adev,
uint32_t seq;
uint16_t queried_pasid;
bool ret;
+ u32 usec_timeout = amdgpu_sriov_vf(adev) ? SRIOV_USEC_TIMEOUT : adev->usec_timeout;
struct amdgpu_ring *ring = &adev->gfx.kiq.ring;
struct amdgpu_kiq *kiq = &adev->gfx.kiq;
@@ -935,7 +936,7 @@ static int gmc_v9_0_flush_gpu_tlb_pasid(struct amdgpu_device *adev,
amdgpu_ring_commit(ring);
spin_unlock(&adev->gfx.kiq.ring_lock);
- r = amdgpu_fence_wait_polling(ring, seq, adev->usec_timeout);
+ r = amdgpu_fence_wait_polling(ring, seq, usec_timeout);
if (r < 1) {
dev_err(adev->dev, "wait for kiq fence error: %ld.\n", r);
up_read(&adev->reset_domain->sem);
@@ -1624,12 +1625,15 @@ static int gmc_v9_0_sw_init(void *handle)
amdgpu_vm_adjust_size(adev, 256 * 1024, 9, 3, 47);
else
amdgpu_vm_adjust_size(adev, 256 * 1024, 9, 3, 48);
+ if (adev->ip_versions[GC_HWIP][0] == IP_VERSION(9, 4, 2))
+ adev->gmc.translate_further = adev->vm_manager.num_level > 1;
break;
case IP_VERSION(9, 4, 1):
adev->num_vmhubs = 3;
/* Keep the vm size same with Vega20 */
amdgpu_vm_adjust_size(adev, 256 * 1024, 9, 3, 48);
+ adev->gmc.translate_further = adev->vm_manager.num_level > 1;
break;
default:
break;
diff --git a/drivers/gpu/drm/amd/amdgpu/hdp_v5_2.c b/drivers/gpu/drm/amd/amdgpu/hdp_v5_2.c
index 39a696cd45b5..29c3484ae1f1 100644
--- a/drivers/gpu/drm/amd/amdgpu/hdp_v5_2.c
+++ b/drivers/gpu/drm/amd/amdgpu/hdp_v5_2.c
@@ -40,6 +40,156 @@ static void hdp_v5_2_flush_hdp(struct amdgpu_device *adev,
0);
}
+static void hdp_v5_2_update_mem_power_gating(struct amdgpu_device *adev,
+ bool enable)
+{
+ uint32_t hdp_clk_cntl;
+ uint32_t hdp_mem_pwr_cntl;
+
+ if (!(adev->cg_flags & (AMD_CG_SUPPORT_HDP_LS |
+ AMD_CG_SUPPORT_HDP_DS |
+ AMD_CG_SUPPORT_HDP_SD)))
+ return;
+
+ hdp_clk_cntl = RREG32_SOC15(HDP, 0, regHDP_CLK_CNTL);
+ hdp_mem_pwr_cntl = RREG32_SOC15(HDP, 0, regHDP_MEM_POWER_CTRL);
+
+ /* Before doing clock/power mode switch, forced on MEM clock */
+ hdp_clk_cntl = REG_SET_FIELD(hdp_clk_cntl, HDP_CLK_CNTL,
+ ATOMIC_MEM_CLK_SOFT_OVERRIDE, 1);
+ hdp_clk_cntl = REG_SET_FIELD(hdp_clk_cntl, HDP_CLK_CNTL,
+ RC_MEM_CLK_SOFT_OVERRIDE, 1);
+ WREG32_SOC15(HDP, 0, regHDP_CLK_CNTL, hdp_clk_cntl);
+
+ /* disable clock and power gating before any changing */
+ hdp_mem_pwr_cntl = REG_SET_FIELD(hdp_mem_pwr_cntl, HDP_MEM_POWER_CTRL,
+ ATOMIC_MEM_POWER_CTRL_EN, 0);
+ hdp_mem_pwr_cntl = REG_SET_FIELD(hdp_mem_pwr_cntl, HDP_MEM_POWER_CTRL,
+ ATOMIC_MEM_POWER_LS_EN, 0);
+ hdp_mem_pwr_cntl = REG_SET_FIELD(hdp_mem_pwr_cntl, HDP_MEM_POWER_CTRL,
+ ATOMIC_MEM_POWER_DS_EN, 0);
+ hdp_mem_pwr_cntl = REG_SET_FIELD(hdp_mem_pwr_cntl, HDP_MEM_POWER_CTRL,
+ ATOMIC_MEM_POWER_SD_EN, 0);
+ hdp_mem_pwr_cntl = REG_SET_FIELD(hdp_mem_pwr_cntl, HDP_MEM_POWER_CTRL,
+ RC_MEM_POWER_CTRL_EN, 0);
+ hdp_mem_pwr_cntl = REG_SET_FIELD(hdp_mem_pwr_cntl, HDP_MEM_POWER_CTRL,
+ RC_MEM_POWER_LS_EN, 0);
+ hdp_mem_pwr_cntl = REG_SET_FIELD(hdp_mem_pwr_cntl, HDP_MEM_POWER_CTRL,
+ RC_MEM_POWER_DS_EN, 0);
+ hdp_mem_pwr_cntl = REG_SET_FIELD(hdp_mem_pwr_cntl, HDP_MEM_POWER_CTRL,
+ RC_MEM_POWER_SD_EN, 0);
+ WREG32_SOC15(HDP, 0, regHDP_MEM_POWER_CTRL, hdp_mem_pwr_cntl);
+
+ /* Already disabled above. The actions below are for "enabled" only */
+ if (enable) {
+ /* only one clock gating mode (LS/DS/SD) can be enabled */
+ if (adev->cg_flags & AMD_CG_SUPPORT_HDP_SD) {
+ hdp_mem_pwr_cntl = REG_SET_FIELD(hdp_mem_pwr_cntl,
+ HDP_MEM_POWER_CTRL,
+ ATOMIC_MEM_POWER_SD_EN, 1);
+ hdp_mem_pwr_cntl = REG_SET_FIELD(hdp_mem_pwr_cntl,
+ HDP_MEM_POWER_CTRL,
+ RC_MEM_POWER_SD_EN, 1);
+ } else if (adev->cg_flags & AMD_CG_SUPPORT_HDP_LS) {
+ hdp_mem_pwr_cntl = REG_SET_FIELD(hdp_mem_pwr_cntl,
+ HDP_MEM_POWER_CTRL,
+ ATOMIC_MEM_POWER_LS_EN, 1);
+ hdp_mem_pwr_cntl = REG_SET_FIELD(hdp_mem_pwr_cntl,
+ HDP_MEM_POWER_CTRL,
+ RC_MEM_POWER_LS_EN, 1);
+ } else if (adev->cg_flags & AMD_CG_SUPPORT_HDP_DS) {
+ hdp_mem_pwr_cntl = REG_SET_FIELD(hdp_mem_pwr_cntl,
+ HDP_MEM_POWER_CTRL,
+ ATOMIC_MEM_POWER_DS_EN, 1);
+ hdp_mem_pwr_cntl = REG_SET_FIELD(hdp_mem_pwr_cntl,
+ HDP_MEM_POWER_CTRL,
+ RC_MEM_POWER_DS_EN, 1);
+ }
+
+ /* confirmed that ATOMIC/RC_MEM_POWER_CTRL_EN have to be set for SRAM LS/DS/SD */
+ if (adev->cg_flags & (AMD_CG_SUPPORT_HDP_LS | AMD_CG_SUPPORT_HDP_DS |
+ AMD_CG_SUPPORT_HDP_SD)) {
+ hdp_mem_pwr_cntl = REG_SET_FIELD(hdp_mem_pwr_cntl, HDP_MEM_POWER_CTRL,
+ ATOMIC_MEM_POWER_CTRL_EN, 1);
+ hdp_mem_pwr_cntl = REG_SET_FIELD(hdp_mem_pwr_cntl, HDP_MEM_POWER_CTRL,
+ RC_MEM_POWER_CTRL_EN, 1);
+ WREG32_SOC15(HDP, 0, regHDP_MEM_POWER_CTRL, hdp_mem_pwr_cntl);
+ }
+ }
+
+ /* disable MEM clock override after clock/power mode changing */
+ hdp_clk_cntl = REG_SET_FIELD(hdp_clk_cntl, HDP_CLK_CNTL,
+ ATOMIC_MEM_CLK_SOFT_OVERRIDE, 0);
+ hdp_clk_cntl = REG_SET_FIELD(hdp_clk_cntl, HDP_CLK_CNTL,
+ RC_MEM_CLK_SOFT_OVERRIDE, 0);
+ WREG32_SOC15(HDP, 0, regHDP_CLK_CNTL, hdp_clk_cntl);
+}
+
+static void hdp_v5_2_update_medium_grain_clock_gating(struct amdgpu_device *adev,
+ bool enable)
+{
+ uint32_t hdp_clk_cntl;
+
+ if (!(adev->cg_flags & AMD_CG_SUPPORT_HDP_MGCG))
+ return;
+
+ hdp_clk_cntl = RREG32_SOC15(HDP, 0, regHDP_CLK_CNTL);
+
+ if (enable) {
+ hdp_clk_cntl &=
+ ~(uint32_t)
+ (HDP_CLK_CNTL__ATOMIC_MEM_CLK_SOFT_OVERRIDE_MASK |
+ HDP_CLK_CNTL__RC_MEM_CLK_SOFT_OVERRIDE_MASK |
+ HDP_CLK_CNTL__DBUS_CLK_SOFT_OVERRIDE_MASK |
+ HDP_CLK_CNTL__DYN_CLK_SOFT_OVERRIDE_MASK |
+ HDP_CLK_CNTL__XDP_REG_CLK_SOFT_OVERRIDE_MASK |
+ HDP_CLK_CNTL__HDP_REG_CLK_SOFT_OVERRIDE_MASK);
+ } else {
+ hdp_clk_cntl |= HDP_CLK_CNTL__ATOMIC_MEM_CLK_SOFT_OVERRIDE_MASK |
+ HDP_CLK_CNTL__RC_MEM_CLK_SOFT_OVERRIDE_MASK |
+ HDP_CLK_CNTL__DBUS_CLK_SOFT_OVERRIDE_MASK |
+ HDP_CLK_CNTL__DYN_CLK_SOFT_OVERRIDE_MASK |
+ HDP_CLK_CNTL__XDP_REG_CLK_SOFT_OVERRIDE_MASK |
+ HDP_CLK_CNTL__HDP_REG_CLK_SOFT_OVERRIDE_MASK;
+ }
+
+ WREG32_SOC15(HDP, 0, regHDP_CLK_CNTL, hdp_clk_cntl);
+}
+
+static void hdp_v5_2_get_clockgating_state(struct amdgpu_device *adev,
+ u64 *flags)
+{
+ uint32_t tmp;
+
+ /* AMD_CG_SUPPORT_HDP_MGCG */
+ tmp = RREG32_SOC15(HDP, 0, regHDP_CLK_CNTL);
+ if (!(tmp & (HDP_CLK_CNTL__ATOMIC_MEM_CLK_SOFT_OVERRIDE_MASK |
+ HDP_CLK_CNTL__RC_MEM_CLK_SOFT_OVERRIDE_MASK |
+ HDP_CLK_CNTL__DBUS_CLK_SOFT_OVERRIDE_MASK |
+ HDP_CLK_CNTL__DYN_CLK_SOFT_OVERRIDE_MASK |
+ HDP_CLK_CNTL__XDP_REG_CLK_SOFT_OVERRIDE_MASK |
+ HDP_CLK_CNTL__HDP_REG_CLK_SOFT_OVERRIDE_MASK)))
+ *flags |= AMD_CG_SUPPORT_HDP_MGCG;
+
+ /* AMD_CG_SUPPORT_HDP_LS/DS/SD */
+ tmp = RREG32_SOC15(HDP, 0, regHDP_MEM_POWER_CTRL);
+ if (tmp & HDP_MEM_POWER_CTRL__ATOMIC_MEM_POWER_LS_EN_MASK)
+ *flags |= AMD_CG_SUPPORT_HDP_LS;
+ else if (tmp & HDP_MEM_POWER_CTRL__ATOMIC_MEM_POWER_DS_EN_MASK)
+ *flags |= AMD_CG_SUPPORT_HDP_DS;
+ else if (tmp & HDP_MEM_POWER_CTRL__ATOMIC_MEM_POWER_SD_EN_MASK)
+ *flags |= AMD_CG_SUPPORT_HDP_SD;
+}
+
+static void hdp_v5_2_update_clock_gating(struct amdgpu_device *adev,
+ bool enable)
+{
+ hdp_v5_2_update_mem_power_gating(adev, enable);
+ hdp_v5_2_update_medium_grain_clock_gating(adev, enable);
+}
+
const struct amdgpu_hdp_funcs hdp_v5_2_funcs = {
.flush_hdp = hdp_v5_2_flush_hdp,
+ .update_clock_gating = hdp_v5_2_update_clock_gating,
+ .get_clock_gating_state = hdp_v5_2_get_clockgating_state,
};
diff --git a/drivers/gpu/drm/amd/amdgpu/ih_v6_0.c b/drivers/gpu/drm/amd/amdgpu/ih_v6_0.c
index 92dc60a9d209..085e613f3646 100644
--- a/drivers/gpu/drm/amd/amdgpu/ih_v6_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/ih_v6_0.c
@@ -727,6 +727,7 @@ static const struct amd_ip_funcs ih_v6_0_ip_funcs = {
static const struct amdgpu_ih_funcs ih_v6_0_funcs = {
.get_wptr = ih_v6_0_get_wptr,
.decode_iv = amdgpu_ih_decode_iv_helper,
+ .decode_iv_ts = amdgpu_ih_decode_iv_ts_helper,
.set_rptr = ih_v6_0_set_rptr
};
diff --git a/drivers/gpu/drm/amd/amdgpu/mes_v11_0.c b/drivers/gpu/drm/amd/amdgpu/mes_v11_0.c
index 120ea294abef..cc3fdbbcd314 100644
--- a/drivers/gpu/drm/amd/amdgpu/mes_v11_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/mes_v11_0.c
@@ -183,6 +183,7 @@ static int mes_v11_0_add_hw_queue(struct amdgpu_mes *mes,
mes_add_queue_pkt.trap_handler_addr = input->tba_addr;
mes_add_queue_pkt.tma_addr = input->tma_addr;
mes_add_queue_pkt.is_kfd_process = input->is_kfd_process;
+ mes_add_queue_pkt.trap_en = 1;
return mes_v11_0_submit_pkt_and_poll_completion(mes,
&mes_add_queue_pkt, sizeof(mes_add_queue_pkt),
diff --git a/drivers/gpu/drm/amd/amdgpu/mmhub_v1_0.c b/drivers/gpu/drm/amd/amdgpu/mmhub_v1_0.c
index 3f44a099c52a..3e51e773f92b 100644
--- a/drivers/gpu/drm/amd/amdgpu/mmhub_v1_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/mmhub_v1_0.c
@@ -176,6 +176,7 @@ static void mmhub_v1_0_init_cache_regs(struct amdgpu_device *adev)
tmp = REG_SET_FIELD(tmp, VM_L2_CNTL2, INVALIDATE_L2_CACHE, 1);
WREG32_SOC15(MMHUB, 0, mmVM_L2_CNTL2, tmp);
+ tmp = mmVM_L2_CNTL3_DEFAULT;
if (adev->gmc.translate_further) {
tmp = REG_SET_FIELD(tmp, VM_L2_CNTL3, BANK_SELECT, 12);
tmp = REG_SET_FIELD(tmp, VM_L2_CNTL3,
diff --git a/drivers/gpu/drm/amd/amdgpu/mmhub_v3_0_1.c b/drivers/gpu/drm/amd/amdgpu/mmhub_v3_0_1.c
index cac72ced94c8..e8058edc1d10 100644
--- a/drivers/gpu/drm/amd/amdgpu/mmhub_v3_0_1.c
+++ b/drivers/gpu/drm/amd/amdgpu/mmhub_v3_0_1.c
@@ -518,18 +518,41 @@ static u64 mmhub_v3_0_1_get_mc_fb_offset(struct amdgpu_device *adev)
static void mmhub_v3_0_1_update_medium_grain_clock_gating(struct amdgpu_device *adev,
bool enable)
{
- //TODO
+ uint32_t def, data;
+
+ def = data = RREG32_SOC15(MMHUB, 0, regMM_ATC_L2_MISC_CG);
+
+ if (enable)
+ data |= MM_ATC_L2_MISC_CG__ENABLE_MASK;
+ else
+ data &= ~MM_ATC_L2_MISC_CG__ENABLE_MASK;
+
+ if (def != data)
+ WREG32_SOC15(MMHUB, 0, regMM_ATC_L2_MISC_CG, data);
}
static void mmhub_v3_0_1_update_medium_grain_light_sleep(struct amdgpu_device *adev,
bool enable)
{
- //TODO
+ uint32_t def, data;
+
+ def = data = RREG32_SOC15(MMHUB, 0, regMM_ATC_L2_MISC_CG);
+
+ if (enable)
+ data |= MM_ATC_L2_MISC_CG__MEM_LS_ENABLE_MASK;
+ else
+ data &= ~MM_ATC_L2_MISC_CG__MEM_LS_ENABLE_MASK;
+
+ if (def != data)
+ WREG32_SOC15(MMHUB, 0, regMM_ATC_L2_MISC_CG, data);
}
static int mmhub_v3_0_1_set_clockgating(struct amdgpu_device *adev,
enum amd_clockgating_state state)
{
+ if (amdgpu_sriov_vf(adev))
+ return 0;
+
mmhub_v3_0_1_update_medium_grain_clock_gating(adev,
state == AMD_CG_STATE_GATE);
mmhub_v3_0_1_update_medium_grain_light_sleep(adev,
@@ -539,7 +562,20 @@ static int mmhub_v3_0_1_set_clockgating(struct amdgpu_device *adev,
static void mmhub_v3_0_1_get_clockgating(struct amdgpu_device *adev, u64 *flags)
{
- //TODO
+ int data;
+
+ if (amdgpu_sriov_vf(adev))
+ *flags = 0;
+
+ data = RREG32_SOC15(MMHUB, 0, regMM_ATC_L2_MISC_CG);
+
+ /* AMD_CG_SUPPORT_MC_MGCG */
+ if (data & MM_ATC_L2_MISC_CG__ENABLE_MASK)
+ *flags |= AMD_CG_SUPPORT_MC_MGCG;
+
+ /* AMD_CG_SUPPORT_MC_LS */
+ if (data & MM_ATC_L2_MISC_CG__MEM_LS_ENABLE_MASK)
+ *flags |= AMD_CG_SUPPORT_MC_LS;
}
const struct amdgpu_mmhub_funcs mmhub_v3_0_1_funcs = {
diff --git a/drivers/gpu/drm/amd/amdgpu/mmhub_v9_4.c b/drivers/gpu/drm/amd/amdgpu/mmhub_v9_4.c
index 6e0145b2b408..445cb06b9d26 100644
--- a/drivers/gpu/drm/amd/amdgpu/mmhub_v9_4.c
+++ b/drivers/gpu/drm/amd/amdgpu/mmhub_v9_4.c
@@ -295,9 +295,17 @@ static void mmhub_v9_4_disable_identity_aperture(struct amdgpu_device *adev,
static void mmhub_v9_4_setup_vmid_config(struct amdgpu_device *adev, int hubid)
{
struct amdgpu_vmhub *hub = &adev->vmhub[AMDGPU_MMHUB_0];
+ unsigned int num_level, block_size;
uint32_t tmp;
int i;
+ num_level = adev->vm_manager.num_level;
+ block_size = adev->vm_manager.block_size;
+ if (adev->gmc.translate_further)
+ num_level -= 1;
+ else
+ block_size -= 9;
+
for (i = 0; i <= 14; i++) {
tmp = RREG32_SOC15_OFFSET(MMHUB, 0, mmVML2VC0_VM_CONTEXT1_CNTL,
hubid * MMHUB_INSTANCE_REGISTER_OFFSET + i);
@@ -305,7 +313,7 @@ static void mmhub_v9_4_setup_vmid_config(struct amdgpu_device *adev, int hubid)
ENABLE_CONTEXT, 1);
tmp = REG_SET_FIELD(tmp, VML2VC0_VM_CONTEXT1_CNTL,
PAGE_TABLE_DEPTH,
- adev->vm_manager.num_level);
+ num_level);
tmp = REG_SET_FIELD(tmp, VML2VC0_VM_CONTEXT1_CNTL,
RANGE_PROTECTION_FAULT_ENABLE_DEFAULT, 1);
tmp = REG_SET_FIELD(tmp, VML2VC0_VM_CONTEXT1_CNTL,
@@ -323,7 +331,7 @@ static void mmhub_v9_4_setup_vmid_config(struct amdgpu_device *adev, int hubid)
EXECUTE_PROTECTION_FAULT_ENABLE_DEFAULT, 1);
tmp = REG_SET_FIELD(tmp, VML2VC0_VM_CONTEXT1_CNTL,
PAGE_TABLE_BLOCK_SIZE,
- adev->vm_manager.block_size - 9);
+ block_size);
/* Send no-retry XNACK on fault to suppress VM fault storm. */
tmp = REG_SET_FIELD(tmp, VML2VC0_VM_CONTEXT1_CNTL,
RETRY_PERMISSION_OR_INVALID_PAGE_FAULT,
diff --git a/drivers/gpu/drm/amd/amdgpu/navi10_ih.c b/drivers/gpu/drm/amd/amdgpu/navi10_ih.c
index 4b5396d3e60f..eec13cb5bf75 100644
--- a/drivers/gpu/drm/amd/amdgpu/navi10_ih.c
+++ b/drivers/gpu/drm/amd/amdgpu/navi10_ih.c
@@ -409,9 +409,11 @@ static u32 navi10_ih_get_wptr(struct amdgpu_device *adev,
u32 wptr, tmp;
struct amdgpu_ih_regs *ih_regs;
- if (ih == &adev->irq.ih) {
+ if (ih == &adev->irq.ih || ih == &adev->irq.ih_soft) {
/* Only ring0 supports writeback. On other rings fall back
* to register-based code with overflow checking below.
+ * ih_soft ring doesn't have any backing hardware registers,
+ * update wptr and return.
*/
wptr = le32_to_cpu(*ih->wptr_cpu);
@@ -483,6 +485,9 @@ static void navi10_ih_set_rptr(struct amdgpu_device *adev,
{
struct amdgpu_ih_regs *ih_regs;
+ if (ih == &adev->irq.ih_soft)
+ return;
+
if (ih->use_doorbell) {
/* XXX check if swapping is necessary on BE */
*ih->rptr_cpu = ih->rptr;
diff --git a/drivers/gpu/drm/amd/amdgpu/nbio_v2_3.c b/drivers/gpu/drm/amd/amdgpu/nbio_v2_3.c
index b465baa26762..aa761ff3a5fa 100644
--- a/drivers/gpu/drm/amd/amdgpu/nbio_v2_3.c
+++ b/drivers/gpu/drm/amd/amdgpu/nbio_v2_3.c
@@ -380,6 +380,7 @@ static void nbio_v2_3_enable_aspm(struct amdgpu_device *adev,
WREG32_PCIE(smnPCIE_LC_CNTL, data);
}
+#ifdef CONFIG_PCIEASPM
static void nbio_v2_3_program_ltr(struct amdgpu_device *adev)
{
uint32_t def, data;
@@ -401,9 +402,11 @@ static void nbio_v2_3_program_ltr(struct amdgpu_device *adev)
if (def != data)
WREG32_PCIE(smnBIF_CFG_DEV0_EPF0_DEVICE_CNTL2, data);
}
+#endif
static void nbio_v2_3_program_aspm(struct amdgpu_device *adev)
{
+#ifdef CONFIG_PCIEASPM
uint32_t def, data;
def = data = RREG32_PCIE(smnPCIE_LC_CNTL);
@@ -459,7 +462,10 @@ static void nbio_v2_3_program_aspm(struct amdgpu_device *adev)
if (def != data)
WREG32_PCIE(smnPCIE_LC_CNTL6, data);
- nbio_v2_3_program_ltr(adev);
+ /* Don't bother about LTR if LTR is not enabled
+ * in the path */
+ if (adev->pdev->ltr_path)
+ nbio_v2_3_program_ltr(adev);
def = data = RREG32_SOC15(NBIO, 0, mmRCC_BIF_STRAP3);
data |= 0x5DE0 << RCC_BIF_STRAP3__STRAP_VLINK_ASPM_IDLE_TIMER__SHIFT;
@@ -483,6 +489,7 @@ static void nbio_v2_3_program_aspm(struct amdgpu_device *adev)
data &= ~PCIE_LC_CNTL3__LC_DSC_DONT_ENTER_L23_AFTER_PME_ACK_MASK;
if (def != data)
WREG32_PCIE(smnPCIE_LC_CNTL3, data);
+#endif
}
static void nbio_v2_3_apply_lc_spc_mode_wa(struct amdgpu_device *adev)
diff --git a/drivers/gpu/drm/amd/amdgpu/nbio_v6_1.c b/drivers/gpu/drm/amd/amdgpu/nbio_v6_1.c
index f7f6ddebd3e4..37615a77287b 100644
--- a/drivers/gpu/drm/amd/amdgpu/nbio_v6_1.c
+++ b/drivers/gpu/drm/amd/amdgpu/nbio_v6_1.c
@@ -282,6 +282,7 @@ static void nbio_v6_1_init_registers(struct amdgpu_device *adev)
mmBIF_BX_DEV0_EPF0_VF0_HDP_MEM_COHERENCY_FLUSH_CNTL) << 2;
}
+#ifdef CONFIG_PCIEASPM
static void nbio_v6_1_program_ltr(struct amdgpu_device *adev)
{
uint32_t def, data;
@@ -303,9 +304,11 @@ static void nbio_v6_1_program_ltr(struct amdgpu_device *adev)
if (def != data)
WREG32_PCIE(smnBIF_CFG_DEV0_EPF0_DEVICE_CNTL2, data);
}
+#endif
static void nbio_v6_1_program_aspm(struct amdgpu_device *adev)
{
+#ifdef CONFIG_PCIEASPM
uint32_t def, data;
def = data = RREG32_PCIE(smnPCIE_LC_CNTL);
@@ -361,7 +364,10 @@ static void nbio_v6_1_program_aspm(struct amdgpu_device *adev)
if (def != data)
WREG32_PCIE(smnPCIE_LC_CNTL6, data);
- nbio_v6_1_program_ltr(adev);
+ /* Don't bother about LTR if LTR is not enabled
+ * in the path */
+ if (adev->pdev->ltr_path)
+ nbio_v6_1_program_ltr(adev);
def = data = RREG32_PCIE(smnRCC_BIF_STRAP3);
data |= 0x5DE0 << RCC_BIF_STRAP3__STRAP_VLINK_ASPM_IDLE_TIMER__SHIFT;
@@ -385,6 +391,7 @@ static void nbio_v6_1_program_aspm(struct amdgpu_device *adev)
data &= ~PCIE_LC_CNTL3__LC_DSC_DONT_ENTER_L23_AFTER_PME_ACK_MASK;
if (def != data)
WREG32_PCIE(smnPCIE_LC_CNTL3, data);
+#endif
}
const struct amdgpu_nbio_funcs nbio_v6_1_funcs = {
diff --git a/drivers/gpu/drm/amd/amdgpu/nbio_v7_4.c b/drivers/gpu/drm/amd/amdgpu/nbio_v7_4.c
index 11848d1e238b..19455a725939 100644
--- a/drivers/gpu/drm/amd/amdgpu/nbio_v7_4.c
+++ b/drivers/gpu/drm/amd/amdgpu/nbio_v7_4.c
@@ -673,6 +673,7 @@ struct amdgpu_nbio_ras nbio_v7_4_ras = {
};
+#ifdef CONFIG_PCIEASPM
static void nbio_v7_4_program_ltr(struct amdgpu_device *adev)
{
uint32_t def, data;
@@ -694,9 +695,11 @@ static void nbio_v7_4_program_ltr(struct amdgpu_device *adev)
if (def != data)
WREG32_PCIE(smnBIF_CFG_DEV0_EPF0_DEVICE_CNTL2, data);
}
+#endif
static void nbio_v7_4_program_aspm(struct amdgpu_device *adev)
{
+#ifdef CONFIG_PCIEASPM
uint32_t def, data;
if (adev->ip_versions[NBIO_HWIP][0] == IP_VERSION(7, 4, 4))
@@ -755,7 +758,10 @@ static void nbio_v7_4_program_aspm(struct amdgpu_device *adev)
if (def != data)
WREG32_PCIE(smnPCIE_LC_CNTL6, data);
- nbio_v7_4_program_ltr(adev);
+ /* Don't bother about LTR if LTR is not enabled
+ * in the path */
+ if (adev->pdev->ltr_path)
+ nbio_v7_4_program_ltr(adev);
def = data = RREG32_PCIE(smnRCC_BIF_STRAP3);
data |= 0x5DE0 << RCC_BIF_STRAP3__STRAP_VLINK_ASPM_IDLE_TIMER__SHIFT;
@@ -779,6 +785,7 @@ static void nbio_v7_4_program_aspm(struct amdgpu_device *adev)
data &= ~PCIE_LC_CNTL3__LC_DSC_DONT_ENTER_L23_AFTER_PME_ACK_MASK;
if (def != data)
WREG32_PCIE(smnPCIE_LC_CNTL3, data);
+#endif
}
const struct amdgpu_nbio_funcs nbio_v7_4_funcs = {
diff --git a/drivers/gpu/drm/amd/amdgpu/nbio_v7_7.c b/drivers/gpu/drm/amd/amdgpu/nbio_v7_7.c
index 01e8288d09a8..def89379b51a 100644
--- a/drivers/gpu/drm/amd/amdgpu/nbio_v7_7.c
+++ b/drivers/gpu/drm/amd/amdgpu/nbio_v7_7.c
@@ -28,6 +28,14 @@
#include "nbio/nbio_7_7_0_sh_mask.h"
#include <uapi/linux/kfd_ioctl.h>
+static void nbio_v7_7_remap_hdp_registers(struct amdgpu_device *adev)
+{
+ WREG32_SOC15(NBIO, 0, regBIF_BX0_REMAP_HDP_MEM_FLUSH_CNTL,
+ adev->rmmio_remap.reg_offset + KFD_MMIO_REMAP_HDP_MEM_FLUSH_CNTL);
+ WREG32_SOC15(NBIO, 0, regBIF_BX0_REMAP_HDP_REG_FLUSH_CNTL,
+ adev->rmmio_remap.reg_offset + KFD_MMIO_REMAP_HDP_REG_FLUSH_CNTL);
+}
+
static u32 nbio_v7_7_get_rev_id(struct amdgpu_device *adev)
{
u32 tmp;
@@ -68,12 +76,6 @@ static void nbio_v7_7_sdma_doorbell_range(struct amdgpu_device *adev, int instan
doorbell_range = REG_SET_FIELD(doorbell_range,
GDC0_BIF_CSDMA_DOORBELL_RANGE,
SIZE, doorbell_size);
- doorbell_range = REG_SET_FIELD(doorbell_range,
- GDC0_BIF_SDMA0_DOORBELL_RANGE,
- OFFSET, doorbell_index);
- doorbell_range = REG_SET_FIELD(doorbell_range,
- GDC0_BIF_SDMA0_DOORBELL_RANGE,
- SIZE, doorbell_size);
} else {
doorbell_range = REG_SET_FIELD(doorbell_range,
GDC0_BIF_SDMA0_DOORBELL_RANGE,
@@ -247,6 +249,81 @@ static void nbio_v7_7_init_registers(struct amdgpu_device *adev)
}
+static void nbio_v7_7_update_medium_grain_clock_gating(struct amdgpu_device *adev,
+ bool enable)
+{
+ uint32_t def, data;
+
+ if (enable && !(adev->cg_flags & AMD_CG_SUPPORT_BIF_MGCG))
+ return;
+
+ def = data = RREG32_SOC15(NBIO, 0, regBIF0_CPM_CONTROL);
+ if (enable) {
+ data |= (BIF0_CPM_CONTROL__LCLK_DYN_GATE_ENABLE_MASK |
+ BIF0_CPM_CONTROL__TXCLK_DYN_GATE_ENABLE_MASK |
+ BIF0_CPM_CONTROL__TXCLK_LCNT_GATE_ENABLE_MASK |
+ BIF0_CPM_CONTROL__TXCLK_REGS_GATE_ENABLE_MASK |
+ BIF0_CPM_CONTROL__TXCLK_PRBS_GATE_ENABLE_MASK |
+ BIF0_CPM_CONTROL__REFCLK_REGS_GATE_ENABLE_MASK);
+ } else {
+ data &= ~(BIF0_CPM_CONTROL__LCLK_DYN_GATE_ENABLE_MASK |
+ BIF0_CPM_CONTROL__TXCLK_DYN_GATE_ENABLE_MASK |
+ BIF0_CPM_CONTROL__TXCLK_LCNT_GATE_ENABLE_MASK |
+ BIF0_CPM_CONTROL__TXCLK_REGS_GATE_ENABLE_MASK |
+ BIF0_CPM_CONTROL__TXCLK_PRBS_GATE_ENABLE_MASK |
+ BIF0_CPM_CONTROL__REFCLK_REGS_GATE_ENABLE_MASK);
+ }
+
+ if (def != data)
+ WREG32_SOC15(NBIO, 0, regBIF0_CPM_CONTROL, data);
+}
+
+static void nbio_v7_7_update_medium_grain_light_sleep(struct amdgpu_device *adev,
+ bool enable)
+{
+ uint32_t def, data;
+
+ if (enable && !(adev->cg_flags & AMD_CG_SUPPORT_BIF_LS))
+ return;
+
+ def = data = RREG32_SOC15(NBIO, 0, regBIF0_PCIE_CNTL2);
+ if (enable)
+ data |= BIF0_PCIE_CNTL2__SLV_MEM_LS_EN_MASK;
+ else
+ data &= ~BIF0_PCIE_CNTL2__SLV_MEM_LS_EN_MASK;
+
+ if (def != data)
+ WREG32_SOC15(NBIO, 0, regBIF0_PCIE_CNTL2, data);
+
+ def = data = RREG32_SOC15(NBIO, 0, regBIF0_PCIE_TX_POWER_CTRL_1);
+ if (enable) {
+ data |= (BIF0_PCIE_TX_POWER_CTRL_1__MST_MEM_LS_EN_MASK |
+ BIF0_PCIE_TX_POWER_CTRL_1__REPLAY_MEM_LS_EN_MASK);
+ } else {
+ data &= ~(BIF0_PCIE_TX_POWER_CTRL_1__MST_MEM_LS_EN_MASK |
+ BIF0_PCIE_TX_POWER_CTRL_1__REPLAY_MEM_LS_EN_MASK);
+ }
+
+ if (def != data)
+ WREG32_SOC15(NBIO, 0, regBIF0_PCIE_TX_POWER_CTRL_1, data);
+}
+
+static void nbio_v7_7_get_clockgating_state(struct amdgpu_device *adev,
+ u64 *flags)
+{
+ uint32_t data;
+
+ /* AMD_CG_SUPPORT_BIF_MGCG */
+ data = RREG32_SOC15(NBIO, 0, regBIF0_CPM_CONTROL);
+ if (data & BIF0_CPM_CONTROL__LCLK_DYN_GATE_ENABLE_MASK)
+ *flags |= AMD_CG_SUPPORT_BIF_MGCG;
+
+ /* AMD_CG_SUPPORT_BIF_LS */
+ data = RREG32_SOC15(NBIO, 0, regBIF0_PCIE_CNTL2);
+ if (data & BIF0_PCIE_CNTL2__SLV_MEM_LS_EN_MASK)
+ *flags |= AMD_CG_SUPPORT_BIF_LS;
+}
+
const struct amdgpu_nbio_funcs nbio_v7_7_funcs = {
.get_hdp_flush_req_offset = nbio_v7_7_get_hdp_flush_req_offset,
.get_hdp_flush_done_offset = nbio_v7_7_get_hdp_flush_done_offset,
@@ -262,6 +339,10 @@ const struct amdgpu_nbio_funcs nbio_v7_7_funcs = {
.enable_doorbell_aperture = nbio_v7_7_enable_doorbell_aperture,
.enable_doorbell_selfring_aperture = nbio_v7_7_enable_doorbell_selfring_aperture,
.ih_doorbell_range = nbio_v7_7_ih_doorbell_range,
+ .update_medium_grain_clock_gating = nbio_v7_7_update_medium_grain_clock_gating,
+ .update_medium_grain_light_sleep = nbio_v7_7_update_medium_grain_light_sleep,
+ .get_clockgating_state = nbio_v7_7_get_clockgating_state,
.ih_control = nbio_v7_7_ih_control,
.init_registers = nbio_v7_7_init_registers,
+ .remap_hdp_registers = nbio_v7_7_remap_hdp_registers,
};
diff --git a/drivers/gpu/drm/amd/amdgpu/psp_v12_0.c b/drivers/gpu/drm/amd/amdgpu/psp_v12_0.c
index a2588200ea58..0b2ac418e4ac 100644
--- a/drivers/gpu/drm/amd/amdgpu/psp_v12_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/psp_v12_0.c
@@ -101,6 +101,16 @@ static int psp_v12_0_init_microcode(struct psp_context *psp)
adev->psp.dtm_context.context.bin_desc.start_addr =
(uint8_t *)adev->psp.hdcp_context.context.bin_desc.start_addr +
le32_to_cpu(ta_hdr->dtm.offset_bytes);
+
+ if (adev->apu_flags & AMD_APU_IS_RENOIR) {
+ adev->psp.securedisplay_context.context.bin_desc.fw_version =
+ le32_to_cpu(ta_hdr->securedisplay.fw_version);
+ adev->psp.securedisplay_context.context.bin_desc.size_bytes =
+ le32_to_cpu(ta_hdr->securedisplay.size_bytes);
+ adev->psp.securedisplay_context.context.bin_desc.start_addr =
+ (uint8_t *)adev->psp.hdcp_context.context.bin_desc.start_addr +
+ le32_to_cpu(ta_hdr->securedisplay.offset_bytes);
+ }
}
return 0;
diff --git a/drivers/gpu/drm/amd/amdgpu/psp_v13_0.c b/drivers/gpu/drm/amd/amdgpu/psp_v13_0.c
index 726a5bba40b2..a75a286e1ecf 100644
--- a/drivers/gpu/drm/amd/amdgpu/psp_v13_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/psp_v13_0.c
@@ -20,7 +20,6 @@
* OTHER DEALINGS IN THE SOFTWARE.
*
*/
-#include <linux/dev_printk.h>
#include <drm/drm_drv.h>
#include <linux/vmalloc.h>
#include "amdgpu.h"
diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c b/drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c
index 65181efba50e..56424f75dd2c 100644
--- a/drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c
@@ -1504,6 +1504,11 @@ static int sdma_v4_0_start(struct amdgpu_device *adev)
WREG32_SDMA(i, mmSDMA0_CNTL, temp);
if (!amdgpu_sriov_vf(adev)) {
+ ring = &adev->sdma.instance[i].ring;
+ adev->nbio.funcs->sdma_doorbell_range(adev, i,
+ ring->use_doorbell, ring->doorbell_index,
+ adev->doorbell_index.sdma_doorbell_range);
+
/* unhalt engine */
temp = RREG32_SDMA(i, mmSDMA0_F32_CNTL);
temp = REG_SET_FIELD(temp, SDMA0_F32_CNTL, HALT, 0);
diff --git a/drivers/gpu/drm/amd/amdgpu/soc15.c b/drivers/gpu/drm/amd/amdgpu/soc15.c
index fde6154f2009..183024d7c184 100644
--- a/drivers/gpu/drm/amd/amdgpu/soc15.c
+++ b/drivers/gpu/drm/amd/amdgpu/soc15.c
@@ -1211,25 +1211,6 @@ static int soc15_common_sw_fini(void *handle)
return 0;
}
-static void soc15_doorbell_range_init(struct amdgpu_device *adev)
-{
- int i;
- struct amdgpu_ring *ring;
-
- /* sdma/ih doorbell range are programed by hypervisor */
- if (!amdgpu_sriov_vf(adev)) {
- for (i = 0; i < adev->sdma.num_instances; i++) {
- ring = &adev->sdma.instance[i].ring;
- adev->nbio.funcs->sdma_doorbell_range(adev, i,
- ring->use_doorbell, ring->doorbell_index,
- adev->doorbell_index.sdma_doorbell_range);
- }
-
- adev->nbio.funcs->ih_doorbell_range(adev, adev->irq.ih.use_doorbell,
- adev->irq.ih.doorbell_index);
- }
-}
-
static int soc15_common_hw_init(void *handle)
{
struct amdgpu_device *adev = (struct amdgpu_device *)handle;
@@ -1249,12 +1230,6 @@ static int soc15_common_hw_init(void *handle)
/* enable the doorbell aperture */
soc15_enable_doorbell_aperture(adev, true);
- /* HW doorbell routing policy: doorbell writing not
- * in SDMA/IH/MM/ACV range will be routed to CP. So
- * we need to init SDMA/IH/MM/ACV doorbell range prior
- * to CP ip block init and ring test.
- */
- soc15_doorbell_range_init(adev);
return 0;
}
diff --git a/drivers/gpu/drm/amd/amdgpu/soc21.c b/drivers/gpu/drm/amd/amdgpu/soc21.c
index 52816de5e17b..2e50db3b761e 100644
--- a/drivers/gpu/drm/amd/amdgpu/soc21.c
+++ b/drivers/gpu/drm/amd/amdgpu/soc21.c
@@ -421,6 +421,7 @@ static bool soc21_need_full_reset(struct amdgpu_device *adev)
{
switch (adev->ip_versions[GC_HWIP][0]) {
case IP_VERSION(11, 0, 0):
+ return amdgpu_ras_is_supported(adev, AMDGPU_RAS_BLOCK__UMC);
case IP_VERSION(11, 0, 2):
return false;
default:
@@ -494,6 +495,20 @@ static void soc21_pre_asic_init(struct amdgpu_device *adev)
{
}
+static int soc21_update_umd_stable_pstate(struct amdgpu_device *adev,
+ bool enter)
+{
+ if (enter)
+ amdgpu_gfx_rlc_enter_safe_mode(adev);
+ else
+ amdgpu_gfx_rlc_exit_safe_mode(adev);
+
+ if (adev->gfx.funcs->update_perfmon_mgcg)
+ adev->gfx.funcs->update_perfmon_mgcg(adev, !enter);
+
+ return 0;
+}
+
static const struct amdgpu_asic_funcs soc21_asic_funcs =
{
.read_disabled_bios = &soc21_read_disabled_bios,
@@ -513,6 +528,7 @@ static const struct amdgpu_asic_funcs soc21_asic_funcs =
.supports_baco = &amdgpu_dpm_is_baco_supported,
.pre_asic_init = &soc21_pre_asic_init,
.query_video_codecs = &soc21_query_video_codecs,
+ .update_umd_stable_pstate = &soc21_update_umd_stable_pstate,
};
static int soc21_common_early_init(void *handle)
@@ -546,8 +562,10 @@ static int soc21_common_early_init(void *handle)
case IP_VERSION(11, 0, 0):
adev->cg_flags = AMD_CG_SUPPORT_GFX_CGCG |
AMD_CG_SUPPORT_GFX_CGLS |
+#if 0
AMD_CG_SUPPORT_GFX_3D_CGCG |
AMD_CG_SUPPORT_GFX_3D_CGLS |
+#endif
AMD_CG_SUPPORT_GFX_MGCG |
AMD_CG_SUPPORT_REPEATER_FGCG |
AMD_CG_SUPPORT_GFX_FGCG |
@@ -575,7 +593,9 @@ static int soc21_common_early_init(void *handle)
AMD_CG_SUPPORT_VCN_MGCG |
AMD_CG_SUPPORT_JPEG_MGCG |
AMD_CG_SUPPORT_ATHUB_MGCG |
- AMD_CG_SUPPORT_ATHUB_LS;
+ AMD_CG_SUPPORT_ATHUB_LS |
+ AMD_CG_SUPPORT_IH_CG |
+ AMD_CG_SUPPORT_HDP_SD;
adev->pg_flags =
AMD_PG_SUPPORT_VCN |
AMD_PG_SUPPORT_VCN_DPG |
@@ -586,9 +606,25 @@ static int soc21_common_early_init(void *handle)
break;
case IP_VERSION(11, 0, 1):
adev->cg_flags =
+ AMD_CG_SUPPORT_GFX_CGCG |
+ AMD_CG_SUPPORT_GFX_CGLS |
+ AMD_CG_SUPPORT_GFX_MGCG |
+ AMD_CG_SUPPORT_GFX_FGCG |
+ AMD_CG_SUPPORT_REPEATER_FGCG |
+ AMD_CG_SUPPORT_GFX_PERF_CLK |
+ AMD_CG_SUPPORT_MC_MGCG |
+ AMD_CG_SUPPORT_MC_LS |
+ AMD_CG_SUPPORT_HDP_MGCG |
+ AMD_CG_SUPPORT_HDP_LS |
+ AMD_CG_SUPPORT_ATHUB_MGCG |
+ AMD_CG_SUPPORT_ATHUB_LS |
+ AMD_CG_SUPPORT_IH_CG |
+ AMD_CG_SUPPORT_BIF_MGCG |
+ AMD_CG_SUPPORT_BIF_LS |
AMD_CG_SUPPORT_VCN_MGCG |
AMD_CG_SUPPORT_JPEG_MGCG;
adev->pg_flags =
+ AMD_PG_SUPPORT_GFX_PG |
AMD_PG_SUPPORT_JPEG;
adev->external_rev_id = adev->rev_id + 0x1;
break;
@@ -683,6 +719,8 @@ static int soc21_common_set_clockgating_state(void *handle,
switch (adev->ip_versions[NBIO_HWIP][0]) {
case IP_VERSION(4, 3, 0):
+ case IP_VERSION(4, 3, 1):
+ case IP_VERSION(7, 7, 0):
adev->nbio.funcs->update_medium_grain_clock_gating(adev,
state == AMD_CG_STATE_GATE);
adev->nbio.funcs->update_medium_grain_light_sleep(adev,
diff --git a/drivers/gpu/drm/amd/amdgpu/vcn_v4_0.c b/drivers/gpu/drm/amd/amdgpu/vcn_v4_0.c
index ca14c3ef742e..fb2d74f30448 100644
--- a/drivers/gpu/drm/amd/amdgpu/vcn_v4_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/vcn_v4_0.c
@@ -1115,7 +1115,7 @@ static int vcn_v4_0_start(struct amdgpu_device *adev)
*
* Stop VCN block with dpg mode
*/
-static int vcn_v4_0_stop_dpg_mode(struct amdgpu_device *adev, int inst_idx)
+static void vcn_v4_0_stop_dpg_mode(struct amdgpu_device *adev, int inst_idx)
{
uint32_t tmp;
@@ -1133,7 +1133,6 @@ static int vcn_v4_0_stop_dpg_mode(struct amdgpu_device *adev, int inst_idx)
/* disable dynamic power gating mode */
WREG32_P(SOC15_REG_OFFSET(VCN, inst_idx, regUVD_POWER_STATUS), 0,
~UVD_POWER_STATUS__UVD_PG_MODE_MASK);
- return 0;
}
/**
@@ -1154,7 +1153,7 @@ static int vcn_v4_0_stop(struct amdgpu_device *adev)
fw_shared->sq.queue_mode |= FW_QUEUE_DPG_HOLD_OFF;
if (adev->pg_flags & AMD_PG_SUPPORT_VCN_DPG) {
- r = vcn_v4_0_stop_dpg_mode(adev, i);
+ vcn_v4_0_stop_dpg_mode(adev, i);
continue;
}
diff --git a/drivers/gpu/drm/amd/amdgpu/vega10_ih.c b/drivers/gpu/drm/amd/amdgpu/vega10_ih.c
index cdd599a08125..1e83db0c5438 100644
--- a/drivers/gpu/drm/amd/amdgpu/vega10_ih.c
+++ b/drivers/gpu/drm/amd/amdgpu/vega10_ih.c
@@ -289,6 +289,10 @@ static int vega10_ih_irq_init(struct amdgpu_device *adev)
}
}
+ if (!amdgpu_sriov_vf(adev))
+ adev->nbio.funcs->ih_doorbell_range(adev, adev->irq.ih.use_doorbell,
+ adev->irq.ih.doorbell_index);
+
pci_set_master(adev->pdev);
/* enable interrupts */
@@ -334,9 +338,11 @@ static u32 vega10_ih_get_wptr(struct amdgpu_device *adev,
u32 wptr, tmp;
struct amdgpu_ih_regs *ih_regs;
- if (ih == &adev->irq.ih) {
+ if (ih == &adev->irq.ih || ih == &adev->irq.ih_soft) {
/* Only ring0 supports writeback. On other rings fall back
* to register-based code with overflow checking below.
+ * ih_soft ring doesn't have any backing hardware registers,
+ * update wptr and return.
*/
wptr = le32_to_cpu(*ih->wptr_cpu);
@@ -409,6 +415,9 @@ static void vega10_ih_set_rptr(struct amdgpu_device *adev,
{
struct amdgpu_ih_regs *ih_regs;
+ if (ih == &adev->irq.ih_soft)
+ return;
+
if (ih->use_doorbell) {
/* XXX check if swapping is necessary on BE */
*ih->rptr_cpu = ih->rptr;
diff --git a/drivers/gpu/drm/amd/amdgpu/vega20_ih.c b/drivers/gpu/drm/amd/amdgpu/vega20_ih.c
index 3b4eb8285943..59dfca093155 100644
--- a/drivers/gpu/drm/amd/amdgpu/vega20_ih.c
+++ b/drivers/gpu/drm/amd/amdgpu/vega20_ih.c
@@ -340,6 +340,10 @@ static int vega20_ih_irq_init(struct amdgpu_device *adev)
}
}
+ if (!amdgpu_sriov_vf(adev))
+ adev->nbio.funcs->ih_doorbell_range(adev, adev->irq.ih.use_doorbell,
+ adev->irq.ih.doorbell_index);
+
pci_set_master(adev->pdev);
/* enable interrupts */
@@ -385,9 +389,11 @@ static u32 vega20_ih_get_wptr(struct amdgpu_device *adev,
u32 wptr, tmp;
struct amdgpu_ih_regs *ih_regs;
- if (ih == &adev->irq.ih) {
+ if (ih == &adev->irq.ih || ih == &adev->irq.ih_soft) {
/* Only ring0 supports writeback. On other rings fall back
* to register-based code with overflow checking below.
+ * ih_soft ring doesn't have any backing hardware registers,
+ * update wptr and return.
*/
wptr = le32_to_cpu(*ih->wptr_cpu);
@@ -461,6 +467,9 @@ static void vega20_ih_set_rptr(struct amdgpu_device *adev,
{
struct amdgpu_ih_regs *ih_regs;
+ if (ih == &adev->irq.ih_soft)
+ return;
+
if (ih->use_doorbell) {
/* XXX check if swapping is necessary on BE */
*ih->rptr_cpu = ih->rptr;
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c b/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c
index 2b3d8bc8f0aa..dc774ddf3445 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c
@@ -874,7 +874,7 @@ static int kfd_ioctl_wait_events(struct file *filp, struct kfd_process *p,
err = kfd_wait_on_events(p, args->num_events,
(void __user *)args->events_ptr,
(args->wait_for_all != 0),
- args->timeout, &args->wait_result);
+ &args->timeout, &args->wait_result);
return err;
}
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device.c b/drivers/gpu/drm/amd/amdkfd/kfd_device.c
index f5853835f03a..22c0929d410b 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_device.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_device.c
@@ -102,13 +102,18 @@ static void kfd_device_info_set_sdma_info(struct kfd_dev *kfd)
switch (sdma_version) {
case IP_VERSION(6, 0, 0):
- case IP_VERSION(6, 0, 1):
case IP_VERSION(6, 0, 2):
/* Reserve 1 for paging and 1 for gfx */
kfd->device_info.num_reserved_sdma_queues_per_engine = 2;
/* BIT(0)=engine-0 queue-0; BIT(1)=engine-1 queue-0; BIT(2)=engine-0 queue-1; ... */
kfd->device_info.reserved_sdma_queues_bitmap = 0xFULL;
break;
+ case IP_VERSION(6, 0, 1):
+ /* Reserve 1 for paging and 1 for gfx */
+ kfd->device_info.num_reserved_sdma_queues_per_engine = 2;
+ /* BIT(0)=engine-0 queue-0; BIT(1)=engine-0 queue-1; ... */
+ kfd->device_info.reserved_sdma_queues_bitmap = 0x3ULL;
+ break;
default:
break;
}
@@ -377,12 +382,8 @@ struct kfd_dev *kgd2kfd_probe(struct amdgpu_device *adev, bool vf)
f2g = &gfx_v10_3_kfd2kgd;
break;
case IP_VERSION(10, 3, 6):
- gfx_target_version = 100306;
- if (!vf)
- f2g = &gfx_v10_3_kfd2kgd;
- break;
case IP_VERSION(10, 3, 7):
- gfx_target_version = 100307;
+ gfx_target_version = 100306;
if (!vf)
f2g = &gfx_v10_3_kfd2kgd;
break;
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_events.c b/drivers/gpu/drm/amd/amdkfd/kfd_events.c
index 3942a56c28bb..83e3ce9f6049 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_events.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_events.c
@@ -894,7 +894,8 @@ static long user_timeout_to_jiffies(uint32_t user_timeout_ms)
return msecs_to_jiffies(user_timeout_ms) + 1;
}
-static void free_waiters(uint32_t num_events, struct kfd_event_waiter *waiters)
+static void free_waiters(uint32_t num_events, struct kfd_event_waiter *waiters,
+ bool undo_auto_reset)
{
uint32_t i;
@@ -903,6 +904,9 @@ static void free_waiters(uint32_t num_events, struct kfd_event_waiter *waiters)
spin_lock(&waiters[i].event->lock);
remove_wait_queue(&waiters[i].event->wq,
&waiters[i].wait);
+ if (undo_auto_reset && waiters[i].activated &&
+ waiters[i].event && waiters[i].event->auto_reset)
+ set_event(waiters[i].event);
spin_unlock(&waiters[i].event->lock);
}
@@ -911,7 +915,7 @@ static void free_waiters(uint32_t num_events, struct kfd_event_waiter *waiters)
int kfd_wait_on_events(struct kfd_process *p,
uint32_t num_events, void __user *data,
- bool all, uint32_t user_timeout_ms,
+ bool all, uint32_t *user_timeout_ms,
uint32_t *wait_result)
{
struct kfd_event_data __user *events =
@@ -920,7 +924,7 @@ int kfd_wait_on_events(struct kfd_process *p,
int ret = 0;
struct kfd_event_waiter *event_waiters = NULL;
- long timeout = user_timeout_to_jiffies(user_timeout_ms);
+ long timeout = user_timeout_to_jiffies(*user_timeout_ms);
event_waiters = alloc_event_waiters(num_events);
if (!event_waiters) {
@@ -970,15 +974,11 @@ int kfd_wait_on_events(struct kfd_process *p,
}
if (signal_pending(current)) {
- /*
- * This is wrong when a nonzero, non-infinite timeout
- * is specified. We need to use
- * ERESTARTSYS_RESTARTBLOCK, but struct restart_block
- * contains a union with data for each user and it's
- * in generic kernel code that I don't want to
- * touch yet.
- */
ret = -ERESTARTSYS;
+ if (*user_timeout_ms != KFD_EVENT_TIMEOUT_IMMEDIATE &&
+ *user_timeout_ms != KFD_EVENT_TIMEOUT_INFINITE)
+ *user_timeout_ms = jiffies_to_msecs(
+ max(0l, timeout-1));
break;
}
@@ -1019,7 +1019,7 @@ int kfd_wait_on_events(struct kfd_process *p,
event_waiters, events);
out_unlock:
- free_waiters(num_events, event_waiters);
+ free_waiters(num_events, event_waiters, ret == -ERESTARTSYS);
mutex_unlock(&p->event_mutex);
out:
if (ret)
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_priv.h b/drivers/gpu/drm/amd/amdkfd/kfd_priv.h
index d03a3b9c9c5d..bf610e3b683b 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_priv.h
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_priv.h
@@ -1317,7 +1317,7 @@ void kfd_event_free_process(struct kfd_process *p);
int kfd_event_mmap(struct kfd_process *process, struct vm_area_struct *vma);
int kfd_wait_on_events(struct kfd_process *p,
uint32_t num_events, void __user *data,
- bool all, uint32_t user_timeout_ms,
+ bool all, uint32_t *user_timeout_ms,
uint32_t *wait_result);
void kfd_signal_event_interrupt(u32 pasid, uint32_t partial_id,
uint32_t valid_id_bits);
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_svm.c b/drivers/gpu/drm/amd/amdkfd/kfd_svm.c
index a67ba8879a56..11074cc8c333 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_svm.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_svm.c
@@ -541,7 +541,6 @@ svm_range_vram_node_new(struct amdgpu_device *adev, struct svm_range *prange,
kfree(svm_bo);
return -ESRCH;
}
- svm_bo->svms = prange->svms;
svm_bo->eviction_fence =
amdgpu_amdkfd_fence_create(dma_fence_context_alloc(1),
mm,
@@ -3273,7 +3272,6 @@ int svm_range_schedule_evict_svm_bo(struct amdgpu_amdkfd_fence *fence)
static void svm_range_evict_svm_bo_worker(struct work_struct *work)
{
struct svm_range_bo *svm_bo;
- struct kfd_process *p;
struct mm_struct *mm;
int r = 0;
@@ -3281,13 +3279,12 @@ static void svm_range_evict_svm_bo_worker(struct work_struct *work)
if (!svm_bo_ref_unless_zero(svm_bo))
return; /* svm_bo was freed while eviction was pending */
- /* svm_range_bo_release destroys this worker thread. So during
- * the lifetime of this thread, kfd_process and mm will be valid.
- */
- p = container_of(svm_bo->svms, struct kfd_process, svms);
- mm = p->mm;
- if (!mm)
+ if (mmget_not_zero(svm_bo->eviction_fence->mm)) {
+ mm = svm_bo->eviction_fence->mm;
+ } else {
+ svm_range_bo_unref(svm_bo);
return;
+ }
mmap_read_lock(mm);
spin_lock(&svm_bo->list_lock);
@@ -3305,8 +3302,7 @@ static void svm_range_evict_svm_bo_worker(struct work_struct *work)
mutex_lock(&prange->migrate_mutex);
do {
- r = svm_migrate_vram_to_ram(prange,
- svm_bo->eviction_fence->mm,
+ r = svm_migrate_vram_to_ram(prange, mm,
KFD_MIGRATE_TRIGGER_TTM_EVICTION);
} while (!r && prange->actual_loc && --retries);
@@ -3324,6 +3320,7 @@ static void svm_range_evict_svm_bo_worker(struct work_struct *work)
}
spin_unlock(&svm_bo->list_lock);
mmap_read_unlock(mm);
+ mmput(mm);
dma_fence_signal(&svm_bo->eviction_fence->base);
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_svm.h b/drivers/gpu/drm/amd/amdkfd/kfd_svm.h
index 9156b041ef17..cfac13ad06ef 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_svm.h
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_svm.h
@@ -46,7 +46,6 @@ struct svm_range_bo {
spinlock_t list_lock;
struct amdgpu_amdkfd_fence *eviction_fence;
struct work_struct eviction_work;
- struct svm_range_list *svms;
uint32_t evicting;
struct work_struct release_work;
};
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_topology.c b/drivers/gpu/drm/amd/amdkfd/kfd_topology.c
index 25990bec600d..3f0a4a415907 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_topology.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_topology.c
@@ -1392,8 +1392,8 @@ static int kfd_build_p2p_node_entry(struct kfd_topology_device *dev,
static int kfd_create_indirect_link_prop(struct kfd_topology_device *kdev, int gpu_node)
{
+ struct kfd_iolink_properties *gpu_link, *tmp_link, *cpu_link;
struct kfd_iolink_properties *props = NULL, *props2 = NULL;
- struct kfd_iolink_properties *gpu_link, *cpu_link;
struct kfd_topology_device *cpu_dev;
int ret = 0;
int i, num_cpu;
@@ -1416,16 +1416,19 @@ static int kfd_create_indirect_link_prop(struct kfd_topology_device *kdev, int g
continue;
/* find CPU <--> CPU links */
+ cpu_link = NULL;
cpu_dev = kfd_topology_device_by_proximity_domain(i);
if (cpu_dev) {
- list_for_each_entry(cpu_link,
+ list_for_each_entry(tmp_link,
&cpu_dev->io_link_props, list) {
- if (cpu_link->node_to == gpu_link->node_to)
+ if (tmp_link->node_to == gpu_link->node_to) {
+ cpu_link = tmp_link;
break;
+ }
}
}
- if (cpu_link->node_to != gpu_link->node_to)
+ if (!cpu_link)
return -ENOMEM;
/* CPU <--> CPU <--> GPU, GPU node*/
diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c
index 8660d93cc405..5140d9c2bf3b 100644
--- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c
+++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c
@@ -3825,8 +3825,11 @@ static int amdgpu_dm_mode_config_init(struct amdgpu_device *adev)
adev_to_drm(adev)->mode_config.max_height = 16384;
adev_to_drm(adev)->mode_config.preferred_depth = 24;
- /* disable prefer shadow for now due to hibernation issues */
- adev_to_drm(adev)->mode_config.prefer_shadow = 0;
+ if (adev->asic_type == CHIP_HAWAII)
+ /* disable prefer shadow for now due to hibernation issues */
+ adev_to_drm(adev)->mode_config.prefer_shadow = 0;
+ else
+ adev_to_drm(adev)->mode_config.prefer_shadow = 1;
/* indicates support for immediate flip */
adev_to_drm(adev)->mode_config.async_page_flip = true;
@@ -4135,6 +4138,7 @@ static void register_backlight_device(struct amdgpu_display_manager *dm,
}
}
+static void amdgpu_set_panel_orientation(struct drm_connector *connector);
/*
* In this architecture, the association
@@ -4326,6 +4330,7 @@ static int amdgpu_dm_initialize_drm_device(struct amdgpu_device *adev)
adev_to_drm(adev)->vblank_disable_immediate = false;
}
}
+ amdgpu_set_panel_orientation(&aconnector->base);
}
/* Software is initialized. Now we can register interrupt handlers. */
@@ -6684,6 +6689,10 @@ static void amdgpu_set_panel_orientation(struct drm_connector *connector)
connector->connector_type != DRM_MODE_CONNECTOR_LVDS)
return;
+ mutex_lock(&connector->dev->mode_config.mutex);
+ amdgpu_dm_connector_get_modes(connector);
+ mutex_unlock(&connector->dev->mode_config.mutex);
+
encoder = amdgpu_dm_connector_to_encoder(connector);
if (!encoder)
return;
@@ -6728,8 +6737,6 @@ static void amdgpu_dm_connector_ddc_get_modes(struct drm_connector *connector,
* restored here.
*/
amdgpu_dm_update_freesync_caps(connector, edid);
-
- amdgpu_set_panel_orientation(connector);
} else {
amdgpu_dm_connector->num_modes = 0;
}
diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_debugfs.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_debugfs.c
index 0e48824f55e3..ee242d9d8b06 100644
--- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_debugfs.c
+++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_debugfs.c
@@ -3288,6 +3288,7 @@ void crtc_debugfs_init(struct drm_crtc *crtc)
&crc_win_y_end_fops);
debugfs_create_file_unsafe("crc_win_update", 0644, dir, crtc,
&crc_win_update_fops);
+ dput(dir);
#endif
debugfs_create_file("amdgpu_current_bpc", 0644, crtc->debugfs_entry,
crtc, &amdgpu_current_bpc_fops);
diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_plane.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_plane.c
index b841b8b0a9d8..987bde4dca3d 100644
--- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_plane.c
+++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_plane.c
@@ -34,6 +34,7 @@
#include "dal_asic_id.h"
#include "amdgpu_display.h"
#include "amdgpu_dm_trace.h"
+#include "amdgpu_dm_plane.h"
#include "gc/gc_11_0_0_offset.h"
#include "gc/gc_11_0_0_sh_mask.h"
@@ -149,12 +150,12 @@ static void add_modifier(uint64_t **mods, uint64_t *size, uint64_t *cap, uint64_
*size += 1;
}
-bool modifier_has_dcc(uint64_t modifier)
+static bool modifier_has_dcc(uint64_t modifier)
{
return IS_AMD_FMT_MOD(modifier) && AMD_FMT_MOD_GET(DCC, modifier);
}
-unsigned modifier_gfx9_swizzle_mode(uint64_t modifier)
+static unsigned modifier_gfx9_swizzle_mode(uint64_t modifier)
{
if (modifier == DRM_FORMAT_MOD_LINEAR)
return 0;
@@ -660,7 +661,7 @@ static int get_plane_modifiers(struct amdgpu_device *adev, unsigned int plane_ty
add_gfx10_1_modifiers(adev, mods, &size, &capacity);
break;
case AMDGPU_FAMILY_GC_11_0_0:
- case AMDGPU_FAMILY_GC_11_0_2:
+ case AMDGPU_FAMILY_GC_11_0_1:
add_gfx11_modifiers(adev, mods, &size, &capacity);
break;
}
@@ -1412,7 +1413,7 @@ static bool dm_plane_format_mod_supported(struct drm_plane *plane,
}
break;
case AMDGPU_FAMILY_GC_11_0_0:
- case AMDGPU_FAMILY_GC_11_0_2:
+ case AMDGPU_FAMILY_GC_11_0_1:
switch (AMD_FMT_MOD_GET(TILE, modifier)) {
case AMD_FMT_MOD_TILE_GFX11_256K_R_X:
case AMD_FMT_MOD_TILE_GFX9_64K_R_X:
diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_plane.h b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_plane.h
index 95168c2cfa6f..286981a2dd40 100644
--- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_plane.h
+++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_plane.h
@@ -36,17 +36,9 @@ int fill_dc_scaling_info(struct amdgpu_device *adev,
const struct drm_plane_state *state,
struct dc_scaling_info *scaling_info);
-void get_min_max_dc_plane_scaling(struct drm_device *dev,
- struct drm_framebuffer *fb,
- int *min_downscale, int *max_upscale);
-
int dm_plane_helper_check_state(struct drm_plane_state *state,
struct drm_crtc_state *new_crtc_state);
-bool modifier_has_dcc(uint64_t modifier);
-
-unsigned int modifier_gfx9_swizzle_mode(uint64_t modifier);
-
int fill_plane_buffer_attributes(struct amdgpu_device *adev,
const struct amdgpu_framebuffer *afb,
const enum surface_pixel_format format,
diff --git a/drivers/gpu/drm/amd/display/dc/basics/conversion.c b/drivers/gpu/drm/amd/display/dc/basics/conversion.c
index 6767fab55c26..352e9afb85c6 100644
--- a/drivers/gpu/drm/amd/display/dc/basics/conversion.c
+++ b/drivers/gpu/drm/amd/display/dc/basics/conversion.c
@@ -100,3 +100,24 @@ void convert_float_matrix(
matrix[i] = (uint16_t)reg_value;
}
}
+
+static uint32_t find_gcd(uint32_t a, uint32_t b)
+{
+ uint32_t remainder = 0;
+ while (b != 0) {
+ remainder = a % b;
+ a = b;
+ b = remainder;
+ }
+ return a;
+}
+
+void reduce_fraction(uint32_t num, uint32_t den,
+ uint32_t *out_num, uint32_t *out_den)
+{
+ uint32_t gcd = 0;
+
+ gcd = find_gcd(num, den);
+ *out_num = num / gcd;
+ *out_den = den / gcd;
+}
diff --git a/drivers/gpu/drm/amd/display/dc/basics/conversion.h b/drivers/gpu/drm/amd/display/dc/basics/conversion.h
index ade785c4fdc7..81da4e6f7a1a 100644
--- a/drivers/gpu/drm/amd/display/dc/basics/conversion.h
+++ b/drivers/gpu/drm/amd/display/dc/basics/conversion.h
@@ -38,6 +38,9 @@ void convert_float_matrix(
struct fixed31_32 *flt,
uint32_t buffer_size);
+void reduce_fraction(uint32_t num, uint32_t den,
+ uint32_t *out_num, uint32_t *out_den);
+
static inline unsigned int log_2(unsigned int num)
{
return ilog2(num);
diff --git a/drivers/gpu/drm/amd/display/dc/clk_mgr/clk_mgr.c b/drivers/gpu/drm/amd/display/dc/clk_mgr/clk_mgr.c
index 4c76091fd1f2..f276abb63bcd 100644
--- a/drivers/gpu/drm/amd/display/dc/clk_mgr/clk_mgr.c
+++ b/drivers/gpu/drm/amd/display/dc/clk_mgr/clk_mgr.c
@@ -337,7 +337,7 @@ struct clk_mgr *dc_clk_mgr_create(struct dc_context *ctx, struct pp_smu_funcs *p
break;
}
- case AMDGPU_FAMILY_GC_11_0_2: {
+ case AMDGPU_FAMILY_GC_11_0_1: {
struct clk_mgr_dcn314 *clk_mgr = kzalloc(sizeof(*clk_mgr), GFP_KERNEL);
if (clk_mgr == NULL) {
@@ -397,7 +397,7 @@ void dc_destroy_clk_mgr(struct clk_mgr *clk_mgr_base)
dcn32_clk_mgr_destroy(clk_mgr);
break;
- case AMDGPU_FAMILY_GC_11_0_2:
+ case AMDGPU_FAMILY_GC_11_0_1:
dcn314_clk_mgr_destroy(clk_mgr);
break;
diff --git a/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn21/rn_clk_mgr.c b/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn21/rn_clk_mgr.c
index 0202dc682682..ca6dfd2d7561 100644
--- a/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn21/rn_clk_mgr.c
+++ b/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn21/rn_clk_mgr.c
@@ -24,10 +24,9 @@
*/
#include "dccg.h"
-#include "clk_mgr_internal.h"
+#include "rn_clk_mgr.h"
#include "dcn20/dcn20_clk_mgr.h"
-#include "rn_clk_mgr.h"
#include "dml/dcn20/dcn20_fpu.h"
#include "dce100/dce_clk_mgr.h"
diff --git a/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn21/rn_clk_mgr.h b/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn21/rn_clk_mgr.h
index 2e088c5171b2..f1319957e400 100644
--- a/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn21/rn_clk_mgr.h
+++ b/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn21/rn_clk_mgr.h
@@ -28,6 +28,7 @@
#include "clk_mgr.h"
#include "dm_pp_smu.h"
+#include "clk_mgr_internal.h"
extern struct wm_table ddr4_wm_table_gs;
extern struct wm_table lpddr4_wm_table_gs;
diff --git a/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn314/dcn314_clk_mgr.c b/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn314/dcn314_clk_mgr.c
index ee99974b3b62..9781a8dbc238 100644
--- a/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn314/dcn314_clk_mgr.c
+++ b/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn314/dcn314_clk_mgr.c
@@ -307,16 +307,6 @@ static void dcn314_enable_pme_wa(struct clk_mgr *clk_mgr_base)
dcn314_smu_enable_pme_wa(clk_mgr);
}
-void dcn314_init_clocks(struct clk_mgr *clk_mgr)
-{
- memset(&(clk_mgr->clks), 0, sizeof(struct dc_clocks));
- // Assumption is that boot state always supports pstate
- clk_mgr->clks.p_state_change_support = true;
- clk_mgr->clks.prev_p_state_change_support = true;
- clk_mgr->clks.pwr_state = DCN_PWR_STATE_UNKNOWN;
- clk_mgr->clks.zstate_support = DCN_ZSTATE_SUPPORT_UNKNOWN;
-}
-
bool dcn314_are_clock_states_equal(struct dc_clocks *a,
struct dc_clocks *b)
{
@@ -425,7 +415,7 @@ static struct wm_table lpddr5_wm_table = {
}
};
-static DpmClocks_t dummy_clocks;
+static DpmClocks314_t dummy_clocks;
static struct dcn314_watermarks dummy_wms = { 0 };
@@ -510,7 +500,7 @@ static void dcn314_notify_wm_ranges(struct clk_mgr *clk_mgr_base)
static void dcn314_get_dpm_table_from_smu(struct clk_mgr_internal *clk_mgr,
struct dcn314_smu_dpm_clks *smu_dpm_clks)
{
- DpmClocks_t *table = smu_dpm_clks->dpm_clks;
+ DpmClocks314_t *table = smu_dpm_clks->dpm_clks;
if (!clk_mgr->smu_ver)
return;
@@ -527,6 +517,26 @@ static void dcn314_get_dpm_table_from_smu(struct clk_mgr_internal *clk_mgr,
dcn314_smu_transfer_dpm_table_smu_2_dram(clk_mgr);
}
+static inline bool is_valid_clock_value(uint32_t clock_value)
+{
+ return clock_value > 1 && clock_value < 100000;
+}
+
+static unsigned int convert_wck_ratio(uint8_t wck_ratio)
+{
+ switch (wck_ratio) {
+ case WCK_RATIO_1_2:
+ return 2;
+
+ case WCK_RATIO_1_4:
+ return 4;
+
+ default:
+ break;
+ }
+ return 1;
+}
+
static uint32_t find_max_clk_value(const uint32_t clocks[], uint32_t num_clocks)
{
uint32_t max = 0;
@@ -540,89 +550,129 @@ static uint32_t find_max_clk_value(const uint32_t clocks[], uint32_t num_clocks)
return max;
}
-static unsigned int find_clk_for_voltage(
- const DpmClocks_t *clock_table,
- const uint32_t clocks[],
- unsigned int voltage)
-{
- int i;
- int max_voltage = 0;
- int clock = 0;
-
- for (i = 0; i < NUM_SOC_VOLTAGE_LEVELS; i++) {
- if (clock_table->SocVoltage[i] == voltage) {
- return clocks[i];
- } else if (clock_table->SocVoltage[i] >= max_voltage &&
- clock_table->SocVoltage[i] < voltage) {
- max_voltage = clock_table->SocVoltage[i];
- clock = clocks[i];
- }
- }
-
- ASSERT(clock);
- return clock;
-}
-
static void dcn314_clk_mgr_helper_populate_bw_params(struct clk_mgr_internal *clk_mgr,
struct integrated_info *bios_info,
- const DpmClocks_t *clock_table)
+ const DpmClocks314_t *clock_table)
{
- int i, j;
struct clk_bw_params *bw_params = clk_mgr->base.bw_params;
- uint32_t max_dispclk = 0, max_dppclk = 0;
-
- j = -1;
-
- ASSERT(NUM_DF_PSTATE_LEVELS <= MAX_NUM_DPM_LVL);
-
- /* Find lowest DPM, FCLK is filled in reverse order*/
+ struct clk_limit_table_entry def_max = bw_params->clk_table.entries[bw_params->clk_table.num_entries - 1];
+ uint32_t max_pstate = 0, max_fclk = 0, min_pstate = 0, max_dispclk = 0, max_dppclk = 0;
+ int i;
- for (i = NUM_DF_PSTATE_LEVELS - 1; i >= 0; i--) {
- if (clock_table->DfPstateTable[i].FClk != 0) {
- j = i;
- break;
+ /* Find highest valid fclk pstate */
+ for (i = 0; i < clock_table->NumDfPstatesEnabled; i++) {
+ if (is_valid_clock_value(clock_table->DfPstateTable[i].FClk) &&
+ clock_table->DfPstateTable[i].FClk > max_fclk) {
+ max_fclk = clock_table->DfPstateTable[i].FClk;
+ max_pstate = i;
}
}
- if (j == -1) {
- /* clock table is all 0s, just use our own hardcode */
- ASSERT(0);
- return;
- }
+ /* We expect the table to contain at least one valid fclk entry. */
+ ASSERT(is_valid_clock_value(max_fclk));
- bw_params->clk_table.num_entries = j + 1;
-
- /* dispclk and dppclk can be max at any voltage, same number of levels for both */
+ /* Dispclk and dppclk can be max at any voltage, same number of levels for both */
if (clock_table->NumDispClkLevelsEnabled <= NUM_DISPCLK_DPM_LEVELS &&
clock_table->NumDispClkLevelsEnabled <= NUM_DPPCLK_DPM_LEVELS) {
max_dispclk = find_max_clk_value(clock_table->DispClocks, clock_table->NumDispClkLevelsEnabled);
max_dppclk = find_max_clk_value(clock_table->DppClocks, clock_table->NumDispClkLevelsEnabled);
} else {
+ /* Invalid number of entries in the table from PMFW. */
ASSERT(0);
}
- for (i = 0; i < bw_params->clk_table.num_entries; i++, j--) {
- bw_params->clk_table.entries[i].fclk_mhz = clock_table->DfPstateTable[j].FClk;
- bw_params->clk_table.entries[i].memclk_mhz = clock_table->DfPstateTable[j].MemClk;
- bw_params->clk_table.entries[i].voltage = clock_table->DfPstateTable[j].Voltage;
- switch (clock_table->DfPstateTable[j].WckRatio) {
- case WCK_RATIO_1_2:
- bw_params->clk_table.entries[i].wck_ratio = 2;
- break;
- case WCK_RATIO_1_4:
- bw_params->clk_table.entries[i].wck_ratio = 4;
- break;
- default:
- bw_params->clk_table.entries[i].wck_ratio = 1;
+ /* Base the clock table on dcfclk, need at least one entry regardless of pmfw table */
+ for (i = 0; i < clock_table->NumDcfClkLevelsEnabled; i++) {
+ uint32_t min_fclk = clock_table->DfPstateTable[0].FClk;
+ int j;
+
+ for (j = 1; j < clock_table->NumDfPstatesEnabled; j++) {
+ if (is_valid_clock_value(clock_table->DfPstateTable[j].FClk) &&
+ clock_table->DfPstateTable[j].FClk < min_fclk &&
+ clock_table->DfPstateTable[j].Voltage <= clock_table->SocVoltage[i]) {
+ min_fclk = clock_table->DfPstateTable[j].FClk;
+ min_pstate = j;
+ }
}
- bw_params->clk_table.entries[i].dcfclk_mhz = find_clk_for_voltage(clock_table, clock_table->DcfClocks, clock_table->DfPstateTable[j].Voltage);
- bw_params->clk_table.entries[i].socclk_mhz = find_clk_for_voltage(clock_table, clock_table->SocClocks, clock_table->DfPstateTable[j].Voltage);
+
+ /* First search defaults for the clocks we don't read using closest lower or equal default dcfclk */
+ for (j = bw_params->clk_table.num_entries - 1; j > 0; j--)
+ if (bw_params->clk_table.entries[j].dcfclk_mhz <= clock_table->DcfClocks[i])
+ break;
+
+ bw_params->clk_table.entries[i].phyclk_mhz = bw_params->clk_table.entries[j].phyclk_mhz;
+ bw_params->clk_table.entries[i].phyclk_d18_mhz = bw_params->clk_table.entries[j].phyclk_d18_mhz;
+ bw_params->clk_table.entries[i].dtbclk_mhz = bw_params->clk_table.entries[j].dtbclk_mhz;
+
+ /* Now update clocks we do read */
+ bw_params->clk_table.entries[i].fclk_mhz = min_fclk;
+ bw_params->clk_table.entries[i].memclk_mhz = clock_table->DfPstateTable[min_pstate].MemClk;
+ bw_params->clk_table.entries[i].voltage = clock_table->DfPstateTable[min_pstate].Voltage;
+ bw_params->clk_table.entries[i].dcfclk_mhz = clock_table->DcfClocks[i];
+ bw_params->clk_table.entries[i].socclk_mhz = clock_table->SocClocks[i];
+ bw_params->clk_table.entries[i].dispclk_mhz = max_dispclk;
+ bw_params->clk_table.entries[i].dppclk_mhz = max_dppclk;
+ bw_params->clk_table.entries[i].wck_ratio = convert_wck_ratio(
+ clock_table->DfPstateTable[min_pstate].WckRatio);
+ };
+
+ /* Make sure to include at least one entry at highest pstate */
+ if (max_pstate != min_pstate || i == 0) {
+ if (i > MAX_NUM_DPM_LVL - 1)
+ i = MAX_NUM_DPM_LVL - 1;
+
+ bw_params->clk_table.entries[i].fclk_mhz = max_fclk;
+ bw_params->clk_table.entries[i].memclk_mhz = clock_table->DfPstateTable[max_pstate].MemClk;
+ bw_params->clk_table.entries[i].voltage = clock_table->DfPstateTable[max_pstate].Voltage;
+ bw_params->clk_table.entries[i].dcfclk_mhz = find_max_clk_value(clock_table->DcfClocks, NUM_DCFCLK_DPM_LEVELS);
+ bw_params->clk_table.entries[i].socclk_mhz = find_max_clk_value(clock_table->SocClocks, NUM_SOCCLK_DPM_LEVELS);
bw_params->clk_table.entries[i].dispclk_mhz = max_dispclk;
bw_params->clk_table.entries[i].dppclk_mhz = max_dppclk;
+ bw_params->clk_table.entries[i].wck_ratio = convert_wck_ratio(
+ clock_table->DfPstateTable[max_pstate].WckRatio);
+ i++;
}
+ bw_params->clk_table.num_entries = i--;
+ /* Make sure all highest clocks are included*/
+ bw_params->clk_table.entries[i].socclk_mhz = find_max_clk_value(clock_table->SocClocks, NUM_SOCCLK_DPM_LEVELS);
+ bw_params->clk_table.entries[i].dispclk_mhz = find_max_clk_value(clock_table->DispClocks, NUM_DISPCLK_DPM_LEVELS);
+ bw_params->clk_table.entries[i].dppclk_mhz = find_max_clk_value(clock_table->DppClocks, NUM_DPPCLK_DPM_LEVELS);
+ ASSERT(clock_table->DcfClocks[i] == find_max_clk_value(clock_table->DcfClocks, NUM_DCFCLK_DPM_LEVELS));
+ bw_params->clk_table.entries[i].phyclk_mhz = def_max.phyclk_mhz;
+ bw_params->clk_table.entries[i].phyclk_d18_mhz = def_max.phyclk_d18_mhz;
+ bw_params->clk_table.entries[i].dtbclk_mhz = def_max.dtbclk_mhz;
+
+ /*
+ * Set any 0 clocks to max default setting. Not an issue for
+ * power since we aren't doing switching in such case anyway
+ */
+ for (i = 0; i < bw_params->clk_table.num_entries; i++) {
+ if (!bw_params->clk_table.entries[i].fclk_mhz) {
+ bw_params->clk_table.entries[i].fclk_mhz = def_max.fclk_mhz;
+ bw_params->clk_table.entries[i].memclk_mhz = def_max.memclk_mhz;
+ bw_params->clk_table.entries[i].voltage = def_max.voltage;
+ }
+ if (!bw_params->clk_table.entries[i].dcfclk_mhz)
+ bw_params->clk_table.entries[i].dcfclk_mhz = def_max.dcfclk_mhz;
+ if (!bw_params->clk_table.entries[i].socclk_mhz)
+ bw_params->clk_table.entries[i].socclk_mhz = def_max.socclk_mhz;
+ if (!bw_params->clk_table.entries[i].dispclk_mhz)
+ bw_params->clk_table.entries[i].dispclk_mhz = def_max.dispclk_mhz;
+ if (!bw_params->clk_table.entries[i].dppclk_mhz)
+ bw_params->clk_table.entries[i].dppclk_mhz = def_max.dppclk_mhz;
+ if (!bw_params->clk_table.entries[i].phyclk_mhz)
+ bw_params->clk_table.entries[i].phyclk_mhz = def_max.phyclk_mhz;
+ if (!bw_params->clk_table.entries[i].phyclk_d18_mhz)
+ bw_params->clk_table.entries[i].phyclk_d18_mhz = def_max.phyclk_d18_mhz;
+ if (!bw_params->clk_table.entries[i].dtbclk_mhz)
+ bw_params->clk_table.entries[i].dtbclk_mhz = def_max.dtbclk_mhz;
+ }
+ ASSERT(bw_params->clk_table.entries[i-1].dcfclk_mhz);
bw_params->vram_type = bios_info->memory_type;
- bw_params->num_channels = bios_info->ma_channel_number;
+
+ bw_params->dram_channel_width_bytes = bios_info->memory_type == 0x22 ? 8 : 4;
+ bw_params->num_channels = bios_info->ma_channel_number ? bios_info->ma_channel_number : 4;
for (i = 0; i < WM_SET_COUNT; i++) {
bw_params->wm_table.entries[i].wm_inst = i;
@@ -641,7 +691,7 @@ static struct clk_mgr_funcs dcn314_funcs = {
.get_dp_ref_clk_frequency = dce12_get_dp_ref_freq_khz,
.get_dtb_ref_clk_frequency = dcn31_get_dtb_ref_freq_khz,
.update_clocks = dcn314_update_clocks,
- .init_clocks = dcn314_init_clocks,
+ .init_clocks = dcn31_init_clocks,
.enable_pme_wa = dcn314_enable_pme_wa,
.are_clock_states_equal = dcn314_are_clock_states_equal,
.notify_wm_ranges = dcn314_notify_wm_ranges
@@ -681,10 +731,10 @@ void dcn314_clk_mgr_construct(
}
ASSERT(clk_mgr->smu_wm_set.wm_set);
- smu_dpm_clks.dpm_clks = (DpmClocks_t *)dm_helpers_allocate_gpu_mem(
+ smu_dpm_clks.dpm_clks = (DpmClocks314_t *)dm_helpers_allocate_gpu_mem(
clk_mgr->base.base.ctx,
DC_MEM_ALLOC_TYPE_FRAME_BUFFER,
- sizeof(DpmClocks_t),
+ sizeof(DpmClocks314_t),
&smu_dpm_clks.mc_address.quad_part);
if (smu_dpm_clks.dpm_clks == NULL) {
@@ -729,7 +779,7 @@ void dcn314_clk_mgr_construct(
if (clk_mgr->base.base.ctx->dc->debug.pstate_enabled) {
dcn314_get_dpm_table_from_smu(&clk_mgr->base, &smu_dpm_clks);
- if (ctx->dc_bios && ctx->dc_bios->integrated_info) {
+ if (ctx->dc_bios && ctx->dc_bios->integrated_info && ctx->dc->config.use_default_clock_table == false) {
dcn314_clk_mgr_helper_populate_bw_params(
&clk_mgr->base,
ctx->dc_bios->integrated_info,
diff --git a/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn314/dcn314_clk_mgr.h b/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn314/dcn314_clk_mgr.h
index c695a4498c50..171f84340eb2 100644
--- a/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn314/dcn314_clk_mgr.h
+++ b/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn314/dcn314_clk_mgr.h
@@ -42,7 +42,7 @@ struct clk_mgr_dcn314 {
bool dcn314_are_clock_states_equal(struct dc_clocks *a,
struct dc_clocks *b);
-void dcn314_init_clocks(struct clk_mgr *clk_mgr);
+
void dcn314_update_clocks(struct clk_mgr *clk_mgr_base,
struct dc_state *context,
bool safe_to_lower);
diff --git a/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn314/dcn314_smu.h b/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn314/dcn314_smu.h
index a7958dc96581..047d19ea919c 100644
--- a/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn314/dcn314_smu.h
+++ b/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn314/dcn314_smu.h
@@ -36,6 +36,37 @@ typedef enum {
WCK_RATIO_MAX
} WCK_RATIO_e;
+typedef struct {
+ uint32_t FClk;
+ uint32_t MemClk;
+ uint32_t Voltage;
+ uint8_t WckRatio;
+ uint8_t Spare[3];
+} DfPstateTable314_t;
+
+//Freq in MHz
+//Voltage in milli volts with 2 fractional bits
+typedef struct {
+ uint32_t DcfClocks[NUM_DCFCLK_DPM_LEVELS];
+ uint32_t DispClocks[NUM_DISPCLK_DPM_LEVELS];
+ uint32_t DppClocks[NUM_DPPCLK_DPM_LEVELS];
+ uint32_t SocClocks[NUM_SOCCLK_DPM_LEVELS];
+ uint32_t VClocks[NUM_VCN_DPM_LEVELS];
+ uint32_t DClocks[NUM_VCN_DPM_LEVELS];
+ uint32_t SocVoltage[NUM_SOC_VOLTAGE_LEVELS];
+ DfPstateTable314_t DfPstateTable[NUM_DF_PSTATE_LEVELS];
+
+ uint8_t NumDcfClkLevelsEnabled;
+ uint8_t NumDispClkLevelsEnabled; //Applies to both Dispclk and Dppclk
+ uint8_t NumSocClkLevelsEnabled;
+ uint8_t VcnClkLevelsEnabled; //Applies to both Vclk and Dclk
+ uint8_t NumDfPstatesEnabled;
+ uint8_t spare[3];
+
+ uint32_t MinGfxClk;
+ uint32_t MaxGfxClk;
+} DpmClocks314_t;
+
struct dcn314_watermarks {
// Watermarks
WatermarkRowGeneric_t WatermarkRow[WM_COUNT][NUM_WM_RANGES];
@@ -43,7 +74,7 @@ struct dcn314_watermarks {
};
struct dcn314_smu_dpm_clks {
- DpmClocks_t *dpm_clks;
+ DpmClocks314_t *dpm_clks;
union large_integer mc_address;
};
diff --git a/drivers/gpu/drm/amd/display/dc/core/dc.c b/drivers/gpu/drm/amd/display/dc/core/dc.c
index e42f44fc1c08..fb22c3d70528 100644
--- a/drivers/gpu/drm/amd/display/dc/core/dc.c
+++ b/drivers/gpu/drm/amd/display/dc/core/dc.c
@@ -1074,8 +1074,15 @@ static void disable_dangling_plane(struct dc *dc, struct dc_state *context)
struct dc_stream_state *old_stream =
dc->current_state->res_ctx.pipe_ctx[i].stream;
bool should_disable = true;
- bool pipe_split_change =
- context->res_ctx.pipe_ctx[i].top_pipe != dc->current_state->res_ctx.pipe_ctx[i].top_pipe;
+ bool pipe_split_change = false;
+
+ if ((context->res_ctx.pipe_ctx[i].top_pipe) &&
+ (dc->current_state->res_ctx.pipe_ctx[i].top_pipe))
+ pipe_split_change = context->res_ctx.pipe_ctx[i].top_pipe->pipe_idx !=
+ dc->current_state->res_ctx.pipe_ctx[i].top_pipe->pipe_idx;
+ else
+ pipe_split_change = context->res_ctx.pipe_ctx[i].top_pipe !=
+ dc->current_state->res_ctx.pipe_ctx[i].top_pipe;
for (j = 0; j < context->stream_count; j++) {
if (old_stream == context->streams[j]) {
@@ -1087,7 +1094,8 @@ static void disable_dangling_plane(struct dc *dc, struct dc_state *context)
dc->current_state->stream_count != context->stream_count)
should_disable = true;
- if (old_stream && !dc->current_state->res_ctx.pipe_ctx[i].top_pipe) {
+ if (old_stream && !dc->current_state->res_ctx.pipe_ctx[i].top_pipe &&
+ !dc->current_state->res_ctx.pipe_ctx[i].prev_odm_pipe) {
struct pipe_ctx *old_pipe, *new_pipe;
old_pipe = &dc->current_state->res_ctx.pipe_ctx[i];
@@ -3229,7 +3237,7 @@ static void commit_planes_for_stream(struct dc *dc,
odm_pipe->ttu_regs.min_ttu_vblank = MAX_TTU;
}
- if ((update_type != UPDATE_TYPE_FAST) && stream->update_flags.bits.dsc_changed) {
+ if ((update_type != UPDATE_TYPE_FAST) && stream->update_flags.bits.dsc_changed)
if (top_pipe_to_program &&
top_pipe_to_program->stream_res.tg->funcs->lock_doublebuffer_enable) {
if (should_use_dmub_lock(stream->link)) {
@@ -3247,7 +3255,6 @@ static void commit_planes_for_stream(struct dc *dc,
top_pipe_to_program->stream_res.tg->funcs->lock_doublebuffer_enable(
top_pipe_to_program->stream_res.tg);
}
- }
if (should_lock_all_pipes && dc->hwss.interdependent_update_lock) {
if (dc->hwss.subvp_pipe_control_lock)
@@ -3466,7 +3473,7 @@ static void commit_planes_for_stream(struct dc *dc,
dc->hwss.pipe_control_lock(dc, top_pipe_to_program, false);
}
- if ((update_type != UPDATE_TYPE_FAST) && stream->update_flags.bits.dsc_changed) {
+ if ((update_type != UPDATE_TYPE_FAST) && stream->update_flags.bits.dsc_changed)
if (top_pipe_to_program->stream_res.tg->funcs->lock_doublebuffer_enable) {
top_pipe_to_program->stream_res.tg->funcs->wait_for_state(
top_pipe_to_program->stream_res.tg,
@@ -3493,21 +3500,19 @@ static void commit_planes_for_stream(struct dc *dc,
top_pipe_to_program->stream_res.tg->funcs->lock_doublebuffer_disable(
top_pipe_to_program->stream_res.tg);
}
- }
- if (update_type != UPDATE_TYPE_FAST) {
+ if (update_type != UPDATE_TYPE_FAST)
dc->hwss.post_unlock_program_front_end(dc, context);
- /* Since phantom pipe programming is moved to post_unlock_program_front_end,
- * move the SubVP lock to after the phantom pipes have been setup
- */
- if (should_lock_all_pipes && dc->hwss.interdependent_update_lock) {
- if (dc->hwss.subvp_pipe_control_lock)
- dc->hwss.subvp_pipe_control_lock(dc, context, false, should_lock_all_pipes, NULL, subvp_prev_use);
- } else {
- if (dc->hwss.subvp_pipe_control_lock)
- dc->hwss.subvp_pipe_control_lock(dc, context, false, should_lock_all_pipes, top_pipe_to_program, subvp_prev_use);
- }
+ /* Since phantom pipe programming is moved to post_unlock_program_front_end,
+ * move the SubVP lock to after the phantom pipes have been setup
+ */
+ if (should_lock_all_pipes && dc->hwss.interdependent_update_lock) {
+ if (dc->hwss.subvp_pipe_control_lock)
+ dc->hwss.subvp_pipe_control_lock(dc, context, false, should_lock_all_pipes, NULL, subvp_prev_use);
+ } else {
+ if (dc->hwss.subvp_pipe_control_lock)
+ dc->hwss.subvp_pipe_control_lock(dc, context, false, should_lock_all_pipes, top_pipe_to_program, subvp_prev_use);
}
// Fire manual trigger only when bottom plane is flipped
@@ -4292,7 +4297,7 @@ bool dc_is_dmub_outbox_supported(struct dc *dc)
!dc->debug.dpia_debug.bits.disable_dpia)
return true;
- if (dc->ctx->asic_id.chip_family == AMDGPU_FAMILY_GC_11_0_2 &&
+ if (dc->ctx->asic_id.chip_family == AMDGPU_FAMILY_GC_11_0_1 &&
!dc->debug.dpia_debug.bits.disable_dpia)
return true;
@@ -4340,6 +4345,7 @@ void dc_enable_dmub_outbox(struct dc *dc)
struct dc_context *dc_ctx = dc->ctx;
dmub_enable_outbox_notification(dc_ctx->dmub_srv);
+ DC_LOG_DC("%s: dmub outbox notifications enabled\n", __func__);
}
/**
diff --git a/drivers/gpu/drm/amd/display/dc/core/dc_link.c b/drivers/gpu/drm/amd/display/dc/core/dc_link.c
index 9e51338441d0..66d2ae7aacf5 100644
--- a/drivers/gpu/drm/amd/display/dc/core/dc_link.c
+++ b/drivers/gpu/drm/amd/display/dc/core/dc_link.c
@@ -3372,7 +3372,7 @@ bool dc_link_setup_psr(struct dc_link *link,
switch(link->ctx->asic_id.chip_family) {
case FAMILY_YELLOW_CARP:
case AMDGPU_FAMILY_GC_10_3_6:
- case AMDGPU_FAMILY_GC_11_0_2:
+ case AMDGPU_FAMILY_GC_11_0_1:
if(!dc->debug.disable_z10)
psr_context->psr_level.bits.SKIP_CRTC_DISABLE = false;
break;
diff --git a/drivers/gpu/drm/amd/display/dc/core/dc_resource.c b/drivers/gpu/drm/amd/display/dc/core/dc_resource.c
index ffc0f1c0ea93..7dbab15bfa68 100644
--- a/drivers/gpu/drm/amd/display/dc/core/dc_resource.c
+++ b/drivers/gpu/drm/amd/display/dc/core/dc_resource.c
@@ -169,7 +169,7 @@ enum dce_version resource_parse_asic_id(struct hw_asic_id asic_id)
if (ASICREV_IS_GC_11_0_2(asic_id.hw_internal_rev))
dc_version = DCN_VERSION_3_21;
break;
- case AMDGPU_FAMILY_GC_11_0_2:
+ case AMDGPU_FAMILY_GC_11_0_1:
dc_version = DCN_VERSION_3_14;
break;
default:
diff --git a/drivers/gpu/drm/amd/display/dc/core/dc_stream.c b/drivers/gpu/drm/amd/display/dc/core/dc_stream.c
index f62d50901d92..0c85ab5933b4 100644
--- a/drivers/gpu/drm/amd/display/dc/core/dc_stream.c
+++ b/drivers/gpu/drm/amd/display/dc/core/dc_stream.c
@@ -329,7 +329,7 @@ bool dc_stream_set_cursor_attributes(
dc = stream->ctx->dc;
- if (attributes->height * attributes->width * 4 > 16384)
+ if (dc->debug.allow_sw_cursor_fallback && attributes->height * attributes->width * 4 > 16384)
if (stream->mall_stream_config.type == SUBVP_MAIN)
return false;
diff --git a/drivers/gpu/drm/amd/display/dc/dc.h b/drivers/gpu/drm/amd/display/dc/dc.h
index 8e1e40083ec8..dbf8158b832e 100644
--- a/drivers/gpu/drm/amd/display/dc/dc.h
+++ b/drivers/gpu/drm/amd/display/dc/dc.h
@@ -47,7 +47,7 @@ struct aux_payload;
struct set_config_cmd_payload;
struct dmub_notification;
-#define DC_VER "3.2.196"
+#define DC_VER "3.2.198"
#define MAX_SURFACES 3
#define MAX_PLANES 6
@@ -213,6 +213,7 @@ struct dc_caps {
uint32_t cache_num_ways;
uint16_t subvp_fw_processing_delay_us;
uint16_t subvp_prefetch_end_to_mall_start_us;
+ uint8_t subvp_swath_height_margin_lines; // subvp start line must be aligned to 2 x swath height
uint16_t subvp_pstate_allow_width_us;
uint16_t subvp_vertical_int_margin_us;
bool seamless_odm;
@@ -352,6 +353,7 @@ struct dc_config {
bool use_pipe_ctx_sync_logic;
bool ignore_dpref_ss;
bool enable_mipi_converter_optimization;
+ bool use_default_clock_table;
};
enum visual_confirm {
@@ -609,6 +611,7 @@ struct dc_bounding_box_overrides {
int percent_of_ideal_drambw;
int dram_clock_change_latency_ns;
int dummy_clock_change_latency_ns;
+ int fclk_clock_change_latency_ns;
/* This forces a hard min on the DCFCLK we use
* for DML. Unlike the debug option for forcing
* DCFCLK, this override affects watermark calculations
@@ -742,6 +745,7 @@ struct dc_debug_options {
bool disable_fixed_vs_aux_timeout_wa;
bool force_disable_subvp;
bool force_subvp_mclk_switch;
+ bool allow_sw_cursor_fallback;
bool force_usr_allow;
/* uses value at boot and disables switch */
bool disable_dtb_ref_clk_switch;
@@ -751,6 +755,7 @@ struct dc_debug_options {
uint32_t mst_start_top_delay;
uint8_t psr_power_use_phy_fsm;
enum dml_hostvm_override_opts dml_hostvm_override;
+ bool dml_disallow_alternate_prefetch_modes;
bool use_legacy_soc_bb_mechanism;
bool exit_idle_opt_for_cursor_updates;
bool enable_single_display_2to1_odm_policy;
diff --git a/drivers/gpu/drm/amd/display/dc/dc_dmub_srv.c b/drivers/gpu/drm/amd/display/dc/dc_dmub_srv.c
index 2d61c2a91cee..52a61b3e5a8b 100644
--- a/drivers/gpu/drm/amd/display/dc/dc_dmub_srv.c
+++ b/drivers/gpu/drm/amd/display/dc/dc_dmub_srv.c
@@ -29,6 +29,7 @@
#include "dm_helpers.h"
#include "dc_hw_types.h"
#include "core_types.h"
+#include "../basics/conversion.h"
#define CTX dc_dmub_srv->ctx
#define DC_LOGGER CTX->logger
@@ -275,8 +276,7 @@ void dc_dmub_srv_set_drr_manual_trigger_cmd(struct dc *dc, uint32_t tg_inst)
union dmub_rb_cmd cmd = { 0 };
cmd.drr_update.header.type = DMUB_CMD__FW_ASSISTED_MCLK_SWITCH;
- // TODO: Uncomment once FW headers are promoted
- //cmd.drr_update.header.sub_type = DMUB_CMD__FAMS_SET_MANUAL_TRIGGER;
+ cmd.drr_update.header.sub_type = DMUB_CMD__FAMS_SET_MANUAL_TRIGGER;
cmd.drr_update.dmub_optc_state_req.tg_inst = tg_inst;
cmd.drr_update.header.payload_bytes = sizeof(cmd.drr_update) - sizeof(cmd.drr_update.header);
@@ -417,44 +417,42 @@ static void populate_subvp_cmd_drr_info(struct dc *dc,
struct dc_crtc_timing *main_timing = &subvp_pipe->stream->timing;
struct dc_crtc_timing *phantom_timing = &subvp_pipe->stream->mall_stream_config.paired_stream->timing;
struct dc_crtc_timing *drr_timing = &vblank_pipe->stream->timing;
- int16_t drr_frame_us = 0;
- int16_t min_drr_supported_us = 0;
- int16_t max_drr_supported_us = 0;
- int16_t max_drr_vblank_us = 0;
- int16_t max_drr_mallregion_us = 0;
- int16_t mall_region_us = 0;
- int16_t prefetch_us = 0;
- int16_t subvp_active_us = 0;
- int16_t drr_active_us = 0;
- int16_t min_vtotal_supported = 0;
- int16_t max_vtotal_supported = 0;
+ uint16_t drr_frame_us = 0;
+ uint16_t min_drr_supported_us = 0;
+ uint16_t max_drr_supported_us = 0;
+ uint16_t max_drr_vblank_us = 0;
+ uint16_t max_drr_mallregion_us = 0;
+ uint16_t mall_region_us = 0;
+ uint16_t prefetch_us = 0;
+ uint16_t subvp_active_us = 0;
+ uint16_t drr_active_us = 0;
+ uint16_t min_vtotal_supported = 0;
+ uint16_t max_vtotal_supported = 0;
pipe_data->pipe_config.vblank_data.drr_info.drr_in_use = true;
pipe_data->pipe_config.vblank_data.drr_info.use_ramping = false; // for now don't use ramping
pipe_data->pipe_config.vblank_data.drr_info.drr_window_size_ms = 4; // hardcode 4ms DRR window for now
- drr_frame_us = div64_s64(drr_timing->v_total * drr_timing->h_total,
- (int64_t)(drr_timing->pix_clk_100hz * 100) * 1000000);
+ drr_frame_us = div64_u64(((uint64_t)drr_timing->v_total * drr_timing->h_total * 1000000),
+ (((uint64_t)drr_timing->pix_clk_100hz * 100)));
// P-State allow width and FW delays already included phantom_timing->v_addressable
- mall_region_us = div64_s64(phantom_timing->v_addressable * phantom_timing->h_total,
- (int64_t)(phantom_timing->pix_clk_100hz * 100) * 1000000);
+ mall_region_us = div64_u64(((uint64_t)phantom_timing->v_addressable * phantom_timing->h_total * 1000000),
+ (((uint64_t)phantom_timing->pix_clk_100hz * 100)));
min_drr_supported_us = drr_frame_us + mall_region_us + SUBVP_DRR_MARGIN_US;
- min_vtotal_supported = div64_s64(drr_timing->pix_clk_100hz * 100 *
- (div64_s64((int64_t)min_drr_supported_us, 1000000)),
- (int64_t)drr_timing->h_total);
-
- prefetch_us = div64_s64((phantom_timing->v_total - phantom_timing->v_front_porch) * phantom_timing->h_total,
- (int64_t)(phantom_timing->pix_clk_100hz * 100) * 1000000 +
- dc->caps.subvp_prefetch_end_to_mall_start_us);
- subvp_active_us = div64_s64(main_timing->v_addressable * main_timing->h_total,
- (int64_t)(main_timing->pix_clk_100hz * 100) * 1000000);
- drr_active_us = div64_s64(drr_timing->v_addressable * drr_timing->h_total,
- (int64_t)(drr_timing->pix_clk_100hz * 100) * 1000000);
- max_drr_vblank_us = div64_s64((int64_t)(subvp_active_us - prefetch_us - drr_active_us), 2) + drr_active_us;
+ min_vtotal_supported = div64_u64(((uint64_t)drr_timing->pix_clk_100hz * 100 * min_drr_supported_us),
+ (((uint64_t)drr_timing->h_total * 1000000)));
+
+ prefetch_us = div64_u64(((uint64_t)(phantom_timing->v_total - phantom_timing->v_front_porch) * phantom_timing->h_total * 1000000),
+ (((uint64_t)phantom_timing->pix_clk_100hz * 100) + dc->caps.subvp_prefetch_end_to_mall_start_us));
+ subvp_active_us = div64_u64(((uint64_t)main_timing->v_addressable * main_timing->h_total * 1000000),
+ (((uint64_t)main_timing->pix_clk_100hz * 100)));
+ drr_active_us = div64_u64(((uint64_t)drr_timing->v_addressable * drr_timing->h_total * 1000000),
+ (((uint64_t)drr_timing->pix_clk_100hz * 100)));
+ max_drr_vblank_us = div64_u64((subvp_active_us - prefetch_us - drr_active_us), 2) + drr_active_us;
max_drr_mallregion_us = subvp_active_us - prefetch_us - mall_region_us;
max_drr_supported_us = max_drr_vblank_us > max_drr_mallregion_us ? max_drr_vblank_us : max_drr_mallregion_us;
- max_vtotal_supported = div64_s64(drr_timing->pix_clk_100hz * 100 * (div64_s64((int64_t)max_drr_supported_us, 1000000)),
- (int64_t)drr_timing->h_total);
+ max_vtotal_supported = div64_u64(((uint64_t)drr_timing->pix_clk_100hz * 100 * max_drr_supported_us),
+ (((uint64_t)drr_timing->h_total * 1000000)));
pipe_data->pipe_config.vblank_data.drr_info.min_vtotal_supported = min_vtotal_supported;
pipe_data->pipe_config.vblank_data.drr_info.max_vtotal_supported = max_vtotal_supported;
@@ -548,10 +546,12 @@ static void update_subvp_prefetch_end_to_mall_start(struct dc *dc,
struct dc_crtc_timing *phantom_timing1 = &subvp_pipes[1]->stream->mall_stream_config.paired_stream->timing;
struct dmub_cmd_fw_assisted_mclk_switch_pipe_data_v2 *pipe_data = NULL;
- subvp0_prefetch_us = div64_s64((phantom_timing0->v_total - phantom_timing0->v_front_porch) * phantom_timing0->h_total,
- (int64_t)(phantom_timing0->pix_clk_100hz * 100) * 1000000 + dc->caps.subvp_prefetch_end_to_mall_start_us);
- subvp1_prefetch_us = div64_s64((phantom_timing1->v_total - phantom_timing1->v_front_porch) * phantom_timing1->h_total,
- (int64_t)(phantom_timing1->pix_clk_100hz * 100) * 1000000 + dc->caps.subvp_prefetch_end_to_mall_start_us);
+ subvp0_prefetch_us = div64_u64(((uint64_t)(phantom_timing0->v_total - phantom_timing0->v_front_porch) *
+ (uint64_t)phantom_timing0->h_total * 1000000),
+ (((uint64_t)phantom_timing0->pix_clk_100hz * 100) + dc->caps.subvp_prefetch_end_to_mall_start_us));
+ subvp1_prefetch_us = div64_u64(((uint64_t)(phantom_timing1->v_total - phantom_timing1->v_front_porch) *
+ (uint64_t)phantom_timing1->h_total * 1000000),
+ (((uint64_t)phantom_timing1->pix_clk_100hz * 100) + dc->caps.subvp_prefetch_end_to_mall_start_us));
// Whichever SubVP PIPE has the smaller prefetch (including the prefetch end to mall start time)
// should increase it's prefetch time to match the other
@@ -559,16 +559,17 @@ static void update_subvp_prefetch_end_to_mall_start(struct dc *dc,
pipe_data = &cmd->fw_assisted_mclk_switch_v2.config_data.pipe_data[1];
prefetch_delta_us = subvp0_prefetch_us - subvp1_prefetch_us;
pipe_data->pipe_config.subvp_data.prefetch_to_mall_start_lines =
- div64_s64(((div64_s64((int64_t)(dc->caps.subvp_prefetch_end_to_mall_start_us + prefetch_delta_us), 1000000)) *
- (phantom_timing1->pix_clk_100hz * 100) + phantom_timing1->h_total - 1),
- (int64_t)phantom_timing1->h_total);
+ div64_u64(((uint64_t)(dc->caps.subvp_prefetch_end_to_mall_start_us + prefetch_delta_us) *
+ ((uint64_t)phantom_timing1->pix_clk_100hz * 100) + ((uint64_t)phantom_timing1->h_total * 1000000 - 1)),
+ ((uint64_t)phantom_timing1->h_total * 1000000));
+
} else if (subvp1_prefetch_us > subvp0_prefetch_us) {
pipe_data = &cmd->fw_assisted_mclk_switch_v2.config_data.pipe_data[0];
prefetch_delta_us = subvp1_prefetch_us - subvp0_prefetch_us;
pipe_data->pipe_config.subvp_data.prefetch_to_mall_start_lines =
- div64_s64(((div64_s64((int64_t)(dc->caps.subvp_prefetch_end_to_mall_start_us + prefetch_delta_us), 1000000)) *
- (phantom_timing0->pix_clk_100hz * 100) + phantom_timing0->h_total - 1),
- (int64_t)phantom_timing0->h_total);
+ div64_u64(((uint64_t)(dc->caps.subvp_prefetch_end_to_mall_start_us + prefetch_delta_us) *
+ ((uint64_t)phantom_timing0->pix_clk_100hz * 100) + ((uint64_t)phantom_timing0->h_total * 1000000 - 1)),
+ ((uint64_t)phantom_timing0->h_total * 1000000));
}
}
@@ -601,6 +602,7 @@ static void populate_subvp_cmd_pipe_info(struct dc *dc,
&cmd->fw_assisted_mclk_switch_v2.config_data.pipe_data[cmd_pipe_index];
struct dc_crtc_timing *main_timing = &subvp_pipe->stream->timing;
struct dc_crtc_timing *phantom_timing = &subvp_pipe->stream->mall_stream_config.paired_stream->timing;
+ uint32_t out_num, out_den;
pipe_data->mode = SUBVP;
pipe_data->pipe_config.subvp_data.pix_clk_100hz = subvp_pipe->stream->timing.pix_clk_100hz;
@@ -612,6 +614,16 @@ static void populate_subvp_cmd_pipe_info(struct dc *dc,
main_timing->v_total - main_timing->v_front_porch - main_timing->v_addressable;
pipe_data->pipe_config.subvp_data.mall_region_lines = phantom_timing->v_addressable;
pipe_data->pipe_config.subvp_data.main_pipe_index = subvp_pipe->pipe_idx;
+ pipe_data->pipe_config.subvp_data.is_drr = subvp_pipe->stream->ignore_msa_timing_param;
+
+ /* Calculate the scaling factor from the src and dst height.
+ * e.g. If 3840x2160 being downscaled to 1920x1080, the scaling factor is 1/2.
+ * Reduce the fraction 1080/2160 = 1/2 for the "scaling factor"
+ */
+ reduce_fraction(subvp_pipe->stream->src.height, subvp_pipe->stream->dst.height, &out_num, &out_den);
+ // TODO: Uncomment below lines once DMCUB include headers are promoted
+ //pipe_data->pipe_config.subvp_data.scale_factor_numerator = out_num;
+ //pipe_data->pipe_config.subvp_data.scale_factor_denominator = out_den;
// Prefetch lines is equal to VACTIVE + BP + VSYNC
pipe_data->pipe_config.subvp_data.prefetch_lines =
@@ -619,13 +631,11 @@ static void populate_subvp_cmd_pipe_info(struct dc *dc,
// Round up
pipe_data->pipe_config.subvp_data.prefetch_to_mall_start_lines =
- div64_s64(((div64_s64((int64_t)dc->caps.subvp_prefetch_end_to_mall_start_us, 1000000)) *
- (phantom_timing->pix_clk_100hz * 100) + phantom_timing->h_total - 1),
- (int64_t)phantom_timing->h_total);
+ div64_u64(((uint64_t)dc->caps.subvp_prefetch_end_to_mall_start_us * ((uint64_t)phantom_timing->pix_clk_100hz * 100) +
+ ((uint64_t)phantom_timing->h_total * 1000000 - 1)), ((uint64_t)phantom_timing->h_total * 1000000));
pipe_data->pipe_config.subvp_data.processing_delay_lines =
- div64_s64(((div64_s64((int64_t)dc->caps.subvp_fw_processing_delay_us, 1000000)) *
- (phantom_timing->pix_clk_100hz * 100) + phantom_timing->h_total - 1),
- (int64_t)phantom_timing->h_total);
+ div64_u64(((uint64_t)(dc->caps.subvp_fw_processing_delay_us) * ((uint64_t)phantom_timing->pix_clk_100hz * 100) +
+ ((uint64_t)phantom_timing->h_total * 1000000 - 1)), ((uint64_t)phantom_timing->h_total * 1000000));
// Find phantom pipe index based on phantom stream
for (j = 0; j < dc->res_pool->pipe_count; j++) {
struct pipe_ctx *phantom_pipe = &context->res_ctx.pipe_ctx[j];
diff --git a/drivers/gpu/drm/amd/display/dc/dc_link.h b/drivers/gpu/drm/amd/display/dc/dc_link.h
index a0af0f6afeef..9544abf75e84 100644
--- a/drivers/gpu/drm/amd/display/dc/dc_link.h
+++ b/drivers/gpu/drm/amd/display/dc/dc_link.h
@@ -344,6 +344,7 @@ enum dc_detect_reason {
DETECT_REASON_HPDRX,
DETECT_REASON_FALLBACK,
DETECT_REASON_RETRAIN,
+ DETECT_REASON_TDR,
};
bool dc_link_detect(struct dc_link *dc_link, enum dc_detect_reason reason);
diff --git a/drivers/gpu/drm/amd/display/dc/dce/dce_clock_source.c b/drivers/gpu/drm/amd/display/dc/dce/dce_clock_source.c
index 213de8cabfad..165392380842 100644
--- a/drivers/gpu/drm/amd/display/dc/dce/dce_clock_source.c
+++ b/drivers/gpu/drm/amd/display/dc/dce/dce_clock_source.c
@@ -543,9 +543,11 @@ static void dce112_get_pix_clk_dividers_helper (
switch (pix_clk_params->color_depth) {
case COLOR_DEPTH_101010:
actual_pixel_clock_100hz = (actual_pixel_clock_100hz * 5) >> 2;
+ actual_pixel_clock_100hz -= actual_pixel_clock_100hz % 10;
break;
case COLOR_DEPTH_121212:
actual_pixel_clock_100hz = (actual_pixel_clock_100hz * 6) >> 2;
+ actual_pixel_clock_100hz -= actual_pixel_clock_100hz % 10;
break;
case COLOR_DEPTH_161616:
actual_pixel_clock_100hz = actual_pixel_clock_100hz * 2;
diff --git a/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_dpp.c b/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_dpp.c
index d4a6504dfe00..db7ca4b0cdb9 100644
--- a/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_dpp.c
+++ b/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_dpp.c
@@ -361,8 +361,6 @@ void dpp1_cnv_setup (
select = INPUT_CSC_SELECT_ICSC;
break;
case SURFACE_PIXEL_FORMAT_GRPH_ARGB16161616:
- pixel_format = 22;
- break;
case SURFACE_PIXEL_FORMAT_GRPH_ABGR16161616:
pixel_format = 26; /* ARGB16161616_UNORM */
break;
diff --git a/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_hubp.c b/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_hubp.c
index b54c12400323..564e061ccb58 100644
--- a/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_hubp.c
+++ b/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_hubp.c
@@ -278,9 +278,6 @@ void hubp1_program_pixel_format(
SURFACE_PIXEL_FORMAT, 10);
break;
case SURFACE_PIXEL_FORMAT_GRPH_ARGB16161616:
- REG_UPDATE(DCSURF_SURFACE_CONFIG,
- SURFACE_PIXEL_FORMAT, 22);
- break;
case SURFACE_PIXEL_FORMAT_GRPH_ABGR16161616: /*we use crossbar already*/
REG_UPDATE(DCSURF_SURFACE_CONFIG,
SURFACE_PIXEL_FORMAT, 26); /* ARGB16161616_UNORM */
diff --git a/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_hw_sequencer.c b/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_hw_sequencer.c
index bed783747f16..5b5d952b2b8c 100644
--- a/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_hw_sequencer.c
+++ b/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_hw_sequencer.c
@@ -110,6 +110,7 @@ void dcn10_lock_all_pipes(struct dc *dc,
*/
if (pipe_ctx->top_pipe ||
!pipe_ctx->stream ||
+ !pipe_ctx->plane_state ||
!tg->funcs->is_tg_enabled(tg))
continue;
diff --git a/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_mpc.c b/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_mpc.c
index 769974375b4b..8e9384094f6d 100644
--- a/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_mpc.c
+++ b/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_mpc.c
@@ -131,6 +131,12 @@ struct mpcc *mpc1_get_mpcc_for_dpp(struct mpc_tree *tree, int dpp_id)
while (tmp_mpcc != NULL) {
if (tmp_mpcc->dpp_id == dpp_id)
return tmp_mpcc;
+
+ /* avoid circular linked list */
+ ASSERT(tmp_mpcc != tmp_mpcc->mpcc_bot);
+ if (tmp_mpcc == tmp_mpcc->mpcc_bot)
+ break;
+
tmp_mpcc = tmp_mpcc->mpcc_bot;
}
return NULL;
diff --git a/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_optc.c b/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_optc.c
index e1a9a45b03b6..3fc300cd1ce9 100644
--- a/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_optc.c
+++ b/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_optc.c
@@ -465,6 +465,11 @@ void optc1_enable_optc_clock(struct timing_generator *optc, bool enable)
OTG_CLOCK_ON, 1,
1, 1000);
} else {
+
+ //last chance to clear underflow, otherwise, it will always there due to clock is off.
+ if (optc->funcs->is_optc_underflow_occurred(optc) == true)
+ optc->funcs->clear_optc_underflow(optc);
+
REG_UPDATE_2(OTG_CLOCK_CONTROL,
OTG_CLOCK_GATE_DIS, 0,
OTG_CLOCK_EN, 0);
diff --git a/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_dpp.c b/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_dpp.c
index ea1f14af0db7..eaa7032f0f1a 100644
--- a/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_dpp.c
+++ b/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_dpp.c
@@ -166,8 +166,6 @@ static void dpp2_cnv_setup (
select = DCN2_ICSC_SELECT_ICSC_A;
break;
case SURFACE_PIXEL_FORMAT_GRPH_ARGB16161616:
- pixel_format = 22;
- break;
case SURFACE_PIXEL_FORMAT_GRPH_ABGR16161616:
pixel_format = 26; /* ARGB16161616_UNORM */
break;
diff --git a/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_hubp.c b/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_hubp.c
index 936af65381ef..9570c2118ccc 100644
--- a/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_hubp.c
+++ b/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_hubp.c
@@ -463,9 +463,6 @@ void hubp2_program_pixel_format(
SURFACE_PIXEL_FORMAT, 10);
break;
case SURFACE_PIXEL_FORMAT_GRPH_ARGB16161616:
- REG_UPDATE(DCSURF_SURFACE_CONFIG,
- SURFACE_PIXEL_FORMAT, 22);
- break;
case SURFACE_PIXEL_FORMAT_GRPH_ABGR16161616: /*we use crossbar already*/
REG_UPDATE(DCSURF_SURFACE_CONFIG,
SURFACE_PIXEL_FORMAT, 26); /* ARGB16161616_UNORM */
diff --git a/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_mpc.c b/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_mpc.c
index 3d307dd58e9a..116f67a0b989 100644
--- a/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_mpc.c
+++ b/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_mpc.c
@@ -531,6 +531,12 @@ static struct mpcc *mpc2_get_mpcc_for_dpp(struct mpc_tree *tree, int dpp_id)
while (tmp_mpcc != NULL) {
if (tmp_mpcc->dpp_id == 0xf || tmp_mpcc->dpp_id == dpp_id)
return tmp_mpcc;
+
+ /* avoid circular linked list */
+ ASSERT(tmp_mpcc != tmp_mpcc->mpcc_bot);
+ if (tmp_mpcc == tmp_mpcc->mpcc_bot)
+ break;
+
tmp_mpcc = tmp_mpcc->mpcc_bot;
}
return NULL;
diff --git a/drivers/gpu/drm/amd/display/dc/dcn21/dcn21_hubbub.c b/drivers/gpu/drm/amd/display/dc/dcn21/dcn21_hubbub.c
index c5e200d09038..5752271f22df 100644
--- a/drivers/gpu/drm/amd/display/dc/dcn21/dcn21_hubbub.c
+++ b/drivers/gpu/drm/amd/display/dc/dcn21/dcn21_hubbub.c
@@ -67,9 +67,15 @@ static uint32_t convert_and_clamp(
void dcn21_dchvm_init(struct hubbub *hubbub)
{
struct dcn20_hubbub *hubbub1 = TO_DCN20_HUBBUB(hubbub);
- uint32_t riommu_active;
+ uint32_t riommu_active, prefetch_done;
int i;
+ REG_GET(DCHVM_RIOMMU_STAT0, HOSTVM_PREFETCH_DONE, &prefetch_done);
+
+ if (prefetch_done) {
+ hubbub->riommu_active = true;
+ return;
+ }
//Init DCHVM block
REG_UPDATE(DCHVM_CTRL0, HOSTVM_INIT_REQ, 1);
diff --git a/drivers/gpu/drm/amd/display/dc/dcn30/dcn30_dpp.c b/drivers/gpu/drm/amd/display/dc/dcn30/dcn30_dpp.c
index 77b00f86c216..4a668d6563df 100644
--- a/drivers/gpu/drm/amd/display/dc/dcn30/dcn30_dpp.c
+++ b/drivers/gpu/drm/amd/display/dc/dcn30/dcn30_dpp.c
@@ -244,8 +244,6 @@ void dpp3_cnv_setup (
select = INPUT_CSC_SELECT_ICSC;
break;
case SURFACE_PIXEL_FORMAT_GRPH_ARGB16161616:
- pixel_format = 22;
- break;
case SURFACE_PIXEL_FORMAT_GRPH_ABGR16161616:
pixel_format = 26; /* ARGB16161616_UNORM */
break;
diff --git a/drivers/gpu/drm/amd/display/dc/dcn30/dcn30_hubp.c b/drivers/gpu/drm/amd/display/dc/dcn30/dcn30_hubp.c
index 6a4dcafb9bba..dc3e8df706b3 100644
--- a/drivers/gpu/drm/amd/display/dc/dcn30/dcn30_hubp.c
+++ b/drivers/gpu/drm/amd/display/dc/dcn30/dcn30_hubp.c
@@ -86,7 +86,7 @@ bool hubp3_program_surface_flip_and_addr(
VMID, address->vmid);
if (address->type == PLN_ADDR_TYPE_GRPH_STEREO) {
- REG_UPDATE(DCSURF_FLIP_CONTROL, SURFACE_FLIP_MODE_FOR_STEREOSYNC, 0x1);
+ REG_UPDATE(DCSURF_FLIP_CONTROL, SURFACE_FLIP_MODE_FOR_STEREOSYNC, 0);
REG_UPDATE(DCSURF_FLIP_CONTROL, SURFACE_FLIP_IN_STEREOSYNC, 0x1);
} else {
diff --git a/drivers/gpu/drm/amd/display/dc/dcn303/dcn303_resource.c b/drivers/gpu/drm/amd/display/dc/dcn303/dcn303_resource.c
index 0a67f8a5656d..d97076648acb 100644
--- a/drivers/gpu/drm/amd/display/dc/dcn303/dcn303_resource.c
+++ b/drivers/gpu/drm/amd/display/dc/dcn303/dcn303_resource.c
@@ -372,7 +372,7 @@ static struct stream_encoder *dcn303_stream_encoder_create(enum engine_id eng_id
int afmt_inst;
/* Mapping of VPG, AFMT, DME register blocks to DIO block instance */
- if (eng_id <= ENGINE_ID_DIGE) {
+ if (eng_id <= ENGINE_ID_DIGB) {
vpg_inst = eng_id;
afmt_inst = eng_id;
} else
diff --git a/drivers/gpu/drm/amd/display/dc/dcn31/dcn31_dio_link_encoder.c b/drivers/gpu/drm/amd/display/dc/dcn31/dcn31_dio_link_encoder.c
index a788d160953b..ab70ebd8f223 100644
--- a/drivers/gpu/drm/amd/display/dc/dcn31/dcn31_dio_link_encoder.c
+++ b/drivers/gpu/drm/amd/display/dc/dcn31/dcn31_dio_link_encoder.c
@@ -104,6 +104,9 @@ static bool has_query_dp_alt(struct link_encoder *enc)
{
struct dc_dmub_srv *dc_dmub_srv = enc->ctx->dmub_srv;
+ if (enc->ctx->dce_version >= DCN_VERSION_3_15)
+ return true;
+
/* Supports development firmware and firmware >= 4.0.11 */
return dc_dmub_srv &&
!(dc_dmub_srv->dmub->fw_version >= DMUB_FW_VERSION(4, 0, 0) &&
diff --git a/drivers/gpu/drm/amd/display/dc/dcn31/dcn31_hpo_dp_stream_encoder.h b/drivers/gpu/drm/amd/display/dc/dcn31/dcn31_hpo_dp_stream_encoder.h
index 7c77c71591a0..82c3b3ac1f0d 100644
--- a/drivers/gpu/drm/amd/display/dc/dcn31/dcn31_hpo_dp_stream_encoder.h
+++ b/drivers/gpu/drm/amd/display/dc/dcn31/dcn31_hpo_dp_stream_encoder.h
@@ -162,7 +162,8 @@
SE_SF(DP_SYM32_ENC0_DP_SYM32_ENC_SDP_AUDIO_CONTROL0, AIP_ENABLE, mask_sh),\
SE_SF(DP_SYM32_ENC0_DP_SYM32_ENC_SDP_AUDIO_CONTROL0, ACM_ENABLE, mask_sh),\
SE_SF(DP_SYM32_ENC0_DP_SYM32_ENC_VID_CRC_CONTROL, CRC_ENABLE, mask_sh),\
- SE_SF(DP_SYM32_ENC0_DP_SYM32_ENC_VID_CRC_CONTROL, CRC_CONT_MODE_ENABLE, mask_sh)
+ SE_SF(DP_SYM32_ENC0_DP_SYM32_ENC_VID_CRC_CONTROL, CRC_CONT_MODE_ENABLE, mask_sh),\
+ SE_SF(DP_SYM32_ENC0_DP_SYM32_ENC_HBLANK_CONTROL, HBLANK_MINIMUM_SYMBOL_WIDTH, mask_sh)
#define DCN3_1_HPO_DP_STREAM_ENC_REG_FIELD_LIST(type) \
diff --git a/drivers/gpu/drm/amd/display/dc/dcn31/dcn31_resource.c b/drivers/gpu/drm/amd/display/dc/dcn31/dcn31_resource.c
index 468a893ff785..aedff18aff56 100644
--- a/drivers/gpu/drm/amd/display/dc/dcn31/dcn31_resource.c
+++ b/drivers/gpu/drm/amd/display/dc/dcn31/dcn31_resource.c
@@ -2153,7 +2153,7 @@ static bool dcn31_resource_construct(
pool->base.usb4_dpia_count = 4;
}
- if (dc->ctx->asic_id.chip_family == AMDGPU_FAMILY_GC_11_0_2)
+ if (dc->ctx->asic_id.chip_family == AMDGPU_FAMILY_GC_11_0_1)
pool->base.usb4_dpia_count = 4;
/* Audio, Stream Encoders including HPO and virtual, MPC 3D LUTs */
diff --git a/drivers/gpu/drm/amd/display/dc/dcn31/dcn31_resource.h b/drivers/gpu/drm/amd/display/dc/dcn31/dcn31_resource.h
index 41f8ec99da6b..901436591ed4 100644
--- a/drivers/gpu/drm/amd/display/dc/dcn31/dcn31_resource.h
+++ b/drivers/gpu/drm/amd/display/dc/dcn31/dcn31_resource.h
@@ -32,7 +32,6 @@
container_of(pool, struct dcn31_resource_pool, base)
extern struct _vcs_dpi_ip_params_st dcn3_1_ip;
-extern struct _vcs_dpi_soc_bounding_box_st dcn3_1_soc;
struct dcn31_resource_pool {
struct resource_pool base;
diff --git a/drivers/gpu/drm/amd/display/dc/dcn314/Makefile b/drivers/gpu/drm/amd/display/dc/dcn314/Makefile
index e3b5a95e03b1..702c28c2560e 100644
--- a/drivers/gpu/drm/amd/display/dc/dcn314/Makefile
+++ b/drivers/gpu/drm/amd/display/dc/dcn314/Makefile
@@ -13,31 +13,6 @@
DCN314 = dcn314_resource.o dcn314_hwseq.o dcn314_init.o \
dcn314_dio_stream_encoder.o dcn314_dccg.o dcn314_optc.o
-ifdef CONFIG_X86
-CFLAGS_$(AMDDALPATH)/dc/dcn314/dcn314_resource.o := -mhard-float -msse
-endif
-
-ifdef CONFIG_PPC64
-CFLAGS_$(AMDDALPATH)/dc/dcn314/dcn314_resource.o := -mhard-float -maltivec
-endif
-
-ifdef CONFIG_CC_IS_GCC
-ifeq ($(call cc-ifversion, -lt, 0701, y), y)
-IS_OLD_GCC = 1
-endif
-endif
-
-ifdef CONFIG_X86
-ifdef IS_OLD_GCC
-# Stack alignment mismatch, proceed with caution.
-# GCC < 7.1 cannot compile code using `double` and -mpreferred-stack-boundary=3
-# (8B stack alignment).
-CFLAGS_$(AMDDALPATH)/dc/dcn314/dcn314_resource.o += -mpreferred-stack-boundary=4
-else
-CFLAGS_$(AMDDALPATH)/dc/dcn314/dcn314_resource.o += -msse2
-endif
-endif
-
AMD_DAL_DCN314 = $(addprefix $(AMDDALPATH)/dc/dcn314/,$(DCN314))
AMD_DISPLAY_FILES += $(AMD_DAL_DCN314)
diff --git a/drivers/gpu/drm/amd/display/dc/dcn314/dcn314_dio_stream_encoder.c b/drivers/gpu/drm/amd/display/dc/dcn314/dcn314_dio_stream_encoder.c
index b384f30395d3..06d8638db696 100644
--- a/drivers/gpu/drm/amd/display/dc/dcn314/dcn314_dio_stream_encoder.c
+++ b/drivers/gpu/drm/amd/display/dc/dcn314/dcn314_dio_stream_encoder.c
@@ -67,8 +67,7 @@ static void enc314_disable_fifo(struct stream_encoder *enc)
{
struct dcn10_stream_encoder *enc1 = DCN10STRENC_FROM_STRENC(enc);
- REG_UPDATE_2(DIG_FIFO_CTRL0, DIG_FIFO_ENABLE, 0,
- DIG_FIFO_READ_START_LEVEL, 0);
+ REG_UPDATE(DIG_FIFO_CTRL0, DIG_FIFO_ENABLE, 0);
}
static void enc314_dp_set_odm_combine(
@@ -317,6 +316,7 @@ static void enc314_stream_encoder_dp_unblank(
/* switch DP encoder to CRTC data, but reset it the fifo first. It may happen
* that it overflows during mode transition, and sometimes doesn't recover.
*/
+ REG_UPDATE(DIG_FIFO_CTRL0, DIG_FIFO_READ_START_LEVEL, 0x7);
REG_UPDATE(DP_STEER_FIFO, DP_STEER_FIFO_RESET, 1);
udelay(10);
diff --git a/drivers/gpu/drm/amd/display/dc/dcn314/dcn314_hwseq.c b/drivers/gpu/drm/amd/display/dc/dcn314/dcn314_hwseq.c
index 755c715ad8dc..39931d48f385 100644
--- a/drivers/gpu/drm/amd/display/dc/dcn314/dcn314_hwseq.c
+++ b/drivers/gpu/drm/amd/display/dc/dcn314/dcn314_hwseq.c
@@ -343,7 +343,10 @@ unsigned int dcn314_calculate_dccg_k1_k2_values(struct pipe_ctx *pipe_ctx, unsig
{
struct dc_stream_state *stream = pipe_ctx->stream;
unsigned int odm_combine_factor = 0;
+ struct dc *dc = pipe_ctx->stream->ctx->dc;
+ bool two_pix_per_container = false;
+ two_pix_per_container = optc2_is_two_pixels_per_containter(&stream->timing);
odm_combine_factor = get_odm_config(pipe_ctx, NULL);
if (is_dp_128b_132b_signal(pipe_ctx)) {
@@ -355,16 +358,13 @@ unsigned int dcn314_calculate_dccg_k1_k2_values(struct pipe_ctx *pipe_ctx, unsig
else
*k2_div = PIXEL_RATE_DIV_BY_4;
} else if (dc_is_dp_signal(pipe_ctx->stream->signal)) {
- if (stream->timing.pixel_encoding == PIXEL_ENCODING_YCBCR420) {
+ if (two_pix_per_container) {
*k1_div = PIXEL_RATE_DIV_BY_1;
*k2_div = PIXEL_RATE_DIV_BY_2;
- } else if (stream->timing.pixel_encoding == PIXEL_ENCODING_YCBCR422) {
- *k1_div = PIXEL_RATE_DIV_BY_2;
- *k2_div = PIXEL_RATE_DIV_BY_2;
} else {
- if (odm_combine_factor == 1)
- *k2_div = PIXEL_RATE_DIV_BY_4;
- else if (odm_combine_factor == 2)
+ *k1_div = PIXEL_RATE_DIV_BY_1;
+ *k2_div = PIXEL_RATE_DIV_BY_4;
+ if ((odm_combine_factor == 2) || dc->debug.enable_dp_dig_pixel_rate_div_policy)
*k2_div = PIXEL_RATE_DIV_BY_2;
}
}
@@ -374,3 +374,31 @@ unsigned int dcn314_calculate_dccg_k1_k2_values(struct pipe_ctx *pipe_ctx, unsig
return odm_combine_factor;
}
+
+void dcn314_set_pixels_per_cycle(struct pipe_ctx *pipe_ctx)
+{
+ uint32_t pix_per_cycle = 1;
+ uint32_t odm_combine_factor = 1;
+
+ if (!pipe_ctx || !pipe_ctx->stream || !pipe_ctx->stream_res.stream_enc)
+ return;
+
+ odm_combine_factor = get_odm_config(pipe_ctx, NULL);
+ if (optc2_is_two_pixels_per_containter(&pipe_ctx->stream->timing) || odm_combine_factor > 1
+ || dcn314_is_dp_dig_pixel_rate_div_policy(pipe_ctx))
+ pix_per_cycle = 2;
+
+ if (pipe_ctx->stream_res.stream_enc->funcs->set_input_mode)
+ pipe_ctx->stream_res.stream_enc->funcs->set_input_mode(pipe_ctx->stream_res.stream_enc,
+ pix_per_cycle);
+}
+
+bool dcn314_is_dp_dig_pixel_rate_div_policy(struct pipe_ctx *pipe_ctx)
+{
+ struct dc *dc = pipe_ctx->stream->ctx->dc;
+
+ if (dc_is_dp_signal(pipe_ctx->stream->signal) && !is_dp_128b_132b_signal(pipe_ctx) &&
+ dc->debug.enable_dp_dig_pixel_rate_div_policy)
+ return true;
+ return false;
+}
diff --git a/drivers/gpu/drm/amd/display/dc/dcn314/dcn314_hwseq.h b/drivers/gpu/drm/amd/display/dc/dcn314/dcn314_hwseq.h
index be0f5e4d48e1..d014580592ac 100644
--- a/drivers/gpu/drm/amd/display/dc/dcn314/dcn314_hwseq.h
+++ b/drivers/gpu/drm/amd/display/dc/dcn314/dcn314_hwseq.h
@@ -39,4 +39,8 @@ void dcn314_enable_power_gating_plane(struct dce_hwseq *hws, bool enable);
unsigned int dcn314_calculate_dccg_k1_k2_values(struct pipe_ctx *pipe_ctx, unsigned int *k1_div, unsigned int *k2_div);
+void dcn314_set_pixels_per_cycle(struct pipe_ctx *pipe_ctx);
+
+bool dcn314_is_dp_dig_pixel_rate_div_policy(struct pipe_ctx *pipe_ctx);
+
#endif /* __DC_HWSS_DCN314_H__ */
diff --git a/drivers/gpu/drm/amd/display/dc/dcn314/dcn314_init.c b/drivers/gpu/drm/amd/display/dc/dcn314/dcn314_init.c
index b9debeb081fd..fcf67eb3478f 100644
--- a/drivers/gpu/drm/amd/display/dc/dcn314/dcn314_init.c
+++ b/drivers/gpu/drm/amd/display/dc/dcn314/dcn314_init.c
@@ -145,6 +145,8 @@ static const struct hwseq_private_funcs dcn314_private_funcs = {
.set_shaper_3dlut = dcn20_set_shaper_3dlut,
.setup_hpo_hw_control = dcn31_setup_hpo_hw_control,
.calculate_dccg_k1_k2_values = dcn314_calculate_dccg_k1_k2_values,
+ .set_pixels_per_cycle = dcn314_set_pixels_per_cycle,
+ .is_dp_dig_pixel_rate_div_policy = dcn314_is_dp_dig_pixel_rate_div_policy,
};
void dcn314_hw_sequencer_construct(struct dc *dc)
diff --git a/drivers/gpu/drm/amd/display/dc/dcn314/dcn314_optc.c b/drivers/gpu/drm/amd/display/dc/dcn314/dcn314_optc.c
index 0c7980266b85..38aa28ec6b13 100644
--- a/drivers/gpu/drm/amd/display/dc/dcn314/dcn314_optc.c
+++ b/drivers/gpu/drm/amd/display/dc/dcn314/dcn314_optc.c
@@ -98,7 +98,8 @@ static void optc314_set_odm_combine(struct timing_generator *optc, int *opp_id,
REG_UPDATE(OPTC_WIDTH_CONTROL,
OPTC_SEGMENT_WIDTH, mpcc_hactive);
- REG_SET(OTG_H_TIMING_CNTL, 0, OTG_H_TIMING_DIV_MODE, opp_cnt - 1);
+ REG_UPDATE(OTG_H_TIMING_CNTL,
+ OTG_H_TIMING_DIV_MODE, opp_cnt - 1);
optc1->opp_count = opp_cnt;
}
diff --git a/drivers/gpu/drm/amd/display/dc/dcn314/dcn314_resource.c b/drivers/gpu/drm/amd/display/dc/dcn314/dcn314_resource.c
index 63861cdfb09f..2a2a4a9cc117 100644
--- a/drivers/gpu/drm/amd/display/dc/dcn314/dcn314_resource.c
+++ b/drivers/gpu/drm/amd/display/dc/dcn314/dcn314_resource.c
@@ -70,6 +70,7 @@
#include "dce110/dce110_resource.h"
#include "dml/display_mode_vba.h"
#include "dml/dcn31/dcn31_fpu.h"
+#include "dml/dcn314/dcn314_fpu.h"
#include "dcn314/dcn314_dccg.h"
#include "dcn10/dcn10_resource.h"
#include "dcn31/dcn31_panel_cntl.h"
@@ -132,155 +133,6 @@ static const struct IP_BASE DCN_BASE = { { { { 0x00000012, 0x000000C0, 0x000034C
#define DC_LOGGER_INIT(logger)
-#define DCN3_14_DEFAULT_DET_SIZE 384
-#define DCN3_14_MAX_DET_SIZE 384
-#define DCN3_14_MIN_COMPBUF_SIZE_KB 128
-#define DCN3_14_CRB_SEGMENT_SIZE_KB 64
-struct _vcs_dpi_ip_params_st dcn3_14_ip = {
- .VBlankNomDefaultUS = 668,
- .gpuvm_enable = 1,
- .gpuvm_max_page_table_levels = 1,
- .hostvm_enable = 1,
- .hostvm_max_page_table_levels = 2,
- .rob_buffer_size_kbytes = 64,
- .det_buffer_size_kbytes = DCN3_14_DEFAULT_DET_SIZE,
- .config_return_buffer_size_in_kbytes = 1792,
- .compressed_buffer_segment_size_in_kbytes = 64,
- .meta_fifo_size_in_kentries = 32,
- .zero_size_buffer_entries = 512,
- .compbuf_reserved_space_64b = 256,
- .compbuf_reserved_space_zs = 64,
- .dpp_output_buffer_pixels = 2560,
- .opp_output_buffer_lines = 1,
- .pixel_chunk_size_kbytes = 8,
- .meta_chunk_size_kbytes = 2,
- .min_meta_chunk_size_bytes = 256,
- .writeback_chunk_size_kbytes = 8,
- .ptoi_supported = false,
- .num_dsc = 4,
- .maximum_dsc_bits_per_component = 10,
- .dsc422_native_support = false,
- .is_line_buffer_bpp_fixed = true,
- .line_buffer_fixed_bpp = 48,
- .line_buffer_size_bits = 789504,
- .max_line_buffer_lines = 12,
- .writeback_interface_buffer_size_kbytes = 90,
- .max_num_dpp = 4,
- .max_num_otg = 4,
- .max_num_hdmi_frl_outputs = 1,
- .max_num_wb = 1,
- .max_dchub_pscl_bw_pix_per_clk = 4,
- .max_pscl_lb_bw_pix_per_clk = 2,
- .max_lb_vscl_bw_pix_per_clk = 4,
- .max_vscl_hscl_bw_pix_per_clk = 4,
- .max_hscl_ratio = 6,
- .max_vscl_ratio = 6,
- .max_hscl_taps = 8,
- .max_vscl_taps = 8,
- .dpte_buffer_size_in_pte_reqs_luma = 64,
- .dpte_buffer_size_in_pte_reqs_chroma = 34,
- .dispclk_ramp_margin_percent = 1,
- .max_inter_dcn_tile_repeaters = 8,
- .cursor_buffer_size = 16,
- .cursor_chunk_size = 2,
- .writeback_line_buffer_buffer_size = 0,
- .writeback_min_hscl_ratio = 1,
- .writeback_min_vscl_ratio = 1,
- .writeback_max_hscl_ratio = 1,
- .writeback_max_vscl_ratio = 1,
- .writeback_max_hscl_taps = 1,
- .writeback_max_vscl_taps = 1,
- .dppclk_delay_subtotal = 46,
- .dppclk_delay_scl = 50,
- .dppclk_delay_scl_lb_only = 16,
- .dppclk_delay_cnvc_formatter = 27,
- .dppclk_delay_cnvc_cursor = 6,
- .dispclk_delay_subtotal = 119,
- .dynamic_metadata_vm_enabled = false,
- .odm_combine_4to1_supported = false,
- .dcc_supported = true,
-};
-
-struct _vcs_dpi_soc_bounding_box_st dcn3_14_soc = {
- /*TODO: correct dispclk/dppclk voltage level determination*/
- .clock_limits = {
- {
- .state = 0,
- .dispclk_mhz = 1200.0,
- .dppclk_mhz = 1200.0,
- .phyclk_mhz = 600.0,
- .phyclk_d18_mhz = 667.0,
- .dscclk_mhz = 186.0,
- .dtbclk_mhz = 625.0,
- },
- {
- .state = 1,
- .dispclk_mhz = 1200.0,
- .dppclk_mhz = 1200.0,
- .phyclk_mhz = 810.0,
- .phyclk_d18_mhz = 667.0,
- .dscclk_mhz = 209.0,
- .dtbclk_mhz = 625.0,
- },
- {
- .state = 2,
- .dispclk_mhz = 1200.0,
- .dppclk_mhz = 1200.0,
- .phyclk_mhz = 810.0,
- .phyclk_d18_mhz = 667.0,
- .dscclk_mhz = 209.0,
- .dtbclk_mhz = 625.0,
- },
- {
- .state = 3,
- .dispclk_mhz = 1200.0,
- .dppclk_mhz = 1200.0,
- .phyclk_mhz = 810.0,
- .phyclk_d18_mhz = 667.0,
- .dscclk_mhz = 371.0,
- .dtbclk_mhz = 625.0,
- },
- {
- .state = 4,
- .dispclk_mhz = 1200.0,
- .dppclk_mhz = 1200.0,
- .phyclk_mhz = 810.0,
- .phyclk_d18_mhz = 667.0,
- .dscclk_mhz = 417.0,
- .dtbclk_mhz = 625.0,
- },
- },
- .num_states = 5,
- .sr_exit_time_us = 9.0,
- .sr_enter_plus_exit_time_us = 11.0,
- .sr_exit_z8_time_us = 442.0,
- .sr_enter_plus_exit_z8_time_us = 560.0,
- .writeback_latency_us = 12.0,
- .dram_channel_width_bytes = 4,
- .round_trip_ping_latency_dcfclk_cycles = 106,
- .urgent_latency_pixel_data_only_us = 4.0,
- .urgent_latency_pixel_mixed_with_vm_data_us = 4.0,
- .urgent_latency_vm_data_only_us = 4.0,
- .urgent_out_of_order_return_per_channel_pixel_only_bytes = 4096,
- .urgent_out_of_order_return_per_channel_pixel_and_vm_bytes = 4096,
- .urgent_out_of_order_return_per_channel_vm_only_bytes = 4096,
- .pct_ideal_sdp_bw_after_urgent = 80.0,
- .pct_ideal_dram_sdp_bw_after_urgent_pixel_only = 65.0,
- .pct_ideal_dram_sdp_bw_after_urgent_pixel_and_vm = 60.0,
- .pct_ideal_dram_sdp_bw_after_urgent_vm_only = 30.0,
- .max_avg_sdp_bw_use_normal_percent = 60.0,
- .max_avg_dram_bw_use_normal_percent = 60.0,
- .fabric_datapath_to_dcn_data_return_bytes = 32,
- .return_bus_width_bytes = 64,
- .downspread_percent = 0.38,
- .dcn_downspread_percent = 0.5,
- .gpuvm_min_page_size_bytes = 4096,
- .hostvm_min_page_size_bytes = 4096,
- .do_urgent_latency_adjustment = false,
- .urgent_latency_adjustment_fabric_clock_component_us = 0,
- .urgent_latency_adjustment_fabric_clock_reference_mhz = 0,
-};
-
enum dcn31_clk_src_array_id {
DCN31_CLK_SRC_PLL0,
DCN31_CLK_SRC_PLL1,
@@ -602,6 +454,7 @@ static const struct dcn31_hpo_dp_stream_encoder_registers hpo_dp_stream_enc_regs
hpo_dp_stream_encoder_reg_list(0),
hpo_dp_stream_encoder_reg_list(1),
hpo_dp_stream_encoder_reg_list(2),
+ hpo_dp_stream_encoder_reg_list(3)
};
static const struct dcn31_hpo_dp_stream_encoder_shift hpo_dp_se_shift = {
@@ -1402,7 +1255,7 @@ static struct stream_encoder *dcn314_stream_encoder_create(
int afmt_inst;
/* Mapping of VPG, AFMT, DME register blocks to DIO block instance */
- if (eng_id <= ENGINE_ID_DIGF) {
+ if (eng_id < ENGINE_ID_DIGF) {
vpg_inst = eng_id;
afmt_inst = eng_id;
} else
@@ -1447,7 +1300,8 @@ static struct hpo_dp_stream_encoder *dcn31_hpo_dp_stream_encoder_create(
* VPG[8] -> HPO_DP[2]
* VPG[9] -> HPO_DP[3]
*/
- vpg_inst = hpo_dp_inst + 6;
+ //Uses offset index 5-8, but actually maps to vpg_inst 6-9
+ vpg_inst = hpo_dp_inst + 5;
/* Mapping of APG register blocks to HPO DP block instance:
* APG[0] -> HPO_DP[0]
@@ -1793,109 +1647,16 @@ static struct clock_source *dcn31_clock_source_create(
return NULL;
}
-static bool is_dual_plane(enum surface_pixel_format format)
-{
- return format >= SURFACE_PIXEL_FORMAT_VIDEO_BEGIN || format == SURFACE_PIXEL_FORMAT_GRPH_RGBE_ALPHA;
-}
-
static int dcn314_populate_dml_pipes_from_context(
struct dc *dc, struct dc_state *context,
display_e2e_pipe_params_st *pipes,
bool fast_validate)
{
- int i, pipe_cnt;
- struct resource_context *res_ctx = &context->res_ctx;
- struct pipe_ctx *pipe;
- bool upscaled = false;
-
- dcn20_populate_dml_pipes_from_context(dc, context, pipes, fast_validate);
-
- for (i = 0, pipe_cnt = 0; i < dc->res_pool->pipe_count; i++) {
- struct dc_crtc_timing *timing;
-
- if (!res_ctx->pipe_ctx[i].stream)
- continue;
- pipe = &res_ctx->pipe_ctx[i];
- timing = &pipe->stream->timing;
-
- if (dc_extended_blank_supported(dc) && pipe->stream->adjust.v_total_max == pipe->stream->adjust.v_total_min
- && pipe->stream->adjust.v_total_min > timing->v_total)
- pipes[pipe_cnt].pipe.dest.vtotal = pipe->stream->adjust.v_total_min;
-
- if (pipe->plane_state &&
- (pipe->plane_state->src_rect.height < pipe->plane_state->dst_rect.height ||
- pipe->plane_state->src_rect.width < pipe->plane_state->dst_rect.width))
- upscaled = true;
-
- /*
- * Immediate flip can be set dynamically after enabling the plane.
- * We need to require support for immediate flip or underflow can be
- * intermittently experienced depending on peak b/w requirements.
- */
- pipes[pipe_cnt].pipe.src.immediate_flip = true;
-
- pipes[pipe_cnt].pipe.src.unbounded_req_mode = false;
- pipes[pipe_cnt].pipe.src.hostvm = dc->res_pool->hubbub->riommu_active;
- pipes[pipe_cnt].pipe.src.gpuvm = true;
- pipes[pipe_cnt].pipe.src.dcc_fraction_of_zs_req_luma = 0;
- pipes[pipe_cnt].pipe.src.dcc_fraction_of_zs_req_chroma = 0;
- pipes[pipe_cnt].pipe.dest.vfront_porch = timing->v_front_porch;
- pipes[pipe_cnt].pipe.src.dcc_rate = 3;
- pipes[pipe_cnt].dout.dsc_input_bpc = 0;
-
- if (pipes[pipe_cnt].dout.dsc_enable) {
- switch (timing->display_color_depth) {
- case COLOR_DEPTH_888:
- pipes[pipe_cnt].dout.dsc_input_bpc = 8;
- break;
- case COLOR_DEPTH_101010:
- pipes[pipe_cnt].dout.dsc_input_bpc = 10;
- break;
- case COLOR_DEPTH_121212:
- pipes[pipe_cnt].dout.dsc_input_bpc = 12;
- break;
- default:
- ASSERT(0);
- break;
- }
- }
-
- pipe_cnt++;
- }
- context->bw_ctx.dml.ip.det_buffer_size_kbytes = DCN3_14_DEFAULT_DET_SIZE;
-
- dc->config.enable_4to1MPC = false;
- if (pipe_cnt == 1 && pipe->plane_state && !dc->debug.disable_z9_mpc) {
- if (is_dual_plane(pipe->plane_state->format)
- && pipe->plane_state->src_rect.width <= 1920 && pipe->plane_state->src_rect.height <= 1080) {
- dc->config.enable_4to1MPC = true;
- } else if (!is_dual_plane(pipe->plane_state->format) && pipe->plane_state->src_rect.width <= 5120) {
- /* Limit to 5k max to avoid forced pipe split when there is not enough detile for swath */
- context->bw_ctx.dml.ip.det_buffer_size_kbytes = 192;
- pipes[0].pipe.src.unbounded_req_mode = true;
- }
- } else if (context->stream_count >= dc->debug.crb_alloc_policy_min_disp_count
- && dc->debug.crb_alloc_policy > DET_SIZE_DEFAULT) {
- context->bw_ctx.dml.ip.det_buffer_size_kbytes = dc->debug.crb_alloc_policy * 64;
- } else if (context->stream_count >= 3 && upscaled) {
- context->bw_ctx.dml.ip.det_buffer_size_kbytes = 192;
- }
-
- for (i = 0; i < dc->res_pool->pipe_count; i++) {
- struct pipe_ctx *pipe = &context->res_ctx.pipe_ctx[i];
-
- if (!pipe->stream)
- continue;
+ int pipe_cnt;
- if (pipe->stream->signal == SIGNAL_TYPE_EDP && dc->debug.seamless_boot_odm_combine &&
- pipe->stream->apply_seamless_boot_optimization) {
-
- if (pipe->stream->apply_boot_odm_mode == dm_odm_combine_policy_2to1) {
- context->bw_ctx.dml.vba.ODMCombinePolicy = dm_odm_combine_policy_2to1;
- break;
- }
- }
- }
+ DC_FP_START();
+ pipe_cnt = dcn314_populate_dml_pipes_from_context_fpu(dc, context, pipes, fast_validate);
+ DC_FP_END();
return pipe_cnt;
}
@@ -1906,88 +1667,9 @@ static struct dc_cap_funcs cap_funcs = {
static void dcn314_update_bw_bounding_box(struct dc *dc, struct clk_bw_params *bw_params)
{
- struct clk_limit_table *clk_table = &bw_params->clk_table;
- struct _vcs_dpi_voltage_scaling_st *clock_tmp = dcn3_14_soc._clock_tmp;
- unsigned int i, closest_clk_lvl;
- int max_dispclk_mhz = 0, max_dppclk_mhz = 0;
- int j;
-
- // Default clock levels are used for diags, which may lead to overclocking.
- if (!IS_DIAG_DC(dc->ctx->dce_environment)) {
-
- dcn3_14_ip.max_num_otg = dc->res_pool->res_cap->num_timing_generator;
- dcn3_14_ip.max_num_dpp = dc->res_pool->pipe_count;
-
- if (bw_params->num_channels > 0)
- dcn3_14_soc.num_chans = bw_params->num_channels;
-
- ASSERT(dcn3_14_soc.num_chans);
- ASSERT(clk_table->num_entries);
-
- /* Prepass to find max clocks independent of voltage level. */
- for (i = 0; i < clk_table->num_entries; ++i) {
- if (clk_table->entries[i].dispclk_mhz > max_dispclk_mhz)
- max_dispclk_mhz = clk_table->entries[i].dispclk_mhz;
- if (clk_table->entries[i].dppclk_mhz > max_dppclk_mhz)
- max_dppclk_mhz = clk_table->entries[i].dppclk_mhz;
- }
-
- for (i = 0; i < clk_table->num_entries; i++) {
- /* loop backwards*/
- for (closest_clk_lvl = 0, j = dcn3_14_soc.num_states - 1; j >= 0; j--) {
- if ((unsigned int) dcn3_14_soc.clock_limits[j].dcfclk_mhz <= clk_table->entries[i].dcfclk_mhz) {
- closest_clk_lvl = j;
- break;
- }
- }
- if (clk_table->num_entries == 1) {
- /*smu gives one DPM level, let's take the highest one*/
- closest_clk_lvl = dcn3_14_soc.num_states - 1;
- }
-
- clock_tmp[i].state = i;
-
- /* Clocks dependent on voltage level. */
- clock_tmp[i].dcfclk_mhz = clk_table->entries[i].dcfclk_mhz;
- if (clk_table->num_entries == 1 &&
- clock_tmp[i].dcfclk_mhz < dcn3_14_soc.clock_limits[closest_clk_lvl].dcfclk_mhz) {
- /*SMU fix not released yet*/
- clock_tmp[i].dcfclk_mhz = dcn3_14_soc.clock_limits[closest_clk_lvl].dcfclk_mhz;
- }
- clock_tmp[i].fabricclk_mhz = clk_table->entries[i].fclk_mhz;
- clock_tmp[i].socclk_mhz = clk_table->entries[i].socclk_mhz;
-
- if (clk_table->entries[i].memclk_mhz && clk_table->entries[i].wck_ratio)
- clock_tmp[i].dram_speed_mts = clk_table->entries[i].memclk_mhz * 2 * clk_table->entries[i].wck_ratio;
-
- /* Clocks independent of voltage level. */
- clock_tmp[i].dispclk_mhz = max_dispclk_mhz ? max_dispclk_mhz :
- dcn3_14_soc.clock_limits[closest_clk_lvl].dispclk_mhz;
-
- clock_tmp[i].dppclk_mhz = max_dppclk_mhz ? max_dppclk_mhz :
- dcn3_14_soc.clock_limits[closest_clk_lvl].dppclk_mhz;
-
- clock_tmp[i].dram_bw_per_chan_gbps = dcn3_14_soc.clock_limits[closest_clk_lvl].dram_bw_per_chan_gbps;
- clock_tmp[i].dscclk_mhz = dcn3_14_soc.clock_limits[closest_clk_lvl].dscclk_mhz;
- clock_tmp[i].dtbclk_mhz = dcn3_14_soc.clock_limits[closest_clk_lvl].dtbclk_mhz;
- clock_tmp[i].phyclk_d18_mhz = dcn3_14_soc.clock_limits[closest_clk_lvl].phyclk_d18_mhz;
- clock_tmp[i].phyclk_mhz = dcn3_14_soc.clock_limits[closest_clk_lvl].phyclk_mhz;
- }
- for (i = 0; i < clk_table->num_entries; i++)
- dcn3_14_soc.clock_limits[i] = clock_tmp[i];
- if (clk_table->num_entries)
- dcn3_14_soc.num_states = clk_table->num_entries;
- }
-
- if (max_dispclk_mhz) {
- dcn3_14_soc.dispclk_dppclk_vco_speed_mhz = max_dispclk_mhz * 2;
- dc->dml.soc.dispclk_dppclk_vco_speed_mhz = max_dispclk_mhz * 2;
- }
-
- if (!IS_FPGA_MAXIMUS_DC(dc->ctx->dce_environment))
- dml_init_instance(&dc->dml, &dcn3_14_soc, &dcn3_14_ip, DML_PROJECT_DCN31);
- else
- dml_init_instance(&dc->dml, &dcn3_14_soc, &dcn3_14_ip, DML_PROJECT_DCN31_FPGA);
+ DC_FP_START();
+ dcn314_update_bw_bounding_box_fpu(dc, bw_params);
+ DC_FP_END();
}
static struct resource_funcs dcn314_res_pool_funcs = {
@@ -2069,6 +1751,7 @@ static bool dcn314_resource_construct(
dc->caps.post_blend_color_processing = true;
dc->caps.force_dp_tps4_for_cp2520 = true;
dc->caps.dp_hpo = true;
+ dc->caps.dp_hdmi21_pcon_support = true;
dc->caps.edp_dsc_support = true;
dc->caps.extended_aux_timeout_support = true;
dc->caps.dmcub_support = true;
diff --git a/drivers/gpu/drm/amd/display/dc/dcn314/dcn314_resource.h b/drivers/gpu/drm/amd/display/dc/dcn314/dcn314_resource.h
index c41108847ce0..0dd3153aa5c1 100644
--- a/drivers/gpu/drm/amd/display/dc/dcn314/dcn314_resource.h
+++ b/drivers/gpu/drm/amd/display/dc/dcn314/dcn314_resource.h
@@ -29,6 +29,9 @@
#include "core_types.h"
+extern struct _vcs_dpi_ip_params_st dcn3_14_ip;
+extern struct _vcs_dpi_soc_bounding_box_st dcn3_14_soc;
+
#define TO_DCN314_RES_POOL(pool)\
container_of(pool, struct dcn314_resource_pool, base)
diff --git a/drivers/gpu/drm/amd/display/dc/dcn315/dcn315_resource.h b/drivers/gpu/drm/amd/display/dc/dcn315/dcn315_resource.h
index 39929fa67a51..22849eaa6f24 100644
--- a/drivers/gpu/drm/amd/display/dc/dcn315/dcn315_resource.h
+++ b/drivers/gpu/drm/amd/display/dc/dcn315/dcn315_resource.h
@@ -32,7 +32,6 @@
container_of(pool, struct dcn315_resource_pool, base)
extern struct _vcs_dpi_ip_params_st dcn3_15_ip;
-extern struct _vcs_dpi_ip_params_st dcn3_15_soc;
struct dcn315_resource_pool {
struct resource_pool base;
diff --git a/drivers/gpu/drm/amd/display/dc/dcn316/dcn316_resource.h b/drivers/gpu/drm/amd/display/dc/dcn316/dcn316_resource.h
index 0dc5a6c13ae7..aba6d634131b 100644
--- a/drivers/gpu/drm/amd/display/dc/dcn316/dcn316_resource.h
+++ b/drivers/gpu/drm/amd/display/dc/dcn316/dcn316_resource.h
@@ -32,7 +32,6 @@
container_of(pool, struct dcn316_resource_pool, base)
extern struct _vcs_dpi_ip_params_st dcn3_16_ip;
-extern struct _vcs_dpi_ip_params_st dcn3_16_soc;
struct dcn316_resource_pool {
struct resource_pool base;
diff --git a/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_dccg.c b/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_dccg.c
index a31c64b50410..0d5e8a441512 100644
--- a/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_dccg.c
+++ b/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_dccg.c
@@ -225,19 +225,19 @@ void dccg32_set_dpstreamclk(
case 0:
REG_UPDATE_2(DPSTREAMCLK_CNTL,
DPSTREAMCLK0_EN,
- (src == REFCLK) ? 0 : 1, DPSTREAMCLK0_SRC_SEL, 0);
+ (src == REFCLK) ? 0 : 1, DPSTREAMCLK0_SRC_SEL, otg_inst);
break;
case 1:
REG_UPDATE_2(DPSTREAMCLK_CNTL, DPSTREAMCLK1_EN,
- (src == REFCLK) ? 0 : 1, DPSTREAMCLK1_SRC_SEL, 1);
+ (src == REFCLK) ? 0 : 1, DPSTREAMCLK1_SRC_SEL, otg_inst);
break;
case 2:
REG_UPDATE_2(DPSTREAMCLK_CNTL, DPSTREAMCLK2_EN,
- (src == REFCLK) ? 0 : 1, DPSTREAMCLK2_SRC_SEL, 2);
+ (src == REFCLK) ? 0 : 1, DPSTREAMCLK2_SRC_SEL, otg_inst);
break;
case 3:
REG_UPDATE_2(DPSTREAMCLK_CNTL, DPSTREAMCLK3_EN,
- (src == REFCLK) ? 0 : 1, DPSTREAMCLK3_SRC_SEL, 3);
+ (src == REFCLK) ? 0 : 1, DPSTREAMCLK3_SRC_SEL, otg_inst);
break;
default:
BREAK_TO_DEBUGGER();
diff --git a/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_dio_stream_encoder.c b/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_dio_stream_encoder.c
index 26648ce772da..38a48983f663 100644
--- a/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_dio_stream_encoder.c
+++ b/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_dio_stream_encoder.c
@@ -310,6 +310,11 @@ static void enc32_stream_encoder_dp_unblank(
// TODO: Confirm if we need to wait for DIG_SYMCLK_FE_ON
REG_WAIT(DIG_FE_CNTL, DIG_SYMCLK_FE_ON, 1, 10, 5000);
+ /* read start level = 0 will bring underflow / overflow and DIG_FIFO_ERROR = 1
+ * so set it to 1/2 full = 7 before reset as suggested by hardware team.
+ */
+ REG_UPDATE(DIG_FIFO_CTRL0, DIG_FIFO_READ_START_LEVEL, 0x7);
+
REG_UPDATE(DIG_FIFO_CTRL0, DIG_FIFO_RESET, 1);
REG_WAIT(DIG_FIFO_CTRL0, DIG_FIFO_RESET_DONE, 1, 10, 5000);
diff --git a/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_hubp.c b/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_hubp.c
index 6ec1c52535b9..2038cbda33f7 100644
--- a/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_hubp.c
+++ b/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_hubp.c
@@ -103,6 +103,11 @@ void hubp32_cursor_set_attributes(
enum cursor_lines_per_chunk lpc = hubp2_get_lines_per_chunk(
attr->width, attr->color_format);
+ //Round cursor width up to next multiple of 64
+ uint32_t cursor_width = ((attr->width + 63) / 64) * 64;
+ uint32_t cursor_height = attr->height;
+ uint32_t cursor_size = cursor_width * cursor_height;
+
hubp->curs_attr = *attr;
REG_UPDATE(CURSOR_SURFACE_ADDRESS_HIGH,
@@ -126,7 +131,24 @@ void hubp32_cursor_set_attributes(
/* used to shift the cursor chunk request deadline */
CURSOR0_CHUNK_HDL_ADJUST, 3);
- if (attr->width * attr->height * 4 > 16384)
+ switch (attr->color_format) {
+ case CURSOR_MODE_MONO:
+ cursor_size /= 2;
+ break;
+ case CURSOR_MODE_COLOR_1BIT_AND:
+ case CURSOR_MODE_COLOR_PRE_MULTIPLIED_ALPHA:
+ case CURSOR_MODE_COLOR_UN_PRE_MULTIPLIED_ALPHA:
+ cursor_size *= 4;
+ break;
+
+ case CURSOR_MODE_COLOR_64BIT_FP_PRE_MULTIPLIED:
+ case CURSOR_MODE_COLOR_64BIT_FP_UN_PRE_MULTIPLIED:
+ default:
+ cursor_size *= 8;
+ break;
+ }
+
+ if (cursor_size > 16384)
REG_UPDATE(DCHUBP_MALL_CONFIG, USE_MALL_FOR_CURSOR, true);
else
REG_UPDATE(DCHUBP_MALL_CONFIG, USE_MALL_FOR_CURSOR, false);
diff --git a/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_hwseq.c b/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_hwseq.c
index d38341f68b17..344fe7535df5 100644
--- a/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_hwseq.c
+++ b/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_hwseq.c
@@ -250,6 +250,7 @@ static uint32_t dcn32_calculate_cab_allocation(struct dc *dc, struct dc_state *c
uint32_t total_lines = 0;
uint32_t lines_per_way = 0;
uint32_t num_ways = 0;
+ uint32_t prev_addr_low = 0;
for (i = 0; i < ctx->stream_count; i++) {
stream = ctx->streams[i];
@@ -267,10 +268,20 @@ static uint32_t dcn32_calculate_cab_allocation(struct dc *dc, struct dc_state *c
plane = ctx->stream_status[i].plane_states[j];
// Calculate total surface size
- surface_size = plane->plane_size.surface_pitch *
+ if (prev_addr_low != plane->address.grph.addr.u.low_part) {
+ /* if plane address are different from prev FB, then userspace allocated separate FBs*/
+ surface_size += plane->plane_size.surface_pitch *
plane->plane_size.surface_size.height *
(plane->format >= SURFACE_PIXEL_FORMAT_GRPH_ARGB16161616 ? 8 : 4);
+ prev_addr_low = plane->address.grph.addr.u.low_part;
+ } else {
+ /* We have the same fb for all the planes.
+ * Xorg always creates one giant fb that holds all surfaces,
+ * so allocating it once is sufficient.
+ * */
+ continue;
+ }
// Convert surface size + starting address to number of cache lines required
// (alignment accounted for)
cache_lines_used += dcn32_cache_lines_for_surface(dc, surface_size,
@@ -284,24 +295,38 @@ static uint32_t dcn32_calculate_cab_allocation(struct dc *dc, struct dc_state *c
}
// Include cursor size for CAB allocation
- if (stream->cursor_position.enable && plane->address.grph.cursor_cache_addr.quad_part) {
- cursor_size = dc->caps.max_cursor_size * dc->caps.max_cursor_size;
- switch (stream->cursor_attributes.color_format) {
- case CURSOR_MODE_MONO:
- cursor_size /= 2;
- break;
- case CURSOR_MODE_COLOR_1BIT_AND:
- case CURSOR_MODE_COLOR_PRE_MULTIPLIED_ALPHA:
- case CURSOR_MODE_COLOR_UN_PRE_MULTIPLIED_ALPHA:
- cursor_size *= 4;
- break;
+ for (j = 0; j < dc->res_pool->pipe_count; j++) {
+ struct pipe_ctx *pipe = &ctx->res_ctx.pipe_ctx[j];
+ struct hubp *hubp = pipe->plane_res.hubp;
- case CURSOR_MODE_COLOR_64BIT_FP_PRE_MULTIPLIED:
- case CURSOR_MODE_COLOR_64BIT_FP_UN_PRE_MULTIPLIED:
- cursor_size *= 8;
- break;
- }
- cache_lines_used += dcn32_cache_lines_for_surface(dc, surface_size,
+ if (pipe->stream && pipe->plane_state && hubp)
+ /* Find the cursor plane and use the exact size instead of
+ * using the max for calculation
+ */
+ if (hubp->curs_attr.width > 0) {
+ cursor_size = hubp->curs_attr.width * hubp->curs_attr.height;
+ break;
+ }
+ }
+
+ switch (stream->cursor_attributes.color_format) {
+ case CURSOR_MODE_MONO:
+ cursor_size /= 2;
+ break;
+ case CURSOR_MODE_COLOR_1BIT_AND:
+ case CURSOR_MODE_COLOR_PRE_MULTIPLIED_ALPHA:
+ case CURSOR_MODE_COLOR_UN_PRE_MULTIPLIED_ALPHA:
+ cursor_size *= 4;
+ break;
+
+ case CURSOR_MODE_COLOR_64BIT_FP_PRE_MULTIPLIED:
+ case CURSOR_MODE_COLOR_64BIT_FP_UN_PRE_MULTIPLIED:
+ cursor_size *= 8;
+ break;
+ }
+
+ if (stream->cursor_position.enable && plane->address.grph.cursor_cache_addr.quad_part) {
+ cache_lines_used += dcn32_cache_lines_for_surface(dc, cursor_size,
plane->address.grph.cursor_cache_addr.quad_part);
}
}
@@ -314,13 +339,36 @@ static uint32_t dcn32_calculate_cab_allocation(struct dc *dc, struct dc_state *c
if (cache_lines_used % lines_per_way > 0)
num_ways++;
+ for (i = 0; i < ctx->stream_count; i++) {
+ stream = ctx->streams[i];
+ for (j = 0; j < ctx->stream_status[i].plane_count; j++) {
+ plane = ctx->stream_status[i].plane_states[j];
+
+ if (stream->cursor_position.enable && plane &&
+ !plane->address.grph.cursor_cache_addr.quad_part &&
+ cursor_size > 16384) {
+ /* Cursor caching is not supported since it won't be on the same line.
+ * So we need an extra line to accommodate it. With large cursors and a single 4k monitor
+ * this case triggers corruption. If we're at the edge, then dont trigger display refresh
+ * from MALL. We only need to cache cursor if its greater that 64x64 at 4 bpp.
+ */
+ num_ways++;
+ /* We only expect one cursor plane */
+ break;
+ }
+ }
+ }
+
return num_ways;
}
bool dcn32_apply_idle_power_optimizations(struct dc *dc, bool enable)
{
union dmub_rb_cmd cmd;
- uint8_t ways;
+ uint8_t ways, i;
+ int j;
+ bool stereo_in_use = false;
+ struct dc_plane_state *plane = NULL;
if (!dc->ctx->dmub_srv)
return false;
@@ -349,7 +397,23 @@ bool dcn32_apply_idle_power_optimizations(struct dc *dc, bool enable)
* and configure HUBP's to fetch from MALL
*/
ways = dcn32_calculate_cab_allocation(dc, dc->current_state);
- if (ways <= dc->caps.cache_num_ways) {
+
+ /* MALL not supported with Stereo3D. If any plane is using stereo,
+ * don't try to enter MALL.
+ */
+ for (i = 0; i < dc->current_state->stream_count; i++) {
+ for (j = 0; j < dc->current_state->stream_status[i].plane_count; j++) {
+ plane = dc->current_state->stream_status[i].plane_states[j];
+
+ if (plane->address.type == PLN_ADDR_TYPE_GRPH_STEREO) {
+ stereo_in_use = true;
+ break;
+ }
+ }
+ if (stereo_in_use)
+ break;
+ }
+ if (ways <= dc->caps.cache_num_ways && !stereo_in_use) {
memset(&cmd, 0, sizeof(cmd));
cmd.cab.header.type = DMUB_CMD__CAB_FOR_SS;
cmd.cab.header.sub_type = DMUB_CMD__CAB_DCN_SS_FIT_IN_CAB;
@@ -677,15 +741,39 @@ void dcn32_update_mall_sel(struct dc *dc, struct dc_state *context)
struct hubp *hubp = pipe->plane_res.hubp;
if (pipe->stream && pipe->plane_state && hubp && hubp->funcs->hubp_update_mall_sel) {
- if (hubp->curs_attr.width * hubp->curs_attr.height * 4 > 16384)
+ //Round cursor width up to next multiple of 64
+ int cursor_width = ((hubp->curs_attr.width + 63) / 64) * 64;
+ int cursor_height = hubp->curs_attr.height;
+ int cursor_size = cursor_width * cursor_height;
+
+ switch (hubp->curs_attr.color_format) {
+ case CURSOR_MODE_MONO:
+ cursor_size /= 2;
+ break;
+ case CURSOR_MODE_COLOR_1BIT_AND:
+ case CURSOR_MODE_COLOR_PRE_MULTIPLIED_ALPHA:
+ case CURSOR_MODE_COLOR_UN_PRE_MULTIPLIED_ALPHA:
+ cursor_size *= 4;
+ break;
+
+ case CURSOR_MODE_COLOR_64BIT_FP_PRE_MULTIPLIED:
+ case CURSOR_MODE_COLOR_64BIT_FP_UN_PRE_MULTIPLIED:
+ default:
+ cursor_size *= 8;
+ break;
+ }
+
+ if (cursor_size > 16384)
cache_cursor = true;
if (pipe->stream->mall_stream_config.type == SUBVP_PHANTOM) {
hubp->funcs->hubp_update_mall_sel(hubp, 1, false);
} else {
+ // MALL not supported with Stereo3D
hubp->funcs->hubp_update_mall_sel(hubp,
num_ways <= dc->caps.cache_num_ways &&
- pipe->stream->link->psr_settings.psr_version == DC_PSR_VERSION_UNSUPPORTED ? 2 : 0,
+ pipe->stream->link->psr_settings.psr_version == DC_PSR_VERSION_UNSUPPORTED &&
+ pipe->plane_state->address.type != PLN_ADDR_TYPE_GRPH_STEREO ? 2 : 0,
cache_cursor);
}
}
diff --git a/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_optc.c b/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_optc.c
index eff1f4e17689..1fad7b48bd5b 100644
--- a/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_optc.c
+++ b/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_optc.c
@@ -281,7 +281,7 @@ static struct timing_generator_funcs dcn32_tg_funcs = {
.lock_doublebuffer_enable = optc3_lock_doublebuffer_enable,
.lock_doublebuffer_disable = optc3_lock_doublebuffer_disable,
.enable_optc_clock = optc1_enable_optc_clock,
- .set_drr = optc31_set_drr, // TODO: Update to optc32_set_drr once FW headers are promoted
+ .set_drr = optc32_set_drr,
.get_last_used_drr_vtotal = optc2_get_last_used_drr_vtotal,
.set_vtotal_min_max = optc3_set_vtotal_min_max,
.set_static_screen_control = optc1_set_static_screen_control,
diff --git a/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_resource.c b/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_resource.c
index 9a26d24b579f..c3b783cea8a0 100644
--- a/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_resource.c
+++ b/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_resource.c
@@ -867,10 +867,11 @@ static const struct dc_debug_options debug_defaults_drv = {
}
},
.use_max_lb = true,
- .force_disable_subvp = true,
+ .force_disable_subvp = false,
.exit_idle_opt_for_cursor_updates = true,
.enable_single_display_2to1_odm_policy = true,
.enable_dp_dig_pixel_rate_div_policy = 1,
+ .allow_sw_cursor_fallback = false,
};
static const struct dc_debug_options debug_defaults_diags = {
@@ -2039,7 +2040,8 @@ static bool dcn32_resource_construct(
dc->caps.max_downscale_ratio = 600;
dc->caps.i2c_speed_in_khz = 100;
dc->caps.i2c_speed_in_khz_hdcp = 100; /*1.4 w/a applied by default*/
- dc->caps.max_cursor_size = 256;
+ /* TODO: Bring max_cursor_size back to 256 after subvp cursor corruption is fixed*/
+ dc->caps.max_cursor_size = 64;
dc->caps.min_horizontal_blanking_period = 80;
dc->caps.dmdata_alloc_size = 2048;
dc->caps.mall_size_per_mem_channel = 0;
@@ -2051,6 +2053,7 @@ static bool dcn32_resource_construct(
dc->caps.max_cab_allocation_bytes = 67108864; // 64MB = 1024 * 1024 * 64
dc->caps.subvp_fw_processing_delay_us = 15;
dc->caps.subvp_prefetch_end_to_mall_start_us = 15;
+ dc->caps.subvp_swath_height_margin_lines = 16;
dc->caps.subvp_pstate_allow_width_us = 20;
dc->caps.subvp_vertical_int_margin_us = 30;
diff --git a/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_resource.h b/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_resource.h
index 1e7e6201c880..cf15d0e5e9b4 100644
--- a/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_resource.h
+++ b/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_resource.h
@@ -30,6 +30,9 @@
#define DCN3_2_DET_SEG_SIZE 64
#define DCN3_2_MALL_MBLK_SIZE_BYTES 65536 // 64 * 1024
+#define DCN3_2_MBLK_WIDTH 128
+#define DCN3_2_MBLK_HEIGHT_4BPE 128
+#define DCN3_2_MBLK_HEIGHT_8BPE 64
#define TO_DCN32_RES_POOL(pool)\
container_of(pool, struct dcn32_resource_pool, base)
diff --git a/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_resource_helpers.c b/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_resource_helpers.c
index b3f8503cea9c..1f195c5b3377 100644
--- a/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_resource_helpers.c
+++ b/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_resource_helpers.c
@@ -46,7 +46,6 @@
uint32_t dcn32_helper_calculate_num_ways_for_subvp(struct dc *dc, struct dc_state *context)
{
uint32_t num_ways = 0;
- uint32_t mall_region_pixels = 0;
uint32_t bytes_per_pixel = 0;
uint32_t cache_lines_used = 0;
uint32_t lines_per_way = 0;
@@ -54,20 +53,64 @@ uint32_t dcn32_helper_calculate_num_ways_for_subvp(struct dc *dc, struct dc_stat
uint32_t bytes_in_mall = 0;
uint32_t num_mblks = 0;
uint32_t cache_lines_per_plane = 0;
- uint32_t i = 0;
+ uint32_t i = 0, j = 0;
+ uint32_t mblk_width = 0;
+ uint32_t mblk_height = 0;
+ uint32_t full_vp_width_blk_aligned = 0;
+ uint32_t full_vp_height_blk_aligned = 0;
+ uint32_t mall_alloc_width_blk_aligned = 0;
+ uint32_t mall_alloc_height_blk_aligned = 0;
+ uint32_t full_vp_height = 0;
for (i = 0; i < dc->res_pool->pipe_count; i++) {
struct pipe_ctx *pipe = &context->res_ctx.pipe_ctx[i];
// Find the phantom pipes
- if (pipe->stream && pipe->plane_state && !pipe->top_pipe &&
+ if (pipe->stream && pipe->plane_state && !pipe->top_pipe && !pipe->prev_odm_pipe &&
pipe->stream->mall_stream_config.type == SUBVP_PHANTOM) {
- bytes_per_pixel = pipe->plane_state->format >= SURFACE_PIXEL_FORMAT_GRPH_ARGB16161616 ? 8 : 4;
- mall_region_pixels = pipe->stream->timing.h_addressable * pipe->stream->timing.v_addressable;
+ struct pipe_ctx *main_pipe = NULL;
- // For bytes required in MALL, calculate based on number of MBlks required
- num_mblks = (mall_region_pixels * bytes_per_pixel +
- DCN3_2_MALL_MBLK_SIZE_BYTES - 1) / DCN3_2_MALL_MBLK_SIZE_BYTES;
+ /* Get full viewport height from main pipe (required for MBLK calculation) */
+ for (j = 0; j < dc->res_pool->pipe_count; j++) {
+ main_pipe = &context->res_ctx.pipe_ctx[j];
+ if (main_pipe->stream == pipe->stream->mall_stream_config.paired_stream) {
+ full_vp_height = main_pipe->plane_res.scl_data.viewport.height;
+ break;
+ }
+ }
+
+ bytes_per_pixel = pipe->plane_state->format >= SURFACE_PIXEL_FORMAT_GRPH_ARGB16161616 ? 8 : 4;
+ mblk_width = DCN3_2_MBLK_WIDTH;
+ mblk_height = bytes_per_pixel == 4 ? DCN3_2_MBLK_HEIGHT_4BPE : DCN3_2_MBLK_HEIGHT_8BPE;
+
+ /* full_vp_width_blk_aligned = FLOOR(vp_x_start + full_vp_width + blk_width - 1, blk_width) -
+ * FLOOR(vp_x_start, blk_width)
+ */
+ full_vp_width_blk_aligned = ((pipe->plane_res.scl_data.viewport.x +
+ pipe->plane_res.scl_data.viewport.width + mblk_width - 1) / mblk_width * mblk_width) +
+ (pipe->plane_res.scl_data.viewport.x / mblk_width * mblk_width);
+
+ /* full_vp_height_blk_aligned = FLOOR(vp_y_start + full_vp_height + blk_height - 1, blk_height) -
+ * FLOOR(vp_y_start, blk_height)
+ */
+ full_vp_height_blk_aligned = ((pipe->plane_res.scl_data.viewport.y +
+ full_vp_height + mblk_height - 1) / mblk_height * mblk_height) +
+ (pipe->plane_res.scl_data.viewport.y / mblk_height * mblk_height);
+
+ /* mall_alloc_width_blk_aligned_l/c = full_vp_width_blk_aligned_l/c */
+ mall_alloc_width_blk_aligned = full_vp_width_blk_aligned;
+
+ /* mall_alloc_height_blk_aligned_l/c = CEILING(sub_vp_height_l/c - 1, blk_height_l/c) + blk_height_l/c */
+ mall_alloc_height_blk_aligned = (pipe->stream->timing.v_addressable - 1 + mblk_height - 1) /
+ mblk_height * mblk_height + mblk_height;
+
+ /* full_mblk_width_ub_l/c = mall_alloc_width_blk_aligned_l/c;
+ * full_mblk_height_ub_l/c = mall_alloc_height_blk_aligned_l/c;
+ * num_mblk_l/c = (full_mblk_width_ub_l/c / mblk_width_l/c) * (full_mblk_height_ub_l/c / mblk_height_l/c);
+ * (Should be divisible, but round up if not)
+ */
+ num_mblks = ((mall_alloc_width_blk_aligned + mblk_width - 1) / mblk_width) *
+ ((mall_alloc_height_blk_aligned + mblk_height - 1) / mblk_height);
bytes_in_mall = num_mblks * DCN3_2_MALL_MBLK_SIZE_BYTES;
// cache lines used is total bytes / cache_line size. Add +2 for worst case alignment
// (MALL is 64-byte aligned)
@@ -144,7 +187,7 @@ bool dcn32_all_pipes_have_stream_and_plane(struct dc *dc,
struct pipe_ctx *pipe = &context->res_ctx.pipe_ctx[i];
if (!pipe->stream)
- continue;
+ return false;
if (!pipe->plane_state)
return false;
diff --git a/drivers/gpu/drm/amd/display/dc/dcn321/dcn321_resource.c b/drivers/gpu/drm/amd/display/dc/dcn321/dcn321_resource.c
index 8157e40d2c7e..7309eed33a61 100644
--- a/drivers/gpu/drm/amd/display/dc/dcn321/dcn321_resource.c
+++ b/drivers/gpu/drm/amd/display/dc/dcn321/dcn321_resource.c
@@ -868,10 +868,11 @@ static const struct dc_debug_options debug_defaults_drv = {
}
},
.use_max_lb = true,
- .force_disable_subvp = true,
+ .force_disable_subvp = false,
.exit_idle_opt_for_cursor_updates = true,
.enable_single_display_2to1_odm_policy = true,
.enable_dp_dig_pixel_rate_div_policy = 1,
+ .allow_sw_cursor_fallback = false,
};
static const struct dc_debug_options debug_defaults_diags = {
@@ -1651,7 +1652,8 @@ static bool dcn321_resource_construct(
dc->caps.max_downscale_ratio = 600;
dc->caps.i2c_speed_in_khz = 100;
dc->caps.i2c_speed_in_khz_hdcp = 100; /*1.4 w/a applied by default*/
- dc->caps.max_cursor_size = 256;
+ /* TODO: Bring max cursor size back to 256 after subvp cursor corruption is fixed*/
+ dc->caps.max_cursor_size = 64;
dc->caps.min_horizontal_blanking_period = 80;
dc->caps.dmdata_alloc_size = 2048;
dc->caps.mall_size_per_mem_channel = 0;
@@ -1662,8 +1664,9 @@ static bool dcn321_resource_construct(
dc->caps.max_cab_allocation_bytes = 33554432; // 32MB = 1024 * 1024 * 32
dc->caps.subvp_fw_processing_delay_us = 15;
dc->caps.subvp_prefetch_end_to_mall_start_us = 15;
+ dc->caps.subvp_swath_height_margin_lines = 16;
dc->caps.subvp_pstate_allow_width_us = 20;
-
+ dc->caps.subvp_vertical_int_margin_us = 30;
dc->caps.max_slave_planes = 1;
dc->caps.max_slave_yuv_planes = 1;
dc->caps.max_slave_rgb_planes = 1;
diff --git a/drivers/gpu/drm/amd/display/dc/dml/Makefile b/drivers/gpu/drm/amd/display/dc/dml/Makefile
index 359f6e9a1da0..cb81ed2fbd53 100644
--- a/drivers/gpu/drm/amd/display/dc/dml/Makefile
+++ b/drivers/gpu/drm/amd/display/dc/dml/Makefile
@@ -61,7 +61,6 @@ CFLAGS_$(AMDDALPATH)/dc/dml/display_mode_vba.o := $(dml_ccflags)
CFLAGS_$(AMDDALPATH)/dc/dml/dcn10/dcn10_fpu.o := $(dml_ccflags)
CFLAGS_$(AMDDALPATH)/dc/dml/dcn20/dcn20_fpu.o := $(dml_ccflags)
CFLAGS_$(AMDDALPATH)/dc/dml/dcn20/display_mode_vba_20.o := $(dml_ccflags)
-CFLAGS_$(AMDDALPATH)/dc/dml/dcn20/display_mode_vba_20.o := $(dml_ccflags)
CFLAGS_$(AMDDALPATH)/dc/dml/dcn20/display_rq_dlg_calc_20.o := $(dml_ccflags)
CFLAGS_$(AMDDALPATH)/dc/dml/dcn20/display_mode_vba_20v2.o := $(dml_ccflags)
CFLAGS_$(AMDDALPATH)/dc/dml/dcn20/display_rq_dlg_calc_20v2.o := $(dml_ccflags)
@@ -71,6 +70,9 @@ CFLAGS_$(AMDDALPATH)/dc/dml/dcn30/display_mode_vba_30.o := $(dml_ccflags) $(fram
CFLAGS_$(AMDDALPATH)/dc/dml/dcn30/display_rq_dlg_calc_30.o := $(dml_ccflags)
CFLAGS_$(AMDDALPATH)/dc/dml/dcn31/display_mode_vba_31.o := $(dml_ccflags) $(frame_warn_flag)
CFLAGS_$(AMDDALPATH)/dc/dml/dcn31/display_rq_dlg_calc_31.o := $(dml_ccflags)
+CFLAGS_$(AMDDALPATH)/dc/dml/dcn314/display_mode_vba_314.o := $(dml_ccflags) $(frame_warn_flag)
+CFLAGS_$(AMDDALPATH)/dc/dml/dcn314/display_rq_dlg_calc_314.o := $(dml_ccflags)
+CFLAGS_$(AMDDALPATH)/dc/dml/dcn314/dcn314_fpu.o := $(dml_ccflags)
CFLAGS_$(AMDDALPATH)/dc/dml/dcn30/dcn30_fpu.o := $(dml_ccflags)
CFLAGS_$(AMDDALPATH)/dc/dml/dcn32/dcn32_fpu.o := $(dml_ccflags)
CFLAGS_$(AMDDALPATH)/dc/dml/dcn32/display_mode_vba_32.o := $(dml_ccflags) $(frame_warn_flag)
@@ -82,7 +84,6 @@ CFLAGS_$(AMDDALPATH)/dc/dml/dcn301/dcn301_fpu.o := $(dml_ccflags)
CFLAGS_$(AMDDALPATH)/dc/dml/dcn302/dcn302_fpu.o := $(dml_ccflags)
CFLAGS_$(AMDDALPATH)/dc/dml/dcn303/dcn303_fpu.o := $(dml_ccflags)
CFLAGS_$(AMDDALPATH)/dc/dml/dsc/rc_calc_fpu.o := $(dml_ccflags)
-CFLAGS_$(AMDDALPATH)/dc/dml/display_mode_lib.o := $(dml_ccflags)
CFLAGS_$(AMDDALPATH)/dc/dml/calcs/dcn_calcs.o := $(dml_ccflags)
CFLAGS_$(AMDDALPATH)/dc/dml/calcs/dcn_calc_auto.o := $(dml_ccflags)
CFLAGS_$(AMDDALPATH)/dc/dml/calcs/dcn_calc_math.o := $(dml_ccflags) -Wno-tautological-compare
@@ -124,6 +125,7 @@ DML += dcn20/display_rq_dlg_calc_20v2.o dcn20/display_mode_vba_20v2.o
DML += dcn21/display_rq_dlg_calc_21.o dcn21/display_mode_vba_21.o
DML += dcn30/dcn30_fpu.o dcn30/display_mode_vba_30.o dcn30/display_rq_dlg_calc_30.o
DML += dcn31/display_mode_vba_31.o dcn31/display_rq_dlg_calc_31.o
+DML += dcn314/display_mode_vba_314.o dcn314/display_rq_dlg_calc_314.o
DML += dcn32/display_mode_vba_32.o dcn32/display_rq_dlg_calc_32.o dcn32/display_mode_vba_util_32.o
DML += dcn31/dcn31_fpu.o
DML += dcn32/dcn32_fpu.o
@@ -131,6 +133,7 @@ DML += dcn321/dcn321_fpu.o
DML += dcn301/dcn301_fpu.o
DML += dcn302/dcn302_fpu.o
DML += dcn303/dcn303_fpu.o
+DML += dcn314/dcn314_fpu.o
DML += dsc/rc_calc_fpu.o
DML += calcs/dcn_calcs.o calcs/dcn_calc_math.o calcs/dcn_calc_auto.o
endif
diff --git a/drivers/gpu/drm/amd/display/dc/dml/dcn20/dcn20_fpu.c b/drivers/gpu/drm/amd/display/dc/dml/dcn20/dcn20_fpu.c
index ca44df4fca74..d34e0f1314d9 100644
--- a/drivers/gpu/drm/amd/display/dc/dml/dcn20/dcn20_fpu.c
+++ b/drivers/gpu/drm/amd/display/dc/dml/dcn20/dcn20_fpu.c
@@ -30,6 +30,7 @@
#include "dchubbub.h"
#include "dcn20/dcn20_resource.h"
#include "dcn21/dcn21_resource.h"
+#include "clk_mgr/dcn21/rn_clk_mgr.h"
#include "dcn20_fpu.h"
diff --git a/drivers/gpu/drm/amd/display/dc/dml/dcn30/display_mode_vba_30.c b/drivers/gpu/drm/amd/display/dc/dml/dcn30/display_mode_vba_30.c
index 876b321b30ca..1cb858dd6ea0 100644
--- a/drivers/gpu/drm/amd/display/dc/dml/dcn30/display_mode_vba_30.c
+++ b/drivers/gpu/drm/amd/display/dc/dml/dcn30/display_mode_vba_30.c
@@ -6610,8 +6610,7 @@ static double CalculateUrgentLatency(
return ret;
}
-
-static void UseMinimumDCFCLK(
+static noinline_for_stack void UseMinimumDCFCLK(
struct display_mode_lib *mode_lib,
int MaxInterDCNTileRepeaters,
int MaxPrefetchMode,
diff --git a/drivers/gpu/drm/amd/display/dc/dml/dcn301/dcn301_fpu.c b/drivers/gpu/drm/amd/display/dc/dml/dcn301/dcn301_fpu.c
index 7ef66e511ec8..d211cf6d234c 100644
--- a/drivers/gpu/drm/amd/display/dc/dml/dcn301/dcn301_fpu.c
+++ b/drivers/gpu/drm/amd/display/dc/dml/dcn301/dcn301_fpu.c
@@ -26,6 +26,7 @@
#include "clk_mgr.h"
#include "dcn20/dcn20_resource.h"
#include "dcn301/dcn301_resource.h"
+#include "clk_mgr/dcn301/vg_clk_mgr.h"
#include "dml/dcn20/dcn20_fpu.h"
#include "dcn301_fpu.h"
diff --git a/drivers/gpu/drm/amd/display/dc/dml/dcn31/dcn31_fpu.c b/drivers/gpu/drm/amd/display/dc/dml/dcn31/dcn31_fpu.c
index e36cfa5985ea..149a1b17cdf3 100644
--- a/drivers/gpu/drm/amd/display/dc/dml/dcn31/dcn31_fpu.c
+++ b/drivers/gpu/drm/amd/display/dc/dml/dcn31/dcn31_fpu.c
@@ -25,6 +25,9 @@
#include "resource.h"
#include "clk_mgr.h"
+#include "dcn31/dcn31_resource.h"
+#include "dcn315/dcn315_resource.h"
+#include "dcn316/dcn316_resource.h"
#include "dml/dcn20/dcn20_fpu.h"
#include "dcn31_fpu.h"
@@ -114,7 +117,7 @@ struct _vcs_dpi_ip_params_st dcn3_1_ip = {
.dcc_supported = true,
};
-struct _vcs_dpi_soc_bounding_box_st dcn3_1_soc = {
+static struct _vcs_dpi_soc_bounding_box_st dcn3_1_soc = {
/*TODO: correct dispclk/dppclk voltage level determination*/
.clock_limits = {
{
@@ -259,7 +262,7 @@ struct _vcs_dpi_ip_params_st dcn3_15_ip = {
.dcc_supported = true,
};
-struct _vcs_dpi_soc_bounding_box_st dcn3_15_soc = {
+static struct _vcs_dpi_soc_bounding_box_st dcn3_15_soc = {
.sr_exit_time_us = 9.0,
.sr_enter_plus_exit_time_us = 11.0,
.sr_exit_z8_time_us = 50.0,
@@ -355,7 +358,7 @@ struct _vcs_dpi_ip_params_st dcn3_16_ip = {
.dcc_supported = true,
};
-struct _vcs_dpi_soc_bounding_box_st dcn3_16_soc = {
+static struct _vcs_dpi_soc_bounding_box_st dcn3_16_soc = {
/*TODO: correct dispclk/dppclk voltage level determination*/
.clock_limits = {
{
diff --git a/drivers/gpu/drm/amd/display/dc/dml/dcn31/display_mode_vba_31.c b/drivers/gpu/drm/amd/display/dc/dml/dcn31/display_mode_vba_31.c
index 3fab19134480..8ca66f1644dc 100644
--- a/drivers/gpu/drm/amd/display/dc/dml/dcn31/display_mode_vba_31.c
+++ b/drivers/gpu/drm/amd/display/dc/dml/dcn31/display_mode_vba_31.c
@@ -26,7 +26,7 @@
#include "dc.h"
#include "dc_link.h"
#include "../display_mode_lib.h"
-#include "dml/dcn30/display_mode_vba_30.h"
+#include "../dcn30/display_mode_vba_30.h"
#include "display_mode_vba_31.h"
#include "../dml_inline_defs.h"
@@ -251,33 +251,13 @@ static void CalculateRowBandwidth(
static void CalculateFlipSchedule(
struct display_mode_lib *mode_lib,
+ unsigned int k,
double HostVMInefficiencyFactor,
double UrgentExtraLatency,
double UrgentLatency,
- unsigned int GPUVMMaxPageTableLevels,
- bool HostVMEnable,
- unsigned int HostVMMaxNonCachedPageTableLevels,
- bool GPUVMEnable,
- double HostVMMinPageSize,
double PDEAndMetaPTEBytesPerFrame,
double MetaRowBytes,
- double DPTEBytesPerRow,
- double BandwidthAvailableForImmediateFlip,
- unsigned int TotImmediateFlipBytes,
- enum source_format_class SourcePixelFormat,
- double LineTime,
- double VRatio,
- double VRatioChroma,
- double Tno_bw,
- bool DCCEnable,
- unsigned int dpte_row_height,
- unsigned int meta_row_height,
- unsigned int dpte_row_height_chroma,
- unsigned int meta_row_height_chroma,
- double *DestinationLinesToRequestVMInImmediateFlip,
- double *DestinationLinesToRequestRowInImmediateFlip,
- double *final_flip_bw,
- bool *ImmediateFlipSupportedForPipe);
+ double DPTEBytesPerRow);
static double CalculateWriteBackDelay(
enum source_format_class WritebackPixelFormat,
double WritebackHRatio,
@@ -311,64 +291,28 @@ static void CalculateVupdateAndDynamicMetadataParameters(
static void CalculateWatermarksAndDRAMSpeedChangeSupport(
struct display_mode_lib *mode_lib,
unsigned int PrefetchMode,
- unsigned int NumberOfActivePlanes,
- unsigned int MaxLineBufferLines,
- unsigned int LineBufferSize,
- unsigned int WritebackInterfaceBufferSize,
double DCFCLK,
double ReturnBW,
- bool SynchronizedVBlank,
- unsigned int dpte_group_bytes[],
- unsigned int MetaChunkSize,
double UrgentLatency,
double ExtraLatency,
- double WritebackLatency,
- double WritebackChunkSize,
double SOCCLK,
- double DRAMClockChangeLatency,
- double SRExitTime,
- double SREnterPlusExitTime,
- double SRExitZ8Time,
- double SREnterPlusExitZ8Time,
double DCFCLKDeepSleep,
unsigned int DETBufferSizeY[],
unsigned int DETBufferSizeC[],
unsigned int SwathHeightY[],
unsigned int SwathHeightC[],
- unsigned int LBBitPerPixel[],
double SwathWidthY[],
double SwathWidthC[],
- double HRatio[],
- double HRatioChroma[],
- unsigned int vtaps[],
- unsigned int VTAPsChroma[],
- double VRatio[],
- double VRatioChroma[],
- unsigned int HTotal[],
- double PixelClock[],
- unsigned int BlendingAndTiming[],
unsigned int DPPPerPlane[],
double BytePerPixelDETY[],
double BytePerPixelDETC[],
- double DSTXAfterScaler[],
- double DSTYAfterScaler[],
- bool WritebackEnable[],
- enum source_format_class WritebackPixelFormat[],
- double WritebackDestinationWidth[],
- double WritebackDestinationHeight[],
- double WritebackSourceHeight[],
bool UnboundedRequestEnabled,
int unsigned CompressedBufferSizeInkByte,
enum clock_change_support *DRAMClockChangeSupport,
- double *UrgentWatermark,
- double *WritebackUrgentWatermark,
- double *DRAMClockChangeWatermark,
- double *WritebackDRAMClockChangeWatermark,
double *StutterExitWatermark,
double *StutterEnterPlusExitWatermark,
double *Z8StutterExitWatermark,
- double *Z8StutterEnterPlusExitWatermark,
- double *MinActiveDRAMClockChangeLatencySupported);
+ double *Z8StutterEnterPlusExitWatermark);
static void CalculateDCFCLKDeepSleep(
struct display_mode_lib *mode_lib,
@@ -2904,33 +2848,13 @@ static void DISPCLKDPPCLKDCFCLKDeepSleepPrefetchParametersWatermarksAndPerforman
for (k = 0; k < v->NumberOfActivePlanes; ++k) {
CalculateFlipSchedule(
mode_lib,
+ k,
HostVMInefficiencyFactor,
v->UrgentExtraLatency,
v->UrgentLatency,
- v->GPUVMMaxPageTableLevels,
- v->HostVMEnable,
- v->HostVMMaxNonCachedPageTableLevels,
- v->GPUVMEnable,
- v->HostVMMinPageSize,
v->PDEAndMetaPTEBytesFrame[k],
v->MetaRowByte[k],
- v->PixelPTEBytesPerRow[k],
- v->BandwidthAvailableForImmediateFlip,
- v->TotImmediateFlipBytes,
- v->SourcePixelFormat[k],
- v->HTotal[k] / v->PixelClock[k],
- v->VRatio[k],
- v->VRatioChroma[k],
- v->Tno_bw[k],
- v->DCCEnable[k],
- v->dpte_row_height[k],
- v->meta_row_height[k],
- v->dpte_row_height_chroma[k],
- v->meta_row_height_chroma[k],
- &v->DestinationLinesToRequestVMInImmediateFlip[k],
- &v->DestinationLinesToRequestRowInImmediateFlip[k],
- &v->final_flip_bw[k],
- &v->ImmediateFlipSupportedForPipe[k]);
+ v->PixelPTEBytesPerRow[k]);
}
v->total_dcn_read_bw_with_flip = 0.0;
@@ -3017,64 +2941,28 @@ static void DISPCLKDPPCLKDCFCLKDeepSleepPrefetchParametersWatermarksAndPerforman
CalculateWatermarksAndDRAMSpeedChangeSupport(
mode_lib,
PrefetchMode,
- v->NumberOfActivePlanes,
- v->MaxLineBufferLines,
- v->LineBufferSize,
- v->WritebackInterfaceBufferSize,
v->DCFCLK,
v->ReturnBW,
- v->SynchronizedVBlank,
- v->dpte_group_bytes,
- v->MetaChunkSize,
v->UrgentLatency,
v->UrgentExtraLatency,
- v->WritebackLatency,
- v->WritebackChunkSize,
v->SOCCLK,
- v->DRAMClockChangeLatency,
- v->SRExitTime,
- v->SREnterPlusExitTime,
- v->SRExitZ8Time,
- v->SREnterPlusExitZ8Time,
v->DCFCLKDeepSleep,
v->DETBufferSizeY,
v->DETBufferSizeC,
v->SwathHeightY,
v->SwathHeightC,
- v->LBBitPerPixel,
v->SwathWidthY,
v->SwathWidthC,
- v->HRatio,
- v->HRatioChroma,
- v->vtaps,
- v->VTAPsChroma,
- v->VRatio,
- v->VRatioChroma,
- v->HTotal,
- v->PixelClock,
- v->BlendingAndTiming,
v->DPPPerPlane,
v->BytePerPixelDETY,
v->BytePerPixelDETC,
- v->DSTXAfterScaler,
- v->DSTYAfterScaler,
- v->WritebackEnable,
- v->WritebackPixelFormat,
- v->WritebackDestinationWidth,
- v->WritebackDestinationHeight,
- v->WritebackSourceHeight,
v->UnboundedRequestEnabled,
v->CompressedBufferSizeInkByte,
&DRAMClockChangeSupport,
- &v->UrgentWatermark,
- &v->WritebackUrgentWatermark,
- &v->DRAMClockChangeWatermark,
- &v->WritebackDRAMClockChangeWatermark,
&v->StutterExitWatermark,
&v->StutterEnterPlusExitWatermark,
&v->Z8StutterExitWatermark,
- &v->Z8StutterEnterPlusExitWatermark,
- &v->MinActiveDRAMClockChangeLatencySupported);
+ &v->Z8StutterEnterPlusExitWatermark);
for (k = 0; k < v->NumberOfActivePlanes; ++k) {
if (v->WritebackEnable[k] == true) {
@@ -3598,61 +3486,43 @@ static void CalculateRowBandwidth(
static void CalculateFlipSchedule(
struct display_mode_lib *mode_lib,
+ unsigned int k,
double HostVMInefficiencyFactor,
double UrgentExtraLatency,
double UrgentLatency,
- unsigned int GPUVMMaxPageTableLevels,
- bool HostVMEnable,
- unsigned int HostVMMaxNonCachedPageTableLevels,
- bool GPUVMEnable,
- double HostVMMinPageSize,
double PDEAndMetaPTEBytesPerFrame,
double MetaRowBytes,
- double DPTEBytesPerRow,
- double BandwidthAvailableForImmediateFlip,
- unsigned int TotImmediateFlipBytes,
- enum source_format_class SourcePixelFormat,
- double LineTime,
- double VRatio,
- double VRatioChroma,
- double Tno_bw,
- bool DCCEnable,
- unsigned int dpte_row_height,
- unsigned int meta_row_height,
- unsigned int dpte_row_height_chroma,
- unsigned int meta_row_height_chroma,
- double *DestinationLinesToRequestVMInImmediateFlip,
- double *DestinationLinesToRequestRowInImmediateFlip,
- double *final_flip_bw,
- bool *ImmediateFlipSupportedForPipe)
+ double DPTEBytesPerRow)
{
+ struct vba_vars_st *v = &mode_lib->vba;
double min_row_time = 0.0;
unsigned int HostVMDynamicLevelsTrips;
double TimeForFetchingMetaPTEImmediateFlip;
double TimeForFetchingRowInVBlankImmediateFlip;
double ImmediateFlipBW;
+ double LineTime = v->HTotal[k] / v->PixelClock[k];
- if (GPUVMEnable == true && HostVMEnable == true) {
- HostVMDynamicLevelsTrips = HostVMMaxNonCachedPageTableLevels;
+ if (v->GPUVMEnable == true && v->HostVMEnable == true) {
+ HostVMDynamicLevelsTrips = v->HostVMMaxNonCachedPageTableLevels;
} else {
HostVMDynamicLevelsTrips = 0;
}
- if (GPUVMEnable == true || DCCEnable == true) {
- ImmediateFlipBW = (PDEAndMetaPTEBytesPerFrame + MetaRowBytes + DPTEBytesPerRow) * BandwidthAvailableForImmediateFlip / TotImmediateFlipBytes;
+ if (v->GPUVMEnable == true || v->DCCEnable[k] == true) {
+ ImmediateFlipBW = (PDEAndMetaPTEBytesPerFrame + MetaRowBytes + DPTEBytesPerRow) * v->BandwidthAvailableForImmediateFlip / v->TotImmediateFlipBytes;
}
- if (GPUVMEnable == true) {
+ if (v->GPUVMEnable == true) {
TimeForFetchingMetaPTEImmediateFlip = dml_max3(
- Tno_bw + PDEAndMetaPTEBytesPerFrame * HostVMInefficiencyFactor / ImmediateFlipBW,
- UrgentExtraLatency + UrgentLatency * (GPUVMMaxPageTableLevels * (HostVMDynamicLevelsTrips + 1) - 1),
+ v->Tno_bw[k] + PDEAndMetaPTEBytesPerFrame * HostVMInefficiencyFactor / ImmediateFlipBW,
+ UrgentExtraLatency + UrgentLatency * (v->GPUVMMaxPageTableLevels * (HostVMDynamicLevelsTrips + 1) - 1),
LineTime / 4.0);
} else {
TimeForFetchingMetaPTEImmediateFlip = 0;
}
- *DestinationLinesToRequestVMInImmediateFlip = dml_ceil(4.0 * (TimeForFetchingMetaPTEImmediateFlip / LineTime), 1) / 4.0;
- if ((GPUVMEnable == true || DCCEnable == true)) {
+ v->DestinationLinesToRequestVMInImmediateFlip[k] = dml_ceil(4.0 * (TimeForFetchingMetaPTEImmediateFlip / LineTime), 1) / 4.0;
+ if ((v->GPUVMEnable == true || v->DCCEnable[k] == true)) {
TimeForFetchingRowInVBlankImmediateFlip = dml_max3(
(MetaRowBytes + DPTEBytesPerRow * HostVMInefficiencyFactor) / ImmediateFlipBW,
UrgentLatency * (HostVMDynamicLevelsTrips + 1),
@@ -3661,54 +3531,54 @@ static void CalculateFlipSchedule(
TimeForFetchingRowInVBlankImmediateFlip = 0;
}
- *DestinationLinesToRequestRowInImmediateFlip = dml_ceil(4.0 * (TimeForFetchingRowInVBlankImmediateFlip / LineTime), 1) / 4.0;
+ v->DestinationLinesToRequestRowInImmediateFlip[k] = dml_ceil(4.0 * (TimeForFetchingRowInVBlankImmediateFlip / LineTime), 1) / 4.0;
- if (GPUVMEnable == true) {
- *final_flip_bw = dml_max(
- PDEAndMetaPTEBytesPerFrame * HostVMInefficiencyFactor / (*DestinationLinesToRequestVMInImmediateFlip * LineTime),
- (MetaRowBytes + DPTEBytesPerRow * HostVMInefficiencyFactor) / (*DestinationLinesToRequestRowInImmediateFlip * LineTime));
- } else if ((GPUVMEnable == true || DCCEnable == true)) {
- *final_flip_bw = (MetaRowBytes + DPTEBytesPerRow * HostVMInefficiencyFactor) / (*DestinationLinesToRequestRowInImmediateFlip * LineTime);
+ if (v->GPUVMEnable == true) {
+ v->final_flip_bw[k] = dml_max(
+ PDEAndMetaPTEBytesPerFrame * HostVMInefficiencyFactor / (v->DestinationLinesToRequestVMInImmediateFlip[k] * LineTime),
+ (MetaRowBytes + DPTEBytesPerRow * HostVMInefficiencyFactor) / (v->DestinationLinesToRequestRowInImmediateFlip[k] * LineTime));
+ } else if ((v->GPUVMEnable == true || v->DCCEnable[k] == true)) {
+ v->final_flip_bw[k] = (MetaRowBytes + DPTEBytesPerRow * HostVMInefficiencyFactor) / (v->DestinationLinesToRequestRowInImmediateFlip[k] * LineTime);
} else {
- *final_flip_bw = 0;
+ v->final_flip_bw[k] = 0;
}
- if (SourcePixelFormat == dm_420_8 || SourcePixelFormat == dm_420_10 || SourcePixelFormat == dm_rgbe_alpha) {
- if (GPUVMEnable == true && DCCEnable != true) {
- min_row_time = dml_min(dpte_row_height * LineTime / VRatio, dpte_row_height_chroma * LineTime / VRatioChroma);
- } else if (GPUVMEnable != true && DCCEnable == true) {
- min_row_time = dml_min(meta_row_height * LineTime / VRatio, meta_row_height_chroma * LineTime / VRatioChroma);
+ if (v->SourcePixelFormat[k] == dm_420_8 || v->SourcePixelFormat[k] == dm_420_10 || v->SourcePixelFormat[k] == dm_rgbe_alpha) {
+ if (v->GPUVMEnable == true && v->DCCEnable[k] != true) {
+ min_row_time = dml_min(v->dpte_row_height[k] * LineTime / v->VRatio[k], v->dpte_row_height_chroma[k] * LineTime / v->VRatioChroma[k]);
+ } else if (v->GPUVMEnable != true && v->DCCEnable[k] == true) {
+ min_row_time = dml_min(v->meta_row_height[k] * LineTime / v->VRatio[k], v->meta_row_height_chroma[k] * LineTime / v->VRatioChroma[k]);
} else {
min_row_time = dml_min4(
- dpte_row_height * LineTime / VRatio,
- meta_row_height * LineTime / VRatio,
- dpte_row_height_chroma * LineTime / VRatioChroma,
- meta_row_height_chroma * LineTime / VRatioChroma);
+ v->dpte_row_height[k] * LineTime / v->VRatio[k],
+ v->meta_row_height[k] * LineTime / v->VRatio[k],
+ v->dpte_row_height_chroma[k] * LineTime / v->VRatioChroma[k],
+ v->meta_row_height_chroma[k] * LineTime / v->VRatioChroma[k]);
}
} else {
- if (GPUVMEnable == true && DCCEnable != true) {
- min_row_time = dpte_row_height * LineTime / VRatio;
- } else if (GPUVMEnable != true && DCCEnable == true) {
- min_row_time = meta_row_height * LineTime / VRatio;
+ if (v->GPUVMEnable == true && v->DCCEnable[k] != true) {
+ min_row_time = v->dpte_row_height[k] * LineTime / v->VRatio[k];
+ } else if (v->GPUVMEnable != true && v->DCCEnable[k] == true) {
+ min_row_time = v->meta_row_height[k] * LineTime / v->VRatio[k];
} else {
- min_row_time = dml_min(dpte_row_height * LineTime / VRatio, meta_row_height * LineTime / VRatio);
+ min_row_time = dml_min(v->dpte_row_height[k] * LineTime / v->VRatio[k], v->meta_row_height[k] * LineTime / v->VRatio[k]);
}
}
- if (*DestinationLinesToRequestVMInImmediateFlip >= 32 || *DestinationLinesToRequestRowInImmediateFlip >= 16
+ if (v->DestinationLinesToRequestVMInImmediateFlip[k] >= 32 || v->DestinationLinesToRequestRowInImmediateFlip[k] >= 16
|| TimeForFetchingMetaPTEImmediateFlip + 2 * TimeForFetchingRowInVBlankImmediateFlip > min_row_time) {
- *ImmediateFlipSupportedForPipe = false;
+ v->ImmediateFlipSupportedForPipe[k] = false;
} else {
- *ImmediateFlipSupportedForPipe = true;
+ v->ImmediateFlipSupportedForPipe[k] = true;
}
#ifdef __DML_VBA_DEBUG__
- dml_print("DML::%s: DestinationLinesToRequestVMInImmediateFlip = %f\n", __func__, *DestinationLinesToRequestVMInImmediateFlip);
- dml_print("DML::%s: DestinationLinesToRequestRowInImmediateFlip = %f\n", __func__, *DestinationLinesToRequestRowInImmediateFlip);
+ dml_print("DML::%s: DestinationLinesToRequestVMInImmediateFlip = %f\n", __func__, v->DestinationLinesToRequestVMInImmediateFlip[k]);
+ dml_print("DML::%s: DestinationLinesToRequestRowInImmediateFlip = %f\n", __func__, v->DestinationLinesToRequestRowInImmediateFlip[k]);
dml_print("DML::%s: TimeForFetchingMetaPTEImmediateFlip = %f\n", __func__, TimeForFetchingMetaPTEImmediateFlip);
dml_print("DML::%s: TimeForFetchingRowInVBlankImmediateFlip = %f\n", __func__, TimeForFetchingRowInVBlankImmediateFlip);
dml_print("DML::%s: min_row_time = %f\n", __func__, min_row_time);
- dml_print("DML::%s: ImmediateFlipSupportedForPipe = %d\n", __func__, *ImmediateFlipSupportedForPipe);
+ dml_print("DML::%s: ImmediateFlipSupportedForPipe = %d\n", __func__, v->ImmediateFlipSupportedForPipe[k]);
#endif
}
@@ -5300,33 +5170,13 @@ void dml31_ModeSupportAndSystemConfigurationFull(struct display_mode_lib *mode_l
for (k = 0; k < v->NumberOfActivePlanes; k++) {
CalculateFlipSchedule(
mode_lib,
+ k,
HostVMInefficiencyFactor,
v->ExtraLatency,
v->UrgLatency[i],
- v->GPUVMMaxPageTableLevels,
- v->HostVMEnable,
- v->HostVMMaxNonCachedPageTableLevels,
- v->GPUVMEnable,
- v->HostVMMinPageSize,
v->PDEAndMetaPTEBytesPerFrame[i][j][k],
v->MetaRowBytes[i][j][k],
- v->DPTEBytesPerRow[i][j][k],
- v->BandwidthAvailableForImmediateFlip,
- v->TotImmediateFlipBytes,
- v->SourcePixelFormat[k],
- v->HTotal[k] / v->PixelClock[k],
- v->VRatio[k],
- v->VRatioChroma[k],
- v->Tno_bw[k],
- v->DCCEnable[k],
- v->dpte_row_height[k],
- v->meta_row_height[k],
- v->dpte_row_height_chroma[k],
- v->meta_row_height_chroma[k],
- &v->DestinationLinesToRequestVMInImmediateFlip[k],
- &v->DestinationLinesToRequestRowInImmediateFlip[k],
- &v->final_flip_bw[k],
- &v->ImmediateFlipSupportedForPipe[k]);
+ v->DPTEBytesPerRow[i][j][k]);
}
v->total_dcn_read_bw_with_flip = 0.0;
for (k = 0; k < v->NumberOfActivePlanes; k++) {
@@ -5384,64 +5234,28 @@ void dml31_ModeSupportAndSystemConfigurationFull(struct display_mode_lib *mode_l
CalculateWatermarksAndDRAMSpeedChangeSupport(
mode_lib,
v->PrefetchModePerState[i][j],
- v->NumberOfActivePlanes,
- v->MaxLineBufferLines,
- v->LineBufferSize,
- v->WritebackInterfaceBufferSize,
v->DCFCLKState[i][j],
v->ReturnBWPerState[i][j],
- v->SynchronizedVBlank,
- v->dpte_group_bytes,
- v->MetaChunkSize,
v->UrgLatency[i],
v->ExtraLatency,
- v->WritebackLatency,
- v->WritebackChunkSize,
v->SOCCLKPerState[i],
- v->DRAMClockChangeLatency,
- v->SRExitTime,
- v->SREnterPlusExitTime,
- v->SRExitZ8Time,
- v->SREnterPlusExitZ8Time,
v->ProjectedDCFCLKDeepSleep[i][j],
v->DETBufferSizeYThisState,
v->DETBufferSizeCThisState,
v->SwathHeightYThisState,
v->SwathHeightCThisState,
- v->LBBitPerPixel,
v->SwathWidthYThisState,
v->SwathWidthCThisState,
- v->HRatio,
- v->HRatioChroma,
- v->vtaps,
- v->VTAPsChroma,
- v->VRatio,
- v->VRatioChroma,
- v->HTotal,
- v->PixelClock,
- v->BlendingAndTiming,
v->NoOfDPPThisState,
v->BytePerPixelInDETY,
v->BytePerPixelInDETC,
- v->DSTXAfterScaler,
- v->DSTYAfterScaler,
- v->WritebackEnable,
- v->WritebackPixelFormat,
- v->WritebackDestinationWidth,
- v->WritebackDestinationHeight,
- v->WritebackSourceHeight,
UnboundedRequestEnabledThisState,
CompressedBufferSizeInkByteThisState,
&v->DRAMClockChangeSupport[i][j],
- &v->UrgentWatermark,
- &v->WritebackUrgentWatermark,
- &v->DRAMClockChangeWatermark,
- &v->WritebackDRAMClockChangeWatermark,
- &dummy,
&dummy,
&dummy,
&dummy,
- &v->MinActiveDRAMClockChangeLatencySupported);
+ &dummy);
}
}
@@ -5566,64 +5380,28 @@ void dml31_ModeSupportAndSystemConfigurationFull(struct display_mode_lib *mode_l
static void CalculateWatermarksAndDRAMSpeedChangeSupport(
struct display_mode_lib *mode_lib,
unsigned int PrefetchMode,
- unsigned int NumberOfActivePlanes,
- unsigned int MaxLineBufferLines,
- unsigned int LineBufferSize,
- unsigned int WritebackInterfaceBufferSize,
double DCFCLK,
double ReturnBW,
- bool SynchronizedVBlank,
- unsigned int dpte_group_bytes[],
- unsigned int MetaChunkSize,
double UrgentLatency,
double ExtraLatency,
- double WritebackLatency,
- double WritebackChunkSize,
double SOCCLK,
- double DRAMClockChangeLatency,
- double SRExitTime,
- double SREnterPlusExitTime,
- double SRExitZ8Time,
- double SREnterPlusExitZ8Time,
double DCFCLKDeepSleep,
unsigned int DETBufferSizeY[],
unsigned int DETBufferSizeC[],
unsigned int SwathHeightY[],
unsigned int SwathHeightC[],
- unsigned int LBBitPerPixel[],
double SwathWidthY[],
double SwathWidthC[],
- double HRatio[],
- double HRatioChroma[],
- unsigned int vtaps[],
- unsigned int VTAPsChroma[],
- double VRatio[],
- double VRatioChroma[],
- unsigned int HTotal[],
- double PixelClock[],
- unsigned int BlendingAndTiming[],
unsigned int DPPPerPlane[],
double BytePerPixelDETY[],
double BytePerPixelDETC[],
- double DSTXAfterScaler[],
- double DSTYAfterScaler[],
- bool WritebackEnable[],
- enum source_format_class WritebackPixelFormat[],
- double WritebackDestinationWidth[],
- double WritebackDestinationHeight[],
- double WritebackSourceHeight[],
bool UnboundedRequestEnabled,
int unsigned CompressedBufferSizeInkByte,
enum clock_change_support *DRAMClockChangeSupport,
- double *UrgentWatermark,
- double *WritebackUrgentWatermark,
- double *DRAMClockChangeWatermark,
- double *WritebackDRAMClockChangeWatermark,
double *StutterExitWatermark,
double *StutterEnterPlusExitWatermark,
double *Z8StutterExitWatermark,
- double *Z8StutterEnterPlusExitWatermark,
- double *MinActiveDRAMClockChangeLatencySupported)
+ double *Z8StutterEnterPlusExitWatermark)
{
struct vba_vars_st *v = &mode_lib->vba;
double EffectiveLBLatencyHidingY;
@@ -5643,103 +5421,103 @@ static void CalculateWatermarksAndDRAMSpeedChangeSupport(
double TotalPixelBW = 0.0;
int k, j;
- *UrgentWatermark = UrgentLatency + ExtraLatency;
+ v->UrgentWatermark = UrgentLatency + ExtraLatency;
#ifdef __DML_VBA_DEBUG__
dml_print("DML::%s: UrgentLatency = %f\n", __func__, UrgentLatency);
dml_print("DML::%s: ExtraLatency = %f\n", __func__, ExtraLatency);
- dml_print("DML::%s: UrgentWatermark = %f\n", __func__, *UrgentWatermark);
+ dml_print("DML::%s: UrgentWatermark = %f\n", __func__, v->UrgentWatermark);
#endif
- *DRAMClockChangeWatermark = DRAMClockChangeLatency + *UrgentWatermark;
+ v->DRAMClockChangeWatermark = v->DRAMClockChangeLatency + v->UrgentWatermark;
#ifdef __DML_VBA_DEBUG__
- dml_print("DML::%s: DRAMClockChangeLatency = %f\n", __func__, DRAMClockChangeLatency);
- dml_print("DML::%s: DRAMClockChangeWatermark = %f\n", __func__, *DRAMClockChangeWatermark);
+ dml_print("DML::%s: v->DRAMClockChangeLatency = %f\n", __func__, v->DRAMClockChangeLatency);
+ dml_print("DML::%s: DRAMClockChangeWatermark = %f\n", __func__, v->DRAMClockChangeWatermark);
#endif
v->TotalActiveWriteback = 0;
- for (k = 0; k < NumberOfActivePlanes; ++k) {
- if (WritebackEnable[k] == true) {
+ for (k = 0; k < v->NumberOfActivePlanes; ++k) {
+ if (v->WritebackEnable[k] == true) {
v->TotalActiveWriteback = v->TotalActiveWriteback + 1;
}
}
if (v->TotalActiveWriteback <= 1) {
- *WritebackUrgentWatermark = WritebackLatency;
+ v->WritebackUrgentWatermark = v->WritebackLatency;
} else {
- *WritebackUrgentWatermark = WritebackLatency + WritebackChunkSize * 1024.0 / 32.0 / SOCCLK;
+ v->WritebackUrgentWatermark = v->WritebackLatency + v->WritebackChunkSize * 1024.0 / 32.0 / SOCCLK;
}
if (v->TotalActiveWriteback <= 1) {
- *WritebackDRAMClockChangeWatermark = DRAMClockChangeLatency + WritebackLatency;
+ v->WritebackDRAMClockChangeWatermark = v->DRAMClockChangeLatency + v->WritebackLatency;
} else {
- *WritebackDRAMClockChangeWatermark = DRAMClockChangeLatency + WritebackLatency + WritebackChunkSize * 1024.0 / 32.0 / SOCCLK;
+ v->WritebackDRAMClockChangeWatermark = v->DRAMClockChangeLatency + v->WritebackLatency + v->WritebackChunkSize * 1024.0 / 32.0 / SOCCLK;
}
- for (k = 0; k < NumberOfActivePlanes; ++k) {
+ for (k = 0; k < v->NumberOfActivePlanes; ++k) {
TotalPixelBW = TotalPixelBW
- + DPPPerPlane[k] * (SwathWidthY[k] * BytePerPixelDETY[k] * VRatio[k] + SwathWidthC[k] * BytePerPixelDETC[k] * VRatioChroma[k])
- / (HTotal[k] / PixelClock[k]);
+ + DPPPerPlane[k] * (SwathWidthY[k] * BytePerPixelDETY[k] * v->VRatio[k] + SwathWidthC[k] * BytePerPixelDETC[k] * v->VRatioChroma[k])
+ / (v->HTotal[k] / v->PixelClock[k]);
}
- for (k = 0; k < NumberOfActivePlanes; ++k) {
+ for (k = 0; k < v->NumberOfActivePlanes; ++k) {
double EffectiveDETBufferSizeY = DETBufferSizeY[k];
v->LBLatencyHidingSourceLinesY = dml_min(
- (double) MaxLineBufferLines,
- dml_floor(LineBufferSize / LBBitPerPixel[k] / (SwathWidthY[k] / dml_max(HRatio[k], 1.0)), 1)) - (vtaps[k] - 1);
+ (double) v->MaxLineBufferLines,
+ dml_floor(v->LineBufferSize / v->LBBitPerPixel[k] / (SwathWidthY[k] / dml_max(v->HRatio[k], 1.0)), 1)) - (v->vtaps[k] - 1);
v->LBLatencyHidingSourceLinesC = dml_min(
- (double) MaxLineBufferLines,
- dml_floor(LineBufferSize / LBBitPerPixel[k] / (SwathWidthC[k] / dml_max(HRatioChroma[k], 1.0)), 1)) - (VTAPsChroma[k] - 1);
+ (double) v->MaxLineBufferLines,
+ dml_floor(v->LineBufferSize / v->LBBitPerPixel[k] / (SwathWidthC[k] / dml_max(v->HRatioChroma[k], 1.0)), 1)) - (v->VTAPsChroma[k] - 1);
- EffectiveLBLatencyHidingY = v->LBLatencyHidingSourceLinesY / VRatio[k] * (HTotal[k] / PixelClock[k]);
+ EffectiveLBLatencyHidingY = v->LBLatencyHidingSourceLinesY / v->VRatio[k] * (v->HTotal[k] / v->PixelClock[k]);
- EffectiveLBLatencyHidingC = v->LBLatencyHidingSourceLinesC / VRatioChroma[k] * (HTotal[k] / PixelClock[k]);
+ EffectiveLBLatencyHidingC = v->LBLatencyHidingSourceLinesC / v->VRatioChroma[k] * (v->HTotal[k] / v->PixelClock[k]);
if (UnboundedRequestEnabled) {
EffectiveDETBufferSizeY = EffectiveDETBufferSizeY
- + CompressedBufferSizeInkByte * 1024 * SwathWidthY[k] * BytePerPixelDETY[k] * VRatio[k] / (HTotal[k] / PixelClock[k]) / TotalPixelBW;
+ + CompressedBufferSizeInkByte * 1024 * SwathWidthY[k] * BytePerPixelDETY[k] * v->VRatio[k] / (v->HTotal[k] / v->PixelClock[k]) / TotalPixelBW;
}
LinesInDETY[k] = (double) EffectiveDETBufferSizeY / BytePerPixelDETY[k] / SwathWidthY[k];
LinesInDETYRoundedDownToSwath[k] = dml_floor(LinesInDETY[k], SwathHeightY[k]);
- FullDETBufferingTimeY = LinesInDETYRoundedDownToSwath[k] * (HTotal[k] / PixelClock[k]) / VRatio[k];
+ FullDETBufferingTimeY = LinesInDETYRoundedDownToSwath[k] * (v->HTotal[k] / v->PixelClock[k]) / v->VRatio[k];
if (BytePerPixelDETC[k] > 0) {
LinesInDETC = v->DETBufferSizeC[k] / BytePerPixelDETC[k] / SwathWidthC[k];
LinesInDETCRoundedDownToSwath = dml_floor(LinesInDETC, SwathHeightC[k]);
- FullDETBufferingTimeC = LinesInDETCRoundedDownToSwath * (HTotal[k] / PixelClock[k]) / VRatioChroma[k];
+ FullDETBufferingTimeC = LinesInDETCRoundedDownToSwath * (v->HTotal[k] / v->PixelClock[k]) / v->VRatioChroma[k];
} else {
LinesInDETC = 0;
FullDETBufferingTimeC = 999999;
}
ActiveDRAMClockChangeLatencyMarginY = EffectiveLBLatencyHidingY + FullDETBufferingTimeY
- - ((double) DSTXAfterScaler[k] / HTotal[k] + DSTYAfterScaler[k]) * HTotal[k] / PixelClock[k] - *UrgentWatermark - *DRAMClockChangeWatermark;
+ - ((double) v->DSTXAfterScaler[k] / v->HTotal[k] + v->DSTYAfterScaler[k]) * v->HTotal[k] / v->PixelClock[k] - v->UrgentWatermark - v->DRAMClockChangeWatermark;
- if (NumberOfActivePlanes > 1) {
+ if (v->NumberOfActivePlanes > 1) {
ActiveDRAMClockChangeLatencyMarginY = ActiveDRAMClockChangeLatencyMarginY
- - (1 - 1.0 / NumberOfActivePlanes) * SwathHeightY[k] * HTotal[k] / PixelClock[k] / VRatio[k];
+ - (1 - 1.0 / v->NumberOfActivePlanes) * SwathHeightY[k] * v->HTotal[k] / v->PixelClock[k] / v->VRatio[k];
}
if (BytePerPixelDETC[k] > 0) {
ActiveDRAMClockChangeLatencyMarginC = EffectiveLBLatencyHidingC + FullDETBufferingTimeC
- - ((double) DSTXAfterScaler[k] / HTotal[k] + DSTYAfterScaler[k]) * HTotal[k] / PixelClock[k] - *UrgentWatermark - *DRAMClockChangeWatermark;
+ - ((double) v->DSTXAfterScaler[k] / v->HTotal[k] + v->DSTYAfterScaler[k]) * v->HTotal[k] / v->PixelClock[k] - v->UrgentWatermark - v->DRAMClockChangeWatermark;
- if (NumberOfActivePlanes > 1) {
+ if (v->NumberOfActivePlanes > 1) {
ActiveDRAMClockChangeLatencyMarginC = ActiveDRAMClockChangeLatencyMarginC
- - (1 - 1.0 / NumberOfActivePlanes) * SwathHeightC[k] * HTotal[k] / PixelClock[k] / VRatioChroma[k];
+ - (1 - 1.0 / v->NumberOfActivePlanes) * SwathHeightC[k] * v->HTotal[k] / v->PixelClock[k] / v->VRatioChroma[k];
}
v->ActiveDRAMClockChangeLatencyMargin[k] = dml_min(ActiveDRAMClockChangeLatencyMarginY, ActiveDRAMClockChangeLatencyMarginC);
} else {
v->ActiveDRAMClockChangeLatencyMargin[k] = ActiveDRAMClockChangeLatencyMarginY;
}
- if (WritebackEnable[k] == true) {
- WritebackDRAMClockChangeLatencyHiding = WritebackInterfaceBufferSize * 1024
- / (WritebackDestinationWidth[k] * WritebackDestinationHeight[k] / (WritebackSourceHeight[k] * HTotal[k] / PixelClock[k]) * 4);
- if (WritebackPixelFormat[k] == dm_444_64) {
+ if (v->WritebackEnable[k] == true) {
+ WritebackDRAMClockChangeLatencyHiding = v->WritebackInterfaceBufferSize * 1024
+ / (v->WritebackDestinationWidth[k] * v->WritebackDestinationHeight[k] / (v->WritebackSourceHeight[k] * v->HTotal[k] / v->PixelClock[k]) * 4);
+ if (v->WritebackPixelFormat[k] == dm_444_64) {
WritebackDRAMClockChangeLatencyHiding = WritebackDRAMClockChangeLatencyHiding / 2;
}
WritebackDRAMClockChangeLatencyMargin = WritebackDRAMClockChangeLatencyHiding - v->WritebackDRAMClockChangeWatermark;
@@ -5749,14 +5527,14 @@ static void CalculateWatermarksAndDRAMSpeedChangeSupport(
v->MinActiveDRAMClockChangeMargin = 999999;
PlaneWithMinActiveDRAMClockChangeMargin = 0;
- for (k = 0; k < NumberOfActivePlanes; ++k) {
+ for (k = 0; k < v->NumberOfActivePlanes; ++k) {
if (v->ActiveDRAMClockChangeLatencyMargin[k] < v->MinActiveDRAMClockChangeMargin) {
v->MinActiveDRAMClockChangeMargin = v->ActiveDRAMClockChangeLatencyMargin[k];
- if (BlendingAndTiming[k] == k) {
+ if (v->BlendingAndTiming[k] == k) {
PlaneWithMinActiveDRAMClockChangeMargin = k;
} else {
- for (j = 0; j < NumberOfActivePlanes; ++j) {
- if (BlendingAndTiming[k] == j) {
+ for (j = 0; j < v->NumberOfActivePlanes; ++j) {
+ if (v->BlendingAndTiming[k] == j) {
PlaneWithMinActiveDRAMClockChangeMargin = j;
}
}
@@ -5764,11 +5542,11 @@ static void CalculateWatermarksAndDRAMSpeedChangeSupport(
}
}
- *MinActiveDRAMClockChangeLatencySupported = v->MinActiveDRAMClockChangeMargin + DRAMClockChangeLatency;
+ v->MinActiveDRAMClockChangeLatencySupported = v->MinActiveDRAMClockChangeMargin + v->DRAMClockChangeLatency ;
SecondMinActiveDRAMClockChangeMarginOneDisplayInVBLank = 999999;
- for (k = 0; k < NumberOfActivePlanes; ++k) {
- if (!((k == PlaneWithMinActiveDRAMClockChangeMargin) && (BlendingAndTiming[k] == k)) && !(BlendingAndTiming[k] == PlaneWithMinActiveDRAMClockChangeMargin)
+ for (k = 0; k < v->NumberOfActivePlanes; ++k) {
+ if (!((k == PlaneWithMinActiveDRAMClockChangeMargin) && (v->BlendingAndTiming[k] == k)) && !(v->BlendingAndTiming[k] == PlaneWithMinActiveDRAMClockChangeMargin)
&& v->ActiveDRAMClockChangeLatencyMargin[k] < SecondMinActiveDRAMClockChangeMarginOneDisplayInVBLank) {
SecondMinActiveDRAMClockChangeMarginOneDisplayInVBLank = v->ActiveDRAMClockChangeLatencyMargin[k];
}
@@ -5776,25 +5554,25 @@ static void CalculateWatermarksAndDRAMSpeedChangeSupport(
v->TotalNumberOfActiveOTG = 0;
- for (k = 0; k < NumberOfActivePlanes; ++k) {
- if (BlendingAndTiming[k] == k) {
+ for (k = 0; k < v->NumberOfActivePlanes; ++k) {
+ if (v->BlendingAndTiming[k] == k) {
v->TotalNumberOfActiveOTG = v->TotalNumberOfActiveOTG + 1;
}
}
if (v->MinActiveDRAMClockChangeMargin > 0 && PrefetchMode == 0) {
*DRAMClockChangeSupport = dm_dram_clock_change_vactive;
- } else if ((SynchronizedVBlank == true || v->TotalNumberOfActiveOTG == 1
+ } else if ((v->SynchronizedVBlank == true || v->TotalNumberOfActiveOTG == 1
|| SecondMinActiveDRAMClockChangeMarginOneDisplayInVBLank > 0) && PrefetchMode == 0) {
*DRAMClockChangeSupport = dm_dram_clock_change_vblank;
} else {
*DRAMClockChangeSupport = dm_dram_clock_change_unsupported;
}
- *StutterExitWatermark = SRExitTime + ExtraLatency + 10 / DCFCLKDeepSleep;
- *StutterEnterPlusExitWatermark = (SREnterPlusExitTime + ExtraLatency + 10 / DCFCLKDeepSleep);
- *Z8StutterExitWatermark = SRExitZ8Time + ExtraLatency + 10 / DCFCLKDeepSleep;
- *Z8StutterEnterPlusExitWatermark = SREnterPlusExitZ8Time + ExtraLatency + 10 / DCFCLKDeepSleep;
+ *StutterExitWatermark = v->SRExitTime + ExtraLatency + 10 / DCFCLKDeepSleep;
+ *StutterEnterPlusExitWatermark = (v->SREnterPlusExitTime + ExtraLatency + 10 / DCFCLKDeepSleep);
+ *Z8StutterExitWatermark = v->SRExitZ8Time + ExtraLatency + 10 / DCFCLKDeepSleep;
+ *Z8StutterEnterPlusExitWatermark = v->SREnterPlusExitZ8Time + ExtraLatency + 10 / DCFCLKDeepSleep;
#ifdef __DML_VBA_DEBUG__
dml_print("DML::%s: StutterExitWatermark = %f\n", __func__, *StutterExitWatermark);
diff --git a/drivers/gpu/drm/amd/display/dc/dml/dcn31/display_rq_dlg_calc_31.c b/drivers/gpu/drm/amd/display/dc/dml/dcn31/display_rq_dlg_calc_31.c
index 66b82e4f05c6..35d10b4d018b 100644
--- a/drivers/gpu/drm/amd/display/dc/dml/dcn31/display_rq_dlg_calc_31.c
+++ b/drivers/gpu/drm/amd/display/dc/dml/dcn31/display_rq_dlg_calc_31.c
@@ -27,7 +27,7 @@
#include "../display_mode_vba.h"
#include "../dml_inline_defs.h"
#include "display_rq_dlg_calc_31.h"
-#include "dml/dcn30/display_mode_vba_30.h"
+#include "../dcn30/display_mode_vba_30.h"
static bool is_dual_plane(enum source_format_class source_format)
{
diff --git a/drivers/gpu/drm/amd/display/dc/dml/dcn314/dcn314_fpu.c b/drivers/gpu/drm/amd/display/dc/dml/dcn314/dcn314_fpu.c
new file mode 100644
index 000000000000..4bb3b31ea7e0
--- /dev/null
+++ b/drivers/gpu/drm/amd/display/dc/dml/dcn314/dcn314_fpu.c
@@ -0,0 +1,379 @@
+// SPDX-License-Identifier: MIT
+/*
+ * Copyright 2022 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ * Authors: AMD
+ *
+ */
+
+#include "clk_mgr.h"
+#include "resource.h"
+#include "dcn31/dcn31_hubbub.h"
+#include "dcn314_fpu.h"
+#include "dml/dcn20/dcn20_fpu.h"
+#include "dml/display_mode_vba.h"
+
+struct _vcs_dpi_ip_params_st dcn3_14_ip = {
+ .VBlankNomDefaultUS = 668,
+ .gpuvm_enable = 1,
+ .gpuvm_max_page_table_levels = 1,
+ .hostvm_enable = 1,
+ .hostvm_max_page_table_levels = 2,
+ .rob_buffer_size_kbytes = 64,
+ .det_buffer_size_kbytes = DCN3_14_DEFAULT_DET_SIZE,
+ .config_return_buffer_size_in_kbytes = 1792,
+ .compressed_buffer_segment_size_in_kbytes = 64,
+ .meta_fifo_size_in_kentries = 32,
+ .zero_size_buffer_entries = 512,
+ .compbuf_reserved_space_64b = 256,
+ .compbuf_reserved_space_zs = 64,
+ .dpp_output_buffer_pixels = 2560,
+ .opp_output_buffer_lines = 1,
+ .pixel_chunk_size_kbytes = 8,
+ .meta_chunk_size_kbytes = 2,
+ .min_meta_chunk_size_bytes = 256,
+ .writeback_chunk_size_kbytes = 8,
+ .ptoi_supported = false,
+ .num_dsc = 4,
+ .maximum_dsc_bits_per_component = 10,
+ .dsc422_native_support = false,
+ .is_line_buffer_bpp_fixed = true,
+ .line_buffer_fixed_bpp = 48,
+ .line_buffer_size_bits = 789504,
+ .max_line_buffer_lines = 12,
+ .writeback_interface_buffer_size_kbytes = 90,
+ .max_num_dpp = 4,
+ .max_num_otg = 4,
+ .max_num_hdmi_frl_outputs = 1,
+ .max_num_wb = 1,
+ .max_dchub_pscl_bw_pix_per_clk = 4,
+ .max_pscl_lb_bw_pix_per_clk = 2,
+ .max_lb_vscl_bw_pix_per_clk = 4,
+ .max_vscl_hscl_bw_pix_per_clk = 4,
+ .max_hscl_ratio = 6,
+ .max_vscl_ratio = 6,
+ .max_hscl_taps = 8,
+ .max_vscl_taps = 8,
+ .dpte_buffer_size_in_pte_reqs_luma = 64,
+ .dpte_buffer_size_in_pte_reqs_chroma = 34,
+ .dispclk_ramp_margin_percent = 1,
+ .max_inter_dcn_tile_repeaters = 8,
+ .cursor_buffer_size = 16,
+ .cursor_chunk_size = 2,
+ .writeback_line_buffer_buffer_size = 0,
+ .writeback_min_hscl_ratio = 1,
+ .writeback_min_vscl_ratio = 1,
+ .writeback_max_hscl_ratio = 1,
+ .writeback_max_vscl_ratio = 1,
+ .writeback_max_hscl_taps = 1,
+ .writeback_max_vscl_taps = 1,
+ .dppclk_delay_subtotal = 46,
+ .dppclk_delay_scl = 50,
+ .dppclk_delay_scl_lb_only = 16,
+ .dppclk_delay_cnvc_formatter = 27,
+ .dppclk_delay_cnvc_cursor = 6,
+ .dispclk_delay_subtotal = 119,
+ .dynamic_metadata_vm_enabled = false,
+ .odm_combine_4to1_supported = false,
+ .dcc_supported = true,
+};
+
+struct _vcs_dpi_soc_bounding_box_st dcn3_14_soc = {
+ /*TODO: correct dispclk/dppclk voltage level determination*/
+ .clock_limits = {
+ {
+ .state = 0,
+ .dispclk_mhz = 1200.0,
+ .dppclk_mhz = 1200.0,
+ .phyclk_mhz = 600.0,
+ .phyclk_d18_mhz = 667.0,
+ .dscclk_mhz = 186.0,
+ .dtbclk_mhz = 600.0,
+ },
+ {
+ .state = 1,
+ .dispclk_mhz = 1200.0,
+ .dppclk_mhz = 1200.0,
+ .phyclk_mhz = 810.0,
+ .phyclk_d18_mhz = 667.0,
+ .dscclk_mhz = 209.0,
+ .dtbclk_mhz = 600.0,
+ },
+ {
+ .state = 2,
+ .dispclk_mhz = 1200.0,
+ .dppclk_mhz = 1200.0,
+ .phyclk_mhz = 810.0,
+ .phyclk_d18_mhz = 667.0,
+ .dscclk_mhz = 209.0,
+ .dtbclk_mhz = 600.0,
+ },
+ {
+ .state = 3,
+ .dispclk_mhz = 1200.0,
+ .dppclk_mhz = 1200.0,
+ .phyclk_mhz = 810.0,
+ .phyclk_d18_mhz = 667.0,
+ .dscclk_mhz = 371.0,
+ .dtbclk_mhz = 600.0,
+ },
+ {
+ .state = 4,
+ .dispclk_mhz = 1200.0,
+ .dppclk_mhz = 1200.0,
+ .phyclk_mhz = 810.0,
+ .phyclk_d18_mhz = 667.0,
+ .dscclk_mhz = 417.0,
+ .dtbclk_mhz = 600.0,
+ },
+ },
+ .num_states = 5,
+ .sr_exit_time_us = 9.0,
+ .sr_enter_plus_exit_time_us = 11.0,
+ .sr_exit_z8_time_us = 442.0,
+ .sr_enter_plus_exit_z8_time_us = 560.0,
+ .writeback_latency_us = 12.0,
+ .dram_channel_width_bytes = 4,
+ .round_trip_ping_latency_dcfclk_cycles = 106,
+ .urgent_latency_pixel_data_only_us = 4.0,
+ .urgent_latency_pixel_mixed_with_vm_data_us = 4.0,
+ .urgent_latency_vm_data_only_us = 4.0,
+ .urgent_out_of_order_return_per_channel_pixel_only_bytes = 4096,
+ .urgent_out_of_order_return_per_channel_pixel_and_vm_bytes = 4096,
+ .urgent_out_of_order_return_per_channel_vm_only_bytes = 4096,
+ .pct_ideal_sdp_bw_after_urgent = 80.0,
+ .pct_ideal_dram_sdp_bw_after_urgent_pixel_only = 65.0,
+ .pct_ideal_dram_sdp_bw_after_urgent_pixel_and_vm = 60.0,
+ .pct_ideal_dram_sdp_bw_after_urgent_vm_only = 30.0,
+ .max_avg_sdp_bw_use_normal_percent = 60.0,
+ .max_avg_dram_bw_use_normal_percent = 60.0,
+ .fabric_datapath_to_dcn_data_return_bytes = 32,
+ .return_bus_width_bytes = 64,
+ .downspread_percent = 0.38,
+ .dcn_downspread_percent = 0.5,
+ .gpuvm_min_page_size_bytes = 4096,
+ .hostvm_min_page_size_bytes = 4096,
+ .do_urgent_latency_adjustment = false,
+ .urgent_latency_adjustment_fabric_clock_component_us = 0,
+ .urgent_latency_adjustment_fabric_clock_reference_mhz = 0,
+};
+
+
+void dcn314_update_bw_bounding_box_fpu(struct dc *dc, struct clk_bw_params *bw_params)
+{
+ struct clk_limit_table *clk_table = &bw_params->clk_table;
+ struct _vcs_dpi_voltage_scaling_st *clock_limits =
+ dcn3_14_soc.clock_limits;
+ unsigned int i, closest_clk_lvl;
+ int max_dispclk_mhz = 0, max_dppclk_mhz = 0;
+ int j;
+
+ dc_assert_fp_enabled();
+
+ // Default clock levels are used for diags, which may lead to overclocking.
+ if (!IS_DIAG_DC(dc->ctx->dce_environment) && dc->config.use_default_clock_table == false) {
+
+ dcn3_14_ip.max_num_otg = dc->res_pool->res_cap->num_timing_generator;
+ dcn3_14_ip.max_num_dpp = dc->res_pool->pipe_count;
+
+ if (bw_params->dram_channel_width_bytes > 0)
+ dcn3_14_soc.dram_channel_width_bytes = bw_params->dram_channel_width_bytes;
+
+ if (bw_params->num_channels > 0)
+ dcn3_14_soc.num_chans = bw_params->num_channels;
+
+ ASSERT(dcn3_14_soc.num_chans);
+ ASSERT(clk_table->num_entries);
+
+ /* Prepass to find max clocks independent of voltage level. */
+ for (i = 0; i < clk_table->num_entries; ++i) {
+ if (clk_table->entries[i].dispclk_mhz > max_dispclk_mhz)
+ max_dispclk_mhz = clk_table->entries[i].dispclk_mhz;
+ if (clk_table->entries[i].dppclk_mhz > max_dppclk_mhz)
+ max_dppclk_mhz = clk_table->entries[i].dppclk_mhz;
+ }
+
+ for (i = 0; i < clk_table->num_entries; i++) {
+ /* loop backwards*/
+ for (closest_clk_lvl = 0, j = dcn3_14_soc.num_states - 1; j >= 0; j--) {
+ if ((unsigned int) dcn3_14_soc.clock_limits[j].dcfclk_mhz <= clk_table->entries[i].dcfclk_mhz) {
+ closest_clk_lvl = j;
+ break;
+ }
+ }
+ if (clk_table->num_entries == 1) {
+ /*smu gives one DPM level, let's take the highest one*/
+ closest_clk_lvl = dcn3_14_soc.num_states - 1;
+ }
+
+ clock_limits[i].state = i;
+
+ /* Clocks dependent on voltage level. */
+ clock_limits[i].dcfclk_mhz = clk_table->entries[i].dcfclk_mhz;
+ if (clk_table->num_entries == 1 &&
+ clock_limits[i].dcfclk_mhz < dcn3_14_soc.clock_limits[closest_clk_lvl].dcfclk_mhz) {
+ /*SMU fix not released yet*/
+ clock_limits[i].dcfclk_mhz = dcn3_14_soc.clock_limits[closest_clk_lvl].dcfclk_mhz;
+ }
+ clock_limits[i].fabricclk_mhz = clk_table->entries[i].fclk_mhz;
+ clock_limits[i].socclk_mhz = clk_table->entries[i].socclk_mhz;
+
+ if (clk_table->entries[i].memclk_mhz && clk_table->entries[i].wck_ratio)
+ clock_limits[i].dram_speed_mts = clk_table->entries[i].memclk_mhz * 2 * clk_table->entries[i].wck_ratio;
+
+ /* Clocks independent of voltage level. */
+ clock_limits[i].dispclk_mhz = max_dispclk_mhz ? max_dispclk_mhz :
+ dcn3_14_soc.clock_limits[closest_clk_lvl].dispclk_mhz;
+
+ clock_limits[i].dppclk_mhz = max_dppclk_mhz ? max_dppclk_mhz :
+ dcn3_14_soc.clock_limits[closest_clk_lvl].dppclk_mhz;
+
+ clock_limits[i].dram_bw_per_chan_gbps = dcn3_14_soc.clock_limits[closest_clk_lvl].dram_bw_per_chan_gbps;
+ clock_limits[i].dscclk_mhz = dcn3_14_soc.clock_limits[closest_clk_lvl].dscclk_mhz;
+ clock_limits[i].dtbclk_mhz = dcn3_14_soc.clock_limits[closest_clk_lvl].dtbclk_mhz;
+ clock_limits[i].phyclk_d18_mhz = dcn3_14_soc.clock_limits[closest_clk_lvl].phyclk_d18_mhz;
+ clock_limits[i].phyclk_mhz = dcn3_14_soc.clock_limits[closest_clk_lvl].phyclk_mhz;
+ }
+ for (i = 0; i < clk_table->num_entries; i++)
+ dcn3_14_soc.clock_limits[i] = clock_limits[i];
+ if (clk_table->num_entries) {
+ dcn3_14_soc.num_states = clk_table->num_entries;
+ }
+ }
+
+ if (max_dispclk_mhz) {
+ dcn3_14_soc.dispclk_dppclk_vco_speed_mhz = max_dispclk_mhz * 2;
+ dc->dml.soc.dispclk_dppclk_vco_speed_mhz = max_dispclk_mhz * 2;
+ }
+
+ if (!IS_FPGA_MAXIMUS_DC(dc->ctx->dce_environment))
+ dml_init_instance(&dc->dml, &dcn3_14_soc, &dcn3_14_ip, DML_PROJECT_DCN314);
+ else
+ dml_init_instance(&dc->dml, &dcn3_14_soc, &dcn3_14_ip, DML_PROJECT_DCN31_FPGA);
+}
+
+static bool is_dual_plane(enum surface_pixel_format format)
+{
+ return format >= SURFACE_PIXEL_FORMAT_VIDEO_BEGIN || format == SURFACE_PIXEL_FORMAT_GRPH_RGBE_ALPHA;
+}
+
+int dcn314_populate_dml_pipes_from_context_fpu(struct dc *dc, struct dc_state *context,
+ display_e2e_pipe_params_st *pipes,
+ bool fast_validate)
+{
+ int i, pipe_cnt;
+ struct resource_context *res_ctx = &context->res_ctx;
+ struct pipe_ctx *pipe;
+ bool upscaled = false;
+
+ dc_assert_fp_enabled();
+
+ dcn20_populate_dml_pipes_from_context(dc, context, pipes, fast_validate);
+
+ for (i = 0, pipe_cnt = 0; i < dc->res_pool->pipe_count; i++) {
+ struct dc_crtc_timing *timing;
+
+ if (!res_ctx->pipe_ctx[i].stream)
+ continue;
+ pipe = &res_ctx->pipe_ctx[i];
+ timing = &pipe->stream->timing;
+
+ if (dc_extended_blank_supported(dc) && pipe->stream->adjust.v_total_max == pipe->stream->adjust.v_total_min
+ && pipe->stream->adjust.v_total_min > timing->v_total)
+ pipes[pipe_cnt].pipe.dest.vtotal = pipe->stream->adjust.v_total_min;
+
+ if (pipe->plane_state &&
+ (pipe->plane_state->src_rect.height < pipe->plane_state->dst_rect.height ||
+ pipe->plane_state->src_rect.width < pipe->plane_state->dst_rect.width))
+ upscaled = true;
+
+ /*
+ * Immediate flip can be set dynamically after enabling the plane.
+ * We need to require support for immediate flip or underflow can be
+ * intermittently experienced depending on peak b/w requirements.
+ */
+ pipes[pipe_cnt].pipe.src.immediate_flip = true;
+
+ pipes[pipe_cnt].pipe.src.unbounded_req_mode = false;
+ pipes[pipe_cnt].pipe.src.hostvm = dc->res_pool->hubbub->riommu_active;
+ pipes[pipe_cnt].pipe.src.gpuvm = true;
+ pipes[pipe_cnt].pipe.src.dcc_fraction_of_zs_req_luma = 0;
+ pipes[pipe_cnt].pipe.src.dcc_fraction_of_zs_req_chroma = 0;
+ pipes[pipe_cnt].pipe.dest.vfront_porch = timing->v_front_porch;
+ pipes[pipe_cnt].pipe.src.dcc_rate = 3;
+ pipes[pipe_cnt].dout.dsc_input_bpc = 0;
+
+ if (pipes[pipe_cnt].dout.dsc_enable) {
+ switch (timing->display_color_depth) {
+ case COLOR_DEPTH_888:
+ pipes[pipe_cnt].dout.dsc_input_bpc = 8;
+ break;
+ case COLOR_DEPTH_101010:
+ pipes[pipe_cnt].dout.dsc_input_bpc = 10;
+ break;
+ case COLOR_DEPTH_121212:
+ pipes[pipe_cnt].dout.dsc_input_bpc = 12;
+ break;
+ default:
+ ASSERT(0);
+ break;
+ }
+ }
+
+ pipe_cnt++;
+ }
+ context->bw_ctx.dml.ip.det_buffer_size_kbytes = DCN3_14_DEFAULT_DET_SIZE;
+
+ dc->config.enable_4to1MPC = false;
+ if (pipe_cnt == 1 && pipe->plane_state && !dc->debug.disable_z9_mpc) {
+ if (is_dual_plane(pipe->plane_state->format)
+ && pipe->plane_state->src_rect.width <= 1920 && pipe->plane_state->src_rect.height <= 1080) {
+ dc->config.enable_4to1MPC = true;
+ } else if (!is_dual_plane(pipe->plane_state->format) && pipe->plane_state->src_rect.width <= 5120) {
+ /* Limit to 5k max to avoid forced pipe split when there is not enough detile for swath */
+ context->bw_ctx.dml.ip.det_buffer_size_kbytes = 192;
+ pipes[0].pipe.src.unbounded_req_mode = true;
+ }
+ } else if (context->stream_count >= dc->debug.crb_alloc_policy_min_disp_count
+ && dc->debug.crb_alloc_policy > DET_SIZE_DEFAULT) {
+ context->bw_ctx.dml.ip.det_buffer_size_kbytes = dc->debug.crb_alloc_policy * 64;
+ } else if (context->stream_count >= 3 && upscaled) {
+ context->bw_ctx.dml.ip.det_buffer_size_kbytes = 192;
+ }
+
+ for (i = 0; i < dc->res_pool->pipe_count; i++) {
+ struct pipe_ctx *pipe = &context->res_ctx.pipe_ctx[i];
+
+ if (!pipe->stream)
+ continue;
+
+ if (pipe->stream->signal == SIGNAL_TYPE_EDP && dc->debug.seamless_boot_odm_combine &&
+ pipe->stream->apply_seamless_boot_optimization) {
+
+ if (pipe->stream->apply_boot_odm_mode == dm_odm_combine_policy_2to1) {
+ context->bw_ctx.dml.vba.ODMCombinePolicy = dm_odm_combine_policy_2to1;
+ break;
+ }
+ }
+ }
+
+ return pipe_cnt;
+}
diff --git a/drivers/gpu/drm/amd/display/dc/dml/dcn314/dcn314_fpu.h b/drivers/gpu/drm/amd/display/dc/dml/dcn314/dcn314_fpu.h
new file mode 100644
index 000000000000..d32c5bb99f4c
--- /dev/null
+++ b/drivers/gpu/drm/amd/display/dc/dml/dcn314/dcn314_fpu.h
@@ -0,0 +1,40 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Copyright 2022 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ * Authors: AMD
+ *
+ */
+
+#ifndef __DCN314_FPU_H__
+#define __DCN314_FPU_H__
+
+#define DCN3_14_DEFAULT_DET_SIZE 384
+#define DCN3_14_MAX_DET_SIZE 384
+#define DCN3_14_MIN_COMPBUF_SIZE_KB 128
+#define DCN3_14_CRB_SEGMENT_SIZE_KB 64
+
+void dcn314_update_bw_bounding_box_fpu(struct dc *dc, struct clk_bw_params *bw_params);
+int dcn314_populate_dml_pipes_from_context_fpu(struct dc *dc, struct dc_state *context,
+ display_e2e_pipe_params_st *pipes,
+ bool fast_validate);
+
+#endif
diff --git a/drivers/gpu/drm/amd/display/dc/dml/dcn314/display_mode_vba_314.c b/drivers/gpu/drm/amd/display/dc/dml/dcn314/display_mode_vba_314.c
index fc4d7474c111..01f3fad172f3 100644
--- a/drivers/gpu/drm/amd/display/dc/dml/dcn314/display_mode_vba_314.c
+++ b/drivers/gpu/drm/amd/display/dc/dml/dcn314/display_mode_vba_314.c
@@ -61,7 +61,7 @@
// fudge factor for min dcfclk calclation
#define __DML_MIN_DCFCLK_FACTOR__ 1.15
-struct {
+typedef struct {
double DPPCLK;
double DISPCLK;
double PixelClock;
@@ -1599,7 +1599,7 @@ static void CalculateDCCConfiguration(
int segment_order_vert_contiguous_luma;
int segment_order_vert_contiguous_chroma;
- enum {
+ typedef enum {
REQ_256Bytes, REQ_128BytesNonContiguous, REQ_128BytesContiguous, REQ_NA
} RequestType;
RequestType RequestLuma;
@@ -4071,9 +4071,7 @@ void dml314_ModeSupportAndSystemConfigurationFull(struct display_mode_lib *mode_
v->SourceFormatPixelAndScanSupport = true;
for (k = 0; k < v->NumberOfActivePlanes; k++) {
- if ((v->SurfaceTiling[k] == dm_sw_linear && (!(v->SourceScan[k] != dm_vert) || v->DCCEnable[k] == true))
- || ((v->SurfaceTiling[k] == dm_sw_64kb_d || v->SurfaceTiling[k] == dm_sw_64kb_d_t
- || v->SurfaceTiling[k] == dm_sw_64kb_d_x) && !(v->SourcePixelFormat[k] == dm_444_64))) {
+ if (v->SurfaceTiling[k] == dm_sw_linear && (!(v->SourceScan[k] != dm_vert) || v->DCCEnable[k] == true)) {
v->SourceFormatPixelAndScanSupport = false;
}
}
@@ -7157,12 +7155,13 @@ static double CalculateExtraLatencyBytes(
HostVMDynamicLevels = dml_max(0, (int) HostVMMaxNonCachedPageTableLevels - 1);
else
HostVMDynamicLevels = dml_max(0, (int) HostVMMaxNonCachedPageTableLevels - 2);
- else
+ } else {
HostVMDynamicLevels = 0;
+ }
ret = ReorderingBytes + (TotalNumberOfActiveDPP * PixelChunkSizeInKByte + TotalNumberOfDCCActiveDPP * MetaChunkSize) * 1024.0;
- if (GPUVMEnable == true)
+ if (GPUVMEnable == true) {
for (k = 0; k < NumberOfActivePlanes; ++k)
ret = ret + NumberOfDPP[k] * dpte_group_bytes[k] * (1 + 8 * HostVMDynamicLevels) * HostVMInefficiencyFactor;
}
diff --git a/drivers/gpu/drm/amd/display/dc/dml/dcn32/dcn32_fpu.c b/drivers/gpu/drm/amd/display/dc/dml/dcn32/dcn32_fpu.c
index 66453546e24f..8e4c9d0887ce 100644
--- a/drivers/gpu/drm/amd/display/dc/dml/dcn32/dcn32_fpu.c
+++ b/drivers/gpu/drm/amd/display/dc/dml/dcn32/dcn32_fpu.c
@@ -473,8 +473,11 @@ void dcn32_set_phantom_stream_timing(struct dc *dc,
// DML calculation for MALL region doesn't take into account FW delay
// and required pstate allow width for multi-display cases
+ /* Add 16 lines margin to the MALL REGION because SUB_VP_START_LINE must be aligned
+ * to 2 swaths (i.e. 16 lines)
+ */
phantom_vactive = get_subviewport_lines_needed_in_mall(&context->bw_ctx.dml, pipes, pipe_cnt, pipe_idx) +
- pstate_width_fw_delay_lines;
+ pstate_width_fw_delay_lines + dc->caps.subvp_swath_height_margin_lines;
// For backporch of phantom pipe, use vstartup of the main pipe
phantom_bp = get_vstartup(&context->bw_ctx.dml, pipes, pipe_cnt, pipe_idx);
@@ -490,6 +493,7 @@ void dcn32_set_phantom_stream_timing(struct dc *dc,
phantom_stream->timing.v_front_porch +
phantom_stream->timing.v_sync_width +
phantom_bp;
+ phantom_stream->timing.flags.DSC = 0; // Don't need DSC for phantom timing
}
/**
@@ -983,9 +987,15 @@ static void dcn32_full_validate_bw_helper(struct dc *dc,
* DML favors voltage over p-state, but we're more interested in
* supporting p-state over voltage. We can't support p-state in
* prefetch mode > 0 so try capping the prefetch mode to start.
+ * Override present for testing.
*/
- context->bw_ctx.dml.soc.allow_for_pstate_or_stutter_in_vblank_final =
+ if (dc->debug.dml_disallow_alternate_prefetch_modes)
+ context->bw_ctx.dml.soc.allow_for_pstate_or_stutter_in_vblank_final =
dm_prefetch_support_uclk_fclk_and_stutter;
+ else
+ context->bw_ctx.dml.soc.allow_for_pstate_or_stutter_in_vblank_final =
+ dm_prefetch_support_uclk_fclk_and_stutter_if_possible;
+
*vlevel = dml_get_voltage_level(&context->bw_ctx.dml, pipes, *pipe_cnt);
/* This may adjust vlevel and maxMpcComb */
if (*vlevel < context->bw_ctx.dml.soc.num_states)
@@ -1004,6 +1014,15 @@ static void dcn32_full_validate_bw_helper(struct dc *dc,
dc->debug.force_subvp_mclk_switch)) {
dcn32_merge_pipes_for_subvp(dc, context);
+ // to re-initialize viewport after the pipe merge
+ for (int i = 0; i < dc->res_pool->pipe_count; i++) {
+ struct pipe_ctx *pipe_ctx = &context->res_ctx.pipe_ctx[i];
+
+ if (!pipe_ctx->plane_state || !pipe_ctx->stream)
+ continue;
+
+ resource_build_scaling_params(pipe_ctx);
+ }
while (!found_supported_config && dcn32_enough_pipes_for_subvp(dc, context) &&
dcn32_assign_subvp_pipe(dc, context, &dc_pipe_idx)) {
@@ -1014,7 +1033,9 @@ static void dcn32_full_validate_bw_helper(struct dc *dc,
* will not allow for switch in VBLANK. The DRR display must have it's VBLANK stretched
* enough to support MCLK switching.
*/
- if (*vlevel == context->bw_ctx.dml.soc.num_states) {
+ if (*vlevel == context->bw_ctx.dml.soc.num_states &&
+ context->bw_ctx.dml.soc.allow_for_pstate_or_stutter_in_vblank_final ==
+ dm_prefetch_support_uclk_fclk_and_stutter) {
context->bw_ctx.dml.soc.allow_for_pstate_or_stutter_in_vblank_final =
dm_prefetch_support_stutter;
/* There are params (such as FabricClock) that need to be recalculated
@@ -1344,7 +1365,8 @@ bool dcn32_internal_validate_bw(struct dc *dc,
int split[MAX_PIPES] = { 0 };
bool merge[MAX_PIPES] = { false };
bool newly_split[MAX_PIPES] = { false };
- int pipe_cnt, i, pipe_idx, vlevel;
+ int pipe_cnt, i, pipe_idx;
+ int vlevel = context->bw_ctx.dml.soc.num_states;
struct vba_vars_st *vba = &context->bw_ctx.dml.vba;
dc_assert_fp_enabled();
@@ -1373,17 +1395,22 @@ bool dcn32_internal_validate_bw(struct dc *dc,
DC_FP_END();
}
- if (fast_validate || vlevel == context->bw_ctx.dml.soc.num_states ||
- vba->DRAMClockChangeSupport[vlevel][vba->maxMpcComb] == dm_dram_clock_change_unsupported) {
+ if (fast_validate ||
+ (dc->debug.dml_disallow_alternate_prefetch_modes &&
+ (vlevel == context->bw_ctx.dml.soc.num_states ||
+ vba->DRAMClockChangeSupport[vlevel][vba->maxMpcComb] == dm_dram_clock_change_unsupported))) {
/*
- * If mode is unsupported or there's still no p-state support then
- * fall back to favoring voltage.
+ * If dml_disallow_alternate_prefetch_modes is false, then we have already
+ * tried alternate prefetch modes during full validation.
+ *
+ * If mode is unsupported or there is no p-state support, then
+ * fall back to favouring voltage.
*
- * If Prefetch mode 0 failed for this config, or passed with Max UCLK, try if
- * supported with Prefetch mode 1 (dm_prefetch_support_fclk_and_stutter == 2)
+ * If Prefetch mode 0 failed for this config, or passed with Max UCLK, then try
+ * to support with Prefetch mode 1 (dm_prefetch_support_fclk_and_stutter == 2)
*/
context->bw_ctx.dml.soc.allow_for_pstate_or_stutter_in_vblank_final =
- dm_prefetch_support_fclk_and_stutter;
+ dm_prefetch_support_fclk_and_stutter;
vlevel = dml_get_voltage_level(&context->bw_ctx.dml, pipes, pipe_cnt);
@@ -2098,6 +2125,13 @@ void dcn32_update_bw_bounding_box_fpu(struct dc *dc, struct clk_bw_params *bw_pa
dc->bb_overrides.dram_clock_change_latency_ns / 1000.0;
}
+ if ((int)(dcn3_2_soc.fclk_change_latency_us * 1000)
+ != dc->bb_overrides.fclk_clock_change_latency_ns
+ && dc->bb_overrides.fclk_clock_change_latency_ns) {
+ dcn3_2_soc.fclk_change_latency_us =
+ dc->bb_overrides.fclk_clock_change_latency_ns / 1000;
+ }
+
if ((int)(dcn3_2_soc.dummy_pstate_latency_us * 1000)
!= dc->bb_overrides.dummy_clock_change_latency_ns
&& dc->bb_overrides.dummy_clock_change_latency_ns) {
diff --git a/drivers/gpu/drm/amd/display/dc/dml/dcn32/display_mode_vba_32.c b/drivers/gpu/drm/amd/display/dc/dml/dcn32/display_mode_vba_32.c
index 890612db08dc..9a60f27eceaa 100644
--- a/drivers/gpu/drm/amd/display/dc/dml/dcn32/display_mode_vba_32.c
+++ b/drivers/gpu/drm/amd/display/dc/dml/dcn32/display_mode_vba_32.c
@@ -221,7 +221,6 @@ static void DISPCLKDPPCLKDCFCLKDeepSleepPrefetchParametersWatermarksAndPerforman
// VBA_DELTA
// Calculate DET size, swath height
dml32_CalculateSwathAndDETConfiguration(
- &v->dummy_vars.dml32_CalculateSwathAndDETConfiguration,
mode_lib->vba.DETSizeOverride,
mode_lib->vba.UsesMALLForPStateChange,
mode_lib->vba.ConfigReturnBufferSizeInKByte,
@@ -461,7 +460,6 @@ static void DISPCLKDPPCLKDCFCLKDeepSleepPrefetchParametersWatermarksAndPerforman
{
dml32_CalculateVMRowAndSwath(
- &v->dummy_vars.dml32_CalculateVMRowAndSwath,
mode_lib->vba.NumberOfActiveSurfaces,
v->dummy_vars.DISPCLKDPPCLKDCFCLKDeepSleepPrefetchParametersWatermarksAndPerformanceCalculation.SurfaceParameters,
v->SurfaceSizeInMALL,
@@ -758,31 +756,17 @@ static void DISPCLKDPPCLKDCFCLKDeepSleepPrefetchParametersWatermarksAndPerforman
v->dummy_vars.DISPCLKDPPCLKDCFCLKDeepSleepPrefetchParametersWatermarksAndPerformanceCalculation.myPipe.BytePerPixelC = v->BytePerPixelC[k];
v->dummy_vars.DISPCLKDPPCLKDCFCLKDeepSleepPrefetchParametersWatermarksAndPerformanceCalculation.myPipe.ProgressiveToInterlaceUnitInOPP = mode_lib->vba.ProgressiveToInterlaceUnitInOPP;
v->ErrorResult[k] = dml32_CalculatePrefetchSchedule(
- &v->dummy_vars.dml32_CalculatePrefetchSchedule,
+ v,
+ k,
v->dummy_vars.DISPCLKDPPCLKDCFCLKDeepSleepPrefetchParametersWatermarksAndPerformanceCalculation.HostVMInefficiencyFactor,
- &v->dummy_vars.DISPCLKDPPCLKDCFCLKDeepSleepPrefetchParametersWatermarksAndPerformanceCalculation.myPipe, v->DSCDelay[k],
- mode_lib->vba.DPPCLKDelaySubtotal + mode_lib->vba.DPPCLKDelayCNVCFormater,
- mode_lib->vba.DPPCLKDelaySCL,
- mode_lib->vba.DPPCLKDelaySCLLBOnly,
- mode_lib->vba.DPPCLKDelayCNVCCursor,
- mode_lib->vba.DISPCLKDelaySubtotal,
- (unsigned int) (v->SwathWidthY[k] / mode_lib->vba.HRatio[k]),
- mode_lib->vba.OutputFormat[k],
- mode_lib->vba.MaxInterDCNTileRepeaters,
+ &v->dummy_vars.DISPCLKDPPCLKDCFCLKDeepSleepPrefetchParametersWatermarksAndPerformanceCalculation.myPipe,
+ v->DSCDelay[k],
+ (unsigned int) (v->SwathWidthY[k] / v->HRatio[k]),
dml_min(v->VStartupLines, v->MaxVStartupLines[k]),
v->MaxVStartupLines[k],
- mode_lib->vba.GPUVMMaxPageTableLevels,
- mode_lib->vba.GPUVMEnable,
- mode_lib->vba.HostVMEnable,
- mode_lib->vba.HostVMMaxNonCachedPageTableLevels,
- mode_lib->vba.HostVMMinPageSize,
- mode_lib->vba.DynamicMetadataEnable[k],
- mode_lib->vba.DynamicMetadataVMEnabled,
- mode_lib->vba.DynamicMetadataLinesBeforeActiveRequired[k],
- mode_lib->vba.DynamicMetadataTransmittedBytes[k],
v->UrgentLatency,
v->UrgentExtraLatency,
- mode_lib->vba.TCalc,
+ v->TCalc,
v->PDEAndMetaPTEBytesFrame[k],
v->MetaRowByte[k],
v->PixelPTEBytesPerRow[k],
@@ -796,8 +780,8 @@ static void DISPCLKDPPCLKDCFCLKDeepSleepPrefetchParametersWatermarksAndPerforman
v->MaxNumSwathC[k],
v->swath_width_luma_ub[k],
v->swath_width_chroma_ub[k],
- mode_lib->vba.SwathHeightY[k],
- mode_lib->vba.SwathHeightC[k],
+ v->SwathHeightY[k],
+ v->SwathHeightC[k],
TWait,
/* Output */
&v->DSTXAfterScaler[k],
@@ -1167,59 +1151,28 @@ static void DISPCLKDPPCLKDCFCLKDeepSleepPrefetchParametersWatermarksAndPerforman
v->dummy_vars.DISPCLKDPPCLKDCFCLKDeepSleepPrefetchParametersWatermarksAndPerformanceCalculation.mmSOCParameters.SMNLatency = mode_lib->vba.SMNLatency;
dml32_CalculateWatermarksMALLUseAndDRAMSpeedChangeSupport(
- &v->dummy_vars.dml32_CalculateWatermarksMALLUseAndDRAMSpeedChangeSupport,
- mode_lib->vba.USRRetrainingRequiredFinal,
- mode_lib->vba.UsesMALLForPStateChange,
- mode_lib->vba.PrefetchModePerState[mode_lib->vba.VoltageLevel][mode_lib->vba.maxMpcComb],
- mode_lib->vba.NumberOfActiveSurfaces,
- mode_lib->vba.MaxLineBufferLines,
- mode_lib->vba.LineBufferSizeFinal,
- mode_lib->vba.WritebackInterfaceBufferSize,
- mode_lib->vba.DCFCLK,
- mode_lib->vba.ReturnBW,
- mode_lib->vba.SynchronizeTimingsFinal,
- mode_lib->vba.SynchronizeDRRDisplaysForUCLKPStateChangeFinal,
- mode_lib->vba.DRRDisplay,
- v->dpte_group_bytes,
- v->meta_row_height,
- v->meta_row_height_chroma,
+ v,
+ v->PrefetchModePerState[v->VoltageLevel][v->maxMpcComb],
+ v->DCFCLK,
+ v->ReturnBW,
v->dummy_vars.DISPCLKDPPCLKDCFCLKDeepSleepPrefetchParametersWatermarksAndPerformanceCalculation.mmSOCParameters,
- mode_lib->vba.WritebackChunkSize,
- mode_lib->vba.SOCCLK,
+ v->SOCCLK,
v->DCFCLKDeepSleep,
- mode_lib->vba.DETBufferSizeY,
- mode_lib->vba.DETBufferSizeC,
- mode_lib->vba.SwathHeightY,
- mode_lib->vba.SwathHeightC,
- mode_lib->vba.LBBitPerPixel,
+ v->DETBufferSizeY,
+ v->DETBufferSizeC,
+ v->SwathHeightY,
+ v->SwathHeightC,
v->SwathWidthY,
v->SwathWidthC,
- mode_lib->vba.HRatio,
- mode_lib->vba.HRatioChroma,
- mode_lib->vba.vtaps,
- mode_lib->vba.VTAPsChroma,
- mode_lib->vba.VRatio,
- mode_lib->vba.VRatioChroma,
- mode_lib->vba.HTotal,
- mode_lib->vba.VTotal,
- mode_lib->vba.VActive,
- mode_lib->vba.PixelClock,
- mode_lib->vba.BlendingAndTiming,
- mode_lib->vba.DPPPerPlane,
+ v->DPPPerPlane,
v->BytePerPixelDETY,
v->BytePerPixelDETC,
v->DSTXAfterScaler,
v->DSTYAfterScaler,
- mode_lib->vba.WritebackEnable,
- mode_lib->vba.WritebackPixelFormat,
- mode_lib->vba.WritebackDestinationWidth,
- mode_lib->vba.WritebackDestinationHeight,
- mode_lib->vba.WritebackSourceHeight,
v->UnboundedRequestEnabled,
v->CompressedBufferSizeInkByte,
/* Output */
- &v->Watermark,
&v->dummy_vars.DISPCLKDPPCLKDCFCLKDeepSleepPrefetchParametersWatermarksAndPerformanceCalculation.dummy_dramchange_support,
v->MaxActiveDRAMClockChangeLatencySupported,
v->SubViewportLinesNeededInMALL,
@@ -1811,10 +1764,10 @@ void dml32_ModeSupportAndSystemConfigurationFull(struct display_mode_lib *mode_l
&mode_lib->vba.Read256BlockHeightC[k],
&mode_lib->vba.Read256BlockWidthY[k],
&mode_lib->vba.Read256BlockWidthC[k],
- &mode_lib->vba.MicroTileHeightY[k],
- &mode_lib->vba.MicroTileHeightC[k],
- &mode_lib->vba.MicroTileWidthY[k],
- &mode_lib->vba.MicroTileWidthC[k]);
+ &mode_lib->vba.MacroTileHeightY[k],
+ &mode_lib->vba.MacroTileHeightC[k],
+ &mode_lib->vba.MacroTileWidthY[k],
+ &mode_lib->vba.MacroTileWidthC[k]);
}
/*Bandwidth Support Check*/
@@ -1952,7 +1905,6 @@ void dml32_ModeSupportAndSystemConfigurationFull(struct display_mode_lib *mode_l
}
dml32_CalculateSwathAndDETConfiguration(
- &v->dummy_vars.dml32_CalculateSwathAndDETConfiguration,
mode_lib->vba.DETSizeOverride,
mode_lib->vba.UsesMALLForPStateChange,
mode_lib->vba.ConfigReturnBufferSizeInKByte,
@@ -2549,7 +2501,6 @@ void dml32_ModeSupportAndSystemConfigurationFull(struct display_mode_lib *mode_l
}
dml32_CalculateSwathAndDETConfiguration(
- &v->dummy_vars.dml32_CalculateSwathAndDETConfiguration,
mode_lib->vba.DETSizeOverride,
mode_lib->vba.UsesMALLForPStateChange,
mode_lib->vba.ConfigReturnBufferSizeInKByte,
@@ -2666,10 +2617,10 @@ void dml32_ModeSupportAndSystemConfigurationFull(struct display_mode_lib *mode_l
mode_lib->vba.Read256BlockWidthC,
mode_lib->vba.Read256BlockHeightY,
mode_lib->vba.Read256BlockHeightC,
- mode_lib->vba.MicroTileWidthY,
- mode_lib->vba.MicroTileWidthC,
- mode_lib->vba.MicroTileHeightY,
- mode_lib->vba.MicroTileHeightC,
+ mode_lib->vba.MacroTileWidthY,
+ mode_lib->vba.MacroTileWidthC,
+ mode_lib->vba.MacroTileHeightY,
+ mode_lib->vba.MacroTileHeightC,
/* Output */
mode_lib->vba.SurfaceSizeInMALL,
@@ -2716,10 +2667,10 @@ void dml32_ModeSupportAndSystemConfigurationFull(struct display_mode_lib *mode_l
v->dummy_vars.dml32_ModeSupportAndSystemConfigurationFull.SurfParameters[k].BlockHeight256BytesY = mode_lib->vba.Read256BlockHeightY[k];
v->dummy_vars.dml32_ModeSupportAndSystemConfigurationFull.SurfParameters[k].BlockWidth256BytesC = mode_lib->vba.Read256BlockWidthC[k];
v->dummy_vars.dml32_ModeSupportAndSystemConfigurationFull.SurfParameters[k].BlockHeight256BytesC = mode_lib->vba.Read256BlockHeightC[k];
- v->dummy_vars.dml32_ModeSupportAndSystemConfigurationFull.SurfParameters[k].BlockWidthY = mode_lib->vba.MicroTileWidthY[k];
- v->dummy_vars.dml32_ModeSupportAndSystemConfigurationFull.SurfParameters[k].BlockHeightY = mode_lib->vba.MicroTileHeightY[k];
- v->dummy_vars.dml32_ModeSupportAndSystemConfigurationFull.SurfParameters[k].BlockWidthC = mode_lib->vba.MicroTileWidthC[k];
- v->dummy_vars.dml32_ModeSupportAndSystemConfigurationFull.SurfParameters[k].BlockHeightC = mode_lib->vba.MicroTileHeightC[k];
+ v->dummy_vars.dml32_ModeSupportAndSystemConfigurationFull.SurfParameters[k].BlockWidthY = mode_lib->vba.MacroTileWidthY[k];
+ v->dummy_vars.dml32_ModeSupportAndSystemConfigurationFull.SurfParameters[k].BlockHeightY = mode_lib->vba.MacroTileHeightY[k];
+ v->dummy_vars.dml32_ModeSupportAndSystemConfigurationFull.SurfParameters[k].BlockWidthC = mode_lib->vba.MacroTileWidthC[k];
+ v->dummy_vars.dml32_ModeSupportAndSystemConfigurationFull.SurfParameters[k].BlockHeightC = mode_lib->vba.MacroTileHeightC[k];
v->dummy_vars.dml32_ModeSupportAndSystemConfigurationFull.SurfParameters[k].InterlaceEnable = mode_lib->vba.Interlace[k];
v->dummy_vars.dml32_ModeSupportAndSystemConfigurationFull.SurfParameters[k].HTotal = mode_lib->vba.HTotal[k];
v->dummy_vars.dml32_ModeSupportAndSystemConfigurationFull.SurfParameters[k].DCCEnable = mode_lib->vba.DCCEnable[k];
@@ -2749,7 +2700,6 @@ void dml32_ModeSupportAndSystemConfigurationFull(struct display_mode_lib *mode_l
{
dml32_CalculateVMRowAndSwath(
- &v->dummy_vars.dml32_CalculateVMRowAndSwath,
mode_lib->vba.NumberOfActiveSurfaces,
v->dummy_vars.dml32_ModeSupportAndSystemConfigurationFull.SurfParameters,
mode_lib->vba.SurfaceSizeInMALL,
@@ -3266,64 +3216,47 @@ void dml32_ModeSupportAndSystemConfigurationFull(struct display_mode_lib *mode_l
mode_lib->vba.NoTimeForPrefetch[i][j][k] =
dml32_CalculatePrefetchSchedule(
- &v->dummy_vars.dml32_CalculatePrefetchSchedule,
+ v,
+ k,
v->dummy_vars.dml32_ModeSupportAndSystemConfigurationFull.HostVMInefficiencyFactor,
&v->dummy_vars.dml32_ModeSupportAndSystemConfigurationFull.myPipe,
- mode_lib->vba.DSCDelayPerState[i][k],
- mode_lib->vba.DPPCLKDelaySubtotal +
- mode_lib->vba.DPPCLKDelayCNVCFormater,
- mode_lib->vba.DPPCLKDelaySCL,
- mode_lib->vba.DPPCLKDelaySCLLBOnly,
- mode_lib->vba.DPPCLKDelayCNVCCursor,
- mode_lib->vba.DISPCLKDelaySubtotal,
- mode_lib->vba.SwathWidthYThisState[k] /
- mode_lib->vba.HRatio[k],
- mode_lib->vba.OutputFormat[k],
- mode_lib->vba.MaxInterDCNTileRepeaters,
- dml_min(mode_lib->vba.MaxVStartup,
- mode_lib->vba.MaximumVStartup[i][j][k]),
- mode_lib->vba.MaximumVStartup[i][j][k],
- mode_lib->vba.GPUVMMaxPageTableLevels,
- mode_lib->vba.GPUVMEnable, mode_lib->vba.HostVMEnable,
- mode_lib->vba.HostVMMaxNonCachedPageTableLevels,
- mode_lib->vba.HostVMMinPageSize,
- mode_lib->vba.DynamicMetadataEnable[k],
- mode_lib->vba.DynamicMetadataVMEnabled,
- mode_lib->vba.DynamicMetadataLinesBeforeActiveRequired[k],
- mode_lib->vba.DynamicMetadataTransmittedBytes[k],
- mode_lib->vba.UrgLatency[i],
- mode_lib->vba.ExtraLatency,
- mode_lib->vba.TimeCalc,
- mode_lib->vba.PDEAndMetaPTEBytesPerFrame[i][j][k],
- mode_lib->vba.MetaRowBytes[i][j][k],
- mode_lib->vba.DPTEBytesPerRow[i][j][k],
- mode_lib->vba.PrefetchLinesY[i][j][k],
- mode_lib->vba.SwathWidthYThisState[k],
- mode_lib->vba.PrefillY[k],
- mode_lib->vba.MaxNumSwY[k],
- mode_lib->vba.PrefetchLinesC[i][j][k],
- mode_lib->vba.SwathWidthCThisState[k],
- mode_lib->vba.PrefillC[k],
- mode_lib->vba.MaxNumSwC[k],
- mode_lib->vba.swath_width_luma_ub_this_state[k],
- mode_lib->vba.swath_width_chroma_ub_this_state[k],
- mode_lib->vba.SwathHeightYThisState[k],
- mode_lib->vba.SwathHeightCThisState[k], mode_lib->vba.TWait,
+ v->DSCDelayPerState[i][k],
+ v->SwathWidthYThisState[k] / v->HRatio[k],
+ dml_min(v->MaxVStartup, v->MaximumVStartup[i][j][k]),
+ v->MaximumVStartup[i][j][k],
+ v->UrgLatency[i],
+ v->ExtraLatency,
+ v->TimeCalc,
+ v->PDEAndMetaPTEBytesPerFrame[i][j][k],
+ v->MetaRowBytes[i][j][k],
+ v->DPTEBytesPerRow[i][j][k],
+ v->PrefetchLinesY[i][j][k],
+ v->SwathWidthYThisState[k],
+ v->PrefillY[k],
+ v->MaxNumSwY[k],
+ v->PrefetchLinesC[i][j][k],
+ v->SwathWidthCThisState[k],
+ v->PrefillC[k],
+ v->MaxNumSwC[k],
+ v->swath_width_luma_ub_this_state[k],
+ v->swath_width_chroma_ub_this_state[k],
+ v->SwathHeightYThisState[k],
+ v->SwathHeightCThisState[k], v->TWait,
/* Output */
&v->dummy_vars.dml32_ModeSupportAndSystemConfigurationFull.DSTXAfterScaler[k],
&v->dummy_vars.dml32_ModeSupportAndSystemConfigurationFull.DSTYAfterScaler[k],
- &mode_lib->vba.LineTimesForPrefetch[k],
- &mode_lib->vba.PrefetchBW[k],
- &mode_lib->vba.LinesForMetaPTE[k],
- &mode_lib->vba.LinesForMetaAndDPTERow[k],
- &mode_lib->vba.VRatioPreY[i][j][k],
- &mode_lib->vba.VRatioPreC[i][j][k],
- &mode_lib->vba.RequiredPrefetchPixelDataBWLuma[0][0][k],
- &mode_lib->vba.RequiredPrefetchPixelDataBWChroma[0][0][k],
- &mode_lib->vba.NoTimeForDynamicMetadata[i][j][k],
- &mode_lib->vba.Tno_bw[k],
- &mode_lib->vba.prefetch_vmrow_bw[k],
+ &v->LineTimesForPrefetch[k],
+ &v->PrefetchBW[k],
+ &v->LinesForMetaPTE[k],
+ &v->LinesForMetaAndDPTERow[k],
+ &v->VRatioPreY[i][j][k],
+ &v->VRatioPreC[i][j][k],
+ &v->RequiredPrefetchPixelDataBWLuma[0][0][k],
+ &v->RequiredPrefetchPixelDataBWChroma[0][0][k],
+ &v->NoTimeForDynamicMetadata[i][j][k],
+ &v->Tno_bw[k],
+ &v->prefetch_vmrow_bw[k],
&v->dummy_vars.dml32_ModeSupportAndSystemConfigurationFull.dummy_single[0], // double *Tdmdl_vm
&v->dummy_vars.dml32_ModeSupportAndSystemConfigurationFull.dummy_single[1], // double *Tdmdl
&v->dummy_vars.dml32_ModeSupportAndSystemConfigurationFull.dummy_single[2], // double *TSetup
@@ -3566,63 +3499,32 @@ void dml32_ModeSupportAndSystemConfigurationFull(struct display_mode_lib *mode_l
{
dml32_CalculateWatermarksMALLUseAndDRAMSpeedChangeSupport(
- &v->dummy_vars.dml32_CalculateWatermarksMALLUseAndDRAMSpeedChangeSupport,
- mode_lib->vba.USRRetrainingRequiredFinal,
- mode_lib->vba.UsesMALLForPStateChange,
- mode_lib->vba.PrefetchModePerState[i][j],
- mode_lib->vba.NumberOfActiveSurfaces,
- mode_lib->vba.MaxLineBufferLines,
- mode_lib->vba.LineBufferSizeFinal,
- mode_lib->vba.WritebackInterfaceBufferSize,
- mode_lib->vba.DCFCLKState[i][j],
- mode_lib->vba.ReturnBWPerState[i][j],
- mode_lib->vba.SynchronizeTimingsFinal,
- mode_lib->vba.SynchronizeDRRDisplaysForUCLKPStateChangeFinal,
- mode_lib->vba.DRRDisplay,
- mode_lib->vba.dpte_group_bytes,
- mode_lib->vba.meta_row_height,
- mode_lib->vba.meta_row_height_chroma,
+ v,
+ v->PrefetchModePerState[i][j],
+ v->DCFCLKState[i][j],
+ v->ReturnBWPerState[i][j],
v->dummy_vars.dml32_ModeSupportAndSystemConfigurationFull.mSOCParameters,
- mode_lib->vba.WritebackChunkSize,
- mode_lib->vba.SOCCLKPerState[i],
- mode_lib->vba.ProjectedDCFCLKDeepSleep[i][j],
- mode_lib->vba.DETBufferSizeYThisState,
- mode_lib->vba.DETBufferSizeCThisState,
- mode_lib->vba.SwathHeightYThisState,
- mode_lib->vba.SwathHeightCThisState,
- mode_lib->vba.LBBitPerPixel,
- mode_lib->vba.SwathWidthYThisState, // 24
- mode_lib->vba.SwathWidthCThisState,
- mode_lib->vba.HRatio,
- mode_lib->vba.HRatioChroma,
- mode_lib->vba.vtaps,
- mode_lib->vba.VTAPsChroma,
- mode_lib->vba.VRatio,
- mode_lib->vba.VRatioChroma,
- mode_lib->vba.HTotal,
- mode_lib->vba.VTotal,
- mode_lib->vba.VActive,
- mode_lib->vba.PixelClock,
- mode_lib->vba.BlendingAndTiming,
- mode_lib->vba.NoOfDPPThisState,
- mode_lib->vba.BytePerPixelInDETY,
- mode_lib->vba.BytePerPixelInDETC,
+ v->SOCCLKPerState[i],
+ v->ProjectedDCFCLKDeepSleep[i][j],
+ v->DETBufferSizeYThisState,
+ v->DETBufferSizeCThisState,
+ v->SwathHeightYThisState,
+ v->SwathHeightCThisState,
+ v->SwathWidthYThisState, // 24
+ v->SwathWidthCThisState,
+ v->NoOfDPPThisState,
+ v->BytePerPixelInDETY,
+ v->BytePerPixelInDETC,
v->dummy_vars.dml32_ModeSupportAndSystemConfigurationFull.DSTXAfterScaler,
v->dummy_vars.dml32_ModeSupportAndSystemConfigurationFull.DSTYAfterScaler,
- mode_lib->vba.WritebackEnable,
- mode_lib->vba.WritebackPixelFormat,
- mode_lib->vba.WritebackDestinationWidth,
- mode_lib->vba.WritebackDestinationHeight,
- mode_lib->vba.WritebackSourceHeight,
- mode_lib->vba.UnboundedRequestEnabledThisState,
- mode_lib->vba.CompressedBufferSizeInkByteThisState,
+ v->UnboundedRequestEnabledThisState,
+ v->CompressedBufferSizeInkByteThisState,
/* Output */
- &mode_lib->vba.Watermark, // Store the values in vba
- &mode_lib->vba.DRAMClockChangeSupport[i][j],
+ &v->DRAMClockChangeSupport[i][j],
&v->dummy_vars.dml32_ModeSupportAndSystemConfigurationFull.dummy_single2[0], // double *MaxActiveDRAMClockChangeLatencySupported
&v->dummy_vars.dml32_ModeSupportAndSystemConfigurationFull.dummy_integer[0], // Long SubViewportLinesNeededInMALL[]
- &mode_lib->vba.FCLKChangeSupport[i][j],
+ &v->FCLKChangeSupport[i][j],
&v->dummy_vars.dml32_ModeSupportAndSystemConfigurationFull.dummy_single2[1], // double *MinActiveFCLKChangeLatencySupported
&mode_lib->vba.USRRetrainingSupport[i][j],
mode_lib->vba.ActiveDRAMClockChangeLatencyMargin);
diff --git a/drivers/gpu/drm/amd/display/dc/dml/dcn32/display_mode_vba_util_32.c b/drivers/gpu/drm/amd/display/dc/dml/dcn32/display_mode_vba_util_32.c
index 07f8f3b8626b..59c2547d01b1 100644
--- a/drivers/gpu/drm/amd/display/dc/dml/dcn32/display_mode_vba_util_32.c
+++ b/drivers/gpu/drm/amd/display/dc/dml/dcn32/display_mode_vba_util_32.c
@@ -391,7 +391,6 @@ void dml32_CalculateBytePerPixelAndBlockSizes(
} // CalculateBytePerPixelAndBlockSizes
void dml32_CalculateSwathAndDETConfiguration(
- struct dml32_CalculateSwathAndDETConfiguration *st_vars,
unsigned int DETSizeOverride[],
enum dm_use_mall_for_pstate_change_mode UseMALLForPStateChange[],
unsigned int ConfigReturnBufferSizeInKByte,
@@ -456,10 +455,18 @@ void dml32_CalculateSwathAndDETConfiguration(
bool ViewportSizeSupportPerSurface[],
bool *ViewportSizeSupport)
{
+ unsigned int MaximumSwathHeightY[DC__NUM_DPP__MAX];
+ unsigned int MaximumSwathHeightC[DC__NUM_DPP__MAX];
+ unsigned int RoundedUpMaxSwathSizeBytesY[DC__NUM_DPP__MAX];
+ unsigned int RoundedUpMaxSwathSizeBytesC[DC__NUM_DPP__MAX];
+ unsigned int RoundedUpSwathSizeBytesY;
+ unsigned int RoundedUpSwathSizeBytesC;
+ double SwathWidthdoubleDPP[DC__NUM_DPP__MAX];
+ double SwathWidthdoubleDPPChroma[DC__NUM_DPP__MAX];
unsigned int k;
-
- st_vars->TotalActiveDPP = 0;
- st_vars->NoChromaSurfaces = true;
+ unsigned int TotalActiveDPP = 0;
+ bool NoChromaSurfaces = true;
+ unsigned int DETBufferSizeInKByteForSwathCalculation;
#ifdef __DML_VBA_DEBUG__
dml_print("DML::%s: ForceSingleDPP = %d\n", __func__, ForceSingleDPP);
@@ -494,43 +501,43 @@ void dml32_CalculateSwathAndDETConfiguration(
DPPPerSurface,
/* Output */
- st_vars->SwathWidthdoubleDPP,
- st_vars->SwathWidthdoubleDPPChroma,
+ SwathWidthdoubleDPP,
+ SwathWidthdoubleDPPChroma,
SwathWidth,
SwathWidthChroma,
- st_vars->MaximumSwathHeightY,
- st_vars->MaximumSwathHeightC,
+ MaximumSwathHeightY,
+ MaximumSwathHeightC,
swath_width_luma_ub,
swath_width_chroma_ub);
for (k = 0; k < NumberOfActiveSurfaces; ++k) {
- st_vars->RoundedUpMaxSwathSizeBytesY[k] = swath_width_luma_ub[k] * BytePerPixDETY[k] * st_vars->MaximumSwathHeightY[k];
- st_vars->RoundedUpMaxSwathSizeBytesC[k] = swath_width_chroma_ub[k] * BytePerPixDETC[k] * st_vars->MaximumSwathHeightC[k];
+ RoundedUpMaxSwathSizeBytesY[k] = swath_width_luma_ub[k] * BytePerPixDETY[k] * MaximumSwathHeightY[k];
+ RoundedUpMaxSwathSizeBytesC[k] = swath_width_chroma_ub[k] * BytePerPixDETC[k] * MaximumSwathHeightC[k];
#ifdef __DML_VBA_DEBUG__
dml_print("DML::%s: k=%0d DPPPerSurface = %d\n", __func__, k, DPPPerSurface[k]);
dml_print("DML::%s: k=%0d swath_width_luma_ub = %d\n", __func__, k, swath_width_luma_ub[k]);
dml_print("DML::%s: k=%0d BytePerPixDETY = %f\n", __func__, k, BytePerPixDETY[k]);
- dml_print("DML::%s: k=%0d MaximumSwathHeightY = %d\n", __func__, k, st_vars->MaximumSwathHeightY[k]);
+ dml_print("DML::%s: k=%0d MaximumSwathHeightY = %d\n", __func__, k, MaximumSwathHeightY[k]);
dml_print("DML::%s: k=%0d RoundedUpMaxSwathSizeBytesY = %d\n", __func__, k,
- st_vars->RoundedUpMaxSwathSizeBytesY[k]);
+ RoundedUpMaxSwathSizeBytesY[k]);
dml_print("DML::%s: k=%0d swath_width_chroma_ub = %d\n", __func__, k, swath_width_chroma_ub[k]);
dml_print("DML::%s: k=%0d BytePerPixDETC = %f\n", __func__, k, BytePerPixDETC[k]);
- dml_print("DML::%s: k=%0d MaximumSwathHeightC = %d\n", __func__, k, st_vars->MaximumSwathHeightC[k]);
+ dml_print("DML::%s: k=%0d MaximumSwathHeightC = %d\n", __func__, k, MaximumSwathHeightC[k]);
dml_print("DML::%s: k=%0d RoundedUpMaxSwathSizeBytesC = %d\n", __func__, k,
- st_vars->RoundedUpMaxSwathSizeBytesC[k]);
+ RoundedUpMaxSwathSizeBytesC[k]);
#endif
if (SourcePixelFormat[k] == dm_420_10) {
- st_vars->RoundedUpMaxSwathSizeBytesY[k] = dml_ceil((unsigned int) st_vars->RoundedUpMaxSwathSizeBytesY[k], 256);
- st_vars->RoundedUpMaxSwathSizeBytesC[k] = dml_ceil((unsigned int) st_vars->RoundedUpMaxSwathSizeBytesC[k], 256);
+ RoundedUpMaxSwathSizeBytesY[k] = dml_ceil((unsigned int) RoundedUpMaxSwathSizeBytesY[k], 256);
+ RoundedUpMaxSwathSizeBytesC[k] = dml_ceil((unsigned int) RoundedUpMaxSwathSizeBytesC[k], 256);
}
}
for (k = 0; k < NumberOfActiveSurfaces; ++k) {
- st_vars->TotalActiveDPP = st_vars->TotalActiveDPP + (ForceSingleDPP ? 1 : DPPPerSurface[k]);
+ TotalActiveDPP = TotalActiveDPP + (ForceSingleDPP ? 1 : DPPPerSurface[k]);
if (SourcePixelFormat[k] == dm_420_8 || SourcePixelFormat[k] == dm_420_10 ||
SourcePixelFormat[k] == dm_420_12 || SourcePixelFormat[k] == dm_rgbe_alpha) {
- st_vars->NoChromaSurfaces = false;
+ NoChromaSurfaces = false;
}
}
@@ -540,10 +547,10 @@ void dml32_CalculateSwathAndDETConfiguration(
// if unbounded req is enabled, program reserved space such that the ROB will not hold more than 8 swaths worth of data
// - assume worst-case compression rate of 4. [ROB size - 8 * swath_size / max_compression ratio]
// - assume for "narrow" vp case in which the ROB can fit 8 swaths, the DET should be big enough to do full size req
- *CompBufReservedSpaceNeedAdjustment = ((int) ROBSizeKBytes - (int) *CompBufReservedSpaceKBytes) > (int) (st_vars->RoundedUpMaxSwathSizeBytesY[0]/512);
+ *CompBufReservedSpaceNeedAdjustment = ((int) ROBSizeKBytes - (int) *CompBufReservedSpaceKBytes) > (int) (RoundedUpMaxSwathSizeBytesY[0]/512);
if (*CompBufReservedSpaceNeedAdjustment == 1) {
- *CompBufReservedSpaceKBytes = ROBSizeKBytes - st_vars->RoundedUpMaxSwathSizeBytesY[0]/512;
+ *CompBufReservedSpaceKBytes = ROBSizeKBytes - RoundedUpMaxSwathSizeBytesY[0]/512;
}
#ifdef __DML_VBA_DEBUG__
@@ -551,7 +558,7 @@ void dml32_CalculateSwathAndDETConfiguration(
dml_print("DML::%s: CompBufReservedSpaceNeedAdjustment = %d\n", __func__, *CompBufReservedSpaceNeedAdjustment);
#endif
- *UnboundedRequestEnabled = dml32_UnboundedRequest(UseUnboundedRequestingFinal, st_vars->TotalActiveDPP, st_vars->NoChromaSurfaces, Output[0], SurfaceTiling[0], *CompBufReservedSpaceNeedAdjustment, DisableUnboundRequestIfCompBufReservedSpaceNeedAdjustment);
+ *UnboundedRequestEnabled = dml32_UnboundedRequest(UseUnboundedRequestingFinal, TotalActiveDPP, NoChromaSurfaces, Output[0], SurfaceTiling[0], *CompBufReservedSpaceNeedAdjustment, DisableUnboundRequestIfCompBufReservedSpaceNeedAdjustment);
dml32_CalculateDETBufferSize(DETSizeOverride,
UseMALLForPStateChange,
@@ -566,8 +573,8 @@ void dml32_CalculateSwathAndDETConfiguration(
SourcePixelFormat,
ReadBandwidthLuma,
ReadBandwidthChroma,
- st_vars->RoundedUpMaxSwathSizeBytesY,
- st_vars->RoundedUpMaxSwathSizeBytesC,
+ RoundedUpMaxSwathSizeBytesY,
+ RoundedUpMaxSwathSizeBytesC,
DPPPerSurface,
/* Output */
@@ -575,7 +582,7 @@ void dml32_CalculateSwathAndDETConfiguration(
CompressedBufferSizeInkByte);
#ifdef __DML_VBA_DEBUG__
- dml_print("DML::%s: TotalActiveDPP = %d\n", __func__, st_vars->TotalActiveDPP);
+ dml_print("DML::%s: TotalActiveDPP = %d\n", __func__, TotalActiveDPP);
dml_print("DML::%s: nomDETInKByte = %d\n", __func__, nomDETInKByte);
dml_print("DML::%s: ConfigReturnBufferSizeInKByte = %d\n", __func__, ConfigReturnBufferSizeInKByte);
dml_print("DML::%s: UseUnboundedRequestingFinal = %d\n", __func__, UseUnboundedRequestingFinal);
@@ -586,42 +593,42 @@ void dml32_CalculateSwathAndDETConfiguration(
*ViewportSizeSupport = true;
for (k = 0; k < NumberOfActiveSurfaces; ++k) {
- st_vars->DETBufferSizeInKByteForSwathCalculation = (UseMALLForPStateChange[k] ==
+ DETBufferSizeInKByteForSwathCalculation = (UseMALLForPStateChange[k] ==
dm_use_mall_pstate_change_phantom_pipe ? 1024 : DETBufferSizeInKByte[k]);
#ifdef __DML_VBA_DEBUG__
dml_print("DML::%s: k=%0d DETBufferSizeInKByteForSwathCalculation = %d\n", __func__, k,
- st_vars->DETBufferSizeInKByteForSwathCalculation);
+ DETBufferSizeInKByteForSwathCalculation);
#endif
- if (st_vars->RoundedUpMaxSwathSizeBytesY[k] + st_vars->RoundedUpMaxSwathSizeBytesC[k] <=
- st_vars->DETBufferSizeInKByteForSwathCalculation * 1024 / 2) {
- SwathHeightY[k] = st_vars->MaximumSwathHeightY[k];
- SwathHeightC[k] = st_vars->MaximumSwathHeightC[k];
- st_vars->RoundedUpSwathSizeBytesY = st_vars->RoundedUpMaxSwathSizeBytesY[k];
- st_vars->RoundedUpSwathSizeBytesC = st_vars->RoundedUpMaxSwathSizeBytesC[k];
- } else if (st_vars->RoundedUpMaxSwathSizeBytesY[k] >= 1.5 * st_vars->RoundedUpMaxSwathSizeBytesC[k] &&
- st_vars->RoundedUpMaxSwathSizeBytesY[k] / 2 + st_vars->RoundedUpMaxSwathSizeBytesC[k] <=
- st_vars->DETBufferSizeInKByteForSwathCalculation * 1024 / 2) {
- SwathHeightY[k] = st_vars->MaximumSwathHeightY[k] / 2;
- SwathHeightC[k] = st_vars->MaximumSwathHeightC[k];
- st_vars->RoundedUpSwathSizeBytesY = st_vars->RoundedUpMaxSwathSizeBytesY[k] / 2;
- st_vars->RoundedUpSwathSizeBytesC = st_vars->RoundedUpMaxSwathSizeBytesC[k];
- } else if (st_vars->RoundedUpMaxSwathSizeBytesY[k] < 1.5 * st_vars->RoundedUpMaxSwathSizeBytesC[k] &&
- st_vars->RoundedUpMaxSwathSizeBytesY[k] + st_vars->RoundedUpMaxSwathSizeBytesC[k] / 2 <=
- st_vars->DETBufferSizeInKByteForSwathCalculation * 1024 / 2) {
- SwathHeightY[k] = st_vars->MaximumSwathHeightY[k];
- SwathHeightC[k] = st_vars->MaximumSwathHeightC[k] / 2;
- st_vars->RoundedUpSwathSizeBytesY = st_vars->RoundedUpMaxSwathSizeBytesY[k];
- st_vars->RoundedUpSwathSizeBytesC = st_vars->RoundedUpMaxSwathSizeBytesC[k] / 2;
+ if (RoundedUpMaxSwathSizeBytesY[k] + RoundedUpMaxSwathSizeBytesC[k] <=
+ DETBufferSizeInKByteForSwathCalculation * 1024 / 2) {
+ SwathHeightY[k] = MaximumSwathHeightY[k];
+ SwathHeightC[k] = MaximumSwathHeightC[k];
+ RoundedUpSwathSizeBytesY = RoundedUpMaxSwathSizeBytesY[k];
+ RoundedUpSwathSizeBytesC = RoundedUpMaxSwathSizeBytesC[k];
+ } else if (RoundedUpMaxSwathSizeBytesY[k] >= 1.5 * RoundedUpMaxSwathSizeBytesC[k] &&
+ RoundedUpMaxSwathSizeBytesY[k] / 2 + RoundedUpMaxSwathSizeBytesC[k] <=
+ DETBufferSizeInKByteForSwathCalculation * 1024 / 2) {
+ SwathHeightY[k] = MaximumSwathHeightY[k] / 2;
+ SwathHeightC[k] = MaximumSwathHeightC[k];
+ RoundedUpSwathSizeBytesY = RoundedUpMaxSwathSizeBytesY[k] / 2;
+ RoundedUpSwathSizeBytesC = RoundedUpMaxSwathSizeBytesC[k];
+ } else if (RoundedUpMaxSwathSizeBytesY[k] < 1.5 * RoundedUpMaxSwathSizeBytesC[k] &&
+ RoundedUpMaxSwathSizeBytesY[k] + RoundedUpMaxSwathSizeBytesC[k] / 2 <=
+ DETBufferSizeInKByteForSwathCalculation * 1024 / 2) {
+ SwathHeightY[k] = MaximumSwathHeightY[k];
+ SwathHeightC[k] = MaximumSwathHeightC[k] / 2;
+ RoundedUpSwathSizeBytesY = RoundedUpMaxSwathSizeBytesY[k];
+ RoundedUpSwathSizeBytesC = RoundedUpMaxSwathSizeBytesC[k] / 2;
} else {
- SwathHeightY[k] = st_vars->MaximumSwathHeightY[k] / 2;
- SwathHeightC[k] = st_vars->MaximumSwathHeightC[k] / 2;
- st_vars->RoundedUpSwathSizeBytesY = st_vars->RoundedUpMaxSwathSizeBytesY[k] / 2;
- st_vars->RoundedUpSwathSizeBytesC = st_vars->RoundedUpMaxSwathSizeBytesC[k] / 2;
+ SwathHeightY[k] = MaximumSwathHeightY[k] / 2;
+ SwathHeightC[k] = MaximumSwathHeightC[k] / 2;
+ RoundedUpSwathSizeBytesY = RoundedUpMaxSwathSizeBytesY[k] / 2;
+ RoundedUpSwathSizeBytesC = RoundedUpMaxSwathSizeBytesC[k] / 2;
}
- if ((st_vars->RoundedUpMaxSwathSizeBytesY[k] / 2 + st_vars->RoundedUpMaxSwathSizeBytesC[k] / 2 >
- st_vars->DETBufferSizeInKByteForSwathCalculation * 1024 / 2)
+ if ((RoundedUpMaxSwathSizeBytesY[k] / 2 + RoundedUpMaxSwathSizeBytesC[k] / 2 >
+ DETBufferSizeInKByteForSwathCalculation * 1024 / 2)
|| SwathWidth[k] > MaximumSwathWidthLuma[k] || (SwathHeightC[k] > 0 &&
SwathWidthChroma[k] > MaximumSwathWidthChroma[k])) {
*ViewportSizeSupport = false;
@@ -636,7 +643,7 @@ void dml32_CalculateSwathAndDETConfiguration(
#endif
DETBufferSizeY[k] = DETBufferSizeInKByte[k] * 1024;
DETBufferSizeC[k] = 0;
- } else if (st_vars->RoundedUpSwathSizeBytesY <= 1.5 * st_vars->RoundedUpSwathSizeBytesC) {
+ } else if (RoundedUpSwathSizeBytesY <= 1.5 * RoundedUpSwathSizeBytesC) {
#ifdef __DML_VBA_DEBUG__
dml_print("DML::%s: k=%0d Half DET for plane0, half for plane1\n", __func__, k);
#endif
@@ -654,11 +661,11 @@ void dml32_CalculateSwathAndDETConfiguration(
dml_print("DML::%s: k=%0d SwathHeightY = %d\n", __func__, k, SwathHeightY[k]);
dml_print("DML::%s: k=%0d SwathHeightC = %d\n", __func__, k, SwathHeightC[k]);
dml_print("DML::%s: k=%0d RoundedUpMaxSwathSizeBytesY = %d\n", __func__,
- k, st_vars->RoundedUpMaxSwathSizeBytesY[k]);
+ k, RoundedUpMaxSwathSizeBytesY[k]);
dml_print("DML::%s: k=%0d RoundedUpMaxSwathSizeBytesC = %d\n", __func__,
- k, st_vars->RoundedUpMaxSwathSizeBytesC[k]);
- dml_print("DML::%s: k=%0d RoundedUpSwathSizeBytesY = %d\n", __func__, k, st_vars->RoundedUpSwathSizeBytesY);
- dml_print("DML::%s: k=%0d RoundedUpSwathSizeBytesC = %d\n", __func__, k, st_vars->RoundedUpSwathSizeBytesC);
+ k, RoundedUpMaxSwathSizeBytesC[k]);
+ dml_print("DML::%s: k=%0d RoundedUpSwathSizeBytesY = %d\n", __func__, k, RoundedUpSwathSizeBytesY);
+ dml_print("DML::%s: k=%0d RoundedUpSwathSizeBytesC = %d\n", __func__, k, RoundedUpSwathSizeBytesC);
dml_print("DML::%s: k=%0d DETBufferSizeInKByte = %d\n", __func__, k, DETBufferSizeInKByte[k]);
dml_print("DML::%s: k=%0d DETBufferSizeY = %d\n", __func__, k, DETBufferSizeY[k]);
dml_print("DML::%s: k=%0d DETBufferSizeC = %d\n", __func__, k, DETBufferSizeC[k]);
@@ -1867,7 +1874,6 @@ void dml32_CalculateSurfaceSizeInMall(
} // CalculateSurfaceSizeInMall
void dml32_CalculateVMRowAndSwath(
- struct dml32_CalculateVMRowAndSwath *st_vars,
unsigned int NumberOfActiveSurfaces,
DmlPipe myPipe[],
unsigned int SurfaceSizeInMALL[],
@@ -1933,6 +1939,21 @@ void dml32_CalculateVMRowAndSwath(
unsigned int BIGK_FRAGMENT_SIZE[])
{
unsigned int k;
+ unsigned int PTEBufferSizeInRequestsForLuma[DC__NUM_DPP__MAX];
+ unsigned int PTEBufferSizeInRequestsForChroma[DC__NUM_DPP__MAX];
+ unsigned int PDEAndMetaPTEBytesFrameY;
+ unsigned int PDEAndMetaPTEBytesFrameC;
+ unsigned int MetaRowByteY[DC__NUM_DPP__MAX];
+ unsigned int MetaRowByteC[DC__NUM_DPP__MAX];
+ unsigned int PixelPTEBytesPerRowY[DC__NUM_DPP__MAX];
+ unsigned int PixelPTEBytesPerRowC[DC__NUM_DPP__MAX];
+ unsigned int PixelPTEBytesPerRowY_one_row_per_frame[DC__NUM_DPP__MAX];
+ unsigned int PixelPTEBytesPerRowC_one_row_per_frame[DC__NUM_DPP__MAX];
+ unsigned int dpte_row_width_luma_ub_one_row_per_frame[DC__NUM_DPP__MAX];
+ unsigned int dpte_row_height_luma_one_row_per_frame[DC__NUM_DPP__MAX];
+ unsigned int dpte_row_width_chroma_ub_one_row_per_frame[DC__NUM_DPP__MAX];
+ unsigned int dpte_row_height_chroma_one_row_per_frame[DC__NUM_DPP__MAX];
+ bool one_row_per_frame_fits_in_buffer[DC__NUM_DPP__MAX];
for (k = 0; k < NumberOfActiveSurfaces; ++k) {
if (HostVMEnable == true) {
@@ -1954,15 +1975,15 @@ void dml32_CalculateVMRowAndSwath(
myPipe[k].SourcePixelFormat == dm_rgbe_alpha) {
if ((myPipe[k].SourcePixelFormat == dm_420_10 || myPipe[k].SourcePixelFormat == dm_420_12) &&
!IsVertical(myPipe[k].SourceRotation)) {
- st_vars->PTEBufferSizeInRequestsForLuma[k] =
+ PTEBufferSizeInRequestsForLuma[k] =
(PTEBufferSizeInRequestsLuma + PTEBufferSizeInRequestsChroma) / 2;
- st_vars->PTEBufferSizeInRequestsForChroma[k] = st_vars->PTEBufferSizeInRequestsForLuma[k];
+ PTEBufferSizeInRequestsForChroma[k] = PTEBufferSizeInRequestsForLuma[k];
} else {
- st_vars->PTEBufferSizeInRequestsForLuma[k] = PTEBufferSizeInRequestsLuma;
- st_vars->PTEBufferSizeInRequestsForChroma[k] = PTEBufferSizeInRequestsChroma;
+ PTEBufferSizeInRequestsForLuma[k] = PTEBufferSizeInRequestsLuma;
+ PTEBufferSizeInRequestsForChroma[k] = PTEBufferSizeInRequestsChroma;
}
- st_vars->PDEAndMetaPTEBytesFrameC = dml32_CalculateVMAndRowBytes(
+ PDEAndMetaPTEBytesFrameC = dml32_CalculateVMAndRowBytes(
myPipe[k].ViewportStationary,
myPipe[k].DCCEnable,
myPipe[k].DPPPerSurface,
@@ -1982,21 +2003,21 @@ void dml32_CalculateVMRowAndSwath(
GPUVMMaxPageTableLevels,
GPUVMMinPageSizeKBytes[k],
HostVMMinPageSize,
- st_vars->PTEBufferSizeInRequestsForChroma[k],
+ PTEBufferSizeInRequestsForChroma[k],
myPipe[k].PitchC,
myPipe[k].DCCMetaPitchC,
myPipe[k].BlockWidthC,
myPipe[k].BlockHeightC,
/* Output */
- &st_vars->MetaRowByteC[k],
- &st_vars->PixelPTEBytesPerRowC[k],
+ &MetaRowByteC[k],
+ &PixelPTEBytesPerRowC[k],
&dpte_row_width_chroma_ub[k],
&dpte_row_height_chroma[k],
&dpte_row_height_linear_chroma[k],
- &st_vars->PixelPTEBytesPerRowC_one_row_per_frame[k],
- &st_vars->dpte_row_width_chroma_ub_one_row_per_frame[k],
- &st_vars->dpte_row_height_chroma_one_row_per_frame[k],
+ &PixelPTEBytesPerRowC_one_row_per_frame[k],
+ &dpte_row_width_chroma_ub_one_row_per_frame[k],
+ &dpte_row_height_chroma_one_row_per_frame[k],
&meta_req_width_chroma[k],
&meta_req_height_chroma[k],
&meta_row_width_chroma[k],
@@ -2024,19 +2045,19 @@ void dml32_CalculateVMRowAndSwath(
&VInitPreFillC[k],
&MaxNumSwathC[k]);
} else {
- st_vars->PTEBufferSizeInRequestsForLuma[k] = PTEBufferSizeInRequestsLuma + PTEBufferSizeInRequestsChroma;
- st_vars->PTEBufferSizeInRequestsForChroma[k] = 0;
- st_vars->PixelPTEBytesPerRowC[k] = 0;
- st_vars->PDEAndMetaPTEBytesFrameC = 0;
- st_vars->MetaRowByteC[k] = 0;
+ PTEBufferSizeInRequestsForLuma[k] = PTEBufferSizeInRequestsLuma + PTEBufferSizeInRequestsChroma;
+ PTEBufferSizeInRequestsForChroma[k] = 0;
+ PixelPTEBytesPerRowC[k] = 0;
+ PDEAndMetaPTEBytesFrameC = 0;
+ MetaRowByteC[k] = 0;
MaxNumSwathC[k] = 0;
PrefetchSourceLinesC[k] = 0;
- st_vars->dpte_row_height_chroma_one_row_per_frame[k] = 0;
- st_vars->dpte_row_width_chroma_ub_one_row_per_frame[k] = 0;
- st_vars->PixelPTEBytesPerRowC_one_row_per_frame[k] = 0;
+ dpte_row_height_chroma_one_row_per_frame[k] = 0;
+ dpte_row_width_chroma_ub_one_row_per_frame[k] = 0;
+ PixelPTEBytesPerRowC_one_row_per_frame[k] = 0;
}
- st_vars->PDEAndMetaPTEBytesFrameY = dml32_CalculateVMAndRowBytes(
+ PDEAndMetaPTEBytesFrameY = dml32_CalculateVMAndRowBytes(
myPipe[k].ViewportStationary,
myPipe[k].DCCEnable,
myPipe[k].DPPPerSurface,
@@ -2056,21 +2077,21 @@ void dml32_CalculateVMRowAndSwath(
GPUVMMaxPageTableLevels,
GPUVMMinPageSizeKBytes[k],
HostVMMinPageSize,
- st_vars->PTEBufferSizeInRequestsForLuma[k],
+ PTEBufferSizeInRequestsForLuma[k],
myPipe[k].PitchY,
myPipe[k].DCCMetaPitchY,
myPipe[k].BlockWidthY,
myPipe[k].BlockHeightY,
/* Output */
- &st_vars->MetaRowByteY[k],
- &st_vars->PixelPTEBytesPerRowY[k],
+ &MetaRowByteY[k],
+ &PixelPTEBytesPerRowY[k],
&dpte_row_width_luma_ub[k],
&dpte_row_height_luma[k],
&dpte_row_height_linear_luma[k],
- &st_vars->PixelPTEBytesPerRowY_one_row_per_frame[k],
- &st_vars->dpte_row_width_luma_ub_one_row_per_frame[k],
- &st_vars->dpte_row_height_luma_one_row_per_frame[k],
+ &PixelPTEBytesPerRowY_one_row_per_frame[k],
+ &dpte_row_width_luma_ub_one_row_per_frame[k],
+ &dpte_row_height_luma_one_row_per_frame[k],
&meta_req_width[k],
&meta_req_height[k],
&meta_row_width[k],
@@ -2098,19 +2119,19 @@ void dml32_CalculateVMRowAndSwath(
&VInitPreFillY[k],
&MaxNumSwathY[k]);
- PDEAndMetaPTEBytesFrame[k] = st_vars->PDEAndMetaPTEBytesFrameY + st_vars->PDEAndMetaPTEBytesFrameC;
- MetaRowByte[k] = st_vars->MetaRowByteY[k] + st_vars->MetaRowByteC[k];
+ PDEAndMetaPTEBytesFrame[k] = PDEAndMetaPTEBytesFrameY + PDEAndMetaPTEBytesFrameC;
+ MetaRowByte[k] = MetaRowByteY[k] + MetaRowByteC[k];
- if (st_vars->PixelPTEBytesPerRowY[k] <= 64 * st_vars->PTEBufferSizeInRequestsForLuma[k] &&
- st_vars->PixelPTEBytesPerRowC[k] <= 64 * st_vars->PTEBufferSizeInRequestsForChroma[k]) {
+ if (PixelPTEBytesPerRowY[k] <= 64 * PTEBufferSizeInRequestsForLuma[k] &&
+ PixelPTEBytesPerRowC[k] <= 64 * PTEBufferSizeInRequestsForChroma[k]) {
PTEBufferSizeNotExceeded[k] = true;
} else {
PTEBufferSizeNotExceeded[k] = false;
}
- st_vars->one_row_per_frame_fits_in_buffer[k] = (st_vars->PixelPTEBytesPerRowY_one_row_per_frame[k] <= 64 * 2 *
- st_vars->PTEBufferSizeInRequestsForLuma[k] &&
- st_vars->PixelPTEBytesPerRowC_one_row_per_frame[k] <= 64 * 2 * st_vars->PTEBufferSizeInRequestsForChroma[k]);
+ one_row_per_frame_fits_in_buffer[k] = (PixelPTEBytesPerRowY_one_row_per_frame[k] <= 64 * 2 *
+ PTEBufferSizeInRequestsForLuma[k] &&
+ PixelPTEBytesPerRowC_one_row_per_frame[k] <= 64 * 2 * PTEBufferSizeInRequestsForChroma[k]);
}
dml32_CalculateMALLUseForStaticScreen(
@@ -2118,7 +2139,7 @@ void dml32_CalculateVMRowAndSwath(
MALLAllocatedForDCN,
UseMALLForStaticScreen, // mode
SurfaceSizeInMALL,
- st_vars->one_row_per_frame_fits_in_buffer,
+ one_row_per_frame_fits_in_buffer,
/* Output */
UsesMALLForStaticScreen); // boolen
@@ -2144,13 +2165,13 @@ void dml32_CalculateVMRowAndSwath(
!(UseMALLForPStateChange[k] == dm_use_mall_pstate_change_full_frame);
if (use_one_row_for_frame[k]) {
- dpte_row_height_luma[k] = st_vars->dpte_row_height_luma_one_row_per_frame[k];
- dpte_row_width_luma_ub[k] = st_vars->dpte_row_width_luma_ub_one_row_per_frame[k];
- st_vars->PixelPTEBytesPerRowY[k] = st_vars->PixelPTEBytesPerRowY_one_row_per_frame[k];
- dpte_row_height_chroma[k] = st_vars->dpte_row_height_chroma_one_row_per_frame[k];
- dpte_row_width_chroma_ub[k] = st_vars->dpte_row_width_chroma_ub_one_row_per_frame[k];
- st_vars->PixelPTEBytesPerRowC[k] = st_vars->PixelPTEBytesPerRowC_one_row_per_frame[k];
- PTEBufferSizeNotExceeded[k] = st_vars->one_row_per_frame_fits_in_buffer[k];
+ dpte_row_height_luma[k] = dpte_row_height_luma_one_row_per_frame[k];
+ dpte_row_width_luma_ub[k] = dpte_row_width_luma_ub_one_row_per_frame[k];
+ PixelPTEBytesPerRowY[k] = PixelPTEBytesPerRowY_one_row_per_frame[k];
+ dpte_row_height_chroma[k] = dpte_row_height_chroma_one_row_per_frame[k];
+ dpte_row_width_chroma_ub[k] = dpte_row_width_chroma_ub_one_row_per_frame[k];
+ PixelPTEBytesPerRowC[k] = PixelPTEBytesPerRowC_one_row_per_frame[k];
+ PTEBufferSizeNotExceeded[k] = one_row_per_frame_fits_in_buffer[k];
}
if (MetaRowByte[k] <= DCCMetaBufferSizeBytes)
@@ -2158,7 +2179,7 @@ void dml32_CalculateVMRowAndSwath(
else
DCCMetaBufferSizeNotExceeded[k] = false;
- PixelPTEBytesPerRow[k] = st_vars->PixelPTEBytesPerRowY[k] + st_vars->PixelPTEBytesPerRowC[k];
+ PixelPTEBytesPerRow[k] = PixelPTEBytesPerRowY[k] + PixelPTEBytesPerRowC[k];
if (use_one_row_for_frame[k])
PixelPTEBytesPerRow[k] = PixelPTEBytesPerRow[k] / 2;
@@ -2169,11 +2190,11 @@ void dml32_CalculateVMRowAndSwath(
myPipe[k].VRatioChroma,
myPipe[k].DCCEnable,
myPipe[k].HTotal / myPipe[k].PixelClock,
- st_vars->MetaRowByteY[k], st_vars->MetaRowByteC[k],
+ MetaRowByteY[k], MetaRowByteC[k],
meta_row_height[k],
meta_row_height_chroma[k],
- st_vars->PixelPTEBytesPerRowY[k],
- st_vars->PixelPTEBytesPerRowC[k],
+ PixelPTEBytesPerRowY[k],
+ PixelPTEBytesPerRowC[k],
dpte_row_height_luma[k],
dpte_row_height_chroma[k],
@@ -2189,12 +2210,12 @@ void dml32_CalculateVMRowAndSwath(
dml_print("DML::%s: k=%d, dpte_row_height_luma = %d\n", __func__, k, dpte_row_height_luma[k]);
dml_print("DML::%s: k=%d, dpte_row_width_luma_ub = %d\n",
__func__, k, dpte_row_width_luma_ub[k]);
- dml_print("DML::%s: k=%d, PixelPTEBytesPerRowY = %d\n", __func__, k, st_vars->PixelPTEBytesPerRowY[k]);
+ dml_print("DML::%s: k=%d, PixelPTEBytesPerRowY = %d\n", __func__, k, PixelPTEBytesPerRowY[k]);
dml_print("DML::%s: k=%d, dpte_row_height_chroma = %d\n",
__func__, k, dpte_row_height_chroma[k]);
dml_print("DML::%s: k=%d, dpte_row_width_chroma_ub = %d\n",
__func__, k, dpte_row_width_chroma_ub[k]);
- dml_print("DML::%s: k=%d, PixelPTEBytesPerRowC = %d\n", __func__, k, st_vars->PixelPTEBytesPerRowC[k]);
+ dml_print("DML::%s: k=%d, PixelPTEBytesPerRowC = %d\n", __func__, k, PixelPTEBytesPerRowC[k]);
dml_print("DML::%s: k=%d, PixelPTEBytesPerRow = %d\n", __func__, k, PixelPTEBytesPerRow[k]);
dml_print("DML::%s: k=%d, PTEBufferSizeNotExceeded = %d\n",
__func__, k, PTEBufferSizeNotExceeded[k]);
@@ -3342,29 +3363,14 @@ double dml32_CalculateExtraLatency(
} // CalculateExtraLatency
bool dml32_CalculatePrefetchSchedule(
- struct dml32_CalculatePrefetchSchedule *st_vars,
+ struct vba_vars_st *v,
+ unsigned int k,
double HostVMInefficiencyFactor,
DmlPipe *myPipe,
unsigned int DSCDelay,
- double DPPCLKDelaySubtotalPlusCNVCFormater,
- double DPPCLKDelaySCL,
- double DPPCLKDelaySCLLBOnly,
- double DPPCLKDelayCNVCCursor,
- double DISPCLKDelaySubtotal,
unsigned int DPP_RECOUT_WIDTH,
- enum output_format_class OutputFormat,
- unsigned int MaxInterDCNTileRepeaters,
unsigned int VStartup,
unsigned int MaxVStartup,
- unsigned int GPUVMPageTableLevels,
- bool GPUVMEnable,
- bool HostVMEnable,
- unsigned int HostVMMaxNonCachedPageTableLevels,
- double HostVMMinPageSize,
- bool DynamicMetadataEnable,
- bool DynamicMetadataVMEnabled,
- int DynamicMetadataLinesBeforeActiveRequired,
- unsigned int DynamicMetadataTransmittedBytes,
double UrgentLatency,
double UrgentExtraLatency,
double TCalc,
@@ -3405,72 +3411,100 @@ bool dml32_CalculatePrefetchSchedule(
double *VUpdateWidthPix,
double *VReadyOffsetPix)
{
+ double DPPCLKDelaySubtotalPlusCNVCFormater = v->DPPCLKDelaySubtotal + v->DPPCLKDelayCNVCFormater;
bool MyError = false;
-
- st_vars->TimeForFetchingMetaPTE = 0;
- st_vars->TimeForFetchingRowInVBlank = 0;
- st_vars->LinesToRequestPrefetchPixelData = 0;
- st_vars->max_vratio_pre = __DML_MAX_VRATIO_PRE__;
- st_vars->Tsw_est1 = 0;
- st_vars->Tsw_est3 = 0;
-
- if (GPUVMEnable == true && HostVMEnable == true)
- st_vars->HostVMDynamicLevelsTrips = HostVMMaxNonCachedPageTableLevels;
+ unsigned int DPPCycles, DISPCLKCycles;
+ double DSTTotalPixelsAfterScaler;
+ double LineTime;
+ double dst_y_prefetch_equ;
+ double prefetch_bw_oto;
+ double Tvm_oto;
+ double Tr0_oto;
+ double Tvm_oto_lines;
+ double Tr0_oto_lines;
+ double dst_y_prefetch_oto;
+ double TimeForFetchingMetaPTE = 0;
+ double TimeForFetchingRowInVBlank = 0;
+ double LinesToRequestPrefetchPixelData = 0;
+ unsigned int HostVMDynamicLevelsTrips;
+ double trip_to_mem;
+ double Tvm_trips;
+ double Tr0_trips;
+ double Tvm_trips_rounded;
+ double Tr0_trips_rounded;
+ double Lsw_oto;
+ double Tpre_rounded;
+ double prefetch_bw_equ;
+ double Tvm_equ;
+ double Tr0_equ;
+ double Tdmbf;
+ double Tdmec;
+ double Tdmsks;
+ double prefetch_sw_bytes;
+ double bytes_pp;
+ double dep_bytes;
+ unsigned int max_vratio_pre = __DML_MAX_VRATIO_PRE__;
+ double min_Lsw;
+ double Tsw_est1 = 0;
+ double Tsw_est3 = 0;
+
+ if (v->GPUVMEnable == true && v->HostVMEnable == true)
+ HostVMDynamicLevelsTrips = v->HostVMMaxNonCachedPageTableLevels;
else
- st_vars->HostVMDynamicLevelsTrips = 0;
+ HostVMDynamicLevelsTrips = 0;
#ifdef __DML_VBA_DEBUG__
- dml_print("DML::%s: GPUVMEnable = %d\n", __func__, GPUVMEnable);
- dml_print("DML::%s: GPUVMPageTableLevels = %d\n", __func__, GPUVMPageTableLevels);
+ dml_print("DML::%s: v->GPUVMEnable = %d\n", __func__, v->GPUVMEnable);
+ dml_print("DML::%s: v->GPUVMMaxPageTableLevels = %d\n", __func__, v->GPUVMMaxPageTableLevels);
dml_print("DML::%s: DCCEnable = %d\n", __func__, myPipe->DCCEnable);
- dml_print("DML::%s: HostVMEnable=%d HostVMInefficiencyFactor=%f\n",
- __func__, HostVMEnable, HostVMInefficiencyFactor);
+ dml_print("DML::%s: v->HostVMEnable=%d HostVMInefficiencyFactor=%f\n",
+ __func__, v->HostVMEnable, HostVMInefficiencyFactor);
#endif
dml32_CalculateVUpdateAndDynamicMetadataParameters(
- MaxInterDCNTileRepeaters,
+ v->MaxInterDCNTileRepeaters,
myPipe->Dppclk,
myPipe->Dispclk,
myPipe->DCFClkDeepSleep,
myPipe->PixelClock,
myPipe->HTotal,
myPipe->VBlank,
- DynamicMetadataTransmittedBytes,
- DynamicMetadataLinesBeforeActiveRequired,
+ v->DynamicMetadataTransmittedBytes[k],
+ v->DynamicMetadataLinesBeforeActiveRequired[k],
myPipe->InterlaceEnable,
myPipe->ProgressiveToInterlaceUnitInOPP,
TSetup,
/* output */
- &st_vars->Tdmbf,
- &st_vars->Tdmec,
- &st_vars->Tdmsks,
+ &Tdmbf,
+ &Tdmec,
+ &Tdmsks,
VUpdateOffsetPix,
VUpdateWidthPix,
VReadyOffsetPix);
- st_vars->LineTime = myPipe->HTotal / myPipe->PixelClock;
- st_vars->trip_to_mem = UrgentLatency;
- st_vars->Tvm_trips = UrgentExtraLatency + st_vars->trip_to_mem * (GPUVMPageTableLevels * (st_vars->HostVMDynamicLevelsTrips + 1) - 1);
+ LineTime = myPipe->HTotal / myPipe->PixelClock;
+ trip_to_mem = UrgentLatency;
+ Tvm_trips = UrgentExtraLatency + trip_to_mem * (v->GPUVMMaxPageTableLevels * (HostVMDynamicLevelsTrips + 1) - 1);
- if (DynamicMetadataVMEnabled == true)
- *Tdmdl = TWait + st_vars->Tvm_trips + st_vars->trip_to_mem;
+ if (v->DynamicMetadataVMEnabled == true)
+ *Tdmdl = TWait + Tvm_trips + trip_to_mem;
else
*Tdmdl = TWait + UrgentExtraLatency;
#ifdef __DML_VBA_ALLOW_DELTA__
- if (DynamicMetadataEnable == false)
+ if (v->DynamicMetadataEnable[k] == false)
*Tdmdl = 0.0;
#endif
- if (DynamicMetadataEnable == true) {
- if (VStartup * st_vars->LineTime < *TSetup + *Tdmdl + st_vars->Tdmbf + st_vars->Tdmec + st_vars->Tdmsks) {
+ if (v->DynamicMetadataEnable[k] == true) {
+ if (VStartup * LineTime < *TSetup + *Tdmdl + Tdmbf + Tdmec + Tdmsks) {
*NotEnoughTimeForDynamicMetadata = true;
#ifdef __DML_VBA_DEBUG__
dml_print("DML::%s: Not Enough Time for Dynamic Meta!\n", __func__);
dml_print("DML::%s: Tdmbf: %fus - time for dmd transfer from dchub to dio output buffer\n",
- __func__, st_vars->Tdmbf);
- dml_print("DML::%s: Tdmec: %fus - time dio takes to transfer dmd\n", __func__, st_vars->Tdmec);
+ __func__, Tdmbf);
+ dml_print("DML::%s: Tdmec: %fus - time dio takes to transfer dmd\n", __func__, Tdmec);
dml_print("DML::%s: Tdmsks: %fus - time before active dmd must complete transmission at dio\n",
- __func__, st_vars->Tdmsks);
+ __func__, Tdmsks);
dml_print("DML::%s: Tdmdl: %fus - time for fabric to become ready and fetch dmd\n",
__func__, *Tdmdl);
#endif
@@ -3481,22 +3515,22 @@ bool dml32_CalculatePrefetchSchedule(
*NotEnoughTimeForDynamicMetadata = false;
}
- *Tdmdl_vm = (DynamicMetadataEnable == true && DynamicMetadataVMEnabled == true &&
- GPUVMEnable == true ? TWait + st_vars->Tvm_trips : 0);
+ *Tdmdl_vm = (v->DynamicMetadataEnable[k] == true && v->DynamicMetadataVMEnabled == true &&
+ v->GPUVMEnable == true ? TWait + Tvm_trips : 0);
if (myPipe->ScalerEnabled)
- st_vars->DPPCycles = DPPCLKDelaySubtotalPlusCNVCFormater + DPPCLKDelaySCL;
+ DPPCycles = DPPCLKDelaySubtotalPlusCNVCFormater + v->DPPCLKDelaySCL;
else
- st_vars->DPPCycles = DPPCLKDelaySubtotalPlusCNVCFormater + DPPCLKDelaySCLLBOnly;
+ DPPCycles = DPPCLKDelaySubtotalPlusCNVCFormater + v->DPPCLKDelaySCLLBOnly;
- st_vars->DPPCycles = st_vars->DPPCycles + myPipe->NumberOfCursors * DPPCLKDelayCNVCCursor;
+ DPPCycles = DPPCycles + myPipe->NumberOfCursors * v->DPPCLKDelayCNVCCursor;
- st_vars->DISPCLKCycles = DISPCLKDelaySubtotal;
+ DISPCLKCycles = v->DISPCLKDelaySubtotal;
if (myPipe->Dppclk == 0.0 || myPipe->Dispclk == 0.0)
return true;
- *DSTXAfterScaler = st_vars->DPPCycles * myPipe->PixelClock / myPipe->Dppclk + st_vars->DISPCLKCycles *
+ *DSTXAfterScaler = DPPCycles * myPipe->PixelClock / myPipe->Dppclk + DISPCLKCycles *
myPipe->PixelClock / myPipe->Dispclk + DSCDelay;
*DSTXAfterScaler = *DSTXAfterScaler + (myPipe->ODMMode != dm_odm_combine_mode_disabled ? 18 : 0)
@@ -3506,10 +3540,10 @@ bool dml32_CalculatePrefetchSchedule(
+ ((myPipe->ODMMode == dm_odm_mode_mso_1to4) ? myPipe->HActive * 3 / 4 : 0);
#ifdef __DML_VBA_DEBUG__
- dml_print("DML::%s: DPPCycles: %d\n", __func__, st_vars->DPPCycles);
+ dml_print("DML::%s: DPPCycles: %d\n", __func__, DPPCycles);
dml_print("DML::%s: PixelClock: %f\n", __func__, myPipe->PixelClock);
dml_print("DML::%s: Dppclk: %f\n", __func__, myPipe->Dppclk);
- dml_print("DML::%s: DISPCLKCycles: %d\n", __func__, st_vars->DISPCLKCycles);
+ dml_print("DML::%s: DISPCLKCycles: %d\n", __func__, DISPCLKCycles);
dml_print("DML::%s: DISPCLK: %f\n", __func__, myPipe->Dispclk);
dml_print("DML::%s: DSCDelay: %d\n", __func__, DSCDelay);
dml_print("DML::%s: ODMMode: %d\n", __func__, myPipe->ODMMode);
@@ -3517,14 +3551,14 @@ bool dml32_CalculatePrefetchSchedule(
dml_print("DML::%s: DSTXAfterScaler: %d\n", __func__, *DSTXAfterScaler);
#endif
- if (OutputFormat == dm_420 || (myPipe->InterlaceEnable && myPipe->ProgressiveToInterlaceUnitInOPP))
+ if (v->OutputFormat[k] == dm_420 || (myPipe->InterlaceEnable && myPipe->ProgressiveToInterlaceUnitInOPP))
*DSTYAfterScaler = 1;
else
*DSTYAfterScaler = 0;
- st_vars->DSTTotalPixelsAfterScaler = *DSTYAfterScaler * myPipe->HTotal + *DSTXAfterScaler;
- *DSTYAfterScaler = dml_floor(st_vars->DSTTotalPixelsAfterScaler / myPipe->HTotal, 1);
- *DSTXAfterScaler = st_vars->DSTTotalPixelsAfterScaler - ((double) (*DSTYAfterScaler * myPipe->HTotal));
+ DSTTotalPixelsAfterScaler = *DSTYAfterScaler * myPipe->HTotal + *DSTXAfterScaler;
+ *DSTYAfterScaler = dml_floor(DSTTotalPixelsAfterScaler / myPipe->HTotal, 1);
+ *DSTXAfterScaler = DSTTotalPixelsAfterScaler - ((double) (*DSTYAfterScaler * myPipe->HTotal));
#ifdef __DML_VBA_DEBUG__
dml_print("DML::%s: DSTXAfterScaler: %d (final)\n", __func__, *DSTXAfterScaler);
dml_print("DML::%s: DSTYAfterScaler: %d (final)\n", __func__, *DSTYAfterScaler);
@@ -3532,132 +3566,132 @@ bool dml32_CalculatePrefetchSchedule(
MyError = false;
- st_vars->Tr0_trips = st_vars->trip_to_mem * (st_vars->HostVMDynamicLevelsTrips + 1);
-
- if (GPUVMEnable == true) {
- st_vars->Tvm_trips_rounded = dml_ceil(4.0 * st_vars->Tvm_trips / st_vars->LineTime, 1.0) / 4.0 * st_vars->LineTime;
- st_vars->Tr0_trips_rounded = dml_ceil(4.0 * st_vars->Tr0_trips / st_vars->LineTime, 1.0) / 4.0 * st_vars->LineTime;
- if (GPUVMPageTableLevels >= 3) {
- *Tno_bw = UrgentExtraLatency + st_vars->trip_to_mem *
- (double) ((GPUVMPageTableLevels - 2) * (st_vars->HostVMDynamicLevelsTrips + 1) - 1);
- } else if (GPUVMPageTableLevels == 1 && myPipe->DCCEnable != true) {
- st_vars->Tr0_trips_rounded = dml_ceil(4.0 * UrgentExtraLatency / st_vars->LineTime, 1.0) /
- 4.0 * st_vars->LineTime; // VBA_ERROR
+ Tr0_trips = trip_to_mem * (HostVMDynamicLevelsTrips + 1);
+
+ if (v->GPUVMEnable == true) {
+ Tvm_trips_rounded = dml_ceil(4.0 * Tvm_trips / LineTime, 1.0) / 4.0 * LineTime;
+ Tr0_trips_rounded = dml_ceil(4.0 * Tr0_trips / LineTime, 1.0) / 4.0 * LineTime;
+ if (v->GPUVMMaxPageTableLevels >= 3) {
+ *Tno_bw = UrgentExtraLatency + trip_to_mem *
+ (double) ((v->GPUVMMaxPageTableLevels - 2) * (HostVMDynamicLevelsTrips + 1) - 1);
+ } else if (v->GPUVMMaxPageTableLevels == 1 && myPipe->DCCEnable != true) {
+ Tr0_trips_rounded = dml_ceil(4.0 * UrgentExtraLatency / LineTime, 1.0) /
+ 4.0 * LineTime; // VBA_ERROR
*Tno_bw = UrgentExtraLatency;
} else {
*Tno_bw = 0;
}
} else if (myPipe->DCCEnable == true) {
- st_vars->Tvm_trips_rounded = st_vars->LineTime / 4.0;
- st_vars->Tr0_trips_rounded = dml_ceil(4.0 * st_vars->Tr0_trips / st_vars->LineTime, 1.0) / 4.0 * st_vars->LineTime;
+ Tvm_trips_rounded = LineTime / 4.0;
+ Tr0_trips_rounded = dml_ceil(4.0 * Tr0_trips / LineTime, 1.0) / 4.0 * LineTime;
*Tno_bw = 0;
} else {
- st_vars->Tvm_trips_rounded = st_vars->LineTime / 4.0;
- st_vars->Tr0_trips_rounded = st_vars->LineTime / 2.0;
+ Tvm_trips_rounded = LineTime / 4.0;
+ Tr0_trips_rounded = LineTime / 2.0;
*Tno_bw = 0;
}
- st_vars->Tvm_trips_rounded = dml_max(st_vars->Tvm_trips_rounded, st_vars->LineTime / 4.0);
- st_vars->Tr0_trips_rounded = dml_max(st_vars->Tr0_trips_rounded, st_vars->LineTime / 4.0);
+ Tvm_trips_rounded = dml_max(Tvm_trips_rounded, LineTime / 4.0);
+ Tr0_trips_rounded = dml_max(Tr0_trips_rounded, LineTime / 4.0);
if (myPipe->SourcePixelFormat == dm_420_8 || myPipe->SourcePixelFormat == dm_420_10
|| myPipe->SourcePixelFormat == dm_420_12) {
- st_vars->bytes_pp = myPipe->BytePerPixelY + myPipe->BytePerPixelC / 4;
+ bytes_pp = myPipe->BytePerPixelY + myPipe->BytePerPixelC / 4;
} else {
- st_vars->bytes_pp = myPipe->BytePerPixelY + myPipe->BytePerPixelC;
+ bytes_pp = myPipe->BytePerPixelY + myPipe->BytePerPixelC;
}
- st_vars->prefetch_sw_bytes = PrefetchSourceLinesY * swath_width_luma_ub * myPipe->BytePerPixelY
+ prefetch_sw_bytes = PrefetchSourceLinesY * swath_width_luma_ub * myPipe->BytePerPixelY
+ PrefetchSourceLinesC * swath_width_chroma_ub * myPipe->BytePerPixelC;
- st_vars->prefetch_bw_oto = dml_max(st_vars->bytes_pp * myPipe->PixelClock / myPipe->DPPPerSurface,
- st_vars->prefetch_sw_bytes / (dml_max(PrefetchSourceLinesY, PrefetchSourceLinesC) * st_vars->LineTime));
-
- st_vars->min_Lsw = dml_max(PrefetchSourceLinesY, PrefetchSourceLinesC) / st_vars->max_vratio_pre;
- st_vars->min_Lsw = dml_max(st_vars->min_Lsw, 1.0);
- st_vars->Lsw_oto = dml_ceil(4.0 * dml_max(st_vars->prefetch_sw_bytes / st_vars->prefetch_bw_oto / st_vars->LineTime, st_vars->min_Lsw), 1.0) / 4.0;
-
- if (GPUVMEnable == true) {
- st_vars->Tvm_oto = dml_max3(
- st_vars->Tvm_trips,
- *Tno_bw + PDEAndMetaPTEBytesFrame * HostVMInefficiencyFactor / st_vars->prefetch_bw_oto,
- st_vars->LineTime / 4.0);
+ prefetch_bw_oto = dml_max(bytes_pp * myPipe->PixelClock / myPipe->DPPPerSurface,
+ prefetch_sw_bytes / (dml_max(PrefetchSourceLinesY, PrefetchSourceLinesC) * LineTime));
+
+ min_Lsw = dml_max(PrefetchSourceLinesY, PrefetchSourceLinesC) / max_vratio_pre;
+ min_Lsw = dml_max(min_Lsw, 1.0);
+ Lsw_oto = dml_ceil(4.0 * dml_max(prefetch_sw_bytes / prefetch_bw_oto / LineTime, min_Lsw), 1.0) / 4.0;
+
+ if (v->GPUVMEnable == true) {
+ Tvm_oto = dml_max3(
+ Tvm_trips,
+ *Tno_bw + PDEAndMetaPTEBytesFrame * HostVMInefficiencyFactor / prefetch_bw_oto,
+ LineTime / 4.0);
} else
- st_vars->Tvm_oto = st_vars->LineTime / 4.0;
-
- if ((GPUVMEnable == true || myPipe->DCCEnable == true)) {
- st_vars->Tr0_oto = dml_max4(
- st_vars->Tr0_trips,
- (MetaRowByte + PixelPTEBytesPerRow * HostVMInefficiencyFactor) / st_vars->prefetch_bw_oto,
- (st_vars->LineTime - st_vars->Tvm_oto)/2.0,
- st_vars->LineTime / 4.0);
+ Tvm_oto = LineTime / 4.0;
+
+ if ((v->GPUVMEnable == true || myPipe->DCCEnable == true)) {
+ Tr0_oto = dml_max4(
+ Tr0_trips,
+ (MetaRowByte + PixelPTEBytesPerRow * HostVMInefficiencyFactor) / prefetch_bw_oto,
+ (LineTime - Tvm_oto)/2.0,
+ LineTime / 4.0);
#ifdef __DML_VBA_DEBUG__
dml_print("DML::%s: Tr0_oto max0 = %f\n", __func__,
- (MetaRowByte + PixelPTEBytesPerRow * HostVMInefficiencyFactor) / st_vars->prefetch_bw_oto);
- dml_print("DML::%s: Tr0_oto max1 = %f\n", __func__, st_vars->Tr0_trips);
- dml_print("DML::%s: Tr0_oto max2 = %f\n", __func__, st_vars->LineTime - st_vars->Tvm_oto);
- dml_print("DML::%s: Tr0_oto max3 = %f\n", __func__, st_vars->LineTime / 4);
+ (MetaRowByte + PixelPTEBytesPerRow * HostVMInefficiencyFactor) / prefetch_bw_oto);
+ dml_print("DML::%s: Tr0_oto max1 = %f\n", __func__, Tr0_trips);
+ dml_print("DML::%s: Tr0_oto max2 = %f\n", __func__, LineTime - Tvm_oto);
+ dml_print("DML::%s: Tr0_oto max3 = %f\n", __func__, LineTime / 4);
#endif
} else
- st_vars->Tr0_oto = (st_vars->LineTime - st_vars->Tvm_oto) / 2.0;
+ Tr0_oto = (LineTime - Tvm_oto) / 2.0;
- st_vars->Tvm_oto_lines = dml_ceil(4.0 * st_vars->Tvm_oto / st_vars->LineTime, 1) / 4.0;
- st_vars->Tr0_oto_lines = dml_ceil(4.0 * st_vars->Tr0_oto / st_vars->LineTime, 1) / 4.0;
- st_vars->dst_y_prefetch_oto = st_vars->Tvm_oto_lines + 2 * st_vars->Tr0_oto_lines + st_vars->Lsw_oto;
+ Tvm_oto_lines = dml_ceil(4.0 * Tvm_oto / LineTime, 1) / 4.0;
+ Tr0_oto_lines = dml_ceil(4.0 * Tr0_oto / LineTime, 1) / 4.0;
+ dst_y_prefetch_oto = Tvm_oto_lines + 2 * Tr0_oto_lines + Lsw_oto;
- st_vars->dst_y_prefetch_equ = VStartup - (*TSetup + dml_max(TWait + TCalc, *Tdmdl)) / st_vars->LineTime -
+ dst_y_prefetch_equ = VStartup - (*TSetup + dml_max(TWait + TCalc, *Tdmdl)) / LineTime -
(*DSTYAfterScaler + (double) *DSTXAfterScaler / (double) myPipe->HTotal);
#ifdef __DML_VBA_DEBUG__
dml_print("DML::%s: HTotal = %d\n", __func__, myPipe->HTotal);
- dml_print("DML::%s: min_Lsw = %f\n", __func__, st_vars->min_Lsw);
+ dml_print("DML::%s: min_Lsw = %f\n", __func__, min_Lsw);
dml_print("DML::%s: *Tno_bw = %f\n", __func__, *Tno_bw);
dml_print("DML::%s: UrgentExtraLatency = %f\n", __func__, UrgentExtraLatency);
- dml_print("DML::%s: trip_to_mem = %f\n", __func__, st_vars->trip_to_mem);
+ dml_print("DML::%s: trip_to_mem = %f\n", __func__, trip_to_mem);
dml_print("DML::%s: BytePerPixelY = %d\n", __func__, myPipe->BytePerPixelY);
dml_print("DML::%s: PrefetchSourceLinesY = %f\n", __func__, PrefetchSourceLinesY);
dml_print("DML::%s: swath_width_luma_ub = %d\n", __func__, swath_width_luma_ub);
dml_print("DML::%s: BytePerPixelC = %d\n", __func__, myPipe->BytePerPixelC);
dml_print("DML::%s: PrefetchSourceLinesC = %f\n", __func__, PrefetchSourceLinesC);
dml_print("DML::%s: swath_width_chroma_ub = %d\n", __func__, swath_width_chroma_ub);
- dml_print("DML::%s: prefetch_sw_bytes = %f\n", __func__, st_vars->prefetch_sw_bytes);
- dml_print("DML::%s: bytes_pp = %f\n", __func__, st_vars->bytes_pp);
+ dml_print("DML::%s: prefetch_sw_bytes = %f\n", __func__, prefetch_sw_bytes);
+ dml_print("DML::%s: bytes_pp = %f\n", __func__, bytes_pp);
dml_print("DML::%s: PDEAndMetaPTEBytesFrame = %d\n", __func__, PDEAndMetaPTEBytesFrame);
dml_print("DML::%s: MetaRowByte = %d\n", __func__, MetaRowByte);
dml_print("DML::%s: PixelPTEBytesPerRow = %d\n", __func__, PixelPTEBytesPerRow);
dml_print("DML::%s: HostVMInefficiencyFactor = %f\n", __func__, HostVMInefficiencyFactor);
- dml_print("DML::%s: Tvm_trips = %f\n", __func__, st_vars->Tvm_trips);
- dml_print("DML::%s: Tr0_trips = %f\n", __func__, st_vars->Tr0_trips);
- dml_print("DML::%s: prefetch_bw_oto = %f\n", __func__, st_vars->prefetch_bw_oto);
- dml_print("DML::%s: Tr0_oto = %f\n", __func__, st_vars->Tr0_oto);
- dml_print("DML::%s: Tvm_oto = %f\n", __func__, st_vars->Tvm_oto);
- dml_print("DML::%s: Tvm_oto_lines = %f\n", __func__, st_vars->Tvm_oto_lines);
- dml_print("DML::%s: Tr0_oto_lines = %f\n", __func__, st_vars->Tr0_oto_lines);
- dml_print("DML::%s: Lsw_oto = %f\n", __func__, st_vars->Lsw_oto);
- dml_print("DML::%s: dst_y_prefetch_oto = %f\n", __func__, st_vars->dst_y_prefetch_oto);
- dml_print("DML::%s: dst_y_prefetch_equ = %f\n", __func__, st_vars->dst_y_prefetch_equ);
+ dml_print("DML::%s: Tvm_trips = %f\n", __func__, Tvm_trips);
+ dml_print("DML::%s: Tr0_trips = %f\n", __func__, Tr0_trips);
+ dml_print("DML::%s: prefetch_bw_oto = %f\n", __func__, prefetch_bw_oto);
+ dml_print("DML::%s: Tr0_oto = %f\n", __func__, Tr0_oto);
+ dml_print("DML::%s: Tvm_oto = %f\n", __func__, Tvm_oto);
+ dml_print("DML::%s: Tvm_oto_lines = %f\n", __func__, Tvm_oto_lines);
+ dml_print("DML::%s: Tr0_oto_lines = %f\n", __func__, Tr0_oto_lines);
+ dml_print("DML::%s: Lsw_oto = %f\n", __func__, Lsw_oto);
+ dml_print("DML::%s: dst_y_prefetch_oto = %f\n", __func__, dst_y_prefetch_oto);
+ dml_print("DML::%s: dst_y_prefetch_equ = %f\n", __func__, dst_y_prefetch_equ);
#endif
- st_vars->dst_y_prefetch_equ = dml_floor(4.0 * (st_vars->dst_y_prefetch_equ + 0.125), 1) / 4.0;
- st_vars->Tpre_rounded = st_vars->dst_y_prefetch_equ * st_vars->LineTime;
+ dst_y_prefetch_equ = dml_floor(4.0 * (dst_y_prefetch_equ + 0.125), 1) / 4.0;
+ Tpre_rounded = dst_y_prefetch_equ * LineTime;
#ifdef __DML_VBA_DEBUG__
- dml_print("DML::%s: dst_y_prefetch_equ: %f (after round)\n", __func__, st_vars->dst_y_prefetch_equ);
- dml_print("DML::%s: LineTime: %f\n", __func__, st_vars->LineTime);
+ dml_print("DML::%s: dst_y_prefetch_equ: %f (after round)\n", __func__, dst_y_prefetch_equ);
+ dml_print("DML::%s: LineTime: %f\n", __func__, LineTime);
dml_print("DML::%s: VStartup: %d\n", __func__, VStartup);
dml_print("DML::%s: Tvstartup: %fus - time between vstartup and first pixel of active\n",
- __func__, VStartup * st_vars->LineTime);
+ __func__, VStartup * LineTime);
dml_print("DML::%s: TSetup: %fus - time from vstartup to vready\n", __func__, *TSetup);
dml_print("DML::%s: TCalc: %fus - time for calculations in dchub starting at vready\n", __func__, TCalc);
- dml_print("DML::%s: Tdmbf: %fus - time for dmd transfer from dchub to dio output buffer\n", __func__, st_vars->Tdmbf);
- dml_print("DML::%s: Tdmec: %fus - time dio takes to transfer dmd\n", __func__, st_vars->Tdmec);
+ dml_print("DML::%s: Tdmbf: %fus - time for dmd transfer from dchub to dio output buffer\n", __func__, Tdmbf);
+ dml_print("DML::%s: Tdmec: %fus - time dio takes to transfer dmd\n", __func__, Tdmec);
dml_print("DML::%s: Tdmdl_vm: %fus - time for vm stages of dmd\n", __func__, *Tdmdl_vm);
dml_print("DML::%s: Tdmdl: %fus - time for fabric to become ready and fetch dmd\n", __func__, *Tdmdl);
dml_print("DML::%s: DSTYAfterScaler: %d lines - number of lines of pipeline and buffer delay after scaler\n",
__func__, *DSTYAfterScaler);
#endif
- st_vars->dep_bytes = dml_max(PDEAndMetaPTEBytesFrame * HostVMInefficiencyFactor,
+ dep_bytes = dml_max(PDEAndMetaPTEBytesFrame * HostVMInefficiencyFactor,
MetaRowByte + PixelPTEBytesPerRow * HostVMInefficiencyFactor);
- if (st_vars->prefetch_sw_bytes < st_vars->dep_bytes)
- st_vars->prefetch_sw_bytes = 2 * st_vars->dep_bytes;
+ if (prefetch_sw_bytes < dep_bytes)
+ prefetch_sw_bytes = 2 * dep_bytes;
*PrefetchBandwidth = 0;
*DestinationLinesToRequestVMInVBlank = 0;
@@ -3665,61 +3699,61 @@ bool dml32_CalculatePrefetchSchedule(
*VRatioPrefetchY = 0;
*VRatioPrefetchC = 0;
*RequiredPrefetchPixDataBWLuma = 0;
- if (st_vars->dst_y_prefetch_equ > 1) {
+ if (dst_y_prefetch_equ > 1) {
double PrefetchBandwidth1;
double PrefetchBandwidth2;
double PrefetchBandwidth3;
double PrefetchBandwidth4;
- if (st_vars->Tpre_rounded - *Tno_bw > 0) {
+ if (Tpre_rounded - *Tno_bw > 0) {
PrefetchBandwidth1 = (PDEAndMetaPTEBytesFrame * HostVMInefficiencyFactor + 2 * MetaRowByte
+ 2 * PixelPTEBytesPerRow * HostVMInefficiencyFactor
- + st_vars->prefetch_sw_bytes) / (st_vars->Tpre_rounded - *Tno_bw);
- st_vars->Tsw_est1 = st_vars->prefetch_sw_bytes / PrefetchBandwidth1;
+ + prefetch_sw_bytes) / (Tpre_rounded - *Tno_bw);
+ Tsw_est1 = prefetch_sw_bytes / PrefetchBandwidth1;
} else
PrefetchBandwidth1 = 0;
- if (VStartup == MaxVStartup && (st_vars->Tsw_est1 / st_vars->LineTime < st_vars->min_Lsw)
- && st_vars->Tpre_rounded - st_vars->min_Lsw * st_vars->LineTime - 0.75 * st_vars->LineTime - *Tno_bw > 0) {
+ if (VStartup == MaxVStartup && (Tsw_est1 / LineTime < min_Lsw)
+ && Tpre_rounded - min_Lsw * LineTime - 0.75 * LineTime - *Tno_bw > 0) {
PrefetchBandwidth1 = (PDEAndMetaPTEBytesFrame * HostVMInefficiencyFactor + 2 * MetaRowByte
+ 2 * PixelPTEBytesPerRow * HostVMInefficiencyFactor)
- / (st_vars->Tpre_rounded - st_vars->min_Lsw * st_vars->LineTime - 0.75 * st_vars->LineTime - *Tno_bw);
+ / (Tpre_rounded - min_Lsw * LineTime - 0.75 * LineTime - *Tno_bw);
}
- if (st_vars->Tpre_rounded - *Tno_bw - 2 * st_vars->Tr0_trips_rounded > 0)
- PrefetchBandwidth2 = (PDEAndMetaPTEBytesFrame * HostVMInefficiencyFactor + st_vars->prefetch_sw_bytes) /
- (st_vars->Tpre_rounded - *Tno_bw - 2 * st_vars->Tr0_trips_rounded);
+ if (Tpre_rounded - *Tno_bw - 2 * Tr0_trips_rounded > 0)
+ PrefetchBandwidth2 = (PDEAndMetaPTEBytesFrame * HostVMInefficiencyFactor + prefetch_sw_bytes) /
+ (Tpre_rounded - *Tno_bw - 2 * Tr0_trips_rounded);
else
PrefetchBandwidth2 = 0;
- if (st_vars->Tpre_rounded - st_vars->Tvm_trips_rounded > 0) {
+ if (Tpre_rounded - Tvm_trips_rounded > 0) {
PrefetchBandwidth3 = (2 * MetaRowByte + 2 * PixelPTEBytesPerRow * HostVMInefficiencyFactor
- + st_vars->prefetch_sw_bytes) / (st_vars->Tpre_rounded - st_vars->Tvm_trips_rounded);
- st_vars->Tsw_est3 = st_vars->prefetch_sw_bytes / PrefetchBandwidth3;
+ + prefetch_sw_bytes) / (Tpre_rounded - Tvm_trips_rounded);
+ Tsw_est3 = prefetch_sw_bytes / PrefetchBandwidth3;
} else
PrefetchBandwidth3 = 0;
if (VStartup == MaxVStartup &&
- (st_vars->Tsw_est3 / st_vars->LineTime < st_vars->min_Lsw) && st_vars->Tpre_rounded - st_vars->min_Lsw * st_vars->LineTime - 0.75 *
- st_vars->LineTime - st_vars->Tvm_trips_rounded > 0) {
+ (Tsw_est3 / LineTime < min_Lsw) && Tpre_rounded - min_Lsw * LineTime - 0.75 *
+ LineTime - Tvm_trips_rounded > 0) {
PrefetchBandwidth3 = (2 * MetaRowByte + 2 * PixelPTEBytesPerRow * HostVMInefficiencyFactor)
- / (st_vars->Tpre_rounded - st_vars->min_Lsw * st_vars->LineTime - 0.75 * st_vars->LineTime - st_vars->Tvm_trips_rounded);
+ / (Tpre_rounded - min_Lsw * LineTime - 0.75 * LineTime - Tvm_trips_rounded);
}
- if (st_vars->Tpre_rounded - st_vars->Tvm_trips_rounded - 2 * st_vars->Tr0_trips_rounded > 0) {
- PrefetchBandwidth4 = st_vars->prefetch_sw_bytes /
- (st_vars->Tpre_rounded - st_vars->Tvm_trips_rounded - 2 * st_vars->Tr0_trips_rounded);
+ if (Tpre_rounded - Tvm_trips_rounded - 2 * Tr0_trips_rounded > 0) {
+ PrefetchBandwidth4 = prefetch_sw_bytes /
+ (Tpre_rounded - Tvm_trips_rounded - 2 * Tr0_trips_rounded);
} else {
PrefetchBandwidth4 = 0;
}
#ifdef __DML_VBA_DEBUG__
- dml_print("DML::%s: Tpre_rounded: %f\n", __func__, st_vars->Tpre_rounded);
+ dml_print("DML::%s: Tpre_rounded: %f\n", __func__, Tpre_rounded);
dml_print("DML::%s: Tno_bw: %f\n", __func__, *Tno_bw);
- dml_print("DML::%s: Tvm_trips_rounded: %f\n", __func__, st_vars->Tvm_trips_rounded);
- dml_print("DML::%s: Tsw_est1: %f\n", __func__, st_vars->Tsw_est1);
- dml_print("DML::%s: Tsw_est3: %f\n", __func__, st_vars->Tsw_est3);
+ dml_print("DML::%s: Tvm_trips_rounded: %f\n", __func__, Tvm_trips_rounded);
+ dml_print("DML::%s: Tsw_est1: %f\n", __func__, Tsw_est1);
+ dml_print("DML::%s: Tsw_est3: %f\n", __func__, Tsw_est3);
dml_print("DML::%s: PrefetchBandwidth1: %f\n", __func__, PrefetchBandwidth1);
dml_print("DML::%s: PrefetchBandwidth2: %f\n", __func__, PrefetchBandwidth2);
dml_print("DML::%s: PrefetchBandwidth3: %f\n", __func__, PrefetchBandwidth3);
@@ -3732,9 +3766,9 @@ bool dml32_CalculatePrefetchSchedule(
if (PrefetchBandwidth1 > 0) {
if (*Tno_bw + PDEAndMetaPTEBytesFrame * HostVMInefficiencyFactor / PrefetchBandwidth1
- >= st_vars->Tvm_trips_rounded
+ >= Tvm_trips_rounded
&& (MetaRowByte + PixelPTEBytesPerRow * HostVMInefficiencyFactor)
- / PrefetchBandwidth1 >= st_vars->Tr0_trips_rounded) {
+ / PrefetchBandwidth1 >= Tr0_trips_rounded) {
Case1OK = true;
} else {
Case1OK = false;
@@ -3745,9 +3779,9 @@ bool dml32_CalculatePrefetchSchedule(
if (PrefetchBandwidth2 > 0) {
if (*Tno_bw + PDEAndMetaPTEBytesFrame * HostVMInefficiencyFactor / PrefetchBandwidth2
- >= st_vars->Tvm_trips_rounded
+ >= Tvm_trips_rounded
&& (MetaRowByte + PixelPTEBytesPerRow * HostVMInefficiencyFactor)
- / PrefetchBandwidth2 < st_vars->Tr0_trips_rounded) {
+ / PrefetchBandwidth2 < Tr0_trips_rounded) {
Case2OK = true;
} else {
Case2OK = false;
@@ -3758,9 +3792,9 @@ bool dml32_CalculatePrefetchSchedule(
if (PrefetchBandwidth3 > 0) {
if (*Tno_bw + PDEAndMetaPTEBytesFrame * HostVMInefficiencyFactor / PrefetchBandwidth3 <
- st_vars->Tvm_trips_rounded && (MetaRowByte + PixelPTEBytesPerRow *
+ Tvm_trips_rounded && (MetaRowByte + PixelPTEBytesPerRow *
HostVMInefficiencyFactor) / PrefetchBandwidth3 >=
- st_vars->Tr0_trips_rounded) {
+ Tr0_trips_rounded) {
Case3OK = true;
} else {
Case3OK = false;
@@ -3770,80 +3804,80 @@ bool dml32_CalculatePrefetchSchedule(
}
if (Case1OK)
- st_vars->prefetch_bw_equ = PrefetchBandwidth1;
+ prefetch_bw_equ = PrefetchBandwidth1;
else if (Case2OK)
- st_vars->prefetch_bw_equ = PrefetchBandwidth2;
+ prefetch_bw_equ = PrefetchBandwidth2;
else if (Case3OK)
- st_vars->prefetch_bw_equ = PrefetchBandwidth3;
+ prefetch_bw_equ = PrefetchBandwidth3;
else
- st_vars->prefetch_bw_equ = PrefetchBandwidth4;
+ prefetch_bw_equ = PrefetchBandwidth4;
#ifdef __DML_VBA_DEBUG__
dml_print("DML::%s: Case1OK: %d\n", __func__, Case1OK);
dml_print("DML::%s: Case2OK: %d\n", __func__, Case2OK);
dml_print("DML::%s: Case3OK: %d\n", __func__, Case3OK);
- dml_print("DML::%s: prefetch_bw_equ: %f\n", __func__, st_vars->prefetch_bw_equ);
+ dml_print("DML::%s: prefetch_bw_equ: %f\n", __func__, prefetch_bw_equ);
#endif
- if (st_vars->prefetch_bw_equ > 0) {
- if (GPUVMEnable == true) {
- st_vars->Tvm_equ = dml_max3(*Tno_bw + PDEAndMetaPTEBytesFrame *
- HostVMInefficiencyFactor / st_vars->prefetch_bw_equ,
- st_vars->Tvm_trips, st_vars->LineTime / 4);
+ if (prefetch_bw_equ > 0) {
+ if (v->GPUVMEnable == true) {
+ Tvm_equ = dml_max3(*Tno_bw + PDEAndMetaPTEBytesFrame *
+ HostVMInefficiencyFactor / prefetch_bw_equ,
+ Tvm_trips, LineTime / 4);
} else {
- st_vars->Tvm_equ = st_vars->LineTime / 4;
+ Tvm_equ = LineTime / 4;
}
- if ((GPUVMEnable == true || myPipe->DCCEnable == true)) {
- st_vars->Tr0_equ = dml_max4((MetaRowByte + PixelPTEBytesPerRow *
- HostVMInefficiencyFactor) / st_vars->prefetch_bw_equ, st_vars->Tr0_trips,
- (st_vars->LineTime - st_vars->Tvm_equ) / 2, st_vars->LineTime / 4);
+ if ((v->GPUVMEnable == true || myPipe->DCCEnable == true)) {
+ Tr0_equ = dml_max4((MetaRowByte + PixelPTEBytesPerRow *
+ HostVMInefficiencyFactor) / prefetch_bw_equ, Tr0_trips,
+ (LineTime - Tvm_equ) / 2, LineTime / 4);
} else {
- st_vars->Tr0_equ = (st_vars->LineTime - st_vars->Tvm_equ) / 2;
+ Tr0_equ = (LineTime - Tvm_equ) / 2;
}
} else {
- st_vars->Tvm_equ = 0;
- st_vars->Tr0_equ = 0;
+ Tvm_equ = 0;
+ Tr0_equ = 0;
#ifdef __DML_VBA_DEBUG__
dml_print("DML: prefetch_bw_equ equals 0! %s:%d\n", __FILE__, __LINE__);
#endif
}
}
- if (st_vars->dst_y_prefetch_oto < st_vars->dst_y_prefetch_equ) {
- *DestinationLinesForPrefetch = st_vars->dst_y_prefetch_oto;
- st_vars->TimeForFetchingMetaPTE = st_vars->Tvm_oto;
- st_vars->TimeForFetchingRowInVBlank = st_vars->Tr0_oto;
- *PrefetchBandwidth = st_vars->prefetch_bw_oto;
+ if (dst_y_prefetch_oto < dst_y_prefetch_equ) {
+ *DestinationLinesForPrefetch = dst_y_prefetch_oto;
+ TimeForFetchingMetaPTE = Tvm_oto;
+ TimeForFetchingRowInVBlank = Tr0_oto;
+ *PrefetchBandwidth = prefetch_bw_oto;
} else {
- *DestinationLinesForPrefetch = st_vars->dst_y_prefetch_equ;
- st_vars->TimeForFetchingMetaPTE = st_vars->Tvm_equ;
- st_vars->TimeForFetchingRowInVBlank = st_vars->Tr0_equ;
- *PrefetchBandwidth = st_vars->prefetch_bw_equ;
+ *DestinationLinesForPrefetch = dst_y_prefetch_equ;
+ TimeForFetchingMetaPTE = Tvm_equ;
+ TimeForFetchingRowInVBlank = Tr0_equ;
+ *PrefetchBandwidth = prefetch_bw_equ;
}
- *DestinationLinesToRequestVMInVBlank = dml_ceil(4.0 * st_vars->TimeForFetchingMetaPTE / st_vars->LineTime, 1.0) / 4.0;
+ *DestinationLinesToRequestVMInVBlank = dml_ceil(4.0 * TimeForFetchingMetaPTE / LineTime, 1.0) / 4.0;
*DestinationLinesToRequestRowInVBlank =
- dml_ceil(4.0 * st_vars->TimeForFetchingRowInVBlank / st_vars->LineTime, 1.0) / 4.0;
+ dml_ceil(4.0 * TimeForFetchingRowInVBlank / LineTime, 1.0) / 4.0;
- st_vars->LinesToRequestPrefetchPixelData = *DestinationLinesForPrefetch -
+ LinesToRequestPrefetchPixelData = *DestinationLinesForPrefetch -
*DestinationLinesToRequestVMInVBlank - 2 * *DestinationLinesToRequestRowInVBlank;
#ifdef __DML_VBA_DEBUG__
dml_print("DML::%s: DestinationLinesForPrefetch = %f\n", __func__, *DestinationLinesForPrefetch);
dml_print("DML::%s: DestinationLinesToRequestVMInVBlank = %f\n",
__func__, *DestinationLinesToRequestVMInVBlank);
- dml_print("DML::%s: TimeForFetchingRowInVBlank = %f\n", __func__, st_vars->TimeForFetchingRowInVBlank);
- dml_print("DML::%s: LineTime = %f\n", __func__, st_vars->LineTime);
+ dml_print("DML::%s: TimeForFetchingRowInVBlank = %f\n", __func__, TimeForFetchingRowInVBlank);
+ dml_print("DML::%s: LineTime = %f\n", __func__, LineTime);
dml_print("DML::%s: DestinationLinesToRequestRowInVBlank = %f\n",
__func__, *DestinationLinesToRequestRowInVBlank);
dml_print("DML::%s: PrefetchSourceLinesY = %f\n", __func__, PrefetchSourceLinesY);
- dml_print("DML::%s: LinesToRequestPrefetchPixelData = %f\n", __func__, st_vars->LinesToRequestPrefetchPixelData);
+ dml_print("DML::%s: LinesToRequestPrefetchPixelData = %f\n", __func__, LinesToRequestPrefetchPixelData);
#endif
- if (st_vars->LinesToRequestPrefetchPixelData >= 1 && st_vars->prefetch_bw_equ > 0) {
- *VRatioPrefetchY = (double) PrefetchSourceLinesY / st_vars->LinesToRequestPrefetchPixelData;
+ if (LinesToRequestPrefetchPixelData >= 1 && prefetch_bw_equ > 0) {
+ *VRatioPrefetchY = (double) PrefetchSourceLinesY / LinesToRequestPrefetchPixelData;
*VRatioPrefetchY = dml_max(*VRatioPrefetchY, 1.0);
#ifdef __DML_VBA_DEBUG__
dml_print("DML::%s: VRatioPrefetchY = %f\n", __func__, *VRatioPrefetchY);
@@ -3851,12 +3885,12 @@ bool dml32_CalculatePrefetchSchedule(
dml_print("DML::%s: VInitPreFillY = %d\n", __func__, VInitPreFillY);
#endif
if ((SwathHeightY > 4) && (VInitPreFillY > 3)) {
- if (st_vars->LinesToRequestPrefetchPixelData > (VInitPreFillY - 3.0) / 2.0) {
+ if (LinesToRequestPrefetchPixelData > (VInitPreFillY - 3.0) / 2.0) {
*VRatioPrefetchY =
dml_max((double) PrefetchSourceLinesY /
- st_vars->LinesToRequestPrefetchPixelData,
+ LinesToRequestPrefetchPixelData,
(double) MaxNumSwathY * SwathHeightY /
- (st_vars->LinesToRequestPrefetchPixelData -
+ (LinesToRequestPrefetchPixelData -
(VInitPreFillY - 3.0) / 2.0));
*VRatioPrefetchY = dml_max(*VRatioPrefetchY, 1.0);
} else {
@@ -3870,7 +3904,7 @@ bool dml32_CalculatePrefetchSchedule(
#endif
}
- *VRatioPrefetchC = (double) PrefetchSourceLinesC / st_vars->LinesToRequestPrefetchPixelData;
+ *VRatioPrefetchC = (double) PrefetchSourceLinesC / LinesToRequestPrefetchPixelData;
*VRatioPrefetchC = dml_max(*VRatioPrefetchC, 1.0);
#ifdef __DML_VBA_DEBUG__
@@ -3879,11 +3913,11 @@ bool dml32_CalculatePrefetchSchedule(
dml_print("DML::%s: VInitPreFillC = %d\n", __func__, VInitPreFillC);
#endif
if ((SwathHeightC > 4)) {
- if (st_vars->LinesToRequestPrefetchPixelData > (VInitPreFillC - 3.0) / 2.0) {
+ if (LinesToRequestPrefetchPixelData > (VInitPreFillC - 3.0) / 2.0) {
*VRatioPrefetchC =
dml_max(*VRatioPrefetchC,
(double) MaxNumSwathC * SwathHeightC /
- (st_vars->LinesToRequestPrefetchPixelData -
+ (LinesToRequestPrefetchPixelData -
(VInitPreFillC - 3.0) / 2.0));
*VRatioPrefetchC = dml_max(*VRatioPrefetchC, 1.0);
} else {
@@ -3898,25 +3932,25 @@ bool dml32_CalculatePrefetchSchedule(
}
*RequiredPrefetchPixDataBWLuma = (double) PrefetchSourceLinesY
- / st_vars->LinesToRequestPrefetchPixelData * myPipe->BytePerPixelY * swath_width_luma_ub
- / st_vars->LineTime;
+ / LinesToRequestPrefetchPixelData * myPipe->BytePerPixelY * swath_width_luma_ub
+ / LineTime;
#ifdef __DML_VBA_DEBUG__
dml_print("DML::%s: BytePerPixelY = %d\n", __func__, myPipe->BytePerPixelY);
dml_print("DML::%s: swath_width_luma_ub = %d\n", __func__, swath_width_luma_ub);
- dml_print("DML::%s: LineTime = %f\n", __func__, st_vars->LineTime);
+ dml_print("DML::%s: LineTime = %f\n", __func__, LineTime);
dml_print("DML::%s: RequiredPrefetchPixDataBWLuma = %f\n",
__func__, *RequiredPrefetchPixDataBWLuma);
#endif
*RequiredPrefetchPixDataBWChroma = (double) PrefetchSourceLinesC /
- st_vars->LinesToRequestPrefetchPixelData
+ LinesToRequestPrefetchPixelData
* myPipe->BytePerPixelC
- * swath_width_chroma_ub / st_vars->LineTime;
+ * swath_width_chroma_ub / LineTime;
} else {
MyError = true;
#ifdef __DML_VBA_DEBUG__
dml_print("DML:%s: MyErr set. LinesToRequestPrefetchPixelData: %f, should be > 0\n",
- __func__, st_vars->LinesToRequestPrefetchPixelData);
+ __func__, LinesToRequestPrefetchPixelData);
#endif
*VRatioPrefetchY = 0;
*VRatioPrefetchC = 0;
@@ -3925,15 +3959,15 @@ bool dml32_CalculatePrefetchSchedule(
}
#ifdef __DML_VBA_DEBUG__
dml_print("DML: Tpre: %fus - sum of time to request meta pte, 2 x data pte + meta data, swaths\n",
- (double)st_vars->LinesToRequestPrefetchPixelData * st_vars->LineTime +
- 2.0*st_vars->TimeForFetchingRowInVBlank + st_vars->TimeForFetchingMetaPTE);
- dml_print("DML: Tvm: %fus - time to fetch page tables for meta surface\n", st_vars->TimeForFetchingMetaPTE);
+ (double)LinesToRequestPrefetchPixelData * LineTime +
+ 2.0*TimeForFetchingRowInVBlank + TimeForFetchingMetaPTE);
+ dml_print("DML: Tvm: %fus - time to fetch page tables for meta surface\n", TimeForFetchingMetaPTE);
dml_print("DML: To: %fus - time for propagation from scaler to optc\n",
- (*DSTYAfterScaler + ((double) (*DSTXAfterScaler) / (double) myPipe->HTotal)) * st_vars->LineTime);
+ (*DSTYAfterScaler + ((double) (*DSTXAfterScaler) / (double) myPipe->HTotal)) * LineTime);
dml_print("DML: Tvstartup - TSetup - Tcalc - Twait - Tpre - To > 0\n");
- dml_print("DML: Tslack(pre): %fus - time left over in schedule\n", VStartup * st_vars->LineTime -
- st_vars->TimeForFetchingMetaPTE - 2*st_vars->TimeForFetchingRowInVBlank - (*DSTYAfterScaler +
- ((double) (*DSTXAfterScaler) / (double) myPipe->HTotal)) * st_vars->LineTime - TWait - TCalc - *TSetup);
+ dml_print("DML: Tslack(pre): %fus - time left over in schedule\n", VStartup * LineTime -
+ TimeForFetchingMetaPTE - 2*TimeForFetchingRowInVBlank - (*DSTYAfterScaler +
+ ((double) (*DSTXAfterScaler) / (double) myPipe->HTotal)) * LineTime - TWait - TCalc - *TSetup);
dml_print("DML: row_bytes = dpte_row_bytes (per_pipe) = PixelPTEBytesPerRow = : %d\n",
PixelPTEBytesPerRow);
#endif
@@ -3941,7 +3975,7 @@ bool dml32_CalculatePrefetchSchedule(
MyError = true;
#ifdef __DML_VBA_DEBUG__
dml_print("DML::%s: MyErr set, dst_y_prefetch_equ = %f (should be > 1)\n",
- __func__, st_vars->dst_y_prefetch_equ);
+ __func__, dst_y_prefetch_equ);
#endif
}
@@ -3957,10 +3991,10 @@ bool dml32_CalculatePrefetchSchedule(
dml_print("DML::%s: HostVMInefficiencyFactor = %f\n", __func__, HostVMInefficiencyFactor);
dml_print("DML::%s: DestinationLinesToRequestVMInVBlank = %f\n",
__func__, *DestinationLinesToRequestVMInVBlank);
- dml_print("DML::%s: LineTime = %f\n", __func__, st_vars->LineTime);
+ dml_print("DML::%s: LineTime = %f\n", __func__, LineTime);
#endif
prefetch_vm_bw = PDEAndMetaPTEBytesFrame * HostVMInefficiencyFactor /
- (*DestinationLinesToRequestVMInVBlank * st_vars->LineTime);
+ (*DestinationLinesToRequestVMInVBlank * LineTime);
#ifdef __DML_VBA_DEBUG__
dml_print("DML::%s: prefetch_vm_bw = %f\n", __func__, prefetch_vm_bw);
#endif
@@ -3977,7 +4011,7 @@ bool dml32_CalculatePrefetchSchedule(
prefetch_row_bw = 0;
} else if (*DestinationLinesToRequestRowInVBlank > 0) {
prefetch_row_bw = (MetaRowByte + PixelPTEBytesPerRow * HostVMInefficiencyFactor) /
- (*DestinationLinesToRequestRowInVBlank * st_vars->LineTime);
+ (*DestinationLinesToRequestRowInVBlank * LineTime);
#ifdef __DML_VBA_DEBUG__
dml_print("DML::%s: MetaRowByte = %d\n", __func__, MetaRowByte);
@@ -4000,12 +4034,12 @@ bool dml32_CalculatePrefetchSchedule(
if (MyError) {
*PrefetchBandwidth = 0;
- st_vars->TimeForFetchingMetaPTE = 0;
- st_vars->TimeForFetchingRowInVBlank = 0;
+ TimeForFetchingMetaPTE = 0;
+ TimeForFetchingRowInVBlank = 0;
*DestinationLinesToRequestVMInVBlank = 0;
*DestinationLinesToRequestRowInVBlank = 0;
*DestinationLinesForPrefetch = 0;
- st_vars->LinesToRequestPrefetchPixelData = 0;
+ LinesToRequestPrefetchPixelData = 0;
*VRatioPrefetchY = 0;
*VRatioPrefetchC = 0;
*RequiredPrefetchPixDataBWLuma = 0;
@@ -4159,59 +4193,28 @@ void dml32_CalculateFlipSchedule(
} // CalculateFlipSchedule
void dml32_CalculateWatermarksMALLUseAndDRAMSpeedChangeSupport(
- struct dml32_CalculateWatermarksMALLUseAndDRAMSpeedChangeSupport *st_vars,
- bool USRRetrainingRequiredFinal,
- enum dm_use_mall_for_pstate_change_mode UseMALLForPStateChange[],
+ struct vba_vars_st *v,
unsigned int PrefetchMode,
- unsigned int NumberOfActiveSurfaces,
- unsigned int MaxLineBufferLines,
- unsigned int LineBufferSize,
- unsigned int WritebackInterfaceBufferSize,
double DCFCLK,
double ReturnBW,
- bool SynchronizeTimingsFinal,
- bool SynchronizeDRRDisplaysForUCLKPStateChangeFinal,
- bool DRRDisplay[],
- unsigned int dpte_group_bytes[],
- unsigned int meta_row_height[],
- unsigned int meta_row_height_chroma[],
SOCParametersList mmSOCParameters,
- unsigned int WritebackChunkSize,
double SOCCLK,
double DCFClkDeepSleep,
unsigned int DETBufferSizeY[],
unsigned int DETBufferSizeC[],
unsigned int SwathHeightY[],
unsigned int SwathHeightC[],
- unsigned int LBBitPerPixel[],
double SwathWidthY[],
double SwathWidthC[],
- double HRatio[],
- double HRatioChroma[],
- unsigned int VTaps[],
- unsigned int VTapsChroma[],
- double VRatio[],
- double VRatioChroma[],
- unsigned int HTotal[],
- unsigned int VTotal[],
- unsigned int VActive[],
- double PixelClock[],
- unsigned int BlendingAndTiming[],
unsigned int DPPPerSurface[],
double BytePerPixelDETY[],
double BytePerPixelDETC[],
double DSTXAfterScaler[],
double DSTYAfterScaler[],
- bool WritebackEnable[],
- enum source_format_class WritebackPixelFormat[],
- double WritebackDestinationWidth[],
- double WritebackDestinationHeight[],
- double WritebackSourceHeight[],
bool UnboundedRequestEnabled,
unsigned int CompressedBufferSizeInkByte,
/* Output */
- Watermarks *Watermark,
enum clock_change_support *DRAMClockChangeSupport,
double MaxActiveDRAMClockChangeLatencySupported[],
unsigned int SubViewportLinesNeededInMALL[],
@@ -4221,229 +4224,251 @@ void dml32_CalculateWatermarksMALLUseAndDRAMSpeedChangeSupport(
double ActiveDRAMClockChangeLatencyMargin[])
{
unsigned int i, j, k;
-
- st_vars->SurfaceWithMinActiveFCLKChangeMargin = 0;
- st_vars->DRAMClockChangeSupportNumber = 0;
- st_vars->DRAMClockChangeMethod = 0;
- st_vars->FoundFirstSurfaceWithMinActiveFCLKChangeMargin = false;
- st_vars->MinActiveFCLKChangeMargin = 0.;
- st_vars->SecondMinActiveFCLKChangeMarginOneDisplayInVBLank = 0.;
- st_vars->TotalPixelBW = 0.0;
- st_vars->TotalActiveWriteback = 0;
-
- Watermark->UrgentWatermark = mmSOCParameters.UrgentLatency + mmSOCParameters.ExtraLatency;
- Watermark->USRRetrainingWatermark = mmSOCParameters.UrgentLatency + mmSOCParameters.ExtraLatency
+ unsigned int SurfaceWithMinActiveFCLKChangeMargin = 0;
+ unsigned int DRAMClockChangeSupportNumber = 0;
+ unsigned int LastSurfaceWithoutMargin;
+ unsigned int DRAMClockChangeMethod = 0;
+ bool FoundFirstSurfaceWithMinActiveFCLKChangeMargin = false;
+ double MinActiveFCLKChangeMargin = 0.;
+ double SecondMinActiveFCLKChangeMarginOneDisplayInVBLank = 0.;
+ double ActiveClockChangeLatencyHidingY;
+ double ActiveClockChangeLatencyHidingC;
+ double ActiveClockChangeLatencyHiding;
+ double EffectiveDETBufferSizeY;
+ double ActiveFCLKChangeLatencyMargin[DC__NUM_DPP__MAX];
+ double USRRetrainingLatencyMargin[DC__NUM_DPP__MAX];
+ double TotalPixelBW = 0.0;
+ bool SynchronizedSurfaces[DC__NUM_DPP__MAX][DC__NUM_DPP__MAX];
+ double EffectiveLBLatencyHidingY;
+ double EffectiveLBLatencyHidingC;
+ double LinesInDETY[DC__NUM_DPP__MAX];
+ double LinesInDETC[DC__NUM_DPP__MAX];
+ unsigned int LinesInDETYRoundedDownToSwath[DC__NUM_DPP__MAX];
+ unsigned int LinesInDETCRoundedDownToSwath[DC__NUM_DPP__MAX];
+ double FullDETBufferingTimeY;
+ double FullDETBufferingTimeC;
+ double WritebackDRAMClockChangeLatencyMargin;
+ double WritebackFCLKChangeLatencyMargin;
+ double WritebackLatencyHiding;
+ bool SameTimingForFCLKChange;
+
+ unsigned int TotalActiveWriteback = 0;
+ unsigned int LBLatencyHidingSourceLinesY[DC__NUM_DPP__MAX];
+ unsigned int LBLatencyHidingSourceLinesC[DC__NUM_DPP__MAX];
+
+ v->Watermark.UrgentWatermark = mmSOCParameters.UrgentLatency + mmSOCParameters.ExtraLatency;
+ v->Watermark.USRRetrainingWatermark = mmSOCParameters.UrgentLatency + mmSOCParameters.ExtraLatency
+ mmSOCParameters.USRRetrainingLatency + mmSOCParameters.SMNLatency;
- Watermark->DRAMClockChangeWatermark = mmSOCParameters.DRAMClockChangeLatency + Watermark->UrgentWatermark;
- Watermark->FCLKChangeWatermark = mmSOCParameters.FCLKChangeLatency + Watermark->UrgentWatermark;
- Watermark->StutterExitWatermark = mmSOCParameters.SRExitTime + mmSOCParameters.ExtraLatency
+ v->Watermark.DRAMClockChangeWatermark = mmSOCParameters.DRAMClockChangeLatency + v->Watermark.UrgentWatermark;
+ v->Watermark.FCLKChangeWatermark = mmSOCParameters.FCLKChangeLatency + v->Watermark.UrgentWatermark;
+ v->Watermark.StutterExitWatermark = mmSOCParameters.SRExitTime + mmSOCParameters.ExtraLatency
+ 10 / DCFClkDeepSleep;
- Watermark->StutterEnterPlusExitWatermark = mmSOCParameters.SREnterPlusExitTime + mmSOCParameters.ExtraLatency
+ v->Watermark.StutterEnterPlusExitWatermark = mmSOCParameters.SREnterPlusExitTime + mmSOCParameters.ExtraLatency
+ 10 / DCFClkDeepSleep;
- Watermark->Z8StutterExitWatermark = mmSOCParameters.SRExitZ8Time + mmSOCParameters.ExtraLatency
+ v->Watermark.Z8StutterExitWatermark = mmSOCParameters.SRExitZ8Time + mmSOCParameters.ExtraLatency
+ 10 / DCFClkDeepSleep;
- Watermark->Z8StutterEnterPlusExitWatermark = mmSOCParameters.SREnterPlusExitZ8Time
+ v->Watermark.Z8StutterEnterPlusExitWatermark = mmSOCParameters.SREnterPlusExitZ8Time
+ mmSOCParameters.ExtraLatency + 10 / DCFClkDeepSleep;
#ifdef __DML_VBA_DEBUG__
dml_print("DML::%s: UrgentLatency = %f\n", __func__, mmSOCParameters.UrgentLatency);
dml_print("DML::%s: ExtraLatency = %f\n", __func__, mmSOCParameters.ExtraLatency);
dml_print("DML::%s: DRAMClockChangeLatency = %f\n", __func__, mmSOCParameters.DRAMClockChangeLatency);
- dml_print("DML::%s: UrgentWatermark = %f\n", __func__, Watermark->UrgentWatermark);
- dml_print("DML::%s: USRRetrainingWatermark = %f\n", __func__, Watermark->USRRetrainingWatermark);
- dml_print("DML::%s: DRAMClockChangeWatermark = %f\n", __func__, Watermark->DRAMClockChangeWatermark);
- dml_print("DML::%s: FCLKChangeWatermark = %f\n", __func__, Watermark->FCLKChangeWatermark);
- dml_print("DML::%s: StutterExitWatermark = %f\n", __func__, Watermark->StutterExitWatermark);
- dml_print("DML::%s: StutterEnterPlusExitWatermark = %f\n", __func__, Watermark->StutterEnterPlusExitWatermark);
- dml_print("DML::%s: Z8StutterExitWatermark = %f\n", __func__, Watermark->Z8StutterExitWatermark);
+ dml_print("DML::%s: UrgentWatermark = %f\n", __func__, v->Watermark.UrgentWatermark);
+ dml_print("DML::%s: USRRetrainingWatermark = %f\n", __func__, v->Watermark.USRRetrainingWatermark);
+ dml_print("DML::%s: DRAMClockChangeWatermark = %f\n", __func__, v->Watermark.DRAMClockChangeWatermark);
+ dml_print("DML::%s: FCLKChangeWatermark = %f\n", __func__, v->Watermark.FCLKChangeWatermark);
+ dml_print("DML::%s: StutterExitWatermark = %f\n", __func__, v->Watermark.StutterExitWatermark);
+ dml_print("DML::%s: StutterEnterPlusExitWatermark = %f\n", __func__, v->Watermark.StutterEnterPlusExitWatermark);
+ dml_print("DML::%s: Z8StutterExitWatermark = %f\n", __func__, v->Watermark.Z8StutterExitWatermark);
dml_print("DML::%s: Z8StutterEnterPlusExitWatermark = %f\n",
- __func__, Watermark->Z8StutterEnterPlusExitWatermark);
+ __func__, v->Watermark.Z8StutterEnterPlusExitWatermark);
#endif
- st_vars->TotalActiveWriteback = 0;
- for (k = 0; k < NumberOfActiveSurfaces; ++k) {
- if (WritebackEnable[k] == true)
- st_vars->TotalActiveWriteback = st_vars->TotalActiveWriteback + 1;
+ TotalActiveWriteback = 0;
+ for (k = 0; k < v->NumberOfActiveSurfaces; ++k) {
+ if (v->WritebackEnable[k] == true)
+ TotalActiveWriteback = TotalActiveWriteback + 1;
}
- if (st_vars->TotalActiveWriteback <= 1) {
- Watermark->WritebackUrgentWatermark = mmSOCParameters.WritebackLatency;
+ if (TotalActiveWriteback <= 1) {
+ v->Watermark.WritebackUrgentWatermark = mmSOCParameters.WritebackLatency;
} else {
- Watermark->WritebackUrgentWatermark = mmSOCParameters.WritebackLatency
- + WritebackChunkSize * 1024.0 / 32.0 / SOCCLK;
+ v->Watermark.WritebackUrgentWatermark = mmSOCParameters.WritebackLatency
+ + v->WritebackChunkSize * 1024.0 / 32.0 / SOCCLK;
}
- if (USRRetrainingRequiredFinal)
- Watermark->WritebackUrgentWatermark = Watermark->WritebackUrgentWatermark
+ if (v->USRRetrainingRequiredFinal)
+ v->Watermark.WritebackUrgentWatermark = v->Watermark.WritebackUrgentWatermark
+ mmSOCParameters.USRRetrainingLatency;
- if (st_vars->TotalActiveWriteback <= 1) {
- Watermark->WritebackDRAMClockChangeWatermark = mmSOCParameters.DRAMClockChangeLatency
+ if (TotalActiveWriteback <= 1) {
+ v->Watermark.WritebackDRAMClockChangeWatermark = mmSOCParameters.DRAMClockChangeLatency
+ mmSOCParameters.WritebackLatency;
- Watermark->WritebackFCLKChangeWatermark = mmSOCParameters.FCLKChangeLatency
+ v->Watermark.WritebackFCLKChangeWatermark = mmSOCParameters.FCLKChangeLatency
+ mmSOCParameters.WritebackLatency;
} else {
- Watermark->WritebackDRAMClockChangeWatermark = mmSOCParameters.DRAMClockChangeLatency
- + mmSOCParameters.WritebackLatency + WritebackChunkSize * 1024.0 / 32.0 / SOCCLK;
- Watermark->WritebackFCLKChangeWatermark = mmSOCParameters.FCLKChangeLatency
- + mmSOCParameters.WritebackLatency + WritebackChunkSize * 1024 / 32 / SOCCLK;
+ v->Watermark.WritebackDRAMClockChangeWatermark = mmSOCParameters.DRAMClockChangeLatency
+ + mmSOCParameters.WritebackLatency + v->WritebackChunkSize * 1024.0 / 32.0 / SOCCLK;
+ v->Watermark.WritebackFCLKChangeWatermark = mmSOCParameters.FCLKChangeLatency
+ + mmSOCParameters.WritebackLatency + v->WritebackChunkSize * 1024 / 32 / SOCCLK;
}
- if (USRRetrainingRequiredFinal)
- Watermark->WritebackDRAMClockChangeWatermark = Watermark->WritebackDRAMClockChangeWatermark
+ if (v->USRRetrainingRequiredFinal)
+ v->Watermark.WritebackDRAMClockChangeWatermark = v->Watermark.WritebackDRAMClockChangeWatermark
+ mmSOCParameters.USRRetrainingLatency;
- if (USRRetrainingRequiredFinal)
- Watermark->WritebackFCLKChangeWatermark = Watermark->WritebackFCLKChangeWatermark
+ if (v->USRRetrainingRequiredFinal)
+ v->Watermark.WritebackFCLKChangeWatermark = v->Watermark.WritebackFCLKChangeWatermark
+ mmSOCParameters.USRRetrainingLatency;
#ifdef __DML_VBA_DEBUG__
dml_print("DML::%s: WritebackDRAMClockChangeWatermark = %f\n",
- __func__, Watermark->WritebackDRAMClockChangeWatermark);
- dml_print("DML::%s: WritebackFCLKChangeWatermark = %f\n", __func__, Watermark->WritebackFCLKChangeWatermark);
- dml_print("DML::%s: WritebackUrgentWatermark = %f\n", __func__, Watermark->WritebackUrgentWatermark);
- dml_print("DML::%s: USRRetrainingRequiredFinal = %d\n", __func__, USRRetrainingRequiredFinal);
+ __func__, v->Watermark.WritebackDRAMClockChangeWatermark);
+ dml_print("DML::%s: WritebackFCLKChangeWatermark = %f\n", __func__, v->Watermark.WritebackFCLKChangeWatermark);
+ dml_print("DML::%s: WritebackUrgentWatermark = %f\n", __func__, v->Watermark.WritebackUrgentWatermark);
+ dml_print("DML::%s: v->USRRetrainingRequiredFinal = %d\n", __func__, v->USRRetrainingRequiredFinal);
dml_print("DML::%s: USRRetrainingLatency = %f\n", __func__, mmSOCParameters.USRRetrainingLatency);
#endif
- for (k = 0; k < NumberOfActiveSurfaces; ++k) {
- st_vars->TotalPixelBW = st_vars->TotalPixelBW + DPPPerSurface[k] * (SwathWidthY[k] * BytePerPixelDETY[k] * VRatio[k] +
- SwathWidthC[k] * BytePerPixelDETC[k] * VRatioChroma[k]) / (HTotal[k] / PixelClock[k]);
+ for (k = 0; k < v->NumberOfActiveSurfaces; ++k) {
+ TotalPixelBW = TotalPixelBW + DPPPerSurface[k] * (SwathWidthY[k] * BytePerPixelDETY[k] * v->VRatio[k] +
+ SwathWidthC[k] * BytePerPixelDETC[k] * v->VRatioChroma[k]) / (v->HTotal[k] / v->PixelClock[k]);
}
- for (k = 0; k < NumberOfActiveSurfaces; ++k) {
+ for (k = 0; k < v->NumberOfActiveSurfaces; ++k) {
- st_vars->LBLatencyHidingSourceLinesY[k] = dml_min((double) MaxLineBufferLines, dml_floor(LineBufferSize / LBBitPerPixel[k] / (SwathWidthY[k] / dml_max(HRatio[k], 1.0)), 1)) - (VTaps[k] - 1);
- st_vars->LBLatencyHidingSourceLinesC[k] = dml_min((double) MaxLineBufferLines, dml_floor(LineBufferSize / LBBitPerPixel[k] / (SwathWidthC[k] / dml_max(HRatioChroma[k], 1.0)), 1)) - (VTapsChroma[k] - 1);
+ LBLatencyHidingSourceLinesY[k] = dml_min((double) v->MaxLineBufferLines, dml_floor(v->LineBufferSizeFinal / v->LBBitPerPixel[k] / (SwathWidthY[k] / dml_max(v->HRatio[k], 1.0)), 1)) - (v->vtaps[k] - 1);
+ LBLatencyHidingSourceLinesC[k] = dml_min((double) v->MaxLineBufferLines, dml_floor(v->LineBufferSizeFinal / v->LBBitPerPixel[k] / (SwathWidthC[k] / dml_max(v->HRatioChroma[k], 1.0)), 1)) - (v->VTAPsChroma[k] - 1);
#ifdef __DML_VBA_DEBUG__
- dml_print("DML::%s: k=%d, MaxLineBufferLines = %d\n", __func__, k, MaxLineBufferLines);
- dml_print("DML::%s: k=%d, LineBufferSize = %d\n", __func__, k, LineBufferSize);
- dml_print("DML::%s: k=%d, LBBitPerPixel = %d\n", __func__, k, LBBitPerPixel[k]);
- dml_print("DML::%s: k=%d, HRatio = %f\n", __func__, k, HRatio[k]);
- dml_print("DML::%s: k=%d, VTaps = %d\n", __func__, k, VTaps[k]);
+ dml_print("DML::%s: k=%d, v->MaxLineBufferLines = %d\n", __func__, k, v->MaxLineBufferLines);
+ dml_print("DML::%s: k=%d, v->LineBufferSizeFinal = %d\n", __func__, k, v->LineBufferSizeFinal);
+ dml_print("DML::%s: k=%d, v->LBBitPerPixel = %d\n", __func__, k, v->LBBitPerPixel[k]);
+ dml_print("DML::%s: k=%d, v->HRatio = %f\n", __func__, k, v->HRatio[k]);
+ dml_print("DML::%s: k=%d, v->vtaps = %d\n", __func__, k, v->vtaps[k]);
#endif
- st_vars->EffectiveLBLatencyHidingY = st_vars->LBLatencyHidingSourceLinesY[k] / VRatio[k] * (HTotal[k] / PixelClock[k]);
- st_vars->EffectiveLBLatencyHidingC = st_vars->LBLatencyHidingSourceLinesC[k] / VRatioChroma[k] * (HTotal[k] / PixelClock[k]);
- st_vars->EffectiveDETBufferSizeY = DETBufferSizeY[k];
+ EffectiveLBLatencyHidingY = LBLatencyHidingSourceLinesY[k] / v->VRatio[k] * (v->HTotal[k] / v->PixelClock[k]);
+ EffectiveLBLatencyHidingC = LBLatencyHidingSourceLinesC[k] / v->VRatioChroma[k] * (v->HTotal[k] / v->PixelClock[k]);
+ EffectiveDETBufferSizeY = DETBufferSizeY[k];
if (UnboundedRequestEnabled) {
- st_vars->EffectiveDETBufferSizeY = st_vars->EffectiveDETBufferSizeY
+ EffectiveDETBufferSizeY = EffectiveDETBufferSizeY
+ CompressedBufferSizeInkByte * 1024
- * (SwathWidthY[k] * BytePerPixelDETY[k] * VRatio[k])
- / (HTotal[k] / PixelClock[k]) / st_vars->TotalPixelBW;
+ * (SwathWidthY[k] * BytePerPixelDETY[k] * v->VRatio[k])
+ / (v->HTotal[k] / v->PixelClock[k]) / TotalPixelBW;
}
- st_vars->LinesInDETY[k] = (double) st_vars->EffectiveDETBufferSizeY / BytePerPixelDETY[k] / SwathWidthY[k];
- st_vars->LinesInDETYRoundedDownToSwath[k] = dml_floor(st_vars->LinesInDETY[k], SwathHeightY[k]);
- st_vars->FullDETBufferingTimeY = st_vars->LinesInDETYRoundedDownToSwath[k] * (HTotal[k] / PixelClock[k]) / VRatio[k];
+ LinesInDETY[k] = (double) EffectiveDETBufferSizeY / BytePerPixelDETY[k] / SwathWidthY[k];
+ LinesInDETYRoundedDownToSwath[k] = dml_floor(LinesInDETY[k], SwathHeightY[k]);
+ FullDETBufferingTimeY = LinesInDETYRoundedDownToSwath[k] * (v->HTotal[k] / v->PixelClock[k]) / v->VRatio[k];
- st_vars->ActiveClockChangeLatencyHidingY = st_vars->EffectiveLBLatencyHidingY + st_vars->FullDETBufferingTimeY
- - (DSTXAfterScaler[k] / HTotal[k] + DSTYAfterScaler[k]) * HTotal[k] / PixelClock[k];
+ ActiveClockChangeLatencyHidingY = EffectiveLBLatencyHidingY + FullDETBufferingTimeY
+ - (DSTXAfterScaler[k] / v->HTotal[k] + DSTYAfterScaler[k]) * v->HTotal[k] / v->PixelClock[k];
- if (NumberOfActiveSurfaces > 1) {
- st_vars->ActiveClockChangeLatencyHidingY = st_vars->ActiveClockChangeLatencyHidingY
- - (1 - 1 / NumberOfActiveSurfaces) * SwathHeightY[k] * HTotal[k]
- / PixelClock[k] / VRatio[k];
+ if (v->NumberOfActiveSurfaces > 1) {
+ ActiveClockChangeLatencyHidingY = ActiveClockChangeLatencyHidingY
+ - (1 - 1 / v->NumberOfActiveSurfaces) * SwathHeightY[k] * v->HTotal[k]
+ / v->PixelClock[k] / v->VRatio[k];
}
if (BytePerPixelDETC[k] > 0) {
- st_vars->LinesInDETC[k] = DETBufferSizeC[k] / BytePerPixelDETC[k] / SwathWidthC[k];
- st_vars->LinesInDETCRoundedDownToSwath[k] = dml_floor(st_vars->LinesInDETC[k], SwathHeightC[k]);
- st_vars->FullDETBufferingTimeC = st_vars->LinesInDETCRoundedDownToSwath[k] * (HTotal[k] / PixelClock[k])
- / VRatioChroma[k];
- st_vars->ActiveClockChangeLatencyHidingC = st_vars->EffectiveLBLatencyHidingC + st_vars->FullDETBufferingTimeC
- - (DSTXAfterScaler[k] / HTotal[k] + DSTYAfterScaler[k]) * HTotal[k]
- / PixelClock[k];
- if (NumberOfActiveSurfaces > 1) {
- st_vars->ActiveClockChangeLatencyHidingC = st_vars->ActiveClockChangeLatencyHidingC
- - (1 - 1 / NumberOfActiveSurfaces) * SwathHeightC[k] * HTotal[k]
- / PixelClock[k] / VRatioChroma[k];
+ LinesInDETC[k] = DETBufferSizeC[k] / BytePerPixelDETC[k] / SwathWidthC[k];
+ LinesInDETCRoundedDownToSwath[k] = dml_floor(LinesInDETC[k], SwathHeightC[k]);
+ FullDETBufferingTimeC = LinesInDETCRoundedDownToSwath[k] * (v->HTotal[k] / v->PixelClock[k])
+ / v->VRatioChroma[k];
+ ActiveClockChangeLatencyHidingC = EffectiveLBLatencyHidingC + FullDETBufferingTimeC
+ - (DSTXAfterScaler[k] / v->HTotal[k] + DSTYAfterScaler[k]) * v->HTotal[k]
+ / v->PixelClock[k];
+ if (v->NumberOfActiveSurfaces > 1) {
+ ActiveClockChangeLatencyHidingC = ActiveClockChangeLatencyHidingC
+ - (1 - 1 / v->NumberOfActiveSurfaces) * SwathHeightC[k] * v->HTotal[k]
+ / v->PixelClock[k] / v->VRatioChroma[k];
}
- st_vars->ActiveClockChangeLatencyHiding = dml_min(st_vars->ActiveClockChangeLatencyHidingY,
- st_vars->ActiveClockChangeLatencyHidingC);
+ ActiveClockChangeLatencyHiding = dml_min(ActiveClockChangeLatencyHidingY,
+ ActiveClockChangeLatencyHidingC);
} else {
- st_vars->ActiveClockChangeLatencyHiding = st_vars->ActiveClockChangeLatencyHidingY;
+ ActiveClockChangeLatencyHiding = ActiveClockChangeLatencyHidingY;
}
- ActiveDRAMClockChangeLatencyMargin[k] = st_vars->ActiveClockChangeLatencyHiding - Watermark->UrgentWatermark
- - Watermark->DRAMClockChangeWatermark;
- st_vars->ActiveFCLKChangeLatencyMargin[k] = st_vars->ActiveClockChangeLatencyHiding - Watermark->UrgentWatermark
- - Watermark->FCLKChangeWatermark;
- st_vars->USRRetrainingLatencyMargin[k] = st_vars->ActiveClockChangeLatencyHiding - Watermark->USRRetrainingWatermark;
+ ActiveDRAMClockChangeLatencyMargin[k] = ActiveClockChangeLatencyHiding - v->Watermark.UrgentWatermark
+ - v->Watermark.DRAMClockChangeWatermark;
+ ActiveFCLKChangeLatencyMargin[k] = ActiveClockChangeLatencyHiding - v->Watermark.UrgentWatermark
+ - v->Watermark.FCLKChangeWatermark;
+ USRRetrainingLatencyMargin[k] = ActiveClockChangeLatencyHiding - v->Watermark.USRRetrainingWatermark;
- if (WritebackEnable[k]) {
- st_vars->WritebackLatencyHiding = WritebackInterfaceBufferSize * 1024
- / (WritebackDestinationWidth[k] * WritebackDestinationHeight[k]
- / (WritebackSourceHeight[k] * HTotal[k] / PixelClock[k]) * 4);
- if (WritebackPixelFormat[k] == dm_444_64)
- st_vars->WritebackLatencyHiding = st_vars->WritebackLatencyHiding / 2;
+ if (v->WritebackEnable[k]) {
+ WritebackLatencyHiding = v->WritebackInterfaceBufferSize * 1024
+ / (v->WritebackDestinationWidth[k] * v->WritebackDestinationHeight[k]
+ / (v->WritebackSourceHeight[k] * v->HTotal[k] / v->PixelClock[k]) * 4);
+ if (v->WritebackPixelFormat[k] == dm_444_64)
+ WritebackLatencyHiding = WritebackLatencyHiding / 2;
- st_vars->WritebackDRAMClockChangeLatencyMargin = st_vars->WritebackLatencyHiding
- - Watermark->WritebackDRAMClockChangeWatermark;
+ WritebackDRAMClockChangeLatencyMargin = WritebackLatencyHiding
+ - v->Watermark.WritebackDRAMClockChangeWatermark;
- st_vars->WritebackFCLKChangeLatencyMargin = st_vars->WritebackLatencyHiding
- - Watermark->WritebackFCLKChangeWatermark;
+ WritebackFCLKChangeLatencyMargin = WritebackLatencyHiding
+ - v->Watermark.WritebackFCLKChangeWatermark;
ActiveDRAMClockChangeLatencyMargin[k] = dml_min(ActiveDRAMClockChangeLatencyMargin[k],
- st_vars->WritebackFCLKChangeLatencyMargin);
- st_vars->ActiveFCLKChangeLatencyMargin[k] = dml_min(st_vars->ActiveFCLKChangeLatencyMargin[k],
- st_vars->WritebackDRAMClockChangeLatencyMargin);
+ WritebackFCLKChangeLatencyMargin);
+ ActiveFCLKChangeLatencyMargin[k] = dml_min(ActiveFCLKChangeLatencyMargin[k],
+ WritebackDRAMClockChangeLatencyMargin);
}
MaxActiveDRAMClockChangeLatencySupported[k] =
- (UseMALLForPStateChange[k] == dm_use_mall_pstate_change_phantom_pipe) ?
+ (v->UsesMALLForPStateChange[k] == dm_use_mall_pstate_change_phantom_pipe) ?
0 :
(ActiveDRAMClockChangeLatencyMargin[k]
+ mmSOCParameters.DRAMClockChangeLatency);
}
- for (i = 0; i < NumberOfActiveSurfaces; ++i) {
- for (j = 0; j < NumberOfActiveSurfaces; ++j) {
+ for (i = 0; i < v->NumberOfActiveSurfaces; ++i) {
+ for (j = 0; j < v->NumberOfActiveSurfaces; ++j) {
if (i == j ||
- (BlendingAndTiming[i] == i && BlendingAndTiming[j] == i) ||
- (BlendingAndTiming[j] == j && BlendingAndTiming[i] == j) ||
- (BlendingAndTiming[i] == BlendingAndTiming[j] && BlendingAndTiming[i] != i) ||
- (SynchronizeTimingsFinal && PixelClock[i] == PixelClock[j] &&
- HTotal[i] == HTotal[j] && VTotal[i] == VTotal[j] &&
- VActive[i] == VActive[j]) || (SynchronizeDRRDisplaysForUCLKPStateChangeFinal &&
- (DRRDisplay[i] || DRRDisplay[j]))) {
- st_vars->SynchronizedSurfaces[i][j] = true;
+ (v->BlendingAndTiming[i] == i && v->BlendingAndTiming[j] == i) ||
+ (v->BlendingAndTiming[j] == j && v->BlendingAndTiming[i] == j) ||
+ (v->BlendingAndTiming[i] == v->BlendingAndTiming[j] && v->BlendingAndTiming[i] != i) ||
+ (v->SynchronizeTimingsFinal && v->PixelClock[i] == v->PixelClock[j] &&
+ v->HTotal[i] == v->HTotal[j] && v->VTotal[i] == v->VTotal[j] &&
+ v->VActive[i] == v->VActive[j]) || (v->SynchronizeDRRDisplaysForUCLKPStateChangeFinal &&
+ (v->DRRDisplay[i] || v->DRRDisplay[j]))) {
+ SynchronizedSurfaces[i][j] = true;
} else {
- st_vars->SynchronizedSurfaces[i][j] = false;
+ SynchronizedSurfaces[i][j] = false;
}
}
}
- for (k = 0; k < NumberOfActiveSurfaces; ++k) {
- if ((UseMALLForPStateChange[k] != dm_use_mall_pstate_change_phantom_pipe) &&
- (!st_vars->FoundFirstSurfaceWithMinActiveFCLKChangeMargin ||
- st_vars->ActiveFCLKChangeLatencyMargin[k] < st_vars->MinActiveFCLKChangeMargin)) {
- st_vars->FoundFirstSurfaceWithMinActiveFCLKChangeMargin = true;
- st_vars->MinActiveFCLKChangeMargin = st_vars->ActiveFCLKChangeLatencyMargin[k];
- st_vars->SurfaceWithMinActiveFCLKChangeMargin = k;
+ for (k = 0; k < v->NumberOfActiveSurfaces; ++k) {
+ if ((v->UsesMALLForPStateChange[k] != dm_use_mall_pstate_change_phantom_pipe) &&
+ (!FoundFirstSurfaceWithMinActiveFCLKChangeMargin ||
+ ActiveFCLKChangeLatencyMargin[k] < MinActiveFCLKChangeMargin)) {
+ FoundFirstSurfaceWithMinActiveFCLKChangeMargin = true;
+ MinActiveFCLKChangeMargin = ActiveFCLKChangeLatencyMargin[k];
+ SurfaceWithMinActiveFCLKChangeMargin = k;
}
}
- *MinActiveFCLKChangeLatencySupported = st_vars->MinActiveFCLKChangeMargin + mmSOCParameters.FCLKChangeLatency;
+ *MinActiveFCLKChangeLatencySupported = MinActiveFCLKChangeMargin + mmSOCParameters.FCLKChangeLatency;
- st_vars->SameTimingForFCLKChange = true;
- for (k = 0; k < NumberOfActiveSurfaces; ++k) {
- if (!st_vars->SynchronizedSurfaces[k][st_vars->SurfaceWithMinActiveFCLKChangeMargin]) {
- if ((UseMALLForPStateChange[k] != dm_use_mall_pstate_change_phantom_pipe) &&
- (st_vars->SameTimingForFCLKChange ||
- st_vars->ActiveFCLKChangeLatencyMargin[k] <
- st_vars->SecondMinActiveFCLKChangeMarginOneDisplayInVBLank)) {
- st_vars->SecondMinActiveFCLKChangeMarginOneDisplayInVBLank = st_vars->ActiveFCLKChangeLatencyMargin[k];
+ SameTimingForFCLKChange = true;
+ for (k = 0; k < v->NumberOfActiveSurfaces; ++k) {
+ if (!SynchronizedSurfaces[k][SurfaceWithMinActiveFCLKChangeMargin]) {
+ if ((v->UsesMALLForPStateChange[k] != dm_use_mall_pstate_change_phantom_pipe) &&
+ (SameTimingForFCLKChange ||
+ ActiveFCLKChangeLatencyMargin[k] <
+ SecondMinActiveFCLKChangeMarginOneDisplayInVBLank)) {
+ SecondMinActiveFCLKChangeMarginOneDisplayInVBLank = ActiveFCLKChangeLatencyMargin[k];
}
- st_vars->SameTimingForFCLKChange = false;
+ SameTimingForFCLKChange = false;
}
}
- if (st_vars->MinActiveFCLKChangeMargin > 0) {
+ if (MinActiveFCLKChangeMargin > 0) {
*FCLKChangeSupport = dm_fclock_change_vactive;
- } else if ((st_vars->SameTimingForFCLKChange || st_vars->SecondMinActiveFCLKChangeMarginOneDisplayInVBLank > 0) &&
+ } else if ((SameTimingForFCLKChange || SecondMinActiveFCLKChangeMarginOneDisplayInVBLank > 0) &&
(PrefetchMode <= 1)) {
*FCLKChangeSupport = dm_fclock_change_vblank;
} else {
@@ -4451,95 +4476,95 @@ void dml32_CalculateWatermarksMALLUseAndDRAMSpeedChangeSupport(
}
*USRRetrainingSupport = true;
- for (k = 0; k < NumberOfActiveSurfaces; ++k) {
- if ((UseMALLForPStateChange[k] != dm_use_mall_pstate_change_phantom_pipe) &&
- (st_vars->USRRetrainingLatencyMargin[k] < 0)) {
+ for (k = 0; k < v->NumberOfActiveSurfaces; ++k) {
+ if ((v->UsesMALLForPStateChange[k] != dm_use_mall_pstate_change_phantom_pipe) &&
+ (USRRetrainingLatencyMargin[k] < 0)) {
*USRRetrainingSupport = false;
}
}
- for (k = 0; k < NumberOfActiveSurfaces; ++k) {
- if (UseMALLForPStateChange[k] != dm_use_mall_pstate_change_full_frame &&
- UseMALLForPStateChange[k] != dm_use_mall_pstate_change_sub_viewport &&
- UseMALLForPStateChange[k] != dm_use_mall_pstate_change_phantom_pipe &&
+ for (k = 0; k < v->NumberOfActiveSurfaces; ++k) {
+ if (v->UsesMALLForPStateChange[k] != dm_use_mall_pstate_change_full_frame &&
+ v->UsesMALLForPStateChange[k] != dm_use_mall_pstate_change_sub_viewport &&
+ v->UsesMALLForPStateChange[k] != dm_use_mall_pstate_change_phantom_pipe &&
ActiveDRAMClockChangeLatencyMargin[k] < 0) {
if (PrefetchMode > 0) {
- st_vars->DRAMClockChangeSupportNumber = 2;
- } else if (st_vars->DRAMClockChangeSupportNumber == 0) {
- st_vars->DRAMClockChangeSupportNumber = 1;
- st_vars->LastSurfaceWithoutMargin = k;
- } else if (st_vars->DRAMClockChangeSupportNumber == 1 &&
- !st_vars->SynchronizedSurfaces[st_vars->LastSurfaceWithoutMargin][k]) {
- st_vars->DRAMClockChangeSupportNumber = 2;
+ DRAMClockChangeSupportNumber = 2;
+ } else if (DRAMClockChangeSupportNumber == 0) {
+ DRAMClockChangeSupportNumber = 1;
+ LastSurfaceWithoutMargin = k;
+ } else if (DRAMClockChangeSupportNumber == 1 &&
+ !SynchronizedSurfaces[LastSurfaceWithoutMargin][k]) {
+ DRAMClockChangeSupportNumber = 2;
}
}
}
- for (k = 0; k < NumberOfActiveSurfaces; ++k) {
- if (UseMALLForPStateChange[k] == dm_use_mall_pstate_change_full_frame)
- st_vars->DRAMClockChangeMethod = 1;
- else if (UseMALLForPStateChange[k] == dm_use_mall_pstate_change_sub_viewport)
- st_vars->DRAMClockChangeMethod = 2;
+ for (k = 0; k < v->NumberOfActiveSurfaces; ++k) {
+ if (v->UsesMALLForPStateChange[k] == dm_use_mall_pstate_change_full_frame)
+ DRAMClockChangeMethod = 1;
+ else if (v->UsesMALLForPStateChange[k] == dm_use_mall_pstate_change_sub_viewport)
+ DRAMClockChangeMethod = 2;
}
- if (st_vars->DRAMClockChangeMethod == 0) {
- if (st_vars->DRAMClockChangeSupportNumber == 0)
+ if (DRAMClockChangeMethod == 0) {
+ if (DRAMClockChangeSupportNumber == 0)
*DRAMClockChangeSupport = dm_dram_clock_change_vactive;
- else if (st_vars->DRAMClockChangeSupportNumber == 1)
+ else if (DRAMClockChangeSupportNumber == 1)
*DRAMClockChangeSupport = dm_dram_clock_change_vblank;
else
*DRAMClockChangeSupport = dm_dram_clock_change_unsupported;
- } else if (st_vars->DRAMClockChangeMethod == 1) {
- if (st_vars->DRAMClockChangeSupportNumber == 0)
+ } else if (DRAMClockChangeMethod == 1) {
+ if (DRAMClockChangeSupportNumber == 0)
*DRAMClockChangeSupport = dm_dram_clock_change_vactive_w_mall_full_frame;
- else if (st_vars->DRAMClockChangeSupportNumber == 1)
+ else if (DRAMClockChangeSupportNumber == 1)
*DRAMClockChangeSupport = dm_dram_clock_change_vblank_w_mall_full_frame;
else
*DRAMClockChangeSupport = dm_dram_clock_change_unsupported;
} else {
- if (st_vars->DRAMClockChangeSupportNumber == 0)
+ if (DRAMClockChangeSupportNumber == 0)
*DRAMClockChangeSupport = dm_dram_clock_change_vactive_w_mall_sub_vp;
- else if (st_vars->DRAMClockChangeSupportNumber == 1)
+ else if (DRAMClockChangeSupportNumber == 1)
*DRAMClockChangeSupport = dm_dram_clock_change_vblank_w_mall_sub_vp;
else
*DRAMClockChangeSupport = dm_dram_clock_change_unsupported;
}
- for (k = 0; k < NumberOfActiveSurfaces; ++k) {
+ for (k = 0; k < v->NumberOfActiveSurfaces; ++k) {
unsigned int dst_y_pstate;
unsigned int src_y_pstate_l;
unsigned int src_y_pstate_c;
unsigned int src_y_ahead_l, src_y_ahead_c, sub_vp_lines_l, sub_vp_lines_c;
- dst_y_pstate = dml_ceil((mmSOCParameters.DRAMClockChangeLatency + mmSOCParameters.UrgentLatency) / (HTotal[k] / PixelClock[k]), 1);
- src_y_pstate_l = dml_ceil(dst_y_pstate * VRatio[k], SwathHeightY[k]);
- src_y_ahead_l = dml_floor(DETBufferSizeY[k] / BytePerPixelDETY[k] / SwathWidthY[k], SwathHeightY[k]) + st_vars->LBLatencyHidingSourceLinesY[k];
- sub_vp_lines_l = src_y_pstate_l + src_y_ahead_l + meta_row_height[k];
+ dst_y_pstate = dml_ceil((mmSOCParameters.DRAMClockChangeLatency + mmSOCParameters.UrgentLatency) / (v->HTotal[k] / v->PixelClock[k]), 1);
+ src_y_pstate_l = dml_ceil(dst_y_pstate * v->VRatio[k], SwathHeightY[k]);
+ src_y_ahead_l = dml_floor(DETBufferSizeY[k] / BytePerPixelDETY[k] / SwathWidthY[k], SwathHeightY[k]) + LBLatencyHidingSourceLinesY[k];
+ sub_vp_lines_l = src_y_pstate_l + src_y_ahead_l + v->meta_row_height[k];
#ifdef __DML_VBA_DEBUG__
dml_print("DML::%s: k=%d, DETBufferSizeY = %d\n", __func__, k, DETBufferSizeY[k]);
dml_print("DML::%s: k=%d, BytePerPixelDETY = %f\n", __func__, k, BytePerPixelDETY[k]);
dml_print("DML::%s: k=%d, SwathWidthY = %d\n", __func__, k, SwathWidthY[k]);
dml_print("DML::%s: k=%d, SwathHeightY = %d\n", __func__, k, SwathHeightY[k]);
-dml_print("DML::%s: k=%d, LBLatencyHidingSourceLinesY = %d\n", __func__, k, st_vars->LBLatencyHidingSourceLinesY[k]);
+dml_print("DML::%s: k=%d, LBLatencyHidingSourceLinesY = %d\n", __func__, k, LBLatencyHidingSourceLinesY[k]);
dml_print("DML::%s: k=%d, dst_y_pstate = %d\n", __func__, k, dst_y_pstate);
dml_print("DML::%s: k=%d, src_y_pstate_l = %d\n", __func__, k, src_y_pstate_l);
dml_print("DML::%s: k=%d, src_y_ahead_l = %d\n", __func__, k, src_y_ahead_l);
-dml_print("DML::%s: k=%d, meta_row_height = %d\n", __func__, k, meta_row_height[k]);
+dml_print("DML::%s: k=%d, v->meta_row_height = %d\n", __func__, k, v->meta_row_height[k]);
dml_print("DML::%s: k=%d, sub_vp_lines_l = %d\n", __func__, k, sub_vp_lines_l);
#endif
SubViewportLinesNeededInMALL[k] = sub_vp_lines_l;
if (BytePerPixelDETC[k] > 0) {
- src_y_pstate_c = dml_ceil(dst_y_pstate * VRatioChroma[k], SwathHeightC[k]);
- src_y_ahead_c = dml_floor(DETBufferSizeC[k] / BytePerPixelDETC[k] / SwathWidthC[k], SwathHeightC[k]) + st_vars->LBLatencyHidingSourceLinesC[k];
- sub_vp_lines_c = src_y_pstate_c + src_y_ahead_c + meta_row_height_chroma[k];
+ src_y_pstate_c = dml_ceil(dst_y_pstate * v->VRatioChroma[k], SwathHeightC[k]);
+ src_y_ahead_c = dml_floor(DETBufferSizeC[k] / BytePerPixelDETC[k] / SwathWidthC[k], SwathHeightC[k]) + LBLatencyHidingSourceLinesC[k];
+ sub_vp_lines_c = src_y_pstate_c + src_y_ahead_c + v->meta_row_height_chroma[k];
SubViewportLinesNeededInMALL[k] = dml_max(sub_vp_lines_l, sub_vp_lines_c);
#ifdef __DML_VBA_DEBUG__
dml_print("DML::%s: k=%d, src_y_pstate_c = %d\n", __func__, k, src_y_pstate_c);
dml_print("DML::%s: k=%d, src_y_ahead_c = %d\n", __func__, k, src_y_ahead_c);
-dml_print("DML::%s: k=%d, meta_row_height_chroma = %d\n", __func__, k, meta_row_height_chroma[k]);
+dml_print("DML::%s: k=%d, v->meta_row_height_chroma = %d\n", __func__, k, v->meta_row_height_chroma[k]);
dml_print("DML::%s: k=%d, sub_vp_lines_c = %d\n", __func__, k, sub_vp_lines_c);
#endif
}
diff --git a/drivers/gpu/drm/amd/display/dc/dml/dcn32/display_mode_vba_util_32.h b/drivers/gpu/drm/amd/display/dc/dml/dcn32/display_mode_vba_util_32.h
index 37a314ce284b..924e361ad243 100644
--- a/drivers/gpu/drm/amd/display/dc/dml/dcn32/display_mode_vba_util_32.h
+++ b/drivers/gpu/drm/amd/display/dc/dml/dcn32/display_mode_vba_util_32.h
@@ -82,7 +82,6 @@ void dml32_CalculateSinglePipeDPPCLKAndSCLThroughput(
double *DPPCLKUsingSingleDPP);
void dml32_CalculateSwathAndDETConfiguration(
- struct dml32_CalculateSwathAndDETConfiguration *st_vars,
unsigned int DETSizeOverride[],
enum dm_use_mall_for_pstate_change_mode UseMALLForPStateChange[],
unsigned int ConfigReturnBufferSizeInKByte,
@@ -362,7 +361,6 @@ void dml32_CalculateSurfaceSizeInMall(
bool *ExceededMALLSize);
void dml32_CalculateVMRowAndSwath(
- struct dml32_CalculateVMRowAndSwath *st_vars,
unsigned int NumberOfActiveSurfaces,
DmlPipe myPipe[],
unsigned int SurfaceSizeInMALL[],
@@ -715,29 +713,14 @@ double dml32_CalculateExtraLatency(
unsigned int HostVMMaxNonCachedPageTableLevels);
bool dml32_CalculatePrefetchSchedule(
- struct dml32_CalculatePrefetchSchedule *st_vars,
+ struct vba_vars_st *v,
+ unsigned int k,
double HostVMInefficiencyFactor,
DmlPipe *myPipe,
unsigned int DSCDelay,
- double DPPCLKDelaySubtotalPlusCNVCFormater,
- double DPPCLKDelaySCL,
- double DPPCLKDelaySCLLBOnly,
- double DPPCLKDelayCNVCCursor,
- double DISPCLKDelaySubtotal,
unsigned int DPP_RECOUT_WIDTH,
- enum output_format_class OutputFormat,
- unsigned int MaxInterDCNTileRepeaters,
unsigned int VStartup,
unsigned int MaxVStartup,
- unsigned int GPUVMPageTableLevels,
- bool GPUVMEnable,
- bool HostVMEnable,
- unsigned int HostVMMaxNonCachedPageTableLevels,
- double HostVMMinPageSize,
- bool DynamicMetadataEnable,
- bool DynamicMetadataVMEnabled,
- int DynamicMetadataLinesBeforeActiveRequired,
- unsigned int DynamicMetadataTransmittedBytes,
double UrgentLatency,
double UrgentExtraLatency,
double TCalc,
@@ -811,59 +794,28 @@ void dml32_CalculateFlipSchedule(
bool *ImmediateFlipSupportedForPipe);
void dml32_CalculateWatermarksMALLUseAndDRAMSpeedChangeSupport(
- struct dml32_CalculateWatermarksMALLUseAndDRAMSpeedChangeSupport *st_vars,
- bool USRRetrainingRequiredFinal,
- enum dm_use_mall_for_pstate_change_mode UseMALLForPStateChange[],
+ struct vba_vars_st *v,
unsigned int PrefetchMode,
- unsigned int NumberOfActiveSurfaces,
- unsigned int MaxLineBufferLines,
- unsigned int LineBufferSize,
- unsigned int WritebackInterfaceBufferSize,
double DCFCLK,
double ReturnBW,
- bool SynchronizeTimingsFinal,
- bool SynchronizeDRRDisplaysForUCLKPStateChangeFinal,
- bool DRRDisplay[],
- unsigned int dpte_group_bytes[],
- unsigned int meta_row_height[],
- unsigned int meta_row_height_chroma[],
SOCParametersList mmSOCParameters,
- unsigned int WritebackChunkSize,
double SOCCLK,
double DCFClkDeepSleep,
unsigned int DETBufferSizeY[],
unsigned int DETBufferSizeC[],
unsigned int SwathHeightY[],
unsigned int SwathHeightC[],
- unsigned int LBBitPerPixel[],
double SwathWidthY[],
double SwathWidthC[],
- double HRatio[],
- double HRatioChroma[],
- unsigned int VTaps[],
- unsigned int VTapsChroma[],
- double VRatio[],
- double VRatioChroma[],
- unsigned int HTotal[],
- unsigned int VTotal[],
- unsigned int VActive[],
- double PixelClock[],
- unsigned int BlendingAndTiming[],
unsigned int DPPPerSurface[],
double BytePerPixelDETY[],
double BytePerPixelDETC[],
double DSTXAfterScaler[],
double DSTYAfterScaler[],
- bool WritebackEnable[],
- enum source_format_class WritebackPixelFormat[],
- double WritebackDestinationWidth[],
- double WritebackDestinationHeight[],
- double WritebackSourceHeight[],
bool UnboundedRequestEnabled,
unsigned int CompressedBufferSizeInkByte,
/* Output */
- Watermarks *Watermark,
enum clock_change_support *DRAMClockChangeSupport,
double MaxActiveDRAMClockChangeLatencySupported[],
unsigned int SubViewportLinesNeededInMALL[],
diff --git a/drivers/gpu/drm/amd/display/dc/dml/dcn321/dcn321_fpu.c b/drivers/gpu/drm/amd/display/dc/dml/dcn321/dcn321_fpu.c
index 84b4b00f29cb..c87091683b5d 100644
--- a/drivers/gpu/drm/amd/display/dc/dml/dcn321/dcn321_fpu.c
+++ b/drivers/gpu/drm/amd/display/dc/dml/dcn321/dcn321_fpu.c
@@ -498,6 +498,13 @@ void dcn321_update_bw_bounding_box_fpu(struct dc *dc, struct clk_bw_params *bw_p
dc->bb_overrides.dram_clock_change_latency_ns / 1000.0;
}
+ if ((int)(dcn3_21_soc.fclk_change_latency_us * 1000)
+ != dc->bb_overrides.fclk_clock_change_latency_ns
+ && dc->bb_overrides.fclk_clock_change_latency_ns) {
+ dcn3_21_soc.fclk_change_latency_us =
+ dc->bb_overrides.fclk_clock_change_latency_ns / 1000;
+ }
+
if ((int)(dcn3_21_soc.dummy_pstate_latency_us * 1000)
!= dc->bb_overrides.dummy_clock_change_latency_ns
&& dc->bb_overrides.dummy_clock_change_latency_ns) {
diff --git a/drivers/gpu/drm/amd/display/dc/dml/display_mode_lib.c b/drivers/gpu/drm/amd/display/dc/dml/display_mode_lib.c
index 5d27ff0ebb5f..f5400eda07a5 100644
--- a/drivers/gpu/drm/amd/display/dc/dml/display_mode_lib.c
+++ b/drivers/gpu/drm/amd/display/dc/dml/display_mode_lib.c
@@ -35,6 +35,8 @@
#include "dcn30/display_rq_dlg_calc_30.h"
#include "dcn31/display_mode_vba_31.h"
#include "dcn31/display_rq_dlg_calc_31.h"
+#include "dcn314/display_mode_vba_314.h"
+#include "dcn314/display_rq_dlg_calc_314.h"
#include "dcn32/display_mode_vba_32.h"
#include "dcn32/display_rq_dlg_calc_32.h"
#include "dml_logger.h"
@@ -74,6 +76,13 @@ const struct dml_funcs dml31_funcs = {
.rq_dlg_get_rq_reg = dml31_rq_dlg_get_rq_reg
};
+const struct dml_funcs dml314_funcs = {
+ .validate = dml314_ModeSupportAndSystemConfigurationFull,
+ .recalculate = dml314_recalculate,
+ .rq_dlg_get_dlg_reg = dml314_rq_dlg_get_dlg_reg,
+ .rq_dlg_get_rq_reg = dml314_rq_dlg_get_rq_reg
+};
+
const struct dml_funcs dml32_funcs = {
.validate = dml32_ModeSupportAndSystemConfigurationFull,
.recalculate = dml32_recalculate,
@@ -107,6 +116,9 @@ void dml_init_instance(struct display_mode_lib *lib,
case DML_PROJECT_DCN31_FPGA:
lib->funcs = dml31_funcs;
break;
+ case DML_PROJECT_DCN314:
+ lib->funcs = dml314_funcs;
+ break;
case DML_PROJECT_DCN32:
lib->funcs = dml32_funcs;
break;
diff --git a/drivers/gpu/drm/amd/display/dc/dml/display_mode_lib.h b/drivers/gpu/drm/amd/display/dc/dml/display_mode_lib.h
index 2bdd6ed22611..b1878a1440e2 100644
--- a/drivers/gpu/drm/amd/display/dc/dml/display_mode_lib.h
+++ b/drivers/gpu/drm/amd/display/dc/dml/display_mode_lib.h
@@ -41,6 +41,7 @@ enum dml_project {
DML_PROJECT_DCN30,
DML_PROJECT_DCN31,
DML_PROJECT_DCN31_FPGA,
+ DML_PROJECT_DCN314,
DML_PROJECT_DCN32,
};
diff --git a/drivers/gpu/drm/amd/display/dc/dml/display_mode_vba.h b/drivers/gpu/drm/amd/display/dc/dml/display_mode_vba.h
index 8460aefe7b6d..2051ddaa641a 100644
--- a/drivers/gpu/drm/amd/display/dc/dml/display_mode_vba.h
+++ b/drivers/gpu/drm/amd/display/dc/dml/display_mode_vba.h
@@ -182,108 +182,6 @@ void Calculate256BBlockSizes(
unsigned int *BlockWidth256BytesY,
unsigned int *BlockWidth256BytesC);
-struct dml32_CalculateSwathAndDETConfiguration {
- unsigned int MaximumSwathHeightY[DC__NUM_DPP__MAX];
- unsigned int MaximumSwathHeightC[DC__NUM_DPP__MAX];
- unsigned int RoundedUpMaxSwathSizeBytesY[DC__NUM_DPP__MAX];
- unsigned int RoundedUpMaxSwathSizeBytesC[DC__NUM_DPP__MAX];
- unsigned int RoundedUpSwathSizeBytesY;
- unsigned int RoundedUpSwathSizeBytesC;
- double SwathWidthdoubleDPP[DC__NUM_DPP__MAX];
- double SwathWidthdoubleDPPChroma[DC__NUM_DPP__MAX];
- unsigned int TotalActiveDPP;
- bool NoChromaSurfaces;
- unsigned int DETBufferSizeInKByteForSwathCalculation;
-};
-
-struct dml32_CalculateVMRowAndSwath {
- unsigned int PTEBufferSizeInRequestsForLuma[DC__NUM_DPP__MAX];
- unsigned int PTEBufferSizeInRequestsForChroma[DC__NUM_DPP__MAX];
- unsigned int PDEAndMetaPTEBytesFrameY;
- unsigned int PDEAndMetaPTEBytesFrameC;
- unsigned int MetaRowByteY[DC__NUM_DPP__MAX];
- unsigned int MetaRowByteC[DC__NUM_DPP__MAX];
- unsigned int PixelPTEBytesPerRowY[DC__NUM_DPP__MAX];
- unsigned int PixelPTEBytesPerRowC[DC__NUM_DPP__MAX];
- unsigned int PixelPTEBytesPerRowY_one_row_per_frame[DC__NUM_DPP__MAX];
- unsigned int PixelPTEBytesPerRowC_one_row_per_frame[DC__NUM_DPP__MAX];
- unsigned int dpte_row_width_luma_ub_one_row_per_frame[DC__NUM_DPP__MAX];
- unsigned int dpte_row_height_luma_one_row_per_frame[DC__NUM_DPP__MAX];
- unsigned int dpte_row_width_chroma_ub_one_row_per_frame[DC__NUM_DPP__MAX];
- unsigned int dpte_row_height_chroma_one_row_per_frame[DC__NUM_DPP__MAX];
- bool one_row_per_frame_fits_in_buffer[DC__NUM_DPP__MAX];
-};
-
-struct dml32_CalculateWatermarksMALLUseAndDRAMSpeedChangeSupport {
- unsigned int SurfaceWithMinActiveFCLKChangeMargin;
- unsigned int DRAMClockChangeSupportNumber;
- unsigned int LastSurfaceWithoutMargin;
- unsigned int DRAMClockChangeMethod;
- bool FoundFirstSurfaceWithMinActiveFCLKChangeMargin;
- double MinActiveFCLKChangeMargin;
- double SecondMinActiveFCLKChangeMarginOneDisplayInVBLank;
- double ActiveClockChangeLatencyHidingY;
- double ActiveClockChangeLatencyHidingC;
- double ActiveClockChangeLatencyHiding;
- double EffectiveDETBufferSizeY;
- double ActiveFCLKChangeLatencyMargin[DC__NUM_DPP__MAX];
- double USRRetrainingLatencyMargin[DC__NUM_DPP__MAX];
- double TotalPixelBW;
- bool SynchronizedSurfaces[DC__NUM_DPP__MAX][DC__NUM_DPP__MAX];
- double EffectiveLBLatencyHidingY;
- double EffectiveLBLatencyHidingC;
- double LinesInDETY[DC__NUM_DPP__MAX];
- double LinesInDETC[DC__NUM_DPP__MAX];
- unsigned int LinesInDETYRoundedDownToSwath[DC__NUM_DPP__MAX];
- unsigned int LinesInDETCRoundedDownToSwath[DC__NUM_DPP__MAX];
- double FullDETBufferingTimeY;
- double FullDETBufferingTimeC;
- double WritebackDRAMClockChangeLatencyMargin;
- double WritebackFCLKChangeLatencyMargin;
- double WritebackLatencyHiding;
- bool SameTimingForFCLKChange;
- unsigned int TotalActiveWriteback;
- unsigned int LBLatencyHidingSourceLinesY[DC__NUM_DPP__MAX];
- unsigned int LBLatencyHidingSourceLinesC[DC__NUM_DPP__MAX];
-};
-
-struct dml32_CalculatePrefetchSchedule {
- unsigned int DPPCycles, DISPCLKCycles;
- double DSTTotalPixelsAfterScaler;
- double LineTime;
- double dst_y_prefetch_equ;
- double prefetch_bw_oto;
- double Tvm_oto;
- double Tr0_oto;
- double Tvm_oto_lines;
- double Tr0_oto_lines;
- double dst_y_prefetch_oto;
- double TimeForFetchingMetaPTE;
- double TimeForFetchingRowInVBlank;
- double LinesToRequestPrefetchPixelData;
- unsigned int HostVMDynamicLevelsTrips;
- double trip_to_mem;
- double Tvm_trips;
- double Tr0_trips;
- double Tvm_trips_rounded;
- double Tr0_trips_rounded;
- double Lsw_oto;
- double Tpre_rounded;
- double prefetch_bw_equ;
- double Tvm_equ;
- double Tr0_equ;
- double Tdmbf;
- double Tdmec;
- double Tdmsks;
- double prefetch_sw_bytes;
- double bytes_pp;
- double dep_bytes;
- unsigned int max_vratio_pre;
- double min_Lsw;
- double Tsw_est1;
- double Tsw_est3;
-};
-
struct DISPCLKDPPCLKDCFCLKDeepSleepPrefetchParametersWatermarksAndPerformanceCalculation {
unsigned int dummy_integer_array[2][DC__NUM_DPP__MAX];
double dummy_single_array[2][DC__NUM_DPP__MAX];
@@ -355,10 +253,6 @@ struct dummy_vars {
struct DISPCLKDPPCLKDCFCLKDeepSleepPrefetchParametersWatermarksAndPerformanceCalculation
DISPCLKDPPCLKDCFCLKDeepSleepPrefetchParametersWatermarksAndPerformanceCalculation;
struct dml32_ModeSupportAndSystemConfigurationFull dml32_ModeSupportAndSystemConfigurationFull;
- struct dml32_CalculateSwathAndDETConfiguration dml32_CalculateSwathAndDETConfiguration;
- struct dml32_CalculateVMRowAndSwath dml32_CalculateVMRowAndSwath;
- struct dml32_CalculateWatermarksMALLUseAndDRAMSpeedChangeSupport dml32_CalculateWatermarksMALLUseAndDRAMSpeedChangeSupport;
- struct dml32_CalculatePrefetchSchedule dml32_CalculatePrefetchSchedule;
};
struct vba_vars_st {
@@ -757,10 +651,10 @@ struct vba_vars_st {
unsigned int OutputTypeAndRatePerState[DC__VOLTAGE_STATES][DC__NUM_DPP__MAX];
double RequiredDISPCLKPerSurface[DC__VOLTAGE_STATES][2][DC__NUM_DPP__MAX];
- unsigned int MicroTileHeightY[DC__NUM_DPP__MAX];
- unsigned int MicroTileHeightC[DC__NUM_DPP__MAX];
- unsigned int MicroTileWidthY[DC__NUM_DPP__MAX];
- unsigned int MicroTileWidthC[DC__NUM_DPP__MAX];
+ unsigned int MacroTileHeightY[DC__NUM_DPP__MAX];
+ unsigned int MacroTileHeightC[DC__NUM_DPP__MAX];
+ unsigned int MacroTileWidthY[DC__NUM_DPP__MAX];
+ unsigned int MacroTileWidthC[DC__NUM_DPP__MAX];
bool ImmediateFlipRequiredFinal;
bool DCCProgrammingAssumesScanDirectionUnknownFinal;
bool EnoughWritebackUnits;
@@ -906,8 +800,6 @@ struct vba_vars_st {
double PSCL_FACTOR[DC__NUM_DPP__MAX];
double PSCL_FACTOR_CHROMA[DC__NUM_DPP__MAX];
double MaximumVStartup[DC__VOLTAGE_STATES][2][DC__NUM_DPP__MAX];
- unsigned int MacroTileWidthY[DC__NUM_DPP__MAX];
- unsigned int MacroTileWidthC[DC__NUM_DPP__MAX];
double AlignedDCCMetaPitch[DC__NUM_DPP__MAX];
double AlignedYPitch[DC__NUM_DPP__MAX];
double AlignedCPitch[DC__NUM_DPP__MAX];
diff --git a/drivers/gpu/drm/amd/display/dc/inc/hw/clk_mgr.h b/drivers/gpu/drm/amd/display/dc/inc/hw/clk_mgr.h
index 5d2b028e5dad..d9f1b0a4fbd4 100644
--- a/drivers/gpu/drm/amd/display/dc/inc/hw/clk_mgr.h
+++ b/drivers/gpu/drm/amd/display/dc/inc/hw/clk_mgr.h
@@ -214,6 +214,7 @@ struct dummy_pstate_entry {
struct clk_bw_params {
unsigned int vram_type;
unsigned int num_channels;
+ unsigned int dram_channel_width_bytes;
unsigned int dispclk_vco_khz;
unsigned int dc_mode_softmax_memclk;
struct clk_limit_table clk_table;
diff --git a/drivers/gpu/drm/amd/display/dc/link/link_hwss_hpo_dp.c b/drivers/gpu/drm/amd/display/dc/link/link_hwss_hpo_dp.c
index db7b0b155374..226af06278ce 100644
--- a/drivers/gpu/drm/amd/display/dc/link/link_hwss_hpo_dp.c
+++ b/drivers/gpu/drm/amd/display/dc/link/link_hwss_hpo_dp.c
@@ -116,7 +116,7 @@ static void setup_hpo_dp_stream_encoder(struct pipe_ctx *pipe_ctx)
dto_params.timing = &pipe_ctx->stream->timing;
dto_params.ref_dtbclk_khz = dc->clk_mgr->funcs->get_dtb_ref_clk_frequency(dc->clk_mgr);
- dccg->funcs->set_dpstreamclk(dccg, DTBCLK0, tg->inst, link_enc->inst);
+ dccg->funcs->set_dpstreamclk(dccg, DTBCLK0, tg->inst, stream_enc->inst);
dccg->funcs->enable_symclk32_se(dccg, stream_enc->inst, phyd32clk);
dccg->funcs->set_dtbclk_dto(dccg, &dto_params);
stream_enc->funcs->enable_stream(stream_enc);
@@ -137,7 +137,7 @@ static void reset_hpo_dp_stream_encoder(struct pipe_ctx *pipe_ctx)
stream_enc->funcs->disable(stream_enc);
dccg->funcs->set_dtbclk_dto(dccg, &dto_params);
dccg->funcs->disable_symclk32_se(dccg, stream_enc->inst);
- dccg->funcs->set_dpstreamclk(dccg, REFCLK, tg->inst, pipe_ctx->link_res.hpo_dp_link_enc->inst);
+ dccg->funcs->set_dpstreamclk(dccg, REFCLK, tg->inst, stream_enc->inst);
}
static void setup_hpo_dp_stream_attribute(struct pipe_ctx *pipe_ctx)
diff --git a/drivers/gpu/drm/amd/display/include/dal_asic_id.h b/drivers/gpu/drm/amd/display/include/dal_asic_id.h
index ab06c7fc7452..9f3558c0ef11 100644
--- a/drivers/gpu/drm/amd/display/include/dal_asic_id.h
+++ b/drivers/gpu/drm/amd/display/include/dal_asic_id.h
@@ -244,13 +244,15 @@ enum {
#define ASICREV_IS_GC_10_3_7(eChipRev) ((eChipRev >= GC_10_3_7_A0) && (eChipRev < GC_10_3_7_UNKNOWN))
#define AMDGPU_FAMILY_GC_11_0_0 145
-#define AMDGPU_FAMILY_GC_11_0_2 148
+#define AMDGPU_FAMILY_GC_11_0_1 148
#define GC_11_0_0_A0 0x1
#define GC_11_0_2_A0 0x10
+#define GC_11_0_3_A0 0x20
#define GC_11_UNKNOWN 0xFF
#define ASICREV_IS_GC_11_0_0(eChipRev) (eChipRev < GC_11_0_2_A0)
-#define ASICREV_IS_GC_11_0_2(eChipRev) (eChipRev >= GC_11_0_2_A0 && eChipRev < GC_11_UNKNOWN)
+#define ASICREV_IS_GC_11_0_2(eChipRev) (eChipRev >= GC_11_0_2_A0 && eChipRev < GC_11_0_3_A0)
+#define ASICREV_IS_GC_11_0_3(eChipRev) (eChipRev >= GC_11_0_3_A0 && eChipRev < GC_11_UNKNOWN)
/*
* ASIC chip ID
diff --git a/drivers/gpu/drm/amd/display/include/logger_types.h b/drivers/gpu/drm/amd/display/include/logger_types.h
index f093b49c5e6e..3bf08a60c45c 100644
--- a/drivers/gpu/drm/amd/display/include/logger_types.h
+++ b/drivers/gpu/drm/amd/display/include/logger_types.h
@@ -119,13 +119,15 @@ enum dc_log_type {
LOG_HDMI_RETIMER_REDRIVER,
LOG_DSC,
LOG_SMU_MSG,
+ LOG_DC2RESERVED4,
+ LOG_DC2RESERVED5,
LOG_DWB,
LOG_GAMMA_DEBUG,
LOG_MAX_HW_POINTS,
LOG_ALL_TF_CHANNELS,
LOG_SAMPLE_1DLUT,
LOG_DP2,
- LOG_SECTION_TOTAL_COUNT
+ LOG_DC2RESERVED12,
};
#define DC_MIN_LOG_MASK ((1 << LOG_ERROR) | \
diff --git a/drivers/gpu/drm/amd/display/modules/color/color_gamma.c b/drivers/gpu/drm/amd/display/modules/color/color_gamma.c
index 859ffd8725c5..04f7656906ca 100644
--- a/drivers/gpu/drm/amd/display/modules/color/color_gamma.c
+++ b/drivers/gpu/drm/amd/display/modules/color/color_gamma.c
@@ -1600,6 +1600,7 @@ static void interpolate_user_regamma(uint32_t hw_points_num,
struct fixed31_32 lut2;
struct fixed31_32 delta_lut;
struct fixed31_32 delta_index;
+ const struct fixed31_32 one = dc_fixpt_from_int(1);
i = 0;
/* fixed_pt library has problems handling too small values */
@@ -1628,6 +1629,9 @@ static void interpolate_user_regamma(uint32_t hw_points_num,
} else
hw_x = coordinates_x[i].x;
+ if (dc_fixpt_le(one, hw_x))
+ hw_x = one;
+
norm_x = dc_fixpt_mul(norm_factor, hw_x);
index = dc_fixpt_floor(norm_x);
if (index < 0 || index > 255)
diff --git a/drivers/gpu/drm/amd/display/modules/freesync/freesync.c b/drivers/gpu/drm/amd/display/modules/freesync/freesync.c
index da09ba7589f7..0f39ab9dc5b4 100644
--- a/drivers/gpu/drm/amd/display/modules/freesync/freesync.c
+++ b/drivers/gpu/drm/amd/display/modules/freesync/freesync.c
@@ -613,10 +613,6 @@ static void build_vrr_infopacket_data_v1(const struct mod_vrr_params *vrr,
* Note: We should never go above the field rate of the mode timing set.
*/
infopacket->sb[8] = (unsigned char)((vrr->max_refresh_in_uhz + 500000) / 1000000);
-
- /* FreeSync HDR */
- infopacket->sb[9] = 0;
- infopacket->sb[10] = 0;
}
static void build_vrr_infopacket_data_v3(const struct mod_vrr_params *vrr,
@@ -684,10 +680,6 @@ static void build_vrr_infopacket_data_v3(const struct mod_vrr_params *vrr,
/* PB16 : Reserved bits 7:1, FixedRate bit 0 */
infopacket->sb[16] = (vrr->state == VRR_STATE_ACTIVE_FIXED) ? 1 : 0;
-
- //FreeSync HDR
- infopacket->sb[9] = 0;
- infopacket->sb[10] = 0;
}
static void build_vrr_infopacket_fs2_data(enum color_transfer_func app_tf,
@@ -772,8 +764,7 @@ static void build_vrr_infopacket_header_v2(enum signal_type signal,
/* HB2 = [Bits 7:5 = 0] [Bits 4:0 = Length = 0x09] */
infopacket->hb2 = 0x09;
- *payload_size = 0x0A;
-
+ *payload_size = 0x09;
} else if (dc_is_dp_signal(signal)) {
/* HEADER */
@@ -822,9 +813,9 @@ static void build_vrr_infopacket_header_v3(enum signal_type signal,
infopacket->hb1 = version;
/* HB2 = [Bits 7:5 = 0] [Bits 4:0 = Length] */
- *payload_size = 0x10;
- infopacket->hb2 = *payload_size - 1; //-1 for checksum
+ infopacket->hb2 = 0x10;
+ *payload_size = 0x10;
} else if (dc_is_dp_signal(signal)) {
/* HEADER */
diff --git a/drivers/gpu/drm/amd/include/asic_reg/nbio/nbio_7_7_0_offset.h b/drivers/gpu/drm/amd/include/asic_reg/nbio/nbio_7_7_0_offset.h
index 2ed95790a600..cf8d60c4df1b 100644
--- a/drivers/gpu/drm/amd/include/asic_reg/nbio/nbio_7_7_0_offset.h
+++ b/drivers/gpu/drm/amd/include/asic_reg/nbio/nbio_7_7_0_offset.h
@@ -15243,6 +15243,8 @@
#define regBIF0_PCIE_TX_TRACKING_ADDR_HI_BASE_IDX 5
#define regBIF0_PCIE_TX_TRACKING_CTRL_STATUS 0x420186
#define regBIF0_PCIE_TX_TRACKING_CTRL_STATUS_BASE_IDX 5
+#define regBIF0_PCIE_TX_POWER_CTRL_1 0x420187
+#define regBIF0_PCIE_TX_POWER_CTRL_1_BASE_IDX 5
#define regBIF0_PCIE_TX_CTRL_4 0x42018b
#define regBIF0_PCIE_TX_CTRL_4_BASE_IDX 5
#define regBIF0_PCIE_TX_STATUS 0x420194
diff --git a/drivers/gpu/drm/amd/include/asic_reg/nbio/nbio_7_7_0_sh_mask.h b/drivers/gpu/drm/amd/include/asic_reg/nbio/nbio_7_7_0_sh_mask.h
index eb62a18fcc48..3d60c9e92548 100644
--- a/drivers/gpu/drm/amd/include/asic_reg/nbio/nbio_7_7_0_sh_mask.h
+++ b/drivers/gpu/drm/amd/include/asic_reg/nbio/nbio_7_7_0_sh_mask.h
@@ -85627,6 +85627,19 @@
#define BIF0_PCIE_TX_TRACKING_CTRL_STATUS__TX_TRACKING_PORT_MASK 0x0000000EL
#define BIF0_PCIE_TX_TRACKING_CTRL_STATUS__TX_TRACKING_UNIT_ID_MASK 0x00007F00L
#define BIF0_PCIE_TX_TRACKING_CTRL_STATUS__TX_TRACKING_STATUS_VALID_MASK 0x00008000L
+//BIF0_PCIE_TX_POWER_CTRL_1
+#define BIF0_PCIE_TX_POWER_CTRL_1__MST_MEM_LS_EN__SHIFT 0x0
+#define BIF0_PCIE_TX_POWER_CTRL_1__MST_MEM_DS_EN__SHIFT 0x1
+#define BIF0_PCIE_TX_POWER_CTRL_1__MST_MEM_SD_EN__SHIFT 0x2
+#define BIF0_PCIE_TX_POWER_CTRL_1__REPLAY_MEM_LS_EN__SHIFT 0x3
+#define BIF0_PCIE_TX_POWER_CTRL_1__REPLAY_MEM_DS_EN__SHIFT 0x4
+#define BIF0_PCIE_TX_POWER_CTRL_1__REPLAY_MEM_SD_EN__SHIFT 0x5
+#define BIF0_PCIE_TX_POWER_CTRL_1__MST_MEM_LS_EN_MASK 0x00000001L
+#define BIF0_PCIE_TX_POWER_CTRL_1__MST_MEM_DS_EN_MASK 0x00000002L
+#define BIF0_PCIE_TX_POWER_CTRL_1__MST_MEM_SD_EN_MASK 0x00000004L
+#define BIF0_PCIE_TX_POWER_CTRL_1__REPLAY_MEM_LS_EN_MASK 0x00000008L
+#define BIF0_PCIE_TX_POWER_CTRL_1__REPLAY_MEM_DS_EN_MASK 0x00000010L
+#define BIF0_PCIE_TX_POWER_CTRL_1__REPLAY_MEM_SD_EN_MASK 0x00000020L
//BIF0_PCIE_TX_CTRL_4
#define BIF0_PCIE_TX_CTRL_4__TX_PORT_ACCESS_TIMER_SKEW__SHIFT 0x0
#define BIF0_PCIE_TX_CTRL_4__TX_PORT_ACCESS_TIMER_SKEW_MASK 0x0000000FL
diff --git a/drivers/gpu/drm/amd/include/mes_v11_api_def.h b/drivers/gpu/drm/amd/include/mes_v11_api_def.h
index 80dab1146439..50bfa513cb35 100644
--- a/drivers/gpu/drm/amd/include/mes_v11_api_def.h
+++ b/drivers/gpu/drm/amd/include/mes_v11_api_def.h
@@ -268,7 +268,8 @@ union MESAPI__ADD_QUEUE {
uint32_t is_tmz_queue : 1;
uint32_t map_kiq_utility_queue : 1;
uint32_t is_kfd_process : 1;
- uint32_t reserved : 22;
+ uint32_t trap_en : 1;
+ uint32_t reserved : 21;
};
struct MES_API_STATUS api_status;
uint64_t tma_addr;
diff --git a/drivers/gpu/drm/amd/pm/swsmu/inc/pmfw_if/smu13_driver_if_v13_0_0.h b/drivers/gpu/drm/amd/pm/swsmu/inc/pmfw_if/smu13_driver_if_v13_0_0.h
index 78620b0bd279..063f4a737605 100644
--- a/drivers/gpu/drm/amd/pm/swsmu/inc/pmfw_if/smu13_driver_if_v13_0_0.h
+++ b/drivers/gpu/drm/amd/pm/swsmu/inc/pmfw_if/smu13_driver_if_v13_0_0.h
@@ -24,12 +24,8 @@
#ifndef SMU13_DRIVER_IF_V13_0_0_H
#define SMU13_DRIVER_IF_V13_0_0_H
-// *** IMPORTANT ***
-// PMFW TEAM: Always increment the interface version on any change to this file
-#define SMU13_DRIVER_IF_VERSION 0x23
-
//Increment this version if SkuTable_t or BoardTable_t change
-#define PPTABLE_VERSION 0x1D
+#define PPTABLE_VERSION 0x24
#define NUM_GFXCLK_DPM_LEVELS 16
#define NUM_SOCCLK_DPM_LEVELS 8
@@ -1193,8 +1189,17 @@ typedef struct {
// SECTION: Advanced Options
uint32_t DebugOverrides;
+ // Section: Total Board Power idle vs active coefficients
+ uint8_t TotalBoardPowerSupport;
+ uint8_t TotalBoardPowerPadding[3];
+
+ int16_t TotalIdleBoardPowerM;
+ int16_t TotalIdleBoardPowerB;
+ int16_t TotalBoardPowerM;
+ int16_t TotalBoardPowerB;
+
// SECTION: Sku Reserved
- uint32_t Spare[64];
+ uint32_t Spare[61];
// Padding for MMHUB - do not modify this
uint32_t MmHubPadding[8];
@@ -1259,7 +1264,8 @@ typedef struct {
// SECTION: Clock Spread Spectrum
// UCLK Spread Spectrum
- uint16_t UclkSpreadPadding;
+ uint8_t UclkTrainingModeSpreadPercent;
+ uint8_t UclkSpreadPadding;
uint16_t UclkSpreadFreq; // kHz
// UCLK Spread Spectrum
@@ -1272,11 +1278,7 @@ typedef struct {
// Section: Memory Config
uint8_t DramWidth; // Width of interface to the channel for each DRAM module. See DRAM_BIT_WIDTH_TYPE_e
- uint8_t PaddingMem1[3];
-
- // Section: Total Board Power
- uint16_t TotalBoardPower; //Only needed for TCP Estimated case, where TCP = TGP+Total Board Power
- uint16_t BoardPowerPadding;
+ uint8_t PaddingMem1[7];
// SECTION: UMC feature flags
uint8_t HsrEnabled;
@@ -1375,8 +1377,11 @@ typedef struct {
uint16_t Vcn1ActivityPercentage ;
uint32_t EnergyAccumulator;
- uint16_t AverageSocketPower ;
+ uint16_t AverageSocketPower;
+ uint16_t AverageTotalBoardPower;
+
uint16_t AvgTemperature[TEMP_COUNT];
+ uint16_t TempPadding;
uint8_t PcieRate ;
uint8_t PcieWidth ;
diff --git a/drivers/gpu/drm/amd/pm/swsmu/inc/pmfw_if/smu13_driver_if_v13_0_4.h b/drivers/gpu/drm/amd/pm/swsmu/inc/pmfw_if/smu13_driver_if_v13_0_4.h
index 76f695a1d065..ae2d337158f3 100644
--- a/drivers/gpu/drm/amd/pm/swsmu/inc/pmfw_if/smu13_driver_if_v13_0_4.h
+++ b/drivers/gpu/drm/amd/pm/swsmu/inc/pmfw_if/smu13_driver_if_v13_0_4.h
@@ -27,7 +27,7 @@
// *** IMPORTANT ***
// SMU TEAM: Always increment the interface version if
// any structure is changed in this file
-#define PMFW_DRIVER_IF_VERSION 4
+#define PMFW_DRIVER_IF_VERSION 5
typedef struct {
int32_t value;
@@ -197,6 +197,8 @@ typedef struct {
uint16_t SkinTemp;
uint16_t DeviceState;
+ uint16_t CurTemp; //[centi-Celsius]
+ uint16_t spare2;
} SmuMetrics_t;
typedef struct {
diff --git a/drivers/gpu/drm/amd/pm/swsmu/inc/smu_v13_0.h b/drivers/gpu/drm/amd/pm/swsmu/inc/smu_v13_0.h
index c02e5e576728..f442bf085a31 100644
--- a/drivers/gpu/drm/amd/pm/swsmu/inc/smu_v13_0.h
+++ b/drivers/gpu/drm/amd/pm/swsmu/inc/smu_v13_0.h
@@ -28,9 +28,9 @@
#define SMU13_DRIVER_IF_VERSION_INV 0xFFFFFFFF
#define SMU13_DRIVER_IF_VERSION_YELLOW_CARP 0x04
#define SMU13_DRIVER_IF_VERSION_ALDE 0x08
-#define SMU13_DRIVER_IF_VERSION_SMU_V13_0_4 0x04
+#define SMU13_DRIVER_IF_VERSION_SMU_V13_0_4 0x05
#define SMU13_DRIVER_IF_VERSION_SMU_V13_0_5 0x04
-#define SMU13_DRIVER_IF_VERSION_SMU_V13_0_0 0x2C
+#define SMU13_DRIVER_IF_VERSION_SMU_V13_0_0 0x30
#define SMU13_DRIVER_IF_VERSION_SMU_V13_0_7 0x2C
#define SMU13_MODE1_RESET_WAIT_TIME_IN_MS 500 //500ms
@@ -291,5 +291,11 @@ int smu_v13_0_set_default_dpm_tables(struct smu_context *smu);
void smu_v13_0_set_smu_mailbox_registers(struct smu_context *smu);
int smu_v13_0_mode1_reset(struct smu_context *smu);
+
+int smu_v13_0_get_pptable_from_firmware(struct smu_context *smu,
+ void **table,
+ uint32_t *size,
+ uint32_t pptable_id);
+
#endif
#endif
diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu11/sienna_cichlid_ppt.c b/drivers/gpu/drm/amd/pm/swsmu/smu11/sienna_cichlid_ppt.c
index fa520d79ef67..644ea150e075 100644
--- a/drivers/gpu/drm/amd/pm/swsmu/smu11/sienna_cichlid_ppt.c
+++ b/drivers/gpu/drm/amd/pm/swsmu/smu11/sienna_cichlid_ppt.c
@@ -368,6 +368,17 @@ static void sienna_cichlid_check_bxco_support(struct smu_context *smu)
smu_baco->platform_support =
(val & RCC_BIF_STRAP0__STRAP_PX_CAPABLE_MASK) ? true :
false;
+
+ /*
+ * Disable BACO entry/exit completely on below SKUs to
+ * avoid hardware intermittent failures.
+ */
+ if (((adev->pdev->device == 0x73A1) &&
+ (adev->pdev->revision == 0x00)) ||
+ ((adev->pdev->device == 0x73BF) &&
+ (adev->pdev->revision == 0xCF)))
+ smu_baco->platform_support = false;
+
}
}
@@ -4283,6 +4294,7 @@ static const struct pptable_funcs sienna_cichlid_ppt_funcs = {
.dump_pptable = sienna_cichlid_dump_pptable,
.init_microcode = smu_v11_0_init_microcode,
.load_microcode = smu_v11_0_load_microcode,
+ .fini_microcode = smu_v11_0_fini_microcode,
.init_smc_tables = sienna_cichlid_init_smc_tables,
.fini_smc_tables = smu_v11_0_fini_smc_tables,
.init_power = smu_v11_0_init_power,
diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0.c b/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0.c
index e8fe84f806d1..24488f4cb78c 100644
--- a/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0.c
+++ b/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0.c
@@ -84,9 +84,6 @@ MODULE_FIRMWARE("amdgpu/smu_13_0_7.bin");
static const int link_width[] = {0, 1, 2, 4, 8, 12, 16};
static const int link_speed[] = {25, 50, 80, 160};
-static int smu_v13_0_get_pptable_from_firmware(struct smu_context *smu, void **table, uint32_t *size,
- uint32_t pptable_id);
-
int smu_v13_0_init_microcode(struct smu_context *smu)
{
struct amdgpu_device *adev = smu->adev;
@@ -212,6 +209,9 @@ int smu_v13_0_init_pptable_microcode(struct smu_context *smu)
if (!adev->scpm_enabled)
return 0;
+ if (adev->ip_versions[MP1_HWIP][0] == IP_VERSION(13, 0, 7))
+ return 0;
+
/* override pptable_id from driver parameter */
if (amdgpu_smu_pptable_id >= 0) {
pptable_id = amdgpu_smu_pptable_id;
@@ -219,24 +219,17 @@ int smu_v13_0_init_pptable_microcode(struct smu_context *smu)
} else {
pptable_id = smu->smu_table.boot_values.pp_table_id;
- if (adev->ip_versions[MP1_HWIP][0] == IP_VERSION(13, 0, 7) &&
- pptable_id == 3667)
- pptable_id = 36671;
-
- if (adev->ip_versions[MP1_HWIP][0] == IP_VERSION(13, 0, 7) &&
- pptable_id == 3688)
- pptable_id = 36881;
/*
* Temporary solution for SMU V13.0.0 with SCPM enabled:
- * - use 36831 signed pptable when pp_table_id is 3683
- * - use 36641 signed pptable when pp_table_id is 3664 or 0
- * TODO: drop these when the pptable carried in vbios is ready.
+ * - use vbios carried pptable when pptable_id is 3664, 3715 or 3795
+ * - use 36831 soft pptable when pptable_id is 3683
*/
if (adev->ip_versions[MP1_HWIP][0] == IP_VERSION(13, 0, 0)) {
switch (pptable_id) {
- case 0:
case 3664:
- pptable_id = 36641;
+ case 3715:
+ case 3795:
+ pptable_id = 0;
break;
case 3683:
pptable_id = 36831;
@@ -425,8 +418,10 @@ static int smu_v13_0_get_pptable_from_vbios(struct smu_context *smu, void **tabl
return 0;
}
-static int smu_v13_0_get_pptable_from_firmware(struct smu_context *smu, void **table, uint32_t *size,
- uint32_t pptable_id)
+int smu_v13_0_get_pptable_from_firmware(struct smu_context *smu,
+ void **table,
+ uint32_t *size,
+ uint32_t pptable_id)
{
const struct smc_firmware_header_v1_0 *hdr;
struct amdgpu_device *adev = smu->adev;
@@ -478,7 +473,7 @@ int smu_v13_0_setup_pptable(struct smu_context *smu)
/*
* Temporary solution for SMU V13.0.0 with SCPM disabled:
- * - use 3664 or 3683 on request
+ * - use 3664, 3683 or 3715 on request
* - use 3664 when pptable_id is 0
* TODO: drop these when the pptable carried in vbios is ready.
*/
@@ -489,6 +484,7 @@ int smu_v13_0_setup_pptable(struct smu_context *smu)
break;
case 3664:
case 3683:
+ case 3715:
break;
default:
dev_err(adev->dev, "Unsupported pptable id %d\n", pptable_id);
@@ -2344,8 +2340,8 @@ int smu_v13_0_set_gfx_power_up_by_imu(struct smu_context *smu)
index = smu_cmn_to_asic_specific_index(smu, CMN2ASIC_MAPPING_MSG,
SMU_MSG_EnableGfxImu);
-
- return smu_cmn_send_msg_without_waiting(smu, index, 0);
+ /* Param 1 to tell PMFW to enable GFXOFF feature */
+ return smu_cmn_send_msg_without_waiting(smu, index, 1);
}
int smu_v13_0_od_edit_dpm_table(struct smu_context *smu,
diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_0_ppt.c b/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_0_ppt.c
index 1bbeceeb9e3c..7db2fd9ea74a 100644
--- a/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_0_ppt.c
+++ b/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_0_ppt.c
@@ -388,11 +388,29 @@ static int smu_v13_0_0_append_powerplay_table(struct smu_context *smu)
return 0;
}
-static int smu_v13_0_0_setup_pptable(struct smu_context *smu)
+static int smu_v13_0_0_get_pptable_from_pmfw(struct smu_context *smu,
+ void **table,
+ uint32_t *size)
{
struct smu_table_context *smu_table = &smu->smu_table;
void *combo_pptable = smu_table->combo_pptable;
+ int ret = 0;
+
+ ret = smu_cmn_get_combo_pptable(smu);
+ if (ret)
+ return ret;
+
+ *table = combo_pptable;
+ *size = sizeof(struct smu_13_0_0_powerplay_table);
+
+ return 0;
+}
+
+static int smu_v13_0_0_setup_pptable(struct smu_context *smu)
+{
+ struct smu_table_context *smu_table = &smu->smu_table;
struct amdgpu_device *adev = smu->adev;
+ uint32_t pptable_id;
int ret = 0;
/*
@@ -401,17 +419,51 @@ static int smu_v13_0_0_setup_pptable(struct smu_context *smu)
* rely on the combo pptable(and its revelant SMU message).
*/
if (adev->scpm_enabled) {
- ret = smu_cmn_get_combo_pptable(smu);
- if (ret)
- return ret;
-
- smu->smu_table.power_play_table = combo_pptable;
- smu->smu_table.power_play_table_size = sizeof(struct smu_13_0_0_powerplay_table);
+ ret = smu_v13_0_0_get_pptable_from_pmfw(smu,
+ &smu_table->power_play_table,
+ &smu_table->power_play_table_size);
} else {
- ret = smu_v13_0_setup_pptable(smu);
- if (ret)
- return ret;
+ /* override pptable_id from driver parameter */
+ if (amdgpu_smu_pptable_id >= 0) {
+ pptable_id = amdgpu_smu_pptable_id;
+ dev_info(adev->dev, "override pptable id %d\n", pptable_id);
+ } else {
+ pptable_id = smu_table->boot_values.pp_table_id;
+ }
+
+ /*
+ * Temporary solution for SMU V13.0.0 with SCPM disabled:
+ * - use vbios carried pptable when pptable_id is 3664, 3715 or 3795
+ * - use soft pptable when pptable_id is 3683
+ */
+ if (adev->ip_versions[MP1_HWIP][0] == IP_VERSION(13, 0, 0)) {
+ switch (pptable_id) {
+ case 3664:
+ case 3715:
+ case 3795:
+ pptable_id = 0;
+ break;
+ case 3683:
+ break;
+ default:
+ dev_err(adev->dev, "Unsupported pptable id %d\n", pptable_id);
+ return -EINVAL;
+ }
+ }
+
+ /* force using vbios pptable in sriov mode */
+ if ((amdgpu_sriov_vf(adev) || !pptable_id) && (amdgpu_emu_mode != 1))
+ ret = smu_v13_0_0_get_pptable_from_pmfw(smu,
+ &smu_table->power_play_table,
+ &smu_table->power_play_table_size);
+ else
+ ret = smu_v13_0_get_pptable_from_firmware(smu,
+ &smu_table->power_play_table,
+ &smu_table->power_play_table_size,
+ pptable_id);
}
+ if (ret)
+ return ret;
ret = smu_v13_0_0_store_powerplay_table(smu);
if (ret)
@@ -1792,7 +1844,9 @@ static const struct pptable_funcs smu_v13_0_0_ppt_funcs = {
.dump_pptable = smu_v13_0_0_dump_pptable,
.init_microcode = smu_v13_0_init_microcode,
.load_microcode = smu_v13_0_load_microcode,
+ .fini_microcode = smu_v13_0_fini_microcode,
.init_smc_tables = smu_v13_0_0_init_smc_tables,
+ .fini_smc_tables = smu_v13_0_fini_smc_tables,
.init_power = smu_v13_0_init_power,
.fini_power = smu_v13_0_fini_power,
.check_fw_status = smu_v13_0_check_fw_status,
diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_4_ppt.c b/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_4_ppt.c
index 82d3718d8324..97e1d55dcaad 100644
--- a/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_4_ppt.c
+++ b/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_4_ppt.c
@@ -71,7 +71,6 @@ static struct cmn2asic_msg_mapping smu_v13_0_4_message_map[SMU_MSG_MAX_COUNT] =
MSG_MAP(TestMessage, PPSMC_MSG_TestMessage, 1),
MSG_MAP(GetSmuVersion, PPSMC_MSG_GetPmfwVersion, 1),
MSG_MAP(GetDriverIfVersion, PPSMC_MSG_GetDriverIfVersion, 1),
- MSG_MAP(EnableGfxOff, PPSMC_MSG_EnableGfxOff, 1),
MSG_MAP(AllowGfxOff, PPSMC_MSG_AllowGfxOff, 1),
MSG_MAP(DisallowGfxOff, PPSMC_MSG_DisallowGfxOff, 1),
MSG_MAP(PowerDownVcn, PPSMC_MSG_PowerDownVcn, 1),
@@ -199,6 +198,9 @@ static int smu_v13_0_4_fini_smc_tables(struct smu_context *smu)
kfree(smu_table->watermarks_table);
smu_table->watermarks_table = NULL;
+ kfree(smu_table->gpu_metrics_table);
+ smu_table->gpu_metrics_table = NULL;
+
return 0;
}
@@ -226,18 +228,6 @@ static int smu_v13_0_4_system_features_control(struct smu_context *smu, bool en)
return ret;
}
-static int smu_v13_0_4_post_smu_init(struct smu_context *smu)
-{
- struct amdgpu_device *adev = smu->adev;
- int ret = 0;
-
- /* allow message will be sent after enable message */
- ret = smu_cmn_send_smc_msg(smu, SMU_MSG_EnableGfxOff, NULL);
- if (ret)
- dev_err(adev->dev, "Failed to Enable GfxOff!\n");
- return ret;
-}
-
static ssize_t smu_v13_0_4_get_gpu_metrics(struct smu_context *smu,
void **table)
{
@@ -1026,7 +1016,6 @@ static const struct pptable_funcs smu_v13_0_4_ppt_funcs = {
.get_pp_feature_mask = smu_cmn_get_pp_feature_mask,
.set_driver_table_location = smu_v13_0_set_driver_table_location,
.gfx_off_control = smu_v13_0_gfx_off_control,
- .post_init = smu_v13_0_4_post_smu_init,
.mode2_reset = smu_v13_0_4_mode2_reset,
.get_dpm_ultimate_freq = smu_v13_0_4_get_dpm_ultimate_freq,
.od_edit_dpm_table = smu_v13_0_od_edit_dpm_table,
diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_5_ppt.c b/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_5_ppt.c
index 47360ef5c175..66445964efbd 100644
--- a/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_5_ppt.c
+++ b/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_5_ppt.c
@@ -176,6 +176,9 @@ static int smu_v13_0_5_fini_smc_tables(struct smu_context *smu)
kfree(smu_table->watermarks_table);
smu_table->watermarks_table = NULL;
+ kfree(smu_table->gpu_metrics_table);
+ smu_table->gpu_metrics_table = NULL;
+
return 0;
}
diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_7_ppt.c b/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_7_ppt.c
index 9dd56e73218b..c422bf8a09b1 100644
--- a/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_7_ppt.c
+++ b/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_7_ppt.c
@@ -120,6 +120,7 @@ static struct cmn2asic_msg_mapping smu_v13_0_7_message_map[SMU_MSG_MAX_COUNT] =
MSG_MAP(DisallowGfxOff, PPSMC_MSG_DisallowGfxOff, 0),
MSG_MAP(Mode1Reset, PPSMC_MSG_Mode1Reset, 0),
MSG_MAP(PrepareMp1ForUnload, PPSMC_MSG_PrepareMp1ForUnload, 0),
+ MSG_MAP(SetMGpuFanBoostLimitRpm, PPSMC_MSG_SetMGpuFanBoostLimitRpm, 0),
};
static struct cmn2asic_mapping smu_v13_0_7_clk_map[SMU_CLK_COUNT] = {
@@ -400,11 +401,27 @@ static int smu_v13_0_7_append_powerplay_table(struct smu_context *smu)
return 0;
}
+static int smu_v13_0_7_get_pptable_from_pmfw(struct smu_context *smu,
+ void **table,
+ uint32_t *size)
+{
+ struct smu_table_context *smu_table = &smu->smu_table;
+ void *combo_pptable = smu_table->combo_pptable;
+ int ret = 0;
+
+ ret = smu_cmn_get_combo_pptable(smu);
+ if (ret)
+ return ret;
+
+ *table = combo_pptable;
+ *size = sizeof(struct smu_13_0_7_powerplay_table);
+
+ return 0;
+}
static int smu_v13_0_7_setup_pptable(struct smu_context *smu)
{
struct smu_table_context *smu_table = &smu->smu_table;
- void *combo_pptable = smu_table->combo_pptable;
struct amdgpu_device *adev = smu->adev;
int ret = 0;
@@ -413,18 +430,11 @@ static int smu_v13_0_7_setup_pptable(struct smu_context *smu)
* be used directly by driver. To get the raw pptable, we need to
* rely on the combo pptable(and its revelant SMU message).
*/
- if (adev->scpm_enabled) {
- ret = smu_cmn_get_combo_pptable(smu);
- if (ret)
- return ret;
-
- smu->smu_table.power_play_table = combo_pptable;
- smu->smu_table.power_play_table_size = sizeof(struct smu_13_0_7_powerplay_table);
- } else {
- ret = smu_v13_0_setup_pptable(smu);
- if (ret)
- return ret;
- }
+ ret = smu_v13_0_7_get_pptable_from_pmfw(smu,
+ &smu_table->power_play_table,
+ &smu_table->power_play_table_size);
+ if (ret)
+ return ret;
ret = smu_v13_0_7_store_powerplay_table(smu);
if (ret)
@@ -1567,6 +1577,16 @@ static int smu_v13_0_7_set_mp1_state(struct smu_context *smu,
return ret;
}
+static bool smu_v13_0_7_is_mode1_reset_supported(struct smu_context *smu)
+{
+ struct amdgpu_device *adev = smu->adev;
+
+ /* SRIOV does not support SMU mode1 reset */
+ if (amdgpu_sriov_vf(adev))
+ return false;
+
+ return true;
+}
static const struct pptable_funcs smu_v13_0_7_ppt_funcs = {
.get_allowed_feature_mask = smu_v13_0_7_get_allowed_feature_mask,
.set_default_dpm_table = smu_v13_0_7_set_default_dpm_table,
@@ -1574,7 +1594,9 @@ static const struct pptable_funcs smu_v13_0_7_ppt_funcs = {
.dump_pptable = smu_v13_0_7_dump_pptable,
.init_microcode = smu_v13_0_init_microcode,
.load_microcode = smu_v13_0_load_microcode,
+ .fini_microcode = smu_v13_0_fini_microcode,
.init_smc_tables = smu_v13_0_7_init_smc_tables,
+ .fini_smc_tables = smu_v13_0_fini_smc_tables,
.init_power = smu_v13_0_init_power,
.fini_power = smu_v13_0_fini_power,
.check_fw_status = smu_v13_0_7_check_fw_status,
@@ -1624,6 +1646,8 @@ static const struct pptable_funcs smu_v13_0_7_ppt_funcs = {
.baco_set_state = smu_v13_0_baco_set_state,
.baco_enter = smu_v13_0_baco_enter,
.baco_exit = smu_v13_0_baco_exit,
+ .mode1_reset_is_support = smu_v13_0_7_is_mode1_reset_supported,
+ .mode1_reset = smu_v13_0_mode1_reset,
.set_mp1_state = smu_v13_0_7_set_mp1_state,
};
diff --git a/drivers/gpu/drm/bridge/lvds-codec.c b/drivers/gpu/drm/bridge/lvds-codec.c
index 702ea803a743..39e7004de720 100644
--- a/drivers/gpu/drm/bridge/lvds-codec.c
+++ b/drivers/gpu/drm/bridge/lvds-codec.c
@@ -180,7 +180,7 @@ static int lvds_codec_probe(struct platform_device *pdev)
of_node_put(bus_node);
if (ret == -ENODEV) {
dev_warn(dev, "missing 'data-mapping' DT property\n");
- } else if (ret) {
+ } else if (ret < 0) {
dev_err(dev, "invalid 'data-mapping' DT property\n");
return ret;
} else {
diff --git a/drivers/gpu/drm/drm_debugfs.c b/drivers/gpu/drm/drm_debugfs.c
index 493922069c90..01ee3febb813 100644
--- a/drivers/gpu/drm/drm_debugfs.c
+++ b/drivers/gpu/drm/drm_debugfs.c
@@ -377,8 +377,8 @@ static int vrr_range_show(struct seq_file *m, void *data)
if (connector->status != connector_status_connected)
return -ENODEV;
- seq_printf(m, "Min: %u\n", (u8)connector->display_info.monitor_range.min_vfreq);
- seq_printf(m, "Max: %u\n", (u8)connector->display_info.monitor_range.max_vfreq);
+ seq_printf(m, "Min: %u\n", connector->display_info.monitor_range.min_vfreq);
+ seq_printf(m, "Max: %u\n", connector->display_info.monitor_range.max_vfreq);
return 0;
}
diff --git a/drivers/gpu/drm/drm_edid.c b/drivers/gpu/drm/drm_edid.c
index bbc25e3b7220..eaa819381281 100644
--- a/drivers/gpu/drm/drm_edid.c
+++ b/drivers/gpu/drm/drm_edid.c
@@ -5971,12 +5971,14 @@ static void drm_parse_cea_ext(struct drm_connector *connector,
}
static
-void get_monitor_range(const struct detailed_timing *timing,
- void *info_monitor_range)
+void get_monitor_range(const struct detailed_timing *timing, void *c)
{
- struct drm_monitor_range_info *monitor_range = info_monitor_range;
+ struct detailed_mode_closure *closure = c;
+ struct drm_display_info *info = &closure->connector->display_info;
+ struct drm_monitor_range_info *monitor_range = &info->monitor_range;
const struct detailed_non_pixel *data = &timing->data.other_data;
const struct detailed_data_monitor_range *range = &data->data.range;
+ const struct edid *edid = closure->drm_edid->edid;
if (!is_display_descriptor(timing, EDID_DETAIL_MONITOR_RANGE))
return;
@@ -5992,18 +5994,28 @@ void get_monitor_range(const struct detailed_timing *timing,
monitor_range->min_vfreq = range->min_vfreq;
monitor_range->max_vfreq = range->max_vfreq;
+
+ if (edid->revision >= 4) {
+ if (data->pad2 & DRM_EDID_RANGE_OFFSET_MIN_VFREQ)
+ monitor_range->min_vfreq += 255;
+ if (data->pad2 & DRM_EDID_RANGE_OFFSET_MAX_VFREQ)
+ monitor_range->max_vfreq += 255;
+ }
}
static void drm_get_monitor_range(struct drm_connector *connector,
const struct drm_edid *drm_edid)
{
- struct drm_display_info *info = &connector->display_info;
+ const struct drm_display_info *info = &connector->display_info;
+ struct detailed_mode_closure closure = {
+ .connector = connector,
+ .drm_edid = drm_edid,
+ };
if (!version_greater(drm_edid, 1, 1))
return;
- drm_for_each_detailed_block(drm_edid, get_monitor_range,
- &info->monitor_range);
+ drm_for_each_detailed_block(drm_edid, get_monitor_range, &closure);
DRM_DEBUG_KMS("Supported Monitor Refresh rate range is %d Hz - %d Hz\n",
info->monitor_range.min_vfreq,
diff --git a/drivers/gpu/drm/drm_gem.c b/drivers/gpu/drm/drm_gem.c
index 86d670c71286..ad068865ba20 100644
--- a/drivers/gpu/drm/drm_gem.c
+++ b/drivers/gpu/drm/drm_gem.c
@@ -168,21 +168,6 @@ void drm_gem_private_object_init(struct drm_device *dev,
}
EXPORT_SYMBOL(drm_gem_private_object_init);
-static void
-drm_gem_remove_prime_handles(struct drm_gem_object *obj, struct drm_file *filp)
-{
- /*
- * Note: obj->dma_buf can't disappear as long as we still hold a
- * handle reference in obj->handle_count.
- */
- mutex_lock(&filp->prime.lock);
- if (obj->dma_buf) {
- drm_prime_remove_buf_handle_locked(&filp->prime,
- obj->dma_buf);
- }
- mutex_unlock(&filp->prime.lock);
-}
-
/**
* drm_gem_object_handle_free - release resources bound to userspace handles
* @obj: GEM object to clean up.
@@ -253,7 +238,7 @@ drm_gem_object_release_handle(int id, void *ptr, void *data)
if (obj->funcs->close)
obj->funcs->close(obj, file_priv);
- drm_gem_remove_prime_handles(obj, file_priv);
+ drm_prime_remove_buf_handle(&file_priv->prime, id);
drm_vma_node_revoke(&obj->vma_node, file_priv);
drm_gem_object_handle_put_unlocked(obj);
diff --git a/drivers/gpu/drm/drm_internal.h b/drivers/gpu/drm/drm_internal.h
index 1fbbc19f1ac0..7bb98e6a446d 100644
--- a/drivers/gpu/drm/drm_internal.h
+++ b/drivers/gpu/drm/drm_internal.h
@@ -74,8 +74,8 @@ int drm_prime_fd_to_handle_ioctl(struct drm_device *dev, void *data,
void drm_prime_init_file_private(struct drm_prime_file_private *prime_fpriv);
void drm_prime_destroy_file_private(struct drm_prime_file_private *prime_fpriv);
-void drm_prime_remove_buf_handle_locked(struct drm_prime_file_private *prime_fpriv,
- struct dma_buf *dma_buf);
+void drm_prime_remove_buf_handle(struct drm_prime_file_private *prime_fpriv,
+ uint32_t handle);
/* drm_drv.c */
struct drm_minor *drm_minor_acquire(unsigned int minor_id);
diff --git a/drivers/gpu/drm/drm_prime.c b/drivers/gpu/drm/drm_prime.c
index a3f180653b8b..eb09e86044c6 100644
--- a/drivers/gpu/drm/drm_prime.c
+++ b/drivers/gpu/drm/drm_prime.c
@@ -190,29 +190,33 @@ static int drm_prime_lookup_buf_handle(struct drm_prime_file_private *prime_fpri
return -ENOENT;
}
-void drm_prime_remove_buf_handle_locked(struct drm_prime_file_private *prime_fpriv,
- struct dma_buf *dma_buf)
+void drm_prime_remove_buf_handle(struct drm_prime_file_private *prime_fpriv,
+ uint32_t handle)
{
struct rb_node *rb;
- rb = prime_fpriv->dmabufs.rb_node;
+ mutex_lock(&prime_fpriv->lock);
+
+ rb = prime_fpriv->handles.rb_node;
while (rb) {
struct drm_prime_member *member;
- member = rb_entry(rb, struct drm_prime_member, dmabuf_rb);
- if (member->dma_buf == dma_buf) {
+ member = rb_entry(rb, struct drm_prime_member, handle_rb);
+ if (member->handle == handle) {
rb_erase(&member->handle_rb, &prime_fpriv->handles);
rb_erase(&member->dmabuf_rb, &prime_fpriv->dmabufs);
- dma_buf_put(dma_buf);
+ dma_buf_put(member->dma_buf);
kfree(member);
- return;
- } else if (member->dma_buf < dma_buf) {
+ break;
+ } else if (member->handle < handle) {
rb = rb->rb_right;
} else {
rb = rb->rb_left;
}
}
+
+ mutex_unlock(&prime_fpriv->lock);
}
void drm_prime_init_file_private(struct drm_prime_file_private *prime_fpriv)
diff --git a/drivers/gpu/drm/gma500/cdv_device.c b/drivers/gpu/drm/gma500/cdv_device.c
index dd32b484dd82..ce96234f3df2 100644
--- a/drivers/gpu/drm/gma500/cdv_device.c
+++ b/drivers/gpu/drm/gma500/cdv_device.c
@@ -581,11 +581,9 @@ static const struct psb_offset cdv_regmap[2] = {
static int cdv_chip_setup(struct drm_device *dev)
{
struct drm_psb_private *dev_priv = to_drm_psb_private(dev);
- struct pci_dev *pdev = to_pci_dev(dev->dev);
INIT_WORK(&dev_priv->hotplug_work, cdv_hotplug_work_func);
- if (pci_enable_msi(pdev))
- dev_warn(dev->dev, "Enabling MSI failed!\n");
+ dev_priv->use_msi = true;
dev_priv->regmap = cdv_regmap;
gma_get_core_freq(dev);
psb_intel_opregion_init(dev);
diff --git a/drivers/gpu/drm/gma500/gem.c b/drivers/gpu/drm/gma500/gem.c
index dffe37490206..4b7627a72637 100644
--- a/drivers/gpu/drm/gma500/gem.c
+++ b/drivers/gpu/drm/gma500/gem.c
@@ -112,12 +112,12 @@ static void psb_gem_free_object(struct drm_gem_object *obj)
{
struct psb_gem_object *pobj = to_psb_gem_object(obj);
- drm_gem_object_release(obj);
-
/* Undo the mmap pin if we are destroying the object */
if (pobj->mmapping)
psb_gem_unpin(pobj);
+ drm_gem_object_release(obj);
+
WARN_ON(pobj->in_gart && !pobj->stolen);
release_resource(&pobj->resource);
diff --git a/drivers/gpu/drm/gma500/gma_display.c b/drivers/gpu/drm/gma500/gma_display.c
index bd40c040a2c9..2f52eceda3a1 100644
--- a/drivers/gpu/drm/gma500/gma_display.c
+++ b/drivers/gpu/drm/gma500/gma_display.c
@@ -532,15 +532,18 @@ int gma_crtc_page_flip(struct drm_crtc *crtc,
WARN_ON(drm_crtc_vblank_get(crtc) != 0);
gma_crtc->page_flip_event = event;
+ spin_unlock_irqrestore(&dev->event_lock, flags);
/* Call this locked if we want an event at vblank interrupt. */
ret = crtc_funcs->mode_set_base(crtc, crtc->x, crtc->y, old_fb);
if (ret) {
- gma_crtc->page_flip_event = NULL;
- drm_crtc_vblank_put(crtc);
+ spin_lock_irqsave(&dev->event_lock, flags);
+ if (gma_crtc->page_flip_event) {
+ gma_crtc->page_flip_event = NULL;
+ drm_crtc_vblank_put(crtc);
+ }
+ spin_unlock_irqrestore(&dev->event_lock, flags);
}
-
- spin_unlock_irqrestore(&dev->event_lock, flags);
} else {
ret = crtc_funcs->mode_set_base(crtc, crtc->x, crtc->y, old_fb);
}
diff --git a/drivers/gpu/drm/gma500/oaktrail_device.c b/drivers/gpu/drm/gma500/oaktrail_device.c
index 5923a9c89312..f90e628cb482 100644
--- a/drivers/gpu/drm/gma500/oaktrail_device.c
+++ b/drivers/gpu/drm/gma500/oaktrail_device.c
@@ -501,12 +501,9 @@ static const struct psb_offset oaktrail_regmap[2] = {
static int oaktrail_chip_setup(struct drm_device *dev)
{
struct drm_psb_private *dev_priv = to_drm_psb_private(dev);
- struct pci_dev *pdev = to_pci_dev(dev->dev);
int ret;
- if (pci_enable_msi(pdev))
- dev_warn(dev->dev, "Enabling MSI failed!\n");
-
+ dev_priv->use_msi = true;
dev_priv->regmap = oaktrail_regmap;
ret = mid_chip_setup(dev);
diff --git a/drivers/gpu/drm/gma500/power.c b/drivers/gpu/drm/gma500/power.c
index b91de6d36e41..66873085d450 100644
--- a/drivers/gpu/drm/gma500/power.c
+++ b/drivers/gpu/drm/gma500/power.c
@@ -139,8 +139,6 @@ static void gma_suspend_pci(struct pci_dev *pdev)
dev_priv->regs.saveBSM = bsm;
pci_read_config_dword(pdev, 0xFC, &vbt);
dev_priv->regs.saveVBT = vbt;
- pci_read_config_dword(pdev, PSB_PCIx_MSI_ADDR_LOC, &dev_priv->msi_addr);
- pci_read_config_dword(pdev, PSB_PCIx_MSI_DATA_LOC, &dev_priv->msi_data);
pci_disable_device(pdev);
pci_set_power_state(pdev, PCI_D3hot);
@@ -168,9 +166,6 @@ static bool gma_resume_pci(struct pci_dev *pdev)
pci_restore_state(pdev);
pci_write_config_dword(pdev, 0x5c, dev_priv->regs.saveBSM);
pci_write_config_dword(pdev, 0xFC, dev_priv->regs.saveVBT);
- /* restoring MSI address and data in PCIx space */
- pci_write_config_dword(pdev, PSB_PCIx_MSI_ADDR_LOC, dev_priv->msi_addr);
- pci_write_config_dword(pdev, PSB_PCIx_MSI_DATA_LOC, dev_priv->msi_data);
ret = pci_enable_device(pdev);
if (ret != 0)
@@ -223,8 +218,7 @@ int gma_power_resume(struct device *_dev)
mutex_lock(&power_mutex);
gma_resume_pci(pdev);
gma_resume_display(pdev);
- gma_irq_preinstall(dev);
- gma_irq_postinstall(dev);
+ gma_irq_install(dev);
mutex_unlock(&power_mutex);
return 0;
}
diff --git a/drivers/gpu/drm/gma500/psb_drv.c b/drivers/gpu/drm/gma500/psb_drv.c
index 1d8744f3e702..54e756b48606 100644
--- a/drivers/gpu/drm/gma500/psb_drv.c
+++ b/drivers/gpu/drm/gma500/psb_drv.c
@@ -383,7 +383,7 @@ static int psb_driver_load(struct drm_device *dev, unsigned long flags)
PSB_WVDC32(0xFFFFFFFF, PSB_INT_MASK_R);
spin_unlock_irqrestore(&dev_priv->irqmask_lock, irqflags);
- gma_irq_install(dev, pdev->irq);
+ gma_irq_install(dev);
dev->max_vblank_count = 0xffffff; /* only 24 bits of frame count */
diff --git a/drivers/gpu/drm/gma500/psb_drv.h b/drivers/gpu/drm/gma500/psb_drv.h
index 0ea3d23575f3..731cc356c07a 100644
--- a/drivers/gpu/drm/gma500/psb_drv.h
+++ b/drivers/gpu/drm/gma500/psb_drv.h
@@ -490,6 +490,7 @@ struct drm_psb_private {
int rpm_enabled;
/* MID specific */
+ bool use_msi;
bool has_gct;
struct oaktrail_gct_data gct_data;
@@ -499,10 +500,6 @@ struct drm_psb_private {
/* Register state */
struct psb_save_area regs;
- /* MSI reg save */
- uint32_t msi_addr;
- uint32_t msi_data;
-
/* Hotplug handling */
struct work_struct hotplug_work;
diff --git a/drivers/gpu/drm/gma500/psb_irq.c b/drivers/gpu/drm/gma500/psb_irq.c
index e6e6d61bbeab..038f18ed0a95 100644
--- a/drivers/gpu/drm/gma500/psb_irq.c
+++ b/drivers/gpu/drm/gma500/psb_irq.c
@@ -316,17 +316,24 @@ void gma_irq_postinstall(struct drm_device *dev)
spin_unlock_irqrestore(&dev_priv->irqmask_lock, irqflags);
}
-int gma_irq_install(struct drm_device *dev, unsigned int irq)
+int gma_irq_install(struct drm_device *dev)
{
+ struct drm_psb_private *dev_priv = to_drm_psb_private(dev);
+ struct pci_dev *pdev = to_pci_dev(dev->dev);
int ret;
- if (irq == IRQ_NOTCONNECTED)
+ if (dev_priv->use_msi && pci_enable_msi(pdev)) {
+ dev_warn(dev->dev, "Enabling MSI failed!\n");
+ dev_priv->use_msi = false;
+ }
+
+ if (pdev->irq == IRQ_NOTCONNECTED)
return -ENOTCONN;
gma_irq_preinstall(dev);
/* PCI devices require shared interrupts. */
- ret = request_irq(irq, gma_irq_handler, IRQF_SHARED, dev->driver->name, dev);
+ ret = request_irq(pdev->irq, gma_irq_handler, IRQF_SHARED, dev->driver->name, dev);
if (ret)
return ret;
@@ -369,6 +376,8 @@ void gma_irq_uninstall(struct drm_device *dev)
spin_unlock_irqrestore(&dev_priv->irqmask_lock, irqflags);
free_irq(pdev->irq, dev);
+ if (dev_priv->use_msi)
+ pci_disable_msi(pdev);
}
int gma_crtc_enable_vblank(struct drm_crtc *crtc)
diff --git a/drivers/gpu/drm/gma500/psb_irq.h b/drivers/gpu/drm/gma500/psb_irq.h
index b51e395194ff..7648f69824a5 100644
--- a/drivers/gpu/drm/gma500/psb_irq.h
+++ b/drivers/gpu/drm/gma500/psb_irq.h
@@ -17,7 +17,7 @@ struct drm_device;
void gma_irq_preinstall(struct drm_device *dev);
void gma_irq_postinstall(struct drm_device *dev);
-int gma_irq_install(struct drm_device *dev, unsigned int irq);
+int gma_irq_install(struct drm_device *dev);
void gma_irq_uninstall(struct drm_device *dev);
int gma_crtc_enable_vblank(struct drm_crtc *crtc);
diff --git a/drivers/gpu/drm/hyperv/hyperv_drm_drv.c b/drivers/gpu/drm/hyperv/hyperv_drm_drv.c
index 6d11e7938c83..f84d39762a72 100644
--- a/drivers/gpu/drm/hyperv/hyperv_drm_drv.c
+++ b/drivers/gpu/drm/hyperv/hyperv_drm_drv.c
@@ -23,9 +23,6 @@
#define DRIVER_MAJOR 1
#define DRIVER_MINOR 0
-#define PCI_VENDOR_ID_MICROSOFT 0x1414
-#define PCI_DEVICE_ID_HYPERV_VIDEO 0x5353
-
DEFINE_DRM_GEM_FOPS(hv_fops);
static struct drm_driver hyperv_driver = {
@@ -133,7 +130,6 @@ static int hyperv_vmbus_probe(struct hv_device *hdev,
}
ret = hyperv_setup_vram(hv, hdev);
-
if (ret)
goto err_vmbus_close;
@@ -150,18 +146,20 @@ static int hyperv_vmbus_probe(struct hv_device *hdev,
ret = hyperv_mode_config_init(hv);
if (ret)
- goto err_vmbus_close;
+ goto err_free_mmio;
ret = drm_dev_register(dev, 0);
if (ret) {
drm_err(dev, "Failed to register drm driver.\n");
- goto err_vmbus_close;
+ goto err_free_mmio;
}
drm_fbdev_generic_setup(dev, 0);
return 0;
+err_free_mmio:
+ vmbus_free_mmio(hv->mem->start, hv->fb_size);
err_vmbus_close:
vmbus_close(hdev->channel);
err_hv_set_drv_data:
diff --git a/drivers/gpu/drm/i915/display/icl_dsi.c b/drivers/gpu/drm/i915/display/icl_dsi.c
index 5dcfa7feffa9..1390729401a0 100644
--- a/drivers/gpu/drm/i915/display/icl_dsi.c
+++ b/drivers/gpu/drm/i915/display/icl_dsi.c
@@ -1629,6 +1629,8 @@ static int gen11_dsi_dsc_compute_config(struct intel_encoder *encoder,
/* FIXME: initialize from VBT */
vdsc_cfg->rc_model_size = DSC_RC_MODEL_SIZE_CONST;
+ vdsc_cfg->pic_height = crtc_state->hw.adjusted_mode.crtc_vdisplay;
+
ret = intel_dsc_compute_params(crtc_state);
if (ret)
return ret;
@@ -2070,7 +2072,14 @@ void icl_dsi_init(struct drm_i915_private *dev_priv)
else
intel_dsi->ports = BIT(port);
+ if (drm_WARN_ON(&dev_priv->drm, intel_connector->panel.vbt.dsi.bl_ports & ~intel_dsi->ports))
+ intel_connector->panel.vbt.dsi.bl_ports &= intel_dsi->ports;
+
intel_dsi->dcs_backlight_ports = intel_connector->panel.vbt.dsi.bl_ports;
+
+ if (drm_WARN_ON(&dev_priv->drm, intel_connector->panel.vbt.dsi.cabc_ports & ~intel_dsi->ports))
+ intel_connector->panel.vbt.dsi.cabc_ports &= intel_dsi->ports;
+
intel_dsi->dcs_cabc_ports = intel_connector->panel.vbt.dsi.cabc_ports;
for_each_dsi_port(port, intel_dsi->ports) {
diff --git a/drivers/gpu/drm/i915/display/intel_backlight.c b/drivers/gpu/drm/i915/display/intel_backlight.c
index 110fc98ec280..f5e1d692976e 100644
--- a/drivers/gpu/drm/i915/display/intel_backlight.c
+++ b/drivers/gpu/drm/i915/display/intel_backlight.c
@@ -16,6 +16,7 @@
#include "intel_dsi_dcs_backlight.h"
#include "intel_panel.h"
#include "intel_pci_config.h"
+#include "intel_pps.h"
/**
* scale - scale values from one range to another
@@ -971,26 +972,24 @@ int intel_backlight_device_register(struct intel_connector *connector)
if (!name)
return -ENOMEM;
- bd = backlight_device_register(name, connector->base.kdev, connector,
- &intel_backlight_device_ops, &props);
-
- /*
- * Using the same name independent of the drm device or connector
- * prevents registration of multiple backlight devices in the
- * driver. However, we need to use the default name for backward
- * compatibility. Use unique names for subsequent backlight devices as a
- * fallback when the default name already exists.
- */
- if (IS_ERR(bd) && PTR_ERR(bd) == -EEXIST) {
+ bd = backlight_device_get_by_name(name);
+ if (bd) {
+ put_device(&bd->dev);
+ /*
+ * Using the same name independent of the drm device or connector
+ * prevents registration of multiple backlight devices in the
+ * driver. However, we need to use the default name for backward
+ * compatibility. Use unique names for subsequent backlight devices as a
+ * fallback when the default name already exists.
+ */
kfree(name);
name = kasprintf(GFP_KERNEL, "card%d-%s-backlight",
i915->drm.primary->index, connector->base.name);
if (!name)
return -ENOMEM;
-
- bd = backlight_device_register(name, connector->base.kdev, connector,
- &intel_backlight_device_ops, &props);
}
+ bd = backlight_device_register(name, connector->base.kdev, connector,
+ &intel_backlight_device_ops, &props);
if (IS_ERR(bd)) {
drm_err(&i915->drm,
@@ -1773,9 +1772,13 @@ void intel_backlight_init_funcs(struct intel_panel *panel)
panel->backlight.pwm_funcs = &i9xx_pwm_funcs;
}
- if (connector->base.connector_type == DRM_MODE_CONNECTOR_eDP &&
- intel_dp_aux_init_backlight_funcs(connector) == 0)
- return;
+ if (connector->base.connector_type == DRM_MODE_CONNECTOR_eDP) {
+ if (intel_dp_aux_init_backlight_funcs(connector) == 0)
+ return;
+
+ if (!(dev_priv->quirks & QUIRK_NO_PPS_BACKLIGHT_POWER_HOOK))
+ connector->panel.backlight.power = intel_pps_backlight_power;
+ }
/* We're using a standard PWM backlight interface */
panel->backlight.funcs = &pwm_bl_funcs;
diff --git a/drivers/gpu/drm/i915/display/intel_bios.c b/drivers/gpu/drm/i915/display/intel_bios.c
index 51dde5bfd956..7d6eb9ad7a02 100644
--- a/drivers/gpu/drm/i915/display/intel_bios.c
+++ b/drivers/gpu/drm/i915/display/intel_bios.c
@@ -479,6 +479,13 @@ init_bdb_block(struct drm_i915_private *i915,
block_size = get_blocksize(block);
+ /*
+ * Version number and new block size are considered
+ * part of the header for MIPI sequenece block v3+.
+ */
+ if (section_id == BDB_MIPI_SEQUENCE && *(const u8 *)block >= 3)
+ block_size += 5;
+
entry = kzalloc(struct_size(entry, data, max(min_size, block_size) + 3),
GFP_KERNEL);
if (!entry) {
@@ -1596,6 +1603,8 @@ static void parse_dsi_backlight_ports(struct drm_i915_private *i915,
struct intel_panel *panel,
enum port port)
{
+ enum port port_bc = DISPLAY_VER(i915) >= 11 ? PORT_B : PORT_C;
+
if (!panel->vbt.dsi.config->dual_link || i915->vbt.version < 197) {
panel->vbt.dsi.bl_ports = BIT(port);
if (panel->vbt.dsi.config->cabc_supported)
@@ -1609,11 +1618,11 @@ static void parse_dsi_backlight_ports(struct drm_i915_private *i915,
panel->vbt.dsi.bl_ports = BIT(PORT_A);
break;
case DL_DCS_PORT_C:
- panel->vbt.dsi.bl_ports = BIT(PORT_C);
+ panel->vbt.dsi.bl_ports = BIT(port_bc);
break;
default:
case DL_DCS_PORT_A_AND_C:
- panel->vbt.dsi.bl_ports = BIT(PORT_A) | BIT(PORT_C);
+ panel->vbt.dsi.bl_ports = BIT(PORT_A) | BIT(port_bc);
break;
}
@@ -1625,12 +1634,12 @@ static void parse_dsi_backlight_ports(struct drm_i915_private *i915,
panel->vbt.dsi.cabc_ports = BIT(PORT_A);
break;
case DL_DCS_PORT_C:
- panel->vbt.dsi.cabc_ports = BIT(PORT_C);
+ panel->vbt.dsi.cabc_ports = BIT(port_bc);
break;
default:
case DL_DCS_PORT_A_AND_C:
panel->vbt.dsi.cabc_ports =
- BIT(PORT_A) | BIT(PORT_C);
+ BIT(PORT_A) | BIT(port_bc);
break;
}
}
diff --git a/drivers/gpu/drm/i915/display/intel_bw.c b/drivers/gpu/drm/i915/display/intel_bw.c
index 79269d2c476b..3699869ab2db 100644
--- a/drivers/gpu/drm/i915/display/intel_bw.c
+++ b/drivers/gpu/drm/i915/display/intel_bw.c
@@ -404,15 +404,17 @@ static int tgl_get_bw_info(struct drm_i915_private *dev_priv, const struct intel
int clpchgroup;
int j;
- if (i < num_groups - 1)
- bi_next = &dev_priv->max_bw[i + 1];
-
clpchgroup = (sa->deburst * qi.deinterleave / num_channels) << i;
- if (i < num_groups - 1 && clpchgroup < clperchgroup)
- bi_next->num_planes = (ipqdepth - clpchgroup) / clpchgroup + 1;
- else
- bi_next->num_planes = 0;
+ if (i < num_groups - 1) {
+ bi_next = &dev_priv->max_bw[i + 1];
+
+ if (clpchgroup < clperchgroup)
+ bi_next->num_planes = (ipqdepth - clpchgroup) /
+ clpchgroup + 1;
+ else
+ bi_next->num_planes = 0;
+ }
bi->num_qgv_points = qi.num_points;
bi->num_psf_gv_points = qi.num_psf_points;
diff --git a/drivers/gpu/drm/i915/display/intel_dp.c b/drivers/gpu/drm/i915/display/intel_dp.c
index 32292c0be2bd..3ed7eeacc706 100644
--- a/drivers/gpu/drm/i915/display/intel_dp.c
+++ b/drivers/gpu/drm/i915/display/intel_dp.c
@@ -389,23 +389,13 @@ static int dg2_max_source_rate(struct intel_dp *intel_dp)
return intel_dp_is_edp(intel_dp) ? 810000 : 1350000;
}
-static bool is_low_voltage_sku(struct drm_i915_private *i915, enum phy phy)
-{
- u32 voltage;
-
- voltage = intel_de_read(i915, ICL_PORT_COMP_DW3(phy)) & VOLTAGE_INFO_MASK;
-
- return voltage == VOLTAGE_INFO_0_85V;
-}
-
static int icl_max_source_rate(struct intel_dp *intel_dp)
{
struct intel_digital_port *dig_port = dp_to_dig_port(intel_dp);
struct drm_i915_private *dev_priv = to_i915(dig_port->base.base.dev);
enum phy phy = intel_port_to_phy(dev_priv, dig_port->base.port);
- if (intel_phy_is_combo(dev_priv, phy) &&
- (is_low_voltage_sku(dev_priv, phy) || !intel_dp_is_edp(intel_dp)))
+ if (intel_phy_is_combo(dev_priv, phy) && !intel_dp_is_edp(intel_dp))
return 540000;
return 810000;
@@ -413,23 +403,7 @@ static int icl_max_source_rate(struct intel_dp *intel_dp)
static int ehl_max_source_rate(struct intel_dp *intel_dp)
{
- struct intel_digital_port *dig_port = dp_to_dig_port(intel_dp);
- struct drm_i915_private *dev_priv = to_i915(dig_port->base.base.dev);
- enum phy phy = intel_port_to_phy(dev_priv, dig_port->base.port);
-
- if (intel_dp_is_edp(intel_dp) || is_low_voltage_sku(dev_priv, phy))
- return 540000;
-
- return 810000;
-}
-
-static int dg1_max_source_rate(struct intel_dp *intel_dp)
-{
- struct intel_digital_port *dig_port = dp_to_dig_port(intel_dp);
- struct drm_i915_private *i915 = to_i915(dig_port->base.base.dev);
- enum phy phy = intel_port_to_phy(i915, dig_port->base.port);
-
- if (intel_phy_is_combo(i915, phy) && is_low_voltage_sku(i915, phy))
+ if (intel_dp_is_edp(intel_dp))
return 540000;
return 810000;
@@ -491,7 +465,7 @@ intel_dp_set_source_rates(struct intel_dp *intel_dp)
max_rate = dg2_max_source_rate(intel_dp);
else if (IS_ALDERLAKE_P(dev_priv) || IS_ALDERLAKE_S(dev_priv) ||
IS_DG1(dev_priv) || IS_ROCKETLAKE(dev_priv))
- max_rate = dg1_max_source_rate(intel_dp);
+ max_rate = 810000;
else if (IS_JSL_EHL(dev_priv))
max_rate = ehl_max_source_rate(intel_dp);
else
@@ -1395,6 +1369,7 @@ static int intel_dp_dsc_compute_params(struct intel_encoder *encoder,
* DP_DSC_RC_BUF_SIZE for this.
*/
vdsc_cfg->rc_model_size = DSC_RC_MODEL_SIZE_CONST;
+ vdsc_cfg->pic_height = crtc_state->hw.adjusted_mode.crtc_vdisplay;
/*
* Slice Height of 8 works for all currently available panels. So start
@@ -5293,8 +5268,6 @@ static bool intel_edp_init_connector(struct intel_dp *intel_dp,
intel_panel_init(intel_connector);
- if (!(dev_priv->quirks & QUIRK_NO_PPS_BACKLIGHT_POWER_HOOK))
- intel_connector->panel.backlight.power = intel_pps_backlight_power;
intel_backlight_setup(intel_connector, pipe);
intel_edp_add_properties(intel_dp);
diff --git a/drivers/gpu/drm/i915/display/intel_dp_link_training.c b/drivers/gpu/drm/i915/display/intel_dp_link_training.c
index 9feaf1a589f3..d213d8ad1ea5 100644
--- a/drivers/gpu/drm/i915/display/intel_dp_link_training.c
+++ b/drivers/gpu/drm/i915/display/intel_dp_link_training.c
@@ -671,6 +671,28 @@ intel_dp_prepare_link_train(struct intel_dp *intel_dp,
intel_dp_compute_rate(intel_dp, crtc_state->port_clock,
&link_bw, &rate_select);
+ /*
+ * WaEdpLinkRateDataReload
+ *
+ * Parade PS8461E MUX (used on varius TGL+ laptops) needs
+ * to snoop the link rates reported by the sink when we
+ * use LINK_RATE_SET in order to operate in jitter cleaning
+ * mode (as opposed to redriver mode). Unfortunately it
+ * loses track of the snooped link rates when powered down,
+ * so we need to make it re-snoop often. Without this high
+ * link rates are not stable.
+ */
+ if (!link_bw) {
+ struct intel_connector *connector = intel_dp->attached_connector;
+ __le16 sink_rates[DP_MAX_SUPPORTED_RATES];
+
+ drm_dbg_kms(&i915->drm, "[CONNECTOR:%d:%s] Reloading eDP link rates\n",
+ connector->base.base.id, connector->base.name);
+
+ drm_dp_dpcd_read(&intel_dp->aux, DP_SUPPORTED_LINK_RATES,
+ sink_rates, sizeof(sink_rates));
+ }
+
if (link_bw)
drm_dbg_kms(&i915->drm,
"[ENCODER:%d:%s] Using LINK_BW_SET value %02x\n",
diff --git a/drivers/gpu/drm/i915/display/intel_quirks.c b/drivers/gpu/drm/i915/display/intel_quirks.c
index c8488f5ebd04..e415cd7c0b84 100644
--- a/drivers/gpu/drm/i915/display/intel_quirks.c
+++ b/drivers/gpu/drm/i915/display/intel_quirks.c
@@ -191,6 +191,9 @@ static struct intel_quirk intel_quirks[] = {
/* ASRock ITX*/
{ 0x3185, 0x1849, 0x2212, quirk_increase_ddi_disabled_time },
{ 0x3184, 0x1849, 0x2212, quirk_increase_ddi_disabled_time },
+ /* ECS Liva Q2 */
+ { 0x3185, 0x1019, 0xa94d, quirk_increase_ddi_disabled_time },
+ { 0x3184, 0x1019, 0xa94d, quirk_increase_ddi_disabled_time },
};
void intel_init_quirks(struct drm_i915_private *i915)
diff --git a/drivers/gpu/drm/i915/display/intel_vdsc.c b/drivers/gpu/drm/i915/display/intel_vdsc.c
index 43e1bbc1e303..ca530f0733e0 100644
--- a/drivers/gpu/drm/i915/display/intel_vdsc.c
+++ b/drivers/gpu/drm/i915/display/intel_vdsc.c
@@ -460,7 +460,6 @@ int intel_dsc_compute_params(struct intel_crtc_state *pipe_config)
u8 i = 0;
vdsc_cfg->pic_width = pipe_config->hw.adjusted_mode.crtc_hdisplay;
- vdsc_cfg->pic_height = pipe_config->hw.adjusted_mode.crtc_vdisplay;
vdsc_cfg->slice_width = DIV_ROUND_UP(vdsc_cfg->pic_width,
pipe_config->dsc.slice_count);
diff --git a/drivers/gpu/drm/i915/display/vlv_dsi.c b/drivers/gpu/drm/i915/display/vlv_dsi.c
index b9b1fed99874..35136d26e517 100644
--- a/drivers/gpu/drm/i915/display/vlv_dsi.c
+++ b/drivers/gpu/drm/i915/display/vlv_dsi.c
@@ -1933,7 +1933,14 @@ void vlv_dsi_init(struct drm_i915_private *dev_priv)
else
intel_dsi->ports = BIT(port);
+ if (drm_WARN_ON(&dev_priv->drm, intel_connector->panel.vbt.dsi.bl_ports & ~intel_dsi->ports))
+ intel_connector->panel.vbt.dsi.bl_ports &= intel_dsi->ports;
+
intel_dsi->dcs_backlight_ports = intel_connector->panel.vbt.dsi.bl_ports;
+
+ if (drm_WARN_ON(&dev_priv->drm, intel_connector->panel.vbt.dsi.cabc_ports & ~intel_dsi->ports))
+ intel_connector->panel.vbt.dsi.cabc_ports &= intel_dsi->ports;
+
intel_dsi->dcs_cabc_ports = intel_connector->panel.vbt.dsi.cabc_ports;
/* Create a DSI host (and a device) for each port. */
diff --git a/drivers/gpu/drm/i915/gem/i915_gem_object.c b/drivers/gpu/drm/i915/gem/i915_gem_object.c
index ccec4055fde3..85482a04d158 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_object.c
+++ b/drivers/gpu/drm/i915/gem/i915_gem_object.c
@@ -268,7 +268,7 @@ static void __i915_gem_object_free_mmaps(struct drm_i915_gem_object *obj)
*/
void __i915_gem_object_pages_fini(struct drm_i915_gem_object *obj)
{
- assert_object_held(obj);
+ assert_object_held_shared(obj);
if (!list_empty(&obj->vma.list)) {
struct i915_vma *vma;
@@ -331,15 +331,7 @@ static void __i915_gem_free_objects(struct drm_i915_private *i915,
continue;
}
- if (!i915_gem_object_trylock(obj, NULL)) {
- /* busy, toss it back to the pile */
- if (llist_add(&obj->freed, &i915->mm.free_list))
- queue_delayed_work(i915->wq, &i915->mm.free_work, msecs_to_jiffies(10));
- continue;
- }
-
__i915_gem_object_pages_fini(obj);
- i915_gem_object_unlock(obj);
__i915_gem_free_object(obj);
/* But keep the pointer alive for RCU-protected lookups */
@@ -359,7 +351,7 @@ void i915_gem_flush_free_objects(struct drm_i915_private *i915)
static void __i915_gem_free_work(struct work_struct *work)
{
struct drm_i915_private *i915 =
- container_of(work, struct drm_i915_private, mm.free_work.work);
+ container_of(work, struct drm_i915_private, mm.free_work);
i915_gem_flush_free_objects(i915);
}
@@ -391,7 +383,7 @@ static void i915_gem_free_object(struct drm_gem_object *gem_obj)
*/
if (llist_add(&obj->freed, &i915->mm.free_list))
- queue_delayed_work(i915->wq, &i915->mm.free_work, 0);
+ queue_work(i915->wq, &i915->mm.free_work);
}
void __i915_gem_object_flush_frontbuffer(struct drm_i915_gem_object *obj,
@@ -731,6 +723,9 @@ bool i915_gem_object_needs_ccs_pages(struct drm_i915_gem_object *obj)
bool lmem_placement = false;
int i;
+ if (!HAS_FLAT_CCS(to_i915(obj->base.dev)))
+ return false;
+
for (i = 0; i < obj->mm.n_placements; i++) {
/* Compression is not allowed for the objects with smem placement */
if (obj->mm.placements[i]->type == INTEL_MEMORY_SYSTEM)
@@ -745,7 +740,7 @@ bool i915_gem_object_needs_ccs_pages(struct drm_i915_gem_object *obj)
void i915_gem_init__objects(struct drm_i915_private *i915)
{
- INIT_DELAYED_WORK(&i915->mm.free_work, __i915_gem_free_work);
+ INIT_WORK(&i915->mm.free_work, __i915_gem_free_work);
}
void i915_objects_module_exit(void)
diff --git a/drivers/gpu/drm/i915/gem/i915_gem_object_types.h b/drivers/gpu/drm/i915/gem/i915_gem_object_types.h
index 5cf36a130061..9f6b14ec189a 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_object_types.h
+++ b/drivers/gpu/drm/i915/gem/i915_gem_object_types.h
@@ -335,7 +335,6 @@ struct drm_i915_gem_object {
#define I915_BO_READONLY BIT(7)
#define I915_TILING_QUIRK_BIT 8 /* unknown swizzling; do not release! */
#define I915_BO_PROTECTED BIT(9)
-#define I915_BO_WAS_BOUND_BIT 10
/**
* @mem_flags - Mutable placement-related flags
*
@@ -616,6 +615,8 @@ struct drm_i915_gem_object {
* pages were last acquired.
*/
bool dirty:1;
+
+ u32 tlb;
} mm;
struct {
diff --git a/drivers/gpu/drm/i915/gem/i915_gem_pages.c b/drivers/gpu/drm/i915/gem/i915_gem_pages.c
index 97c820eee115..8357dbdcab5c 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_pages.c
+++ b/drivers/gpu/drm/i915/gem/i915_gem_pages.c
@@ -6,14 +6,15 @@
#include <drm/drm_cache.h>
+#include "gt/intel_gt.h"
+#include "gt/intel_gt_pm.h"
+
#include "i915_drv.h"
#include "i915_gem_object.h"
#include "i915_scatterlist.h"
#include "i915_gem_lmem.h"
#include "i915_gem_mman.h"
-#include "gt/intel_gt.h"
-
void __i915_gem_object_set_pages(struct drm_i915_gem_object *obj,
struct sg_table *pages,
unsigned int sg_page_sizes)
@@ -190,6 +191,18 @@ static void unmap_object(struct drm_i915_gem_object *obj, void *ptr)
vunmap(ptr);
}
+static void flush_tlb_invalidate(struct drm_i915_gem_object *obj)
+{
+ struct drm_i915_private *i915 = to_i915(obj->base.dev);
+ struct intel_gt *gt = to_gt(i915);
+
+ if (!obj->mm.tlb)
+ return;
+
+ intel_gt_invalidate_tlb(gt, obj->mm.tlb);
+ obj->mm.tlb = 0;
+}
+
struct sg_table *
__i915_gem_object_unset_pages(struct drm_i915_gem_object *obj)
{
@@ -215,13 +228,7 @@ __i915_gem_object_unset_pages(struct drm_i915_gem_object *obj)
__i915_gem_object_reset_page_iter(obj);
obj->mm.page_sizes.phys = obj->mm.page_sizes.sg = 0;
- if (test_and_clear_bit(I915_BO_WAS_BOUND_BIT, &obj->flags)) {
- struct drm_i915_private *i915 = to_i915(obj->base.dev);
- intel_wakeref_t wakeref;
-
- with_intel_runtime_pm_if_active(&i915->runtime_pm, wakeref)
- intel_gt_invalidate_tlbs(to_gt(i915));
- }
+ flush_tlb_invalidate(obj);
return pages;
}
diff --git a/drivers/gpu/drm/i915/gem/i915_gem_ttm.c b/drivers/gpu/drm/i915/gem/i915_gem_ttm.c
index f131dc065f47..6f3ab7ade41a 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_ttm.c
+++ b/drivers/gpu/drm/i915/gem/i915_gem_ttm.c
@@ -297,7 +297,7 @@ static struct ttm_tt *i915_ttm_tt_create(struct ttm_buffer_object *bo,
i915_tt->is_shmem = true;
}
- if (HAS_FLAT_CCS(i915) && i915_gem_object_needs_ccs_pages(obj))
+ if (i915_gem_object_needs_ccs_pages(obj))
ccs_pages = DIV_ROUND_UP(DIV_ROUND_UP(bo->base.size,
NUM_BYTES_PER_CCS_BYTE),
PAGE_SIZE);
diff --git a/drivers/gpu/drm/i915/gt/intel_gt.c b/drivers/gpu/drm/i915/gt/intel_gt.c
index 68c2b0d8f187..f435e06125aa 100644
--- a/drivers/gpu/drm/i915/gt/intel_gt.c
+++ b/drivers/gpu/drm/i915/gt/intel_gt.c
@@ -11,7 +11,9 @@
#include "pxp/intel_pxp.h"
#include "i915_drv.h"
+#include "i915_perf_oa_regs.h"
#include "intel_context.h"
+#include "intel_engine_pm.h"
#include "intel_engine_regs.h"
#include "intel_ggtt_gmch.h"
#include "intel_gt.h"
@@ -36,8 +38,6 @@ static void __intel_gt_init_early(struct intel_gt *gt)
{
spin_lock_init(&gt->irq_lock);
- mutex_init(&gt->tlb_invalidate_lock);
-
INIT_LIST_HEAD(&gt->closed_vma);
spin_lock_init(&gt->closed_lock);
@@ -48,6 +48,8 @@ static void __intel_gt_init_early(struct intel_gt *gt)
intel_gt_init_reset(gt);
intel_gt_init_requests(gt);
intel_gt_init_timelines(gt);
+ mutex_init(&gt->tlb.invalidate_lock);
+ seqcount_mutex_init(&gt->tlb.seqno, &gt->tlb.invalidate_lock);
intel_gt_pm_init_early(gt);
intel_uc_init_early(&gt->uc);
@@ -768,6 +770,7 @@ void intel_gt_driver_late_release_all(struct drm_i915_private *i915)
intel_gt_fini_requests(gt);
intel_gt_fini_reset(gt);
intel_gt_fini_timelines(gt);
+ mutex_destroy(&gt->tlb.invalidate_lock);
intel_engines_free(gt);
}
}
@@ -906,7 +909,7 @@ get_reg_and_bit(const struct intel_engine_cs *engine, const bool gen8,
return rb;
}
-void intel_gt_invalidate_tlbs(struct intel_gt *gt)
+static void mmio_invalidate_full(struct intel_gt *gt)
{
static const i915_reg_t gen8_regs[] = {
[RENDER_CLASS] = GEN8_RTCR,
@@ -924,13 +927,11 @@ void intel_gt_invalidate_tlbs(struct intel_gt *gt)
struct drm_i915_private *i915 = gt->i915;
struct intel_uncore *uncore = gt->uncore;
struct intel_engine_cs *engine;
+ intel_engine_mask_t awake, tmp;
enum intel_engine_id id;
const i915_reg_t *regs;
unsigned int num = 0;
- if (I915_SELFTEST_ONLY(gt->awake == -ENODEV))
- return;
-
if (GRAPHICS_VER(i915) == 12) {
regs = gen12_regs;
num = ARRAY_SIZE(gen12_regs);
@@ -945,28 +946,41 @@ void intel_gt_invalidate_tlbs(struct intel_gt *gt)
"Platform does not implement TLB invalidation!"))
return;
- GEM_TRACE("\n");
-
- assert_rpm_wakelock_held(&i915->runtime_pm);
-
- mutex_lock(&gt->tlb_invalidate_lock);
intel_uncore_forcewake_get(uncore, FORCEWAKE_ALL);
spin_lock_irq(&uncore->lock); /* serialise invalidate with GT reset */
+ awake = 0;
for_each_engine(engine, gt, id) {
struct reg_and_bit rb;
+ if (!intel_engine_pm_is_awake(engine))
+ continue;
+
rb = get_reg_and_bit(engine, regs == gen8_regs, regs, num);
if (!i915_mmio_reg_offset(rb.reg))
continue;
intel_uncore_write_fw(uncore, rb.reg, rb.bit);
+ awake |= engine->mask;
}
+ GT_TRACE(gt, "invalidated engines %08x\n", awake);
+
+ /* Wa_2207587034:tgl,dg1,rkl,adl-s,adl-p */
+ if (awake &&
+ (IS_TIGERLAKE(i915) ||
+ IS_DG1(i915) ||
+ IS_ROCKETLAKE(i915) ||
+ IS_ALDERLAKE_S(i915) ||
+ IS_ALDERLAKE_P(i915)))
+ intel_uncore_write_fw(uncore, GEN12_OA_TLB_INV_CR, 1);
+
spin_unlock_irq(&uncore->lock);
- for_each_engine(engine, gt, id) {
+ for_each_engine_masked(engine, gt, awake, tmp) {
+ struct reg_and_bit rb;
+
/*
* HW architecture suggest typical invalidation time at 40us,
* with pessimistic cases up to 100us and a recommendation to
@@ -974,12 +988,8 @@ void intel_gt_invalidate_tlbs(struct intel_gt *gt)
*/
const unsigned int timeout_us = 100;
const unsigned int timeout_ms = 4;
- struct reg_and_bit rb;
rb = get_reg_and_bit(engine, regs == gen8_regs, regs, num);
- if (!i915_mmio_reg_offset(rb.reg))
- continue;
-
if (__intel_wait_for_register_fw(uncore,
rb.reg, rb.bit, 0,
timeout_us, timeout_ms,
@@ -996,5 +1006,38 @@ void intel_gt_invalidate_tlbs(struct intel_gt *gt)
* transitions.
*/
intel_uncore_forcewake_put_delayed(uncore, FORCEWAKE_ALL);
- mutex_unlock(&gt->tlb_invalidate_lock);
+}
+
+static bool tlb_seqno_passed(const struct intel_gt *gt, u32 seqno)
+{
+ u32 cur = intel_gt_tlb_seqno(gt);
+
+ /* Only skip if a *full* TLB invalidate barrier has passed */
+ return (s32)(cur - ALIGN(seqno, 2)) > 0;
+}
+
+void intel_gt_invalidate_tlb(struct intel_gt *gt, u32 seqno)
+{
+ intel_wakeref_t wakeref;
+
+ if (I915_SELFTEST_ONLY(gt->awake == -ENODEV))
+ return;
+
+ if (intel_gt_is_wedged(gt))
+ return;
+
+ if (tlb_seqno_passed(gt, seqno))
+ return;
+
+ with_intel_gt_pm_if_awake(gt, wakeref) {
+ mutex_lock(&gt->tlb.invalidate_lock);
+ if (tlb_seqno_passed(gt, seqno))
+ goto unlock;
+
+ mmio_invalidate_full(gt);
+
+ write_seqcount_invalidate(&gt->tlb.seqno);
+unlock:
+ mutex_unlock(&gt->tlb.invalidate_lock);
+ }
}
diff --git a/drivers/gpu/drm/i915/gt/intel_gt.h b/drivers/gpu/drm/i915/gt/intel_gt.h
index 82d6f248d876..40b06adf509a 100644
--- a/drivers/gpu/drm/i915/gt/intel_gt.h
+++ b/drivers/gpu/drm/i915/gt/intel_gt.h
@@ -101,6 +101,16 @@ void intel_gt_info_print(const struct intel_gt_info *info,
void intel_gt_watchdog_work(struct work_struct *work);
-void intel_gt_invalidate_tlbs(struct intel_gt *gt);
+static inline u32 intel_gt_tlb_seqno(const struct intel_gt *gt)
+{
+ return seqprop_sequence(&gt->tlb.seqno);
+}
+
+static inline u32 intel_gt_next_invalidate_tlb_full(const struct intel_gt *gt)
+{
+ return intel_gt_tlb_seqno(gt) | 1;
+}
+
+void intel_gt_invalidate_tlb(struct intel_gt *gt, u32 seqno);
#endif /* __INTEL_GT_H__ */
diff --git a/drivers/gpu/drm/i915/gt/intel_gt_pm.h b/drivers/gpu/drm/i915/gt/intel_gt_pm.h
index bc898df7a48c..a334787a4939 100644
--- a/drivers/gpu/drm/i915/gt/intel_gt_pm.h
+++ b/drivers/gpu/drm/i915/gt/intel_gt_pm.h
@@ -55,6 +55,9 @@ static inline void intel_gt_pm_might_put(struct intel_gt *gt)
for (tmp = 1, intel_gt_pm_get(gt); tmp; \
intel_gt_pm_put(gt), tmp = 0)
+#define with_intel_gt_pm_if_awake(gt, wf) \
+ for (wf = intel_gt_pm_get_if_awake(gt); wf; intel_gt_pm_put_async(gt), wf = 0)
+
static inline int intel_gt_pm_wait_for_idle(struct intel_gt *gt)
{
return intel_wakeref_wait_for_idle(&gt->wakeref);
diff --git a/drivers/gpu/drm/i915/gt/intel_gt_types.h b/drivers/gpu/drm/i915/gt/intel_gt_types.h
index df708802889d..3804a583382b 100644
--- a/drivers/gpu/drm/i915/gt/intel_gt_types.h
+++ b/drivers/gpu/drm/i915/gt/intel_gt_types.h
@@ -11,6 +11,7 @@
#include <linux/llist.h>
#include <linux/mutex.h>
#include <linux/notifier.h>
+#include <linux/seqlock.h>
#include <linux/spinlock.h>
#include <linux/types.h>
#include <linux/workqueue.h>
@@ -83,7 +84,22 @@ struct intel_gt {
struct intel_uc uc;
struct intel_gsc gsc;
- struct mutex tlb_invalidate_lock;
+ struct {
+ /* Serialize global tlb invalidations */
+ struct mutex invalidate_lock;
+
+ /*
+ * Batch TLB invalidations
+ *
+ * After unbinding the PTE, we need to ensure the TLB
+ * are invalidated prior to releasing the physical pages.
+ * But we only need one such invalidation for all unbinds,
+ * so we track how many TLB invalidations have been
+ * performed since unbind the PTE and only emit an extra
+ * invalidate if no full barrier has been passed.
+ */
+ seqcount_mutex_t seqno;
+ } tlb;
struct i915_wa_list wa_list;
diff --git a/drivers/gpu/drm/i915/gt/intel_llc.c b/drivers/gpu/drm/i915/gt/intel_llc.c
index 14fe65812e42..1d19c073ba2e 100644
--- a/drivers/gpu/drm/i915/gt/intel_llc.c
+++ b/drivers/gpu/drm/i915/gt/intel_llc.c
@@ -12,6 +12,7 @@
#include "intel_llc.h"
#include "intel_mchbar_regs.h"
#include "intel_pcode.h"
+#include "intel_rps.h"
struct ia_constants {
unsigned int min_gpu_freq;
@@ -55,9 +56,6 @@ static bool get_ia_constants(struct intel_llc *llc,
if (!HAS_LLC(i915) || IS_DGFX(i915))
return false;
- if (rps->max_freq <= rps->min_freq)
- return false;
-
consts->max_ia_freq = cpu_max_MHz();
consts->min_ring_freq =
@@ -65,13 +63,8 @@ static bool get_ia_constants(struct intel_llc *llc,
/* convert DDR frequency from units of 266.6MHz to bandwidth */
consts->min_ring_freq = mult_frac(consts->min_ring_freq, 8, 3);
- consts->min_gpu_freq = rps->min_freq;
- consts->max_gpu_freq = rps->max_freq;
- if (GRAPHICS_VER(i915) >= 9) {
- /* Convert GT frequency to 50 HZ units */
- consts->min_gpu_freq /= GEN9_FREQ_SCALER;
- consts->max_gpu_freq /= GEN9_FREQ_SCALER;
- }
+ consts->min_gpu_freq = intel_rps_get_min_raw_freq(rps);
+ consts->max_gpu_freq = intel_rps_get_max_raw_freq(rps);
return true;
}
@@ -131,6 +124,12 @@ static void gen6_update_ring_freq(struct intel_llc *llc)
return;
/*
+ * Although this is unlikely on any platform during initialization,
+ * let's ensure we don't get accidentally into infinite loop
+ */
+ if (consts.max_gpu_freq <= consts.min_gpu_freq)
+ return;
+ /*
* For each potential GPU frequency, load a ring frequency we'd like
* to use for memory access. We do this by specifying the IA frequency
* the PCU should use as a reference to determine the ring frequency.
diff --git a/drivers/gpu/drm/i915/gt/intel_migrate.c b/drivers/gpu/drm/i915/gt/intel_migrate.c
index 2c35324b5f68..933648cc90ff 100644
--- a/drivers/gpu/drm/i915/gt/intel_migrate.c
+++ b/drivers/gpu/drm/i915/gt/intel_migrate.c
@@ -638,9 +638,9 @@ static int emit_copy(struct i915_request *rq,
return 0;
}
-static int scatter_list_length(struct scatterlist *sg)
+static u64 scatter_list_length(struct scatterlist *sg)
{
- int len = 0;
+ u64 len = 0;
while (sg && sg_dma_len(sg)) {
len += sg_dma_len(sg);
@@ -650,28 +650,26 @@ static int scatter_list_length(struct scatterlist *sg)
return len;
}
-static void
+static int
calculate_chunk_sz(struct drm_i915_private *i915, bool src_is_lmem,
- int *src_sz, u32 bytes_to_cpy, u32 ccs_bytes_to_cpy)
+ u64 bytes_to_cpy, u64 ccs_bytes_to_cpy)
{
- if (ccs_bytes_to_cpy) {
- if (!src_is_lmem)
- /*
- * When CHUNK_SZ is passed all the pages upto CHUNK_SZ
- * will be taken for the blt. in Flat-ccs supported
- * platform Smem obj will have more pages than required
- * for main meory hence limit it to the required size
- * for main memory
- */
- *src_sz = min_t(int, bytes_to_cpy, CHUNK_SZ);
- } else { /* ccs handling is not required */
- *src_sz = CHUNK_SZ;
- }
+ if (ccs_bytes_to_cpy && !src_is_lmem)
+ /*
+ * When CHUNK_SZ is passed all the pages upto CHUNK_SZ
+ * will be taken for the blt. in Flat-ccs supported
+ * platform Smem obj will have more pages than required
+ * for main meory hence limit it to the required size
+ * for main memory
+ */
+ return min_t(u64, bytes_to_cpy, CHUNK_SZ);
+ else
+ return CHUNK_SZ;
}
-static void get_ccs_sg_sgt(struct sgt_dma *it, u32 bytes_to_cpy)
+static void get_ccs_sg_sgt(struct sgt_dma *it, u64 bytes_to_cpy)
{
- u32 len;
+ u64 len;
do {
GEM_BUG_ON(!it->sg || !sg_dma_len(it->sg));
@@ -702,13 +700,13 @@ intel_context_migrate_copy(struct intel_context *ce,
{
struct sgt_dma it_src = sg_sgt(src), it_dst = sg_sgt(dst), it_ccs;
struct drm_i915_private *i915 = ce->engine->i915;
- u32 ccs_bytes_to_cpy = 0, bytes_to_cpy;
+ u64 ccs_bytes_to_cpy = 0, bytes_to_cpy;
enum i915_cache_level ccs_cache_level;
u32 src_offset, dst_offset;
u8 src_access, dst_access;
struct i915_request *rq;
- int src_sz, dst_sz;
- bool ccs_is_src;
+ u64 src_sz, dst_sz;
+ bool ccs_is_src, overwrite_ccs;
int err;
GEM_BUG_ON(ce->vm != ce->engine->gt->migrate.context->vm);
@@ -749,6 +747,8 @@ intel_context_migrate_copy(struct intel_context *ce,
get_ccs_sg_sgt(&it_ccs, bytes_to_cpy);
}
+ overwrite_ccs = HAS_FLAT_CCS(i915) && !ccs_bytes_to_cpy && dst_is_lmem;
+
src_offset = 0;
dst_offset = CHUNK_SZ;
if (HAS_64K_PAGES(ce->engine->i915)) {
@@ -788,8 +788,8 @@ intel_context_migrate_copy(struct intel_context *ce,
if (err)
goto out_rq;
- calculate_chunk_sz(i915, src_is_lmem, &src_sz,
- bytes_to_cpy, ccs_bytes_to_cpy);
+ src_sz = calculate_chunk_sz(i915, src_is_lmem,
+ bytes_to_cpy, ccs_bytes_to_cpy);
len = emit_pte(rq, &it_src, src_cache_level, src_is_lmem,
src_offset, src_sz);
@@ -852,6 +852,25 @@ intel_context_migrate_copy(struct intel_context *ce,
if (err)
goto out_rq;
ccs_bytes_to_cpy -= ccs_sz;
+ } else if (overwrite_ccs) {
+ err = rq->engine->emit_flush(rq, EMIT_INVALIDATE);
+ if (err)
+ goto out_rq;
+
+ /*
+ * While we can't always restore/manage the CCS state,
+ * we still need to ensure we don't leak the CCS state
+ * from the previous user, so make sure we overwrite it
+ * with something.
+ */
+ err = emit_copy_ccs(rq, dst_offset, INDIRECT_ACCESS,
+ dst_offset, DIRECT_ACCESS, len);
+ if (err)
+ goto out_rq;
+
+ err = rq->engine->emit_flush(rq, EMIT_INVALIDATE);
+ if (err)
+ goto out_rq;
}
/* Arbitration is re-enabled between requests. */
diff --git a/drivers/gpu/drm/i915/gt/intel_ppgtt.c b/drivers/gpu/drm/i915/gt/intel_ppgtt.c
index d8b94d638559..6ee8d1127016 100644
--- a/drivers/gpu/drm/i915/gt/intel_ppgtt.c
+++ b/drivers/gpu/drm/i915/gt/intel_ppgtt.c
@@ -206,8 +206,12 @@ void ppgtt_bind_vma(struct i915_address_space *vm,
void ppgtt_unbind_vma(struct i915_address_space *vm,
struct i915_vma_resource *vma_res)
{
- if (vma_res->allocated)
- vm->clear_range(vm, vma_res->start, vma_res->vma_size);
+ if (!vma_res->allocated)
+ return;
+
+ vm->clear_range(vm, vma_res->start, vma_res->vma_size);
+ if (vma_res->tlb)
+ vma_invalidate_tlb(vm, vma_res->tlb);
}
static unsigned long pd_count(u64 size, int shift)
diff --git a/drivers/gpu/drm/i915/gt/intel_region_lmem.c b/drivers/gpu/drm/i915/gt/intel_region_lmem.c
index 6e90032e12e9..aa6aed837194 100644
--- a/drivers/gpu/drm/i915/gt/intel_region_lmem.c
+++ b/drivers/gpu/drm/i915/gt/intel_region_lmem.c
@@ -15,6 +15,7 @@
#include "gt/intel_gt_mcr.h"
#include "gt/intel_gt_regs.h"
+#ifdef CONFIG_64BIT
static void _release_bars(struct pci_dev *pdev)
{
int resno;
@@ -111,6 +112,9 @@ static void i915_resize_lmem_bar(struct drm_i915_private *i915, resource_size_t
pci_assign_unassigned_bus_resources(pdev->bus);
pci_write_config_dword(pdev, PCI_COMMAND, pci_cmd);
}
+#else
+static void i915_resize_lmem_bar(struct drm_i915_private *i915, resource_size_t lmem_size) {}
+#endif
static int
region_lmem_release(struct intel_memory_region *mem)
diff --git a/drivers/gpu/drm/i915/gt/intel_rps.c b/drivers/gpu/drm/i915/gt/intel_rps.c
index fb3f57ee450b..7bb967034679 100644
--- a/drivers/gpu/drm/i915/gt/intel_rps.c
+++ b/drivers/gpu/drm/i915/gt/intel_rps.c
@@ -2126,6 +2126,31 @@ u32 intel_rps_get_max_frequency(struct intel_rps *rps)
return intel_gpu_freq(rps, rps->max_freq_softlimit);
}
+/**
+ * intel_rps_get_max_raw_freq - returns the max frequency in some raw format.
+ * @rps: the intel_rps structure
+ *
+ * Returns the max frequency in a raw format. In newer platforms raw is in
+ * units of 50 MHz.
+ */
+u32 intel_rps_get_max_raw_freq(struct intel_rps *rps)
+{
+ struct intel_guc_slpc *slpc = rps_to_slpc(rps);
+ u32 freq;
+
+ if (rps_uses_slpc(rps)) {
+ return DIV_ROUND_CLOSEST(slpc->rp0_freq,
+ GT_FREQUENCY_MULTIPLIER);
+ } else {
+ freq = rps->max_freq;
+ if (GRAPHICS_VER(rps_to_i915(rps)) >= 9) {
+ /* Convert GT frequency to 50 MHz units */
+ freq /= GEN9_FREQ_SCALER;
+ }
+ return freq;
+ }
+}
+
u32 intel_rps_get_rp0_frequency(struct intel_rps *rps)
{
struct intel_guc_slpc *slpc = rps_to_slpc(rps);
@@ -2214,6 +2239,31 @@ u32 intel_rps_get_min_frequency(struct intel_rps *rps)
return intel_gpu_freq(rps, rps->min_freq_softlimit);
}
+/**
+ * intel_rps_get_min_raw_freq - returns the min frequency in some raw format.
+ * @rps: the intel_rps structure
+ *
+ * Returns the min frequency in a raw format. In newer platforms raw is in
+ * units of 50 MHz.
+ */
+u32 intel_rps_get_min_raw_freq(struct intel_rps *rps)
+{
+ struct intel_guc_slpc *slpc = rps_to_slpc(rps);
+ u32 freq;
+
+ if (rps_uses_slpc(rps)) {
+ return DIV_ROUND_CLOSEST(slpc->min_freq,
+ GT_FREQUENCY_MULTIPLIER);
+ } else {
+ freq = rps->min_freq;
+ if (GRAPHICS_VER(rps_to_i915(rps)) >= 9) {
+ /* Convert GT frequency to 50 MHz units */
+ freq /= GEN9_FREQ_SCALER;
+ }
+ return freq;
+ }
+}
+
static int set_min_freq(struct intel_rps *rps, u32 val)
{
int ret = 0;
diff --git a/drivers/gpu/drm/i915/gt/intel_rps.h b/drivers/gpu/drm/i915/gt/intel_rps.h
index 1e8d56491308..4509dfdc52e0 100644
--- a/drivers/gpu/drm/i915/gt/intel_rps.h
+++ b/drivers/gpu/drm/i915/gt/intel_rps.h
@@ -37,8 +37,10 @@ u32 intel_rps_get_cagf(struct intel_rps *rps, u32 rpstat1);
u32 intel_rps_read_actual_frequency(struct intel_rps *rps);
u32 intel_rps_get_requested_frequency(struct intel_rps *rps);
u32 intel_rps_get_min_frequency(struct intel_rps *rps);
+u32 intel_rps_get_min_raw_freq(struct intel_rps *rps);
int intel_rps_set_min_frequency(struct intel_rps *rps, u32 val);
u32 intel_rps_get_max_frequency(struct intel_rps *rps);
+u32 intel_rps_get_max_raw_freq(struct intel_rps *rps);
int intel_rps_set_max_frequency(struct intel_rps *rps, u32 val);
u32 intel_rps_get_rp0_frequency(struct intel_rps *rps);
u32 intel_rps_get_rp1_frequency(struct intel_rps *rps);
diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c b/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c
index 76916aed897a..3e91f44829e9 100644
--- a/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c
+++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c
@@ -1438,7 +1438,12 @@ void intel_guc_busyness_park(struct intel_gt *gt)
if (!guc_submission_initialized(guc))
return;
- cancel_delayed_work(&guc->timestamp.work);
+ /*
+ * There is a race with suspend flow where the worker runs after suspend
+ * and causes an unclaimed register access warning. Cancel the worker
+ * synchronously here.
+ */
+ cancel_delayed_work_sync(&guc->timestamp.work);
/*
* Before parking, we should sample engine busyness stats if we need to.
@@ -4027,6 +4032,13 @@ static inline void guc_init_lrc_mapping(struct intel_guc *guc)
xa_destroy(&guc->context_lookup);
/*
+ * A reset might have occurred while we had a pending stalled request,
+ * so make sure we clean that up.
+ */
+ guc->stalled_request = NULL;
+ guc->submission_stall_reason = STALL_NONE;
+
+ /*
* Some contexts might have been pinned before we enabled GuC
* submission, so we need to add them to the GuC bookeeping.
* Also, after a reset the of the GuC we want to make sure that the
diff --git a/drivers/gpu/drm/i915/gvt/aperture_gm.c b/drivers/gpu/drm/i915/gvt/aperture_gm.c
index 557f3314291a..3b81a6d35a7b 100644
--- a/drivers/gpu/drm/i915/gvt/aperture_gm.c
+++ b/drivers/gpu/drm/i915/gvt/aperture_gm.c
@@ -298,7 +298,7 @@ no_enough_resource:
}
/**
- * inte_gvt_free_vgpu_resource - free HW resource owned by a vGPU
+ * intel_vgpu_free_resource() - free HW resource owned by a vGPU
* @vgpu: a vGPU
*
* This function is used to free the HW resource owned by a vGPU.
@@ -328,7 +328,7 @@ void intel_vgpu_reset_resource(struct intel_vgpu *vgpu)
}
/**
- * intel_alloc_vgpu_resource - allocate HW resource for a vGPU
+ * intel_vgpu_alloc_resource() - allocate HW resource for a vGPU
* @vgpu: vGPU
* @param: vGPU creation params
*
diff --git a/drivers/gpu/drm/i915/gvt/gtt.c b/drivers/gpu/drm/i915/gvt/gtt.c
index b4f69364f9a1..ce0eb03709c3 100644
--- a/drivers/gpu/drm/i915/gvt/gtt.c
+++ b/drivers/gpu/drm/i915/gvt/gtt.c
@@ -2341,7 +2341,7 @@ static int emulate_ggtt_mmio_write(struct intel_vgpu *vgpu, unsigned int off,
gvt_vgpu_err("fail to populate guest ggtt entry\n");
/* guest driver may read/write the entry when partial
* update the entry in this situation p2m will fail
- * settting the shadow entry to point to a scratch page
+ * setting the shadow entry to point to a scratch page
*/
ops->set_pfn(&m, gvt->gtt.scratch_mfn);
} else
diff --git a/drivers/gpu/drm/i915/gvt/handlers.c b/drivers/gpu/drm/i915/gvt/handlers.c
index beea5895e499..61423da36710 100644
--- a/drivers/gpu/drm/i915/gvt/handlers.c
+++ b/drivers/gpu/drm/i915/gvt/handlers.c
@@ -905,7 +905,7 @@ static int update_fdi_rx_iir_status(struct intel_vgpu *vgpu,
else if (FDI_RX_IMR_TO_PIPE(offset) != INVALID_INDEX)
index = FDI_RX_IMR_TO_PIPE(offset);
else {
- gvt_vgpu_err("Unsupport registers %x\n", offset);
+ gvt_vgpu_err("Unsupported registers %x\n", offset);
return -EINVAL;
}
@@ -3052,7 +3052,7 @@ int intel_vgpu_default_mmio_read(struct intel_vgpu *vgpu, unsigned int offset,
}
/**
- * intel_t_default_mmio_write - default MMIO write handler
+ * intel_vgpu_default_mmio_write() - default MMIO write handler
* @vgpu: a vGPU
* @offset: access offset
* @p_data: write data buffer
diff --git a/drivers/gpu/drm/i915/gvt/mmio_context.c b/drivers/gpu/drm/i915/gvt/mmio_context.c
index c85bafe7539e..1c6e941c9666 100644
--- a/drivers/gpu/drm/i915/gvt/mmio_context.c
+++ b/drivers/gpu/drm/i915/gvt/mmio_context.c
@@ -546,7 +546,7 @@ static void switch_mmio(struct intel_vgpu *pre,
}
/**
- * intel_gvt_switch_render_mmio - switch mmio context of specific engine
+ * intel_gvt_switch_mmio - switch mmio context of specific engine
* @pre: the last vGPU that own the engine
* @next: the vGPU to switch to
* @engine: the engine
diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
index d25647be25d1..086bbe8945d6 100644
--- a/drivers/gpu/drm/i915/i915_drv.h
+++ b/drivers/gpu/drm/i915/i915_drv.h
@@ -247,7 +247,7 @@ struct i915_gem_mm {
* List of objects which are pending destruction.
*/
struct llist_head free_list;
- struct delayed_work free_work;
+ struct work_struct free_work;
/**
* Count of objects pending destructions. Used to skip needlessly
* waiting on an RCU barrier if no objects are waiting to be freed.
@@ -1378,7 +1378,7 @@ static inline void i915_gem_drain_freed_objects(struct drm_i915_private *i915)
* armed the work again.
*/
while (atomic_read(&i915->mm.free_count)) {
- flush_delayed_work(&i915->mm.free_work);
+ flush_work(&i915->mm.free_work);
flush_delayed_work(&i915->bdev.wq);
rcu_barrier();
}
diff --git a/drivers/gpu/drm/i915/i915_reg.h b/drivers/gpu/drm/i915/i915_reg.h
index 3168d7007e10..135d04c2d41c 100644
--- a/drivers/gpu/drm/i915/i915_reg.h
+++ b/drivers/gpu/drm/i915/i915_reg.h
@@ -1857,14 +1857,14 @@
#define GT0_PERF_LIMIT_REASONS _MMIO(0x1381a8)
#define GT0_PERF_LIMIT_REASONS_MASK 0xde3
-#define PROCHOT_MASK REG_BIT(1)
-#define THERMAL_LIMIT_MASK REG_BIT(2)
-#define RATL_MASK REG_BIT(6)
-#define VR_THERMALERT_MASK REG_BIT(7)
-#define VR_TDC_MASK REG_BIT(8)
-#define POWER_LIMIT_4_MASK REG_BIT(9)
-#define POWER_LIMIT_1_MASK REG_BIT(11)
-#define POWER_LIMIT_2_MASK REG_BIT(12)
+#define PROCHOT_MASK REG_BIT(0)
+#define THERMAL_LIMIT_MASK REG_BIT(1)
+#define RATL_MASK REG_BIT(5)
+#define VR_THERMALERT_MASK REG_BIT(6)
+#define VR_TDC_MASK REG_BIT(7)
+#define POWER_LIMIT_4_MASK REG_BIT(8)
+#define POWER_LIMIT_1_MASK REG_BIT(10)
+#define POWER_LIMIT_2_MASK REG_BIT(11)
#define CHV_CLK_CTL1 _MMIO(0x101100)
#define VLV_CLK_CTL2 _MMIO(0x101104)
diff --git a/drivers/gpu/drm/i915/i915_vma.c b/drivers/gpu/drm/i915/i915_vma.c
index ef3b04c7e153..373582cfd8f3 100644
--- a/drivers/gpu/drm/i915/i915_vma.c
+++ b/drivers/gpu/drm/i915/i915_vma.c
@@ -538,8 +538,6 @@ int i915_vma_bind(struct i915_vma *vma,
bind_flags);
}
- set_bit(I915_BO_WAS_BOUND_BIT, &vma->obj->flags);
-
atomic_or(bind_flags, &vma->flags);
return 0;
}
@@ -1310,6 +1308,19 @@ err_unpin:
return err;
}
+void vma_invalidate_tlb(struct i915_address_space *vm, u32 *tlb)
+{
+ /*
+ * Before we release the pages that were bound by this vma, we
+ * must invalidate all the TLBs that may still have a reference
+ * back to our physical address. It only needs to be done once,
+ * so after updating the PTE to point away from the pages, record
+ * the most recent TLB invalidation seqno, and if we have not yet
+ * flushed the TLBs upon release, perform a full invalidation.
+ */
+ WRITE_ONCE(*tlb, intel_gt_next_invalidate_tlb_full(vm->gt));
+}
+
static void __vma_put_pages(struct i915_vma *vma, unsigned int count)
{
/* We allocate under vma_get_pages, so beware the shrinker */
@@ -1871,12 +1882,13 @@ int _i915_vma_move_to_active(struct i915_vma *vma,
enum dma_resv_usage usage;
int idx;
- obj->read_domains = 0;
if (flags & EXEC_OBJECT_WRITE) {
usage = DMA_RESV_USAGE_WRITE;
obj->write_domain = I915_GEM_DOMAIN_RENDER;
+ obj->read_domains = 0;
} else {
usage = DMA_RESV_USAGE_READ;
+ obj->write_domain = 0;
}
dma_fence_array_for_each(curr, idx, fence)
@@ -1941,7 +1953,12 @@ struct dma_fence *__i915_vma_evict(struct i915_vma *vma, bool async)
vma->vm->skip_pte_rewrite;
trace_i915_vma_unbind(vma);
- unbind_fence = i915_vma_resource_unbind(vma_res);
+ if (async)
+ unbind_fence = i915_vma_resource_unbind(vma_res,
+ &vma->obj->mm.tlb);
+ else
+ unbind_fence = i915_vma_resource_unbind(vma_res, NULL);
+
vma->resource = NULL;
atomic_and(~(I915_VMA_BIND_MASK | I915_VMA_ERROR | I915_VMA_GGTT_WRITE),
@@ -1949,10 +1966,13 @@ struct dma_fence *__i915_vma_evict(struct i915_vma *vma, bool async)
i915_vma_detach(vma);
- if (!async && unbind_fence) {
- dma_fence_wait(unbind_fence, false);
- dma_fence_put(unbind_fence);
- unbind_fence = NULL;
+ if (!async) {
+ if (unbind_fence) {
+ dma_fence_wait(unbind_fence, false);
+ dma_fence_put(unbind_fence);
+ unbind_fence = NULL;
+ }
+ vma_invalidate_tlb(vma->vm, &vma->obj->mm.tlb);
}
/*
diff --git a/drivers/gpu/drm/i915/i915_vma.h b/drivers/gpu/drm/i915/i915_vma.h
index 88ca0bd9c900..33a58f605d75 100644
--- a/drivers/gpu/drm/i915/i915_vma.h
+++ b/drivers/gpu/drm/i915/i915_vma.h
@@ -213,6 +213,7 @@ bool i915_vma_misplaced(const struct i915_vma *vma,
u64 size, u64 alignment, u64 flags);
void __i915_vma_set_map_and_fenceable(struct i915_vma *vma);
void i915_vma_revoke_mmap(struct i915_vma *vma);
+void vma_invalidate_tlb(struct i915_address_space *vm, u32 *tlb);
struct dma_fence *__i915_vma_evict(struct i915_vma *vma, bool async);
int __i915_vma_unbind(struct i915_vma *vma);
int __must_check i915_vma_unbind(struct i915_vma *vma);
diff --git a/drivers/gpu/drm/i915/i915_vma_resource.c b/drivers/gpu/drm/i915/i915_vma_resource.c
index 27c55027387a..5a67995ea5fe 100644
--- a/drivers/gpu/drm/i915/i915_vma_resource.c
+++ b/drivers/gpu/drm/i915/i915_vma_resource.c
@@ -223,10 +223,13 @@ i915_vma_resource_fence_notify(struct i915_sw_fence *fence,
* Return: A refcounted pointer to a dma-fence that signals when unbinding is
* complete.
*/
-struct dma_fence *i915_vma_resource_unbind(struct i915_vma_resource *vma_res)
+struct dma_fence *i915_vma_resource_unbind(struct i915_vma_resource *vma_res,
+ u32 *tlb)
{
struct i915_address_space *vm = vma_res->vm;
+ vma_res->tlb = tlb;
+
/* Reference for the sw fence */
i915_vma_resource_get(vma_res);
diff --git a/drivers/gpu/drm/i915/i915_vma_resource.h b/drivers/gpu/drm/i915/i915_vma_resource.h
index 5d8427caa2ba..06923d1816e7 100644
--- a/drivers/gpu/drm/i915/i915_vma_resource.h
+++ b/drivers/gpu/drm/i915/i915_vma_resource.h
@@ -67,6 +67,7 @@ struct i915_page_sizes {
* taken when the unbind is scheduled.
* @skip_pte_rewrite: During ggtt suspend and vm takedown pte rewriting
* needs to be skipped for unbind.
+ * @tlb: pointer for obj->mm.tlb, if async unbind. Otherwise, NULL
*
* The lifetime of a struct i915_vma_resource is from a binding request to
* the actual possible asynchronous unbind has completed.
@@ -119,6 +120,8 @@ struct i915_vma_resource {
bool immediate_unbind:1;
bool needs_wakeref:1;
bool skip_pte_rewrite:1;
+
+ u32 *tlb;
};
bool i915_vma_resource_hold(struct i915_vma_resource *vma_res,
@@ -131,7 +134,8 @@ struct i915_vma_resource *i915_vma_resource_alloc(void);
void i915_vma_resource_free(struct i915_vma_resource *vma_res);
-struct dma_fence *i915_vma_resource_unbind(struct i915_vma_resource *vma_res);
+struct dma_fence *i915_vma_resource_unbind(struct i915_vma_resource *vma_res,
+ u32 *tlb);
void __i915_vma_resource_init(struct i915_vma_resource *vma_res);
diff --git a/drivers/gpu/drm/i915/intel_gvt_mmio_table.c b/drivers/gpu/drm/i915/intel_gvt_mmio_table.c
index 157e166672d7..5595639d0033 100644
--- a/drivers/gpu/drm/i915/intel_gvt_mmio_table.c
+++ b/drivers/gpu/drm/i915/intel_gvt_mmio_table.c
@@ -1076,7 +1076,8 @@ static int iterate_skl_plus_mmio(struct intel_gvt_mmio_table_iter *iter)
MMIO_D(GEN8_HDC_CHICKEN1);
MMIO_D(GEN9_WM_CHICKEN3);
- if (IS_KABYLAKE(dev_priv) || IS_COFFEELAKE(dev_priv))
+ if (IS_KABYLAKE(dev_priv) ||
+ IS_COFFEELAKE(dev_priv) || IS_COMETLAKE(dev_priv))
MMIO_D(GAMT_CHKN_BIT_REG);
if (!IS_BROXTON(dev_priv))
MMIO_D(GEN9_CTX_PREEMPT_REG);
diff --git a/drivers/gpu/drm/i915/intel_pm.c b/drivers/gpu/drm/i915/intel_pm.c
index f06babdb3a8c..9fe4b583cc28 100644
--- a/drivers/gpu/drm/i915/intel_pm.c
+++ b/drivers/gpu/drm/i915/intel_pm.c
@@ -6561,7 +6561,10 @@ void skl_wm_get_hw_state(struct drm_i915_private *dev_priv)
enum plane_id plane_id;
u8 slices;
- skl_pipe_wm_get_hw_state(crtc, &crtc_state->wm.skl.optimal);
+ memset(&crtc_state->wm.skl.optimal, 0,
+ sizeof(crtc_state->wm.skl.optimal));
+ if (crtc_state->hw.active)
+ skl_pipe_wm_get_hw_state(crtc, &crtc_state->wm.skl.optimal);
crtc_state->wm.skl.raw = crtc_state->wm.skl.optimal;
memset(&dbuf_state->ddb[pipe], 0, sizeof(dbuf_state->ddb[pipe]));
@@ -6572,6 +6575,9 @@ void skl_wm_get_hw_state(struct drm_i915_private *dev_priv)
struct skl_ddb_entry *ddb_y =
&crtc_state->wm.skl.plane_ddb_y[plane_id];
+ if (!crtc_state->hw.active)
+ continue;
+
skl_ddb_get_hw_plane_state(dev_priv, crtc->pipe,
plane_id, ddb, ddb_y);
diff --git a/drivers/gpu/drm/imx/dcss/dcss-kms.c b/drivers/gpu/drm/imx/dcss/dcss-kms.c
index 9b84df34a6a1..8cf3352d8858 100644
--- a/drivers/gpu/drm/imx/dcss/dcss-kms.c
+++ b/drivers/gpu/drm/imx/dcss/dcss-kms.c
@@ -142,8 +142,6 @@ struct dcss_kms_dev *dcss_kms_attach(struct dcss_dev *dcss)
drm_kms_helper_poll_init(drm);
- drm_bridge_connector_enable_hpd(kms->connector);
-
ret = drm_dev_register(drm, 0);
if (ret)
goto cleanup_crtc;
diff --git a/drivers/gpu/drm/meson/meson_drv.c b/drivers/gpu/drm/meson/meson_drv.c
index 1b70938cfd2c..bd4ca11d3ff5 100644
--- a/drivers/gpu/drm/meson/meson_drv.c
+++ b/drivers/gpu/drm/meson/meson_drv.c
@@ -115,8 +115,11 @@ static bool meson_vpu_has_available_connectors(struct device *dev)
for_each_endpoint_of_node(dev->of_node, ep) {
/* If the endpoint node exists, consider it enabled */
remote = of_graph_get_remote_port(ep);
- if (remote)
+ if (remote) {
+ of_node_put(remote);
+ of_node_put(ep);
return true;
+ }
}
return false;
diff --git a/drivers/gpu/drm/meson/meson_plane.c b/drivers/gpu/drm/meson/meson_plane.c
index b9ac932af8d0..03acc68abf2c 100644
--- a/drivers/gpu/drm/meson/meson_plane.c
+++ b/drivers/gpu/drm/meson/meson_plane.c
@@ -170,7 +170,7 @@ static void meson_plane_atomic_update(struct drm_plane *plane,
/* Enable OSD and BLK0, set max global alpha */
priv->viu.osd1_ctrl_stat = OSD_ENABLE |
- (0xFF << OSD_GLOBAL_ALPHA_SHIFT) |
+ (0x100 << OSD_GLOBAL_ALPHA_SHIFT) |
OSD_BLK0_ENABLE;
priv->viu.osd1_ctrl_stat2 = readl(priv->io_base +
diff --git a/drivers/gpu/drm/meson/meson_viu.c b/drivers/gpu/drm/meson/meson_viu.c
index bb7e109534de..d4b907889a21 100644
--- a/drivers/gpu/drm/meson/meson_viu.c
+++ b/drivers/gpu/drm/meson/meson_viu.c
@@ -94,7 +94,7 @@ static void meson_viu_set_g12a_osd1_matrix(struct meson_drm *priv,
priv->io_base + _REG(VPP_WRAP_OSD1_MATRIX_COEF11_12));
writel(((m[9] & 0x1fff) << 16) | (m[10] & 0x1fff),
priv->io_base + _REG(VPP_WRAP_OSD1_MATRIX_COEF20_21));
- writel((m[11] & 0x1fff) << 16,
+ writel((m[11] & 0x1fff),
priv->io_base + _REG(VPP_WRAP_OSD1_MATRIX_COEF22));
writel(((m[18] & 0xfff) << 16) | (m[19] & 0xfff),
diff --git a/drivers/gpu/drm/msm/disp/dpu1/dpu_encoder.c b/drivers/gpu/drm/msm/disp/dpu1/dpu_encoder.c
index c682d4e02d1b..52a626117f70 100644
--- a/drivers/gpu/drm/msm/disp/dpu1/dpu_encoder.c
+++ b/drivers/gpu/drm/msm/disp/dpu1/dpu_encoder.c
@@ -2061,6 +2061,12 @@ void dpu_encoder_helper_phys_cleanup(struct dpu_encoder_phys *phys_enc)
intf_cfg.stream_sel = 0; /* Don't care value for video mode */
intf_cfg.mode_3d = dpu_encoder_helper_get_3d_blend_mode(phys_enc);
+
+ if (phys_enc->hw_intf)
+ intf_cfg.intf = phys_enc->hw_intf->idx;
+ if (phys_enc->hw_wb)
+ intf_cfg.wb = phys_enc->hw_wb->idx;
+
if (phys_enc->hw_pp->merge_3d)
intf_cfg.merge_3d = phys_enc->hw_pp->merge_3d->idx;
diff --git a/drivers/gpu/drm/msm/dp/dp_ctrl.c b/drivers/gpu/drm/msm/dp/dp_ctrl.c
index ab6aa13b1639..013ca02e17cb 100644
--- a/drivers/gpu/drm/msm/dp/dp_ctrl.c
+++ b/drivers/gpu/drm/msm/dp/dp_ctrl.c
@@ -1214,7 +1214,7 @@ static int dp_ctrl_link_train_2(struct dp_ctrl_private *ctrl,
if (ret)
return ret;
- dp_ctrl_train_pattern_set(ctrl, pattern | DP_RECOVERED_CLOCK_OUT_EN);
+ dp_ctrl_train_pattern_set(ctrl, pattern);
for (tries = 0; tries <= maximum_retries; tries++) {
drm_dp_link_train_channel_eq_delay(ctrl->aux, ctrl->panel->dpcd);
diff --git a/drivers/gpu/drm/msm/dsi/dsi_cfg.c b/drivers/gpu/drm/msm/dsi/dsi_cfg.c
index 2c23324a2296..72c018e26f47 100644
--- a/drivers/gpu/drm/msm/dsi/dsi_cfg.c
+++ b/drivers/gpu/drm/msm/dsi/dsi_cfg.c
@@ -109,7 +109,7 @@ static const char * const dsi_8996_bus_clk_names[] = {
static const struct msm_dsi_config msm8996_dsi_cfg = {
.io_offset = DSI_6G_REG_SHIFT,
.reg_cfg = {
- .num = 2,
+ .num = 3,
.regs = {
{"vdda", 18160, 1 }, /* 1.25 V */
{"vcca", 17000, 32 }, /* 0.925 V */
@@ -148,7 +148,7 @@ static const char * const dsi_sdm660_bus_clk_names[] = {
static const struct msm_dsi_config sdm660_dsi_cfg = {
.io_offset = DSI_6G_REG_SHIFT,
.reg_cfg = {
- .num = 2,
+ .num = 1,
.regs = {
{"vdda", 12560, 4 }, /* 1.2 V */
},
diff --git a/drivers/gpu/drm/msm/dsi/phy/dsi_phy.c b/drivers/gpu/drm/msm/dsi/phy/dsi_phy.c
index a39de3bdc7fa..56dfa2d24be1 100644
--- a/drivers/gpu/drm/msm/dsi/phy/dsi_phy.c
+++ b/drivers/gpu/drm/msm/dsi/phy/dsi_phy.c
@@ -347,7 +347,7 @@ int msm_dsi_dphy_timing_calc_v3(struct msm_dsi_dphy_timing *timing,
} else {
timing->shared_timings.clk_pre =
linear_inter(tmax, tmin, pcnt2, 0, false);
- timing->shared_timings.clk_pre_inc_by_2 = 0;
+ timing->shared_timings.clk_pre_inc_by_2 = 0;
}
timing->ta_go = 3;
diff --git a/drivers/gpu/drm/msm/msm_drv.c b/drivers/gpu/drm/msm/msm_drv.c
index 1ed4cd09dbf8..16884db272de 100644
--- a/drivers/gpu/drm/msm/msm_drv.c
+++ b/drivers/gpu/drm/msm/msm_drv.c
@@ -469,6 +469,8 @@ static int msm_drm_init(struct device *dev, const struct drm_driver *drv)
}
}
+ drm_helper_move_panel_connectors_to_head(ddev);
+
ddev->mode_config.funcs = &mode_config_funcs;
ddev->mode_config.helper_private = &mode_config_helper_funcs;
diff --git a/drivers/gpu/drm/msm/msm_gpu_devfreq.c b/drivers/gpu/drm/msm/msm_gpu_devfreq.c
index d1f70426f554..85c443a37e4e 100644
--- a/drivers/gpu/drm/msm/msm_gpu_devfreq.c
+++ b/drivers/gpu/drm/msm/msm_gpu_devfreq.c
@@ -213,6 +213,8 @@ void msm_devfreq_init(struct msm_gpu *gpu)
if (IS_ERR(df->devfreq)) {
DRM_DEV_ERROR(&gpu->pdev->dev, "Couldn't initialize GPU devfreq\n");
+ dev_pm_qos_remove_request(&df->idle_freq);
+ dev_pm_qos_remove_request(&df->boost_freq);
df->devfreq = NULL;
return;
}
diff --git a/drivers/gpu/drm/msm/msm_rd.c b/drivers/gpu/drm/msm/msm_rd.c
index a92ffde53f0b..db2f847c8535 100644
--- a/drivers/gpu/drm/msm/msm_rd.c
+++ b/drivers/gpu/drm/msm/msm_rd.c
@@ -196,6 +196,9 @@ static int rd_open(struct inode *inode, struct file *file)
file->private_data = rd;
rd->open = true;
+ /* Reset fifo to clear any previously unread data: */
+ rd->fifo.head = rd->fifo.tail = 0;
+
/* the parsing tools need to know gpu-id to know which
* register database to load.
*
diff --git a/drivers/gpu/drm/nouveau/nouveau_bo.c b/drivers/gpu/drm/nouveau/nouveau_bo.c
index 05076e530e7d..e29175e4b44c 100644
--- a/drivers/gpu/drm/nouveau/nouveau_bo.c
+++ b/drivers/gpu/drm/nouveau/nouveau_bo.c
@@ -820,6 +820,15 @@ nouveau_bo_move_m2mf(struct ttm_buffer_object *bo, int evict,
if (ret == 0) {
ret = nouveau_fence_new(chan, false, &fence);
if (ret == 0) {
+ /* TODO: figure out a better solution here
+ *
+ * wait on the fence here explicitly as going through
+ * ttm_bo_move_accel_cleanup somehow doesn't seem to do it.
+ *
+ * Without this the operation can timeout and we'll fallback to a
+ * software copy, which might take several minutes to finish.
+ */
+ nouveau_fence_wait(fence, false, false);
ret = ttm_bo_move_accel_cleanup(bo,
&fence->base,
evict, false,
diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/device/base.c b/drivers/gpu/drm/nouveau/nvkm/engine/device/base.c
index 568182e68dd7..d8cf71fb0512 100644
--- a/drivers/gpu/drm/nouveau/nvkm/engine/device/base.c
+++ b/drivers/gpu/drm/nouveau/nvkm/engine/device/base.c
@@ -2605,6 +2605,27 @@ nv172_chipset = {
};
static const struct nvkm_device_chip
+nv173_chipset = {
+ .name = "GA103",
+ .bar = { 0x00000001, tu102_bar_new },
+ .bios = { 0x00000001, nvkm_bios_new },
+ .devinit = { 0x00000001, ga100_devinit_new },
+ .fb = { 0x00000001, ga102_fb_new },
+ .gpio = { 0x00000001, ga102_gpio_new },
+ .i2c = { 0x00000001, gm200_i2c_new },
+ .imem = { 0x00000001, nv50_instmem_new },
+ .mc = { 0x00000001, ga100_mc_new },
+ .mmu = { 0x00000001, tu102_mmu_new },
+ .pci = { 0x00000001, gp100_pci_new },
+ .privring = { 0x00000001, gm200_privring_new },
+ .timer = { 0x00000001, gk20a_timer_new },
+ .top = { 0x00000001, ga100_top_new },
+ .disp = { 0x00000001, ga102_disp_new },
+ .dma = { 0x00000001, gv100_dma_new },
+ .fifo = { 0x00000001, ga102_fifo_new },
+};
+
+static const struct nvkm_device_chip
nv174_chipset = {
.name = "GA104",
.bar = { 0x00000001, tu102_bar_new },
@@ -3067,6 +3088,7 @@ nvkm_device_ctor(const struct nvkm_device_func *func,
case 0x167: device->chip = &nv167_chipset; break;
case 0x168: device->chip = &nv168_chipset; break;
case 0x172: device->chip = &nv172_chipset; break;
+ case 0x173: device->chip = &nv173_chipset; break;
case 0x174: device->chip = &nv174_chipset; break;
case 0x176: device->chip = &nv176_chipset; break;
case 0x177: device->chip = &nv177_chipset; break;
diff --git a/drivers/gpu/drm/panel/panel-edp.c b/drivers/gpu/drm/panel/panel-edp.c
index cdb154c8b866..b75c690bb0cc 100644
--- a/drivers/gpu/drm/panel/panel-edp.c
+++ b/drivers/gpu/drm/panel/panel-edp.c
@@ -1295,7 +1295,8 @@ static const struct panel_desc innolux_n116bca_ea1 = {
},
.delay = {
.hpd_absent = 200,
- .prepare_to_enable = 80,
+ .enable = 80,
+ .disable = 50,
.unprepare = 500,
},
};
diff --git a/drivers/gpu/drm/panfrost/panfrost_devfreq.c b/drivers/gpu/drm/panfrost/panfrost_devfreq.c
index 5110cd9b2425..fe5f12f16a63 100644
--- a/drivers/gpu/drm/panfrost/panfrost_devfreq.c
+++ b/drivers/gpu/drm/panfrost/panfrost_devfreq.c
@@ -131,6 +131,17 @@ int panfrost_devfreq_init(struct panfrost_device *pfdev)
return PTR_ERR(opp);
panfrost_devfreq_profile.initial_freq = cur_freq;
+
+ /*
+ * Set the recommend OPP this will enable and configure the regulator
+ * if any and will avoid a switch off by regulator_late_cleanup()
+ */
+ ret = dev_pm_opp_set_opp(dev, opp);
+ if (ret) {
+ DRM_DEV_ERROR(dev, "Couldn't set recommended OPP\n");
+ return ret;
+ }
+
dev_pm_opp_put(opp);
/*
diff --git a/drivers/gpu/drm/radeon/radeon_device.c b/drivers/gpu/drm/radeon/radeon_device.c
index 2b12389f841a..ee0165687239 100644
--- a/drivers/gpu/drm/radeon/radeon_device.c
+++ b/drivers/gpu/drm/radeon/radeon_device.c
@@ -1605,6 +1605,9 @@ int radeon_suspend_kms(struct drm_device *dev, bool suspend,
if (r) {
/* delay GPU reset to resume */
radeon_fence_driver_force_completion(rdev, i);
+ } else {
+ /* finish executing delayed work */
+ flush_delayed_work(&rdev->fence_drv[i].lockup_work);
}
}
diff --git a/drivers/gpu/drm/rockchip/cdn-dp-core.c b/drivers/gpu/drm/rockchip/cdn-dp-core.c
index c204e9b95c1f..518ee13b1d6f 100644
--- a/drivers/gpu/drm/rockchip/cdn-dp-core.c
+++ b/drivers/gpu/drm/rockchip/cdn-dp-core.c
@@ -283,8 +283,9 @@ static int cdn_dp_connector_get_modes(struct drm_connector *connector)
return ret;
}
-static int cdn_dp_connector_mode_valid(struct drm_connector *connector,
- struct drm_display_mode *mode)
+static enum drm_mode_status
+cdn_dp_connector_mode_valid(struct drm_connector *connector,
+ struct drm_display_mode *mode)
{
struct cdn_dp_device *dp = connector_to_dp(connector);
struct drm_display_info *display_info = &dp->connector.display_info;
diff --git a/drivers/gpu/drm/rockchip/rockchip_drm_vop2.c b/drivers/gpu/drm/rockchip/rockchip_drm_vop2.c
index e4631f515ba4..f9aa8b96c695 100644
--- a/drivers/gpu/drm/rockchip/rockchip_drm_vop2.c
+++ b/drivers/gpu/drm/rockchip/rockchip_drm_vop2.c
@@ -1439,11 +1439,15 @@ static void rk3568_set_intf_mux(struct vop2_video_port *vp, int id,
die &= ~RK3568_SYS_DSP_INFACE_EN_HDMI_MUX;
die |= RK3568_SYS_DSP_INFACE_EN_HDMI |
FIELD_PREP(RK3568_SYS_DSP_INFACE_EN_HDMI_MUX, vp->id);
+ dip &= ~RK3568_DSP_IF_POL__HDMI_PIN_POL;
+ dip |= FIELD_PREP(RK3568_DSP_IF_POL__HDMI_PIN_POL, polflags);
break;
case ROCKCHIP_VOP2_EP_EDP0:
die &= ~RK3568_SYS_DSP_INFACE_EN_EDP_MUX;
die |= RK3568_SYS_DSP_INFACE_EN_EDP |
FIELD_PREP(RK3568_SYS_DSP_INFACE_EN_EDP_MUX, vp->id);
+ dip &= ~RK3568_DSP_IF_POL__EDP_PIN_POL;
+ dip |= FIELD_PREP(RK3568_DSP_IF_POL__EDP_PIN_POL, polflags);
break;
case ROCKCHIP_VOP2_EP_MIPI0:
die &= ~RK3568_SYS_DSP_INFACE_EN_MIPI0_MUX;
diff --git a/drivers/gpu/drm/sun4i/sun6i_mipi_dsi.c b/drivers/gpu/drm/sun4i/sun6i_mipi_dsi.c
index b4dfa166eccd..34234a144e87 100644
--- a/drivers/gpu/drm/sun4i/sun6i_mipi_dsi.c
+++ b/drivers/gpu/drm/sun4i/sun6i_mipi_dsi.c
@@ -531,7 +531,7 @@ static void sun6i_dsi_setup_timings(struct sun6i_dsi *dsi,
struct drm_display_mode *mode)
{
struct mipi_dsi_device *device = dsi->device;
- unsigned int Bpp = mipi_dsi_pixel_format_to_bpp(device->format) / 8;
+ int Bpp = mipi_dsi_pixel_format_to_bpp(device->format) / 8;
u16 hbp = 0, hfp = 0, hsa = 0, hblk = 0, vblk = 0;
u32 basic_ctl = 0;
size_t bytes;
@@ -555,7 +555,7 @@ static void sun6i_dsi_setup_timings(struct sun6i_dsi *dsi,
* (4 bytes). Its minimal size is therefore 10 bytes
*/
#define HSA_PACKET_OVERHEAD 10
- hsa = max((unsigned int)HSA_PACKET_OVERHEAD,
+ hsa = max(HSA_PACKET_OVERHEAD,
(mode->hsync_end - mode->hsync_start) * Bpp - HSA_PACKET_OVERHEAD);
/*
@@ -564,7 +564,7 @@ static void sun6i_dsi_setup_timings(struct sun6i_dsi *dsi,
* therefore 6 bytes
*/
#define HBP_PACKET_OVERHEAD 6
- hbp = max((unsigned int)HBP_PACKET_OVERHEAD,
+ hbp = max(HBP_PACKET_OVERHEAD,
(mode->htotal - mode->hsync_end) * Bpp - HBP_PACKET_OVERHEAD);
/*
@@ -574,7 +574,7 @@ static void sun6i_dsi_setup_timings(struct sun6i_dsi *dsi,
* 16 bytes
*/
#define HFP_PACKET_OVERHEAD 16
- hfp = max((unsigned int)HFP_PACKET_OVERHEAD,
+ hfp = max(HFP_PACKET_OVERHEAD,
(mode->hsync_start - mode->hdisplay) * Bpp - HFP_PACKET_OVERHEAD);
/*
@@ -583,7 +583,7 @@ static void sun6i_dsi_setup_timings(struct sun6i_dsi *dsi,
* bytes). Its minimal size is therefore 10 bytes.
*/
#define HBLK_PACKET_OVERHEAD 10
- hblk = max((unsigned int)HBLK_PACKET_OVERHEAD,
+ hblk = max(HBLK_PACKET_OVERHEAD,
(mode->htotal - (mode->hsync_end - mode->hsync_start)) * Bpp -
HBLK_PACKET_OVERHEAD);
diff --git a/drivers/gpu/drm/ttm/ttm_bo.c b/drivers/gpu/drm/ttm/ttm_bo.c
index 0e210df65c30..97184c333526 100644
--- a/drivers/gpu/drm/ttm/ttm_bo.c
+++ b/drivers/gpu/drm/ttm/ttm_bo.c
@@ -912,7 +912,7 @@ int ttm_bo_validate(struct ttm_buffer_object *bo,
/*
* We might need to add a TTM.
*/
- if (bo->resource->mem_type == TTM_PL_SYSTEM) {
+ if (!bo->resource || bo->resource->mem_type == TTM_PL_SYSTEM) {
ret = ttm_tt_create(bo, true);
if (ret)
return ret;
diff --git a/drivers/gpu/drm/ttm/ttm_bo_util.c b/drivers/gpu/drm/ttm/ttm_bo_util.c
index 1cbfb00c1d65..911141d16e95 100644
--- a/drivers/gpu/drm/ttm/ttm_bo_util.c
+++ b/drivers/gpu/drm/ttm/ttm_bo_util.c
@@ -236,16 +236,19 @@ static int ttm_buffer_object_transfer(struct ttm_buffer_object *bo,
if (bo->type != ttm_bo_type_sg)
fbo->base.base.resv = &fbo->base.base._resv;
- if (fbo->base.resource) {
- ttm_resource_set_bo(fbo->base.resource, &fbo->base);
- bo->resource = NULL;
- }
-
dma_resv_init(&fbo->base.base._resv);
fbo->base.base.dev = NULL;
ret = dma_resv_trylock(&fbo->base.base._resv);
WARN_ON(!ret);
+ if (fbo->base.resource) {
+ ttm_resource_set_bo(fbo->base.resource, &fbo->base);
+ bo->resource = NULL;
+ ttm_bo_set_bulk_move(&fbo->base, NULL);
+ } else {
+ fbo->base.bulk_move = NULL;
+ }
+
ret = dma_resv_reserve_fences(&fbo->base.base._resv, 1);
if (ret) {
kfree(fbo);
diff --git a/drivers/gpu/drm/vc4/Kconfig b/drivers/gpu/drm/vc4/Kconfig
index 061be9a6619d..b0f3117102ca 100644
--- a/drivers/gpu/drm/vc4/Kconfig
+++ b/drivers/gpu/drm/vc4/Kconfig
@@ -8,6 +8,7 @@ config DRM_VC4
depends on DRM
depends on SND && SND_SOC
depends on COMMON_CLK
+ depends on PM
select DRM_DISPLAY_HDMI_HELPER
select DRM_DISPLAY_HELPER
select DRM_KMS_HELPER
diff --git a/drivers/gpu/drm/vc4/vc4_hdmi.c b/drivers/gpu/drm/vc4/vc4_hdmi.c
index 592c3b5d03e6..1e5f68704d7d 100644
--- a/drivers/gpu/drm/vc4/vc4_hdmi.c
+++ b/drivers/gpu/drm/vc4/vc4_hdmi.c
@@ -2855,7 +2855,7 @@ static int vc5_hdmi_init_resources(struct vc4_hdmi *vc4_hdmi)
return 0;
}
-static int __maybe_unused vc4_hdmi_runtime_suspend(struct device *dev)
+static int vc4_hdmi_runtime_suspend(struct device *dev)
{
struct vc4_hdmi *vc4_hdmi = dev_get_drvdata(dev);
@@ -2972,17 +2972,15 @@ static int vc4_hdmi_bind(struct device *dev, struct device *master, void *data)
vc4_hdmi->disable_4kp60 = true;
}
+ pm_runtime_enable(dev);
+
/*
- * We need to have the device powered up at this point to call
- * our reset hook and for the CEC init.
+ * We need to have the device powered up at this point to call
+ * our reset hook and for the CEC init.
*/
- ret = vc4_hdmi_runtime_resume(dev);
+ ret = pm_runtime_resume_and_get(dev);
if (ret)
- goto err_put_ddc;
-
- pm_runtime_get_noresume(dev);
- pm_runtime_set_active(dev);
- pm_runtime_enable(dev);
+ goto err_disable_runtime_pm;
if ((of_device_is_compatible(dev->of_node, "brcm,bcm2711-hdmi0") ||
of_device_is_compatible(dev->of_node, "brcm,bcm2711-hdmi1")) &&
@@ -3028,6 +3026,7 @@ err_destroy_conn:
err_destroy_encoder:
drm_encoder_cleanup(encoder);
pm_runtime_put_sync(dev);
+err_disable_runtime_pm:
pm_runtime_disable(dev);
err_put_ddc:
put_device(&vc4_hdmi->ddc->dev);