108 files changed, 2772 insertions, 671 deletions
diff --git a/drivers/gpu/drm/amd/amdgpu/Makefile b/drivers/gpu/drm/amd/amdgpu/Makefile
index f8c58c425eb9..fdd0ca4b0f0b 100644
--- a/drivers/gpu/drm/amd/amdgpu/Makefile
+++ b/drivers/gpu/drm/amd/amdgpu/Makefile
@@ -23,7 +23,7 @@
 # Makefile for the drm device driver.  This driver provides support for the
 # Direct Rendering Infrastructure (DRI) in XFree86 4.1.0 and higher.
 
-FULL_AMD_PATH=$(src)/..
+FULL_AMD_PATH=$(srctree)/$(src)/..
 DISPLAY_FOLDER_NAME=display
 FULL_AMD_DISPLAY_PATH = $(FULL_AMD_PATH)/$(DISPLAY_FOLDER_NAME)
 
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_acpi.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_acpi.c
index 4376b17ca594..56f8ca2a3bb4 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_acpi.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_acpi.c
@@ -464,8 +464,7 @@ static int amdgpu_atif_handler(struct amdgpu_device *adev,
 			}
 		}
 		if (req.pending & ATIF_DGPU_DISPLAY_EVENT) {
-			if ((adev->flags & AMD_IS_PX) &&
-			    amdgpu_atpx_dgpu_req_power_for_displays()) {
+			if (adev->flags & AMD_IS_PX) {
 				pm_runtime_get_sync(adev->ddev->dev);
 				/* Just fire off a uevent and let userspace tell us what to do */
 				drm_helper_hpd_irq_event(adev->ddev);
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c
index acf8ae0cee9a..aeead072fa79 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c
@@ -335,6 +335,43 @@ void amdgpu_amdkfd_free_gtt_mem(struct kgd_dev *kgd, void *mem_obj)
 	amdgpu_bo_unref(&(bo));
 }
 
+uint32_t amdgpu_amdkfd_get_fw_version(struct kgd_dev *kgd,
+				      enum kgd_engine_type type)
+{
+	struct amdgpu_device *adev = (struct amdgpu_device *)kgd;
+
+	switch (type) {
+	case KGD_ENGINE_PFP:
+		return adev->gfx.pfp_fw_version;
+
+	case KGD_ENGINE_ME:
+		return adev->gfx.me_fw_version;
+
+	case KGD_ENGINE_CE:
+		return adev->gfx.ce_fw_version;
+
+	case KGD_ENGINE_MEC1:
+		return adev->gfx.mec_fw_version;
+
+	case KGD_ENGINE_MEC2:
+		return adev->gfx.mec2_fw_version;
+
+	case KGD_ENGINE_RLC:
+		return adev->gfx.rlc_fw_version;
+
+	case KGD_ENGINE_SDMA1:
+		return adev->sdma.instance[0].fw_version;
+
+	case KGD_ENGINE_SDMA2:
+		return adev->sdma.instance[1].fw_version;
+
+	default:
+		return 0;
+	}
+
+	return 0;
+}
+
 void amdgpu_amdkfd_get_local_mem_info(struct kgd_dev *kgd,
 				      struct kfd_local_mem_info *mem_info)
 {
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h
index 775f815f9521..4e37fa7e85b1 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h
@@ -81,6 +81,18 @@ struct amdgpu_kfd_dev {
 	uint64_t vram_used;
 };
 
+enum kgd_engine_type {
+	KGD_ENGINE_PFP = 1,
+	KGD_ENGINE_ME,
+	KGD_ENGINE_CE,
+	KGD_ENGINE_MEC1,
+	KGD_ENGINE_MEC2,
+	KGD_ENGINE_RLC,
+	KGD_ENGINE_SDMA1,
+	KGD_ENGINE_SDMA2,
+	KGD_ENGINE_MAX
+};
+
 struct amdgpu_amdkfd_fence *amdgpu_amdkfd_fence_create(u64 context,
 						       struct mm_struct *mm);
 bool amdkfd_fence_check_mm(struct dma_fence *f, struct mm_struct *mm);
@@ -142,6 +154,8 @@ int amdgpu_amdkfd_alloc_gtt_mem(struct kgd_dev *kgd, size_t size,
 				void **mem_obj, uint64_t *gpu_addr,
 				void **cpu_ptr, bool mqd_gfx9);
 void amdgpu_amdkfd_free_gtt_mem(struct kgd_dev *kgd, void *mem_obj);
+uint32_t amdgpu_amdkfd_get_fw_version(struct kgd_dev *kgd,
+				      enum kgd_engine_type type);
 void amdgpu_amdkfd_get_local_mem_info(struct kgd_dev *kgd,
 				      struct kfd_local_mem_info *mem_info);
 uint64_t amdgpu_amdkfd_get_gpu_clock_counter(struct kgd_dev *kgd);
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v7.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v7.c
index ff7fac7df34b..fa09e11a600c 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v7.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v7.c
@@ -22,14 +22,12 @@
 
 #include <linux/fdtable.h>
 #include <linux/uaccess.h>
-#include <linux/firmware.h>
 #include <linux/mmu_context.h>
 #include <drm/drmP.h>
 #include "amdgpu.h"
 #include "amdgpu_amdkfd.h"
 #include "cikd.h"
 #include "cik_sdma.h"
-#include "amdgpu_ucode.h"
 #include "gfx_v7_0.h"
 #include "gca/gfx_7_2_d.h"
 #include "gca/gfx_7_2_enum.h"
@@ -139,7 +137,6 @@ static bool get_atc_vmid_pasid_mapping_valid(struct kgd_dev *kgd, uint8_t vmid);
 static uint16_t get_atc_vmid_pasid_mapping_pasid(struct kgd_dev *kgd,
 							uint8_t vmid);
 
-static uint16_t get_fw_version(struct kgd_dev *kgd, enum kgd_engine_type type);
 static void set_scratch_backing_va(struct kgd_dev *kgd,
 					uint64_t va, uint32_t vmid);
 static void set_vm_context_page_table_base(struct kgd_dev *kgd, uint32_t vmid,
@@ -191,7 +188,6 @@ static const struct kfd2kgd_calls kfd2kgd = {
 	.address_watch_get_offset = kgd_address_watch_get_offset,
 	.get_atc_vmid_pasid_mapping_pasid = get_atc_vmid_pasid_mapping_pasid,
 	.get_atc_vmid_pasid_mapping_valid = get_atc_vmid_pasid_mapping_valid,
-	.get_fw_version = get_fw_version,
 	.set_scratch_backing_va = set_scratch_backing_va,
 	.get_tile_config = get_tile_config,
 	.set_vm_context_page_table_base = set_vm_context_page_table_base,
@@ -792,63 +788,6 @@ static void set_scratch_backing_va(struct kgd_dev *kgd,
 	unlock_srbm(kgd);
 }
 
-static uint16_t get_fw_version(struct kgd_dev *kgd, enum kgd_engine_type type)
-{
-	struct amdgpu_device *adev = (struct amdgpu_device *) kgd;
-	const union amdgpu_firmware_header *hdr;
-
-	switch (type) {
-	case KGD_ENGINE_PFP:
-		hdr = (const union amdgpu_firmware_header *)
-						adev->gfx.pfp_fw->data;
-		break;
-
-	case KGD_ENGINE_ME:
-		hdr = (const union amdgpu_firmware_header *)
-						adev->gfx.me_fw->data;
-		break;
-
-	case KGD_ENGINE_CE:
-		hdr = (const union amdgpu_firmware_header *)
-						adev->gfx.ce_fw->data;
-		break;
-
-	case KGD_ENGINE_MEC1:
-		hdr = (const union amdgpu_firmware_header *)
-						adev->gfx.mec_fw->data;
-		break;
-
-	case KGD_ENGINE_MEC2:
-		hdr = (const union amdgpu_firmware_header *)
-						adev->gfx.mec2_fw->data;
-		break;
-
-	case KGD_ENGINE_RLC:
-		hdr = (const union amdgpu_firmware_header *)
-						adev->gfx.rlc_fw->data;
-		break;
-
-	case KGD_ENGINE_SDMA1:
-		hdr = (const union amdgpu_firmware_header *)
-						adev->sdma.instance[0].fw->data;
-		break;
-
-	case KGD_ENGINE_SDMA2:
-		hdr = (const union amdgpu_firmware_header *)
-						adev->sdma.instance[1].fw->data;
-		break;
-
-	default:
-		return 0;
-	}
-
-	if (hdr == NULL)
-		return 0;
-
-	/* Only 12 bit in use*/
-	return hdr->common.ucode_version;
-}
-
 static void set_vm_context_page_table_base(struct kgd_dev *kgd, uint32_t vmid,
 			uint64_t page_table_base)
 {
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v8.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v8.c
index 56ea929f524b..fec3a6aa1de6 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v8.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v8.c
@@ -23,12 +23,10 @@
 #include <linux/module.h>
 #include <linux/fdtable.h>
 #include <linux/uaccess.h>
-#include <linux/firmware.h>
 #include <linux/mmu_context.h>
 #include <drm/drmP.h>
 #include "amdgpu.h"
 #include "amdgpu_amdkfd.h"
-#include "amdgpu_ucode.h"
 #include "gfx_v8_0.h"
 #include "gca/gfx_8_0_sh_mask.h"
 #include "gca/gfx_8_0_d.h"
@@ -95,7 +93,6 @@ static bool get_atc_vmid_pasid_mapping_valid(struct kgd_dev *kgd,
 		uint8_t vmid);
 static uint16_t get_atc_vmid_pasid_mapping_pasid(struct kgd_dev *kgd,
 		uint8_t vmid);
-static uint16_t get_fw_version(struct kgd_dev *kgd, enum kgd_engine_type type);
 static void set_scratch_backing_va(struct kgd_dev *kgd,
 					uint64_t va, uint32_t vmid);
 static void set_vm_context_page_table_base(struct kgd_dev *kgd, uint32_t vmid,
@@ -148,7 +145,6 @@ static const struct kfd2kgd_calls kfd2kgd = {
 			get_atc_vmid_pasid_mapping_pasid,
 	.get_atc_vmid_pasid_mapping_valid =
 			get_atc_vmid_pasid_mapping_valid,
-	.get_fw_version = get_fw_version,
 	.set_scratch_backing_va = set_scratch_backing_va,
 	.get_tile_config = get_tile_config,
 	.set_vm_context_page_table_base = set_vm_context_page_table_base,
@@ -751,63 +747,6 @@ static void set_scratch_backing_va(struct kgd_dev *kgd,
 	unlock_srbm(kgd);
 }
 
-static uint16_t get_fw_version(struct kgd_dev *kgd, enum kgd_engine_type type)
-{
-	struct amdgpu_device *adev = (struct amdgpu_device *) kgd;
-	const union amdgpu_firmware_header *hdr;
-
-	switch (type) {
-	case KGD_ENGINE_PFP:
-		hdr = (const union amdgpu_firmware_header *)
-						adev->gfx.pfp_fw->data;
-		break;
-
-	case KGD_ENGINE_ME:
-		hdr = (const union amdgpu_firmware_header *)
-						adev->gfx.me_fw->data;
-		break;
-
-	case KGD_ENGINE_CE:
-		hdr = (const union amdgpu_firmware_header *)
-						adev->gfx.ce_fw->data;
-		break;
-
-	case KGD_ENGINE_MEC1:
-		hdr = (const union amdgpu_firmware_header *)
-						adev->gfx.mec_fw->data;
-		break;
-
-	case KGD_ENGINE_MEC2:
-		hdr = (const union amdgpu_firmware_header *)
-						adev->gfx.mec2_fw->data;
-		break;
-
-	case KGD_ENGINE_RLC:
-		hdr = (const union amdgpu_firmware_header *)
-						adev->gfx.rlc_fw->data;
-		break;
-
-	case KGD_ENGINE_SDMA1:
-		hdr = (const union amdgpu_firmware_header *)
-						adev->sdma.instance[0].fw->data;
-		break;
-
-	case KGD_ENGINE_SDMA2:
-		hdr = (const union amdgpu_firmware_header *)
-						adev->sdma.instance[1].fw->data;
-		break;
-
-	default:
-		return 0;
-	}
-
-	if (hdr == NULL)
-		return 0;
-
-	/* Only 12 bit in use*/
-	return hdr->common.ucode_version;
-}
-
 static void set_vm_context_page_table_base(struct kgd_dev *kgd, uint32_t vmid,
 		uint64_t page_table_base)
 {
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.c
index 5c51d4910650..ef3d93b995b2 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.c
@@ -25,12 +25,10 @@
 #include <linux/module.h>
 #include <linux/fdtable.h>
 #include <linux/uaccess.h>
-#include <linux/firmware.h>
 #include <linux/mmu_context.h>
 #include <drm/drmP.h>
 #include "amdgpu.h"
 #include "amdgpu_amdkfd.h"
-#include "amdgpu_ucode.h"
 #include "soc15_hw_ip.h"
 #include "gc/gc_9_0_offset.h"
 #include "gc/gc_9_0_sh_mask.h"
@@ -111,7 +109,6 @@ static uint16_t get_atc_vmid_pasid_mapping_pasid(struct kgd_dev *kgd,
 		uint8_t vmid);
 static void set_vm_context_page_table_base(struct kgd_dev *kgd, uint32_t vmid,
 		uint64_t page_table_base);
-static uint16_t get_fw_version(struct kgd_dev *kgd, enum kgd_engine_type type);
 static void set_scratch_backing_va(struct kgd_dev *kgd,
 					uint64_t va, uint32_t vmid);
 static int invalidate_tlbs(struct kgd_dev *kgd, uint16_t pasid);
@@ -158,7 +155,6 @@ static const struct kfd2kgd_calls kfd2kgd = {
 			get_atc_vmid_pasid_mapping_pasid,
 	.get_atc_vmid_pasid_mapping_valid =
 			get_atc_vmid_pasid_mapping_valid,
-	.get_fw_version = get_fw_version,
 	.set_scratch_backing_va = set_scratch_backing_va,
 	.get_tile_config = amdgpu_amdkfd_get_tile_config,
 	.set_vm_context_page_table_base = set_vm_context_page_table_base,
@@ -874,56 +870,6 @@ static void set_scratch_backing_va(struct kgd_dev *kgd,
 	 */
 }
 
-/* FIXME: Does this need to be ASIC-specific code? */
-static uint16_t get_fw_version(struct kgd_dev *kgd, enum kgd_engine_type type)
-{
-	struct amdgpu_device *adev = (struct amdgpu_device *) kgd;
-	const union amdgpu_firmware_header *hdr;
-
-	switch (type) {
-	case KGD_ENGINE_PFP:
-		hdr = (const union amdgpu_firmware_header *)adev->gfx.pfp_fw->data;
-		break;
-
-	case KGD_ENGINE_ME:
-		hdr = (const union amdgpu_firmware_header *)adev->gfx.me_fw->data;
-		break;
-
-	case KGD_ENGINE_CE:
-		hdr = (const union amdgpu_firmware_header *)adev->gfx.ce_fw->data;
-		break;
-
-	case KGD_ENGINE_MEC1:
-		hdr = (const union amdgpu_firmware_header *)adev->gfx.mec_fw->data;
-		break;
-
-	case KGD_ENGINE_MEC2:
-		hdr = (const union amdgpu_firmware_header *)adev->gfx.mec2_fw->data;
-		break;
-
-	case KGD_ENGINE_RLC:
-		hdr = (const union amdgpu_firmware_header *)adev->gfx.rlc_fw->data;
-		break;
-
-	case KGD_ENGINE_SDMA1:
-		hdr = (const union amdgpu_firmware_header *)adev->sdma.instance[0].fw->data;
-		break;
-
-	case KGD_ENGINE_SDMA2:
-		hdr = (const union amdgpu_firmware_header *)adev->sdma.instance[1].fw->data;
-		break;
-
-	default:
-		return 0;
-	}
-
-	if (hdr == NULL)
-		return 0;
-
-	/* Only 12 bit in use*/
-	return hdr->common.ucode_version;
-}
-
 static void set_vm_context_page_table_base(struct kgd_dev *kgd, uint32_t vmid,
 		uint64_t page_table_base)
 {
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
index d47354997e3c..9ec6356d3f0b 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
@@ -3437,7 +3437,7 @@ static int amdgpu_do_asic_reset(struct amdgpu_hive_info *hive,
 
 				vram_lost = amdgpu_device_check_vram_lost(tmp_adev);
 				if (vram_lost) {
-					DRM_ERROR("VRAM is lost!\n");
+					DRM_INFO("VRAM is lost due to GPU reset!\n");
 					atomic_inc(&tmp_adev->vram_lost_counter);
 				}
 
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c
index ec9e45004bff..93b2c5a48a71 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c
@@ -88,12 +88,14 @@ static void amdgpu_bo_destroy(struct ttm_buffer_object *tbo)
 	if (bo->gem_base.import_attach)
 		drm_prime_gem_destroy(&bo->gem_base, bo->tbo.sg);
 	drm_gem_object_release(&bo->gem_base);
-	amdgpu_bo_unref(&bo->parent);
+	/* in case amdgpu_device_recover_vram got NULL of bo->parent */
 	if (!list_empty(&bo->shadow_list)) {
 		mutex_lock(&adev->shadow_list_lock);
 		list_del_init(&bo->shadow_list);
 		mutex_unlock(&adev->shadow_list_lock);
 	}
+	amdgpu_bo_unref(&bo->parent);
+
 	kfree(bo->metadata);
 	kfree(bo);
 }
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_pm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_pm.c
index 4b7a076eea9c..34471dbaa872 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_pm.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_pm.c
@@ -144,7 +144,7 @@ static ssize_t amdgpu_get_dpm_state(struct device *dev,
 	struct amdgpu_device *adev = ddev->dev_private;
 	enum amd_pm_state_type pm;
 
-	if (adev->smu.ppt_funcs->get_current_power_state)
+	if (is_support_sw_smu(adev) && adev->smu.ppt_funcs->get_current_power_state)
 		pm = amdgpu_smu_get_current_power_state(adev);
 	else if (adev->powerplay.pp_funcs->get_current_power_state)
 		pm = amdgpu_dpm_get_current_power_state(adev);
@@ -342,6 +342,16 @@ static ssize_t amdgpu_set_dpm_forced_performance_level(struct device *dev,
 	if (current_level == level)
 		return count;
 
+	/* profile_exit setting is valid only when current mode is in profile mode */
+	if (!(current_level & (AMD_DPM_FORCED_LEVEL_PROFILE_STANDARD |
+	    AMD_DPM_FORCED_LEVEL_PROFILE_MIN_SCLK |
+	    AMD_DPM_FORCED_LEVEL_PROFILE_MIN_MCLK |
+	    AMD_DPM_FORCED_LEVEL_PROFILE_PEAK)) &&
+	    (level == AMD_DPM_FORCED_LEVEL_PROFILE_EXIT)) {
+		pr_err("Currently not in any profile mode!\n");
+		return -EINVAL;
+	}
+
 	if (is_support_sw_smu(adev)) {
 		mutex_lock(&adev->pm.mutex);
 		if (adev->pm.dpm.thermal_active) {
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c
index 905cce1814f3..05897b05766b 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c
@@ -38,18 +38,10 @@ static void psp_set_funcs(struct amdgpu_device *adev);
 static int psp_early_init(void *handle)
 {
 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+	struct psp_context *psp = &adev->psp;
 
 	psp_set_funcs(adev);
 
-	return 0;
-}
-
-static int psp_sw_init(void *handle)
-{
-	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
-	struct psp_context *psp = &adev->psp;
-	int ret;
-
 	switch (adev->asic_type) {
 	case CHIP_VEGA10:
 	case CHIP_VEGA12:
@@ -67,6 +59,15 @@ static int psp_sw_init(void *handle)
 
 	psp->adev = adev;
 
+	return 0;
+}
+
+static int psp_sw_init(void *handle)
+{
+	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+	struct psp_context *psp = &adev->psp;
+	int ret;
+
 	ret = psp_init_microcode(psp);
 	if (ret) {
 		DRM_ERROR("Failed to load psp firmware!\n");
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c
index 7e7f9ed89ee1..7d484fad3909 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c
@@ -36,6 +36,7 @@ void amdgpu_virt_init_setting(struct amdgpu_device *adev)
 	/* enable virtual display */
 	adev->mode_info.num_crtc = 1;
 	adev->enable_virtual_display = true;
+	adev->ddev->driver->driver_features &= ~DRIVER_ATOMIC;
 	adev->cg_flags = 0;
 	adev->pg_flags = 0;
 }
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
index a07c85815b7a..4f10f5aba00b 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
@@ -2757,6 +2757,37 @@ error_free_sched_entity:
 }
 
 /**
+ * amdgpu_vm_check_clean_reserved - check if a VM is clean
+ *
+ * @adev: amdgpu_device pointer
+ * @vm: the VM to check
+ *
+ * check all entries of the root PD, if any subsequent PDs are allocated,
+ * it means there are page table creating and filling, and is no a clean
+ * VM
+ *
+ * Returns:
+ *	0 if this VM is clean
+ */
+static int amdgpu_vm_check_clean_reserved(struct amdgpu_device *adev,
+	struct amdgpu_vm *vm)
+{
+	enum amdgpu_vm_level root = adev->vm_manager.root_level;
+	unsigned int entries = amdgpu_vm_num_entries(adev, root);
+	unsigned int i = 0;
+
+	if (!(vm->root.entries))
+		return 0;
+
+	for (i = 0; i < entries; i++) {
+		if (vm->root.entries[i].base.bo)
+			return -EINVAL;
+	}
+
+	return 0;
+}
+
+/**
  * amdgpu_vm_make_compute - Turn a GFX VM into a compute VM
  *
  * @adev: amdgpu_device pointer
@@ -2786,10 +2817,9 @@ int amdgpu_vm_make_compute(struct amdgpu_device *adev, struct amdgpu_vm *vm, uns
 		return r;
 
 	/* Sanity checks */
-	if (!RB_EMPTY_ROOT(&vm->va.rb_root) || vm->root.entries) {
-		r = -EINVAL;
+	r = amdgpu_vm_check_clean_reserved(adev, vm);
+	if (r)
 		goto unreserve_bo;
-	}
 
 	if (pasid) {
 		unsigned long flags;
diff --git a/drivers/gpu/drm/amd/amdgpu/mxgpu_ai.c b/drivers/gpu/drm/amd/amdgpu/mxgpu_ai.c
index 8dbad496b29f..2471e7cf75ea 100644
--- a/drivers/gpu/drm/amd/amdgpu/mxgpu_ai.c
+++ b/drivers/gpu/drm/amd/amdgpu/mxgpu_ai.c
@@ -372,6 +372,9 @@ static int xgpu_ai_mailbox_rcv_irq(struct amdgpu_device *adev,
 		if (amdgpu_sriov_runtime(adev))
 			schedule_work(&adev->virt.flr_work);
 		break;
+		case IDH_QUERY_ALIVE:
+			xgpu_ai_mailbox_send_ack(adev);
+			break;
 		/* READY_TO_ACCESS_GPU is fetched by kernel polling, IRQ can ignore
 		 * it byfar since that polling thread will handle it,
 		 * other msg like flr complete is not handled here.
diff --git a/drivers/gpu/drm/amd/amdgpu/mxgpu_ai.h b/drivers/gpu/drm/amd/amdgpu/mxgpu_ai.h
index 39d151b79153..077e91a33d62 100644
--- a/drivers/gpu/drm/amd/amdgpu/mxgpu_ai.h
+++ b/drivers/gpu/drm/amd/amdgpu/mxgpu_ai.h
@@ -49,6 +49,7 @@ enum idh_event {
 	IDH_FLR_NOTIFICATION_CMPL,
 	IDH_SUCCESS,
 	IDH_FAIL,
+	IDH_QUERY_ALIVE,
 	IDH_EVENT_MAX
 };
 
diff --git a/drivers/gpu/drm/amd/amdgpu/mxgpu_vi.c b/drivers/gpu/drm/amd/amdgpu/mxgpu_vi.c
index 6a0fcd67662a..aef9d059ae52 100644
--- a/drivers/gpu/drm/amd/amdgpu/mxgpu_vi.c
+++ b/drivers/gpu/drm/amd/amdgpu/mxgpu_vi.c
@@ -515,7 +515,7 @@ static void xgpu_vi_mailbox_flr_work(struct work_struct *work)
 
 	/* wait until RCV_MSG become 3 */
 	if (xgpu_vi_poll_msg(adev, IDH_FLR_NOTIFICATION_CMPL)) {
-		pr_err("failed to recieve FLR_CMPL\n");
+		pr_err("failed to receive FLR_CMPL\n");
 		return;
 	}
 
diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c b/drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c
index 1ec60f54b992..9c88ce513d78 100644
--- a/drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c
@@ -156,7 +156,6 @@ static const struct soc15_reg_golden golden_settings_sdma0_4_2[] =
 	SOC15_REG_GOLDEN_VALUE(SDMA0, 0, mmSDMA0_RLC7_RB_RPTR_ADDR_LO, 0xfffffffd, 0x00000001),
 	SOC15_REG_GOLDEN_VALUE(SDMA0, 0, mmSDMA0_RLC7_RB_WPTR_POLL_CNTL, 0xfffffff7, 0x00403000),
 	SOC15_REG_GOLDEN_VALUE(SDMA0, 0, mmSDMA0_UTCL1_PAGE, 0x000003ff, 0x000003c0),
-	SOC15_REG_GOLDEN_VALUE(SDMA0, 0, mmSDMA0_UTCL1_WATERMK, 0xFE000000, 0x00000000),
 };
 
 static const struct soc15_reg_golden golden_settings_sdma1_4_2[] = {
@@ -186,7 +185,6 @@ static const struct soc15_reg_golden golden_settings_sdma1_4_2[] = {
 	SOC15_REG_GOLDEN_VALUE(SDMA1, 0, mmSDMA1_RLC7_RB_RPTR_ADDR_LO, 0xfffffffd, 0x00000001),
 	SOC15_REG_GOLDEN_VALUE(SDMA1, 0, mmSDMA1_RLC7_RB_WPTR_POLL_CNTL, 0xfffffff7, 0x00403000),
 	SOC15_REG_GOLDEN_VALUE(SDMA1, 0, mmSDMA1_UTCL1_PAGE, 0x000003ff, 0x000003c0),
-	SOC15_REG_GOLDEN_VALUE(SDMA1, 0, mmSDMA1_UTCL1_WATERMK, 0xFE000000, 0x00000000),
 };
 
 static const struct soc15_reg_golden golden_settings_sdma_rv1[] =
@@ -851,7 +849,7 @@ static void sdma_v4_0_gfx_resume(struct amdgpu_device *adev, unsigned int i)
 	wptr_poll_cntl = RREG32_SDMA(i, mmSDMA0_GFX_RB_WPTR_POLL_CNTL);
 	wptr_poll_cntl = REG_SET_FIELD(wptr_poll_cntl,
 				       SDMA0_GFX_RB_WPTR_POLL_CNTL,
-				       F32_POLL_ENABLE, amdgpu_sriov_vf(adev));
+				       F32_POLL_ENABLE, amdgpu_sriov_vf(adev)? 1 : 0);
 	WREG32_SDMA(i, mmSDMA0_GFX_RB_WPTR_POLL_CNTL, wptr_poll_cntl);
 
 	/* enable DMA RB */
@@ -942,7 +940,7 @@ static void sdma_v4_0_page_resume(struct amdgpu_device *adev, unsigned int i)
 	wptr_poll_cntl = RREG32_SDMA(i, mmSDMA0_PAGE_RB_WPTR_POLL_CNTL);
 	wptr_poll_cntl = REG_SET_FIELD(wptr_poll_cntl,
 				       SDMA0_PAGE_RB_WPTR_POLL_CNTL,
-				       F32_POLL_ENABLE, amdgpu_sriov_vf(adev));
+				       F32_POLL_ENABLE, amdgpu_sriov_vf(adev)? 1 : 0);
 	WREG32_SDMA(i, mmSDMA0_PAGE_RB_WPTR_POLL_CNTL, wptr_poll_cntl);
 
 	/* enable DMA RB */
diff --git a/drivers/gpu/drm/amd/amdgpu/soc15.c b/drivers/gpu/drm/amd/amdgpu/soc15.c
index bdb5ad93990d..4900e4958dec 100644
--- a/drivers/gpu/drm/amd/amdgpu/soc15.c
+++ b/drivers/gpu/drm/amd/amdgpu/soc15.c
@@ -470,6 +470,12 @@ static int soc15_asic_reset(struct amdgpu_device *adev)
 	case CHIP_VEGA12:
 		soc15_asic_get_baco_capability(adev, &baco_reset);
 		break;
+	case CHIP_VEGA20:
+		if (adev->psp.sos_fw_version >= 0x80067)
+			soc15_asic_get_baco_capability(adev, &baco_reset);
+		else
+			baco_reset = false;
+		break;
 	default:
 		baco_reset = false;
 		break;
@@ -895,7 +901,8 @@ static int soc15_common_early_init(void *handle)
 
 			adev->pg_flags = AMD_PG_SUPPORT_SDMA | AMD_PG_SUPPORT_VCN;
 		} else if (adev->pdev->device == 0x15d8) {
-			adev->cg_flags = AMD_CG_SUPPORT_GFX_MGLS |
+			adev->cg_flags = AMD_CG_SUPPORT_GFX_MGCG |
+				AMD_CG_SUPPORT_GFX_MGLS |
 				AMD_CG_SUPPORT_GFX_CP_LS |
 				AMD_CG_SUPPORT_GFX_3D_CGCG |
 				AMD_CG_SUPPORT_GFX_3D_CGLS |
diff --git a/drivers/gpu/drm/amd/amdgpu/uvd_v7_0.c b/drivers/gpu/drm/amd/amdgpu/uvd_v7_0.c
index dc461df48da0..2191d3d0a219 100644
--- a/drivers/gpu/drm/amd/amdgpu/uvd_v7_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/uvd_v7_0.c
@@ -787,10 +787,13 @@ static int uvd_v7_0_sriov_start(struct amdgpu_device *adev)
 							   0xFFFFFFFF, 0x00000004);
 			/* mc resume*/
 			if (adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) {
-				MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(UVD, i, mmUVD_LMI_VCPU_CACHE_64BIT_BAR_LOW),
-							    lower_32_bits(adev->firmware.ucode[AMDGPU_UCODE_ID_UVD].mc_addr));
-				MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(UVD, i, mmUVD_LMI_VCPU_CACHE_64BIT_BAR_HIGH),
-							    upper_32_bits(adev->firmware.ucode[AMDGPU_UCODE_ID_UVD].mc_addr));
+				MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(UVD, i,
+							mmUVD_LMI_VCPU_CACHE_64BIT_BAR_LOW),
+							adev->firmware.ucode[AMDGPU_UCODE_ID_UVD].tmr_mc_addr_lo);
+				MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(UVD, i,
+							mmUVD_LMI_VCPU_CACHE_64BIT_BAR_HIGH),
+							adev->firmware.ucode[AMDGPU_UCODE_ID_UVD].tmr_mc_addr_hi);
+				MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(UVD, 0, mmUVD_VCPU_CACHE_OFFSET0), 0);
 				offset = 0;
 			} else {
 				MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(UVD, i, mmUVD_LMI_VCPU_CACHE_64BIT_BAR_LOW),
@@ -798,10 +801,11 @@ static int uvd_v7_0_sriov_start(struct amdgpu_device *adev)
 				MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(UVD, i, mmUVD_LMI_VCPU_CACHE_64BIT_BAR_HIGH),
 							    upper_32_bits(adev->uvd.inst[i].gpu_addr));
 				offset = size;
+				MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(UVD, 0, mmUVD_VCPU_CACHE_OFFSET0),
+							AMDGPU_UVD_FIRMWARE_OFFSET >> 3);
+
 			}
 
-			MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(UVD, i, mmUVD_VCPU_CACHE_OFFSET0),
-						    AMDGPU_UVD_FIRMWARE_OFFSET >> 3);
 			MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(UVD, i, mmUVD_VCPU_CACHE_SIZE0), size);
 
 			MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(UVD, i, mmUVD_LMI_VCPU_CACHE1_64BIT_BAR_LOW),
diff --git a/drivers/gpu/drm/amd/amdgpu/vce_v2_0.c b/drivers/gpu/drm/amd/amdgpu/vce_v2_0.c
index bed78a778e3f..40363ca6c5f1 100644
--- a/drivers/gpu/drm/amd/amdgpu/vce_v2_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/vce_v2_0.c
@@ -283,7 +283,7 @@ static int vce_v2_0_stop(struct amdgpu_device *adev)
 	}
 
 	if (vce_v2_0_wait_for_idle(adev)) {
-		DRM_INFO("VCE is busy, Can't set clock gateing");
+		DRM_INFO("VCE is busy, Can't set clock gating");
 		return 0;
 	}
 
diff --git a/drivers/gpu/drm/amd/amdgpu/vce_v4_0.c b/drivers/gpu/drm/amd/amdgpu/vce_v4_0.c
index aadc3e66ebd7..c0ec27991c22 100644
--- a/drivers/gpu/drm/amd/amdgpu/vce_v4_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/vce_v4_0.c
@@ -244,13 +244,18 @@ static int vce_v4_0_sriov_start(struct amdgpu_device *adev)
 		MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_LMI_SWAP_CNTL1), 0);
 		MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_LMI_VM_CTRL), 0);
 
+		offset = AMDGPU_VCE_FIRMWARE_OFFSET;
 		if (adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) {
+			uint32_t low = adev->firmware.ucode[AMDGPU_UCODE_ID_VCE].tmr_mc_addr_lo;
+			uint32_t hi = adev->firmware.ucode[AMDGPU_UCODE_ID_VCE].tmr_mc_addr_hi;
+			uint64_t tmr_mc_addr = (uint64_t)(hi) << 32 | low;
+
 			MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0,
-						mmVCE_LMI_VCPU_CACHE_40BIT_BAR0),
-						adev->firmware.ucode[AMDGPU_UCODE_ID_VCE].mc_addr >> 8);
+						mmVCE_LMI_VCPU_CACHE_40BIT_BAR0), tmr_mc_addr >> 8);
 			MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0,
 						mmVCE_LMI_VCPU_CACHE_64BIT_BAR0),
-						(adev->firmware.ucode[AMDGPU_UCODE_ID_VCE].mc_addr >> 40) & 0xff);
+						(tmr_mc_addr >> 40) & 0xff);
+			MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_VCPU_CACHE_OFFSET0), 0);
 		} else {
 			MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0,
 						mmVCE_LMI_VCPU_CACHE_40BIT_BAR0),
@@ -258,6 +263,9 @@ static int vce_v4_0_sriov_start(struct amdgpu_device *adev)
 			MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0,
 						mmVCE_LMI_VCPU_CACHE_64BIT_BAR0),
 						(adev->vce.gpu_addr >> 40) & 0xff);
+			MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_VCPU_CACHE_OFFSET0),
+						offset & ~0x0f000000);
+
 		}
 		MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0,
 						mmVCE_LMI_VCPU_CACHE_40BIT_BAR1),
@@ -272,10 +280,7 @@ static int vce_v4_0_sriov_start(struct amdgpu_device *adev)
 						mmVCE_LMI_VCPU_CACHE_64BIT_BAR2),
 						(adev->vce.gpu_addr >> 40) & 0xff);
 
-		offset = AMDGPU_VCE_FIRMWARE_OFFSET;
 		size = VCE_V4_0_FW_SIZE;
-		MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_VCPU_CACHE_OFFSET0),
-					offset & ~0x0f000000);
 		MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_VCPU_CACHE_SIZE0), size);
 
 		offset = (adev->firmware.load_type != AMDGPU_FW_LOAD_PSP) ? offset + size : 0;
@@ -382,6 +387,7 @@ static int vce_v4_0_start(struct amdgpu_device *adev)
 static int vce_v4_0_stop(struct amdgpu_device *adev)
 {
 
+	/* Disable VCPU */
 	WREG32_P(SOC15_REG_OFFSET(VCE, 0, mmVCE_VCPU_CNTL), 0, ~0x200001);
 
 	/* hold on ECPU */
@@ -389,8 +395,8 @@ static int vce_v4_0_stop(struct amdgpu_device *adev)
 			VCE_SOFT_RESET__ECPU_SOFT_RESET_MASK,
 			~VCE_SOFT_RESET__ECPU_SOFT_RESET_MASK);
 
-	/* clear BUSY flag */
-	WREG32_P(SOC15_REG_OFFSET(VCE, 0, mmVCE_STATUS), 0, ~VCE_STATUS__JOB_BUSY_MASK);
+	/* clear VCE_STATUS */
+	WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_STATUS), 0);
 
 	/* Set Clock-Gating off */
 	/* if (adev->cg_flags & AMD_CG_SUPPORT_VCE_MGCG)
@@ -922,6 +928,7 @@ static int vce_v4_0_set_clockgating_state(void *handle,
 
 	return 0;
 }
+#endif
 
 static int vce_v4_0_set_powergating_state(void *handle,
 					  enum amd_powergating_state state)
@@ -935,16 +942,11 @@ static int vce_v4_0_set_powergating_state(void *handle,
 	 */
 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
 
-	if (!(adev->pg_flags & AMD_PG_SUPPORT_VCE))
-		return 0;
-
 	if (state == AMD_PG_STATE_GATE)
-		/* XXX do we need a vce_v4_0_stop()? */
-		return 0;
+		return vce_v4_0_stop(adev);
 	else
 		return vce_v4_0_start(adev);
 }
-#endif
 
 static void vce_v4_0_ring_emit_ib(struct amdgpu_ring *ring, struct amdgpu_job *job,
 					struct amdgpu_ib *ib, uint32_t flags)
@@ -1059,7 +1061,7 @@ const struct amd_ip_funcs vce_v4_0_ip_funcs = {
 	.soft_reset = NULL /* vce_v4_0_soft_reset */,
 	.post_soft_reset = NULL /* vce_v4_0_post_soft_reset */,
 	.set_clockgating_state = vce_v4_0_set_clockgating_state,
-	.set_powergating_state = NULL /* vce_v4_0_set_powergating_state */,
+	.set_powergating_state = vce_v4_0_set_powergating_state,
 };
 
 static const struct amdgpu_ring_funcs vce_v4_0_ring_vm_funcs = {
diff --git a/drivers/gpu/drm/amd/amdgpu/vega10_ih.c b/drivers/gpu/drm/amd/amdgpu/vega10_ih.c
index 1b2f69a9a24e..8d89ab7f0ae8 100644
--- a/drivers/gpu/drm/amd/amdgpu/vega10_ih.c
+++ b/drivers/gpu/drm/amd/amdgpu/vega10_ih.c
@@ -31,7 +31,7 @@
 #include "soc15_common.h"
 #include "vega10_ih.h"
 
-
+#define MAX_REARM_RETRY 10
 
 static void vega10_ih_set_interrupt_funcs(struct amdgpu_device *adev);
 
@@ -382,6 +382,38 @@ static void vega10_ih_decode_iv(struct amdgpu_device *adev,
 }
 
 /**
+ * vega10_ih_irq_rearm - rearm IRQ if lost
+ *
+ * @adev: amdgpu_device pointer
+ *
+ */
+static void vega10_ih_irq_rearm(struct amdgpu_device *adev,
+			       struct amdgpu_ih_ring *ih)
+{
+	uint32_t reg_rptr = 0;
+	uint32_t v = 0;
+	uint32_t i = 0;
+
+	if (ih == &adev->irq.ih)
+		reg_rptr = SOC15_REG_OFFSET(OSSSYS, 0, mmIH_RB_RPTR);
+	else if (ih == &adev->irq.ih1)
+		reg_rptr = SOC15_REG_OFFSET(OSSSYS, 0, mmIH_RB_RPTR_RING1);
+	else if (ih == &adev->irq.ih2)
+		reg_rptr = SOC15_REG_OFFSET(OSSSYS, 0, mmIH_RB_RPTR_RING2);
+	else
+		return;
+
+	/* Rearm IRQ / re-wwrite doorbell if doorbell write is lost */
+	for (i = 0; i < MAX_REARM_RETRY; i++) {
+		v = RREG32_NO_KIQ(reg_rptr);
+		if ((v < ih->ring_size) && (v != ih->rptr))
+			WDOORBELL32(ih->doorbell_index, ih->rptr);
+		else
+			break;
+	}
+}
+
+/**
  * vega10_ih_set_rptr - set the IH ring buffer rptr
  *
  * @adev: amdgpu_device pointer
@@ -395,6 +427,9 @@ static void vega10_ih_set_rptr(struct amdgpu_device *adev,
 		/* XXX check if swapping is necessary on BE */
 		*ih->rptr_cpu = ih->rptr;
 		WDOORBELL32(ih->doorbell_index, ih->rptr);
+
+		if (amdgpu_sriov_vf(adev))
+			vega10_ih_irq_rearm(adev, ih);
 	} else if (ih == &adev->irq.ih) {
 		WREG32_SOC15(OSSSYS, 0, mmIH_RB_RPTR, ih->rptr);
 	} else if (ih == &adev->irq.ih1) {
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device.c b/drivers/gpu/drm/amd/amdkfd/kfd_device.c
index 2fee3063a0d6..c1e4d44d6137 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_device.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_device.c
@@ -494,9 +494,9 @@ bool kgd2kfd_device_init(struct kfd_dev *kfd,
 {
 	unsigned int size;
 
-	kfd->mec_fw_version = kfd->kfd2kgd->get_fw_version(kfd->kgd,
+	kfd->mec_fw_version = amdgpu_amdkfd_get_fw_version(kfd->kgd,
 			KGD_ENGINE_MEC1);
-	kfd->sdma_fw_version = kfd->kfd2kgd->get_fw_version(kfd->kgd,
+	kfd->sdma_fw_version = amdgpu_amdkfd_get_fw_version(kfd->kgd,
 			KGD_ENGINE_SDMA1);
 	kfd->shared_resources = *gpu_resources;
 
diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c
index 923cb5ca5a57..995f9df66142 100644
--- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c
+++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c
@@ -364,6 +364,7 @@ static void dm_vupdate_high_irq(void *interrupt_params)
 	struct amdgpu_device *adev = irq_params->adev;
 	struct amdgpu_crtc *acrtc;
 	struct dm_crtc_state *acrtc_state;
+	unsigned long flags;
 
 	acrtc = get_crtc_by_otg_inst(adev, irq_params->irq_src - IRQ_TYPE_VUPDATE);
 
@@ -379,8 +380,25 @@ static void dm_vupdate_high_irq(void *interrupt_params)
 		 * page-flip completion events that have been queued to us
 		 * if a pageflip happened inside front-porch.
 		 */
-		if (amdgpu_dm_vrr_active(acrtc_state))
+		if (amdgpu_dm_vrr_active(acrtc_state)) {
 			drm_crtc_handle_vblank(&acrtc->base);
+
+			/* BTR processing for pre-DCE12 ASICs */
+			if (acrtc_state->stream &&
+			    adev->family < AMDGPU_FAMILY_AI) {
+				spin_lock_irqsave(&adev->ddev->event_lock, flags);
+				mod_freesync_handle_v_update(
+				    adev->dm.freesync_module,
+				    acrtc_state->stream,
+				    &acrtc_state->vrr_params);
+
+				dc_stream_adjust_vmin_vmax(
+				    adev->dm.dc,
+				    acrtc_state->stream,
+				    &acrtc_state->vrr_params.adjust);
+				spin_unlock_irqrestore(&adev->ddev->event_lock, flags);
+			}
+		}
 	}
 }
 
@@ -390,6 +408,7 @@ static void dm_crtc_high_irq(void *interrupt_params)
 	struct amdgpu_device *adev = irq_params->adev;
 	struct amdgpu_crtc *acrtc;
 	struct dm_crtc_state *acrtc_state;
+	unsigned long flags;
 
 	acrtc = get_crtc_by_otg_inst(adev, irq_params->irq_src - IRQ_TYPE_VBLANK);
 
@@ -412,9 +431,10 @@ static void dm_crtc_high_irq(void *interrupt_params)
 		 */
 		amdgpu_dm_crtc_handle_crc_irq(&acrtc->base);
 
-		if (acrtc_state->stream &&
+		if (acrtc_state->stream && adev->family >= AMDGPU_FAMILY_AI &&
 		    acrtc_state->vrr_params.supported &&
 		    acrtc_state->freesync_config.state == VRR_STATE_ACTIVE_VARIABLE) {
+			spin_lock_irqsave(&adev->ddev->event_lock, flags);
 			mod_freesync_handle_v_update(
 				adev->dm.freesync_module,
 				acrtc_state->stream,
@@ -424,6 +444,7 @@ static void dm_crtc_high_irq(void *interrupt_params)
 				adev->dm.dc,
 				acrtc_state->stream,
 				&acrtc_state->vrr_params.adjust);
+			spin_unlock_irqrestore(&adev->ddev->event_lock, flags);
 		}
 	}
 }
@@ -534,6 +555,8 @@ static int amdgpu_dm_init(struct amdgpu_device *adev)
 	if (amdgpu_dc_feature_mask & DC_FBC_MASK)
 		init_data.flags.fbc_support = true;
 
+	init_data.flags.power_down_display_on_boot = true;
+
 	/* Display Core create. */
 	adev->dm.dc = dc_create(&init_data);
 
@@ -3438,6 +3461,8 @@ dm_crtc_duplicate_state(struct drm_crtc *crtc)
 		dc_stream_retain(state->stream);
 	}
 
+	state->active_planes = cur->active_planes;
+	state->interrupts_enabled = cur->interrupts_enabled;
 	state->vrr_params = cur->vrr_params;
 	state->vrr_infopacket = cur->vrr_infopacket;
 	state->abm_level = cur->abm_level;
@@ -3862,7 +3887,20 @@ static void dm_crtc_helper_disable(struct drm_crtc *crtc)
 {
 }
 
-static bool does_crtc_have_active_plane(struct drm_crtc_state *new_crtc_state)
+static bool does_crtc_have_active_cursor(struct drm_crtc_state *new_crtc_state)
+{
+	struct drm_device *dev = new_crtc_state->crtc->dev;
+	struct drm_plane *plane;
+
+	drm_for_each_plane_mask(plane, dev, new_crtc_state->plane_mask) {
+		if (plane->type == DRM_PLANE_TYPE_CURSOR)
+			return true;
+	}
+
+	return false;
+}
+
+static int count_crtc_active_planes(struct drm_crtc_state *new_crtc_state)
 {
 	struct drm_atomic_state *state = new_crtc_state->state;
 	struct drm_plane *plane;
@@ -3891,7 +3929,32 @@ static bool does_crtc_have_active_plane(struct drm_crtc_state *new_crtc_state)
 		num_active += (new_plane_state->fb != NULL);
 	}
 
-	return num_active > 0;
+	return num_active;
+}
+
+/*
+ * Sets whether interrupts should be enabled on a specific CRTC.
+ * We require that the stream be enabled and that there exist active
+ * DC planes on the stream.
+ */
+static void
+dm_update_crtc_interrupt_state(struct drm_crtc *crtc,
+			       struct drm_crtc_state *new_crtc_state)
+{
+	struct dm_crtc_state *dm_new_crtc_state =
+		to_dm_crtc_state(new_crtc_state);
+
+	dm_new_crtc_state->active_planes = 0;
+	dm_new_crtc_state->interrupts_enabled = false;
+
+	if (!dm_new_crtc_state->stream)
+		return;
+
+	dm_new_crtc_state->active_planes =
+		count_crtc_active_planes(new_crtc_state);
+
+	dm_new_crtc_state->interrupts_enabled =
+		dm_new_crtc_state->active_planes > 0;
 }
 
 static int dm_crtc_helper_atomic_check(struct drm_crtc *crtc,
@@ -3902,6 +3965,14 @@ static int dm_crtc_helper_atomic_check(struct drm_crtc *crtc,
 	struct dm_crtc_state *dm_crtc_state = to_dm_crtc_state(state);
 	int ret = -EINVAL;
 
+	/*
+	 * Update interrupt state for the CRTC. This needs to happen whenever
+	 * the CRTC has changed or whenever any of its planes have changed.
+	 * Atomic check satisfies both of these requirements since the CRTC
+	 * is added to the state by DRM during drm_atomic_helper_check_planes.
+	 */
+	dm_update_crtc_interrupt_state(crtc, state);
+
 	if (unlikely(!dm_crtc_state->stream &&
 		     modeset_required(state, NULL, dm_crtc_state->stream))) {
 		WARN_ON(1);
@@ -3912,9 +3983,13 @@ static int dm_crtc_helper_atomic_check(struct drm_crtc *crtc,
 	if (!dm_crtc_state->stream)
 		return 0;
 
-	/* We want at least one hardware plane enabled to use the stream. */
+	/*
+	 * We want at least one hardware plane enabled to use
+	 * the stream with a cursor enabled.
+	 */
 	if (state->enable && state->active &&
-	    !does_crtc_have_active_plane(state))
+	    does_crtc_have_active_cursor(state) &&
+	    dm_crtc_state->active_planes == 0)
 		return -EINVAL;
 
 	if (dc_validate_stream(dc, dm_crtc_state->stream) == DC_OK)
@@ -4188,6 +4263,7 @@ static const uint32_t rgb_formats[] = {
 	DRM_FORMAT_ABGR2101010,
 	DRM_FORMAT_XBGR8888,
 	DRM_FORMAT_ABGR8888,
+	DRM_FORMAT_RGB565,
 };
 
 static const uint32_t overlay_formats[] = {
@@ -4196,6 +4272,7 @@ static const uint32_t overlay_formats[] = {
 	DRM_FORMAT_RGBA8888,
 	DRM_FORMAT_XBGR8888,
 	DRM_FORMAT_ABGR8888,
+	DRM_FORMAT_RGB565
 };
 
 static const u32 cursor_formats[] = {
@@ -4999,8 +5076,10 @@ static void update_freesync_state_on_stream(
 	struct dc_plane_state *surface,
 	u32 flip_timestamp_in_us)
 {
-	struct mod_vrr_params vrr_params = new_crtc_state->vrr_params;
+	struct mod_vrr_params vrr_params;
 	struct dc_info_packet vrr_infopacket = {0};
+	struct amdgpu_device *adev = dm->adev;
+	unsigned long flags;
 
 	if (!new_stream)
 		return;
@@ -5013,6 +5092,9 @@ static void update_freesync_state_on_stream(
 	if (!new_stream->timing.h_total || !new_stream->timing.v_total)
 		return;
 
+	spin_lock_irqsave(&adev->ddev->event_lock, flags);
+	vrr_params = new_crtc_state->vrr_params;
+
 	if (surface) {
 		mod_freesync_handle_preflip(
 			dm->freesync_module,
@@ -5020,6 +5102,12 @@ static void update_freesync_state_on_stream(
 			new_stream,
 			flip_timestamp_in_us,
 			&vrr_params);
+
+		if (adev->family < AMDGPU_FAMILY_AI &&
+		    amdgpu_dm_vrr_active(new_crtc_state)) {
+			mod_freesync_handle_v_update(dm->freesync_module,
+						     new_stream, &vrr_params);
+		}
 	}
 
 	mod_freesync_build_vrr_infopacket(
@@ -5051,6 +5139,8 @@ static void update_freesync_state_on_stream(
 			      new_crtc_state->base.crtc->base.id,
 			      (int)new_crtc_state->base.vrr_enabled,
 			      (int)vrr_params.state);
+
+	spin_unlock_irqrestore(&adev->ddev->event_lock, flags);
 }
 
 static void pre_update_freesync_state_on_stream(
@@ -5058,8 +5148,10 @@ static void pre_update_freesync_state_on_stream(
 	struct dm_crtc_state *new_crtc_state)
 {
 	struct dc_stream_state *new_stream = new_crtc_state->stream;
-	struct mod_vrr_params vrr_params = new_crtc_state->vrr_params;
+	struct mod_vrr_params vrr_params;
 	struct mod_freesync_config config = new_crtc_state->freesync_config;
+	struct amdgpu_device *adev = dm->adev;
+	unsigned long flags;
 
 	if (!new_stream)
 		return;
@@ -5071,6 +5163,9 @@ static void pre_update_freesync_state_on_stream(
 	if (!new_stream->timing.h_total || !new_stream->timing.v_total)
 		return;
 
+	spin_lock_irqsave(&adev->ddev->event_lock, flags);
+	vrr_params = new_crtc_state->vrr_params;
+
 	if (new_crtc_state->vrr_supported &&
 	    config.min_refresh_in_uhz &&
 	    config.max_refresh_in_uhz) {
@@ -5091,6 +5186,7 @@ static void pre_update_freesync_state_on_stream(
 			sizeof(vrr_params.adjust)) != 0);
 
 	new_crtc_state->vrr_params = vrr_params;
+	spin_unlock_irqrestore(&adev->ddev->event_lock, flags);
 }
 
 static void amdgpu_dm_handle_vrr_transition(struct dm_crtc_state *old_state,
@@ -5123,6 +5219,22 @@ static void amdgpu_dm_handle_vrr_transition(struct dm_crtc_state *old_state,
 	}
 }
 
+static void amdgpu_dm_commit_cursors(struct drm_atomic_state *state)
+{
+	struct drm_plane *plane;
+	struct drm_plane_state *old_plane_state, *new_plane_state;
+	int i;
+
+	/*
+	 * TODO: Make this per-stream so we don't issue redundant updates for
+	 * commits with multiple streams.
+	 */
+	for_each_oldnew_plane_in_state(state, plane, old_plane_state,
+				       new_plane_state, i)
+		if (plane->type == DRM_PLANE_TYPE_CURSOR)
+			handle_cursor_update(plane, old_plane_state);
+}
+
 static void amdgpu_dm_commit_planes(struct drm_atomic_state *state,
 				    struct dc_state *dc_state,
 				    struct drm_device *dev,
@@ -5130,7 +5242,7 @@ static void amdgpu_dm_commit_planes(struct drm_atomic_state *state,
 				    struct drm_crtc *pcrtc,
 				    bool wait_for_vblank)
 {
-	uint32_t i, r;
+	uint32_t i;
 	uint64_t timestamp_ns;
 	struct drm_plane *plane;
 	struct drm_plane_state *old_plane_state, *new_plane_state;
@@ -5141,6 +5253,7 @@ static void amdgpu_dm_commit_planes(struct drm_atomic_state *state,
 	struct dm_crtc_state *dm_old_crtc_state =
 			to_dm_crtc_state(drm_atomic_get_old_crtc_state(state, pcrtc));
 	int planes_count = 0, vpos, hpos;
+	long r;
 	unsigned long flags;
 	struct amdgpu_bo *abo;
 	uint64_t tiling_flags;
@@ -5162,6 +5275,14 @@ static void amdgpu_dm_commit_planes(struct drm_atomic_state *state,
 		goto cleanup;
 	}
 
+	/*
+	 * Disable the cursor first if we're disabling all the planes.
+	 * It'll remain on the screen after the planes are re-enabled
+	 * if we don't.
+	 */
+	if (acrtc_state->active_planes == 0)
+		amdgpu_dm_commit_cursors(state);
+
 	/* update planes when needed */
 	for_each_oldnew_plane_in_state(state, plane, old_plane_state, new_plane_state, i) {
 		struct drm_crtc *crtc = new_plane_state->crtc;
@@ -5205,22 +5326,28 @@ static void amdgpu_dm_commit_planes(struct drm_atomic_state *state,
 			continue;
 		}
 
+		abo = gem_to_amdgpu_bo(fb->obj[0]);
+
+		/*
+		 * Wait for all fences on this FB. Do limited wait to avoid
+		 * deadlock during GPU reset when this fence will not signal
+		 * but we hold reservation lock for the BO.
+		 */
+		r = reservation_object_wait_timeout_rcu(abo->tbo.resv, true,
+							false,
+							msecs_to_jiffies(5000));
+		if (unlikely(r <= 0))
+			DRM_ERROR("Waiting for fences timed out or interrupted!");
+
 		/*
 		 * TODO This might fail and hence better not used, wait
 		 * explicitly on fences instead
 		 * and in general should be called for
 		 * blocking commit to as per framework helpers
 		 */
-		abo = gem_to_amdgpu_bo(fb->obj[0]);
 		r = amdgpu_bo_reserve(abo, true);
-		if (unlikely(r != 0)) {
+		if (unlikely(r != 0))
 			DRM_ERROR("failed to reserve buffer before flip\n");
-			WARN_ON(1);
-		}
-
-		/* Wait for all fences on this FB */
-		WARN_ON(reservation_object_wait_timeout_rcu(abo->tbo.resv, true, false,
-									    MAX_SCHEDULE_TIMEOUT) < 0);
 
 		amdgpu_bo_get_tiling_flags(abo, &tiling_flags);
 
@@ -5329,7 +5456,8 @@ static void amdgpu_dm_commit_planes(struct drm_atomic_state *state,
 		}
 	}
 
-	if (planes_count) {
+	/* Update the planes if changed or disable if we don't have any. */
+	if (planes_count || acrtc_state->active_planes == 0) {
 		if (new_pcrtc_state->mode_changed) {
 			bundle->stream_update.src = acrtc_state->stream->src;
 			bundle->stream_update.dst = acrtc_state->stream->dst;
@@ -5352,15 +5480,72 @@ static void amdgpu_dm_commit_planes(struct drm_atomic_state *state,
 		mutex_unlock(&dm->dc_lock);
 	}
 
-	for_each_oldnew_plane_in_state(state, plane, old_plane_state, new_plane_state, i)
-		if (plane->type == DRM_PLANE_TYPE_CURSOR)
-			handle_cursor_update(plane, old_plane_state);
+	/*
+	 * Update cursor state *after* programming all the planes.
+	 * This avoids redundant programming in the case where we're going
+	 * to be disabling a single plane - those pipes are being disabled.
+	 */
+	if (acrtc_state->active_planes)
+		amdgpu_dm_commit_cursors(state);
 
 cleanup:
 	kfree(bundle);
 }
 
 /*
+ * Enable interrupts on CRTCs that are newly active, undergone
+ * a modeset, or have active planes again.
+ *
+ * Done in two passes, based on the for_modeset flag:
+ * Pass 1: For CRTCs going through modeset
+ * Pass 2: For CRTCs going from 0 to n active planes
+ *
+ * Interrupts can only be enabled after the planes are programmed,
+ * so this requires a two-pass approach since we don't want to
+ * just defer the interrupts until after commit planes every time.
+ */
+static void amdgpu_dm_enable_crtc_interrupts(struct drm_device *dev,
+					     struct drm_atomic_state *state,
+					     bool for_modeset)
+{
+	struct amdgpu_device *adev = dev->dev_private;
+	struct drm_crtc *crtc;
+	struct drm_crtc_state *old_crtc_state, *new_crtc_state;
+	int i;
+
+	for_each_oldnew_crtc_in_state(state, crtc, old_crtc_state,
+				      new_crtc_state, i) {
+		struct amdgpu_crtc *acrtc = to_amdgpu_crtc(crtc);
+		struct dm_crtc_state *dm_new_crtc_state =
+			to_dm_crtc_state(new_crtc_state);
+		struct dm_crtc_state *dm_old_crtc_state =
+			to_dm_crtc_state(old_crtc_state);
+		bool modeset = drm_atomic_crtc_needs_modeset(new_crtc_state);
+		bool run_pass;
+
+		run_pass = (for_modeset && modeset) ||
+			   (!for_modeset && !modeset &&
+			    !dm_old_crtc_state->interrupts_enabled);
+
+		if (!run_pass)
+			continue;
+
+		if (!dm_new_crtc_state->interrupts_enabled)
+			continue;
+
+		manage_dm_interrupts(adev, acrtc, true);
+
+#ifdef CONFIG_DEBUG_FS
+		/* The stream has changed so CRC capture needs to re-enabled. */
+		if (dm_new_crtc_state->crc_enabled) {
+			dm_new_crtc_state->crc_enabled = false;
+			amdgpu_dm_crtc_set_crc_source(crtc, "auto");
+		}
+#endif
+	}
+}
+
+/*
  * amdgpu_dm_crtc_copy_transient_flags - copy mirrored flags from DRM to DC
  * @crtc_state: the DRM CRTC state
  * @stream_state: the DC stream state.
@@ -5384,30 +5569,41 @@ static int amdgpu_dm_atomic_commit(struct drm_device *dev,
 	int i;
 
 	/*
-	 * We evade vblanks and pflips on crtc that
-	 * should be changed. We do it here to flush & disable
-	 * interrupts before drm_swap_state is called in drm_atomic_helper_commit
-	 * it will update crtc->dm_crtc_state->stream pointer which is used in
-	 * the ISRs.
+	 * We evade vblank and pflip interrupts on CRTCs that are undergoing
+	 * a modeset, being disabled, or have no active planes.
+	 *
+	 * It's done in atomic commit rather than commit tail for now since
+	 * some of these interrupt handlers access the current CRTC state and
+	 * potentially the stream pointer itself.
+	 *
+	 * Since the atomic state is swapped within atomic commit and not within
+	 * commit tail this would leave to new state (that hasn't been committed yet)
+	 * being accesssed from within the handlers.
+	 *
+	 * TODO: Fix this so we can do this in commit tail and not have to block
+	 * in atomic check.
 	 */
 	for_each_oldnew_crtc_in_state(state, crtc, old_crtc_state, new_crtc_state, i) {
 		struct dm_crtc_state *dm_old_crtc_state = to_dm_crtc_state(old_crtc_state);
 		struct dm_crtc_state *dm_new_crtc_state = to_dm_crtc_state(new_crtc_state);
 		struct amdgpu_crtc *acrtc = to_amdgpu_crtc(crtc);
 
-		if (drm_atomic_crtc_needs_modeset(new_crtc_state)
-		    && dm_old_crtc_state->stream) {
+		if (dm_old_crtc_state->interrupts_enabled &&
+		    (!dm_new_crtc_state->interrupts_enabled ||
+		     drm_atomic_crtc_needs_modeset(new_crtc_state))) {
 			/*
-			 * If the stream is removed and CRC capture was
-			 * enabled on the CRTC the extra vblank reference
-			 * needs to be dropped since CRC capture will be
-			 * disabled.
+			 * Drop the extra vblank reference added by CRC
+			 * capture if applicable.
 			 */
-			if (!dm_new_crtc_state->stream
-			    && dm_new_crtc_state->crc_enabled) {
+			if (dm_new_crtc_state->crc_enabled)
 				drm_crtc_vblank_put(crtc);
+
+			/*
+			 * Only keep CRC capture enabled if there's
+			 * still a stream for the CRTC.
+			 */
+			if (!dm_new_crtc_state->stream)
 				dm_new_crtc_state->crc_enabled = false;
-			}
 
 			manage_dm_interrupts(adev, acrtc, false);
 		}
@@ -5623,47 +5819,26 @@ static void amdgpu_dm_atomic_commit_tail(struct drm_atomic_state *state)
 		mutex_unlock(&dm->dc_lock);
 	}
 
-	/* Update freesync state before amdgpu_dm_handle_vrr_transition(). */
-	for_each_new_crtc_in_state(state, crtc, new_crtc_state, i) {
-		dm_new_crtc_state = to_dm_crtc_state(new_crtc_state);
-		pre_update_freesync_state_on_stream(dm, dm_new_crtc_state);
-	}
-
+	/* Count number of newly disabled CRTCs for dropping PM refs later. */
 	for_each_oldnew_crtc_in_state(state, crtc, old_crtc_state,
-			new_crtc_state, i) {
-		/*
-		 * loop to enable interrupts on newly arrived crtc
-		 */
-		struct amdgpu_crtc *acrtc = to_amdgpu_crtc(crtc);
-		bool modeset_needed;
-
+				      new_crtc_state, i) {
 		if (old_crtc_state->active && !new_crtc_state->active)
 			crtc_disable_count++;
 
 		dm_new_crtc_state = to_dm_crtc_state(new_crtc_state);
 		dm_old_crtc_state = to_dm_crtc_state(old_crtc_state);
 
+		/* Update freesync active state. */
+		pre_update_freesync_state_on_stream(dm, dm_new_crtc_state);
+
 		/* Handle vrr on->off / off->on transitions */
 		amdgpu_dm_handle_vrr_transition(dm_old_crtc_state,
 						dm_new_crtc_state);
-
-		modeset_needed = modeset_required(
-				new_crtc_state,
-				dm_new_crtc_state->stream,
-				dm_old_crtc_state->stream);
-
-		if (dm_new_crtc_state->stream == NULL || !modeset_needed)
-			continue;
-
-		manage_dm_interrupts(adev, acrtc, true);
-
-#ifdef CONFIG_DEBUG_FS
-		/* The stream has changed so CRC capture needs to re-enabled. */
-		if (dm_new_crtc_state->crc_enabled)
-			amdgpu_dm_crtc_set_crc_source(crtc, "auto");
-#endif
 	}
 
+	/* Enable interrupts for CRTCs going through a modeset. */
+	amdgpu_dm_enable_crtc_interrupts(dev, state, true);
+
 	for_each_new_crtc_in_state(state, crtc, new_crtc_state, j)
 		if (new_crtc_state->pageflip_flags & DRM_MODE_PAGE_FLIP_ASYNC)
 			wait_for_vblank = false;
@@ -5677,6 +5852,8 @@ static void amdgpu_dm_atomic_commit_tail(struct drm_atomic_state *state)
 						dm, crtc, wait_for_vblank);
 	}
 
+	/* Enable interrupts for CRTCs going from 0 to n active planes. */
+	amdgpu_dm_enable_crtc_interrupts(dev, state, false);
 
 	/*
 	 * send vblank event on all events not handled in flip and
diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.h b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.h
index 3a0b6164c755..978ff14a7d45 100644
--- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.h
+++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.h
@@ -271,6 +271,9 @@ struct dm_crtc_state {
 	struct drm_crtc_state base;
 	struct dc_stream_state *stream;
 
+	int active_planes;
+	bool interrupts_enabled;
+
 	int crc_skip_count;
 	bool crc_enabled;
 
diff --git a/drivers/gpu/drm/amd/display/dc/core/dc_link.c b/drivers/gpu/drm/amd/display/dc/core/dc_link.c
index 3ef68a249f4d..b37ecc3ede61 100644
--- a/drivers/gpu/drm/amd/display/dc/core/dc_link.c
+++ b/drivers/gpu/drm/amd/display/dc/core/dc_link.c
@@ -514,6 +514,40 @@ static void link_disconnect_remap(struct dc_sink *prev_sink, struct dc_link *lin
 }
 
 
+static void read_edp_current_link_settings_on_detect(struct dc_link *link)
+{
+	union lane_count_set lane_count_set = { {0} };
+	uint8_t link_bw_set;
+	uint8_t link_rate_set;
+
+	// Read DPCD 00101h to find out the number of lanes currently set
+	core_link_read_dpcd(link, DP_LANE_COUNT_SET,
+			&lane_count_set.raw, sizeof(lane_count_set));
+	link->cur_link_settings.lane_count = lane_count_set.bits.LANE_COUNT_SET;
+
+	// Read DPCD 00100h to find if standard link rates are set
+	core_link_read_dpcd(link, DP_LINK_BW_SET,
+			&link_bw_set, sizeof(link_bw_set));
+
+	if (link_bw_set == 0) {
+		/* If standard link rates are not being used,
+		 * Read DPCD 00115h to find the link rate set used
+		 */
+		core_link_read_dpcd(link, DP_LINK_RATE_SET,
+				&link_rate_set, sizeof(link_rate_set));
+
+		if (link_rate_set < link->dpcd_caps.edp_supported_link_rates_count) {
+			link->cur_link_settings.link_rate =
+				link->dpcd_caps.edp_supported_link_rates[link_rate_set];
+			link->cur_link_settings.link_rate_set = link_rate_set;
+			link->cur_link_settings.use_link_rate_set = true;
+		}
+	} else {
+		link->cur_link_settings.link_rate = link_bw_set;
+		link->cur_link_settings.use_link_rate_set = false;
+	}
+}
+
 static bool detect_dp(
 	struct dc_link *link,
 	struct display_sink_capability *sink_caps,
@@ -648,9 +682,14 @@ bool dc_link_detect(struct dc_link *link, enum dc_detect_reason reason)
 		return false;
 	}
 
-	if (link->connector_signal == SIGNAL_TYPE_EDP &&
-			link->local_sink)
-		return true;
+	if (link->connector_signal == SIGNAL_TYPE_EDP) {
+		/* On detect, we want to make sure current link settings are
+		 * up to date, especially if link was powered on by GOP.
+		 */
+		read_edp_current_link_settings_on_detect(link);
+		if (link->local_sink)
+			return true;
+	}
 
 	if (link->connector_signal == SIGNAL_TYPE_LVDS &&
 			link->local_sink)
@@ -1396,13 +1435,19 @@ static enum dc_status enable_link_dp(
 	/* get link settings for video mode timing */
 	decide_link_settings(stream, &link_settings);
 
-	/* If link settings are different than current and link already enabled
-	 * then need to disable before programming to new rate.
-	 */
-	if (link->link_status.link_active &&
-		(link->cur_link_settings.lane_count != link_settings.lane_count ||
-		 link->cur_link_settings.link_rate != link_settings.link_rate)) {
-		dp_disable_link_phy(link, pipe_ctx->stream->signal);
+	if (pipe_ctx->stream->signal == SIGNAL_TYPE_EDP) {
+		/* If link settings are different than current and link already enabled
+		 * then need to disable before programming to new rate.
+		 */
+		if (link->link_status.link_active &&
+			(link->cur_link_settings.lane_count != link_settings.lane_count ||
+			 link->cur_link_settings.link_rate != link_settings.link_rate)) {
+			dp_disable_link_phy(link, pipe_ctx->stream->signal);
+		}
+
+		/*in case it is not on*/
+		link->dc->hwss.edp_power_control(link, true);
+		link->dc->hwss.edp_wait_for_hpd_ready(link, true);
 	}
 
 	pipe_ctx->stream_res.pix_clk_params.requested_sym_clk =
@@ -1448,15 +1493,9 @@ static enum dc_status enable_link_edp(
 		struct pipe_ctx *pipe_ctx)
 {
 	enum dc_status status;
-	struct dc_stream_state *stream = pipe_ctx->stream;
-	struct dc_link *link = stream->link;
-	/*in case it is not on*/
-	link->dc->hwss.edp_power_control(link, true);
-	link->dc->hwss.edp_wait_for_hpd_ready(link, true);
 
 	status = enable_link_dp(state, pipe_ctx);
 
-
 	return status;
 }
 
diff --git a/drivers/gpu/drm/amd/display/dc/core/dc_link_dp.c b/drivers/gpu/drm/amd/display/dc/core/dc_link_dp.c
index a6424c70f4c5..1ee544a32ebb 100644
--- a/drivers/gpu/drm/amd/display/dc/core/dc_link_dp.c
+++ b/drivers/gpu/drm/amd/display/dc/core/dc_link_dp.c
@@ -2185,6 +2185,30 @@ static int translate_dpcd_max_bpc(enum dpcd_downstream_port_max_bpc bpc)
 	return -1;
 }
 
+static void read_dp_device_vendor_id(struct dc_link *link)
+{
+	struct dp_device_vendor_id dp_id;
+
+	/* read IEEE branch device id */
+	core_link_read_dpcd(
+		link,
+		DP_BRANCH_OUI,
+		(uint8_t *)&dp_id,
+		sizeof(dp_id));
+
+	link->dpcd_caps.branch_dev_id =
+		(dp_id.ieee_oui[0] << 16) +
+		(dp_id.ieee_oui[1] << 8) +
+		dp_id.ieee_oui[2];
+
+	memmove(
+		link->dpcd_caps.branch_dev_name,
+		dp_id.ieee_device_id,
+		sizeof(dp_id.ieee_device_id));
+}
+
+
+
 static void get_active_converter_info(
 	uint8_t data, struct dc_link *link)
 {
@@ -2271,27 +2295,6 @@ static void get_active_converter_info(
 	ddc_service_set_dongle_type(link->ddc, link->dpcd_caps.dongle_type);
 
 	{
-		struct dp_device_vendor_id dp_id;
-
-		/* read IEEE branch device id */
-		core_link_read_dpcd(
-			link,
-			DP_BRANCH_OUI,
-			(uint8_t *)&dp_id,
-			sizeof(dp_id));
-
-		link->dpcd_caps.branch_dev_id =
-			(dp_id.ieee_oui[0] << 16) +
-			(dp_id.ieee_oui[1] << 8) +
-			dp_id.ieee_oui[2];
-
-		memmove(
-			link->dpcd_caps.branch_dev_name,
-			dp_id.ieee_device_id,
-			sizeof(dp_id.ieee_device_id));
-	}
-
-	{
 		struct dp_sink_hw_fw_revision dp_hw_fw_revision;
 
 		core_link_read_dpcd(
@@ -2455,6 +2458,8 @@ static bool retrieve_link_cap(struct dc_link *link)
 	ds_port.byte = dpcd_data[DP_DOWNSTREAMPORT_PRESENT -
 				 DP_DPCD_REV];
 
+	read_dp_device_vendor_id(link);
+
 	get_active_converter_info(ds_port.byte, link);
 
 	dp_wa_power_up_0010FA(link, dpcd_data, sizeof(dpcd_data));
@@ -2586,9 +2591,6 @@ void detect_edp_sink_caps(struct dc_link *link)
 	uint32_t entry;
 	uint32_t link_rate_in_khz;
 	enum dc_link_rate link_rate = LINK_RATE_UNKNOWN;
-	union lane_count_set lane_count_set = { {0} };
-	uint8_t link_bw_set;
-	uint8_t link_rate_set;
 
 	retrieve_link_cap(link);
 	link->dpcd_caps.edp_supported_link_rates_count = 0;
@@ -2614,33 +2616,6 @@ void detect_edp_sink_caps(struct dc_link *link)
 		}
 	}
 	link->verified_link_cap = link->reported_link_cap;
-
-	// Read DPCD 00101h to find out the number of lanes currently set
-	core_link_read_dpcd(link, DP_LANE_COUNT_SET,
-			&lane_count_set.raw, sizeof(lane_count_set));
-	link->cur_link_settings.lane_count = lane_count_set.bits.LANE_COUNT_SET;
-
-	// Read DPCD 00100h to find if standard link rates are set
-	core_link_read_dpcd(link, DP_LINK_BW_SET,
-			&link_bw_set, sizeof(link_bw_set));
-
-	if (link_bw_set == 0) {
-		/* If standard link rates are not being used,
-		 * Read DPCD 00115h to find the link rate set used
-		 */
-		core_link_read_dpcd(link, DP_LINK_RATE_SET,
-				&link_rate_set, sizeof(link_rate_set));
-
-		if (link_rate_set < link->dpcd_caps.edp_supported_link_rates_count) {
-			link->cur_link_settings.link_rate =
-				link->dpcd_caps.edp_supported_link_rates[link_rate_set];
-			link->cur_link_settings.link_rate_set = link_rate_set;
-			link->cur_link_settings.use_link_rate_set = true;
-		}
-	} else {
-		link->cur_link_settings.link_rate = link_bw_set;
-		link->cur_link_settings.use_link_rate_set = false;
-	}
 }
 
 void dc_link_dp_enable_hpd(const struct dc_link *link)
diff --git a/drivers/gpu/drm/amd/display/dc/core/dc_link_hwss.c b/drivers/gpu/drm/amd/display/dc/core/dc_link_hwss.c
index f7f7515f65f4..b0dea759cd86 100644
--- a/drivers/gpu/drm/amd/display/dc/core/dc_link_hwss.c
+++ b/drivers/gpu/drm/amd/display/dc/core/dc_link_hwss.c
@@ -58,6 +58,8 @@ void dp_enable_link_phy(
 	const struct dc_link_settings *link_settings)
 {
 	struct link_encoder *link_enc = link->link_enc;
+	struct dc  *core_dc = link->ctx->dc;
+	struct dmcu *dmcu = core_dc->res_pool->dmcu;
 
 	struct pipe_ctx *pipes =
 			link->dc->current_state->res_ctx.pipe_ctx;
@@ -84,6 +86,9 @@ void dp_enable_link_phy(
 		}
 	}
 
+	if (dmcu != NULL && dmcu->funcs->lock_phy)
+		dmcu->funcs->lock_phy(dmcu);
+
 	if (dc_is_dp_sst_signal(signal)) {
 		link_enc->funcs->enable_dp_output(
 						link_enc,
@@ -95,6 +100,10 @@ void dp_enable_link_phy(
 						link_settings,
 						clock_source);
 	}
+
+	if (dmcu != NULL && dmcu->funcs->unlock_phy)
+		dmcu->funcs->unlock_phy(dmcu);
+
 	link->cur_link_settings = *link_settings;
 
 	dp_receiver_power_ctrl(link, true);
@@ -150,15 +159,25 @@ bool edp_receiver_ready_T7(struct dc_link *link)
 
 void dp_disable_link_phy(struct dc_link *link, enum signal_type signal)
 {
+	struct dc  *core_dc = link->ctx->dc;
+	struct dmcu *dmcu = core_dc->res_pool->dmcu;
+
 	if (!link->wa_flags.dp_keep_receiver_powered)
 		dp_receiver_power_ctrl(link, false);
 
 	if (signal == SIGNAL_TYPE_EDP) {
 		link->link_enc->funcs->disable_output(link->link_enc, signal);
 		link->dc->hwss.edp_power_control(link, false);
-	} else
+	} else {
+		if (dmcu != NULL && dmcu->funcs->lock_phy)
+			dmcu->funcs->lock_phy(dmcu);
+
 		link->link_enc->funcs->disable_output(link->link_enc, signal);
 
+		if (dmcu != NULL && dmcu->funcs->unlock_phy)
+			dmcu->funcs->unlock_phy(dmcu);
+	}
+
 	/* Clear current link setting.*/
 	memset(&link->cur_link_settings, 0,
 			sizeof(link->cur_link_settings));
diff --git a/drivers/gpu/drm/amd/display/dc/core/dc_stream.c b/drivers/gpu/drm/amd/display/dc/core/dc_stream.c
index e10479d58c11..96e97d25d639 100644
--- a/drivers/gpu/drm/amd/display/dc/core/dc_stream.c
+++ b/drivers/gpu/drm/amd/display/dc/core/dc_stream.c
@@ -163,6 +163,27 @@ struct dc_stream_state *dc_create_stream_for_sink(
 	return stream;
 }
 
+struct dc_stream_state *dc_copy_stream(const struct dc_stream_state *stream)
+{
+	struct dc_stream_state *new_stream;
+
+	new_stream = kzalloc(sizeof(struct dc_stream_state), GFP_KERNEL);
+	if (!new_stream)
+		return NULL;
+
+	memcpy(new_stream, stream, sizeof(struct dc_stream_state));
+
+	if (new_stream->sink)
+		dc_sink_retain(new_stream->sink);
+
+	if (new_stream->out_transfer_func)
+		dc_transfer_func_retain(new_stream->out_transfer_func);
+
+	kref_init(&new_stream->refcount);
+
+	return new_stream;
+}
+
 /**
  * dc_stream_get_status_from_state - Get stream status from given dc state
  * @state: DC state to find the stream status in
@@ -312,7 +333,7 @@ bool dc_stream_set_cursor_position(
 				(!pipe_ctx->plane_res.mi  && !pipe_ctx->plane_res.hubp) ||
 				!pipe_ctx->plane_state ||
 				(!pipe_ctx->plane_res.xfm && !pipe_ctx->plane_res.dpp) ||
-				!pipe_ctx->plane_res.ipp)
+				(!pipe_ctx->plane_res.ipp && !pipe_ctx->plane_res.dpp))
 			continue;
 
 		if (!pipe_to_program) {
diff --git a/drivers/gpu/drm/amd/display/dc/dc.h b/drivers/gpu/drm/amd/display/dc/dc.h
index 3459e39714bc..70edd9ea5afe 100644
--- a/drivers/gpu/drm/amd/display/dc/dc.h
+++ b/drivers/gpu/drm/amd/display/dc/dc.h
@@ -39,7 +39,7 @@
 #include "inc/hw/dmcu.h"
 #include "dml/display_mode_lib.h"
 
-#define DC_VER "3.2.26"
+#define DC_VER "3.2.27"
 
 #define MAX_SURFACES 3
 #define MAX_PLANES 6
@@ -204,6 +204,7 @@ struct dc_config {
 	bool optimize_edp_link_rate;
 	bool disable_fractional_pwm;
 	bool allow_seamless_boot_optimization;
+	bool power_down_display_on_boot;
 };
 
 enum visual_confirm {
diff --git a/drivers/gpu/drm/amd/display/dc/dc_link.h b/drivers/gpu/drm/amd/display/dc/dc_link.h
index cc7ffac64c96..7b9429e30d82 100644
--- a/drivers/gpu/drm/amd/display/dc/dc_link.h
+++ b/drivers/gpu/drm/amd/display/dc/dc_link.h
@@ -120,6 +120,7 @@ struct dc_link {
 	/* MST record stream using this link */
 	struct link_flags {
 		bool dp_keep_receiver_powered;
+		bool dp_skip_DID2;
 	} wa_flags;
 	struct link_mst_stream_allocation_table mst_stream_alloc_table;
 
diff --git a/drivers/gpu/drm/amd/display/dc/dc_stream.h b/drivers/gpu/drm/amd/display/dc/dc_stream.h
index 17fa3bf6cf7b..189bdab929a5 100644
--- a/drivers/gpu/drm/amd/display/dc/dc_stream.h
+++ b/drivers/gpu/drm/amd/display/dc/dc_stream.h
@@ -307,6 +307,8 @@ enum surface_update_type dc_check_update_surfaces_for_stream(
  */
 struct dc_stream_state *dc_create_stream_for_sink(struct dc_sink *dc_sink);
 
+struct dc_stream_state *dc_copy_stream(const struct dc_stream_state *stream);
+
 void update_stream_signal(struct dc_stream_state *stream, struct dc_sink *sink);
 
 void dc_stream_retain(struct dc_stream_state *dc_stream);
diff --git a/drivers/gpu/drm/amd/display/dc/dce/dce_abm.c b/drivers/gpu/drm/amd/display/dc/dce/dce_abm.c
index 855360b1414f..da96229db53a 100644
--- a/drivers/gpu/drm/amd/display/dc/dce/dce_abm.c
+++ b/drivers/gpu/drm/amd/display/dc/dce/dce_abm.c
@@ -50,7 +50,6 @@
 #define MCP_ABM_LEVEL_SET 0x65
 #define MCP_ABM_PIPE_SET 0x66
 #define MCP_BL_SET 0x67
-#define MCP_BL_SET_PWM_FRAC     0x6A  /* Enable or disable Fractional PWM */
 
 #define MCP_DISABLE_ABM_IMMEDIATELY 255
 
@@ -391,23 +390,6 @@ static bool dce_abm_init_backlight(struct abm *abm)
 	REG_UPDATE(BL_PWM_GRP1_REG_LOCK,
 			BL_PWM_GRP1_REG_LOCK, 0);
 
-	/* Wait until microcontroller is ready to process interrupt */
-	REG_WAIT(MASTER_COMM_CNTL_REG, MASTER_COMM_INTERRUPT, 0, 100, 800);
-
-	/* Set PWM fractional enable/disable */
-	value = (abm->ctx->dc->config.disable_fractional_pwm == false) ? 1 : 0;
-	REG_WRITE(MASTER_COMM_DATA_REG1, value);
-
-	/* Set command to enable or disable fractional PWM microcontroller */
-	REG_UPDATE(MASTER_COMM_CMD_REG, MASTER_COMM_CMD_REG_BYTE0,
-			MCP_BL_SET_PWM_FRAC);
-
-	/* Notify microcontroller of new command */
-	REG_UPDATE(MASTER_COMM_CNTL_REG, MASTER_COMM_INTERRUPT, 1);
-
-	/* Ensure command has been executed before continuing */
-	REG_WAIT(MASTER_COMM_CNTL_REG, MASTER_COMM_INTERRUPT, 0, 100, 800);
-
 	return true;
 }
 
diff --git a/drivers/gpu/drm/amd/display/dc/dce/dce_dmcu.c b/drivers/gpu/drm/amd/display/dc/dce/dce_dmcu.c
index aa586672e8cd..818536eea00a 100644
--- a/drivers/gpu/drm/amd/display/dc/dce/dce_dmcu.c
+++ b/drivers/gpu/drm/amd/display/dc/dce/dce_dmcu.c
@@ -51,6 +51,9 @@
 #define PSR_SET_WAITLOOP 0x31
 #define MCP_INIT_DMCU 0x88
 #define MCP_INIT_IRAM 0x89
+#define MCP_SYNC_PHY_LOCK 0x90
+#define MCP_SYNC_PHY_UNLOCK 0x91
+#define MCP_BL_SET_PWM_FRAC 0x6A  /* Enable or disable Fractional PWM */
 #define MASTER_COMM_CNTL_REG__MASTER_COMM_INTERRUPT_MASK   0x00000001L
 
 static bool dce_dmcu_init(struct dmcu *dmcu)
@@ -339,9 +342,32 @@ static void dcn10_get_dmcu_version(struct dmcu *dmcu)
 			IRAM_RD_ADDR_AUTO_INC, 0);
 }
 
+static void dcn10_dmcu_enable_fractional_pwm(struct dmcu *dmcu,
+		uint32_t fractional_pwm)
+{
+	struct dce_dmcu *dmcu_dce = TO_DCE_DMCU(dmcu);
+
+	/* Wait until microcontroller is ready to process interrupt */
+	REG_WAIT(MASTER_COMM_CNTL_REG, MASTER_COMM_INTERRUPT, 0, 100, 800);
+
+	/* Set PWM fractional enable/disable */
+	REG_WRITE(MASTER_COMM_DATA_REG1, fractional_pwm);
+
+	/* Set command to enable or disable fractional PWM microcontroller */
+	REG_UPDATE(MASTER_COMM_CMD_REG, MASTER_COMM_CMD_REG_BYTE0,
+			MCP_BL_SET_PWM_FRAC);
+
+	/* Notify microcontroller of new command */
+	REG_UPDATE(MASTER_COMM_CNTL_REG, MASTER_COMM_INTERRUPT, 1);
+
+	/* Ensure command has been executed before continuing */
+	REG_WAIT(MASTER_COMM_CNTL_REG, MASTER_COMM_INTERRUPT, 0, 100, 800);
+}
+
 static bool dcn10_dmcu_init(struct dmcu *dmcu)
 {
 	struct dce_dmcu *dmcu_dce = TO_DCE_DMCU(dmcu);
+	const struct dc_config *config = &dmcu->ctx->dc->config;
 	bool status = false;
 
 	/*  Definition of DC_DMCU_SCRATCH
@@ -379,9 +405,14 @@ static bool dcn10_dmcu_init(struct dmcu *dmcu)
 		if (dmcu->dmcu_state == DMCU_RUNNING) {
 			/* Retrieve and cache the DMCU firmware version. */
 			dcn10_get_dmcu_version(dmcu);
+
+			/* Initialize DMCU to use fractional PWM or not */
+			dcn10_dmcu_enable_fractional_pwm(dmcu,
+				(config->disable_fractional_pwm == false) ? 1 : 0);
 			status = true;
-		} else
+		} else {
 			status = false;
+		}
 
 		break;
 	case DMCU_RUNNING:
@@ -690,7 +721,7 @@ static bool dcn10_is_dmcu_initialized(struct dmcu *dmcu)
 	return true;
 }
 
-#endif
+#endif //(CONFIG_DRM_AMD_DC_DCN1_0)
 
 static const struct dmcu_funcs dce_funcs = {
 	.dmcu_init = dce_dmcu_init,
diff --git a/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_clk_mgr.c b/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_clk_mgr.c
index 0d9bee8d5ab9..2b2de1d913c9 100644
--- a/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_clk_mgr.c
+++ b/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_clk_mgr.c
@@ -151,9 +151,6 @@ static void dcn1_update_clocks(struct clk_mgr *clk_mgr,
 	struct dc *dc = clk_mgr->ctx->dc;
 	struct dc_debug_options *debug = &dc->debug;
 	struct dc_clocks *new_clocks = &context->bw_ctx.bw.dcn.clk;
-	struct pp_smu_display_requirement_rv *smu_req_cur =
-			&dc->res_pool->pp_smu_req;
-	struct pp_smu_display_requirement_rv smu_req = *smu_req_cur;
 	struct pp_smu_funcs_rv *pp_smu = NULL;
 	bool send_request_to_increase = false;
 	bool send_request_to_lower = false;
@@ -175,8 +172,6 @@ static void dcn1_update_clocks(struct clk_mgr *clk_mgr,
 		 */
 		if (pp_smu && pp_smu->set_display_count)
 			pp_smu->set_display_count(&pp_smu->pp_smu, display_count);
-
-		smu_req.display_count = display_count;
 	}
 
 	if (new_clocks->dispclk_khz > clk_mgr->clks.dispclk_khz
@@ -187,7 +182,6 @@ static void dcn1_update_clocks(struct clk_mgr *clk_mgr,
 
 	if (should_set_clock(safe_to_lower, new_clocks->phyclk_khz, clk_mgr->clks.phyclk_khz)) {
 		clk_mgr->clks.phyclk_khz = new_clocks->phyclk_khz;
-
 		send_request_to_lower = true;
 	}
 
@@ -197,24 +191,18 @@ static void dcn1_update_clocks(struct clk_mgr *clk_mgr,
 
 	if (should_set_clock(safe_to_lower, new_clocks->fclk_khz, clk_mgr->clks.fclk_khz)) {
 		clk_mgr->clks.fclk_khz = new_clocks->fclk_khz;
-		smu_req.hard_min_fclk_mhz = new_clocks->fclk_khz / 1000;
-
 		send_request_to_lower = true;
 	}
 
 	//DCF Clock
 	if (should_set_clock(safe_to_lower, new_clocks->dcfclk_khz, clk_mgr->clks.dcfclk_khz)) {
 		clk_mgr->clks.dcfclk_khz = new_clocks->dcfclk_khz;
-		smu_req.hard_min_dcefclk_mhz = new_clocks->dcfclk_khz / 1000;
-
 		send_request_to_lower = true;
 	}
 
 	if (should_set_clock(safe_to_lower,
 			new_clocks->dcfclk_deep_sleep_khz, clk_mgr->clks.dcfclk_deep_sleep_khz)) {
 		clk_mgr->clks.dcfclk_deep_sleep_khz = new_clocks->dcfclk_deep_sleep_khz;
-		smu_req.min_deep_sleep_dcefclk_mhz = (new_clocks->dcfclk_deep_sleep_khz + 999) / 1000;
-
 		send_request_to_lower = true;
 	}
 
@@ -227,9 +215,9 @@ static void dcn1_update_clocks(struct clk_mgr *clk_mgr,
 				pp_smu->set_hard_min_dcfclk_by_freq &&
 				pp_smu->set_min_deep_sleep_dcfclk) {
 
-			pp_smu->set_hard_min_fclk_by_freq(&pp_smu->pp_smu, smu_req.hard_min_fclk_mhz);
-			pp_smu->set_hard_min_dcfclk_by_freq(&pp_smu->pp_smu, smu_req.hard_min_dcefclk_mhz);
-			pp_smu->set_min_deep_sleep_dcfclk(&pp_smu->pp_smu, smu_req.min_deep_sleep_dcefclk_mhz);
+			pp_smu->set_hard_min_fclk_by_freq(&pp_smu->pp_smu, new_clocks->fclk_khz / 1000);
+			pp_smu->set_hard_min_dcfclk_by_freq(&pp_smu->pp_smu, new_clocks->dcfclk_khz / 1000);
+			pp_smu->set_min_deep_sleep_dcfclk(&pp_smu->pp_smu, (new_clocks->dcfclk_deep_sleep_khz + 999) / 1000);
 		}
 	}
 
@@ -239,7 +227,6 @@ static void dcn1_update_clocks(struct clk_mgr *clk_mgr,
 			|| new_clocks->dispclk_khz == clk_mgr->clks.dispclk_khz) {
 		dcn1_ramp_up_dispclk_with_dpp(clk_mgr, new_clocks);
 		clk_mgr->clks.dispclk_khz = new_clocks->dispclk_khz;
-
 		send_request_to_lower = true;
 	}
 
@@ -249,13 +236,11 @@ static void dcn1_update_clocks(struct clk_mgr *clk_mgr,
 				pp_smu->set_hard_min_dcfclk_by_freq &&
 				pp_smu->set_min_deep_sleep_dcfclk) {
 
-			pp_smu->set_hard_min_fclk_by_freq(&pp_smu->pp_smu, smu_req.hard_min_fclk_mhz);
-			pp_smu->set_hard_min_dcfclk_by_freq(&pp_smu->pp_smu, smu_req.hard_min_dcefclk_mhz);
-			pp_smu->set_min_deep_sleep_dcfclk(&pp_smu->pp_smu, smu_req.min_deep_sleep_dcefclk_mhz);
+			pp_smu->set_hard_min_fclk_by_freq(&pp_smu->pp_smu, new_clocks->fclk_khz / 1000);
+			pp_smu->set_hard_min_dcfclk_by_freq(&pp_smu->pp_smu, new_clocks->dcfclk_khz / 1000);
+			pp_smu->set_min_deep_sleep_dcfclk(&pp_smu->pp_smu, (new_clocks->dcfclk_deep_sleep_khz + 999) / 1000);
 		}
 	}
-
-	*smu_req_cur = smu_req;
 }
 static const struct clk_mgr_funcs dcn1_funcs = {
 	.get_dp_ref_clk_frequency = dce12_get_dp_ref_freq_khz,
diff --git a/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_hubbub.c b/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_hubbub.c
index 295cbd5b843f..0db2a6e96fc0 100644
--- a/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_hubbub.c
+++ b/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_hubbub.c
@@ -283,7 +283,8 @@ void hubbub1_program_watermarks(
 		hubbub1->watermarks.a.urgent_ns = watermarks->a.urgent_ns;
 		prog_wm_value = convert_and_clamp(watermarks->a.urgent_ns,
 				refclk_mhz, 0x1fffff);
-		REG_WRITE(DCHUBBUB_ARB_DATA_URGENCY_WATERMARK_A, prog_wm_value);
+		REG_SET(DCHUBBUB_ARB_DATA_URGENCY_WATERMARK_A, 0,
+				DCHUBBUB_ARB_DATA_URGENCY_WATERMARK_A, prog_wm_value);
 
 		DC_LOG_BANDWIDTH_CALCS("URGENCY_WATERMARK_A calculated =%d\n"
 			"HW register value = 0x%x\n",
@@ -310,7 +311,8 @@ void hubbub1_program_watermarks(
 			prog_wm_value = convert_and_clamp(
 					watermarks->a.cstate_pstate.cstate_enter_plus_exit_ns,
 					refclk_mhz, 0x1fffff);
-			REG_WRITE(DCHUBBUB_ARB_ALLOW_SR_ENTER_WATERMARK_A, prog_wm_value);
+			REG_SET(DCHUBBUB_ARB_ALLOW_SR_ENTER_WATERMARK_A, 0,
+					DCHUBBUB_ARB_ALLOW_SR_ENTER_WATERMARK_A, prog_wm_value);
 			DC_LOG_BANDWIDTH_CALCS("SR_ENTER_EXIT_WATERMARK_A calculated =%d\n"
 				"HW register value = 0x%x\n",
 				watermarks->a.cstate_pstate.cstate_enter_plus_exit_ns, prog_wm_value);
@@ -323,7 +325,8 @@ void hubbub1_program_watermarks(
 			prog_wm_value = convert_and_clamp(
 					watermarks->a.cstate_pstate.cstate_exit_ns,
 					refclk_mhz, 0x1fffff);
-			REG_WRITE(DCHUBBUB_ARB_ALLOW_SR_EXIT_WATERMARK_A, prog_wm_value);
+			REG_SET(DCHUBBUB_ARB_ALLOW_SR_EXIT_WATERMARK_A, 0,
+					DCHUBBUB_ARB_ALLOW_SR_EXIT_WATERMARK_A, prog_wm_value);
 			DC_LOG_BANDWIDTH_CALCS("SR_EXIT_WATERMARK_A calculated =%d\n"
 				"HW register value = 0x%x\n",
 				watermarks->a.cstate_pstate.cstate_exit_ns, prog_wm_value);
@@ -337,7 +340,8 @@ void hubbub1_program_watermarks(
 		prog_wm_value = convert_and_clamp(
 				watermarks->a.cstate_pstate.pstate_change_ns,
 				refclk_mhz, 0x1fffff);
-		REG_WRITE(DCHUBBUB_ARB_ALLOW_DRAM_CLK_CHANGE_WATERMARK_A, prog_wm_value);
+		REG_SET(DCHUBBUB_ARB_ALLOW_DRAM_CLK_CHANGE_WATERMARK_A, 0,
+				DCHUBBUB_ARB_ALLOW_DRAM_CLK_CHANGE_WATERMARK_A, prog_wm_value);
 		DC_LOG_BANDWIDTH_CALCS("DRAM_CLK_CHANGE_WATERMARK_A calculated =%d\n"
 			"HW register value = 0x%x\n\n",
 			watermarks->a.cstate_pstate.pstate_change_ns, prog_wm_value);
@@ -348,7 +352,8 @@ void hubbub1_program_watermarks(
 		hubbub1->watermarks.b.urgent_ns = watermarks->b.urgent_ns;
 		prog_wm_value = convert_and_clamp(watermarks->b.urgent_ns,
 				refclk_mhz, 0x1fffff);
-		REG_WRITE(DCHUBBUB_ARB_DATA_URGENCY_WATERMARK_B, prog_wm_value);
+		REG_SET(DCHUBBUB_ARB_DATA_URGENCY_WATERMARK_B, 0,
+				DCHUBBUB_ARB_DATA_URGENCY_WATERMARK_B, prog_wm_value);
 
 		DC_LOG_BANDWIDTH_CALCS("URGENCY_WATERMARK_B calculated =%d\n"
 			"HW register value = 0x%x\n",
@@ -375,7 +380,8 @@ void hubbub1_program_watermarks(
 			prog_wm_value = convert_and_clamp(
 					watermarks->b.cstate_pstate.cstate_enter_plus_exit_ns,
 					refclk_mhz, 0x1fffff);
-			REG_WRITE(DCHUBBUB_ARB_ALLOW_SR_ENTER_WATERMARK_B, prog_wm_value);
+			REG_SET(DCHUBBUB_ARB_ALLOW_SR_ENTER_WATERMARK_B, 0,
+					DCHUBBUB_ARB_ALLOW_SR_ENTER_WATERMARK_B, prog_wm_value);
 			DC_LOG_BANDWIDTH_CALCS("SR_ENTER_EXIT_WATERMARK_B calculated =%d\n"
 				"HW register value = 0x%x\n",
 				watermarks->b.cstate_pstate.cstate_enter_plus_exit_ns, prog_wm_value);
@@ -388,7 +394,8 @@ void hubbub1_program_watermarks(
 			prog_wm_value = convert_and_clamp(
 					watermarks->b.cstate_pstate.cstate_exit_ns,
 					refclk_mhz, 0x1fffff);
-			REG_WRITE(DCHUBBUB_ARB_ALLOW_SR_EXIT_WATERMARK_B, prog_wm_value);
+			REG_SET(DCHUBBUB_ARB_ALLOW_SR_EXIT_WATERMARK_B, 0,
+					DCHUBBUB_ARB_ALLOW_SR_EXIT_WATERMARK_B, prog_wm_value);
 			DC_LOG_BANDWIDTH_CALCS("SR_EXIT_WATERMARK_B calculated =%d\n"
 				"HW register value = 0x%x\n",
 				watermarks->b.cstate_pstate.cstate_exit_ns, prog_wm_value);
@@ -402,7 +409,8 @@ void hubbub1_program_watermarks(
 		prog_wm_value = convert_and_clamp(
 				watermarks->b.cstate_pstate.pstate_change_ns,
 				refclk_mhz, 0x1fffff);
-		REG_WRITE(DCHUBBUB_ARB_ALLOW_DRAM_CLK_CHANGE_WATERMARK_B, prog_wm_value);
+		REG_SET(DCHUBBUB_ARB_ALLOW_DRAM_CLK_CHANGE_WATERMARK_B, 0,
+				DCHUBBUB_ARB_ALLOW_DRAM_CLK_CHANGE_WATERMARK_B, prog_wm_value);
 		DC_LOG_BANDWIDTH_CALCS("DRAM_CLK_CHANGE_WATERMARK_B calculated =%d\n"
 			"HW register value = 0x%x\n\n",
 			watermarks->b.cstate_pstate.pstate_change_ns, prog_wm_value);
@@ -413,7 +421,8 @@ void hubbub1_program_watermarks(
 		hubbub1->watermarks.c.urgent_ns = watermarks->c.urgent_ns;
 		prog_wm_value = convert_and_clamp(watermarks->c.urgent_ns,
 				refclk_mhz, 0x1fffff);
-		REG_WRITE(DCHUBBUB_ARB_DATA_URGENCY_WATERMARK_C, prog_wm_value);
+		REG_SET(DCHUBBUB_ARB_DATA_URGENCY_WATERMARK_C, 0,
+				DCHUBBUB_ARB_DATA_URGENCY_WATERMARK_C, prog_wm_value);
 
 		DC_LOG_BANDWIDTH_CALCS("URGENCY_WATERMARK_C calculated =%d\n"
 			"HW register value = 0x%x\n",
@@ -440,7 +449,8 @@ void hubbub1_program_watermarks(
 			prog_wm_value = convert_and_clamp(
 					watermarks->c.cstate_pstate.cstate_enter_plus_exit_ns,
 					refclk_mhz, 0x1fffff);
-			REG_WRITE(DCHUBBUB_ARB_ALLOW_SR_ENTER_WATERMARK_C, prog_wm_value);
+			REG_SET(DCHUBBUB_ARB_ALLOW_SR_ENTER_WATERMARK_C, 0,
+					DCHUBBUB_ARB_ALLOW_SR_ENTER_WATERMARK_C, prog_wm_value);
 			DC_LOG_BANDWIDTH_CALCS("SR_ENTER_EXIT_WATERMARK_C calculated =%d\n"
 				"HW register value = 0x%x\n",
 				watermarks->c.cstate_pstate.cstate_enter_plus_exit_ns, prog_wm_value);
@@ -453,7 +463,8 @@ void hubbub1_program_watermarks(
 			prog_wm_value = convert_and_clamp(
 					watermarks->c.cstate_pstate.cstate_exit_ns,
 					refclk_mhz, 0x1fffff);
-			REG_WRITE(DCHUBBUB_ARB_ALLOW_SR_EXIT_WATERMARK_C, prog_wm_value);
+			REG_SET(DCHUBBUB_ARB_ALLOW_SR_EXIT_WATERMARK_C, 0,
+					DCHUBBUB_ARB_ALLOW_SR_EXIT_WATERMARK_C, prog_wm_value);
 			DC_LOG_BANDWIDTH_CALCS("SR_EXIT_WATERMARK_C calculated =%d\n"
 				"HW register value = 0x%x\n",
 				watermarks->c.cstate_pstate.cstate_exit_ns, prog_wm_value);
@@ -467,7 +478,8 @@ void hubbub1_program_watermarks(
 		prog_wm_value = convert_and_clamp(
 				watermarks->c.cstate_pstate.pstate_change_ns,
 				refclk_mhz, 0x1fffff);
-		REG_WRITE(DCHUBBUB_ARB_ALLOW_DRAM_CLK_CHANGE_WATERMARK_C, prog_wm_value);
+		REG_SET(DCHUBBUB_ARB_ALLOW_DRAM_CLK_CHANGE_WATERMARK_C, 0,
+				DCHUBBUB_ARB_ALLOW_DRAM_CLK_CHANGE_WATERMARK_C, prog_wm_value);
 		DC_LOG_BANDWIDTH_CALCS("DRAM_CLK_CHANGE_WATERMARK_C calculated =%d\n"
 			"HW register value = 0x%x\n\n",
 			watermarks->c.cstate_pstate.pstate_change_ns, prog_wm_value);
@@ -478,7 +490,8 @@ void hubbub1_program_watermarks(
 		hubbub1->watermarks.d.urgent_ns = watermarks->d.urgent_ns;
 		prog_wm_value = convert_and_clamp(watermarks->d.urgent_ns,
 				refclk_mhz, 0x1fffff);
-		REG_WRITE(DCHUBBUB_ARB_DATA_URGENCY_WATERMARK_D, prog_wm_value);
+		REG_SET(DCHUBBUB_ARB_DATA_URGENCY_WATERMARK_D, 0,
+				DCHUBBUB_ARB_DATA_URGENCY_WATERMARK_D, prog_wm_value);
 
 		DC_LOG_BANDWIDTH_CALCS("URGENCY_WATERMARK_D calculated =%d\n"
 			"HW register value = 0x%x\n",
@@ -505,7 +518,8 @@ void hubbub1_program_watermarks(
 			prog_wm_value = convert_and_clamp(
 					watermarks->d.cstate_pstate.cstate_enter_plus_exit_ns,
 					refclk_mhz, 0x1fffff);
-			REG_WRITE(DCHUBBUB_ARB_ALLOW_SR_ENTER_WATERMARK_D, prog_wm_value);
+			REG_SET(DCHUBBUB_ARB_ALLOW_SR_ENTER_WATERMARK_D, 0,
+					DCHUBBUB_ARB_ALLOW_SR_ENTER_WATERMARK_D, prog_wm_value);
 			DC_LOG_BANDWIDTH_CALCS("SR_ENTER_EXIT_WATERMARK_D calculated =%d\n"
 				"HW register value = 0x%x\n",
 				watermarks->d.cstate_pstate.cstate_enter_plus_exit_ns, prog_wm_value);
@@ -518,7 +532,8 @@ void hubbub1_program_watermarks(
 			prog_wm_value = convert_and_clamp(
 					watermarks->d.cstate_pstate.cstate_exit_ns,
 					refclk_mhz, 0x1fffff);
-			REG_WRITE(DCHUBBUB_ARB_ALLOW_SR_EXIT_WATERMARK_D, prog_wm_value);
+			REG_SET(DCHUBBUB_ARB_ALLOW_SR_EXIT_WATERMARK_D, 0,
+					DCHUBBUB_ARB_ALLOW_SR_EXIT_WATERMARK_D, prog_wm_value);
 			DC_LOG_BANDWIDTH_CALCS("SR_EXIT_WATERMARK_D calculated =%d\n"
 				"HW register value = 0x%x\n",
 				watermarks->d.cstate_pstate.cstate_exit_ns, prog_wm_value);
@@ -532,7 +547,8 @@ void hubbub1_program_watermarks(
 		prog_wm_value = convert_and_clamp(
 				watermarks->d.cstate_pstate.pstate_change_ns,
 				refclk_mhz, 0x1fffff);
-		REG_WRITE(DCHUBBUB_ARB_ALLOW_DRAM_CLK_CHANGE_WATERMARK_D, prog_wm_value);
+		REG_SET(DCHUBBUB_ARB_ALLOW_DRAM_CLK_CHANGE_WATERMARK_D, 0,
+				DCHUBBUB_ARB_ALLOW_DRAM_CLK_CHANGE_WATERMARK_D, prog_wm_value);
 		DC_LOG_BANDWIDTH_CALCS("DRAM_CLK_CHANGE_WATERMARK_D calculated =%d\n"
 			"HW register value = 0x%x\n\n",
 			watermarks->d.cstate_pstate.pstate_change_ns, prog_wm_value);
@@ -867,6 +883,7 @@ static const struct hubbub_funcs hubbub1_funcs = {
 	.dcc_support_pixel_format = hubbub1_dcc_support_pixel_format,
 	.get_dcc_compression_cap = hubbub1_get_dcc_compression_cap,
 	.wm_read_state = hubbub1_wm_read_state,
+	.program_watermarks = hubbub1_program_watermarks,
 };
 
 void hubbub1_construct(struct hubbub *hubbub,
diff --git a/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_hubbub.h b/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_hubbub.h
index 9cd4a5194154..85811b24a497 100644
--- a/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_hubbub.h
+++ b/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_hubbub.h
@@ -32,18 +32,14 @@
 #define TO_DCN10_HUBBUB(hubbub)\
 	container_of(hubbub, struct dcn10_hubbub, base)
 
-#define HUBHUB_REG_LIST_DCN()\
+#define HUBBUB_REG_LIST_DCN_COMMON()\
 	SR(DCHUBBUB_ARB_DATA_URGENCY_WATERMARK_A),\
-	SR(DCHUBBUB_ARB_PTE_META_URGENCY_WATERMARK_A),\
 	SR(DCHUBBUB_ARB_ALLOW_DRAM_CLK_CHANGE_WATERMARK_A),\
 	SR(DCHUBBUB_ARB_DATA_URGENCY_WATERMARK_B),\
-	SR(DCHUBBUB_ARB_PTE_META_URGENCY_WATERMARK_B),\
 	SR(DCHUBBUB_ARB_ALLOW_DRAM_CLK_CHANGE_WATERMARK_B),\
 	SR(DCHUBBUB_ARB_DATA_URGENCY_WATERMARK_C),\
-	SR(DCHUBBUB_ARB_PTE_META_URGENCY_WATERMARK_C),\
 	SR(DCHUBBUB_ARB_ALLOW_DRAM_CLK_CHANGE_WATERMARK_C),\
 	SR(DCHUBBUB_ARB_DATA_URGENCY_WATERMARK_D),\
-	SR(DCHUBBUB_ARB_PTE_META_URGENCY_WATERMARK_D),\
 	SR(DCHUBBUB_ARB_ALLOW_DRAM_CLK_CHANGE_WATERMARK_D),\
 	SR(DCHUBBUB_ARB_WATERMARK_CHANGE_CNTL),\
 	SR(DCHUBBUB_ARB_DRAM_STATE_CNTL),\
@@ -54,6 +50,12 @@
 	SR(DCHUBBUB_TEST_DEBUG_DATA),\
 	SR(DCHUBBUB_SOFT_RESET)
 
+#define HUBBUB_VM_REG_LIST() \
+	SR(DCHUBBUB_ARB_PTE_META_URGENCY_WATERMARK_A),\
+	SR(DCHUBBUB_ARB_PTE_META_URGENCY_WATERMARK_B),\
+	SR(DCHUBBUB_ARB_PTE_META_URGENCY_WATERMARK_C),\
+	SR(DCHUBBUB_ARB_PTE_META_URGENCY_WATERMARK_D)
+
 #define HUBBUB_SR_WATERMARK_REG_LIST()\
 	SR(DCHUBBUB_ARB_ALLOW_SR_ENTER_WATERMARK_A),\
 	SR(DCHUBBUB_ARB_ALLOW_SR_EXIT_WATERMARK_A),\
@@ -65,7 +67,8 @@
 	SR(DCHUBBUB_ARB_ALLOW_SR_EXIT_WATERMARK_D)
 
 #define HUBBUB_REG_LIST_DCN10(id)\
-	HUBHUB_REG_LIST_DCN(), \
+	HUBBUB_REG_LIST_DCN_COMMON(), \
+	HUBBUB_VM_REG_LIST(), \
 	HUBBUB_SR_WATERMARK_REG_LIST(), \
 	SR(DCHUBBUB_SDPIF_FB_TOP),\
 	SR(DCHUBBUB_SDPIF_FB_BASE),\
@@ -122,8 +125,7 @@ struct dcn_hubbub_registers {
 #define HUBBUB_SF(reg_name, field_name, post_fix)\
 	.field_name = reg_name ## __ ## field_name ## post_fix
 
-
-#define HUBBUB_MASK_SH_LIST_DCN(mask_sh)\
+#define HUBBUB_MASK_SH_LIST_DCN_COMMON(mask_sh)\
 		HUBBUB_SF(DCHUBBUB_GLOBAL_TIMER_CNTL, DCHUBBUB_GLOBAL_TIMER_ENABLE, mask_sh), \
 		HUBBUB_SF(DCHUBBUB_SOFT_RESET, DCHUBBUB_GLOBAL_SOFT_RESET, mask_sh), \
 		HUBBUB_SF(DCHUBBUB_ARB_WATERMARK_CHANGE_CNTL, DCHUBBUB_ARB_WATERMARK_CHANGE_REQUEST, mask_sh), \
@@ -133,10 +135,29 @@ struct dcn_hubbub_registers {
 		HUBBUB_SF(DCHUBBUB_ARB_DRAM_STATE_CNTL, DCHUBBUB_ARB_ALLOW_PSTATE_CHANGE_FORCE_VALUE, mask_sh), \
 		HUBBUB_SF(DCHUBBUB_ARB_DRAM_STATE_CNTL, DCHUBBUB_ARB_ALLOW_PSTATE_CHANGE_FORCE_ENABLE, mask_sh), \
 		HUBBUB_SF(DCHUBBUB_ARB_SAT_LEVEL, DCHUBBUB_ARB_SAT_LEVEL, mask_sh), \
-		HUBBUB_SF(DCHUBBUB_ARB_DF_REQ_OUTSTAND, DCHUBBUB_ARB_MIN_REQ_OUTSTAND, mask_sh)
+		HUBBUB_SF(DCHUBBUB_ARB_DF_REQ_OUTSTAND, DCHUBBUB_ARB_MIN_REQ_OUTSTAND, mask_sh), \
+		HUBBUB_SF(DCHUBBUB_ARB_DATA_URGENCY_WATERMARK_A, DCHUBBUB_ARB_DATA_URGENCY_WATERMARK_A, mask_sh), \
+		HUBBUB_SF(DCHUBBUB_ARB_DATA_URGENCY_WATERMARK_B, DCHUBBUB_ARB_DATA_URGENCY_WATERMARK_B, mask_sh), \
+		HUBBUB_SF(DCHUBBUB_ARB_DATA_URGENCY_WATERMARK_C, DCHUBBUB_ARB_DATA_URGENCY_WATERMARK_C, mask_sh), \
+		HUBBUB_SF(DCHUBBUB_ARB_DATA_URGENCY_WATERMARK_D, DCHUBBUB_ARB_DATA_URGENCY_WATERMARK_D, mask_sh), \
+		HUBBUB_SF(DCHUBBUB_ARB_ALLOW_DRAM_CLK_CHANGE_WATERMARK_A, DCHUBBUB_ARB_ALLOW_DRAM_CLK_CHANGE_WATERMARK_A, mask_sh), \
+		HUBBUB_SF(DCHUBBUB_ARB_ALLOW_DRAM_CLK_CHANGE_WATERMARK_B, DCHUBBUB_ARB_ALLOW_DRAM_CLK_CHANGE_WATERMARK_B, mask_sh), \
+		HUBBUB_SF(DCHUBBUB_ARB_ALLOW_DRAM_CLK_CHANGE_WATERMARK_C, DCHUBBUB_ARB_ALLOW_DRAM_CLK_CHANGE_WATERMARK_C, mask_sh), \
+		HUBBUB_SF(DCHUBBUB_ARB_ALLOW_DRAM_CLK_CHANGE_WATERMARK_D, DCHUBBUB_ARB_ALLOW_DRAM_CLK_CHANGE_WATERMARK_D, mask_sh)
+
+#define HUBBUB_MASK_SH_LIST_STUTTER(mask_sh) \
+		HUBBUB_SF(DCHUBBUB_ARB_ALLOW_SR_ENTER_WATERMARK_A, DCHUBBUB_ARB_ALLOW_SR_ENTER_WATERMARK_A, mask_sh), \
+		HUBBUB_SF(DCHUBBUB_ARB_ALLOW_SR_ENTER_WATERMARK_B, DCHUBBUB_ARB_ALLOW_SR_ENTER_WATERMARK_B, mask_sh), \
+		HUBBUB_SF(DCHUBBUB_ARB_ALLOW_SR_ENTER_WATERMARK_C, DCHUBBUB_ARB_ALLOW_SR_ENTER_WATERMARK_C, mask_sh), \
+		HUBBUB_SF(DCHUBBUB_ARB_ALLOW_SR_ENTER_WATERMARK_D, DCHUBBUB_ARB_ALLOW_SR_ENTER_WATERMARK_D, mask_sh), \
+		HUBBUB_SF(DCHUBBUB_ARB_ALLOW_SR_EXIT_WATERMARK_A, DCHUBBUB_ARB_ALLOW_SR_EXIT_WATERMARK_A, mask_sh), \
+		HUBBUB_SF(DCHUBBUB_ARB_ALLOW_SR_EXIT_WATERMARK_B, DCHUBBUB_ARB_ALLOW_SR_EXIT_WATERMARK_B, mask_sh), \
+		HUBBUB_SF(DCHUBBUB_ARB_ALLOW_SR_EXIT_WATERMARK_C, DCHUBBUB_ARB_ALLOW_SR_EXIT_WATERMARK_C, mask_sh), \
+		HUBBUB_SF(DCHUBBUB_ARB_ALLOW_SR_EXIT_WATERMARK_D, DCHUBBUB_ARB_ALLOW_SR_EXIT_WATERMARK_D, mask_sh)
 
 #define HUBBUB_MASK_SH_LIST_DCN10(mask_sh)\
-		HUBBUB_MASK_SH_LIST_DCN(mask_sh), \
+		HUBBUB_MASK_SH_LIST_DCN_COMMON(mask_sh), \
+		HUBBUB_MASK_SH_LIST_STUTTER(mask_sh), \
 		HUBBUB_SF(DCHUBBUB_SDPIF_FB_TOP, SDPIF_FB_TOP, mask_sh), \
 		HUBBUB_SF(DCHUBBUB_SDPIF_FB_BASE, SDPIF_FB_BASE, mask_sh), \
 		HUBBUB_SF(DCHUBBUB_SDPIF_FB_OFFSET, SDPIF_FB_OFFSET, mask_sh), \
@@ -167,15 +188,35 @@ struct dcn_hubbub_registers {
 		type FB_OFFSET;\
 		type AGP_BOT;\
 		type AGP_TOP;\
-		type AGP_BASE
+		type AGP_BASE;\
+		type DCHUBBUB_ARB_DATA_URGENCY_WATERMARK_A;\
+		type DCHUBBUB_ARB_DATA_URGENCY_WATERMARK_B;\
+		type DCHUBBUB_ARB_DATA_URGENCY_WATERMARK_C;\
+		type DCHUBBUB_ARB_DATA_URGENCY_WATERMARK_D;\
+		type DCHUBBUB_ARB_ALLOW_DRAM_CLK_CHANGE_WATERMARK_A;\
+		type DCHUBBUB_ARB_ALLOW_DRAM_CLK_CHANGE_WATERMARK_B;\
+		type DCHUBBUB_ARB_ALLOW_DRAM_CLK_CHANGE_WATERMARK_C;\
+		type DCHUBBUB_ARB_ALLOW_DRAM_CLK_CHANGE_WATERMARK_D
+
+#define HUBBUB_STUTTER_REG_FIELD_LIST(type) \
+		type DCHUBBUB_ARB_ALLOW_SR_ENTER_WATERMARK_A;\
+		type DCHUBBUB_ARB_ALLOW_SR_ENTER_WATERMARK_B;\
+		type DCHUBBUB_ARB_ALLOW_SR_ENTER_WATERMARK_C;\
+		type DCHUBBUB_ARB_ALLOW_SR_ENTER_WATERMARK_D;\
+		type DCHUBBUB_ARB_ALLOW_SR_EXIT_WATERMARK_A;\
+		type DCHUBBUB_ARB_ALLOW_SR_EXIT_WATERMARK_B;\
+		type DCHUBBUB_ARB_ALLOW_SR_EXIT_WATERMARK_C;\
+		type DCHUBBUB_ARB_ALLOW_SR_EXIT_WATERMARK_D
 
 
 struct dcn_hubbub_shift {
 	DCN_HUBBUB_REG_FIELD_LIST(uint8_t);
+	HUBBUB_STUTTER_REG_FIELD_LIST(uint8_t);
 };
 
 struct dcn_hubbub_mask {
 	DCN_HUBBUB_REG_FIELD_LIST(uint32_t);
+	HUBBUB_STUTTER_REG_FIELD_LIST(uint32_t);
 };
 
 struct dc;
diff --git a/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_hubp.c b/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_hubp.c
index 0ba68d41b9c3..54b219a710d8 100644
--- a/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_hubp.c
+++ b/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_hubp.c
@@ -1178,6 +1178,10 @@ void hubp1_vtg_sel(struct hubp *hubp, uint32_t otg_inst)
 	REG_UPDATE(DCHUBP_CNTL, HUBP_VTG_SEL, otg_inst);
 }
 
+void hubp1_init(struct hubp *hubp)
+{
+	//do nothing
+}
 static const struct hubp_funcs dcn10_hubp_funcs = {
 	.hubp_program_surface_flip_and_addr =
 			hubp1_program_surface_flip_and_addr,
@@ -1201,7 +1205,7 @@ static const struct hubp_funcs dcn10_hubp_funcs = {
 	.hubp_clear_underflow = hubp1_clear_underflow,
 	.hubp_disable_control =  hubp1_disable_control,
 	.hubp_get_underflow_status = hubp1_get_underflow_status,
-
+	.hubp_init = hubp1_init,
 };
 
 /*****************************************/
diff --git a/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_hubp.h b/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_hubp.h
index db98ba361686..99d2b7e2a578 100644
--- a/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_hubp.h
+++ b/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_hubp.h
@@ -34,6 +34,7 @@
 #define HUBP_REG_LIST_DCN(id)\
 	SRI(DCHUBP_CNTL, HUBP, id),\
 	SRI(HUBPREQ_DEBUG_DB, HUBP, id),\
+	SRI(HUBPREQ_DEBUG, HUBP, id),\
 	SRI(DCSURF_ADDR_CONFIG, HUBP, id),\
 	SRI(DCSURF_TILING_CONFIG, HUBP, id),\
 	SRI(DCSURF_SURFACE_PITCH, HUBPREQ, id),\
@@ -138,6 +139,7 @@
 #define HUBP_COMMON_REG_VARIABLE_LIST \
 	uint32_t DCHUBP_CNTL; \
 	uint32_t HUBPREQ_DEBUG_DB; \
+	uint32_t HUBPREQ_DEBUG; \
 	uint32_t DCSURF_ADDR_CONFIG; \
 	uint32_t DCSURF_TILING_CONFIG; \
 	uint32_t DCSURF_SURFACE_PITCH; \
@@ -749,4 +751,6 @@ enum cursor_pitch hubp1_get_cursor_pitch(unsigned int pitch);
 void hubp1_vready_workaround(struct hubp *hubp,
 		struct _vcs_dpi_display_pipe_dest_params_st *pipe_dest);
 
+void hubp1_init(struct hubp *hubp);
+
 #endif
diff --git a/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_hw_sequencer.c b/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_hw_sequencer.c
index dab370676bfa..33d311cea28c 100644
--- a/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_hw_sequencer.c
+++ b/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_hw_sequencer.c
@@ -1118,14 +1118,17 @@ static void dcn10_init_hw(struct dc *dc)
 	 * Otherwise, if taking control is not possible, we need to power
 	 * everything down.
 	 */
-	if (dcb->funcs->is_accelerated_mode(dcb)) {
+	if (dcb->funcs->is_accelerated_mode(dcb) || dc->config.power_down_display_on_boot) {
 		for (i = 0; i < dc->res_pool->pipe_count; i++) {
 			struct hubp *hubp = dc->res_pool->hubps[i];
 			struct dpp *dpp = dc->res_pool->dpps[i];
 
+			hubp->funcs->hubp_init(hubp);
 			dc->res_pool->opps[i]->mpc_tree_params.opp_id = dc->res_pool->opps[i]->inst;
 			plane_atomic_power_down(dc, dpp, hubp);
 		}
+
+		apply_DEGVIDCN10_253_wa(dc);
 	}
 
 	for (i = 0; i < dc->res_pool->audio_count; i++) {
@@ -2436,6 +2439,8 @@ static void dcn10_prepare_bandwidth(
 		struct dc *dc,
 		struct dc_state *context)
 {
+	struct hubbub *hubbub = dc->res_pool->hubbub;
+
 	if (dc->debug.sanity_checks)
 		dcn10_verify_allow_pstate_change_high(dc);
 
@@ -2449,7 +2454,7 @@ static void dcn10_prepare_bandwidth(
 				false);
 	}
 
-	hubbub1_program_watermarks(dc->res_pool->hubbub,
+	hubbub->funcs->program_watermarks(hubbub,
 			&context->bw_ctx.bw.dcn.watermarks,
 			dc->res_pool->ref_clocks.dchub_ref_clock_inKhz / 1000,
 			true);
@@ -2466,6 +2471,8 @@ static void dcn10_optimize_bandwidth(
 		struct dc *dc,
 		struct dc_state *context)
 {
+	struct hubbub *hubbub = dc->res_pool->hubbub;
+
 	if (dc->debug.sanity_checks)
 		dcn10_verify_allow_pstate_change_high(dc);
 
@@ -2479,7 +2486,7 @@ static void dcn10_optimize_bandwidth(
 				true);
 	}
 
-	hubbub1_program_watermarks(dc->res_pool->hubbub,
+	hubbub->funcs->program_watermarks(hubbub,
 			&context->bw_ctx.bw.dcn.watermarks,
 			dc->res_pool->ref_clocks.dchub_ref_clock_inKhz / 1000,
 			true);
diff --git a/drivers/gpu/drm/amd/display/dc/dm_pp_smu.h b/drivers/gpu/drm/amd/display/dc/dm_pp_smu.h
index cc6891b8ea69..4fc4208d1472 100644
--- a/drivers/gpu/drm/amd/display/dc/dm_pp_smu.h
+++ b/drivers/gpu/drm/amd/display/dc/dm_pp_smu.h
@@ -74,29 +74,6 @@ struct pp_smu_wm_range_sets {
 	struct pp_smu_wm_set_range writer_wm_sets[MAX_WATERMARK_SETS];
 };
 
-struct pp_smu_display_requirement_rv {
-	/* PPSMC_MSG_SetDisplayCount: count
-	 *  0 triggers S0i2 optimization
-	 */
-	unsigned int display_count;
-
-	/* PPSMC_MSG_SetHardMinFclkByFreq: mhz
-	 *  FCLK will vary with DPM, but never below requested hard min
-	 */
-	unsigned int hard_min_fclk_mhz;
-
-	/* PPSMC_MSG_SetHardMinDcefclkByFreq: mhz
-	 *  fixed clock at requested freq, either from FCH bypass or DFS
-	 */
-	unsigned int hard_min_dcefclk_mhz;
-
-	/* PPSMC_MSG_SetMinDeepSleepDcefclk: mhz
-	 *  when DF is in cstate, dcf clock is further divided down
-	 *  to just above given frequency
-	 */
-	unsigned int min_deep_sleep_dcefclk_mhz;
-};
-
 struct pp_smu_funcs_rv {
 	struct pp_smu pp_smu;
 
diff --git a/drivers/gpu/drm/amd/display/dc/inc/core_types.h b/drivers/gpu/drm/amd/display/dc/inc/core_types.h
index 88a82a23d259..6f5ab05d6467 100644
--- a/drivers/gpu/drm/amd/display/dc/inc/core_types.h
+++ b/drivers/gpu/drm/amd/display/dc/inc/core_types.h
@@ -145,7 +145,6 @@ struct resource_pool {
 	struct hubbub *hubbub;
 	struct mpc *mpc;
 	struct pp_smu_funcs *pp_smu;
-	struct pp_smu_display_requirement_rv pp_smu_req;
 	struct dce_aux *engines[MAX_PIPES];
 	struct dce_i2c_hw *hw_i2cs[MAX_PIPES];
 	struct dce_i2c_sw *sw_i2cs[MAX_PIPES];
diff --git a/drivers/gpu/drm/amd/display/dc/inc/hw/dchubbub.h b/drivers/gpu/drm/amd/display/dc/inc/hw/dchubbub.h
index 5e8fead3c09a..93667e8b23b3 100644
--- a/drivers/gpu/drm/amd/display/dc/inc/hw/dchubbub.h
+++ b/drivers/gpu/drm/amd/display/dc/inc/hw/dchubbub.h
@@ -77,6 +77,12 @@ struct hubbub_funcs {
 	void (*get_dchub_ref_freq)(struct hubbub *hubbub,
 			unsigned int dccg_ref_freq_inKhz,
 			unsigned int *dchub_ref_freq_inKhz);
+
+	void (*program_watermarks)(
+			struct hubbub *hubbub,
+			struct dcn_watermark_set *watermarks,
+			unsigned int refclk_mhz,
+			bool safe_to_lower);
 };
 
 struct hubbub {
diff --git a/drivers/gpu/drm/amd/display/dc/inc/hw/dmcu.h b/drivers/gpu/drm/amd/display/dc/inc/hw/dmcu.h
index cbaa43853611..c68f0ce346c7 100644
--- a/drivers/gpu/drm/amd/display/dc/inc/hw/dmcu.h
+++ b/drivers/gpu/drm/amd/display/dc/inc/hw/dmcu.h
@@ -70,6 +70,8 @@ struct dmcu_funcs {
 	void (*get_psr_wait_loop)(struct dmcu *dmcu,
 			unsigned int *psr_wait_loop_number);
 	bool (*is_dmcu_initialized)(struct dmcu *dmcu);
+	bool (*lock_phy)(struct dmcu *dmcu);
+	bool (*unlock_phy)(struct dmcu *dmcu);
 };
 
 #endif
diff --git a/drivers/gpu/drm/amd/display/dc/inc/hw/hubp.h b/drivers/gpu/drm/amd/display/dc/inc/hw/hubp.h
index 1cd07e94ee63..455df4999797 100644
--- a/drivers/gpu/drm/amd/display/dc/inc/hw/hubp.h
+++ b/drivers/gpu/drm/amd/display/dc/inc/hw/hubp.h
@@ -130,6 +130,7 @@ struct hubp_funcs {
 	void (*hubp_clear_underflow)(struct hubp *hubp);
 	void (*hubp_disable_control)(struct hubp *hubp, bool disable_hubp);
 	unsigned int (*hubp_get_underflow_status)(struct hubp *hubp);
+	void (*hubp_init)(struct hubp *hubp);
 
 };
 
diff --git a/drivers/gpu/drm/amd/display/modules/freesync/freesync.c b/drivers/gpu/drm/amd/display/modules/freesync/freesync.c
index 3d867e34f8b3..19b1eaebe484 100644
--- a/drivers/gpu/drm/amd/display/modules/freesync/freesync.c
+++ b/drivers/gpu/drm/amd/display/modules/freesync/freesync.c
@@ -437,10 +437,8 @@ static void apply_below_the_range(struct core_freesync *core_freesync,
 			inserted_frame_duration_in_us = last_render_time_in_us /
 							frames_to_insert;
 
-		if (inserted_frame_duration_in_us <
-			(1000000 / in_out_vrr->max_refresh_in_uhz))
-			inserted_frame_duration_in_us =
-				(1000000 / in_out_vrr->max_refresh_in_uhz);
+		if (inserted_frame_duration_in_us < in_out_vrr->min_duration_in_us)
+			inserted_frame_duration_in_us = in_out_vrr->min_duration_in_us;
 
 		/* Cache the calculated variables */
 		in_out_vrr->btr.inserted_duration_in_us =
diff --git a/drivers/gpu/drm/amd/include/asic_reg/dcn/dcn_1_0_offset.h b/drivers/gpu/drm/amd/include/asic_reg/dcn/dcn_1_0_offset.h
index 721c61171045..5a44e614ab7e 100644
--- a/drivers/gpu/drm/amd/include/asic_reg/dcn/dcn_1_0_offset.h
+++ b/drivers/gpu/drm/amd/include/asic_reg/dcn/dcn_1_0_offset.h
@@ -2347,6 +2347,8 @@
 #define mmHUBP0_DCHUBP_VMPG_CONFIG_BASE_IDX                                                            2
 #define mmHUBP0_HUBPREQ_DEBUG_DB                                                                       0x0569
 #define mmHUBP0_HUBPREQ_DEBUG_DB_BASE_IDX                                                              2
+#define mmHUBP0_HUBPREQ_DEBUG                                                                          0x056a
+#define mmHUBP0_HUBPREQ_DEBUG_BASE_IDX                                                                 2
 #define mmHUBP0_HUBP_MEASURE_WIN_CTRL_DCFCLK                                                           0x056e
 #define mmHUBP0_HUBP_MEASURE_WIN_CTRL_DCFCLK_BASE_IDX                                                  2
 #define mmHUBP0_HUBP_MEASURE_WIN_CTRL_DPPCLK                                                           0x056f
@@ -2631,6 +2633,8 @@
 #define mmHUBP1_DCHUBP_VMPG_CONFIG_BASE_IDX                                                            2
 #define mmHUBP1_HUBPREQ_DEBUG_DB                                                                       0x062d
 #define mmHUBP1_HUBPREQ_DEBUG_DB_BASE_IDX                                                              2
+#define mmHUBP1_HUBPREQ_DEBUG                                                                          0x062e
+#define mmHUBP1_HUBPREQ_DEBUG_BASE_IDX                                                                 2
 #define mmHUBP1_HUBP_MEASURE_WIN_CTRL_DCFCLK                                                           0x0632
 #define mmHUBP1_HUBP_MEASURE_WIN_CTRL_DCFCLK_BASE_IDX                                                  2
 #define mmHUBP1_HUBP_MEASURE_WIN_CTRL_DPPCLK                                                           0x0633
@@ -2915,6 +2919,8 @@
 #define mmHUBP2_DCHUBP_VMPG_CONFIG_BASE_IDX                                                            2
 #define mmHUBP2_HUBPREQ_DEBUG_DB                                                                       0x06f1
 #define mmHUBP2_HUBPREQ_DEBUG_DB_BASE_IDX                                                              2
+#define mmHUBP2_HUBPREQ_DEBUG                                                                          0x06f2
+#define mmHUBP2_HUBPREQ_DEBUG_BASE_IDX                                                                 2
 #define mmHUBP2_HUBP_MEASURE_WIN_CTRL_DCFCLK                                                           0x06f6
 #define mmHUBP2_HUBP_MEASURE_WIN_CTRL_DCFCLK_BASE_IDX                                                  2
 #define mmHUBP2_HUBP_MEASURE_WIN_CTRL_DPPCLK                                                           0x06f7
@@ -3199,6 +3205,8 @@
 #define mmHUBP3_DCHUBP_VMPG_CONFIG_BASE_IDX                                                            2
 #define mmHUBP3_HUBPREQ_DEBUG_DB                                                                       0x07b5
 #define mmHUBP3_HUBPREQ_DEBUG_DB_BASE_IDX                                                              2
+#define mmHUBP3_HUBPREQ_DEBUG                                                                          0x07b6
+#define mmHUBP3_HUBPREQ_DEBUG_BASE_IDX                                                                 2
 #define mmHUBP3_HUBP_MEASURE_WIN_CTRL_DCFCLK                                                           0x07ba
 #define mmHUBP3_HUBP_MEASURE_WIN_CTRL_DCFCLK_BASE_IDX                                                  2
 #define mmHUBP3_HUBP_MEASURE_WIN_CTRL_DPPCLK                                                           0x07bb
diff --git a/drivers/gpu/drm/amd/include/atomfirmware.h b/drivers/gpu/drm/amd/include/atomfirmware.h
index 08769b4b7a74..d3075adb3297 100644
--- a/drivers/gpu/drm/amd/include/atomfirmware.h
+++ b/drivers/gpu/drm/amd/include/atomfirmware.h
@@ -718,6 +718,7 @@ enum atom_encoder_caps_def
   ATOM_ENCODER_CAP_RECORD_HBR2_EN               =0x02,         // DP1.2 HBR2 setting is qualified and HBR2 can be enabled 
   ATOM_ENCODER_CAP_RECORD_HDMI6Gbps_EN          =0x04,         // HDMI2.0 6Gbps enable or not. 
   ATOM_ENCODER_CAP_RECORD_HBR3_EN               =0x08,         // DP1.3 HBR3 is supported by board. 
+  ATOM_ENCODER_CAP_RECORD_USB_C_TYPE            =0x100,        // the DP connector is a USB-C type.
 };
 
 struct  atom_encoder_caps_record
diff --git a/drivers/gpu/drm/amd/include/kgd_kfd_interface.h b/drivers/gpu/drm/amd/include/kgd_kfd_interface.h
index 5f3c10ebff08..b897aca9b4c9 100644
--- a/drivers/gpu/drm/amd/include/kgd_kfd_interface.h
+++ b/drivers/gpu/drm/amd/include/kgd_kfd_interface.h
@@ -85,18 +85,6 @@ enum kgd_memory_pool {
 	KGD_POOL_FRAMEBUFFER = 3,
 };
 
-enum kgd_engine_type {
-	KGD_ENGINE_PFP = 1,
-	KGD_ENGINE_ME,
-	KGD_ENGINE_CE,
-	KGD_ENGINE_MEC1,
-	KGD_ENGINE_MEC2,
-	KGD_ENGINE_RLC,
-	KGD_ENGINE_SDMA1,
-	KGD_ENGINE_SDMA2,
-	KGD_ENGINE_MAX
-};
-
 /**
  * enum kfd_sched_policy
  *
@@ -230,8 +218,6 @@ struct tile_config {
  * @hqd_sdma_destroy: Destructs and preempts the SDMA queue assigned to that
  * SDMA hqd slot.
  *
- * @get_fw_version: Returns FW versions from the header
- *
  * @set_scratch_backing_va: Sets VA for scratch backing memory of a VMID.
  * Only used for no cp scheduling mode
  *
@@ -311,8 +297,6 @@ struct kfd2kgd_calls {
 					struct kgd_dev *kgd,
 					uint8_t vmid);
 
-	uint16_t (*get_fw_version)(struct kgd_dev *kgd,
-				enum kgd_engine_type type);
 	void (*set_scratch_backing_va)(struct kgd_dev *kgd,
 				uint64_t va, uint32_t vmid);
 	int (*get_tile_config)(struct kgd_dev *kgd, struct tile_config *config);
diff --git a/drivers/gpu/drm/amd/powerplay/hwmgr/smu10_hwmgr.c b/drivers/gpu/drm/amd/powerplay/hwmgr/smu10_hwmgr.c
index f32e3d0aaea6..9a595f7525e6 100644
--- a/drivers/gpu/drm/amd/powerplay/hwmgr/smu10_hwmgr.c
+++ b/drivers/gpu/drm/amd/powerplay/hwmgr/smu10_hwmgr.c
@@ -35,6 +35,7 @@
 #include "smu10_hwmgr.h"
 #include "power_state.h"
 #include "soc15_common.h"
+#include "smu10.h"
 
 #define SMU10_MAX_DEEPSLEEP_DIVIDER_ID     5
 #define SMU10_MINIMUM_ENGINE_CLOCK         800   /* 8Mhz, the low boundary of engine clock allowed on this chip */
@@ -204,18 +205,13 @@ static int smu10_set_clock_limit(struct pp_hwmgr *hwmgr, const void *input)
 	return 0;
 }
 
-static inline uint32_t convert_10k_to_mhz(uint32_t clock)
-{
-	return (clock + 99) / 100;
-}
-
 static int smu10_set_min_deep_sleep_dcefclk(struct pp_hwmgr *hwmgr, uint32_t clock)
 {
 	struct smu10_hwmgr *smu10_data = (struct smu10_hwmgr *)(hwmgr->backend);
 
 	if (smu10_data->need_min_deep_sleep_dcefclk &&
-		smu10_data->deep_sleep_dcefclk != convert_10k_to_mhz(clock)) {
-		smu10_data->deep_sleep_dcefclk = convert_10k_to_mhz(clock);
+		smu10_data->deep_sleep_dcefclk != clock) {
+		smu10_data->deep_sleep_dcefclk = clock;
 		smum_send_msg_to_smc_with_parameter(hwmgr,
 					PPSMC_MSG_SetMinDeepSleepDcefclk,
 					smu10_data->deep_sleep_dcefclk);
@@ -228,8 +224,8 @@ static int smu10_set_hard_min_dcefclk_by_freq(struct pp_hwmgr *hwmgr, uint32_t c
 	struct smu10_hwmgr *smu10_data = (struct smu10_hwmgr *)(hwmgr->backend);
 
 	if (smu10_data->dcf_actual_hard_min_freq &&
-		smu10_data->dcf_actual_hard_min_freq != convert_10k_to_mhz(clock)) {
-		smu10_data->dcf_actual_hard_min_freq = convert_10k_to_mhz(clock);
+		smu10_data->dcf_actual_hard_min_freq != clock) {
+		smu10_data->dcf_actual_hard_min_freq = clock;
 		smum_send_msg_to_smc_with_parameter(hwmgr,
 					PPSMC_MSG_SetHardMinDcefclkByFreq,
 					smu10_data->dcf_actual_hard_min_freq);
@@ -242,8 +238,8 @@ static int smu10_set_hard_min_fclk_by_freq(struct pp_hwmgr *hwmgr, uint32_t cloc
 	struct smu10_hwmgr *smu10_data = (struct smu10_hwmgr *)(hwmgr->backend);
 
 	if (smu10_data->f_actual_hard_min_freq &&
-		smu10_data->f_actual_hard_min_freq != convert_10k_to_mhz(clock)) {
-		smu10_data->f_actual_hard_min_freq = convert_10k_to_mhz(clock);
+		smu10_data->f_actual_hard_min_freq != clock) {
+		smu10_data->f_actual_hard_min_freq = clock;
 		smum_send_msg_to_smc_with_parameter(hwmgr,
 					PPSMC_MSG_SetHardMinFclkByFreq,
 					smu10_data->f_actual_hard_min_freq);
@@ -572,7 +568,6 @@ static int smu10_dpm_force_dpm_level(struct pp_hwmgr *hwmgr,
 				enum amd_dpm_forced_level level)
 {
 	struct smu10_hwmgr *data = hwmgr->backend;
-	struct amdgpu_device *adev = hwmgr->adev;
 	uint32_t min_sclk = hwmgr->display_config->min_core_set_clock;
 	uint32_t min_mclk = hwmgr->display_config->min_mem_set_clock/100;
 
@@ -581,11 +576,6 @@ static int smu10_dpm_force_dpm_level(struct pp_hwmgr *hwmgr,
 		return 0;
 	}
 
-	/* Disable UMDPSTATE support on rv2 temporarily */
-	if ((adev->asic_type == CHIP_RAVEN) &&
-	    (adev->rev_id >= 8))
-		return 0;
-
 	if (min_sclk < data->gfx_min_freq_limit)
 		min_sclk = data->gfx_min_freq_limit;
 
@@ -1200,6 +1190,94 @@ static void smu10_powergate_vcn(struct pp_hwmgr *hwmgr, bool bgate)
 	}
 }
 
+static int conv_power_profile_to_pplib_workload(int power_profile)
+{
+	int pplib_workload = 0;
+
+	switch (power_profile) {
+	case PP_SMC_POWER_PROFILE_BOOTUP_DEFAULT:
+		pplib_workload = WORKLOAD_DEFAULT_BIT;
+		break;
+	case PP_SMC_POWER_PROFILE_FULLSCREEN3D:
+		pplib_workload = WORKLOAD_PPLIB_FULL_SCREEN_3D_BIT;
+		break;
+	case PP_SMC_POWER_PROFILE_POWERSAVING:
+		pplib_workload = WORKLOAD_PPLIB_POWER_SAVING_BIT;
+		break;
+	case PP_SMC_POWER_PROFILE_VIDEO:
+		pplib_workload = WORKLOAD_PPLIB_VIDEO_BIT;
+		break;
+	case PP_SMC_POWER_PROFILE_VR:
+		pplib_workload = WORKLOAD_PPLIB_VR_BIT;
+		break;
+	case PP_SMC_POWER_PROFILE_COMPUTE:
+		pplib_workload = WORKLOAD_PPLIB_COMPUTE_BIT;
+		break;
+	}
+
+	return pplib_workload;
+}
+
+static int smu10_get_power_profile_mode(struct pp_hwmgr *hwmgr, char *buf)
+{
+	uint32_t i, size = 0;
+	static const uint8_t
+		profile_mode_setting[6][4] = {{70, 60, 0, 0,},
+						{70, 60, 1, 3,},
+						{90, 60, 0, 0,},
+						{70, 60, 0, 0,},
+						{70, 90, 0, 0,},
+						{30, 60, 0, 6,},
+						};
+	static const char *profile_name[6] = {
+					"BOOTUP_DEFAULT",
+					"3D_FULL_SCREEN",
+					"POWER_SAVING",
+					"VIDEO",
+					"VR",
+					"COMPUTE"};
+	static const char *title[6] = {"NUM",
+			"MODE_NAME",
+			"BUSY_SET_POINT",
+			"FPS",
+			"USE_RLC_BUSY",
+			"MIN_ACTIVE_LEVEL"};
+
+	if (!buf)
+		return -EINVAL;
+
+	size += sprintf(buf + size, "%s %16s %s %s %s %s\n",title[0],
+			title[1], title[2], title[3], title[4], title[5]);
+
+	for (i = 0; i <= PP_SMC_POWER_PROFILE_COMPUTE; i++)
+		size += sprintf(buf + size, "%3d %14s%s: %14d %3d %10d %14d\n",
+			i, profile_name[i], (i == hwmgr->power_profile_mode) ? "*" : " ",
+			profile_mode_setting[i][0], profile_mode_setting[i][1],
+			profile_mode_setting[i][2], profile_mode_setting[i][3]);
+
+	return size;
+}
+
+static int smu10_set_power_profile_mode(struct pp_hwmgr *hwmgr, long *input, uint32_t size)
+{
+	int workload_type = 0;
+
+	if (input[size] > PP_SMC_POWER_PROFILE_COMPUTE) {
+		pr_err("Invalid power profile mode %ld\n", input[size]);
+		return -EINVAL;
+	}
+	hwmgr->power_profile_mode = input[size];
+
+	/* conv PP_SMC_POWER_PROFILE* to WORKLOAD_PPLIB_*_BIT */
+	workload_type =
+		conv_power_profile_to_pplib_workload(hwmgr->power_profile_mode);
+	smum_send_msg_to_smc_with_parameter(hwmgr, PPSMC_MSG_ActiveProcessNotify,
+						1 << workload_type);
+
+	return 0;
+}
+
+
 static const struct pp_hwmgr_func smu10_hwmgr_funcs = {
 	.backend_init = smu10_hwmgr_backend_init,
 	.backend_fini = smu10_hwmgr_backend_fini,
@@ -1241,6 +1319,8 @@ static const struct pp_hwmgr_func smu10_hwmgr_funcs = {
 	.powergate_sdma = smu10_powergate_sdma,
 	.set_hard_min_dcefclk_by_freq = smu10_set_hard_min_dcefclk_by_freq,
 	.set_hard_min_fclk_by_freq = smu10_set_hard_min_fclk_by_freq,
+	.get_power_profile_mode = smu10_get_power_profile_mode,
+	.set_power_profile_mode = smu10_set_power_profile_mode,
 };
 
 int smu10_init_function_pointers(struct pp_hwmgr *hwmgr)
diff --git a/drivers/gpu/drm/amd/powerplay/hwmgr/vega20_hwmgr.c b/drivers/gpu/drm/amd/powerplay/hwmgr/vega20_hwmgr.c
index 39a547084e90..9b9f87b84910 100644
--- a/drivers/gpu/drm/amd/powerplay/hwmgr/vega20_hwmgr.c
+++ b/drivers/gpu/drm/amd/powerplay/hwmgr/vega20_hwmgr.c
@@ -3460,7 +3460,18 @@ static void vega20_power_gate_vce(struct pp_hwmgr *hwmgr, bool bgate)
 		return ;
 
 	data->vce_power_gated = bgate;
-	vega20_enable_disable_vce_dpm(hwmgr, !bgate);
+	if (bgate) {
+		vega20_enable_disable_vce_dpm(hwmgr, !bgate);
+		amdgpu_device_ip_set_powergating_state(hwmgr->adev,
+						AMD_IP_BLOCK_TYPE_VCE,
+						AMD_PG_STATE_GATE);
+	} else {
+		amdgpu_device_ip_set_powergating_state(hwmgr->adev,
+						AMD_IP_BLOCK_TYPE_VCE,
+						AMD_PG_STATE_UNGATE);
+		vega20_enable_disable_vce_dpm(hwmgr, !bgate);
+	}
+
 }
 
 static void vega20_power_gate_uvd(struct pp_hwmgr *hwmgr, bool bgate)
diff --git a/drivers/gpu/drm/amd/powerplay/inc/rv_ppsmc.h b/drivers/gpu/drm/amd/powerplay/inc/rv_ppsmc.h
index a2991fa2e6f8..90879e4092a3 100644
--- a/drivers/gpu/drm/amd/powerplay/inc/rv_ppsmc.h
+++ b/drivers/gpu/drm/amd/powerplay/inc/rv_ppsmc.h
@@ -85,7 +85,6 @@
 #define PPSMC_MSG_SetRccPfcPmeRestoreRegister   0x36
 #define PPSMC_Message_Count                     0x37
 
-
 typedef uint16_t PPSMC_Result;
 typedef int      PPSMC_Msg;
 
diff --git a/drivers/gpu/drm/amd/powerplay/inc/smu10.h b/drivers/gpu/drm/amd/powerplay/inc/smu10.h
index 9e837a5014c5..b96520528240 100644
--- a/drivers/gpu/drm/amd/powerplay/inc/smu10.h
+++ b/drivers/gpu/drm/amd/powerplay/inc/smu10.h
@@ -136,12 +136,14 @@
 #define FEATURE_CORE_CSTATES_MASK     (1 << FEATURE_CORE_CSTATES_BIT)
 
 /* Workload bits */
-#define WORKLOAD_PPLIB_FULL_SCREEN_3D_BIT 0
-#define WORKLOAD_PPLIB_VIDEO_BIT          2
-#define WORKLOAD_PPLIB_VR_BIT             3
-#define WORKLOAD_PPLIB_COMPUTE_BIT        4
-#define WORKLOAD_PPLIB_CUSTOM_BIT         5
-#define WORKLOAD_PPLIB_COUNT              6
+#define WORKLOAD_DEFAULT_BIT              0
+#define WORKLOAD_PPLIB_FULL_SCREEN_3D_BIT 1
+#define WORKLOAD_PPLIB_POWER_SAVING_BIT   2
+#define WORKLOAD_PPLIB_VIDEO_BIT          3
+#define WORKLOAD_PPLIB_VR_BIT             4
+#define WORKLOAD_PPLIB_COMPUTE_BIT        5
+#define WORKLOAD_PPLIB_CUSTOM_BIT         6
+#define WORKLOAD_PPLIB_COUNT              7
 
 typedef struct {
 	/* MP1_EXT_SCRATCH0 */
diff --git a/drivers/gpu/drm/amd/powerplay/smu_v11_0.c b/drivers/gpu/drm/amd/powerplay/smu_v11_0.c
index c478b38662d0..92903a4cc4d8 100644
--- a/drivers/gpu/drm/amd/powerplay/smu_v11_0.c
+++ b/drivers/gpu/drm/amd/powerplay/smu_v11_0.c
@@ -1896,8 +1896,13 @@ set_fan_speed_rpm_failed:
 static int smu_v11_0_set_xgmi_pstate(struct smu_context *smu,
 				     uint32_t pstate)
 {
-	/* send msg to SMU to set pstate */
-	return 0;
+	int ret = 0;
+	mutex_lock(&(smu->mutex));
+	ret = smu_send_smc_msg_with_param(smu,
+					  SMU_MSG_SetXgmiMode,
+					  pstate ? XGMI_STATE_D0 : XGMI_STATE_D3);
+	mutex_unlock(&(smu->mutex));
+	return ret;
 }
 
 static const struct smu_funcs smu_v11_0_funcs = {
diff --git a/drivers/gpu/drm/arm/display/include/malidp_product.h b/drivers/gpu/drm/arm/display/include/malidp_product.h
index b35fc5db866b..1053b11352eb 100644
--- a/drivers/gpu/drm/arm/display/include/malidp_product.h
+++ b/drivers/gpu/drm/arm/display/include/malidp_product.h
@@ -20,4 +20,16 @@
 /* Mali-display product IDs */
 #define MALIDP_D71_PRODUCT_ID   0x0071
 
+union komeda_config_id {
+	struct {
+		__u32	max_line_sz:16,
+			n_pipelines:2,
+			n_scalers:2, /* number of scalers per pipeline */
+			n_layers:3, /* number of layers per pipeline */
+			n_richs:3, /* number of rich layers per pipeline */
+			reserved_bits:6;
+	};
+	__u32 value;
+};
+
 #endif /* _MALIDP_PRODUCT_H_ */
diff --git a/drivers/gpu/drm/arm/display/komeda/Makefile b/drivers/gpu/drm/arm/display/komeda/Makefile
index d593125236ae..412eeba8c39f 100644
--- a/drivers/gpu/drm/arm/display/komeda/Makefile
+++ b/drivers/gpu/drm/arm/display/komeda/Makefile
@@ -1,14 +1,15 @@
 # SPDX-License-Identifier: GPL-2.0
 
 ccflags-y := \
-	-I$(src)/../include \
-	-I$(src)
+	-I $(srctree)/$(src)/../include \
+	-I $(srctree)/$(src)
 
 komeda-y := \
 	komeda_drv.o \
 	komeda_dev.o \
 	komeda_format_caps.o \
 	komeda_pipeline.o \
+	komeda_pipeline_state.o \
 	komeda_framebuffer.o \
 	komeda_kms.o \
 	komeda_crtc.o \
diff --git a/drivers/gpu/drm/arm/display/komeda/d71/d71_component.c b/drivers/gpu/drm/arm/display/komeda/d71/d71_component.c
index c56cfc2de147..031e5f305a3c 100644
--- a/drivers/gpu/drm/arm/display/komeda/d71/d71_component.c
+++ b/drivers/gpu/drm/arm/display/komeda/d71/d71_component.c
@@ -391,7 +391,7 @@ static void d71_compiz_dump(struct komeda_component *c, struct seq_file *sf)
 	seq_printf(sf, "CU_USER_HIGH:\t\t0x%X\n", v[1]);
 }
 
-struct komeda_component_funcs d71_compiz_funcs = {
+static struct komeda_component_funcs d71_compiz_funcs = {
 	.update		= d71_compiz_update,
 	.disable	= d71_component_disable,
 	.dump_register	= d71_compiz_dump,
@@ -467,7 +467,7 @@ static void d71_improc_dump(struct komeda_component *c, struct seq_file *sf)
 		seq_printf(sf, "IPS_RGB_YUV_COEFF%u:\t0x%X\n", i, v[i]);
 }
 
-struct komeda_component_funcs d71_improc_funcs = {
+static struct komeda_component_funcs d71_improc_funcs = {
 	.update		= d71_improc_update,
 	.disable	= d71_component_disable,
 	.dump_register	= d71_improc_dump,
@@ -543,7 +543,8 @@ static void d71_timing_ctrlr_update(struct komeda_component *c,
 	malidp_write32(reg, BLK_CONTROL, value);
 }
 
-void d71_timing_ctrlr_dump(struct komeda_component *c, struct seq_file *sf)
+static void d71_timing_ctrlr_dump(struct komeda_component *c,
+				  struct seq_file *sf)
 {
 	u32 v[8], i;
 
@@ -579,7 +580,7 @@ void d71_timing_ctrlr_dump(struct komeda_component *c, struct seq_file *sf)
 	seq_printf(sf, "BS_USER:\t\t0x%X\n", v[4]);
 }
 
-struct komeda_component_funcs d71_timing_ctrlr_funcs = {
+static struct komeda_component_funcs d71_timing_ctrlr_funcs = {
 	.update		= d71_timing_ctrlr_update,
 	.disable	= d71_timing_ctrlr_disable,
 	.dump_register	= d71_timing_ctrlr_dump,
diff --git a/drivers/gpu/drm/arm/display/komeda/d71/d71_dev.c b/drivers/gpu/drm/arm/display/komeda/d71/d71_dev.c
index 72631d673f85..34506ef7ad40 100644
--- a/drivers/gpu/drm/arm/display/komeda/d71/d71_dev.c
+++ b/drivers/gpu/drm/arm/display/komeda/d71/d71_dev.c
@@ -243,6 +243,56 @@ static int d71_disable_irq(struct komeda_dev *mdev)
 	return 0;
 }
 
+static void d71_on_off_vblank(struct komeda_dev *mdev, int master_pipe, bool on)
+{
+	struct d71_dev *d71 = mdev->chip_data;
+	struct d71_pipeline *pipe = d71->pipes[master_pipe];
+
+	malidp_write32_mask(pipe->dou_addr, BLK_IRQ_MASK,
+			    DOU_IRQ_PL0, on ? DOU_IRQ_PL0 : 0);
+}
+
+static int to_d71_opmode(int core_mode)
+{
+	switch (core_mode) {
+	case KOMEDA_MODE_DISP0:
+		return DO0_ACTIVE_MODE;
+	case KOMEDA_MODE_DISP1:
+		return DO1_ACTIVE_MODE;
+	case KOMEDA_MODE_DUAL_DISP:
+		return DO01_ACTIVE_MODE;
+	case KOMEDA_MODE_INACTIVE:
+		return INACTIVE_MODE;
+	default:
+		WARN(1, "Unknown operation mode");
+		return INACTIVE_MODE;
+	}
+}
+
+static int d71_change_opmode(struct komeda_dev *mdev, int new_mode)
+{
+	struct d71_dev *d71 = mdev->chip_data;
+	u32 opmode = to_d71_opmode(new_mode);
+	int ret;
+
+	malidp_write32_mask(d71->gcu_addr, BLK_CONTROL, 0x7, opmode);
+
+	ret = dp_wait_cond(((malidp_read32(d71->gcu_addr, BLK_CONTROL) & 0x7) == opmode),
+			   100, 1000, 10000);
+
+	return ret > 0 ? 0 : -ETIMEDOUT;
+}
+
+static void d71_flush(struct komeda_dev *mdev,
+		      int master_pipe, u32 active_pipes)
+{
+	struct d71_dev *d71 = mdev->chip_data;
+	u32 reg_offset = (master_pipe == 0) ?
+			 GCU_CONFIG_VALID0 : GCU_CONFIG_VALID1;
+
+	malidp_write32(d71->gcu_addr, reg_offset, GCU_CONFIG_CVAL);
+}
+
 static int d71_reset(struct d71_dev *d71)
 {
 	u32 __iomem *gcu = d71->gcu_addr;
@@ -459,6 +509,9 @@ static struct komeda_dev_funcs d71_chip_funcs = {
 	.irq_handler	= d71_irq_handler,
 	.enable_irq	= d71_enable_irq,
 	.disable_irq	= d71_disable_irq,
+	.on_off_vblank	= d71_on_off_vblank,
+	.change_opmode	= d71_change_opmode,
+	.flush		= d71_flush,
 };
 
 struct komeda_dev_funcs *
@@ -467,6 +520,7 @@ d71_identify(u32 __iomem *reg_base, struct komeda_chip_info *chip)
 	chip->arch_id	= malidp_read32(reg_base, GLB_ARCH_ID);
 	chip->core_id	= malidp_read32(reg_base, GLB_CORE_ID);
 	chip->core_info	= malidp_read32(reg_base, GLB_CORE_INFO);
+	chip->bus_width	= D71_BUS_WIDTH_16_BYTES;
 
 	return &d71_chip_funcs;
 }
diff --git a/drivers/gpu/drm/arm/display/komeda/komeda_crtc.c b/drivers/gpu/drm/arm/display/komeda/komeda_crtc.c
index f88a14927be9..62fad59f5a6a 100644
--- a/drivers/gpu/drm/arm/display/komeda/komeda_crtc.c
+++ b/drivers/gpu/drm/arm/display/komeda/komeda_crtc.c
@@ -18,6 +18,144 @@
 #include "komeda_dev.h"
 #include "komeda_kms.h"
 
+/**
+ * komeda_crtc_atomic_check - build display output data flow
+ * @crtc: DRM crtc
+ * @state: the crtc state object
+ *
+ * crtc_atomic_check is the final check stage, so beside build a display data
+ * pipeline according to the crtc_state, but still needs to release or disable
+ * the unclaimed pipeline resources.
+ *
+ * RETURNS:
+ * Zero for success or -errno
+ */
+static int
+komeda_crtc_atomic_check(struct drm_crtc *crtc,
+			 struct drm_crtc_state *state)
+{
+	struct komeda_crtc *kcrtc = to_kcrtc(crtc);
+	struct komeda_crtc_state *kcrtc_st = to_kcrtc_st(state);
+	int err;
+
+	if (state->active) {
+		err = komeda_build_display_data_flow(kcrtc, kcrtc_st);
+		if (err)
+			return err;
+	}
+
+	/* release unclaimed pipeline resources */
+	err = komeda_release_unclaimed_resources(kcrtc->master, kcrtc_st);
+	if (err)
+		return err;
+
+	return 0;
+}
+
+static u32 komeda_calc_mclk(struct komeda_crtc_state *kcrtc_st)
+{
+	unsigned long mclk = kcrtc_st->base.adjusted_mode.clock * 1000;
+
+	return mclk;
+}
+
+/* For active a crtc, mainly need two parts of preparation
+ * 1. adjust display operation mode.
+ * 2. enable needed clk
+ */
+static int
+komeda_crtc_prepare(struct komeda_crtc *kcrtc)
+{
+	struct komeda_dev *mdev = kcrtc->base.dev->dev_private;
+	struct komeda_pipeline *master = kcrtc->master;
+	struct komeda_crtc_state *kcrtc_st = to_kcrtc_st(kcrtc->base.state);
+	unsigned long pxlclk_rate = kcrtc_st->base.adjusted_mode.clock * 1000;
+	u32 new_mode;
+	int err;
+
+	mutex_lock(&mdev->lock);
+
+	new_mode = mdev->dpmode | BIT(master->id);
+	if (WARN_ON(new_mode == mdev->dpmode)) {
+		err = 0;
+		goto unlock;
+	}
+
+	err = mdev->funcs->change_opmode(mdev, new_mode);
+	if (err) {
+		DRM_ERROR("failed to change opmode: 0x%x -> 0x%x.\n,",
+			  mdev->dpmode, new_mode);
+		goto unlock;
+	}
+
+	mdev->dpmode = new_mode;
+	/* Only need to enable mclk on single display mode, but no need to
+	 * enable mclk it on dual display mode, since the dual mode always
+	 * switch from single display mode, the mclk already enabled, no need
+	 * to enable it again.
+	 */
+	if (new_mode != KOMEDA_MODE_DUAL_DISP) {
+		err = clk_set_rate(mdev->mclk, komeda_calc_mclk(kcrtc_st));
+		if (err)
+			DRM_ERROR("failed to set mclk.\n");
+		err = clk_prepare_enable(mdev->mclk);
+		if (err)
+			DRM_ERROR("failed to enable mclk.\n");
+	}
+
+	err = clk_prepare_enable(master->aclk);
+	if (err)
+		DRM_ERROR("failed to enable axi clk for pipe%d.\n", master->id);
+	err = clk_set_rate(master->pxlclk, pxlclk_rate);
+	if (err)
+		DRM_ERROR("failed to set pxlclk for pipe%d\n", master->id);
+	err = clk_prepare_enable(master->pxlclk);
+	if (err)
+		DRM_ERROR("failed to enable pxl clk for pipe%d.\n", master->id);
+
+unlock:
+	mutex_unlock(&mdev->lock);
+
+	return err;
+}
+
+static int
+komeda_crtc_unprepare(struct komeda_crtc *kcrtc)
+{
+	struct komeda_dev *mdev = kcrtc->base.dev->dev_private;
+	struct komeda_pipeline *master = kcrtc->master;
+	u32 new_mode;
+	int err;
+
+	mutex_lock(&mdev->lock);
+
+	new_mode = mdev->dpmode & (~BIT(master->id));
+
+	if (WARN_ON(new_mode == mdev->dpmode)) {
+		err = 0;
+		goto unlock;
+	}
+
+	err = mdev->funcs->change_opmode(mdev, new_mode);
+	if (err) {
+		DRM_ERROR("failed to change opmode: 0x%x -> 0x%x.\n,",
+			  mdev->dpmode, new_mode);
+		goto unlock;
+	}
+
+	mdev->dpmode = new_mode;
+
+	clk_disable_unprepare(master->pxlclk);
+	clk_disable_unprepare(master->aclk);
+	if (new_mode == KOMEDA_MODE_INACTIVE)
+		clk_disable_unprepare(mdev->mclk);
+
+unlock:
+	mutex_unlock(&mdev->lock);
+
+	return err;
+}
+
 void komeda_crtc_handle_event(struct komeda_crtc   *kcrtc,
 			      struct komeda_events *evts)
 {
@@ -31,15 +169,264 @@ void komeda_crtc_handle_event(struct komeda_crtc   *kcrtc,
 	if (events & KOMEDA_EVENT_EOW)
 		DRM_DEBUG("EOW.\n");
 
-	/* will handle it with crtc->flush */
-	if (events & KOMEDA_EVENT_FLIP)
-		DRM_DEBUG("FLIP Done.\n");
+	if (events & KOMEDA_EVENT_FLIP) {
+		unsigned long flags;
+		struct drm_pending_vblank_event *event;
+
+		spin_lock_irqsave(&crtc->dev->event_lock, flags);
+		if (kcrtc->disable_done) {
+			complete_all(kcrtc->disable_done);
+			kcrtc->disable_done = NULL;
+		} else if (crtc->state->event) {
+			event = crtc->state->event;
+			/*
+			 * Consume event before notifying drm core that flip
+			 * happened.
+			 */
+			crtc->state->event = NULL;
+			drm_crtc_send_vblank_event(crtc, event);
+		} else {
+			DRM_WARN("CRTC[%d]: FLIP happen but no pending commit.\n",
+				 drm_crtc_index(&kcrtc->base));
+		}
+		spin_unlock_irqrestore(&crtc->dev->event_lock, flags);
+	}
+}
+
+static void
+komeda_crtc_do_flush(struct drm_crtc *crtc,
+		     struct drm_crtc_state *old)
+{
+	struct komeda_crtc *kcrtc = to_kcrtc(crtc);
+	struct komeda_crtc_state *kcrtc_st = to_kcrtc_st(crtc->state);
+	struct komeda_dev *mdev = kcrtc->base.dev->dev_private;
+	struct komeda_pipeline *master = kcrtc->master;
+
+	DRM_DEBUG_ATOMIC("CRTC%d_FLUSH: active_pipes: 0x%x, affected: 0x%x.\n",
+			 drm_crtc_index(crtc),
+			 kcrtc_st->active_pipes, kcrtc_st->affected_pipes);
+
+	/* step 1: update the pipeline/component state to HW */
+	if (has_bit(master->id, kcrtc_st->affected_pipes))
+		komeda_pipeline_update(master, old->state);
+
+	/* step 2: notify the HW to kickoff the update */
+	mdev->funcs->flush(mdev, master->id, kcrtc_st->active_pipes);
 }
 
-struct drm_crtc_helper_funcs komeda_crtc_helper_funcs = {
+static void
+komeda_crtc_atomic_enable(struct drm_crtc *crtc,
+			  struct drm_crtc_state *old)
+{
+	komeda_crtc_prepare(to_kcrtc(crtc));
+	drm_crtc_vblank_on(crtc);
+	komeda_crtc_do_flush(crtc, old);
+}
+
+static void
+komeda_crtc_atomic_disable(struct drm_crtc *crtc,
+			   struct drm_crtc_state *old)
+{
+	struct komeda_crtc *kcrtc = to_kcrtc(crtc);
+	struct komeda_crtc_state *old_st = to_kcrtc_st(old);
+	struct komeda_dev *mdev = crtc->dev->dev_private;
+	struct komeda_pipeline *master = kcrtc->master;
+	struct completion *disable_done = &crtc->state->commit->flip_done;
+	struct completion temp;
+	int timeout;
+
+	DRM_DEBUG_ATOMIC("CRTC%d_DISABLE: active_pipes: 0x%x, affected: 0x%x.\n",
+			 drm_crtc_index(crtc),
+			 old_st->active_pipes, old_st->affected_pipes);
+
+	if (has_bit(master->id, old_st->active_pipes))
+		komeda_pipeline_disable(master, old->state);
+
+	/* crtc_disable has two scenarios according to the state->active switch.
+	 * 1. active -> inactive
+	 *    this commit is a disable commit. and the commit will be finished
+	 *    or done after the disable operation. on this case we can directly
+	 *    use the crtc->state->event to tracking the HW disable operation.
+	 * 2. active -> active
+	 *    the crtc->commit is not for disable, but a modeset operation when
+	 *    crtc is active, such commit actually has been completed by 3
+	 *    DRM operations:
+	 *    crtc_disable, update_planes(crtc_flush), crtc_enable
+	 *    so on this case the crtc->commit is for the whole process.
+	 *    we can not use it for tracing the disable, we need a temporary
+	 *    flip_done for tracing the disable. and crtc->state->event for
+	 *    the crtc_enable operation.
+	 *    That's also the reason why skip modeset commit in
+	 *    komeda_crtc_atomic_flush()
+	 */
+	if (crtc->state->active) {
+		struct komeda_pipeline_state *pipe_st;
+		/* clear the old active_comps to zero */
+		pipe_st = komeda_pipeline_get_old_state(master, old->state);
+		pipe_st->active_comps = 0;
+
+		init_completion(&temp);
+		kcrtc->disable_done = &temp;
+		disable_done = &temp;
+	}
+
+	mdev->funcs->flush(mdev, master->id, 0);
+
+	/* wait the disable take affect.*/
+	timeout = wait_for_completion_timeout(disable_done, HZ);
+	if (timeout == 0) {
+		DRM_ERROR("disable pipeline%d timeout.\n", kcrtc->master->id);
+		if (crtc->state->active) {
+			unsigned long flags;
+
+			spin_lock_irqsave(&crtc->dev->event_lock, flags);
+			kcrtc->disable_done = NULL;
+			spin_unlock_irqrestore(&crtc->dev->event_lock, flags);
+		}
+	}
+
+	drm_crtc_vblank_off(crtc);
+	komeda_crtc_unprepare(kcrtc);
+}
+
+static void
+komeda_crtc_atomic_flush(struct drm_crtc *crtc,
+			 struct drm_crtc_state *old)
+{
+	/* commit with modeset will be handled in enable/disable */
+	if (drm_atomic_crtc_needs_modeset(crtc->state))
+		return;
+
+	komeda_crtc_do_flush(crtc, old);
+}
+
+static enum drm_mode_status
+komeda_crtc_mode_valid(struct drm_crtc *crtc, const struct drm_display_mode *m)
+{
+	struct komeda_dev *mdev = crtc->dev->dev_private;
+	struct komeda_crtc *kcrtc = to_kcrtc(crtc);
+	struct komeda_pipeline *master = kcrtc->master;
+	long mode_clk, pxlclk;
+
+	if (m->flags & DRM_MODE_FLAG_INTERLACE)
+		return MODE_NO_INTERLACE;
+
+	/* main clock/AXI clk must be faster than pxlclk*/
+	mode_clk = m->clock * 1000;
+	pxlclk = clk_round_rate(master->pxlclk, mode_clk);
+	if (pxlclk != mode_clk) {
+		DRM_DEBUG_ATOMIC("pxlclk doesn't support %ld Hz\n", mode_clk);
+
+		return MODE_NOCLOCK;
+	}
+
+	if (clk_round_rate(mdev->mclk, mode_clk) < pxlclk) {
+		DRM_DEBUG_ATOMIC("mclk can't satisfy the requirement of %s-clk: %ld.\n",
+				 m->name, pxlclk);
+
+		return MODE_CLOCK_HIGH;
+	}
+
+	if (clk_round_rate(master->aclk, mode_clk) < pxlclk) {
+		DRM_DEBUG_ATOMIC("aclk can't satisfy the requirement of %s-clk: %ld.\n",
+				 m->name, pxlclk);
+
+		return MODE_CLOCK_HIGH;
+	}
+
+	return MODE_OK;
+}
+
+static bool komeda_crtc_mode_fixup(struct drm_crtc *crtc,
+				   const struct drm_display_mode *m,
+				   struct drm_display_mode *adjusted_mode)
+{
+	struct komeda_crtc *kcrtc = to_kcrtc(crtc);
+	struct komeda_pipeline *master = kcrtc->master;
+	long mode_clk = m->clock * 1000;
+
+	adjusted_mode->clock = clk_round_rate(master->pxlclk, mode_clk) / 1000;
+
+	return true;
+}
+
+static struct drm_crtc_helper_funcs komeda_crtc_helper_funcs = {
+	.atomic_check	= komeda_crtc_atomic_check,
+	.atomic_flush	= komeda_crtc_atomic_flush,
+	.atomic_enable	= komeda_crtc_atomic_enable,
+	.atomic_disable	= komeda_crtc_atomic_disable,
+	.mode_valid	= komeda_crtc_mode_valid,
+	.mode_fixup	= komeda_crtc_mode_fixup,
 };
 
+static void komeda_crtc_reset(struct drm_crtc *crtc)
+{
+	struct komeda_crtc_state *state;
+
+	if (crtc->state)
+		__drm_atomic_helper_crtc_destroy_state(crtc->state);
+
+	kfree(to_kcrtc_st(crtc->state));
+	crtc->state = NULL;
+
+	state = kzalloc(sizeof(*state), GFP_KERNEL);
+	if (state) {
+		crtc->state = &state->base;
+		crtc->state->crtc = crtc;
+	}
+}
+
+static struct drm_crtc_state *
+komeda_crtc_atomic_duplicate_state(struct drm_crtc *crtc)
+{
+	struct komeda_crtc_state *old = to_kcrtc_st(crtc->state);
+	struct komeda_crtc_state *new;
+
+	new = kzalloc(sizeof(*new), GFP_KERNEL);
+	if (!new)
+		return NULL;
+
+	__drm_atomic_helper_crtc_duplicate_state(crtc, &new->base);
+
+	new->affected_pipes = old->active_pipes;
+
+	return &new->base;
+}
+
+static void komeda_crtc_atomic_destroy_state(struct drm_crtc *crtc,
+					     struct drm_crtc_state *state)
+{
+	__drm_atomic_helper_crtc_destroy_state(state);
+	kfree(to_kcrtc_st(state));
+}
+
+static int komeda_crtc_vblank_enable(struct drm_crtc *crtc)
+{
+	struct komeda_dev *mdev = crtc->dev->dev_private;
+	struct komeda_crtc *kcrtc = to_kcrtc(crtc);
+
+	mdev->funcs->on_off_vblank(mdev, kcrtc->master->id, true);
+	return 0;
+}
+
+static void komeda_crtc_vblank_disable(struct drm_crtc *crtc)
+{
+	struct komeda_dev *mdev = crtc->dev->dev_private;
+	struct komeda_crtc *kcrtc = to_kcrtc(crtc);
+
+	mdev->funcs->on_off_vblank(mdev, kcrtc->master->id, false);
+}
+
 static const struct drm_crtc_funcs komeda_crtc_funcs = {
+	.gamma_set		= drm_atomic_helper_legacy_gamma_set,
+	.destroy		= drm_crtc_cleanup,
+	.set_config		= drm_atomic_helper_set_config,
+	.page_flip		= drm_atomic_helper_page_flip,
+	.reset			= komeda_crtc_reset,
+	.atomic_duplicate_state	= komeda_crtc_atomic_duplicate_state,
+	.atomic_destroy_state	= komeda_crtc_atomic_destroy_state,
+	.enable_vblank		= komeda_crtc_vblank_enable,
+	.disable_vblank		= komeda_crtc_vblank_disable,
 };
 
 int komeda_kms_setup_crtcs(struct komeda_kms_dev *kms,
diff --git a/drivers/gpu/drm/arm/display/komeda/komeda_dev.c b/drivers/gpu/drm/arm/display/komeda/komeda_dev.c
index 24548b87e182..ca3599e4a4d3 100644
--- a/drivers/gpu/drm/arm/display/komeda/komeda_dev.c
+++ b/drivers/gpu/drm/arm/display/komeda/komeda_dev.c
@@ -59,6 +59,48 @@ static void komeda_debugfs_init(struct komeda_dev *mdev)
 }
 #endif
 
+static ssize_t
+core_id_show(struct device *dev, struct device_attribute *attr, char *buf)
+{
+	struct komeda_dev *mdev = dev_to_mdev(dev);
+
+	return snprintf(buf, PAGE_SIZE, "0x%08x\n", mdev->chip.core_id);
+}
+static DEVICE_ATTR_RO(core_id);
+
+static ssize_t
+config_id_show(struct device *dev, struct device_attribute *attr, char *buf)
+{
+	struct komeda_dev *mdev = dev_to_mdev(dev);
+	struct komeda_pipeline *pipe = mdev->pipelines[0];
+	union komeda_config_id config_id;
+	int i;
+
+	memset(&config_id, 0, sizeof(config_id));
+
+	config_id.max_line_sz = pipe->layers[0]->hsize_in.end;
+	config_id.n_pipelines = mdev->n_pipelines;
+	config_id.n_scalers = pipe->n_scalers;
+	config_id.n_layers = pipe->n_layers;
+	config_id.n_richs = 0;
+	for (i = 0; i < pipe->n_layers; i++) {
+		if (pipe->layers[i]->layer_type == KOMEDA_FMT_RICH_LAYER)
+			config_id.n_richs++;
+	}
+	return snprintf(buf, PAGE_SIZE, "0x%08x\n", config_id.value);
+}
+static DEVICE_ATTR_RO(config_id);
+
+static struct attribute *komeda_sysfs_entries[] = {
+	&dev_attr_core_id.attr,
+	&dev_attr_config_id.attr,
+	NULL,
+};
+
+static struct attribute_group komeda_sysfs_attr_group = {
+	.attrs = komeda_sysfs_entries,
+};
+
 static int komeda_parse_pipe_dt(struct komeda_dev *mdev, struct device_node *np)
 {
 	struct komeda_pipeline *pipe;
@@ -151,6 +193,8 @@ struct komeda_dev *komeda_dev_create(struct device *dev)
 	if (!mdev)
 		return ERR_PTR(-ENOMEM);
 
+	mutex_init(&mdev->lock);
+
 	mdev->dev = dev;
 	mdev->reg_base = devm_ioremap_resource(dev, io_res);
 	if (IS_ERR(mdev->reg_base)) {
@@ -205,6 +249,12 @@ struct komeda_dev *komeda_dev_create(struct device *dev)
 		goto err_cleanup;
 	}
 
+	err = sysfs_create_group(&dev->kobj, &komeda_sysfs_attr_group);
+	if (err) {
+		DRM_ERROR("create sysfs group failed.\n");
+		goto err_cleanup;
+	}
+
 #ifdef CONFIG_DEBUG_FS
 	komeda_debugfs_init(mdev);
 #endif
@@ -222,6 +272,8 @@ void komeda_dev_destroy(struct komeda_dev *mdev)
 	struct komeda_dev_funcs *funcs = mdev->funcs;
 	int i;
 
+	sysfs_remove_group(&dev->kobj, &komeda_sysfs_attr_group);
+
 #ifdef CONFIG_DEBUG_FS
 	debugfs_remove_recursive(mdev->debugfs_root);
 #endif
diff --git a/drivers/gpu/drm/arm/display/komeda/komeda_dev.h b/drivers/gpu/drm/arm/display/komeda/komeda_dev.h
index 8eae2620ce77..29e03c4e1ffc 100644
--- a/drivers/gpu/drm/arm/display/komeda/komeda_dev.h
+++ b/drivers/gpu/drm/arm/display/komeda/komeda_dev.h
@@ -103,9 +103,38 @@ struct komeda_dev_funcs {
 	int (*enable_irq)(struct komeda_dev *mdev);
 	/** @disable_irq: disable irq */
 	int (*disable_irq)(struct komeda_dev *mdev);
+	/** @on_off_vblank: notify HW to on/off vblank */
+	void (*on_off_vblank)(struct komeda_dev *mdev,
+			      int master_pipe, bool on);
 
 	/** @dump_register: Optional, dump registers to seq_file */
 	void (*dump_register)(struct komeda_dev *mdev, struct seq_file *seq);
+	/**
+	 * @change_opmode:
+	 *
+	 * Notify HW to switch to a new display operation mode.
+	 */
+	int (*change_opmode)(struct komeda_dev *mdev, int new_mode);
+	/** @flush: Notify the HW to flush or kickoff the update */
+	void (*flush)(struct komeda_dev *mdev,
+		      int master_pipe, u32 active_pipes);
+};
+
+/*
+ * DISPLAY_MODE describes how many display been enabled, and which will be
+ * passed to CHIP by &komeda_dev_funcs->change_opmode(), then CHIP can do the
+ * pipeline resources assignment according to this usage hint.
+ * -   KOMEDA_MODE_DISP0: Only one display enabled, pipeline-0 work as master.
+ * -   KOMEDA_MODE_DISP1: Only one display enabled, pipeline-0 work as master.
+ * -   KOMEDA_MODE_DUAL_DISP: Dual display mode, both display has been enabled.
+ * And D71 supports assign two pipelines to one single display on mode
+ * KOMEDA_MODE_DISP0/DISP1
+ */
+enum {
+	KOMEDA_MODE_INACTIVE	= 0,
+	KOMEDA_MODE_DISP0	= BIT(0),
+	KOMEDA_MODE_DISP1	= BIT(1),
+	KOMEDA_MODE_DUAL_DISP	= KOMEDA_MODE_DISP0 | KOMEDA_MODE_DISP1,
 };
 
 /**
@@ -116,21 +145,31 @@ struct komeda_dev_funcs {
  * control-abilites of device.
  */
 struct komeda_dev {
+	/** @dev: the base device structure */
 	struct device *dev;
+	/** @reg_base: the base address of komeda io space */
 	u32 __iomem   *reg_base;
 
+	/** @chip: the basic chip information */
 	struct komeda_chip_info chip;
 	/** @fmt_tbl: initialized by &komeda_dev_funcs->init_format_table */
 	struct komeda_format_caps_table fmt_tbl;
 	/** @pclk: APB clock for register access */
 	struct clk *pclk;
-	/** @mck: HW main engine clk */
+	/** @mclk: HW main engine clk */
 	struct clk *mclk;
 
 	/** @irq: irq number */
 	int irq;
 
+	/** @lock: used to protect dpmode */
+	struct mutex lock;
+	/** @dpmode: current display mode */
+	u32 dpmode;
+
+	/** @n_pipelines: the number of pipe in @pipelines */
 	int n_pipelines;
+	/** @pipelines: the komeda pipelines */
 	struct komeda_pipeline *pipelines[KOMEDA_MAX_PIPELINES];
 
 	/** @funcs: chip funcs to access to HW */
@@ -143,6 +182,7 @@ struct komeda_dev {
 	 */
 	void *chip_data;
 
+	/** @debugfs_root: root directory of komeda debugfs */
 	struct dentry *debugfs_root;
 };
 
@@ -158,4 +198,6 @@ d71_identify(u32 __iomem *reg, struct komeda_chip_info *chip);
 struct komeda_dev *komeda_dev_create(struct device *dev);
 void komeda_dev_destroy(struct komeda_dev *mdev);
 
+struct komeda_dev *dev_to_mdev(struct device *dev);
+
 #endif /*_KOMEDA_DEV_H_*/
diff --git a/drivers/gpu/drm/arm/display/komeda/komeda_drv.c b/drivers/gpu/drm/arm/display/komeda/komeda_drv.c
index 2bdd189b041d..cfa5068d9d1e 100644
--- a/drivers/gpu/drm/arm/display/komeda/komeda_drv.c
+++ b/drivers/gpu/drm/arm/display/komeda/komeda_drv.c
@@ -17,6 +17,13 @@ struct komeda_drv {
 	struct komeda_kms_dev *kms;
 };
 
+struct komeda_dev *dev_to_mdev(struct device *dev)
+{
+	struct komeda_drv *mdrv = dev_get_drvdata(dev);
+
+	return mdrv ? mdrv->mdev : NULL;
+}
+
 static void komeda_unbind(struct device *dev)
 {
 	struct komeda_drv *mdrv = dev_get_drvdata(dev);
@@ -120,7 +127,7 @@ static const struct komeda_product_data komeda_products[] = {
 	},
 };
 
-const struct of_device_id komeda_of_match[] = {
+static const struct of_device_id komeda_of_match[] = {
 	{ .compatible = "arm,mali-d71", .data = &komeda_products[MALI_D71], },
 	{},
 };
diff --git a/drivers/gpu/drm/arm/display/komeda/komeda_framebuffer.h b/drivers/gpu/drm/arm/display/komeda/komeda_framebuffer.h
index 0de2e4a2afd2..ea2fe190c1e3 100644
--- a/drivers/gpu/drm/arm/display/komeda/komeda_framebuffer.h
+++ b/drivers/gpu/drm/arm/display/komeda/komeda_framebuffer.h
@@ -10,11 +10,16 @@
 #include <drm/drm_framebuffer.h>
 #include "komeda_format_caps.h"
 
-/** struct komeda_fb - entend drm_framebuffer with komeda attribute */
+/**
+ * struct komeda_fb - Entending drm_framebuffer with komeda attribute
+ */
 struct komeda_fb {
 	/** @base: &drm_framebuffer */
 	struct drm_framebuffer base;
-	/* @format_caps: &komeda_format_caps */
+	/**
+	 * @format_caps:
+	 * extends drm_format_info for komeda specific information
+	 */
 	const struct komeda_format_caps *format_caps;
 	/** @aligned_w: aligned frame buffer width */
 	u32 aligned_w;
diff --git a/drivers/gpu/drm/arm/display/komeda/komeda_kms.c b/drivers/gpu/drm/arm/display/komeda/komeda_kms.c
index b214edbfbbc6..86f6542afb40 100644
--- a/drivers/gpu/drm/arm/display/komeda/komeda_kms.c
+++ b/drivers/gpu/drm/arm/display/komeda/komeda_kms.c
@@ -26,10 +26,10 @@ static int komeda_gem_cma_dumb_create(struct drm_file *file,
 				      struct drm_device *dev,
 				      struct drm_mode_create_dumb *args)
 {
-	u32 alignment = 16; /* TODO get alignment from dev */
+	struct komeda_dev *mdev = dev->dev_private;
+	u32 pitch = DIV_ROUND_UP(args->width * args->bpp, 8);
 
-	args->pitch = ALIGN(DIV_ROUND_UP(args->width * args->bpp, 8),
-			    alignment);
+	args->pitch = ALIGN(pitch, mdev->chip.bus_width);
 
 	return drm_gem_cma_dumb_create_internal(file, dev, args);
 }
@@ -100,9 +100,37 @@ static const struct drm_mode_config_helper_funcs komeda_mode_config_helpers = {
 	.atomic_commit_tail = komeda_kms_commit_tail,
 };
 
+static int komeda_kms_check(struct drm_device *dev,
+			    struct drm_atomic_state *state)
+{
+	struct drm_crtc *crtc;
+	struct drm_crtc_state *old_crtc_st, *new_crtc_st;
+	int i, err;
+
+	err = drm_atomic_helper_check_modeset(dev, state);
+	if (err)
+		return err;
+
+	/* komeda need to re-calculate resource assumption in every commit
+	 * so need to add all affected_planes (even unchanged) to
+	 * drm_atomic_state.
+	 */
+	for_each_oldnew_crtc_in_state(state, crtc, old_crtc_st, new_crtc_st, i) {
+		err = drm_atomic_add_affected_planes(state, crtc);
+		if (err)
+			return err;
+	}
+
+	err = drm_atomic_helper_check_planes(dev, state);
+	if (err)
+		return err;
+
+	return 0;
+}
+
 static const struct drm_mode_config_funcs komeda_mode_config_funcs = {
 	.fb_create		= komeda_fb_create,
-	.atomic_check		= drm_atomic_helper_check,
+	.atomic_check		= komeda_kms_check,
 	.atomic_commit		= drm_atomic_helper_commit,
 };
 
@@ -184,6 +212,7 @@ uninstall_irq:
 	drm_irq_uninstall(drm);
 cleanup_mode_config:
 	drm_mode_config_cleanup(drm);
+	komeda_kms_cleanup_private_objs(kms);
 free_kms:
 	kfree(kms);
 	return ERR_PTR(err);
@@ -198,7 +227,7 @@ void komeda_kms_detach(struct komeda_kms_dev *kms)
 	drm_dev_unregister(drm);
 	drm_irq_uninstall(drm);
 	component_unbind_all(mdev->dev, drm);
-	komeda_kms_cleanup_private_objs(mdev);
+	komeda_kms_cleanup_private_objs(kms);
 	drm_mode_config_cleanup(drm);
 	drm->dev_private = NULL;
 	drm_dev_put(drm);
diff --git a/drivers/gpu/drm/arm/display/komeda/komeda_kms.h b/drivers/gpu/drm/arm/display/komeda/komeda_kms.h
index 15ac8b85506c..ac3d9209b4d9 100644
--- a/drivers/gpu/drm/arm/display/komeda/komeda_kms.h
+++ b/drivers/gpu/drm/arm/display/komeda/komeda_kms.h
@@ -15,7 +15,9 @@
 #include <video/videomode.h>
 #include <video/display_timing.h>
 
-/** struct komeda_plane - komeda instance of drm_plane */
+/**
+ * struct komeda_plane - komeda instance of drm_plane
+ */
 struct komeda_plane {
 	/** @base: &drm_plane */
 	struct drm_plane base;
@@ -70,9 +72,14 @@ struct komeda_crtc {
 	 * merge into the master.
 	 */
 	struct komeda_pipeline *slave;
+
+	/** @disable_done: this flip_done is for tracing the disable */
+	struct completion *disable_done;
 };
 
-/** struct komeda_crtc_state */
+/**
+ * struct komeda_crtc_state
+ */
 struct komeda_crtc_state {
 	/** @base: &drm_crtc_state */
 	struct drm_crtc_state base;
@@ -80,7 +87,15 @@ struct komeda_crtc_state {
 	/* private properties */
 
 	/* computed state which are used by validate/check */
+	/**
+	 * @affected_pipes:
+	 * the affected pipelines in once display instance
+	 */
 	u32 affected_pipes;
+	/**
+	 * @active_pipes:
+	 * the active pipelines in once display instance
+	 */
 	u32 active_pipes;
 };
 
@@ -108,7 +123,7 @@ int komeda_kms_add_crtcs(struct komeda_kms_dev *kms, struct komeda_dev *mdev);
 int komeda_kms_add_planes(struct komeda_kms_dev *kms, struct komeda_dev *mdev);
 int komeda_kms_add_private_objs(struct komeda_kms_dev *kms,
 				struct komeda_dev *mdev);
-void komeda_kms_cleanup_private_objs(struct komeda_dev *mdev);
+void komeda_kms_cleanup_private_objs(struct komeda_kms_dev *kms);
 
 void komeda_crtc_handle_event(struct komeda_crtc   *kcrtc,
 			      struct komeda_events *evts);
diff --git a/drivers/gpu/drm/arm/display/komeda/komeda_pipeline.c b/drivers/gpu/drm/arm/display/komeda/komeda_pipeline.c
index 07398efc40f5..c379439c6194 100644
--- a/drivers/gpu/drm/arm/display/komeda/komeda_pipeline.c
+++ b/drivers/gpu/drm/arm/display/komeda/komeda_pipeline.c
@@ -62,7 +62,7 @@ void komeda_pipeline_destroy(struct komeda_dev *mdev,
 	devm_kfree(mdev->dev, pipe);
 }
 
-struct komeda_component **
+static struct komeda_component **
 komeda_pipeline_get_component_pos(struct komeda_pipeline *pipe, int id)
 {
 	struct komeda_dev *mdev = pipe->mdev;
diff --git a/drivers/gpu/drm/arm/display/komeda/komeda_pipeline.h b/drivers/gpu/drm/arm/display/komeda/komeda_pipeline.h
index c30a790d0712..b1f813a349a4 100644
--- a/drivers/gpu/drm/arm/display/komeda/komeda_pipeline.h
+++ b/drivers/gpu/drm/arm/display/komeda/komeda_pipeline.h
@@ -90,32 +90,35 @@ struct komeda_component {
 	u32 __iomem *reg;
 	/** @id: component id */
 	u32 id;
-	/** @hw_ic: component hw id,
-	 *  which is initialized by chip and used by chip only
+	/**
+	 * @hw_id: component hw id,
+	 * which is initialized by chip and used by chip only
 	 */
 	u32 hw_id;
 
 	/**
 	 * @max_active_inputs:
-	 * @max_active_outpus:
+	 * @max_active_outputs:
 	 *
-	 * maximum number of inputs/outputs that can be active in the same time
+	 * maximum number of inputs/outputs that can be active at the same time
 	 * Note:
 	 * the number isn't the bit number of @supported_inputs or
 	 * @supported_outputs, but may be less than it, since component may not
 	 * support enabling all @supported_inputs/outputs at the same time.
 	 */
 	u8 max_active_inputs;
+	/** @max_active_outputs: maximum number of outputs */
 	u8 max_active_outputs;
 	/**
 	 * @supported_inputs:
 	 * @supported_outputs:
 	 *
-	 * bitmask of BIT(component->id) for the supported inputs/outputs
+	 * bitmask of BIT(component->id) for the supported inputs/outputs,
 	 * describes the possibilities of how a component is linked into a
 	 * pipeline.
 	 */
 	u32 supported_inputs;
+	/** @supported_outputs: bitmask of supported output componenet ids */
 	u32 supported_outputs;
 
 	/**
@@ -134,7 +137,8 @@ struct komeda_component {
 struct komeda_component_output {
 	/** @component: indicate which component the data comes from */
 	struct komeda_component *component;
-	/** @output_port:
+	/**
+	 * @output_port:
 	 * the output port of the &komeda_component_output.component
 	 */
 	u8 output_port;
@@ -150,11 +154,12 @@ struct komeda_component_output {
 struct komeda_component_state {
 	/** @obj: tracking component_state by drm_atomic_state */
 	struct drm_private_state obj;
+	/** @component: backpointer to the component */
 	struct komeda_component *component;
 	/**
 	 * @binding_user:
-	 * currently bound user, the user can be crtc/plane/wb_conn, which is
-	 * valid decided by @component and @inputs
+	 * currently bound user, the user can be @crtc, @plane or @wb_conn,
+	 * which is valid decided by @component and @inputs
 	 *
 	 * -  Layer: its user always is plane.
 	 * -  compiz/improc/timing_ctrlr: the user is crtc.
@@ -162,20 +167,24 @@ struct komeda_component_state {
 	 * -  scaler: plane when input is layer, wb_conn if input is compiz.
 	 */
 	union {
+		/** @crtc: backpointer for user crtc */
 		struct drm_crtc *crtc;
+		/** @plane: backpointer for user plane */
 		struct drm_plane *plane;
+		/** @wb_conn: backpointer for user wb_connector  */
 		struct drm_connector *wb_conn;
 		void *binding_user;
 	};
+
 	/**
 	 * @active_inputs:
 	 *
 	 * active_inputs is bitmask of @inputs index
 	 *
-	 * -  active_inputs = changed_active_inputs + unchanged_active_inputs
-	 * -  affected_inputs = old->active_inputs + new->active_inputs;
+	 * -  active_inputs = changed_active_inputs | unchanged_active_inputs
+	 * -  affected_inputs = old->active_inputs | new->active_inputs;
 	 * -  disabling_inputs = affected_inputs ^ active_inputs;
-	 * -  changed_inputs = disabling_inputs + changed_active_inputs;
+	 * -  changed_inputs = disabling_inputs | changed_active_inputs;
 	 *
 	 * NOTE:
 	 * changed_inputs doesn't include all active_input but only
@@ -183,7 +192,9 @@ struct komeda_component_state {
 	 * level for dirty update.
 	 */
 	u16 active_inputs;
+	/** @changed_active_inputs: bitmask of the changed @active_inputs */
 	u16 changed_active_inputs;
+	/** @affected_inputs: bitmask for affected @inputs */
 	u16 affected_inputs;
 	/**
 	 * @inputs:
@@ -278,6 +289,22 @@ struct komeda_timing_ctrlr_state {
 	struct komeda_component_state base;
 };
 
+/* Why define A separated structure but not use plane_state directly ?
+ * 1. Komeda supports layer_split which means a plane_state can be split and
+ *    handled by two layers, one layer only handle half of plane image.
+ * 2. Fix up the user properties according to HW's capabilities, like user
+ *    set rotation to R180, but HW only supports REFLECT_X+Y. the rot here is
+ *    after drm_rotation_simplify()
+ */
+struct komeda_data_flow_cfg {
+	struct komeda_component_output input;
+	u16 in_x, in_y, in_w, in_h;
+	u32 out_x, out_y, out_w, out_h;
+	u32 rot;
+	int blending_zorder;
+	u8 pixel_blend_mode, layer_alpha;
+};
+
 /** struct komeda_pipeline_funcs */
 struct komeda_pipeline_funcs {
 	/* dump_register: Optional, dump registers to seq_file */
@@ -303,14 +330,23 @@ struct komeda_pipeline {
 	int id;
 	/** @avail_comps: available components mask of pipeline */
 	u32 avail_comps;
+	/** @n_layers: the number of layer on @layers */
 	int n_layers;
+	/** @layers: the pipeline layers */
 	struct komeda_layer *layers[KOMEDA_PIPELINE_MAX_LAYERS];
+	/** @n_scalers: the number of scaler on @scalers */
 	int n_scalers;
+	/** @scalers: the pipeline scalers */
 	struct komeda_scaler *scalers[KOMEDA_PIPELINE_MAX_SCALERS];
+	/** @compiz: compositor */
 	struct komeda_compiz *compiz;
+	/** @wb_layer: writeback layer */
 	struct komeda_layer  *wb_layer;
+	/** @improc: post image processor */
 	struct komeda_improc *improc;
+	/** @ctrlr: timing controller */
 	struct komeda_timing_ctrlr *ctrlr;
+	/** @funcs: chip pipeline functions */
 	struct komeda_pipeline_funcs *funcs; /* private pipeline functions */
 
 	/** @of_node: pipeline dt node */
@@ -331,6 +367,7 @@ struct komeda_pipeline {
 struct komeda_pipeline_state {
 	/** @obj: tracking pipeline_state by drm_atomic_state */
 	struct drm_private_state obj;
+	/** @pipe: backpointer to the pipeline */
 	struct komeda_pipeline *pipe;
 	/** @crtc: currently bound crtc */
 	struct drm_crtc *crtc;
@@ -382,4 +419,26 @@ komeda_component_add(struct komeda_pipeline *pipe,
 void komeda_component_destroy(struct komeda_dev *mdev,
 			      struct komeda_component *c);
 
+struct komeda_plane_state;
+struct komeda_crtc_state;
+struct komeda_crtc;
+
+int komeda_build_layer_data_flow(struct komeda_layer *layer,
+				 struct komeda_plane_state *kplane_st,
+				 struct komeda_crtc_state *kcrtc_st,
+				 struct komeda_data_flow_cfg *dflow);
+int komeda_build_display_data_flow(struct komeda_crtc *kcrtc,
+				   struct komeda_crtc_state *kcrtc_st);
+
+int komeda_release_unclaimed_resources(struct komeda_pipeline *pipe,
+				       struct komeda_crtc_state *kcrtc_st);
+
+struct komeda_pipeline_state *
+komeda_pipeline_get_old_state(struct komeda_pipeline *pipe,
+			      struct drm_atomic_state *state);
+void komeda_pipeline_disable(struct komeda_pipeline *pipe,
+			     struct drm_atomic_state *old_state);
+void komeda_pipeline_update(struct komeda_pipeline *pipe,
+			    struct drm_atomic_state *old_state);
+
 #endif /* _KOMEDA_PIPELINE_H_*/
diff --git a/drivers/gpu/drm/arm/display/komeda/komeda_pipeline_state.c b/drivers/gpu/drm/arm/display/komeda/komeda_pipeline_state.c
new file mode 100644
index 000000000000..36570d7dad61
--- /dev/null
+++ b/drivers/gpu/drm/arm/display/komeda/komeda_pipeline_state.c
@@ -0,0 +1,610 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * (C) COPYRIGHT 2018 ARM Limited. All rights reserved.
+ * Author: James.Qian.Wang <james.qian.wang@arm.com>
+ *
+ */
+
+#include <drm/drm_print.h>
+#include <linux/clk.h>
+#include "komeda_dev.h"
+#include "komeda_kms.h"
+#include "komeda_pipeline.h"
+#include "komeda_framebuffer.h"
+
+static inline bool is_switching_user(void *old, void *new)
+{
+	if (!old || !new)
+		return false;
+
+	return old != new;
+}
+
+static struct komeda_pipeline_state *
+komeda_pipeline_get_state(struct komeda_pipeline *pipe,
+			  struct drm_atomic_state *state)
+{
+	struct drm_private_state *priv_st;
+
+	priv_st = drm_atomic_get_private_obj_state(state, &pipe->obj);
+	if (IS_ERR(priv_st))
+		return ERR_CAST(priv_st);
+
+	return priv_to_pipe_st(priv_st);
+}
+
+struct komeda_pipeline_state *
+komeda_pipeline_get_old_state(struct komeda_pipeline *pipe,
+			      struct drm_atomic_state *state)
+{
+	struct drm_private_state *priv_st;
+
+	priv_st = drm_atomic_get_old_private_obj_state(state, &pipe->obj);
+	if (priv_st)
+		return priv_to_pipe_st(priv_st);
+	return NULL;
+}
+
+static struct komeda_pipeline_state *
+komeda_pipeline_get_new_state(struct komeda_pipeline *pipe,
+			      struct drm_atomic_state *state)
+{
+	struct drm_private_state *priv_st;
+
+	priv_st = drm_atomic_get_new_private_obj_state(state, &pipe->obj);
+	if (priv_st)
+		return priv_to_pipe_st(priv_st);
+	return NULL;
+}
+
+/* Assign pipeline for crtc */
+static struct komeda_pipeline_state *
+komeda_pipeline_get_state_and_set_crtc(struct komeda_pipeline *pipe,
+				       struct drm_atomic_state *state,
+				       struct drm_crtc *crtc)
+{
+	struct komeda_pipeline_state *st;
+
+	st = komeda_pipeline_get_state(pipe, state);
+	if (IS_ERR(st))
+		return st;
+
+	if (is_switching_user(crtc, st->crtc)) {
+		DRM_DEBUG_ATOMIC("CRTC%d required pipeline%d is busy.\n",
+				 drm_crtc_index(crtc), pipe->id);
+		return ERR_PTR(-EBUSY);
+	}
+
+	/* pipeline only can be disabled when the it is free or unused */
+	if (!crtc && st->active_comps) {
+		DRM_DEBUG_ATOMIC("Disabling a busy pipeline:%d.\n", pipe->id);
+		return ERR_PTR(-EBUSY);
+	}
+
+	st->crtc = crtc;
+
+	if (crtc) {
+		struct komeda_crtc_state *kcrtc_st;
+
+		kcrtc_st = to_kcrtc_st(drm_atomic_get_new_crtc_state(state,
+								     crtc));
+
+		kcrtc_st->active_pipes |= BIT(pipe->id);
+		kcrtc_st->affected_pipes |= BIT(pipe->id);
+	}
+	return st;
+}
+
+static struct komeda_component_state *
+komeda_component_get_state(struct komeda_component *c,
+			   struct drm_atomic_state *state)
+{
+	struct drm_private_state *priv_st;
+
+	WARN_ON(!drm_modeset_is_locked(&c->pipeline->obj.lock));
+
+	priv_st = drm_atomic_get_private_obj_state(state, &c->obj);
+	if (IS_ERR(priv_st))
+		return ERR_CAST(priv_st);
+
+	return priv_to_comp_st(priv_st);
+}
+
+static struct komeda_component_state *
+komeda_component_get_old_state(struct komeda_component *c,
+			       struct drm_atomic_state *state)
+{
+	struct drm_private_state *priv_st;
+
+	priv_st = drm_atomic_get_old_private_obj_state(state, &c->obj);
+	if (priv_st)
+		return priv_to_comp_st(priv_st);
+	return NULL;
+}
+
+/**
+ * komeda_component_get_state_and_set_user()
+ *
+ * @c: component to get state and set user
+ * @state: global atomic state
+ * @user: direct user, the binding user
+ * @crtc: the CRTC user, the big boss :)
+ *
+ * This function accepts two users:
+ * -   The direct user: can be plane/crtc/wb_connector depends on component
+ * -   The big boss (CRTC)
+ * CRTC is the big boss (the final user), because all component resources
+ * eventually will be assigned to CRTC, like the layer will be binding to
+ * kms_plane, but kms plane will be binding to a CRTC eventually.
+ *
+ * The big boss (CRTC) is for pipeline assignment, since &komeda_component isn't
+ * independent and can be assigned to CRTC freely, but belongs to a specific
+ * pipeline, only pipeline can be shared between crtc, and pipeline as a whole
+ * (include all the internal components) assigned to a specific CRTC.
+ *
+ * So when set a user to komeda_component, need first to check the status of
+ * component->pipeline to see if the pipeline is available on this specific
+ * CRTC. if the pipeline is busy (assigned to another CRTC), even the required
+ * component is free, the component still cannot be assigned to the direct user.
+ */
+static struct komeda_component_state *
+komeda_component_get_state_and_set_user(struct komeda_component *c,
+					struct drm_atomic_state *state,
+					void *user,
+					struct drm_crtc *crtc)
+{
+	struct komeda_pipeline_state *pipe_st;
+	struct komeda_component_state *st;
+
+	/* First check if the pipeline is available */
+	pipe_st = komeda_pipeline_get_state_and_set_crtc(c->pipeline,
+							 state, crtc);
+	if (IS_ERR(pipe_st))
+		return ERR_CAST(pipe_st);
+
+	st = komeda_component_get_state(c, state);
+	if (IS_ERR(st))
+		return st;
+
+	/* check if the component has been occupied */
+	if (is_switching_user(user, st->binding_user)) {
+		DRM_DEBUG_ATOMIC("required %s is busy.\n", c->name);
+		return ERR_PTR(-EBUSY);
+	}
+
+	st->binding_user = user;
+	/* mark the component as active if user is valid */
+	if (st->binding_user)
+		pipe_st->active_comps |= BIT(c->id);
+
+	return st;
+}
+
+static void
+komeda_component_add_input(struct komeda_component_state *state,
+			   struct komeda_component_output *input,
+			   int idx)
+{
+	struct komeda_component *c = state->component;
+
+	WARN_ON((idx < 0 || idx >= c->max_active_inputs));
+
+	/* since the inputs[i] is only valid when it is active. So if a input[i]
+	 * is a newly enabled input which switches from disable to enable, then
+	 * the old inputs[i] is undefined (NOT zeroed), we can not rely on
+	 * memcmp, but directly mark it changed
+	 */
+	if (!has_bit(idx, state->affected_inputs) ||
+	    memcmp(&state->inputs[idx], input, sizeof(*input))) {
+		memcpy(&state->inputs[idx], input, sizeof(*input));
+		state->changed_active_inputs |= BIT(idx);
+	}
+	state->active_inputs |= BIT(idx);
+	state->affected_inputs |= BIT(idx);
+}
+
+static int
+komeda_component_check_input(struct komeda_component_state *state,
+			     struct komeda_component_output *input,
+			     int idx)
+{
+	struct komeda_component *c = state->component;
+
+	if ((idx < 0) || (idx >= c->max_active_inputs)) {
+		DRM_DEBUG_ATOMIC("%s invalid input id: %d.\n", c->name, idx);
+		return -EINVAL;
+	}
+
+	if (has_bit(idx, state->active_inputs)) {
+		DRM_DEBUG_ATOMIC("%s required input_id: %d has been occupied already.\n",
+				 c->name, idx);
+		return -EINVAL;
+	}
+
+	return 0;
+}
+
+static void
+komeda_component_set_output(struct komeda_component_output *output,
+			    struct komeda_component *comp,
+			    u8 output_port)
+{
+	output->component = comp;
+	output->output_port = output_port;
+}
+
+static int
+komeda_component_validate_private(struct komeda_component *c,
+				  struct komeda_component_state *st)
+{
+	int err;
+
+	if (!c->funcs->validate)
+		return 0;
+
+	err = c->funcs->validate(c, st);
+	if (err)
+		DRM_DEBUG_ATOMIC("%s validate private failed.\n", c->name);
+
+	return err;
+}
+
+static int
+komeda_layer_check_cfg(struct komeda_layer *layer,
+		       struct komeda_plane_state *kplane_st,
+		       struct komeda_data_flow_cfg *dflow)
+{
+	if (!in_range(&layer->hsize_in, dflow->in_w)) {
+		DRM_DEBUG_ATOMIC("src_w: %d is out of range.\n", dflow->in_w);
+		return -EINVAL;
+	}
+
+	if (!in_range(&layer->vsize_in, dflow->in_h)) {
+		DRM_DEBUG_ATOMIC("src_h: %d is out of range.\n", dflow->in_h);
+		return -EINVAL;
+	}
+
+	return 0;
+}
+
+static int
+komeda_layer_validate(struct komeda_layer *layer,
+		      struct komeda_plane_state *kplane_st,
+		      struct komeda_data_flow_cfg *dflow)
+{
+	struct drm_plane_state *plane_st = &kplane_st->base;
+	struct drm_framebuffer *fb = plane_st->fb;
+	struct komeda_fb *kfb = to_kfb(fb);
+	struct komeda_component_state *c_st;
+	struct komeda_layer_state *st;
+	int i, err;
+
+	err = komeda_layer_check_cfg(layer, kplane_st, dflow);
+	if (err)
+		return err;
+
+	c_st = komeda_component_get_state_and_set_user(&layer->base,
+			plane_st->state, plane_st->plane, plane_st->crtc);
+	if (IS_ERR(c_st))
+		return PTR_ERR(c_st);
+
+	st = to_layer_st(c_st);
+
+	st->rot = dflow->rot;
+	st->hsize = kfb->aligned_w;
+	st->vsize = kfb->aligned_h;
+
+	for (i = 0; i < fb->format->num_planes; i++)
+		st->addr[i] = komeda_fb_get_pixel_addr(kfb, dflow->in_x,
+						       dflow->in_y, i);
+
+	err = komeda_component_validate_private(&layer->base, c_st);
+	if (err)
+		return err;
+
+	/* update the data flow for the next stage */
+	komeda_component_set_output(&dflow->input, &layer->base, 0);
+
+	return 0;
+}
+
+static void pipeline_composition_size(struct komeda_crtc_state *kcrtc_st,
+				      u16 *hsize, u16 *vsize)
+{
+	struct drm_display_mode *m = &kcrtc_st->base.adjusted_mode;
+
+	if (hsize)
+		*hsize = m->hdisplay;
+	if (vsize)
+		*vsize = m->vdisplay;
+}
+
+static int
+komeda_compiz_set_input(struct komeda_compiz *compiz,
+			struct komeda_crtc_state *kcrtc_st,
+			struct komeda_data_flow_cfg *dflow)
+{
+	struct drm_atomic_state *drm_st = kcrtc_st->base.state;
+	struct komeda_component_state *c_st, *old_st;
+	struct komeda_compiz_input_cfg *cin;
+	u16 compiz_w, compiz_h;
+	int idx = dflow->blending_zorder;
+
+	pipeline_composition_size(kcrtc_st, &compiz_w, &compiz_h);
+	/* check display rect */
+	if ((dflow->out_x + dflow->out_w > compiz_w) ||
+	    (dflow->out_y + dflow->out_h > compiz_h) ||
+	     dflow->out_w == 0 || dflow->out_h == 0) {
+		DRM_DEBUG_ATOMIC("invalid disp rect [x=%d, y=%d, w=%d, h=%d]\n",
+				 dflow->out_x, dflow->out_y,
+				 dflow->out_w, dflow->out_h);
+		return -EINVAL;
+	}
+
+	c_st = komeda_component_get_state_and_set_user(&compiz->base, drm_st,
+			kcrtc_st->base.crtc, kcrtc_st->base.crtc);
+	if (IS_ERR(c_st))
+		return PTR_ERR(c_st);
+
+	if (komeda_component_check_input(c_st, &dflow->input, idx))
+		return -EINVAL;
+
+	cin = &(to_compiz_st(c_st)->cins[idx]);
+
+	cin->hsize   = dflow->out_w;
+	cin->vsize   = dflow->out_h;
+	cin->hoffset = dflow->out_x;
+	cin->voffset = dflow->out_y;
+	cin->pixel_blend_mode = dflow->pixel_blend_mode;
+	cin->layer_alpha = dflow->layer_alpha;
+
+	old_st = komeda_component_get_old_state(&compiz->base, drm_st);
+	WARN_ON(!old_st);
+
+	/* compare with old to check if this input has been changed */
+	if (memcmp(&(to_compiz_st(old_st)->cins[idx]), cin, sizeof(*cin)))
+		c_st->changed_active_inputs |= BIT(idx);
+
+	komeda_component_add_input(c_st, &dflow->input, idx);
+
+	return 0;
+}
+
+static int
+komeda_compiz_validate(struct komeda_compiz *compiz,
+		       struct komeda_crtc_state *state,
+		       struct komeda_data_flow_cfg *dflow)
+{
+	struct komeda_component_state *c_st;
+	struct komeda_compiz_state *st;
+
+	c_st = komeda_component_get_state_and_set_user(&compiz->base,
+			state->base.state, state->base.crtc, state->base.crtc);
+	if (IS_ERR(c_st))
+		return PTR_ERR(c_st);
+
+	st = to_compiz_st(c_st);
+
+	pipeline_composition_size(state, &st->hsize, &st->vsize);
+
+	komeda_component_set_output(&dflow->input, &compiz->base, 0);
+
+	/* compiz output dflow will be fed to the next pipeline stage, prepare
+	 * the data flow configuration for the next stage
+	 */
+	if (dflow) {
+		dflow->in_w = st->hsize;
+		dflow->in_h = st->vsize;
+		dflow->out_w = dflow->in_w;
+		dflow->out_h = dflow->in_h;
+		/* the output data of compiz doesn't have alpha, it only can be
+		 * used as bottom layer when blend it with master layers
+		 */
+		dflow->pixel_blend_mode = DRM_MODE_BLEND_PIXEL_NONE;
+		dflow->layer_alpha = 0xFF;
+		dflow->blending_zorder = 0;
+	}
+
+	return 0;
+}
+
+static int
+komeda_improc_validate(struct komeda_improc *improc,
+		       struct komeda_crtc_state *kcrtc_st,
+		       struct komeda_data_flow_cfg *dflow)
+{
+	struct drm_crtc *crtc = kcrtc_st->base.crtc;
+	struct komeda_component_state *c_st;
+	struct komeda_improc_state *st;
+
+	c_st = komeda_component_get_state_and_set_user(&improc->base,
+			kcrtc_st->base.state, crtc, crtc);
+	if (IS_ERR(c_st))
+		return PTR_ERR(c_st);
+
+	st = to_improc_st(c_st);
+
+	st->hsize = dflow->in_w;
+	st->vsize = dflow->in_h;
+
+	komeda_component_add_input(&st->base, &dflow->input, 0);
+	komeda_component_set_output(&dflow->input, &improc->base, 0);
+
+	return 0;
+}
+
+static int
+komeda_timing_ctrlr_validate(struct komeda_timing_ctrlr *ctrlr,
+			     struct komeda_crtc_state *kcrtc_st,
+			     struct komeda_data_flow_cfg *dflow)
+{
+	struct drm_crtc *crtc = kcrtc_st->base.crtc;
+	struct komeda_timing_ctrlr_state *st;
+	struct komeda_component_state *c_st;
+
+	c_st = komeda_component_get_state_and_set_user(&ctrlr->base,
+			kcrtc_st->base.state, crtc, crtc);
+	if (IS_ERR(c_st))
+		return PTR_ERR(c_st);
+
+	st = to_ctrlr_st(c_st);
+
+	komeda_component_add_input(&st->base, &dflow->input, 0);
+	komeda_component_set_output(&dflow->input, &ctrlr->base, 0);
+
+	return 0;
+}
+
+int komeda_build_layer_data_flow(struct komeda_layer *layer,
+				 struct komeda_plane_state *kplane_st,
+				 struct komeda_crtc_state *kcrtc_st,
+				 struct komeda_data_flow_cfg *dflow)
+{
+	struct drm_plane *plane = kplane_st->base.plane;
+	struct komeda_pipeline *pipe = layer->base.pipeline;
+	int err;
+
+	DRM_DEBUG_ATOMIC("%s handling [PLANE:%d:%s]: src[x/y:%d/%d, w/h:%d/%d] disp[x/y:%d/%d, w/h:%d/%d]",
+			 layer->base.name, plane->base.id, plane->name,
+			 dflow->in_x, dflow->in_y, dflow->in_w, dflow->in_h,
+			 dflow->out_x, dflow->out_y, dflow->out_w, dflow->out_h);
+
+	err = komeda_layer_validate(layer, kplane_st, dflow);
+	if (err)
+		return err;
+
+	err = komeda_compiz_set_input(pipe->compiz, kcrtc_st, dflow);
+
+	return err;
+}
+
+/* build display output data flow, the data path is:
+ * compiz -> improc -> timing_ctrlr
+ */
+int komeda_build_display_data_flow(struct komeda_crtc *kcrtc,
+				   struct komeda_crtc_state *kcrtc_st)
+{
+	struct komeda_pipeline *master = kcrtc->master;
+	struct komeda_data_flow_cfg m_dflow; /* master data flow */
+	int err;
+
+	memset(&m_dflow, 0, sizeof(m_dflow));
+
+	err = komeda_compiz_validate(master->compiz, kcrtc_st, &m_dflow);
+	if (err)
+		return err;
+
+	err = komeda_improc_validate(master->improc, kcrtc_st, &m_dflow);
+	if (err)
+		return err;
+
+	err = komeda_timing_ctrlr_validate(master->ctrlr, kcrtc_st, &m_dflow);
+	if (err)
+		return err;
+
+	return 0;
+}
+
+static void
+komeda_pipeline_unbound_components(struct komeda_pipeline *pipe,
+				   struct komeda_pipeline_state *new)
+{
+	struct drm_atomic_state *drm_st = new->obj.state;
+	struct komeda_pipeline_state *old = priv_to_pipe_st(pipe->obj.state);
+	struct komeda_component_state *c_st;
+	struct komeda_component *c;
+	u32 disabling_comps, id;
+
+	WARN_ON(!old);
+
+	disabling_comps = (~new->active_comps) & old->active_comps;
+
+	/* unbound all disabling component */
+	dp_for_each_set_bit(id, disabling_comps) {
+		c = komeda_pipeline_get_component(pipe, id);
+		c_st = komeda_component_get_state_and_set_user(c,
+				drm_st, NULL, new->crtc);
+		WARN_ON(IS_ERR(c_st));
+	}
+}
+
+/* release unclaimed pipeline resource */
+int komeda_release_unclaimed_resources(struct komeda_pipeline *pipe,
+				       struct komeda_crtc_state *kcrtc_st)
+{
+	struct drm_atomic_state *drm_st = kcrtc_st->base.state;
+	struct komeda_pipeline_state *st;
+
+	/* ignore the pipeline which is not affected */
+	if (!pipe || !has_bit(pipe->id, kcrtc_st->affected_pipes))
+		return 0;
+
+	if (has_bit(pipe->id, kcrtc_st->active_pipes))
+		st = komeda_pipeline_get_new_state(pipe, drm_st);
+	else
+		st = komeda_pipeline_get_state_and_set_crtc(pipe, drm_st, NULL);
+
+	if (WARN_ON(IS_ERR_OR_NULL(st)))
+		return -EINVAL;
+
+	komeda_pipeline_unbound_components(pipe, st);
+
+	return 0;
+}
+
+void komeda_pipeline_disable(struct komeda_pipeline *pipe,
+			     struct drm_atomic_state *old_state)
+{
+	struct komeda_pipeline_state *old;
+	struct komeda_component *c;
+	struct komeda_component_state *c_st;
+	u32 id, disabling_comps = 0;
+
+	old = komeda_pipeline_get_old_state(pipe, old_state);
+
+	disabling_comps = old->active_comps;
+	DRM_DEBUG_ATOMIC("PIPE%d: disabling_comps: 0x%x.\n",
+			 pipe->id, disabling_comps);
+
+	dp_for_each_set_bit(id, disabling_comps) {
+		c = komeda_pipeline_get_component(pipe, id);
+		c_st = priv_to_comp_st(c->obj.state);
+
+		/*
+		 * If we disabled a component then all active_inputs should be
+		 * put in the list of changed_active_inputs, so they get
+		 * re-enabled.
+		 * This usually happens during a modeset when the pipeline is
+		 * first disabled and then the actual state gets committed
+		 * again.
+		 */
+		c_st->changed_active_inputs |= c_st->active_inputs;
+
+		c->funcs->disable(c);
+	}
+}
+
+void komeda_pipeline_update(struct komeda_pipeline *pipe,
+			    struct drm_atomic_state *old_state)
+{
+	struct komeda_pipeline_state *new = priv_to_pipe_st(pipe->obj.state);
+	struct komeda_pipeline_state *old;
+	struct komeda_component *c;
+	u32 id, changed_comps = 0;
+
+	old = komeda_pipeline_get_old_state(pipe, old_state);
+
+	changed_comps = new->active_comps | old->active_comps;
+
+	DRM_DEBUG_ATOMIC("PIPE%d: active_comps: 0x%x, changed: 0x%x.\n",
+			 pipe->id, new->active_comps, changed_comps);
+
+	dp_for_each_set_bit(id, changed_comps) {
+		c = komeda_pipeline_get_component(pipe, id);
+
+		if (new->active_comps & BIT(c->id))
+			c->funcs->update(c, priv_to_comp_st(c->obj.state));
+		else
+			c->funcs->disable(c);
+	}
+}
diff --git a/drivers/gpu/drm/arm/display/komeda/komeda_plane.c b/drivers/gpu/drm/arm/display/komeda/komeda_plane.c
index 0a4953a9a909..07ed0cc1bc44 100644
--- a/drivers/gpu/drm/arm/display/komeda/komeda_plane.c
+++ b/drivers/gpu/drm/arm/display/komeda/komeda_plane.c
@@ -7,10 +7,96 @@
 #include <drm/drm_atomic.h>
 #include <drm/drm_atomic_helper.h>
 #include <drm/drm_plane_helper.h>
+#include <drm/drm_print.h>
 #include "komeda_dev.h"
 #include "komeda_kms.h"
 
+static int
+komeda_plane_init_data_flow(struct drm_plane_state *st,
+			    struct komeda_data_flow_cfg *dflow)
+{
+	struct drm_framebuffer *fb = st->fb;
+
+	memset(dflow, 0, sizeof(*dflow));
+
+	dflow->blending_zorder = st->zpos;
+
+	/* if format doesn't have alpha, fix blend mode to PIXEL_NONE */
+	dflow->pixel_blend_mode = fb->format->has_alpha ?
+		st->pixel_blend_mode : DRM_MODE_BLEND_PIXEL_NONE;
+	dflow->layer_alpha = st->alpha >> 8;
+
+	dflow->out_x = st->crtc_x;
+	dflow->out_y = st->crtc_y;
+	dflow->out_w = st->crtc_w;
+	dflow->out_h = st->crtc_h;
+
+	dflow->in_x = st->src_x >> 16;
+	dflow->in_y = st->src_y >> 16;
+	dflow->in_w = st->src_w >> 16;
+	dflow->in_h = st->src_h >> 16;
+
+	return 0;
+}
+
+/**
+ * komeda_plane_atomic_check - build input data flow
+ * @plane: DRM plane
+ * @state: the plane state object
+ *
+ * RETURNS:
+ * Zero for success or -errno
+ */
+static int
+komeda_plane_atomic_check(struct drm_plane *plane,
+			  struct drm_plane_state *state)
+{
+	struct komeda_plane *kplane = to_kplane(plane);
+	struct komeda_plane_state *kplane_st = to_kplane_st(state);
+	struct komeda_layer *layer = kplane->layer;
+	struct drm_crtc_state *crtc_st;
+	struct komeda_crtc *kcrtc;
+	struct komeda_crtc_state *kcrtc_st;
+	struct komeda_data_flow_cfg dflow;
+	int err;
+
+	if (!state->crtc || !state->fb)
+		return 0;
+
+	crtc_st = drm_atomic_get_crtc_state(state->state, state->crtc);
+	if (!crtc_st->enable) {
+		DRM_DEBUG_ATOMIC("Cannot update plane on a disabled CRTC.\n");
+		return -EINVAL;
+	}
+
+	/* crtc is inactive, skip the resource assignment */
+	if (!crtc_st->active)
+		return 0;
+
+	kcrtc = to_kcrtc(state->crtc);
+	kcrtc_st = to_kcrtc_st(crtc_st);
+
+	err = komeda_plane_init_data_flow(state, &dflow);
+	if (err)
+		return err;
+
+	err = komeda_build_layer_data_flow(layer, kplane_st, kcrtc_st, &dflow);
+
+	return err;
+}
+
+/* plane doesn't represent a real HW, so there is no HW update for plane.
+ * komeda handles all the HW update in crtc->atomic_flush
+ */
+static void
+komeda_plane_atomic_update(struct drm_plane *plane,
+			   struct drm_plane_state *old_state)
+{
+}
+
 static const struct drm_plane_helper_funcs komeda_plane_helper_funcs = {
+	.atomic_check	= komeda_plane_atomic_check,
+	.atomic_update	= komeda_plane_atomic_update,
 };
 
 static void komeda_plane_destroy(struct drm_plane *plane)
@@ -20,7 +106,60 @@ static void komeda_plane_destroy(struct drm_plane *plane)
 	kfree(to_kplane(plane));
 }
 
+static void komeda_plane_reset(struct drm_plane *plane)
+{
+	struct komeda_plane_state *state;
+	struct komeda_plane *kplane = to_kplane(plane);
+
+	if (plane->state)
+		__drm_atomic_helper_plane_destroy_state(plane->state);
+
+	kfree(plane->state);
+	plane->state = NULL;
+
+	state = kzalloc(sizeof(*state), GFP_KERNEL);
+	if (state) {
+		state->base.rotation = DRM_MODE_ROTATE_0;
+		state->base.pixel_blend_mode = DRM_MODE_BLEND_PREMULTI;
+		state->base.alpha = DRM_BLEND_ALPHA_OPAQUE;
+		state->base.zpos = kplane->layer->base.id;
+		plane->state = &state->base;
+		plane->state->plane = plane;
+	}
+}
+
+static struct drm_plane_state *
+komeda_plane_atomic_duplicate_state(struct drm_plane *plane)
+{
+	struct komeda_plane_state *new;
+
+	if (WARN_ON(!plane->state))
+		return NULL;
+
+	new = kzalloc(sizeof(*new), GFP_KERNEL);
+	if (!new)
+		return NULL;
+
+	__drm_atomic_helper_plane_duplicate_state(plane, &new->base);
+
+	return &new->base;
+}
+
+static void
+komeda_plane_atomic_destroy_state(struct drm_plane *plane,
+				  struct drm_plane_state *state)
+{
+	__drm_atomic_helper_plane_destroy_state(state);
+	kfree(to_kplane_st(state));
+}
+
 static const struct drm_plane_funcs komeda_plane_funcs = {
+	.update_plane		= drm_atomic_helper_update_plane,
+	.disable_plane		= drm_atomic_helper_disable_plane,
+	.destroy		= komeda_plane_destroy,
+	.reset			= komeda_plane_reset,
+	.atomic_duplicate_state	= komeda_plane_atomic_duplicate_state,
+	.atomic_destroy_state	= komeda_plane_atomic_destroy_state,
 };
 
 /* for komeda, which is pipeline can be share between crtcs */
diff --git a/drivers/gpu/drm/arm/display/komeda/komeda_private_obj.c b/drivers/gpu/drm/arm/display/komeda/komeda_private_obj.c
index f1c9e3fefa86..a54878cbd6e4 100644
--- a/drivers/gpu/drm/arm/display/komeda/komeda_private_obj.c
+++ b/drivers/gpu/drm/arm/display/komeda/komeda_private_obj.c
@@ -7,6 +7,188 @@
 #include "komeda_dev.h"
 #include "komeda_kms.h"
 
+static void
+komeda_component_state_reset(struct komeda_component_state *st)
+{
+	st->binding_user = NULL;
+	st->affected_inputs = st->active_inputs;
+	st->active_inputs = 0;
+	st->changed_active_inputs = 0;
+}
+
+static struct drm_private_state *
+komeda_layer_atomic_duplicate_state(struct drm_private_obj *obj)
+{
+	struct komeda_layer_state *st;
+
+	st = kmemdup(obj->state, sizeof(*st), GFP_KERNEL);
+	if (!st)
+		return NULL;
+
+	komeda_component_state_reset(&st->base);
+	__drm_atomic_helper_private_obj_duplicate_state(obj, &st->base.obj);
+
+	return &st->base.obj;
+}
+
+static void
+komeda_layer_atomic_destroy_state(struct drm_private_obj *obj,
+				  struct drm_private_state *state)
+{
+	struct komeda_layer_state *st = to_layer_st(priv_to_comp_st(state));
+
+	kfree(st);
+}
+
+static const struct drm_private_state_funcs komeda_layer_obj_funcs = {
+	.atomic_duplicate_state	= komeda_layer_atomic_duplicate_state,
+	.atomic_destroy_state	= komeda_layer_atomic_destroy_state,
+};
+
+static int komeda_layer_obj_add(struct komeda_kms_dev *kms,
+				struct komeda_layer *layer)
+{
+	struct komeda_layer_state *st;
+
+	st = kzalloc(sizeof(*st), GFP_KERNEL);
+	if (!st)
+		return -ENOMEM;
+
+	st->base.component = &layer->base;
+	drm_atomic_private_obj_init(&kms->base, &layer->base.obj, &st->base.obj,
+				    &komeda_layer_obj_funcs);
+	return 0;
+}
+
+static struct drm_private_state *
+komeda_compiz_atomic_duplicate_state(struct drm_private_obj *obj)
+{
+	struct komeda_compiz_state *st;
+
+	st = kmemdup(obj->state, sizeof(*st), GFP_KERNEL);
+	if (!st)
+		return NULL;
+
+	komeda_component_state_reset(&st->base);
+	__drm_atomic_helper_private_obj_duplicate_state(obj, &st->base.obj);
+
+	return &st->base.obj;
+}
+
+static void
+komeda_compiz_atomic_destroy_state(struct drm_private_obj *obj,
+				   struct drm_private_state *state)
+{
+	kfree(to_compiz_st(priv_to_comp_st(state)));
+}
+
+static const struct drm_private_state_funcs komeda_compiz_obj_funcs = {
+	.atomic_duplicate_state	= komeda_compiz_atomic_duplicate_state,
+	.atomic_destroy_state	= komeda_compiz_atomic_destroy_state,
+};
+
+static int komeda_compiz_obj_add(struct komeda_kms_dev *kms,
+				 struct komeda_compiz *compiz)
+{
+	struct komeda_compiz_state *st;
+
+	st = kzalloc(sizeof(*st), GFP_KERNEL);
+	if (!st)
+		return -ENOMEM;
+
+	st->base.component = &compiz->base;
+	drm_atomic_private_obj_init(&kms->base, &compiz->base.obj, &st->base.obj,
+				    &komeda_compiz_obj_funcs);
+
+	return 0;
+}
+
+static struct drm_private_state *
+komeda_improc_atomic_duplicate_state(struct drm_private_obj *obj)
+{
+	struct komeda_improc_state *st;
+
+	st = kmemdup(obj->state, sizeof(*st), GFP_KERNEL);
+	if (!st)
+		return NULL;
+
+	komeda_component_state_reset(&st->base);
+	__drm_atomic_helper_private_obj_duplicate_state(obj, &st->base.obj);
+
+	return &st->base.obj;
+}
+
+static void
+komeda_improc_atomic_destroy_state(struct drm_private_obj *obj,
+				   struct drm_private_state *state)
+{
+	kfree(to_improc_st(priv_to_comp_st(state)));
+}
+
+static const struct drm_private_state_funcs komeda_improc_obj_funcs = {
+	.atomic_duplicate_state	= komeda_improc_atomic_duplicate_state,
+	.atomic_destroy_state	= komeda_improc_atomic_destroy_state,
+};
+
+static int komeda_improc_obj_add(struct komeda_kms_dev *kms,
+				 struct komeda_improc *improc)
+{
+	struct komeda_improc_state *st;
+
+	st = kzalloc(sizeof(*st), GFP_KERNEL);
+	if (!st)
+		return -ENOMEM;
+
+	st->base.component = &improc->base;
+	drm_atomic_private_obj_init(&kms->base, &improc->base.obj, &st->base.obj,
+				    &komeda_improc_obj_funcs);
+
+	return 0;
+}
+
+static struct drm_private_state *
+komeda_timing_ctrlr_atomic_duplicate_state(struct drm_private_obj *obj)
+{
+	struct komeda_timing_ctrlr_state *st;
+
+	st = kmemdup(obj->state, sizeof(*st), GFP_KERNEL);
+	if (!st)
+		return NULL;
+
+	komeda_component_state_reset(&st->base);
+	__drm_atomic_helper_private_obj_duplicate_state(obj, &st->base.obj);
+
+	return &st->base.obj;
+}
+
+static void
+komeda_timing_ctrlr_atomic_destroy_state(struct drm_private_obj *obj,
+					 struct drm_private_state *state)
+{
+	kfree(to_ctrlr_st(priv_to_comp_st(state)));
+}
+
+static const struct drm_private_state_funcs komeda_timing_ctrlr_obj_funcs = {
+	.atomic_duplicate_state	= komeda_timing_ctrlr_atomic_duplicate_state,
+	.atomic_destroy_state	= komeda_timing_ctrlr_atomic_destroy_state,
+};
+
+static int komeda_timing_ctrlr_obj_add(struct komeda_kms_dev *kms,
+				       struct komeda_timing_ctrlr *ctrlr)
+{
+	struct komeda_compiz_state *st;
+
+	st = kzalloc(sizeof(*st), GFP_KERNEL);
+	if (!st)
+		return -ENOMEM;
+
+	st->base.component = &ctrlr->base;
+	drm_atomic_private_obj_init(&kms->base, &ctrlr->base.obj, &st->base.obj,
+				    &komeda_timing_ctrlr_obj_funcs);
+
+	return 0;
+}
+
 static struct drm_private_state *
 komeda_pipeline_atomic_duplicate_state(struct drm_private_obj *obj)
 {
@@ -55,7 +237,7 @@ int komeda_kms_add_private_objs(struct komeda_kms_dev *kms,
 				struct komeda_dev *mdev)
 {
 	struct komeda_pipeline *pipe;
-	int i, err;
+	int i, j, err;
 
 	for (i = 0; i < mdev->n_pipelines; i++) {
 		pipe = mdev->pipelines[i];
@@ -64,25 +246,33 @@ int komeda_kms_add_private_objs(struct komeda_kms_dev *kms,
 		if (err)
 			return err;
 
-		/* Add component */
+		for (j = 0; j < pipe->n_layers; j++) {
+			err = komeda_layer_obj_add(kms, pipe->layers[j]);
+			if (err)
+				return err;
+		}
+
+		err = komeda_compiz_obj_add(kms, pipe->compiz);
+		if (err)
+			return err;
+
+		err = komeda_improc_obj_add(kms, pipe->improc);
+		if (err)
+			return err;
+
+		err = komeda_timing_ctrlr_obj_add(kms, pipe->ctrlr);
+		if (err)
+			return err;
 	}
 
 	return 0;
 }
 
-void komeda_kms_cleanup_private_objs(struct komeda_dev *mdev)
+void komeda_kms_cleanup_private_objs(struct komeda_kms_dev *kms)
 {
-	struct komeda_pipeline *pipe;
-	struct komeda_component *c;
-	int i, id;
+	struct drm_mode_config *config = &kms->base.mode_config;
+	struct drm_private_obj *obj, *next;
 
-	for (i = 0; i < mdev->n_pipelines; i++) {
-		pipe = mdev->pipelines[i];
-		dp_for_each_set_bit(id, pipe->avail_comps) {
-			c = komeda_pipeline_get_component(pipe, id);
-
-			drm_atomic_private_obj_fini(&c->obj);
-		}
-		drm_atomic_private_obj_fini(&pipe->obj);
-	}
+	list_for_each_entry_safe(obj, next, &config->privobj_list, head)
+		drm_atomic_private_obj_fini(obj);
 }
diff --git a/drivers/gpu/drm/drm_atomic.c b/drivers/gpu/drm/drm_atomic.c
index 5eb40130fafb..f4924cb7f495 100644
--- a/drivers/gpu/drm/drm_atomic.c
+++ b/drivers/gpu/drm/drm_atomic.c
@@ -798,6 +798,50 @@ drm_atomic_get_private_obj_state(struct drm_atomic_state *state,
 EXPORT_SYMBOL(drm_atomic_get_private_obj_state);
 
 /**
+ * drm_atomic_get_old_private_obj_state
+ * @state: global atomic state object
+ * @obj: private_obj to grab
+ *
+ * This function returns the old private object state for the given private_obj,
+ * or NULL if the private_obj is not part of the global atomic state.
+ */
+struct drm_private_state *
+drm_atomic_get_old_private_obj_state(struct drm_atomic_state *state,
+				     struct drm_private_obj *obj)
+{
+	int i;
+
+	for (i = 0; i < state->num_private_objs; i++)
+		if (obj == state->private_objs[i].ptr)
+			return state->private_objs[i].old_state;
+
+	return NULL;
+}
+EXPORT_SYMBOL(drm_atomic_get_old_private_obj_state);
+
+/**
+ * drm_atomic_get_new_private_obj_state
+ * @state: global atomic state object
+ * @obj: private_obj to grab
+ *
+ * This function returns the new private object state for the given private_obj,
+ * or NULL if the private_obj is not part of the global atomic state.
+ */
+struct drm_private_state *
+drm_atomic_get_new_private_obj_state(struct drm_atomic_state *state,
+				     struct drm_private_obj *obj)
+{
+	int i;
+
+	for (i = 0; i < state->num_private_objs; i++)
+		if (obj == state->private_objs[i].ptr)
+			return state->private_objs[i].new_state;
+
+	return NULL;
+}
+EXPORT_SYMBOL(drm_atomic_get_new_private_obj_state);
+
+/**
  * drm_atomic_get_connector_state - get connector state
  * @state: global atomic state object
  * @connector: connector to get state object for
@@ -1236,4 +1280,3 @@ int drm_atomic_debugfs_init(struct drm_minor *minor)
 			minor->debugfs_root, minor);
 }
 #endif
-
diff --git a/drivers/gpu/drm/drm_connector.c b/drivers/gpu/drm/drm_connector.c
index 2355124849db..b34c3d38bf15 100644
--- a/drivers/gpu/drm/drm_connector.c
+++ b/drivers/gpu/drm/drm_connector.c
@@ -1416,12 +1416,6 @@ EXPORT_SYMBOL(drm_mode_create_scaling_mode_property);
  *
  *	The driver may place further restrictions within these minimum
  *	and maximum bounds.
- *
- *	The semantics for the vertical blank timestamp differ when
- *	variable refresh rate is active. The vertical blank timestamp
- *	is defined to be an estimate using the current mode's fixed
- *	refresh rate timings. The semantics for the page-flip event
- *	timestamp remain the same.
  */
 
 /**
diff --git a/drivers/gpu/drm/drm_edid.c b/drivers/gpu/drm/drm_edid.c
index 2c22ea446075..649cfd8b4200 100644
--- a/drivers/gpu/drm/drm_edid.c
+++ b/drivers/gpu/drm/drm_edid.c
@@ -162,6 +162,25 @@ static const struct edid_quirk {
 	/* Rotel RSX-1058 forwards sink's EDID but only does HDMI 1.1*/
 	{ "ETR", 13896, EDID_QUIRK_FORCE_8BPC },
 
+	/* Valve Index Headset */
+	{ "VLV", 0x91a8, EDID_QUIRK_NON_DESKTOP },
+	{ "VLV", 0x91b0, EDID_QUIRK_NON_DESKTOP },
+	{ "VLV", 0x91b1, EDID_QUIRK_NON_DESKTOP },
+	{ "VLV", 0x91b2, EDID_QUIRK_NON_DESKTOP },
+	{ "VLV", 0x91b3, EDID_QUIRK_NON_DESKTOP },
+	{ "VLV", 0x91b4, EDID_QUIRK_NON_DESKTOP },
+	{ "VLV", 0x91b5, EDID_QUIRK_NON_DESKTOP },
+	{ "VLV", 0x91b6, EDID_QUIRK_NON_DESKTOP },
+	{ "VLV", 0x91b7, EDID_QUIRK_NON_DESKTOP },
+	{ "VLV", 0x91b8, EDID_QUIRK_NON_DESKTOP },
+	{ "VLV", 0x91b9, EDID_QUIRK_NON_DESKTOP },
+	{ "VLV", 0x91ba, EDID_QUIRK_NON_DESKTOP },
+	{ "VLV", 0x91bb, EDID_QUIRK_NON_DESKTOP },
+	{ "VLV", 0x91bc, EDID_QUIRK_NON_DESKTOP },
+	{ "VLV", 0x91bd, EDID_QUIRK_NON_DESKTOP },
+	{ "VLV", 0x91be, EDID_QUIRK_NON_DESKTOP },
+	{ "VLV", 0x91bf, EDID_QUIRK_NON_DESKTOP },
+
 	/* HTC Vive and Vive Pro VR Headsets */
 	{ "HVR", 0xaa01, EDID_QUIRK_NON_DESKTOP },
 	{ "HVR", 0xaa02, EDID_QUIRK_NON_DESKTOP },
diff --git a/drivers/gpu/drm/drm_gem.c b/drivers/gpu/drm/drm_gem.c
index fae4676707b6..50de138c89e0 100644
--- a/drivers/gpu/drm/drm_gem.c
+++ b/drivers/gpu/drm/drm_gem.c
@@ -1372,8 +1372,8 @@ EXPORT_SYMBOL(drm_gem_unlock_reservations);
  * drm_gem_fence_array_add - Adds the fence to an array of fences to be
  * waited on, deduplicating fences from the same context.
  *
- * @fence_array array of dma_fence * for the job to block on.
- * @fence the dma_fence to add to the list of dependencies.
+ * @fence_array: array of dma_fence * for the job to block on.
+ * @fence: the dma_fence to add to the list of dependencies.
  *
  * Returns:
  * 0 on success, or an error on failing to expand the array.
@@ -1423,9 +1423,9 @@ EXPORT_SYMBOL(drm_gem_fence_array_add);
  * GEM objects used in the job but before updating the reservations with your
  * own fences.
  *
- * @fence_array array of dma_fence * for the job to block on.
- * @obj the gem object to add new dependencies from.
- * @write whether the job might write the object (so we need to depend on
+ * @fence_array: array of dma_fence * for the job to block on.
+ * @obj: the gem object to add new dependencies from.
+ * @write: whether the job might write the object (so we need to depend on
  * shared fences in the reservation object).
  */
 int drm_gem_fence_array_add_implicit(struct xarray *fence_array,
diff --git a/drivers/gpu/drm/drm_gem_cma_helper.c b/drivers/gpu/drm/drm_gem_cma_helper.c
index cc26625b4b33..e01ceed09e67 100644
--- a/drivers/gpu/drm/drm_gem_cma_helper.c
+++ b/drivers/gpu/drm/drm_gem_cma_helper.c
@@ -186,13 +186,13 @@ void drm_gem_cma_free_object(struct drm_gem_object *gem_obj)
 
 	cma_obj = to_drm_gem_cma_obj(gem_obj);
 
-	if (cma_obj->vaddr) {
-		dma_free_wc(gem_obj->dev->dev, cma_obj->base.size,
-			    cma_obj->vaddr, cma_obj->paddr);
-	} else if (gem_obj->import_attach) {
+	if (gem_obj->import_attach) {
 		if (cma_obj->vaddr)
 			dma_buf_vunmap(gem_obj->import_attach->dmabuf, cma_obj->vaddr);
 		drm_prime_gem_destroy(gem_obj, cma_obj->sgt);
+	} else if (cma_obj->vaddr) {
+		dma_free_wc(gem_obj->dev->dev, cma_obj->base.size,
+			    cma_obj->vaddr, cma_obj->paddr);
 	}
 
 	drm_gem_object_release(gem_obj);
diff --git a/drivers/gpu/drm/drm_ioctl.c b/drivers/gpu/drm/drm_ioctl.c
index 5878145077d0..2263e3ddd822 100644
--- a/drivers/gpu/drm/drm_ioctl.c
+++ b/drivers/gpu/drm/drm_ioctl.c
@@ -245,6 +245,9 @@ static int drm_getcap(struct drm_device *dev, void *data, struct drm_file *file_
 	case DRM_CAP_SYNCOBJ:
 		req->value = drm_core_check_feature(dev, DRIVER_SYNCOBJ);
 		return 0;
+	case DRM_CAP_SYNCOBJ_TIMELINE:
+		req->value = drm_core_check_feature(dev, DRIVER_SYNCOBJ_TIMELINE);
+		return 0;
 	}
 
 	/* Other caps only work with KMS drivers */
diff --git a/drivers/gpu/drm/drm_syncobj.c b/drivers/gpu/drm/drm_syncobj.c
index f3ceeb504e6c..361a01a08c18 100644
--- a/drivers/gpu/drm/drm_syncobj.c
+++ b/drivers/gpu/drm/drm_syncobj.c
@@ -740,8 +740,8 @@ drm_syncobj_transfer_ioctl(struct drm_device *dev, void *data,
 	struct drm_syncobj_transfer *args = data;
 	int ret;
 
-	if (!drm_core_check_feature(dev, DRIVER_SYNCOBJ))
-		return -ENODEV;
+	if (!drm_core_check_feature(dev, DRIVER_SYNCOBJ_TIMELINE))
+		return -EOPNOTSUPP;
 
 	if (args->pad)
 		return -EINVAL;
@@ -1091,8 +1091,8 @@ drm_syncobj_timeline_wait_ioctl(struct drm_device *dev, void *data,
 	struct drm_syncobj **syncobjs;
 	int ret = 0;
 
-	if (!drm_core_check_feature(dev, DRIVER_SYNCOBJ))
-		return -ENODEV;
+	if (!drm_core_check_feature(dev, DRIVER_SYNCOBJ_TIMELINE))
+		return -EOPNOTSUPP;
 
 	if (args->flags & ~(DRM_SYNCOBJ_WAIT_FLAGS_WAIT_ALL |
 			    DRM_SYNCOBJ_WAIT_FLAGS_WAIT_FOR_SUBMIT |
@@ -1195,7 +1195,7 @@ drm_syncobj_timeline_signal_ioctl(struct drm_device *dev, void *data,
 	uint32_t i, j;
 	int ret;
 
-	if (!drm_core_check_feature(dev, DRIVER_SYNCOBJ))
+	if (!drm_core_check_feature(dev, DRIVER_SYNCOBJ_TIMELINE))
 		return -EOPNOTSUPP;
 
 	if (args->pad != 0)
@@ -1266,8 +1266,8 @@ int drm_syncobj_query_ioctl(struct drm_device *dev, void *data,
 	uint32_t i;
 	int ret;
 
-	if (!drm_core_check_feature(dev, DRIVER_SYNCOBJ))
-		return -ENODEV;
+	if (!drm_core_check_feature(dev, DRIVER_SYNCOBJ_TIMELINE))
+		return -EOPNOTSUPP;
 
 	if (args->pad != 0)
 		return -EINVAL;
diff --git a/drivers/gpu/drm/etnaviv/etnaviv_drv.c b/drivers/gpu/drm/etnaviv/etnaviv_drv.c
index 9f42f7538236..7eb7cf9c3fa8 100644
--- a/drivers/gpu/drm/etnaviv/etnaviv_drv.c
+++ b/drivers/gpu/drm/etnaviv/etnaviv_drv.c
@@ -514,6 +514,9 @@ static int etnaviv_bind(struct device *dev)
 	}
 	drm->dev_private = priv;
 
+	dev->dma_parms = &priv->dma_parms;
+	dma_set_max_seg_size(dev, SZ_2G);
+
 	mutex_init(&priv->gem_lock);
 	INIT_LIST_HEAD(&priv->gem_list);
 	priv->num_gpus = 0;
@@ -551,6 +554,8 @@ static void etnaviv_unbind(struct device *dev)
 
 	component_unbind_all(dev, drm);
 
+	dev->dma_parms = NULL;
+
 	drm->dev_private = NULL;
 	kfree(priv);
 
diff --git a/drivers/gpu/drm/etnaviv/etnaviv_drv.h b/drivers/gpu/drm/etnaviv/etnaviv_drv.h
index 6044ace6bb3e..8798423705e1 100644
--- a/drivers/gpu/drm/etnaviv/etnaviv_drv.h
+++ b/drivers/gpu/drm/etnaviv/etnaviv_drv.h
@@ -42,6 +42,7 @@ struct etnaviv_file_private {
 
 struct etnaviv_drm_private {
 	int num_gpus;
+	struct device_dma_parameters dma_parms;
 	struct etnaviv_gpu *gpu[ETNA_MAX_PIPES];
 
 	/* list of GEM objects: */
diff --git a/drivers/gpu/drm/etnaviv/etnaviv_gem.c b/drivers/gpu/drm/etnaviv/etnaviv_gem.c
index c60752ef7324..e8778ebb72e6 100644
--- a/drivers/gpu/drm/etnaviv/etnaviv_gem.c
+++ b/drivers/gpu/drm/etnaviv/etnaviv_gem.c
@@ -622,24 +622,18 @@ int etnaviv_gem_new_handle(struct drm_device *dev, struct drm_file *file,
 	lockdep_set_class(&to_etnaviv_bo(obj)->lock, &etnaviv_shm_lock_class);
 
 	ret = drm_gem_object_init(dev, obj, size);
-	if (ret == 0) {
-		struct address_space *mapping;
-
-		/*
-		 * Our buffers are kept pinned, so allocating them
-		 * from the MOVABLE zone is a really bad idea, and
-		 * conflicts with CMA. See comments above new_inode()
-		 * why this is required _and_ expected if you're
-		 * going to pin these pages.
-		 */
-		mapping = obj->filp->f_mapping;
-		mapping_set_gfp_mask(mapping, GFP_HIGHUSER |
-				     __GFP_RETRY_MAYFAIL | __GFP_NOWARN);
-	}
-
 	if (ret)
 		goto fail;
 
+	/*
+	 * Our buffers are kept pinned, so allocating them from the MOVABLE
+	 * zone is a really bad idea, and conflicts with CMA. See comments
+	 * above new_inode() why this is required _and_ expected if you're
+	 * going to pin these pages.
+	 */
+	mapping_set_gfp_mask(obj->filp->f_mapping, GFP_HIGHUSER |
+			     __GFP_RETRY_MAYFAIL | __GFP_NOWARN);
+
 	etnaviv_gem_obj_add(dev, obj);
 
 	ret = drm_gem_handle_create(file, obj, handle);
diff --git a/drivers/gpu/drm/etnaviv/etnaviv_gpu.c b/drivers/gpu/drm/etnaviv/etnaviv_gpu.c
index 6904535475de..72d01e873160 100644
--- a/drivers/gpu/drm/etnaviv/etnaviv_gpu.c
+++ b/drivers/gpu/drm/etnaviv/etnaviv_gpu.c
@@ -365,6 +365,7 @@ static void etnaviv_hw_identify(struct etnaviv_gpu *gpu)
 	dev_info(gpu->dev, "model: GC%x, revision: %x\n",
 		 gpu->identity.model, gpu->identity.revision);
 
+	gpu->idle_mask = ~VIVS_HI_IDLE_STATE_AXI_LP;
 	/*
 	 * If there is a match in the HWDB, we aren't interested in the
 	 * remaining register values, as they might be wrong.
@@ -412,7 +413,7 @@ static void etnaviv_hw_identify(struct etnaviv_gpu *gpu)
 	}
 
 	/* GC600 idle register reports zero bits where modules aren't present */
-	if (gpu->identity.model == chipModel_GC600) {
+	if (gpu->identity.model == chipModel_GC600)
 		gpu->idle_mask = VIVS_HI_IDLE_STATE_TX |
 				 VIVS_HI_IDLE_STATE_RA |
 				 VIVS_HI_IDLE_STATE_SE |
@@ -421,9 +422,6 @@ static void etnaviv_hw_identify(struct etnaviv_gpu *gpu)
 				 VIVS_HI_IDLE_STATE_PE |
 				 VIVS_HI_IDLE_STATE_DE |
 				 VIVS_HI_IDLE_STATE_FE;
-	} else {
-		gpu->idle_mask = ~VIVS_HI_IDLE_STATE_AXI_LP;
-	}
 
 	etnaviv_hw_specs(gpu);
 }
diff --git a/drivers/gpu/drm/i915/gvt/Makefile b/drivers/gpu/drm/i915/gvt/Makefile
index 271fb46d4dd0..ea8324abc784 100644
--- a/drivers/gpu/drm/i915/gvt/Makefile
+++ b/drivers/gpu/drm/i915/gvt/Makefile
@@ -5,5 +5,5 @@ GVT_SOURCE := gvt.o aperture_gm.o handlers.o vgpu.o trace_points.o firmware.o \
 	execlist.o scheduler.o sched_policy.o mmio_context.o cmd_parser.o debugfs.o \
 	fb_decoder.o dmabuf.o page_track.o
 
-ccflags-y				+= -I$(src) -I$(src)/$(GVT_DIR)
+ccflags-y				+= -I $(srctree)/$(src) -I $(srctree)/$(src)/$(GVT_DIR)/
 i915-y					+= $(addprefix $(GVT_DIR)/, $(GVT_SOURCE))
diff --git a/drivers/gpu/drm/i915/gvt/debugfs.c b/drivers/gpu/drm/i915/gvt/debugfs.c
index 2ec89bcb59f1..8a9606f91e68 100644
--- a/drivers/gpu/drm/i915/gvt/debugfs.c
+++ b/drivers/gpu/drm/i915/gvt/debugfs.c
@@ -196,9 +196,9 @@ DEFINE_SIMPLE_ATTRIBUTE(vgpu_scan_nonprivbb_fops,
 int intel_gvt_debugfs_add_vgpu(struct intel_vgpu *vgpu)
 {
 	struct dentry *ent;
-	char name[10] = "";
+	char name[16] = "";
 
-	sprintf(name, "vgpu%d", vgpu->id);
+	snprintf(name, 16, "vgpu%d", vgpu->id);
 	vgpu->debugfs = debugfs_create_dir(name, vgpu->gvt->debugfs_root);
 	if (!vgpu->debugfs)
 		return -ENOMEM;
diff --git a/drivers/gpu/drm/i915/gvt/dmabuf.c b/drivers/gpu/drm/i915/gvt/dmabuf.c
index 4e1e425189ba..41c8ebc60c63 100644
--- a/drivers/gpu/drm/i915/gvt/dmabuf.c
+++ b/drivers/gpu/drm/i915/gvt/dmabuf.c
@@ -45,6 +45,7 @@ static int vgpu_gem_get_pages(
 	int i, ret;
 	gen8_pte_t __iomem *gtt_entries;
 	struct intel_vgpu_fb_info *fb_info;
+	u32 page_num;
 
 	fb_info = (struct intel_vgpu_fb_info *)obj->gvt_info;
 	if (WARN_ON(!fb_info))
@@ -54,14 +55,15 @@ static int vgpu_gem_get_pages(
 	if (unlikely(!st))
 		return -ENOMEM;
 
-	ret = sg_alloc_table(st, fb_info->size, GFP_KERNEL);
+	page_num = obj->base.size >> PAGE_SHIFT;
+	ret = sg_alloc_table(st, page_num, GFP_KERNEL);
 	if (ret) {
 		kfree(st);
 		return ret;
 	}
 	gtt_entries = (gen8_pte_t __iomem *)dev_priv->ggtt.gsm +
 		(fb_info->start >> PAGE_SHIFT);
-	for_each_sg(st->sgl, sg, fb_info->size, i) {
+	for_each_sg(st->sgl, sg, page_num, i) {
 		sg->offset = 0;
 		sg->length = PAGE_SIZE;
 		sg_dma_address(sg) =
@@ -158,7 +160,7 @@ static struct drm_i915_gem_object *vgpu_create_gem(struct drm_device *dev,
 		return NULL;
 
 	drm_gem_private_object_init(dev, &obj->base,
-		info->size << PAGE_SHIFT);
+		roundup(info->size, PAGE_SIZE));
 	i915_gem_object_init(obj, &intel_vgpu_gem_ops);
 
 	obj->read_domains = I915_GEM_DOMAIN_GTT;
@@ -206,11 +208,12 @@ static int vgpu_get_plane_info(struct drm_device *dev,
 		struct intel_vgpu_fb_info *info,
 		int plane_id)
 {
-	struct drm_i915_private *dev_priv = to_i915(dev);
 	struct intel_vgpu_primary_plane_format p;
 	struct intel_vgpu_cursor_plane_format c;
 	int ret, tile_height = 1;
 
+	memset(info, 0, sizeof(*info));
+
 	if (plane_id == DRM_PLANE_TYPE_PRIMARY) {
 		ret = intel_vgpu_decode_primary_plane(vgpu, &p);
 		if (ret)
@@ -267,8 +270,7 @@ static int vgpu_get_plane_info(struct drm_device *dev,
 		return -EINVAL;
 	}
 
-	info->size = (info->stride * roundup(info->height, tile_height)
-		      + PAGE_SIZE - 1) >> PAGE_SHIFT;
+	info->size = info->stride * roundup(info->height, tile_height);
 	if (info->size == 0) {
 		gvt_vgpu_err("fb size is zero\n");
 		return -EINVAL;
@@ -278,11 +280,6 @@ static int vgpu_get_plane_info(struct drm_device *dev,
 		gvt_vgpu_err("Not aligned fb address:0x%llx\n", info->start);
 		return -EFAULT;
 	}
-	if (((info->start >> PAGE_SHIFT) + info->size) >
-		ggtt_total_entries(&dev_priv->ggtt)) {
-		gvt_vgpu_err("Invalid GTT offset or size\n");
-		return -EFAULT;
-	}
 
 	if (!intel_gvt_ggtt_validate_range(vgpu, info->start, info->size)) {
 		gvt_vgpu_err("invalid gma addr\n");
diff --git a/drivers/gpu/drm/i915/gvt/gtt.c b/drivers/gpu/drm/i915/gvt/gtt.c
index c2f7d20f6346..08c74e65836b 100644
--- a/drivers/gpu/drm/i915/gvt/gtt.c
+++ b/drivers/gpu/drm/i915/gvt/gtt.c
@@ -811,7 +811,7 @@ static int reclaim_one_ppgtt_mm(struct intel_gvt *gvt);
 
 /* Allocate shadow page table without guest page. */
 static struct intel_vgpu_ppgtt_spt *ppgtt_alloc_spt(
-		struct intel_vgpu *vgpu, intel_gvt_gtt_type_t type)
+		struct intel_vgpu *vgpu, enum intel_gvt_gtt_type type)
 {
 	struct device *kdev = &vgpu->gvt->dev_priv->drm.pdev->dev;
 	struct intel_vgpu_ppgtt_spt *spt = NULL;
@@ -861,7 +861,7 @@ err_free_spt:
 
 /* Allocate shadow page table associated with specific gfn. */
 static struct intel_vgpu_ppgtt_spt *ppgtt_alloc_spt_gfn(
-		struct intel_vgpu *vgpu, intel_gvt_gtt_type_t type,
+		struct intel_vgpu *vgpu, enum intel_gvt_gtt_type type,
 		unsigned long gfn, bool guest_pde_ips)
 {
 	struct intel_vgpu_ppgtt_spt *spt;
@@ -936,7 +936,7 @@ static int ppgtt_invalidate_spt_by_shadow_entry(struct intel_vgpu *vgpu,
 {
 	struct intel_gvt_gtt_pte_ops *ops = vgpu->gvt->gtt.pte_ops;
 	struct intel_vgpu_ppgtt_spt *s;
-	intel_gvt_gtt_type_t cur_pt_type;
+	enum intel_gvt_gtt_type cur_pt_type;
 
 	GEM_BUG_ON(!gtt_type_is_pt(get_next_pt_type(e->type)));
 
@@ -1076,6 +1076,9 @@ static struct intel_vgpu_ppgtt_spt *ppgtt_populate_spt_by_guest_entry(
 	} else {
 		int type = get_next_pt_type(we->type);
 
+		if (!gtt_type_is_pt(type))
+			goto err;
+
 		spt = ppgtt_alloc_spt_gfn(vgpu, type, ops->get_pfn(we), ips);
 		if (IS_ERR(spt)) {
 			ret = PTR_ERR(spt);
@@ -1855,7 +1858,7 @@ static void vgpu_free_mm(struct intel_vgpu_mm *mm)
  * Zero on success, negative error code in pointer if failed.
  */
 struct intel_vgpu_mm *intel_vgpu_create_ppgtt_mm(struct intel_vgpu *vgpu,
-		intel_gvt_gtt_type_t root_entry_type, u64 pdps[])
+		enum intel_gvt_gtt_type root_entry_type, u64 pdps[])
 {
 	struct intel_gvt *gvt = vgpu->gvt;
 	struct intel_vgpu_mm *mm;
@@ -2309,7 +2312,7 @@ int intel_vgpu_emulate_ggtt_mmio_write(struct intel_vgpu *vgpu,
 }
 
 static int alloc_scratch_pages(struct intel_vgpu *vgpu,
-		intel_gvt_gtt_type_t type)
+		enum intel_gvt_gtt_type type)
 {
 	struct intel_vgpu_gtt *gtt = &vgpu->gtt;
 	struct intel_gvt_gtt_pte_ops *ops = vgpu->gvt->gtt.pte_ops;
@@ -2594,7 +2597,7 @@ struct intel_vgpu_mm *intel_vgpu_find_ppgtt_mm(struct intel_vgpu *vgpu,
  * Zero on success, negative error code if failed.
  */
 struct intel_vgpu_mm *intel_vgpu_get_ppgtt_mm(struct intel_vgpu *vgpu,
-		intel_gvt_gtt_type_t root_entry_type, u64 pdps[])
+		enum intel_gvt_gtt_type root_entry_type, u64 pdps[])
 {
 	struct intel_vgpu_mm *mm;
 
diff --git a/drivers/gpu/drm/i915/gvt/gtt.h b/drivers/gpu/drm/i915/gvt/gtt.h
index 32c573aea494..42d0394f0de2 100644
--- a/drivers/gpu/drm/i915/gvt/gtt.h
+++ b/drivers/gpu/drm/i915/gvt/gtt.h
@@ -95,8 +95,8 @@ struct intel_gvt_gtt {
 	unsigned long scratch_mfn;
 };
 
-typedef enum {
-	GTT_TYPE_INVALID = -1,
+enum intel_gvt_gtt_type {
+	GTT_TYPE_INVALID = 0,
 
 	GTT_TYPE_GGTT_PTE,
 
@@ -124,7 +124,7 @@ typedef enum {
 	GTT_TYPE_PPGTT_PML4_PT,
 
 	GTT_TYPE_MAX,
-} intel_gvt_gtt_type_t;
+};
 
 enum intel_gvt_mm_type {
 	INTEL_GVT_MM_GGTT,
@@ -148,7 +148,7 @@ struct intel_vgpu_mm {
 
 	union {
 		struct {
-			intel_gvt_gtt_type_t root_entry_type;
+			enum intel_gvt_gtt_type root_entry_type;
 			/*
 			 * The 4 PDPs in ring context. For 48bit addressing,
 			 * only PDP0 is valid and point to PML4. For 32it
@@ -169,7 +169,7 @@ struct intel_vgpu_mm {
 };
 
 struct intel_vgpu_mm *intel_vgpu_create_ppgtt_mm(struct intel_vgpu *vgpu,
-		intel_gvt_gtt_type_t root_entry_type, u64 pdps[]);
+		enum intel_gvt_gtt_type root_entry_type, u64 pdps[]);
 
 static inline void intel_vgpu_mm_get(struct intel_vgpu_mm *mm)
 {
@@ -233,7 +233,7 @@ struct intel_vgpu_ppgtt_spt {
 	struct intel_vgpu *vgpu;
 
 	struct {
-		intel_gvt_gtt_type_t type;
+		enum intel_gvt_gtt_type type;
 		bool pde_ips; /* for 64KB PTEs */
 		void *vaddr;
 		struct page *page;
@@ -241,7 +241,7 @@ struct intel_vgpu_ppgtt_spt {
 	} shadow_page;
 
 	struct {
-		intel_gvt_gtt_type_t type;
+		enum intel_gvt_gtt_type type;
 		bool pde_ips; /* for 64KB PTEs */
 		unsigned long gfn;
 		unsigned long write_cnt;
@@ -267,7 +267,7 @@ struct intel_vgpu_mm *intel_vgpu_find_ppgtt_mm(struct intel_vgpu *vgpu,
 		u64 pdps[]);
 
 struct intel_vgpu_mm *intel_vgpu_get_ppgtt_mm(struct intel_vgpu *vgpu,
-		intel_gvt_gtt_type_t root_entry_type, u64 pdps[]);
+		enum intel_gvt_gtt_type root_entry_type, u64 pdps[]);
 
 int intel_vgpu_put_ppgtt_mm(struct intel_vgpu *vgpu, u64 pdps[]);
 
diff --git a/drivers/gpu/drm/i915/gvt/handlers.c b/drivers/gpu/drm/i915/gvt/handlers.c
index 18f01eeb2510..90673fca792f 100644
--- a/drivers/gpu/drm/i915/gvt/handlers.c
+++ b/drivers/gpu/drm/i915/gvt/handlers.c
@@ -1206,7 +1206,7 @@ static int pvinfo_mmio_read(struct intel_vgpu *vgpu, unsigned int offset,
 
 static int handle_g2v_notification(struct intel_vgpu *vgpu, int notification)
 {
-	intel_gvt_gtt_type_t root_entry_type = GTT_TYPE_PPGTT_ROOT_L4_ENTRY;
+	enum intel_gvt_gtt_type root_entry_type = GTT_TYPE_PPGTT_ROOT_L4_ENTRY;
 	struct intel_vgpu_mm *mm;
 	u64 *pdps;
 
@@ -3303,7 +3303,7 @@ void intel_gvt_clean_mmio_info(struct intel_gvt *gvt)
 /* Special MMIO blocks. */
 static struct gvt_mmio_block mmio_blocks[] = {
 	{D_SKL_PLUS, _MMIO(CSR_MMIO_START_RANGE), 0x3000, NULL, NULL},
-	{D_ALL, MCHBAR_MIRROR_REG_BASE, 0x4000, NULL, NULL},
+	{D_ALL, _MMIO(MCHBAR_MIRROR_BASE_SNB), 0x40000, NULL, NULL},
 	{D_ALL, _MMIO(VGT_PVINFO_PAGE), VGT_PVINFO_SIZE,
 		pvinfo_mmio_read, pvinfo_mmio_write},
 	{D_ALL, LGC_PALETTE(PIPE_A, 0), 1024, NULL, NULL},
diff --git a/drivers/gpu/drm/i915/gvt/mmio_context.c b/drivers/gpu/drm/i915/gvt/mmio_context.c
index e7e14c842be4..edf6d646eb25 100644
--- a/drivers/gpu/drm/i915/gvt/mmio_context.c
+++ b/drivers/gpu/drm/i915/gvt/mmio_context.c
@@ -132,6 +132,7 @@ static struct engine_mmio gen9_engine_mmio_list[] __cacheline_aligned = {
 
 	{RCS0, GEN9_GAMT_ECO_REG_RW_IA, 0x0, false}, /* 0x4ab0 */
 	{RCS0, GEN9_CSFE_CHICKEN1_RCS, 0xffff, false}, /* 0x20d4 */
+	{RCS0, _MMIO(0x20D8), 0xffff, true}, /* 0x20d8 */
 
 	{RCS0, GEN8_GARBCNTL, 0x0, false}, /* 0xb004 */
 	{RCS0, GEN7_FF_THREAD_MODE, 0x0, false}, /* 0x20a0 */
diff --git a/drivers/gpu/drm/i915/gvt/reg.h b/drivers/gpu/drm/i915/gvt/reg.h
index 3de5b643b266..33aaa14bfdde 100644
--- a/drivers/gpu/drm/i915/gvt/reg.h
+++ b/drivers/gpu/drm/i915/gvt/reg.h
@@ -126,7 +126,4 @@
 #define RING_GFX_MODE(base)	_MMIO((base) + 0x29c)
 #define VF_GUARDBAND		_MMIO(0x83a4)
 
-/* define the effective range of MCHBAR register on Sandybridge+ */
-#define MCHBAR_MIRROR_REG_BASE	_MMIO(MCHBAR_MIRROR_BASE_SNB + 0x4000)
-
 #endif
diff --git a/drivers/gpu/drm/i915/gvt/scheduler.c b/drivers/gpu/drm/i915/gvt/scheduler.c
index 8998fa5ab198..7c99bbc3e2b8 100644
--- a/drivers/gpu/drm/i915/gvt/scheduler.c
+++ b/drivers/gpu/drm/i915/gvt/scheduler.c
@@ -1343,7 +1343,7 @@ static int prepare_mm(struct intel_vgpu_workload *workload)
 	struct execlist_ctx_descriptor_format *desc = &workload->ctx_desc;
 	struct intel_vgpu_mm *mm;
 	struct intel_vgpu *vgpu = workload->vgpu;
-	intel_gvt_gtt_type_t root_entry_type;
+	enum intel_gvt_gtt_type root_entry_type;
 	u64 pdps[GVT_RING_CTX_NR_PDPS];
 
 	switch (desc->addressing_mode) {
diff --git a/drivers/gpu/drm/i915/i915_request.c b/drivers/gpu/drm/i915/i915_request.c
index b836721d3b13..ce342f7f7ddb 100644
--- a/drivers/gpu/drm/i915/i915_request.c
+++ b/drivers/gpu/drm/i915/i915_request.c
@@ -425,6 +425,26 @@ void __i915_request_submit(struct i915_request *request)
 	if (i915_gem_context_is_banned(request->gem_context))
 		i915_request_skip(request, -EIO);
 
+	/*
+	 * Are we using semaphores when the gpu is already saturated?
+	 *
+	 * Using semaphores incurs a cost in having the GPU poll a
+	 * memory location, busywaiting for it to change. The continual
+	 * memory reads can have a noticeable impact on the rest of the
+	 * system with the extra bus traffic, stalling the cpu as it too
+	 * tries to access memory across the bus (perf stat -e bus-cycles).
+	 *
+	 * If we installed a semaphore on this request and we only submit
+	 * the request after the signaler completed, that indicates the
+	 * system is overloaded and using semaphores at this time only
+	 * increases the amount of work we are doing. If so, we disable
+	 * further use of semaphores until we are idle again, whence we
+	 * optimistically try again.
+	 */
+	if (request->sched.semaphores &&
+	    i915_sw_fence_signaled(&request->semaphore))
+		request->hw_context->saturated |= request->sched.semaphores;
+
 	/* We may be recursing from the signal callback of another i915 fence */
 	spin_lock_nested(&request->lock, SINGLE_DEPTH_NESTING);
 
@@ -799,6 +819,39 @@ err_unreserve:
 }
 
 static int
+i915_request_await_start(struct i915_request *rq, struct i915_request *signal)
+{
+	if (list_is_first(&signal->ring_link, &signal->ring->request_list))
+		return 0;
+
+	signal = list_prev_entry(signal, ring_link);
+	if (i915_timeline_sync_is_later(rq->timeline, &signal->fence))
+		return 0;
+
+	return i915_sw_fence_await_dma_fence(&rq->submit,
+					     &signal->fence, 0,
+					     I915_FENCE_GFP);
+}
+
+static intel_engine_mask_t
+already_busywaiting(struct i915_request *rq)
+{
+	/*
+	 * Polling a semaphore causes bus traffic, delaying other users of
+	 * both the GPU and CPU. We want to limit the impact on others,
+	 * while taking advantage of early submission to reduce GPU
+	 * latency. Therefore we restrict ourselves to not using more
+	 * than one semaphore from each source, and not using a semaphore
+	 * if we have detected the engine is saturated (i.e. would not be
+	 * submitted early and cause bus traffic reading an already passed
+	 * semaphore).
+	 *
+	 * See the are-we-too-late? check in __i915_request_submit().
+	 */
+	return rq->sched.semaphores | rq->hw_context->saturated;
+}
+
+static int
 emit_semaphore_wait(struct i915_request *to,
 		    struct i915_request *from,
 		    gfp_t gfp)
@@ -811,11 +864,15 @@ emit_semaphore_wait(struct i915_request *to,
 	GEM_BUG_ON(INTEL_GEN(to->i915) < 8);
 
 	/* Just emit the first semaphore we see as request space is limited. */
-	if (to->sched.semaphores & from->engine->mask)
+	if (already_busywaiting(to) & from->engine->mask)
 		return i915_sw_fence_await_dma_fence(&to->submit,
 						     &from->fence, 0,
 						     I915_FENCE_GFP);
 
+	err = i915_request_await_start(to, from);
+	if (err < 0)
+		return err;
+
 	err = i915_sw_fence_await_dma_fence(&to->semaphore,
 					    &from->fence, 0,
 					    I915_FENCE_GFP);
diff --git a/drivers/gpu/drm/i915/intel_color.c b/drivers/gpu/drm/i915/intel_color.c
index ca341a9e47e6..9093daabc290 100644
--- a/drivers/gpu/drm/i915/intel_color.c
+++ b/drivers/gpu/drm/i915/intel_color.c
@@ -173,13 +173,13 @@ static void icl_update_output_csc(struct intel_crtc *crtc,
 	I915_WRITE(PIPE_CSC_OUTPUT_PREOFF_LO(pipe), preoff[2]);
 
 	I915_WRITE(PIPE_CSC_OUTPUT_COEFF_RY_GY(pipe), coeff[0] << 16 | coeff[1]);
-	I915_WRITE(PIPE_CSC_OUTPUT_COEFF_BY(pipe), coeff[2]);
+	I915_WRITE(PIPE_CSC_OUTPUT_COEFF_BY(pipe), coeff[2] << 16);
 
 	I915_WRITE(PIPE_CSC_OUTPUT_COEFF_RU_GU(pipe), coeff[3] << 16 | coeff[4]);
-	I915_WRITE(PIPE_CSC_OUTPUT_COEFF_BU(pipe), coeff[5]);
+	I915_WRITE(PIPE_CSC_OUTPUT_COEFF_BU(pipe), coeff[5] << 16);
 
 	I915_WRITE(PIPE_CSC_OUTPUT_COEFF_RV_GV(pipe), coeff[6] << 16 | coeff[7]);
-	I915_WRITE(PIPE_CSC_OUTPUT_COEFF_BV(pipe), coeff[8]);
+	I915_WRITE(PIPE_CSC_OUTPUT_COEFF_BV(pipe), coeff[8] << 16);
 
 	I915_WRITE(PIPE_CSC_OUTPUT_POSTOFF_HI(pipe), postoff[0]);
 	I915_WRITE(PIPE_CSC_OUTPUT_POSTOFF_ME(pipe), postoff[1]);
diff --git a/drivers/gpu/drm/i915/intel_context.c b/drivers/gpu/drm/i915/intel_context.c
index 8931e0fee873..924cc556223a 100644
--- a/drivers/gpu/drm/i915/intel_context.c
+++ b/drivers/gpu/drm/i915/intel_context.c
@@ -230,6 +230,7 @@ intel_context_init(struct intel_context *ce,
 	ce->gem_context = ctx;
 	ce->engine = engine;
 	ce->ops = engine->cops;
+	ce->saturated = 0;
 
 	INIT_LIST_HEAD(&ce->signal_link);
 	INIT_LIST_HEAD(&ce->signals);
diff --git a/drivers/gpu/drm/i915/intel_context_types.h b/drivers/gpu/drm/i915/intel_context_types.h
index 68b4ca1611e0..339c7437fe82 100644
--- a/drivers/gpu/drm/i915/intel_context_types.h
+++ b/drivers/gpu/drm/i915/intel_context_types.h
@@ -14,6 +14,7 @@
 #include <linux/types.h>
 
 #include "i915_active_types.h"
+#include "intel_engine_types.h"
 
 struct i915_gem_context;
 struct i915_vma;
@@ -58,6 +59,8 @@ struct intel_context {
 	atomic_t pin_count;
 	struct mutex pin_mutex; /* guards pinning and associated on-gpuing */
 
+	intel_engine_mask_t saturated; /* submitting semaphores too late? */
+
 	/**
 	 * active_tracker: Active tracker for the external rq activity
 	 * on this intel_context object.
diff --git a/drivers/gpu/drm/i915/intel_workarounds.c b/drivers/gpu/drm/i915/intel_workarounds.c
index ccaf63679435..9682dd575152 100644
--- a/drivers/gpu/drm/i915/intel_workarounds.c
+++ b/drivers/gpu/drm/i915/intel_workarounds.c
@@ -541,10 +541,6 @@ static void icl_ctx_workarounds_init(struct intel_engine_cs *engine)
 		WA_SET_BIT_MASKED(GEN7_ROW_CHICKEN2,
 				  GEN11_TDL_CLOCK_GATING_FIX_DISABLE);
 
-	/* WaEnableStateCacheRedirectToCS:icl */
-	WA_SET_BIT_MASKED(GEN9_SLICE_COMMON_ECO_CHICKEN1,
-			  GEN11_STATE_CACHE_REDIRECT_TO_CS);
-
 	/* Wa_2006665173:icl (pre-prod) */
 	if (IS_ICL_REVID(i915, ICL_REVID_A0, ICL_REVID_A0))
 		WA_SET_BIT_MASKED(GEN11_COMMON_SLICE_CHICKEN3,
@@ -1050,6 +1046,9 @@ static void icl_whitelist_build(struct i915_wa_list *w)
 
 	/* WaAllowUMDToModifySamplerMode:icl */
 	whitelist_reg(w, GEN10_SAMPLER_MODE);
+
+	/* WaEnableStateCacheRedirectToCS:icl */
+	whitelist_reg(w, GEN9_SLICE_COMMON_ECO_CHICKEN1);
 }
 
 void intel_engine_init_whitelist(struct intel_engine_cs *engine)
diff --git a/drivers/gpu/drm/msm/Makefile b/drivers/gpu/drm/msm/Makefile
index 72d1bfcaab7a..7a05cbf2f820 100644
--- a/drivers/gpu/drm/msm/Makefile
+++ b/drivers/gpu/drm/msm/Makefile
@@ -1,7 +1,7 @@
 # SPDX-License-Identifier: GPL-2.0
-ccflags-y := -Idrivers/gpu/drm/msm
-ccflags-y += -Idrivers/gpu/drm/msm/disp/dpu1
-ccflags-$(CONFIG_DRM_MSM_DSI) += -Idrivers/gpu/drm/msm/dsi
+ccflags-y := -I $(srctree)/$(src)
+ccflags-y += -I $(srctree)/$(src)/disp/dpu1
+ccflags-$(CONFIG_DRM_MSM_DSI) += -I $(srctree)/$(src)/dsi
 
 msm-y := \
 	adreno/adreno_device.o \
diff --git a/drivers/gpu/drm/msm/adreno/a6xx_gpu.c b/drivers/gpu/drm/msm/adreno/a6xx_gpu.c
index ec24508b9d68..e74dce474250 100644
--- a/drivers/gpu/drm/msm/adreno/a6xx_gpu.c
+++ b/drivers/gpu/drm/msm/adreno/a6xx_gpu.c
@@ -527,6 +527,7 @@ static int a6xx_hw_init(struct msm_gpu *gpu)
 		dev_warn_once(gpu->dev->dev,
 			"Zap shader not enabled - using SECVID_TRUST_CNTL instead\n");
 		gpu_write(gpu, REG_A6XX_RBBM_SECVID_TRUST_CNTL, 0x0);
+		ret = 0;
 	}
 
 out:
diff --git a/drivers/gpu/drm/nouveau/Kbuild b/drivers/gpu/drm/nouveau/Kbuild
index 581404e6544d..378c5dd692b0 100644
--- a/drivers/gpu/drm/nouveau/Kbuild
+++ b/drivers/gpu/drm/nouveau/Kbuild
@@ -1,7 +1,7 @@
-ccflags-y += -I$(src)/include
-ccflags-y += -I$(src)/include/nvkm
-ccflags-y += -I$(src)/nvkm
-ccflags-y += -I$(src)
+ccflags-y += -I $(srctree)/$(src)/include
+ccflags-y += -I $(srctree)/$(src)/include/nvkm
+ccflags-y += -I $(srctree)/$(src)/nvkm
+ccflags-y += -I $(srctree)/$(src)
 
 # NVKM - HW resource manager
 #- code also used by various userspace tools/tests
diff --git a/drivers/gpu/drm/panfrost/Kconfig b/drivers/gpu/drm/panfrost/Kconfig
index 7f5e572daa2d..591611dc4e34 100644
--- a/drivers/gpu/drm/panfrost/Kconfig
+++ b/drivers/gpu/drm/panfrost/Kconfig
@@ -3,7 +3,7 @@
 config DRM_PANFROST
 	tristate "Panfrost (DRM support for ARM Mali Midgard/Bifrost GPUs)"
 	depends on DRM
-	depends on ARM || ARM64 || COMPILE_TEST
+	depends on ARM || ARM64 || (COMPILE_TEST && !GENERIC_ATOMIC64)
 	depends on MMU
 	select DRM_SCHED
 	select IOMMU_SUPPORT
diff --git a/drivers/gpu/drm/panfrost/panfrost_devfreq.c b/drivers/gpu/drm/panfrost/panfrost_devfreq.c
index a8121ae67ee3..238bd1d89d43 100644
--- a/drivers/gpu/drm/panfrost/panfrost_devfreq.c
+++ b/drivers/gpu/drm/panfrost/panfrost_devfreq.c
@@ -7,6 +7,7 @@
 #include <linux/regulator/consumer.h>
 
 #include "panfrost_device.h"
+#include "panfrost_devfreq.h"
 #include "panfrost_features.h"
 #include "panfrost_issues.h"
 #include "panfrost_gpu.h"
diff --git a/drivers/gpu/drm/panfrost/panfrost_device.c b/drivers/gpu/drm/panfrost/panfrost_device.c
index 91e8fb0f2b25..970f669c6d29 100644
--- a/drivers/gpu/drm/panfrost/panfrost_device.c
+++ b/drivers/gpu/drm/panfrost/panfrost_device.c
@@ -98,6 +98,7 @@ int panfrost_device_init(struct panfrost_device *pfdev)
 	struct resource *res;
 
 	mutex_init(&pfdev->sched_lock);
+	mutex_init(&pfdev->reset_lock);
 	INIT_LIST_HEAD(&pfdev->scheduled_jobs);
 
 	spin_lock_init(&pfdev->hwaccess_lock);
diff --git a/drivers/gpu/drm/panfrost/panfrost_device.h b/drivers/gpu/drm/panfrost/panfrost_device.h
index 1ba48d105763..56f452dfb490 100644
--- a/drivers/gpu/drm/panfrost/panfrost_device.h
+++ b/drivers/gpu/drm/panfrost/panfrost_device.h
@@ -78,6 +78,7 @@ struct panfrost_device {
 	struct list_head scheduled_jobs;
 
 	struct mutex sched_lock;
+	struct mutex reset_lock;
 
 	struct {
 		struct devfreq *devfreq;
diff --git a/drivers/gpu/drm/panfrost/panfrost_drv.c b/drivers/gpu/drm/panfrost/panfrost_drv.c
index c06af78ab833..94b0819ad50b 100644
--- a/drivers/gpu/drm/panfrost/panfrost_drv.c
+++ b/drivers/gpu/drm/panfrost/panfrost_drv.c
@@ -3,8 +3,6 @@
 /* Copyright 2019 Linaro, Ltd., Rob Herring <robh@kernel.org> */
 /* Copyright 2019 Collabora ltd. */
 
-#include <linux/bitfield.h>
-#include <linux/dma-mapping.h>
 #include <linux/module.h>
 #include <linux/of_platform.h>
 #include <linux/pagemap.h>
@@ -172,13 +170,27 @@ static int panfrost_ioctl_submit(struct drm_device *dev, void *data,
 {
 	struct panfrost_device *pfdev = dev->dev_private;
 	struct drm_panfrost_submit *args = data;
-	struct drm_syncobj *sync_out;
+	struct drm_syncobj *sync_out = NULL;
 	struct panfrost_job *job;
 	int ret = 0;
 
+	if (!args->jc)
+		return -EINVAL;
+
+	if (args->requirements && args->requirements != PANFROST_JD_REQ_FS)
+		return -EINVAL;
+
+	if (args->out_sync > 0) {
+		sync_out = drm_syncobj_find(file, args->out_sync);
+		if (!sync_out)
+			return -ENODEV;
+	}
+
 	job = kzalloc(sizeof(*job), GFP_KERNEL);
-	if (!job)
-		return -ENOMEM;
+	if (!job) {
+		ret = -ENOMEM;
+		goto fail_out_sync;
+	}
 
 	kref_init(&job->refcount);
 
@@ -190,25 +202,24 @@ static int panfrost_ioctl_submit(struct drm_device *dev, void *data,
 
 	ret = panfrost_copy_in_sync(dev, file, args, job);
 	if (ret)
-		goto fail;
+		goto fail_job;
 
 	ret = panfrost_lookup_bos(dev, file, args, job);
 	if (ret)
-		goto fail;
+		goto fail_job;
 
 	ret = panfrost_job_push(job);
 	if (ret)
-		goto fail;
+		goto fail_job;
 
 	/* Update the return sync object for the job */
-	sync_out = drm_syncobj_find(file, args->out_sync);
-	if (sync_out) {
+	if (sync_out)
 		drm_syncobj_replace_fence(sync_out, job->render_done_fence);
-		drm_syncobj_put(sync_out);
-	}
 
-fail:
+fail_job:
 	panfrost_job_put(job);
+fail_out_sync:
+	drm_syncobj_put(sync_out);
 
 	return ret;
 }
@@ -384,16 +395,15 @@ static int panfrost_probe(struct platform_device *pdev)
 
 	err = panfrost_device_init(pfdev);
 	if (err) {
-		dev_err(&pdev->dev, "Fatal error during GPU init\n");
+		if (err != -EPROBE_DEFER)
+			dev_err(&pdev->dev, "Fatal error during GPU init\n");
 		goto err_out0;
 	}
 
-	dma_set_mask_and_coherent(pfdev->dev,
-		DMA_BIT_MASK(FIELD_GET(0xff00, pfdev->features.mmu_features)));
-
 	err = panfrost_devfreq_init(pfdev);
 	if (err) {
-		dev_err(&pdev->dev, "Fatal error during devfreq init\n");
+		if (err != -EPROBE_DEFER)
+			dev_err(&pdev->dev, "Fatal error during devfreq init\n");
 		goto err_out1;
 	}
 
@@ -410,6 +420,7 @@ static int panfrost_probe(struct platform_device *pdev)
 err_out1:
 	panfrost_device_fini(pfdev);
 err_out0:
+	pm_runtime_disable(pfdev->dev);
 	drm_dev_put(ddev);
 	return err;
 }
diff --git a/drivers/gpu/drm/panfrost/panfrost_gem.c b/drivers/gpu/drm/panfrost/panfrost_gem.c
index 8a0376283a21..a5528a360ef4 100644
--- a/drivers/gpu/drm/panfrost/panfrost_gem.c
+++ b/drivers/gpu/drm/panfrost/panfrost_gem.c
@@ -14,7 +14,7 @@
 /* Called DRM core on the last userspace/kernel unreference of the
  * BO.
  */
-void panfrost_gem_free_object(struct drm_gem_object *obj)
+static void panfrost_gem_free_object(struct drm_gem_object *obj)
 {
 	struct panfrost_gem_object *bo = to_panfrost_bo(obj);
 	struct panfrost_device *pfdev = obj->dev->dev_private;
diff --git a/drivers/gpu/drm/panfrost/panfrost_gpu.c b/drivers/gpu/drm/panfrost/panfrost_gpu.c
index aceaf6e44a09..58ef25573cda 100644
--- a/drivers/gpu/drm/panfrost/panfrost_gpu.c
+++ b/drivers/gpu/drm/panfrost/panfrost_gpu.c
@@ -2,8 +2,10 @@
 /* Copyright 2018 Marty E. Plummer <hanetzer@startmail.com> */
 /* Copyright 2019 Linaro, Ltd., Rob Herring <robh@kernel.org> */
 /* Copyright 2019 Collabora ltd. */
+#include <linux/bitfield.h>
 #include <linux/bitmap.h>
 #include <linux/delay.h>
+#include <linux/dma-mapping.h>
 #include <linux/interrupt.h>
 #include <linux/io.h>
 #include <linux/iopoll.h>
@@ -276,13 +278,13 @@ static void panfrost_gpu_init_features(struct panfrost_device *pfdev)
 		 pfdev->features.hw_issues);
 
 	dev_info(pfdev->dev, "Features: L2:0x%08x Shader:0x%08x Tiler:0x%08x Mem:0x%0x MMU:0x%08x AS:0x%x JS:0x%x",
-		 gpu_read(pfdev, GPU_L2_FEATURES),
-		 gpu_read(pfdev, GPU_CORE_FEATURES),
-		 gpu_read(pfdev, GPU_TILER_FEATURES),
-		 gpu_read(pfdev, GPU_MEM_FEATURES),
-		 gpu_read(pfdev, GPU_MMU_FEATURES),
-		 gpu_read(pfdev, GPU_AS_PRESENT),
-		 gpu_read(pfdev, GPU_JS_PRESENT));
+		 pfdev->features.l2_features,
+		 pfdev->features.core_features,
+		 pfdev->features.tiler_features,
+		 pfdev->features.mem_features,
+		 pfdev->features.mmu_features,
+		 pfdev->features.as_present,
+		 pfdev->features.js_present);
 
 	dev_info(pfdev->dev, "shader_present=0x%0llx l2_present=0x%0llx",
 		 pfdev->features.shader_present, pfdev->features.l2_present);
@@ -332,6 +334,9 @@ int panfrost_gpu_init(struct panfrost_device *pfdev)
 
 	panfrost_gpu_init_features(pfdev);
 
+	dma_set_mask_and_coherent(pfdev->dev,
+		DMA_BIT_MASK(FIELD_GET(0xff00, pfdev->features.mmu_features)));
+
 	irq = platform_get_irq_byname(to_platform_device(pfdev->dev), "gpu");
 	if (irq <= 0)
 		return -ENODEV;
diff --git a/drivers/gpu/drm/panfrost/panfrost_job.c b/drivers/gpu/drm/panfrost/panfrost_job.c
index 0a7ed04f7d52..a5716c8fe8b3 100644
--- a/drivers/gpu/drm/panfrost/panfrost_job.c
+++ b/drivers/gpu/drm/panfrost/panfrost_job.c
@@ -384,6 +384,8 @@ static void panfrost_job_timedout(struct drm_sched_job *sched_job)
 		job_read(pfdev, JS_TAIL_LO(js)),
 		sched_job);
 
+	mutex_lock(&pfdev->reset_lock);
+
 	for (i = 0; i < NUM_JOB_SLOTS; i++)
 		drm_sched_stop(&pfdev->js->queue[i].sched);
 
@@ -406,6 +408,8 @@ static void panfrost_job_timedout(struct drm_sched_job *sched_job)
 	/* restart scheduler after GPU is usable again */
 	for (i = 0; i < NUM_JOB_SLOTS; i++)
 		drm_sched_start(&pfdev->js->queue[i].sched, true);
+
+	mutex_unlock(&pfdev->reset_lock);
 }
 
 static const struct drm_sched_backend_ops panfrost_sched_ops = {
diff --git a/drivers/gpu/drm/radeon/radeon_display.c b/drivers/gpu/drm/radeon/radeon_display.c
index aa898c699101..433df7036f96 100644
--- a/drivers/gpu/drm/radeon/radeon_display.c
+++ b/drivers/gpu/drm/radeon/radeon_display.c
@@ -922,12 +922,12 @@ static void avivo_get_fb_ref_div(unsigned nom, unsigned den, unsigned post_div,
 	ref_div_max = max(min(100 / post_div, ref_div_max), 1u);
 
 	/* get matching reference and feedback divider */
-	*ref_div = min(max(DIV_ROUND_CLOSEST(den, post_div), 1u), ref_div_max);
+	*ref_div = min(max(den/post_div, 1u), ref_div_max);
 	*fb_div = DIV_ROUND_CLOSEST(nom * *ref_div * post_div, den);
 
 	/* limit fb divider to its maximum */
 	if (*fb_div > fb_div_max) {
-		*ref_div = DIV_ROUND_CLOSEST(*ref_div * fb_div_max, *fb_div);
+		*ref_div = (*ref_div * fb_div_max)/(*fb_div);
 		*fb_div = fb_div_max;
 	}
 }
diff --git a/drivers/gpu/drm/rockchip/rockchip_drm_drv.c b/drivers/gpu/drm/rockchip/rockchip_drm_drv.c
index 8d7a634c12c2..cb938d3cd3c2 100644
--- a/drivers/gpu/drm/rockchip/rockchip_drm_drv.c
+++ b/drivers/gpu/drm/rockchip/rockchip_drm_drv.c
@@ -448,6 +448,14 @@ static int rockchip_drm_platform_remove(struct platform_device *pdev)
 	return 0;
 }
 
+static void rockchip_drm_platform_shutdown(struct platform_device *pdev)
+{
+	struct drm_device *drm = platform_get_drvdata(pdev);
+
+	if (drm)
+		drm_atomic_helper_shutdown(drm);
+}
+
 static const struct of_device_id rockchip_drm_dt_ids[] = {
 	{ .compatible = "rockchip,display-subsystem", },
 	{ /* sentinel */ },
@@ -457,6 +465,7 @@ MODULE_DEVICE_TABLE(of, rockchip_drm_dt_ids);
 static struct platform_driver rockchip_drm_platform_driver = {
 	.probe = rockchip_drm_platform_probe,
 	.remove = rockchip_drm_platform_remove,
+	.shutdown = rockchip_drm_platform_shutdown,
 	.driver = {
 		.name = "rockchip-drm",
 		.of_match_table = rockchip_drm_dt_ids,