114 files changed, 8561 insertions, 742 deletions
diff --git a/Documentation/devicetree/bindings/display/panel/feiyang,fy07024di26a30d.txt b/Documentation/devicetree/bindings/display/panel/feiyang,fy07024di26a30d.txt
new file mode 100644
index 000000000000..82caa7b65ae8
--- /dev/null
+++ b/Documentation/devicetree/bindings/display/panel/feiyang,fy07024di26a30d.txt
@@ -0,0 +1,20 @@
+Feiyang FY07024DI26A30-D 7" MIPI-DSI LCD Panel
+
+Required properties:
+- compatible: must be "feiyang,fy07024di26a30d"
+- reg: DSI virtual channel used by that screen
+- avdd-supply: analog regulator dc1 switch
+- dvdd-supply: 3v3 digital regulator
+- reset-gpios: a GPIO phandle for the reset pin
+
+Optional properties:
+- backlight: phandle for the backlight control.
+
+panel@0 {
+	compatible = "feiyang,fy07024di26a30d";
+	reg = <0>;
+	avdd-supply = <&reg_dc1sw>;
+	dvdd-supply = <&reg_dldo2>;
+	reset-gpios = <&pio 3 24 GPIO_ACTIVE_HIGH>; /* LCD-RST: PD24 */
+	backlight = <&backlight>;
+};
diff --git a/Documentation/devicetree/bindings/display/panel/innolux,p079zca.txt b/Documentation/devicetree/bindings/display/panel/innolux,p079zca.txt
index d0f55161579a..3ab8c7412cf6 100644
--- a/Documentation/devicetree/bindings/display/panel/innolux,p079zca.txt
+++ b/Documentation/devicetree/bindings/display/panel/innolux,p079zca.txt
@@ -12,7 +12,7 @@ Optional properties:
 Example:
 
 	&mipi_dsi {
-		panel {
+		panel@0 {
 			compatible = "innolux,p079zca";
 			reg = <0>;
 			power-supply = <...>;
diff --git a/Documentation/devicetree/bindings/display/panel/innolux,p097pfg.txt b/Documentation/devicetree/bindings/display/panel/innolux,p097pfg.txt
index 595d9dfeffd3..d1cab3a8f0fb 100644
--- a/Documentation/devicetree/bindings/display/panel/innolux,p097pfg.txt
+++ b/Documentation/devicetree/bindings/display/panel/innolux,p097pfg.txt
@@ -13,7 +13,7 @@ Optional properties:
 Example:
 
 	&mipi_dsi {
-		panel {
+		panel@0 {
 			compatible = "innolux,p079zca";
 			reg = <0>;
 			avdd-supply = <...>;
diff --git a/Documentation/devicetree/bindings/display/panel/kingdisplay,kd097d04.txt b/Documentation/devicetree/bindings/display/panel/kingdisplay,kd097d04.txt
index 164a5fa236da..cfefff688614 100644
--- a/Documentation/devicetree/bindings/display/panel/kingdisplay,kd097d04.txt
+++ b/Documentation/devicetree/bindings/display/panel/kingdisplay,kd097d04.txt
@@ -12,7 +12,7 @@ Optional properties:
 Example:
 
 	&mipi_dsi {
-		panel {
+		panel@0 {
 			compatible = "kingdisplay,kd097d04";
 			reg = <0>;
 			power-supply = <...>;
diff --git a/Documentation/devicetree/bindings/display/panel/rocktech,jh057n00900.txt b/Documentation/devicetree/bindings/display/panel/rocktech,jh057n00900.txt
new file mode 100644
index 000000000000..1b5763200cf6
--- /dev/null
+++ b/Documentation/devicetree/bindings/display/panel/rocktech,jh057n00900.txt
@@ -0,0 +1,18 @@
+Rocktech jh057n00900 5.5" 720x1440 TFT LCD panel
+
+Required properties:
+- compatible: should be "rocktech,jh057n00900"
+- reg: DSI virtual channel of the peripheral
+- reset-gpios: panel reset gpio
+- backlight: phandle of the backlight device attached to the panel
+
+Example:
+
+	&mipi_dsi {
+		panel@0 {
+			compatible = "rocktech,jh057n00900";
+			reg = <0>;
+			backlight = <&backlight>;
+			reset-gpios = <&gpio3 13 GPIO_ACTIVE_LOW>;
+		};
+	};
diff --git a/Documentation/devicetree/bindings/display/rockchip/rockchip,rk3066-hdmi.txt b/Documentation/devicetree/bindings/display/rockchip/rockchip,rk3066-hdmi.txt
new file mode 100644
index 000000000000..d1ad31bca8d9
--- /dev/null
+++ b/Documentation/devicetree/bindings/display/rockchip/rockchip,rk3066-hdmi.txt
@@ -0,0 +1,72 @@
+Rockchip specific extensions for rk3066 HDMI
+============================================
+
+Required properties:
+- compatible:
+	"rockchip,rk3066-hdmi";
+- reg:
+	Physical base address and length of the controller's registers.
+- clocks, clock-names:
+	Phandle to HDMI controller clock, name should be "hclk".
+- interrupts:
+	HDMI interrupt number.
+- power-domains:
+	Phandle to the RK3066_PD_VIO power domain.
+- rockchip,grf:
+	This soc uses GRF regs to switch the HDMI TX input between vop0 and vop1.
+- ports:
+	Contains one port node with two endpoints, numbered 0 and 1,
+	connected respectively to vop0 and vop1.
+	Contains one port node with one endpoint
+	connected to a hdmi-connector node.
+- pinctrl-0, pinctrl-name:
+	Switch the iomux for the HPD/I2C pins to HDMI function.
+
+Example:
+	hdmi: hdmi@10116000 {
+		compatible = "rockchip,rk3066-hdmi";
+		reg = <0x10116000 0x2000>;
+		interrupts = <GIC_SPI 64 IRQ_TYPE_LEVEL_HIGH>;
+		clocks = <&cru HCLK_HDMI>;
+		clock-names = "hclk";
+		power-domains = <&power RK3066_PD_VIO>;
+		rockchip,grf = <&grf>;
+		pinctrl-names = "default";
+		pinctrl-0 = <&hdmii2c_xfer>, <&hdmi_hpd>;
+
+		ports {
+			#address-cells = <1>;
+			#size-cells = <0>;
+			hdmi_in: port@0 {
+				reg = <0>;
+				#address-cells = <1>;
+				#size-cells = <0>;
+				hdmi_in_vop0: endpoint@0 {
+					reg = <0>;
+					remote-endpoint = <&vop0_out_hdmi>;
+				};
+				hdmi_in_vop1: endpoint@1 {
+					reg = <1>;
+					remote-endpoint = <&vop1_out_hdmi>;
+				};
+			};
+			hdmi_out: port@1 {
+				reg = <1>;
+				hdmi_out_con: endpoint {
+					remote-endpoint = <&hdmi_con_in>;
+				};
+			};
+		};
+	};
+
+&pinctrl {
+		hdmi {
+			hdmi_hpd: hdmi-hpd {
+				rockchip,pins = <0 RK_PA0 1 &pcfg_pull_default>;
+			};
+			hdmii2c_xfer: hdmii2c-xfer {
+				rockchip,pins = <0 RK_PA1 1 &pcfg_pull_none>,
+						<0 RK_PA2 1 &pcfg_pull_none>;
+			};
+		};
+};
diff --git a/Documentation/devicetree/bindings/gpu/aspeed-gfx.txt b/Documentation/devicetree/bindings/gpu/aspeed-gfx.txt
new file mode 100644
index 000000000000..958bdf962339
--- /dev/null
+++ b/Documentation/devicetree/bindings/gpu/aspeed-gfx.txt
@@ -0,0 +1,41 @@
+Device tree configuration for the GFX display device on the ASPEED SoCs
+
+Required properties:
+  - compatible
+    * Must be one of the following:
+      + aspeed,ast2500-gfx
+      + aspeed,ast2400-gfx
+    * In addition, the ASPEED pinctrl bindings require the 'syscon' property to
+      be present
+
+  - reg: Physical base address and length of the GFX registers
+
+  - interrupts: interrupt number for the GFX device
+
+  - clocks: clock number used to generate the pixel clock
+
+  - resets: reset line that must be released to use the GFX device
+
+  - memory-region:
+    Phandle to a memory region to allocate from, as defined in
+    Documentation/devicetree/bindings/reserved-memory/reserved-memory.txt
+
+
+Example:
+
+gfx: display@1e6e6000 {
+	compatible = "aspeed,ast2500-gfx", "syscon";
+	reg = <0x1e6e6000 0x1000>;
+	reg-io-width = <4>;
+	clocks = <&syscon ASPEED_CLK_GATE_D1CLK>;
+	resets = <&syscon ASPEED_RESET_CRT1>;
+	interrupts = <0x19>;
+	memory-region = <&gfx_memory>;
+};
+
+gfx_memory: framebuffer {
+	size = <0x01000000>;
+	alignment = <0x01000000>;
+	compatible = "shared-dma-pool";
+	reusable;
+};
diff --git a/Documentation/devicetree/bindings/vendor-prefixes.txt b/Documentation/devicetree/bindings/vendor-prefixes.txt
index 5e6ea1f991d8..9506140167d6 100644
--- a/Documentation/devicetree/bindings/vendor-prefixes.txt
+++ b/Documentation/devicetree/bindings/vendor-prefixes.txt
@@ -345,6 +345,7 @@ ricoh	Ricoh Co. Ltd.
 rikomagic	Rikomagic Tech Corp. Ltd
 riscv	RISC-V Foundation
 rockchip	Fuzhou Rockchip Electronics Co., Ltd
+rocktech	ROCKTECH DISPLAYS LIMITED
 rohm	ROHM Semiconductor Co., Ltd
 ronbo   Ronbo Electronics
 roofull	Shenzhen Roofull Technology Co, Ltd
diff --git a/MAINTAINERS b/MAINTAINERS
index e6d476bf9e78..4b69b6b48b3b 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -4894,6 +4894,14 @@ M:	Dave Airlie <airlied@redhat.com>
 S:	Odd Fixes
 F:	drivers/gpu/drm/ast/
 
+DRM DRIVER FOR ASPEED BMC GFX
+M:	Joel Stanley <joel@jms.id.au>
+L:	linux-aspeed@lists.ozlabs.org
+T:	git git://anongit.freedesktop.org/drm/drm-misc
+S:	Supported
+F:	drivers/gpu/drm/aspeed/
+F:	Documentation/devicetree/bindings/gpu/aspeed-gfx.txt
+
 DRM DRIVER FOR BOCHS VIRTUAL GPU
 M:	Gerd Hoffmann <kraxel@redhat.com>
 L:	virtualization@lists.linux-foundation.org
@@ -4907,6 +4915,12 @@ T:	git git://anongit.freedesktop.org/drm/drm-misc
 S:	Maintained
 F:	drivers/gpu/drm/tve200/
 
+DRM DRIVER FOR FEIYANG FY07024DI26A30-D MIPI-DSI LCD PANELS
+M:	Jagan Teki <jagan@amarulasolutions.com>
+S:	Maintained
+F:	drivers/gpu/drm/panel/panel-feiyang-fy07024di26a30d.c
+F:	Documentation/devicetree/bindings/display/panel/feiyang,fy07024di26a30d.txt
+
 DRM DRIVER FOR ILITEK ILI9225 PANELS
 M:	David Lechner <david@lechnology.com>
 S:	Maintained
@@ -4998,6 +5012,12 @@ S:	Orphan / Obsolete
 F:	drivers/gpu/drm/r128/
 F:	include/uapi/drm/r128_drm.h
 
+DRM DRIVER FOR ROCKTECH JH057N00900 PANELS
+M:	Guido Günther <agx@sigxcpu.org>
+S:	Maintained
+F:	drivers/gpu/drm/panel/panel-rocktech-jh057n00900.c
+F:	Documentation/devicetree/bindings/display/panel/rocktech,jh057n00900.txt
+
 DRM DRIVER FOR SAVAGE VIDEO CARDS
 S:	Orphan / Obsolete
 F:	drivers/gpu/drm/savage/
@@ -5186,6 +5206,15 @@ S:	Maintained
 F:	drivers/gpu/drm/hisilicon/
 F:	Documentation/devicetree/bindings/display/hisilicon/
 
+DRM DRIVERS FOR LIMA
+M:	Qiang Yu <yuq825@gmail.com>
+L:	dri-devel@lists.freedesktop.org
+L:	lima@lists.freedesktop.org
+S:	Maintained
+F:	drivers/gpu/drm/lima/
+F:	include/uapi/drm/lima_drm.h
+T:	git git://anongit.freedesktop.org/drm/drm-misc
+
 DRM DRIVERS FOR MEDIATEK
 M:	CK Hu <ck.hu@mediatek.com>
 M:	Philipp Zabel <p.zabel@pengutronix.de>
diff --git a/drivers/dma-buf/Makefile b/drivers/dma-buf/Makefile
index 0913a6ccab5a..1f006e083eb9 100644
--- a/drivers/dma-buf/Makefile
+++ b/drivers/dma-buf/Makefile
@@ -1,4 +1,5 @@
-obj-y := dma-buf.o dma-fence.o dma-fence-array.o reservation.o seqno-fence.o
+obj-y := dma-buf.o dma-fence.o dma-fence-array.o dma-fence-chain.o \
+	 reservation.o seqno-fence.o
 obj-$(CONFIG_SYNC_FILE)		+= sync_file.o
 obj-$(CONFIG_SW_SYNC)		+= sw_sync.o sync_debug.o
 obj-$(CONFIG_UDMABUF)		+= udmabuf.o
diff --git a/drivers/dma-buf/dma-fence-chain.c b/drivers/dma-buf/dma-fence-chain.c
new file mode 100644
index 000000000000..c729f98a7bd3
--- /dev/null
+++ b/drivers/dma-buf/dma-fence-chain.c
@@ -0,0 +1,241 @@
+/*
+ * fence-chain: chain fences together in a timeline
+ *
+ * Copyright (C) 2018 Advanced Micro Devices, Inc.
+ * Authors:
+ *	Christian König <christian.koenig@amd.com>
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 as published by
+ * the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+ * more details.
+ */
+
+#include <linux/dma-fence-chain.h>
+
+static bool dma_fence_chain_enable_signaling(struct dma_fence *fence);
+
+/**
+ * dma_fence_chain_get_prev - use RCU to get a reference to the previous fence
+ * @chain: chain node to get the previous node from
+ *
+ * Use dma_fence_get_rcu_safe to get a reference to the previous fence of the
+ * chain node.
+ */
+static struct dma_fence *dma_fence_chain_get_prev(struct dma_fence_chain *chain)
+{
+	struct dma_fence *prev;
+
+	rcu_read_lock();
+	prev = dma_fence_get_rcu_safe(&chain->prev);
+	rcu_read_unlock();
+	return prev;
+}
+
+/**
+ * dma_fence_chain_walk - chain walking function
+ * @fence: current chain node
+ *
+ * Walk the chain to the next node. Returns the next fence or NULL if we are at
+ * the end of the chain. Garbage collects chain nodes which are already
+ * signaled.
+ */
+struct dma_fence *dma_fence_chain_walk(struct dma_fence *fence)
+{
+	struct dma_fence_chain *chain, *prev_chain;
+	struct dma_fence *prev, *replacement, *tmp;
+
+	chain = to_dma_fence_chain(fence);
+	if (!chain) {
+		dma_fence_put(fence);
+		return NULL;
+	}
+
+	while ((prev = dma_fence_chain_get_prev(chain))) {
+
+		prev_chain = to_dma_fence_chain(prev);
+		if (prev_chain) {
+			if (!dma_fence_is_signaled(prev_chain->fence))
+				break;
+
+			replacement = dma_fence_chain_get_prev(prev_chain);
+		} else {
+			if (!dma_fence_is_signaled(prev))
+				break;
+
+			replacement = NULL;
+		}
+
+		tmp = cmpxchg((void **)&chain->prev, (void *)prev, (void *)replacement);
+		if (tmp == prev)
+			dma_fence_put(tmp);
+		else
+			dma_fence_put(replacement);
+		dma_fence_put(prev);
+	}
+
+	dma_fence_put(fence);
+	return prev;
+}
+EXPORT_SYMBOL(dma_fence_chain_walk);
+
+/**
+ * dma_fence_chain_find_seqno - find fence chain node by seqno
+ * @pfence: pointer to the chain node where to start
+ * @seqno: the sequence number to search for
+ *
+ * Advance the fence pointer to the chain node which will signal this sequence
+ * number. If no sequence number is provided then this is a no-op.
+ *
+ * Returns EINVAL if the fence is not a chain node or the sequence number has
+ * not yet advanced far enough.
+ */
+int dma_fence_chain_find_seqno(struct dma_fence **pfence, uint64_t seqno)
+{
+	struct dma_fence_chain *chain;
+
+	if (!seqno)
+		return 0;
+
+	chain = to_dma_fence_chain(*pfence);
+	if (!chain || chain->base.seqno < seqno)
+		return -EINVAL;
+
+	dma_fence_chain_for_each(*pfence, &chain->base) {
+		if ((*pfence)->context != chain->base.context ||
+		    to_dma_fence_chain(*pfence)->prev_seqno < seqno)
+			break;
+	}
+	dma_fence_put(&chain->base);
+
+	return 0;
+}
+EXPORT_SYMBOL(dma_fence_chain_find_seqno);
+
+static const char *dma_fence_chain_get_driver_name(struct dma_fence *fence)
+{
+        return "dma_fence_chain";
+}
+
+static const char *dma_fence_chain_get_timeline_name(struct dma_fence *fence)
+{
+        return "unbound";
+}
+
+static void dma_fence_chain_irq_work(struct irq_work *work)
+{
+	struct dma_fence_chain *chain;
+
+	chain = container_of(work, typeof(*chain), work);
+
+	/* Try to rearm the callback */
+	if (!dma_fence_chain_enable_signaling(&chain->base))
+		/* Ok, we are done. No more unsignaled fences left */
+		dma_fence_signal(&chain->base);
+	dma_fence_put(&chain->base);
+}
+
+static void dma_fence_chain_cb(struct dma_fence *f, struct dma_fence_cb *cb)
+{
+	struct dma_fence_chain *chain;
+
+	chain = container_of(cb, typeof(*chain), cb);
+	irq_work_queue(&chain->work);
+	dma_fence_put(f);
+}
+
+static bool dma_fence_chain_enable_signaling(struct dma_fence *fence)
+{
+	struct dma_fence_chain *head = to_dma_fence_chain(fence);
+
+	dma_fence_get(&head->base);
+	dma_fence_chain_for_each(fence, &head->base) {
+		struct dma_fence_chain *chain = to_dma_fence_chain(fence);
+		struct dma_fence *f = chain ? chain->fence : fence;
+
+		dma_fence_get(f);
+		if (!dma_fence_add_callback(f, &head->cb, dma_fence_chain_cb)) {
+			dma_fence_put(fence);
+			return true;
+		}
+		dma_fence_put(f);
+	}
+	dma_fence_put(&head->base);
+	return false;
+}
+
+static bool dma_fence_chain_signaled(struct dma_fence *fence)
+{
+	dma_fence_chain_for_each(fence, fence) {
+		struct dma_fence_chain *chain = to_dma_fence_chain(fence);
+		struct dma_fence *f = chain ? chain->fence : fence;
+
+		if (!dma_fence_is_signaled(f)) {
+			dma_fence_put(fence);
+			return false;
+		}
+	}
+
+	return true;
+}
+
+static void dma_fence_chain_release(struct dma_fence *fence)
+{
+	struct dma_fence_chain *chain = to_dma_fence_chain(fence);
+
+	dma_fence_put(rcu_dereference_protected(chain->prev, true));
+	dma_fence_put(chain->fence);
+	dma_fence_free(fence);
+}
+
+const struct dma_fence_ops dma_fence_chain_ops = {
+	.get_driver_name = dma_fence_chain_get_driver_name,
+	.get_timeline_name = dma_fence_chain_get_timeline_name,
+	.enable_signaling = dma_fence_chain_enable_signaling,
+	.signaled = dma_fence_chain_signaled,
+	.release = dma_fence_chain_release,
+};
+EXPORT_SYMBOL(dma_fence_chain_ops);
+
+/**
+ * dma_fence_chain_init - initialize a fence chain
+ * @chain: the chain node to initialize
+ * @prev: the previous fence
+ * @fence: the current fence
+ *
+ * Initialize a new chain node and either start a new chain or add the node to
+ * the existing chain of the previous fence.
+ */
+void dma_fence_chain_init(struct dma_fence_chain *chain,
+			  struct dma_fence *prev,
+			  struct dma_fence *fence,
+			  uint64_t seqno)
+{
+	struct dma_fence_chain *prev_chain = to_dma_fence_chain(prev);
+	uint64_t context;
+
+	spin_lock_init(&chain->lock);
+	rcu_assign_pointer(chain->prev, prev);
+	chain->fence = fence;
+	chain->prev_seqno = 0;
+	init_irq_work(&chain->work, dma_fence_chain_irq_work);
+
+	/* Try to reuse the context of the previous chain node. */
+	if (prev_chain && __dma_fence_is_later(seqno, prev->seqno)) {
+		context = prev->context;
+		chain->prev_seqno = prev->seqno;
+	} else {
+		context = dma_fence_context_alloc(1);
+		/* Make sure that we always have a valid sequence number. */
+		if (prev_chain)
+			seqno = max(prev->seqno, seqno);
+	}
+
+	dma_fence_init(&chain->base, &dma_fence_chain_ops,
+		       &chain->lock, context, seqno);
+}
+EXPORT_SYMBOL(dma_fence_chain_init);
diff --git a/drivers/gpu/drm/Kconfig b/drivers/gpu/drm/Kconfig
index 82bb221ec94e..bcbc4234893a 100644
--- a/drivers/gpu/drm/Kconfig
+++ b/drivers/gpu/drm/Kconfig
@@ -335,6 +335,10 @@ source "drivers/gpu/drm/xen/Kconfig"
 
 source "drivers/gpu/drm/vboxvideo/Kconfig"
 
+source "drivers/gpu/drm/lima/Kconfig"
+
+source "drivers/gpu/drm/aspeed/Kconfig"
+
 # Keep legacy drivers last
 
 menuconfig DRM_LEGACY
diff --git a/drivers/gpu/drm/Makefile b/drivers/gpu/drm/Makefile
index 0baf148e3687..d97c0a3a8cfc 100644
--- a/drivers/gpu/drm/Makefile
+++ b/drivers/gpu/drm/Makefile
@@ -110,3 +110,5 @@ obj-$(CONFIG_DRM_PL111) += pl111/
 obj-$(CONFIG_DRM_TVE200) += tve200/
 obj-$(CONFIG_DRM_XEN) += xen/
 obj-$(CONFIG_DRM_VBOXVIDEO) += vboxvideo/
+obj-$(CONFIG_DRM_LIMA)  += lima/
+obj-$(CONFIG_DRM_ASPEED_GFX) += aspeed/
diff --git a/drivers/gpu/drm/aspeed/Kconfig b/drivers/gpu/drm/aspeed/Kconfig
new file mode 100644
index 000000000000..42b74d18a41b
--- /dev/null
+++ b/drivers/gpu/drm/aspeed/Kconfig
@@ -0,0 +1,14 @@
+config DRM_ASPEED_GFX
+	tristate "ASPEED BMC Display Controller"
+	depends on DRM && OF
+	select DRM_KMS_HELPER
+	select DRM_KMS_CMA_HELPER
+	select DRM_PANEL
+	select DMA_CMA
+	select CMA
+	select MFD_SYSCON
+	help
+	  Chose this option if you have an ASPEED AST2500 SOC Display
+	  Controller (aka GFX).
+
+	  If M is selected this module will be called aspeed_gfx.
diff --git a/drivers/gpu/drm/aspeed/Makefile b/drivers/gpu/drm/aspeed/Makefile
new file mode 100644
index 000000000000..6e194cd790d8
--- /dev/null
+++ b/drivers/gpu/drm/aspeed/Makefile
@@ -0,0 +1,3 @@
+aspeed_gfx-y := aspeed_gfx_drv.o aspeed_gfx_crtc.o aspeed_gfx_out.o
+
+obj-$(CONFIG_DRM_ASPEED_GFX) += aspeed_gfx.o
diff --git a/drivers/gpu/drm/aspeed/aspeed_gfx.h b/drivers/gpu/drm/aspeed/aspeed_gfx.h
new file mode 100644
index 000000000000..a10358bb61ec
--- /dev/null
+++ b/drivers/gpu/drm/aspeed/aspeed_gfx.h
@@ -0,0 +1,104 @@
+/* SPDX-License-Identifier: GPL-2.0+ */
+/* Copyright 2018 IBM Corporation */
+
+#include <drm/drm_device.h>
+#include <drm/drm_simple_kms_helper.h>
+
+struct aspeed_gfx {
+	void __iomem			*base;
+	struct clk			*clk;
+	struct reset_control		*rst;
+	struct regmap			*scu;
+
+	struct drm_simple_display_pipe	pipe;
+	struct drm_connector		connector;
+	struct drm_fbdev_cma		*fbdev;
+};
+
+int aspeed_gfx_create_pipe(struct drm_device *drm);
+int aspeed_gfx_create_output(struct drm_device *drm);
+
+#define CRT_CTRL1		0x60 /* CRT Control I */
+#define CRT_CTRL2		0x64 /* CRT Control II */
+#define CRT_STATUS		0x68 /* CRT Status */
+#define CRT_MISC		0x6c /* CRT Misc Setting */
+#define CRT_HORIZ0		0x70 /* CRT Horizontal Total & Display Enable End */
+#define CRT_HORIZ1		0x74 /* CRT Horizontal Retrace Start & End */
+#define CRT_VERT0		0x78 /* CRT Vertical Total & Display Enable End */
+#define CRT_VERT1		0x7C /* CRT Vertical Retrace Start & End */
+#define CRT_ADDR		0x80 /* CRT Display Starting Address */
+#define CRT_OFFSET		0x84 /* CRT Display Offset & Terminal Count */
+#define CRT_THROD		0x88 /* CRT Threshold */
+#define CRT_XSCALE		0x8C /* CRT Scaling-Up Factor */
+#define CRT_CURSOR0		0x90 /* CRT Hardware Cursor X & Y Offset */
+#define CRT_CURSOR1		0x94 /* CRT Hardware Cursor X & Y Position */
+#define CRT_CURSOR2		0x98 /* CRT Hardware Cursor Pattern Address */
+#define CRT_9C			0x9C
+#define CRT_OSD_H		0xA0 /* CRT OSD Horizontal Start/End */
+#define CRT_OSD_V		0xA4 /* CRT OSD Vertical Start/End */
+#define CRT_OSD_ADDR		0xA8 /* CRT OSD Pattern Address */
+#define CRT_OSD_DISP		0xAC /* CRT OSD Offset */
+#define CRT_OSD_THRESH		0xB0 /* CRT OSD Threshold & Alpha */
+#define CRT_B4			0xB4
+#define CRT_STS_V		0xB8 /* CRT Status V */
+#define CRT_SCRATCH		0xBC /* Scratchpad */
+#define CRT_BB0_ADDR		0xD0 /* CRT Display BB0 Starting Address */
+#define CRT_BB1_ADDR		0xD4 /* CRT Display BB1 Starting Address */
+#define CRT_BB_COUNT		0xD8 /* CRT Display BB Terminal Count */
+#define OSD_COLOR1		0xE0 /* OSD Color Palette Index 1 & 0 */
+#define OSD_COLOR2		0xE4 /* OSD Color Palette Index 3 & 2 */
+#define OSD_COLOR3		0xE8 /* OSD Color Palette Index 5 & 4 */
+#define OSD_COLOR4		0xEC /* OSD Color Palette Index 7 & 6 */
+#define OSD_COLOR5		0xF0 /* OSD Color Palette Index 9 & 8 */
+#define OSD_COLOR6		0xF4 /* OSD Color Palette Index 11 & 10 */
+#define OSD_COLOR7		0xF8 /* OSD Color Palette Index 13 & 12 */
+#define OSD_COLOR8		0xFC /* OSD Color Palette Index 15 & 14 */
+
+/* CTRL1 */
+#define CRT_CTRL_EN			BIT(0)
+#define CRT_CTRL_HW_CURSOR_EN		BIT(1)
+#define CRT_CTRL_OSD_EN			BIT(2)
+#define CRT_CTRL_INTERLACED		BIT(3)
+#define CRT_CTRL_COLOR_RGB565		(0 << 7)
+#define CRT_CTRL_COLOR_YUV444		(1 << 7)
+#define CRT_CTRL_COLOR_XRGB8888		(2 << 7)
+#define CRT_CTRL_COLOR_RGB888		(3 << 7)
+#define CRT_CTRL_COLOR_YUV444_2RGB	(5 << 7)
+#define CRT_CTRL_COLOR_YUV422		(7 << 7)
+#define CRT_CTRL_COLOR_MASK		GENMASK(9, 7)
+#define CRT_CTRL_HSYNC_NEGATIVE		BIT(16)
+#define CRT_CTRL_VSYNC_NEGATIVE		BIT(17)
+#define CRT_CTRL_VERTICAL_INTR_EN	BIT(30)
+#define CRT_CTRL_VERTICAL_INTR_STS	BIT(31)
+
+/* CTRL2 */
+#define CRT_CTRL_DAC_EN			BIT(0)
+#define CRT_CTRL_VBLANK_LINE(x)		(((x) << 20) & CRT_CTRL_VBLANK_LINE_MASK)
+#define CRT_CTRL_VBLANK_LINE_MASK	GENMASK(20, 31)
+
+/* CRT_HORIZ0 */
+#define CRT_H_TOTAL(x)			(x)
+#define CRT_H_DE(x)			((x) << 16)
+
+/* CRT_HORIZ1 */
+#define CRT_H_RS_START(x)		(x)
+#define CRT_H_RS_END(x)			((x) << 16)
+
+/* CRT_VIRT0 */
+#define CRT_V_TOTAL(x)			(x)
+#define CRT_V_DE(x)			((x) << 16)
+
+/* CRT_VIRT1 */
+#define CRT_V_RS_START(x)		(x)
+#define CRT_V_RS_END(x)			((x) << 16)
+
+/* CRT_OFFSET */
+#define CRT_DISP_OFFSET(x)		(x)
+#define CRT_TERM_COUNT(x)		((x) << 16)
+
+/* CRT_THROD */
+#define CRT_THROD_LOW(x)		(x)
+#define CRT_THROD_HIGH(x)		((x) << 8)
+
+/* Default Threshold Seting */
+#define G5_CRT_THROD_VAL	(CRT_THROD_LOW(0x24) | CRT_THROD_HIGH(0x3C))
diff --git a/drivers/gpu/drm/aspeed/aspeed_gfx_crtc.c b/drivers/gpu/drm/aspeed/aspeed_gfx_crtc.c
new file mode 100644
index 000000000000..15db9e426ec4
--- /dev/null
+++ b/drivers/gpu/drm/aspeed/aspeed_gfx_crtc.c
@@ -0,0 +1,241 @@
+// SPDX-License-Identifier: GPL-2.0+
+// Copyright 2018 IBM Corporation
+
+#include <linux/clk.h>
+#include <linux/reset.h>
+#include <linux/regmap.h>
+
+#include <drm/drm_crtc_helper.h>
+#include <drm/drm_device.h>
+#include <drm/drm_fb_cma_helper.h>
+#include <drm/drm_fourcc.h>
+#include <drm/drm_gem_cma_helper.h>
+#include <drm/drm_gem_framebuffer_helper.h>
+#include <drm/drm_panel.h>
+#include <drm/drm_simple_kms_helper.h>
+#include <drm/drm_vblank.h>
+
+#include "aspeed_gfx.h"
+
+static struct aspeed_gfx *
+drm_pipe_to_aspeed_gfx(struct drm_simple_display_pipe *pipe)
+{
+	return container_of(pipe, struct aspeed_gfx, pipe);
+}
+
+static int aspeed_gfx_set_pixel_fmt(struct aspeed_gfx *priv, u32 *bpp)
+{
+	struct drm_crtc *crtc = &priv->pipe.crtc;
+	struct drm_device *drm = crtc->dev;
+	const u32 format = crtc->primary->state->fb->format->format;
+	u32 ctrl1;
+
+	ctrl1 = readl(priv->base + CRT_CTRL1);
+	ctrl1 &= ~CRT_CTRL_COLOR_MASK;
+
+	switch (format) {
+	case DRM_FORMAT_RGB565:
+		dev_dbg(drm->dev, "Setting up RGB565 mode\n");
+		ctrl1 |= CRT_CTRL_COLOR_RGB565;
+		*bpp = 16;
+		break;
+	case DRM_FORMAT_XRGB8888:
+		dev_dbg(drm->dev, "Setting up XRGB8888 mode\n");
+		ctrl1 |= CRT_CTRL_COLOR_XRGB8888;
+		*bpp = 32;
+		break;
+	default:
+		dev_err(drm->dev, "Unhandled pixel format %08x\n", format);
+		return -EINVAL;
+	}
+
+	writel(ctrl1, priv->base + CRT_CTRL1);
+
+	return 0;
+}
+
+static void aspeed_gfx_enable_controller(struct aspeed_gfx *priv)
+{
+	u32 ctrl1 = readl(priv->base + CRT_CTRL1);
+	u32 ctrl2 = readl(priv->base + CRT_CTRL2);
+
+	/* SCU2C: set DAC source for display output to Graphics CRT (GFX) */
+	regmap_update_bits(priv->scu, 0x2c, BIT(16), BIT(16));
+
+	writel(ctrl1 | CRT_CTRL_EN, priv->base + CRT_CTRL1);
+	writel(ctrl2 | CRT_CTRL_DAC_EN, priv->base + CRT_CTRL2);
+}
+
+static void aspeed_gfx_disable_controller(struct aspeed_gfx *priv)
+{
+	u32 ctrl1 = readl(priv->base + CRT_CTRL1);
+	u32 ctrl2 = readl(priv->base + CRT_CTRL2);
+
+	writel(ctrl1 & ~CRT_CTRL_EN, priv->base + CRT_CTRL1);
+	writel(ctrl2 & ~CRT_CTRL_DAC_EN, priv->base + CRT_CTRL2);
+
+	regmap_update_bits(priv->scu, 0x2c, BIT(16), 0);
+}
+
+static void aspeed_gfx_crtc_mode_set_nofb(struct aspeed_gfx *priv)
+{
+	struct drm_display_mode *m = &priv->pipe.crtc.state->adjusted_mode;
+	u32 ctrl1, d_offset, t_count, bpp;
+	int err;
+
+	err = aspeed_gfx_set_pixel_fmt(priv, &bpp);
+	if (err)
+		return;
+
+#if 0
+	/* TODO: we have only been able to test with the 40MHz USB clock. The
+	 * clock is fixed, so we cannot adjust it here. */
+	clk_set_rate(priv->pixel_clk, m->crtc_clock * 1000);
+#endif
+
+	ctrl1 = readl(priv->base + CRT_CTRL1);
+	ctrl1 &= ~(CRT_CTRL_INTERLACED |
+			CRT_CTRL_HSYNC_NEGATIVE |
+			CRT_CTRL_VSYNC_NEGATIVE);
+
+	if (m->flags & DRM_MODE_FLAG_INTERLACE)
+		ctrl1 |= CRT_CTRL_INTERLACED;
+
+	if (!(m->flags & DRM_MODE_FLAG_PHSYNC))
+		ctrl1 |= CRT_CTRL_HSYNC_NEGATIVE;
+
+	if (!(m->flags & DRM_MODE_FLAG_PVSYNC))
+		ctrl1 |= CRT_CTRL_VSYNC_NEGATIVE;
+
+	writel(ctrl1, priv->base + CRT_CTRL1);
+
+	/* Horizontal timing */
+	writel(CRT_H_TOTAL(m->htotal - 1) | CRT_H_DE(m->hdisplay - 1),
+			priv->base + CRT_HORIZ0);
+	writel(CRT_H_RS_START(m->hsync_start - 1) | CRT_H_RS_END(m->hsync_end),
+			priv->base + CRT_HORIZ1);
+
+
+	/* Vertical timing */
+	writel(CRT_V_TOTAL(m->vtotal - 1) | CRT_V_DE(m->vdisplay - 1),
+			priv->base + CRT_VERT0);
+	writel(CRT_V_RS_START(m->vsync_start) | CRT_V_RS_END(m->vsync_end),
+			priv->base + CRT_VERT1);
+
+	/*
+	 * Display Offset: address difference between consecutive scan lines
+	 * Terminal Count: memory size of one scan line
+	 */
+	d_offset = m->hdisplay * bpp / 8;
+	t_count = (m->hdisplay * bpp + 127) / 128;
+	writel(CRT_DISP_OFFSET(d_offset) | CRT_TERM_COUNT(t_count),
+			priv->base + CRT_OFFSET);
+
+	/*
+	 * Threshold: FIFO thresholds of refill and stop (16 byte chunks
+	 * per line, rounded up)
+	 */
+	writel(G5_CRT_THROD_VAL, priv->base + CRT_THROD);
+}
+
+static void aspeed_gfx_pipe_enable(struct drm_simple_display_pipe *pipe,
+			      struct drm_crtc_state *crtc_state,
+			      struct drm_plane_state *plane_state)
+{
+	struct aspeed_gfx *priv = drm_pipe_to_aspeed_gfx(pipe);
+	struct drm_crtc *crtc = &pipe->crtc;
+
+	aspeed_gfx_crtc_mode_set_nofb(priv);
+	aspeed_gfx_enable_controller(priv);
+	drm_crtc_vblank_on(crtc);
+}
+
+static void aspeed_gfx_pipe_disable(struct drm_simple_display_pipe *pipe)
+{
+	struct aspeed_gfx *priv = drm_pipe_to_aspeed_gfx(pipe);
+	struct drm_crtc *crtc = &pipe->crtc;
+
+	drm_crtc_vblank_off(crtc);
+	aspeed_gfx_disable_controller(priv);
+}
+
+static void aspeed_gfx_pipe_update(struct drm_simple_display_pipe *pipe,
+				   struct drm_plane_state *plane_state)
+{
+	struct aspeed_gfx *priv = drm_pipe_to_aspeed_gfx(pipe);
+	struct drm_crtc *crtc = &pipe->crtc;
+	struct drm_framebuffer *fb = pipe->plane.state->fb;
+	struct drm_pending_vblank_event *event;
+	struct drm_gem_cma_object *gem;
+
+	spin_lock_irq(&crtc->dev->event_lock);
+	event = crtc->state->event;
+	if (event) {
+		crtc->state->event = NULL;
+
+		if (drm_crtc_vblank_get(crtc) == 0)
+			drm_crtc_arm_vblank_event(crtc, event);
+		else
+			drm_crtc_send_vblank_event(crtc, event);
+	}
+	spin_unlock_irq(&crtc->dev->event_lock);
+
+	if (!fb)
+		return;
+
+	gem = drm_fb_cma_get_gem_obj(fb, 0);
+	if (!gem)
+		return;
+	writel(gem->paddr, priv->base + CRT_ADDR);
+}
+
+static int aspeed_gfx_enable_vblank(struct drm_simple_display_pipe *pipe)
+{
+	struct aspeed_gfx *priv = drm_pipe_to_aspeed_gfx(pipe);
+	u32 reg = readl(priv->base + CRT_CTRL1);
+
+	/* Clear pending VBLANK IRQ */
+	writel(reg | CRT_CTRL_VERTICAL_INTR_STS, priv->base + CRT_CTRL1);
+
+	reg |= CRT_CTRL_VERTICAL_INTR_EN;
+	writel(reg, priv->base + CRT_CTRL1);
+
+	return 0;
+}
+
+static void aspeed_gfx_disable_vblank(struct drm_simple_display_pipe *pipe)
+{
+	struct aspeed_gfx *priv = drm_pipe_to_aspeed_gfx(pipe);
+	u32 reg = readl(priv->base + CRT_CTRL1);
+
+	reg &= ~CRT_CTRL_VERTICAL_INTR_EN;
+	writel(reg, priv->base + CRT_CTRL1);
+
+	/* Clear pending VBLANK IRQ */
+	writel(reg | CRT_CTRL_VERTICAL_INTR_STS, priv->base + CRT_CTRL1);
+}
+
+static struct drm_simple_display_pipe_funcs aspeed_gfx_funcs = {
+	.enable		= aspeed_gfx_pipe_enable,
+	.disable	= aspeed_gfx_pipe_disable,
+	.update		= aspeed_gfx_pipe_update,
+	.prepare_fb	= drm_gem_fb_simple_display_pipe_prepare_fb,
+	.enable_vblank	= aspeed_gfx_enable_vblank,
+	.disable_vblank	= aspeed_gfx_disable_vblank,
+};
+
+static const uint32_t aspeed_gfx_formats[] = {
+	DRM_FORMAT_XRGB8888,
+	DRM_FORMAT_RGB565,
+};
+
+int aspeed_gfx_create_pipe(struct drm_device *drm)
+{
+	struct aspeed_gfx *priv = drm->dev_private;
+
+	return drm_simple_display_pipe_init(drm, &priv->pipe, &aspeed_gfx_funcs,
+					    aspeed_gfx_formats,
+					    ARRAY_SIZE(aspeed_gfx_formats),
+					    NULL,
+					    &priv->connector);
+}
diff --git a/drivers/gpu/drm/aspeed/aspeed_gfx_drv.c b/drivers/gpu/drm/aspeed/aspeed_gfx_drv.c
new file mode 100644
index 000000000000..eeb22eccd1fc
--- /dev/null
+++ b/drivers/gpu/drm/aspeed/aspeed_gfx_drv.c
@@ -0,0 +1,269 @@
+// SPDX-License-Identifier: GPL-2.0+
+// Copyright 2018 IBM Corporation
+
+#include <linux/clk.h>
+#include <linux/dma-mapping.h>
+#include <linux/irq.h>
+#include <linux/mfd/syscon.h>
+#include <linux/module.h>
+#include <linux/of.h>
+#include <linux/of_reserved_mem.h>
+#include <linux/platform_device.h>
+#include <linux/regmap.h>
+#include <linux/reset.h>
+
+#include <drm/drm_atomic_helper.h>
+#include <drm/drm_crtc_helper.h>
+#include <drm/drm_device.h>
+#include <drm/drm_fb_cma_helper.h>
+#include <drm/drm_fb_helper.h>
+#include <drm/drm_gem_cma_helper.h>
+#include <drm/drm_gem_framebuffer_helper.h>
+#include <drm/drm_probe_helper.h>
+#include <drm/drm_simple_kms_helper.h>
+#include <drm/drm_vblank.h>
+#include <drm/drm_drv.h>
+
+#include "aspeed_gfx.h"
+
+/**
+ * DOC: ASPEED GFX Driver
+ *
+ * This driver is for the ASPEED BMC SoC's 'GFX' display hardware, also called
+ * the 'SOC Display Controller' in the datasheet. This driver runs on the ARM
+ * based BMC systems, unlike the ast driver which runs on a host CPU and is for
+ * a PCIe graphics device.
+ *
+ * The AST2500 supports a total of 3 output paths:
+ *
+ *   1. VGA output, the output target can choose either or both to the DAC
+ *   or DVO interface.
+ *
+ *   2. Graphics CRT output, the output target can choose either or both to
+ *   the DAC or DVO interface.
+ *
+ *   3. Video input from DVO, the video input can be used for video engine
+ *   capture or DAC display output.
+ *
+ * Output options are selected in SCU2C.
+ *
+ * The "VGA mode" device is the PCI attached controller. The "Graphics CRT"
+ * is the ARM's internal display controller.
+ *
+ * The driver only supports a simple configuration consisting of a 40MHz
+ * pixel clock, fixed by hardware limitations, and the VGA output path.
+ *
+ * The driver was written with the 'AST2500 Software Programming Guide' v17,
+ * which is available under NDA from ASPEED.
+ */
+
+static const struct drm_mode_config_funcs aspeed_gfx_mode_config_funcs = {
+	.fb_create		= drm_gem_fb_create,
+	.atomic_check		= drm_atomic_helper_check,
+	.atomic_commit		= drm_atomic_helper_commit,
+};
+
+static void aspeed_gfx_setup_mode_config(struct drm_device *drm)
+{
+	drm_mode_config_init(drm);
+
+	drm->mode_config.min_width = 0;
+	drm->mode_config.min_height = 0;
+	drm->mode_config.max_width = 800;
+	drm->mode_config.max_height = 600;
+	drm->mode_config.funcs = &aspeed_gfx_mode_config_funcs;
+}
+
+static irqreturn_t aspeed_gfx_irq_handler(int irq, void *data)
+{
+	struct drm_device *drm = data;
+	struct aspeed_gfx *priv = drm->dev_private;
+	u32 reg;
+
+	reg = readl(priv->base + CRT_CTRL1);
+
+	if (reg & CRT_CTRL_VERTICAL_INTR_STS) {
+		drm_crtc_handle_vblank(&priv->pipe.crtc);
+		writel(reg, priv->base + CRT_CTRL1);
+		return IRQ_HANDLED;
+	}
+
+	return IRQ_NONE;
+}
+
+
+
+static int aspeed_gfx_load(struct drm_device *drm)
+{
+	struct platform_device *pdev = to_platform_device(drm->dev);
+	struct aspeed_gfx *priv;
+	struct resource *res;
+	int ret;
+
+	priv = devm_kzalloc(&pdev->dev, sizeof(*priv), GFP_KERNEL);
+	if (!priv)
+		return -ENOMEM;
+	drm->dev_private = priv;
+
+	res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
+	priv->base = devm_ioremap_resource(drm->dev, res);
+	if (IS_ERR(priv->base))
+		return PTR_ERR(priv->base);
+
+	priv->scu = syscon_regmap_lookup_by_compatible("aspeed,ast2500-scu");
+	if (IS_ERR(priv->scu)) {
+		dev_err(&pdev->dev, "failed to find SCU regmap\n");
+		return PTR_ERR(priv->scu);
+	}
+
+	ret = of_reserved_mem_device_init(drm->dev);
+	if (ret) {
+		dev_err(&pdev->dev,
+			"failed to initialize reserved mem: %d\n", ret);
+		return ret;
+	}
+
+	ret = dma_set_mask_and_coherent(drm->dev, DMA_BIT_MASK(32));
+	if (ret) {
+		dev_err(&pdev->dev, "failed to set DMA mask: %d\n", ret);
+		return ret;
+	}
+
+	priv->rst = devm_reset_control_get_exclusive(&pdev->dev, NULL);
+	if (IS_ERR(priv->rst)) {
+		dev_err(&pdev->dev,
+			"missing or invalid reset controller device tree entry");
+		return PTR_ERR(priv->rst);
+	}
+	reset_control_deassert(priv->rst);
+
+	priv->clk = devm_clk_get(drm->dev, NULL);
+	if (IS_ERR(priv->clk)) {
+		dev_err(&pdev->dev,
+			"missing or invalid clk device tree entry");
+		return PTR_ERR(priv->clk);
+	}
+	clk_prepare_enable(priv->clk);
+
+	/* Sanitize control registers */
+	writel(0, priv->base + CRT_CTRL1);
+	writel(0, priv->base + CRT_CTRL2);
+
+	aspeed_gfx_setup_mode_config(drm);
+
+	ret = drm_vblank_init(drm, 1);
+	if (ret < 0) {
+		dev_err(drm->dev, "Failed to initialise vblank\n");
+		return ret;
+	}
+
+	ret = aspeed_gfx_create_output(drm);
+	if (ret < 0) {
+		dev_err(drm->dev, "Failed to create outputs\n");
+		return ret;
+	}
+
+	ret = aspeed_gfx_create_pipe(drm);
+	if (ret < 0) {
+		dev_err(drm->dev, "Cannot setup simple display pipe\n");
+		return ret;
+	}
+
+	ret = devm_request_irq(drm->dev, platform_get_irq(pdev, 0),
+			       aspeed_gfx_irq_handler, 0, "aspeed gfx", drm);
+	if (ret < 0) {
+		dev_err(drm->dev, "Failed to install IRQ handler\n");
+		return ret;
+	}
+
+	drm_mode_config_reset(drm);
+
+	drm_fbdev_generic_setup(drm, 32);
+
+	return 0;
+}
+
+static void aspeed_gfx_unload(struct drm_device *drm)
+{
+	drm_kms_helper_poll_fini(drm);
+	drm_mode_config_cleanup(drm);
+
+	drm->dev_private = NULL;
+}
+
+DEFINE_DRM_GEM_CMA_FOPS(fops);
+
+static struct drm_driver aspeed_gfx_driver = {
+	.driver_features        = DRIVER_GEM | DRIVER_MODESET |
+				DRIVER_PRIME | DRIVER_ATOMIC,
+	.gem_create_object	= drm_cma_gem_create_object_default_funcs,
+	.dumb_create		= drm_gem_cma_dumb_create,
+	.prime_handle_to_fd	= drm_gem_prime_handle_to_fd,
+	.prime_fd_to_handle	= drm_gem_prime_fd_to_handle,
+	.gem_prime_import_sg_table = drm_gem_cma_prime_import_sg_table,
+	.gem_prime_mmap		= drm_gem_prime_mmap,
+	.fops = &fops,
+	.name = "aspeed-gfx-drm",
+	.desc = "ASPEED GFX DRM",
+	.date = "20180319",
+	.major = 1,
+	.minor = 0,
+};
+
+static const struct of_device_id aspeed_gfx_match[] = {
+	{ .compatible = "aspeed,ast2500-gfx" },
+	{ }
+};
+
+static int aspeed_gfx_probe(struct platform_device *pdev)
+{
+	struct drm_device *drm;
+	int ret;
+
+	drm = drm_dev_alloc(&aspeed_gfx_driver, &pdev->dev);
+	if (IS_ERR(drm))
+		return PTR_ERR(drm);
+
+	ret = aspeed_gfx_load(drm);
+	if (ret)
+		goto err_free;
+
+	ret = drm_dev_register(drm, 0);
+	if (ret)
+		goto err_unload;
+
+	return 0;
+
+err_unload:
+	aspeed_gfx_unload(drm);
+err_free:
+	drm_dev_put(drm);
+
+	return ret;
+}
+
+static int aspeed_gfx_remove(struct platform_device *pdev)
+{
+	struct drm_device *drm = platform_get_drvdata(pdev);
+
+	drm_dev_unregister(drm);
+	aspeed_gfx_unload(drm);
+	drm_dev_put(drm);
+
+	return 0;
+}
+
+static struct platform_driver aspeed_gfx_platform_driver = {
+	.probe		= aspeed_gfx_probe,
+	.remove		= aspeed_gfx_remove,
+	.driver = {
+		.name = "aspeed_gfx",
+		.of_match_table = aspeed_gfx_match,
+	},
+};
+
+module_platform_driver(aspeed_gfx_platform_driver);
+
+MODULE_AUTHOR("Joel Stanley <joel@jms.id.au>");
+MODULE_DESCRIPTION("ASPEED BMC DRM/KMS driver");
+MODULE_LICENSE("GPL");
diff --git a/drivers/gpu/drm/aspeed/aspeed_gfx_out.c b/drivers/gpu/drm/aspeed/aspeed_gfx_out.c
new file mode 100644
index 000000000000..67ee5fa10055
--- /dev/null
+++ b/drivers/gpu/drm/aspeed/aspeed_gfx_out.c
@@ -0,0 +1,42 @@
+// SPDX-License-Identifier: GPL-2.0+
+// Copyright 2018 IBM Corporation
+
+#include <drm/drm_atomic_helper.h>
+#include <drm/drm_connector.h>
+#include <drm/drm_crtc_helper.h>
+#include <drm/drm_probe_helper.h>
+
+#include "aspeed_gfx.h"
+
+static int aspeed_gfx_get_modes(struct drm_connector *connector)
+{
+	return drm_add_modes_noedid(connector, 800, 600);
+}
+
+static const struct
+drm_connector_helper_funcs aspeed_gfx_connector_helper_funcs = {
+	.get_modes = aspeed_gfx_get_modes,
+};
+
+static const struct drm_connector_funcs aspeed_gfx_connector_funcs = {
+	.fill_modes		= drm_helper_probe_single_connector_modes,
+	.destroy		= drm_connector_cleanup,
+	.reset			= drm_atomic_helper_connector_reset,
+	.atomic_duplicate_state	= drm_atomic_helper_connector_duplicate_state,
+	.atomic_destroy_state	= drm_atomic_helper_connector_destroy_state,
+};
+
+int aspeed_gfx_create_output(struct drm_device *drm)
+{
+	struct aspeed_gfx *priv = drm->dev_private;
+	int ret;
+
+	priv->connector.dpms = DRM_MODE_DPMS_OFF;
+	priv->connector.polled = 0;
+	drm_connector_helper_add(&priv->connector,
+				 &aspeed_gfx_connector_helper_funcs);
+	ret = drm_connector_init(drm, &priv->connector,
+				 &aspeed_gfx_connector_funcs,
+				 DRM_MODE_CONNECTOR_Unknown);
+	return ret;
+}
diff --git a/drivers/gpu/drm/bochs/bochs.h b/drivers/gpu/drm/bochs/bochs.h
index 7425d083b944..049d058571d4 100644
--- a/drivers/gpu/drm/bochs/bochs.h
+++ b/drivers/gpu/drm/bochs/bochs.h
@@ -73,7 +73,6 @@ struct bochs_device {
 	struct drm_crtc crtc;
 	struct drm_encoder encoder;
 	struct drm_connector connector;
-	bool mode_config_initialized;
 
 	/* ttm */
 	struct {
diff --git a/drivers/gpu/drm/bochs/bochs_kms.c b/drivers/gpu/drm/bochs/bochs_kms.c
index 9e7cd6b34106..485f9cf05e8b 100644
--- a/drivers/gpu/drm/bochs/bochs_kms.c
+++ b/drivers/gpu/drm/bochs/bochs_kms.c
@@ -267,7 +267,6 @@ const struct drm_mode_config_funcs bochs_mode_funcs = {
 int bochs_kms_init(struct bochs_device *bochs)
 {
 	drm_mode_config_init(bochs->dev);
-	bochs->mode_config_initialized = true;
 
 	bochs->dev->mode_config.max_width = 8192;
 	bochs->dev->mode_config.max_height = 8192;
@@ -292,8 +291,6 @@ int bochs_kms_init(struct bochs_device *bochs)
 
 void bochs_kms_fini(struct bochs_device *bochs)
 {
-	if (bochs->mode_config_initialized) {
-		drm_mode_config_cleanup(bochs->dev);
-		bochs->mode_config_initialized = false;
-	}
+	drm_atomic_helper_shutdown(bochs->dev);
+	drm_mode_config_cleanup(bochs->dev);
 }
diff --git a/drivers/gpu/drm/cirrus/cirrus_drv.h b/drivers/gpu/drm/cirrus/cirrus_drv.h
index 013dffc8c1bd..1bd816be3aae 100644
--- a/drivers/gpu/drm/cirrus/cirrus_drv.h
+++ b/drivers/gpu/drm/cirrus/cirrus_drv.h
@@ -101,7 +101,6 @@ struct cirrus_crtc {
 
 struct cirrus_fbdev;
 struct cirrus_mode_info {
-	bool				mode_config_initialized;
 	struct cirrus_crtc		*crtc;
 	/* pointer to fbdev info structure */
 	struct cirrus_fbdev		*gfbdev;
diff --git a/drivers/gpu/drm/cirrus/cirrus_mode.c b/drivers/gpu/drm/cirrus/cirrus_mode.c
index 7f9bc32af685..b109cd71426f 100644
--- a/drivers/gpu/drm/cirrus/cirrus_mode.c
+++ b/drivers/gpu/drm/cirrus/cirrus_mode.c
@@ -575,7 +575,6 @@ int cirrus_modeset_init(struct cirrus_device *cdev)
 	int ret;
 
 	drm_mode_config_init(cdev->dev);
-	cdev->mode_info.mode_config_initialized = true;
 
 	cdev->dev->mode_config.max_width = CIRRUS_MAX_FB_WIDTH;
 	cdev->dev->mode_config.max_height = CIRRUS_MAX_FB_HEIGHT;
@@ -613,9 +612,6 @@ int cirrus_modeset_init(struct cirrus_device *cdev)
 void cirrus_modeset_fini(struct cirrus_device *cdev)
 {
 	cirrus_fbdev_fini(cdev);
-
-	if (cdev->mode_info.mode_config_initialized) {
-		drm_mode_config_cleanup(cdev->dev);
-		cdev->mode_info.mode_config_initialized = false;
-	}
+	drm_helper_force_disable_all(cdev->dev);
+	drm_mode_config_cleanup(cdev->dev);
 }
diff --git a/drivers/gpu/drm/drm_fb_helper.c b/drivers/gpu/drm/drm_fb_helper.c
index 03749a24e4dd..84791dd4a90d 100644
--- a/drivers/gpu/drm/drm_fb_helper.c
+++ b/drivers/gpu/drm/drm_fb_helper.c
@@ -639,20 +639,19 @@ static struct sysrq_key_op sysrq_drm_fb_helper_restore_op = { };
 static void dpms_legacy(struct drm_fb_helper *fb_helper, int dpms_mode)
 {
 	struct drm_device *dev = fb_helper->dev;
-	struct drm_crtc *crtc;
 	struct drm_connector *connector;
+	struct drm_mode_set *modeset;
 	int i, j;
 
 	drm_modeset_lock_all(dev);
 	for (i = 0; i < fb_helper->crtc_count; i++) {
-		crtc = fb_helper->crtc_info[i].mode_set.crtc;
+		modeset = &fb_helper->crtc_info[i].mode_set;
 
-		if (!crtc->enabled)
+		if (!modeset->crtc->enabled)
 			continue;
 
-		/* Walk the connectors & encoders on this fb turning them on/off */
-		drm_fb_helper_for_each_connector(fb_helper, j) {
-			connector = fb_helper->connector_info[j]->connector;
+		for (j = 0; j < modeset->num_connectors; j++) {
+			connector = modeset->connectors[j];
 			connector->funcs->dpms(connector, dpms_mode);
 			drm_object_property_set_value(&connector->base,
 				dev->mode_config.dpms_property, dpms_mode);
@@ -1874,7 +1873,6 @@ static int drm_fb_helper_single_fb_probe(struct drm_fb_helper *fb_helper,
 	int crtc_count = 0;
 	int i;
 	struct drm_fb_helper_surface_size sizes;
-	int gamma_size = 0;
 	int best_depth = 0;
 
 	memset(&sizes, 0, sizeof(struct drm_fb_helper_surface_size));
@@ -1890,7 +1888,6 @@ static int drm_fb_helper_single_fb_probe(struct drm_fb_helper *fb_helper,
 	if (preferred_bpp != sizes.surface_bpp)
 		sizes.surface_depth = sizes.surface_bpp = preferred_bpp;
 
-	/* first up get a count of crtcs now in use and new min/maxes width/heights */
 	drm_fb_helper_for_each_connector(fb_helper, i) {
 		struct drm_fb_helper_connector *fb_helper_conn = fb_helper->connector_info[i];
 		struct drm_cmdline_mode *cmdline_mode;
@@ -1970,6 +1967,7 @@ static int drm_fb_helper_single_fb_probe(struct drm_fb_helper *fb_helper,
 		sizes.surface_depth = best_depth;
 	}
 
+	/* first up get a count of crtcs now in use and new min/maxes width/heights */
 	crtc_count = 0;
 	for (i = 0; i < fb_helper->crtc_count; i++) {
 		struct drm_display_mode *desired_mode;
@@ -1992,9 +1990,6 @@ static int drm_fb_helper_single_fb_probe(struct drm_fb_helper *fb_helper,
 		x = fb_helper->crtc_info[i].x;
 		y = fb_helper->crtc_info[i].y;
 
-		if (gamma_size == 0)
-			gamma_size = fb_helper->crtc_info[i].mode_set.crtc->gamma_size;
-
 		sizes.surface_width  = max_t(u32, desired_mode->hdisplay + x, sizes.surface_width);
 		sizes.surface_height = max_t(u32, desired_mode->vdisplay + y, sizes.surface_height);
 
@@ -3317,8 +3312,6 @@ int drm_fbdev_generic_setup(struct drm_device *dev, unsigned int preferred_bpp)
 		return ret;
 	}
 
-	drm_client_add(&fb_helper->client);
-
 	if (!preferred_bpp)
 		preferred_bpp = dev->mode_config.preferred_depth;
 	if (!preferred_bpp)
@@ -3329,6 +3322,8 @@ int drm_fbdev_generic_setup(struct drm_device *dev, unsigned int preferred_bpp)
 	if (ret)
 		DRM_DEV_DEBUG(dev->dev, "client hotplug ret=%d\n", ret);
 
+	drm_client_add(&fb_helper->client);
+
 	return 0;
 }
 EXPORT_SYMBOL(drm_fbdev_generic_setup);
diff --git a/drivers/gpu/drm/drm_gem_shmem_helper.c b/drivers/gpu/drm/drm_gem_shmem_helper.c
index 3750a982aaf6..1ee208c2c85e 100644
--- a/drivers/gpu/drm/drm_gem_shmem_helper.c
+++ b/drivers/gpu/drm/drm_gem_shmem_helper.c
@@ -408,7 +408,7 @@ static vm_fault_t drm_gem_shmem_fault(struct vm_fault *vmf)
 	loff_t num_pages = obj->size >> PAGE_SHIFT;
 	struct page *page;
 
-	if (vmf->pgoff > num_pages || WARN_ON_ONCE(!shmem->pages))
+	if (vmf->pgoff >= num_pages || WARN_ON_ONCE(!shmem->pages))
 		return VM_FAULT_SIGBUS;
 
 	page = shmem->pages[vmf->pgoff];
diff --git a/drivers/gpu/drm/drm_internal.h b/drivers/gpu/drm/drm_internal.h
index 251d67e04c2d..d9a483a5fce0 100644
--- a/drivers/gpu/drm/drm_internal.h
+++ b/drivers/gpu/drm/drm_internal.h
@@ -180,12 +180,20 @@ int drm_syncobj_handle_to_fd_ioctl(struct drm_device *dev, void *data,
 				   struct drm_file *file_private);
 int drm_syncobj_fd_to_handle_ioctl(struct drm_device *dev, void *data,
 				   struct drm_file *file_private);
+int drm_syncobj_transfer_ioctl(struct drm_device *dev, void *data,
+			       struct drm_file *file_private);
 int drm_syncobj_wait_ioctl(struct drm_device *dev, void *data,
 			   struct drm_file *file_private);
+int drm_syncobj_timeline_wait_ioctl(struct drm_device *dev, void *data,
+				    struct drm_file *file_private);
 int drm_syncobj_reset_ioctl(struct drm_device *dev, void *data,
 			    struct drm_file *file_private);
 int drm_syncobj_signal_ioctl(struct drm_device *dev, void *data,
 			     struct drm_file *file_private);
+int drm_syncobj_timeline_signal_ioctl(struct drm_device *dev, void *data,
+				      struct drm_file *file_private);
+int drm_syncobj_query_ioctl(struct drm_device *dev, void *data,
+			    struct drm_file *file_private);
 
 /* drm_framebuffer.c */
 void drm_framebuffer_print_info(struct drm_printer *p, unsigned int indent,
diff --git a/drivers/gpu/drm/drm_ioctl.c b/drivers/gpu/drm/drm_ioctl.c
index 687943df58e1..d337f161909c 100644
--- a/drivers/gpu/drm/drm_ioctl.c
+++ b/drivers/gpu/drm/drm_ioctl.c
@@ -686,12 +686,20 @@ static const struct drm_ioctl_desc drm_ioctls[] = {
 		      DRM_UNLOCKED|DRM_RENDER_ALLOW),
 	DRM_IOCTL_DEF(DRM_IOCTL_SYNCOBJ_FD_TO_HANDLE, drm_syncobj_fd_to_handle_ioctl,
 		      DRM_UNLOCKED|DRM_RENDER_ALLOW),
+	DRM_IOCTL_DEF(DRM_IOCTL_SYNCOBJ_TRANSFER, drm_syncobj_transfer_ioctl,
+		      DRM_UNLOCKED|DRM_RENDER_ALLOW),
 	DRM_IOCTL_DEF(DRM_IOCTL_SYNCOBJ_WAIT, drm_syncobj_wait_ioctl,
 		      DRM_UNLOCKED|DRM_RENDER_ALLOW),
+	DRM_IOCTL_DEF(DRM_IOCTL_SYNCOBJ_TIMELINE_WAIT, drm_syncobj_timeline_wait_ioctl,
+		      DRM_UNLOCKED|DRM_RENDER_ALLOW),
 	DRM_IOCTL_DEF(DRM_IOCTL_SYNCOBJ_RESET, drm_syncobj_reset_ioctl,
 		      DRM_UNLOCKED|DRM_RENDER_ALLOW),
 	DRM_IOCTL_DEF(DRM_IOCTL_SYNCOBJ_SIGNAL, drm_syncobj_signal_ioctl,
 		      DRM_UNLOCKED|DRM_RENDER_ALLOW),
+	DRM_IOCTL_DEF(DRM_IOCTL_SYNCOBJ_TIMELINE_SIGNAL, drm_syncobj_timeline_signal_ioctl,
+		      DRM_UNLOCKED|DRM_RENDER_ALLOW),
+	DRM_IOCTL_DEF(DRM_IOCTL_SYNCOBJ_QUERY, drm_syncobj_query_ioctl,
+		      DRM_UNLOCKED|DRM_RENDER_ALLOW),
 	DRM_IOCTL_DEF(DRM_IOCTL_CRTC_GET_SEQUENCE, drm_crtc_get_sequence_ioctl, DRM_UNLOCKED),
 	DRM_IOCTL_DEF(DRM_IOCTL_CRTC_QUEUE_SEQUENCE, drm_crtc_queue_sequence_ioctl, DRM_UNLOCKED),
 	DRM_IOCTL_DEF(DRM_IOCTL_MODE_CREATE_LEASE, drm_mode_create_lease_ioctl, DRM_MASTER|DRM_UNLOCKED),
diff --git a/drivers/gpu/drm/drm_memory.c b/drivers/gpu/drm/drm_memory.c
index 8dbcdc77f6bf..132fef8ff1b6 100644
--- a/drivers/gpu/drm/drm_memory.c
+++ b/drivers/gpu/drm/drm_memory.c
@@ -168,6 +168,13 @@ bool drm_need_swiotlb(int dma_bits)
 	if (xen_pv_domain())
 		return true;
 
+	/*
+	 * Enforce dma_alloc_coherent when memory encryption is active as well
+	 * for the same reasons as for Xen paravirtual hosts.
+	 */
+	if (mem_encrypt_active())
+		return true;
+
 	for (tmp = iomem_resource.child; tmp; tmp = tmp->sibling) {
 		max_iomem = max(max_iomem,  tmp->end);
 	}
diff --git a/drivers/gpu/drm/drm_print.c b/drivers/gpu/drm/drm_print.c
index 0e7fc3e7dfb4..f5cb0aabfe35 100644
--- a/drivers/gpu/drm/drm_print.c
+++ b/drivers/gpu/drm/drm_print.c
@@ -253,3 +253,31 @@ void drm_err(const char *format, ...)
 	va_end(args);
 }
 EXPORT_SYMBOL(drm_err);
+
+/**
+ * drm_print_regset32 - print the contents of registers to a
+ * &drm_printer stream.
+ *
+ * @p: the &drm printer
+ * @regset: the list of registers to print.
+ *
+ * Often in driver debug, it's useful to be able to either capture the
+ * contents of registers in the steady state using debugfs or at
+ * specific points during operation.  This lets the driver have a
+ * single list of registers for both.
+ */
+void drm_print_regset32(struct drm_printer *p, struct debugfs_regset32 *regset)
+{
+	int namelen = 0;
+	int i;
+
+	for (i = 0; i < regset->nregs; i++)
+		namelen = max(namelen, (int)strlen(regset->regs[i].name));
+
+	for (i = 0; i < regset->nregs; i++) {
+		drm_printf(p, "%*s = 0x%08x\n",
+			   namelen, regset->regs[i].name,
+			   readl(regset->base + regset->regs[i].offset));
+	}
+}
+EXPORT_SYMBOL(drm_print_regset32);
diff --git a/drivers/gpu/drm/drm_syncobj.c b/drivers/gpu/drm/drm_syncobj.c
index 5329e66598c6..f3ceeb504e6c 100644
--- a/drivers/gpu/drm/drm_syncobj.c
+++ b/drivers/gpu/drm/drm_syncobj.c
@@ -61,6 +61,7 @@ struct syncobj_wait_entry {
 	struct task_struct *task;
 	struct dma_fence *fence;
 	struct dma_fence_cb fence_cb;
+	u64    point;
 };
 
 static void syncobj_wait_syncobj_func(struct drm_syncobj *syncobj,
@@ -95,6 +96,8 @@ EXPORT_SYMBOL(drm_syncobj_find);
 static void drm_syncobj_fence_add_wait(struct drm_syncobj *syncobj,
 				       struct syncobj_wait_entry *wait)
 {
+	struct dma_fence *fence;
+
 	if (wait->fence)
 		return;
 
@@ -103,11 +106,15 @@ static void drm_syncobj_fence_add_wait(struct drm_syncobj *syncobj,
 	 * have the lock, try one more time just to be sure we don't add a
 	 * callback when a fence has already been set.
 	 */
-	if (syncobj->fence)
-		wait->fence = dma_fence_get(
-			rcu_dereference_protected(syncobj->fence, 1));
-	else
+	fence = dma_fence_get(rcu_dereference_protected(syncobj->fence, 1));
+	if (!fence || dma_fence_chain_find_seqno(&fence, wait->point)) {
+		dma_fence_put(fence);
 		list_add_tail(&wait->node, &syncobj->cb_list);
+	} else if (!fence) {
+		wait->fence = dma_fence_get_stub();
+	} else {
+		wait->fence = fence;
+	}
 	spin_unlock(&syncobj->lock);
 }
 
@@ -123,6 +130,44 @@ static void drm_syncobj_remove_wait(struct drm_syncobj *syncobj,
 }
 
 /**
+ * drm_syncobj_add_point - add new timeline point to the syncobj
+ * @syncobj: sync object to add timeline point do
+ * @chain: chain node to use to add the point
+ * @fence: fence to encapsulate in the chain node
+ * @point: sequence number to use for the point
+ *
+ * Add the chain node as new timeline point to the syncobj.
+ */
+void drm_syncobj_add_point(struct drm_syncobj *syncobj,
+			   struct dma_fence_chain *chain,
+			   struct dma_fence *fence,
+			   uint64_t point)
+{
+	struct syncobj_wait_entry *cur, *tmp;
+	struct dma_fence *prev;
+
+	dma_fence_get(fence);
+
+	spin_lock(&syncobj->lock);
+
+	prev = drm_syncobj_fence_get(syncobj);
+	/* You are adding an unorder point to timeline, which could cause payload returned from query_ioctl is 0! */
+	if (prev && prev->seqno >= point)
+		DRM_ERROR("You are adding an unorder point to timeline!\n");
+	dma_fence_chain_init(chain, prev, fence, point);
+	rcu_assign_pointer(syncobj->fence, &chain->base);
+
+	list_for_each_entry_safe(cur, tmp, &syncobj->cb_list, node)
+		syncobj_wait_syncobj_func(syncobj, cur);
+	spin_unlock(&syncobj->lock);
+
+	/* Walk the chain once to trigger garbage collection */
+	dma_fence_chain_for_each(fence, prev);
+	dma_fence_put(prev);
+}
+EXPORT_SYMBOL(drm_syncobj_add_point);
+
+/**
  * drm_syncobj_replace_fence - replace fence in a sync object.
  * @syncobj: Sync object to replace fence in
  * @fence: fence to install in sync file.
@@ -145,10 +190,8 @@ void drm_syncobj_replace_fence(struct drm_syncobj *syncobj,
 	rcu_assign_pointer(syncobj->fence, fence);
 
 	if (fence != old_fence) {
-		list_for_each_entry_safe(cur, tmp, &syncobj->cb_list, node) {
-			list_del_init(&cur->node);
+		list_for_each_entry_safe(cur, tmp, &syncobj->cb_list, node)
 			syncobj_wait_syncobj_func(syncobj, cur);
-		}
 	}
 
 	spin_unlock(&syncobj->lock);
@@ -171,6 +214,8 @@ static void drm_syncobj_assign_null_handle(struct drm_syncobj *syncobj)
 	dma_fence_put(fence);
 }
 
+/* 5s default for wait submission */
+#define DRM_SYNCOBJ_WAIT_FOR_SUBMIT_TIMEOUT 5000000000ULL
 /**
  * drm_syncobj_find_fence - lookup and reference the fence in a sync object
  * @file_private: drm file private pointer
@@ -191,16 +236,58 @@ int drm_syncobj_find_fence(struct drm_file *file_private,
 			   struct dma_fence **fence)
 {
 	struct drm_syncobj *syncobj = drm_syncobj_find(file_private, handle);
-	int ret = 0;
+	struct syncobj_wait_entry wait;
+	u64 timeout = nsecs_to_jiffies64(DRM_SYNCOBJ_WAIT_FOR_SUBMIT_TIMEOUT);
+	int ret;
 
 	if (!syncobj)
 		return -ENOENT;
 
 	*fence = drm_syncobj_fence_get(syncobj);
-	if (!*fence) {
+	drm_syncobj_put(syncobj);
+
+	if (*fence) {
+		ret = dma_fence_chain_find_seqno(fence, point);
+		if (!ret)
+			return 0;
+		dma_fence_put(*fence);
+	} else {
 		ret = -EINVAL;
 	}
-	drm_syncobj_put(syncobj);
+
+	if (!(flags & DRM_SYNCOBJ_WAIT_FLAGS_WAIT_FOR_SUBMIT))
+		return ret;
+
+	memset(&wait, 0, sizeof(wait));
+	wait.task = current;
+	wait.point = point;
+	drm_syncobj_fence_add_wait(syncobj, &wait);
+
+	do {
+		set_current_state(TASK_INTERRUPTIBLE);
+		if (wait.fence) {
+			ret = 0;
+			break;
+		}
+                if (timeout == 0) {
+                        ret = -ETIME;
+                        break;
+                }
+
+		if (signal_pending(current)) {
+			ret = -ERESTARTSYS;
+			break;
+		}
+
+                timeout = schedule_timeout(timeout);
+	} while (1);
+
+	__set_current_state(TASK_RUNNING);
+	*fence = wait.fence;
+
+	if (wait.node.next)
+		drm_syncobj_remove_wait(syncobj, &wait);
+
 	return ret;
 }
 EXPORT_SYMBOL(drm_syncobj_find_fence);
@@ -593,6 +680,80 @@ drm_syncobj_fd_to_handle_ioctl(struct drm_device *dev, void *data,
 					&args->handle);
 }
 
+static int drm_syncobj_transfer_to_timeline(struct drm_file *file_private,
+					    struct drm_syncobj_transfer *args)
+{
+	struct drm_syncobj *timeline_syncobj = NULL;
+	struct dma_fence *fence;
+	struct dma_fence_chain *chain;
+	int ret;
+
+	timeline_syncobj = drm_syncobj_find(file_private, args->dst_handle);
+	if (!timeline_syncobj) {
+		return -ENOENT;
+	}
+	ret = drm_syncobj_find_fence(file_private, args->src_handle,
+				     args->src_point, args->flags,
+				     &fence);
+	if (ret)
+		goto err;
+	chain = kzalloc(sizeof(struct dma_fence_chain), GFP_KERNEL);
+	if (!chain) {
+		ret = -ENOMEM;
+		goto err1;
+	}
+	drm_syncobj_add_point(timeline_syncobj, chain, fence, args->dst_point);
+err1:
+	dma_fence_put(fence);
+err:
+	drm_syncobj_put(timeline_syncobj);
+
+	return ret;
+}
+
+static int
+drm_syncobj_transfer_to_binary(struct drm_file *file_private,
+			       struct drm_syncobj_transfer *args)
+{
+	struct drm_syncobj *binary_syncobj = NULL;
+	struct dma_fence *fence;
+	int ret;
+
+	binary_syncobj = drm_syncobj_find(file_private, args->dst_handle);
+	if (!binary_syncobj)
+		return -ENOENT;
+	ret = drm_syncobj_find_fence(file_private, args->src_handle,
+				     args->src_point, args->flags, &fence);
+	if (ret)
+		goto err;
+	drm_syncobj_replace_fence(binary_syncobj, fence);
+	dma_fence_put(fence);
+err:
+	drm_syncobj_put(binary_syncobj);
+
+	return ret;
+}
+int
+drm_syncobj_transfer_ioctl(struct drm_device *dev, void *data,
+			   struct drm_file *file_private)
+{
+	struct drm_syncobj_transfer *args = data;
+	int ret;
+
+	if (!drm_core_check_feature(dev, DRIVER_SYNCOBJ))
+		return -ENODEV;
+
+	if (args->pad)
+		return -EINVAL;
+
+	if (args->dst_point)
+		ret = drm_syncobj_transfer_to_timeline(file_private, args);
+	else
+		ret = drm_syncobj_transfer_to_binary(file_private, args);
+
+	return ret;
+}
+
 static void syncobj_wait_fence_func(struct dma_fence *fence,
 				    struct dma_fence_cb *cb)
 {
@@ -605,13 +766,27 @@ static void syncobj_wait_fence_func(struct dma_fence *fence,
 static void syncobj_wait_syncobj_func(struct drm_syncobj *syncobj,
 				      struct syncobj_wait_entry *wait)
 {
+	struct dma_fence *fence;
+
 	/* This happens inside the syncobj lock */
-	wait->fence = dma_fence_get(rcu_dereference_protected(syncobj->fence,
-							      lockdep_is_held(&syncobj->lock)));
+	fence = rcu_dereference_protected(syncobj->fence,
+					  lockdep_is_held(&syncobj->lock));
+	dma_fence_get(fence);
+	if (!fence || dma_fence_chain_find_seqno(&fence, wait->point)) {
+		dma_fence_put(fence);
+		return;
+	} else if (!fence) {
+		wait->fence = dma_fence_get_stub();
+	} else {
+		wait->fence = fence;
+	}
+
 	wake_up_process(wait->task);
+	list_del_init(&wait->node);
 }
 
 static signed long drm_syncobj_array_wait_timeout(struct drm_syncobj **syncobjs,
+						  void __user *user_points,
 						  uint32_t count,
 						  uint32_t flags,
 						  signed long timeout,
@@ -619,12 +794,27 @@ static signed long drm_syncobj_array_wait_timeout(struct drm_syncobj **syncobjs,
 {
 	struct syncobj_wait_entry *entries;
 	struct dma_fence *fence;
+	uint64_t *points;
 	uint32_t signaled_count, i;
 
-	entries = kcalloc(count, sizeof(*entries), GFP_KERNEL);
-	if (!entries)
+	points = kmalloc_array(count, sizeof(*points), GFP_KERNEL);
+	if (points == NULL)
 		return -ENOMEM;
 
+	if (!user_points) {
+		memset(points, 0, count * sizeof(uint64_t));
+
+	} else if (copy_from_user(points, user_points,
+				  sizeof(uint64_t) * count)) {
+		timeout = -EFAULT;
+		goto err_free_points;
+	}
+
+	entries = kcalloc(count, sizeof(*entries), GFP_KERNEL);
+	if (!entries) {
+		timeout = -ENOMEM;
+		goto err_free_points;
+	}
 	/* Walk the list of sync objects and initialize entries.  We do
 	 * this up-front so that we can properly return -EINVAL if there is
 	 * a syncobj with a missing fence and then never have the chance of
@@ -632,9 +822,13 @@ static signed long drm_syncobj_array_wait_timeout(struct drm_syncobj **syncobjs,
 	 */
 	signaled_count = 0;
 	for (i = 0; i < count; ++i) {
+		struct dma_fence *fence;
+
 		entries[i].task = current;
-		entries[i].fence = drm_syncobj_fence_get(syncobjs[i]);
-		if (!entries[i].fence) {
+		entries[i].point = points[i];
+		fence = drm_syncobj_fence_get(syncobjs[i]);
+		if (!fence || dma_fence_chain_find_seqno(&fence, points[i])) {
+			dma_fence_put(fence);
 			if (flags & DRM_SYNCOBJ_WAIT_FLAGS_WAIT_FOR_SUBMIT) {
 				continue;
 			} else {
@@ -643,7 +837,13 @@ static signed long drm_syncobj_array_wait_timeout(struct drm_syncobj **syncobjs,
 			}
 		}
 
-		if (dma_fence_is_signaled(entries[i].fence)) {
+		if (fence)
+			entries[i].fence = fence;
+		else
+			entries[i].fence = dma_fence_get_stub();
+
+		if ((flags & DRM_SYNCOBJ_WAIT_FLAGS_WAIT_AVAILABLE) ||
+		    dma_fence_is_signaled(entries[i].fence)) {
 			if (signaled_count == 0 && idx)
 				*idx = i;
 			signaled_count++;
@@ -676,7 +876,8 @@ static signed long drm_syncobj_array_wait_timeout(struct drm_syncobj **syncobjs,
 			if (!fence)
 				continue;
 
-			if (dma_fence_is_signaled(fence) ||
+			if ((flags & DRM_SYNCOBJ_WAIT_FLAGS_WAIT_AVAILABLE) ||
+			    dma_fence_is_signaled(fence) ||
 			    (!entries[i].fence_cb.func &&
 			     dma_fence_add_callback(fence,
 						    &entries[i].fence_cb,
@@ -721,6 +922,9 @@ cleanup_entries:
 	}
 	kfree(entries);
 
+err_free_points:
+	kfree(points);
+
 	return timeout;
 }
 
@@ -760,19 +964,33 @@ EXPORT_SYMBOL(drm_timeout_abs_to_jiffies);
 static int drm_syncobj_array_wait(struct drm_device *dev,
 				  struct drm_file *file_private,
 				  struct drm_syncobj_wait *wait,
-				  struct drm_syncobj **syncobjs)
+				  struct drm_syncobj_timeline_wait *timeline_wait,
+				  struct drm_syncobj **syncobjs, bool timeline)
 {
-	signed long timeout = drm_timeout_abs_to_jiffies(wait->timeout_nsec);
+	signed long timeout = 0;
 	uint32_t first = ~0;
 
-	timeout = drm_syncobj_array_wait_timeout(syncobjs,
-						 wait->count_handles,
-						 wait->flags,
-						 timeout, &first);
-	if (timeout < 0)
-		return timeout;
-
-	wait->first_signaled = first;
+	if (!timeline) {
+		timeout = drm_timeout_abs_to_jiffies(wait->timeout_nsec);
+		timeout = drm_syncobj_array_wait_timeout(syncobjs,
+							 NULL,
+							 wait->count_handles,
+							 wait->flags,
+							 timeout, &first);
+		if (timeout < 0)
+			return timeout;
+		wait->first_signaled = first;
+	} else {
+		timeout = drm_timeout_abs_to_jiffies(timeline_wait->timeout_nsec);
+		timeout = drm_syncobj_array_wait_timeout(syncobjs,
+							 u64_to_user_ptr(timeline_wait->points),
+							 timeline_wait->count_handles,
+							 timeline_wait->flags,
+							 timeout, &first);
+		if (timeout < 0)
+			return timeout;
+		timeline_wait->first_signaled = first;
+	}
 	return 0;
 }
 
@@ -858,13 +1076,48 @@ drm_syncobj_wait_ioctl(struct drm_device *dev, void *data,
 		return ret;
 
 	ret = drm_syncobj_array_wait(dev, file_private,
-				     args, syncobjs);
+				     args, NULL, syncobjs, false);
+
+	drm_syncobj_array_free(syncobjs, args->count_handles);
+
+	return ret;
+}
+
+int
+drm_syncobj_timeline_wait_ioctl(struct drm_device *dev, void *data,
+				struct drm_file *file_private)
+{
+	struct drm_syncobj_timeline_wait *args = data;
+	struct drm_syncobj **syncobjs;
+	int ret = 0;
+
+	if (!drm_core_check_feature(dev, DRIVER_SYNCOBJ))
+		return -ENODEV;
+
+	if (args->flags & ~(DRM_SYNCOBJ_WAIT_FLAGS_WAIT_ALL |
+			    DRM_SYNCOBJ_WAIT_FLAGS_WAIT_FOR_SUBMIT |
+			    DRM_SYNCOBJ_WAIT_FLAGS_WAIT_AVAILABLE))
+		return -EINVAL;
+
+	if (args->count_handles == 0)
+		return -EINVAL;
+
+	ret = drm_syncobj_array_find(file_private,
+				     u64_to_user_ptr(args->handles),
+				     args->count_handles,
+				     &syncobjs);
+	if (ret < 0)
+		return ret;
+
+	ret = drm_syncobj_array_wait(dev, file_private,
+				     NULL, args, syncobjs, true);
 
 	drm_syncobj_array_free(syncobjs, args->count_handles);
 
 	return ret;
 }
 
+
 int
 drm_syncobj_reset_ioctl(struct drm_device *dev, void *data,
 			struct drm_file *file_private)
@@ -930,3 +1183,138 @@ drm_syncobj_signal_ioctl(struct drm_device *dev, void *data,
 
 	return ret;
 }
+
+int
+drm_syncobj_timeline_signal_ioctl(struct drm_device *dev, void *data,
+				  struct drm_file *file_private)
+{
+	struct drm_syncobj_timeline_array *args = data;
+	struct drm_syncobj **syncobjs;
+	struct dma_fence_chain **chains;
+	uint64_t *points;
+	uint32_t i, j;
+	int ret;
+
+	if (!drm_core_check_feature(dev, DRIVER_SYNCOBJ))
+		return -EOPNOTSUPP;
+
+	if (args->pad != 0)
+		return -EINVAL;
+
+	if (args->count_handles == 0)
+		return -EINVAL;
+
+	ret = drm_syncobj_array_find(file_private,
+				     u64_to_user_ptr(args->handles),
+				     args->count_handles,
+				     &syncobjs);
+	if (ret < 0)
+		return ret;
+
+	points = kmalloc_array(args->count_handles, sizeof(*points),
+			       GFP_KERNEL);
+	if (!points) {
+		ret = -ENOMEM;
+		goto out;
+	}
+	if (!u64_to_user_ptr(args->points)) {
+		memset(points, 0, args->count_handles * sizeof(uint64_t));
+	} else if (copy_from_user(points, u64_to_user_ptr(args->points),
+				  sizeof(uint64_t) * args->count_handles)) {
+		ret = -EFAULT;
+		goto err_points;
+	}
+
+	chains = kmalloc_array(args->count_handles, sizeof(void *), GFP_KERNEL);
+	if (!chains) {
+		ret = -ENOMEM;
+		goto err_points;
+	}
+	for (i = 0; i < args->count_handles; i++) {
+		chains[i] = kzalloc(sizeof(struct dma_fence_chain), GFP_KERNEL);
+		if (!chains[i]) {
+			for (j = 0; j < i; j++)
+				kfree(chains[j]);
+			ret = -ENOMEM;
+			goto err_chains;
+		}
+	}
+
+	for (i = 0; i < args->count_handles; i++) {
+		struct dma_fence *fence = dma_fence_get_stub();
+
+		drm_syncobj_add_point(syncobjs[i], chains[i],
+				      fence, points[i]);
+		dma_fence_put(fence);
+	}
+err_chains:
+	kfree(chains);
+err_points:
+	kfree(points);
+out:
+	drm_syncobj_array_free(syncobjs, args->count_handles);
+
+	return ret;
+}
+
+int drm_syncobj_query_ioctl(struct drm_device *dev, void *data,
+			    struct drm_file *file_private)
+{
+	struct drm_syncobj_timeline_array *args = data;
+	struct drm_syncobj **syncobjs;
+	uint64_t __user *points = u64_to_user_ptr(args->points);
+	uint32_t i;
+	int ret;
+
+	if (!drm_core_check_feature(dev, DRIVER_SYNCOBJ))
+		return -ENODEV;
+
+	if (args->pad != 0)
+		return -EINVAL;
+
+	if (args->count_handles == 0)
+		return -EINVAL;
+
+	ret = drm_syncobj_array_find(file_private,
+				     u64_to_user_ptr(args->handles),
+				     args->count_handles,
+				     &syncobjs);
+	if (ret < 0)
+		return ret;
+
+	for (i = 0; i < args->count_handles; i++) {
+		struct dma_fence_chain *chain;
+		struct dma_fence *fence;
+		uint64_t point;
+
+		fence = drm_syncobj_fence_get(syncobjs[i]);
+		chain = to_dma_fence_chain(fence);
+		if (chain) {
+			struct dma_fence *iter, *last_signaled = NULL;
+
+			dma_fence_chain_for_each(iter, fence) {
+				if (!iter)
+					break;
+				dma_fence_put(last_signaled);
+				last_signaled = dma_fence_get(iter);
+				if (!to_dma_fence_chain(last_signaled)->prev_seqno)
+					/* It is most likely that timeline has
+					 * unorder points. */
+					break;
+			}
+			point = dma_fence_is_signaled(last_signaled) ?
+				last_signaled->seqno :
+				to_dma_fence_chain(last_signaled)->prev_seqno;
+			dma_fence_put(last_signaled);
+		} else {
+			point = 0;
+		}
+		ret = copy_to_user(&points[i], &point, sizeof(uint64_t));
+		ret = ret ? -EFAULT : 0;
+		if (ret)
+			break;
+	}
+	drm_syncobj_array_free(syncobjs, args->count_handles);
+
+	return ret;
+}
diff --git a/drivers/gpu/drm/lima/Kconfig b/drivers/gpu/drm/lima/Kconfig
new file mode 100644
index 000000000000..f11314448093
--- /dev/null
+++ b/drivers/gpu/drm/lima/Kconfig
@@ -0,0 +1,10 @@
+# SPDX-License-Identifier: GPL-2.0 OR MIT
+# Copyright 2017-2019 Qiang Yu <yuq825@gmail.com>
+
+config DRM_LIMA
+       tristate "LIMA (DRM support for ARM Mali 400/450 GPU)"
+       depends on DRM
+       depends on ARM || ARM64 || COMPILE_TEST
+       select DRM_SCHED
+       help
+         DRM driver for ARM Mali 400/450 GPUs.
diff --git a/drivers/gpu/drm/lima/Makefile b/drivers/gpu/drm/lima/Makefile
new file mode 100644
index 000000000000..38cc70281ba5
--- /dev/null
+++ b/drivers/gpu/drm/lima/Makefile
@@ -0,0 +1,21 @@
+# SPDX-License-Identifier: GPL-2.0 OR MIT
+# Copyright 2017-2019 Qiang Yu <yuq825@gmail.com>
+
+lima-y := \
+	lima_drv.o \
+	lima_device.o \
+	lima_pmu.o \
+	lima_l2_cache.o \
+	lima_mmu.o \
+	lima_gp.o \
+	lima_pp.o \
+	lima_gem.o \
+	lima_vm.o \
+	lima_sched.o \
+	lima_ctx.o \
+	lima_gem_prime.o \
+	lima_dlbu.o \
+	lima_bcast.o \
+	lima_object.o
+
+obj-$(CONFIG_DRM_LIMA) += lima.o
diff --git a/drivers/gpu/drm/lima/lima_bcast.c b/drivers/gpu/drm/lima/lima_bcast.c
new file mode 100644
index 000000000000..288398027bfa
--- /dev/null
+++ b/drivers/gpu/drm/lima/lima_bcast.c
@@ -0,0 +1,47 @@
+// SPDX-License-Identifier: GPL-2.0 OR MIT
+/* Copyright 2018-2019 Qiang Yu <yuq825@gmail.com> */
+
+#include <linux/io.h>
+#include <linux/device.h>
+
+#include "lima_device.h"
+#include "lima_bcast.h"
+#include "lima_regs.h"
+
+#define bcast_write(reg, data) writel(data, ip->iomem + reg)
+#define bcast_read(reg) readl(ip->iomem + reg)
+
+void lima_bcast_enable(struct lima_device *dev, int num_pp)
+{
+	struct lima_sched_pipe *pipe = dev->pipe + lima_pipe_pp;
+	struct lima_ip *ip = dev->ip + lima_ip_bcast;
+	int i, mask = bcast_read(LIMA_BCAST_BROADCAST_MASK) & 0xffff0000;
+
+	for (i = 0; i < num_pp; i++) {
+		struct lima_ip *pp = pipe->processor[i];
+
+		mask |= 1 << (pp->id - lima_ip_pp0);
+	}
+
+	bcast_write(LIMA_BCAST_BROADCAST_MASK, mask);
+}
+
+int lima_bcast_init(struct lima_ip *ip)
+{
+	int i, mask = 0;
+
+	for (i = lima_ip_pp0; i <= lima_ip_pp7; i++) {
+		if (ip->dev->ip[i].present)
+			mask |= 1 << (i - lima_ip_pp0);
+	}
+
+	bcast_write(LIMA_BCAST_BROADCAST_MASK, mask << 16);
+	bcast_write(LIMA_BCAST_INTERRUPT_MASK, mask);
+	return 0;
+}
+
+void lima_bcast_fini(struct lima_ip *ip)
+{
+
+}
+
diff --git a/drivers/gpu/drm/lima/lima_bcast.h b/drivers/gpu/drm/lima/lima_bcast.h
new file mode 100644
index 000000000000..c47e58563d0a
--- /dev/null
+++ b/drivers/gpu/drm/lima/lima_bcast.h
@@ -0,0 +1,14 @@
+/* SPDX-License-Identifier: GPL-2.0 OR MIT */
+/* Copyright 2018-2019 Qiang Yu <yuq825@gmail.com> */
+
+#ifndef __LIMA_BCAST_H__
+#define __LIMA_BCAST_H__
+
+struct lima_ip;
+
+int lima_bcast_init(struct lima_ip *ip);
+void lima_bcast_fini(struct lima_ip *ip);
+
+void lima_bcast_enable(struct lima_device *dev, int num_pp);
+
+#endif
diff --git a/drivers/gpu/drm/lima/lima_ctx.c b/drivers/gpu/drm/lima/lima_ctx.c
new file mode 100644
index 000000000000..22fff6caa961
--- /dev/null
+++ b/drivers/gpu/drm/lima/lima_ctx.c
@@ -0,0 +1,98 @@
+// SPDX-License-Identifier: GPL-2.0 OR MIT
+/* Copyright 2018-2019 Qiang Yu <yuq825@gmail.com> */
+
+#include <linux/slab.h>
+
+#include "lima_device.h"
+#include "lima_ctx.h"
+
+int lima_ctx_create(struct lima_device *dev, struct lima_ctx_mgr *mgr, u32 *id)
+{
+	struct lima_ctx *ctx;
+	int i, err;
+
+	ctx = kzalloc(sizeof(*ctx), GFP_KERNEL);
+	if (!ctx)
+		return -ENOMEM;
+	ctx->dev = dev;
+	kref_init(&ctx->refcnt);
+
+	for (i = 0; i < lima_pipe_num; i++) {
+		err = lima_sched_context_init(dev->pipe + i, ctx->context + i, &ctx->guilty);
+		if (err)
+			goto err_out0;
+	}
+
+	err = xa_alloc(&mgr->handles, id, ctx, xa_limit_32b, GFP_KERNEL);
+	if (err < 0)
+		goto err_out0;
+
+	return 0;
+
+err_out0:
+	for (i--; i >= 0; i--)
+		lima_sched_context_fini(dev->pipe + i, ctx->context + i);
+	kfree(ctx);
+	return err;
+}
+
+static void lima_ctx_do_release(struct kref *ref)
+{
+	struct lima_ctx *ctx = container_of(ref, struct lima_ctx, refcnt);
+	int i;
+
+	for (i = 0; i < lima_pipe_num; i++)
+		lima_sched_context_fini(ctx->dev->pipe + i, ctx->context + i);
+	kfree(ctx);
+}
+
+int lima_ctx_free(struct lima_ctx_mgr *mgr, u32 id)
+{
+	struct lima_ctx *ctx;
+	int ret = 0;
+
+	mutex_lock(&mgr->lock);
+	ctx = xa_erase(&mgr->handles, id);
+	if (ctx)
+		kref_put(&ctx->refcnt, lima_ctx_do_release);
+	else
+		ret = -EINVAL;
+	mutex_unlock(&mgr->lock);
+	return ret;
+}
+
+struct lima_ctx *lima_ctx_get(struct lima_ctx_mgr *mgr, u32 id)
+{
+	struct lima_ctx *ctx;
+
+	mutex_lock(&mgr->lock);
+	ctx = xa_load(&mgr->handles, id);
+	if (ctx)
+		kref_get(&ctx->refcnt);
+	mutex_unlock(&mgr->lock);
+	return ctx;
+}
+
+void lima_ctx_put(struct lima_ctx *ctx)
+{
+	kref_put(&ctx->refcnt, lima_ctx_do_release);
+}
+
+void lima_ctx_mgr_init(struct lima_ctx_mgr *mgr)
+{
+	mutex_init(&mgr->lock);
+	xa_init_flags(&mgr->handles, XA_FLAGS_ALLOC);
+}
+
+void lima_ctx_mgr_fini(struct lima_ctx_mgr *mgr)
+{
+	struct lima_ctx *ctx;
+	unsigned long id;
+
+	xa_for_each(&mgr->handles, id, ctx) {
+		kref_put(&ctx->refcnt, lima_ctx_do_release);
+	}
+
+	xa_destroy(&mgr->handles);
+	mutex_destroy(&mgr->lock);
+}
diff --git a/drivers/gpu/drm/lima/lima_ctx.h b/drivers/gpu/drm/lima/lima_ctx.h
new file mode 100644
index 000000000000..6154e5c9bfe4
--- /dev/null
+++ b/drivers/gpu/drm/lima/lima_ctx.h
@@ -0,0 +1,30 @@
+/* SPDX-License-Identifier: GPL-2.0 OR MIT */
+/* Copyright 2018-2019 Qiang Yu <yuq825@gmail.com> */
+
+#ifndef __LIMA_CTX_H__
+#define __LIMA_CTX_H__
+
+#include <linux/xarray.h>
+
+#include "lima_device.h"
+
+struct lima_ctx {
+	struct kref refcnt;
+	struct lima_device *dev;
+	struct lima_sched_context context[lima_pipe_num];
+	atomic_t guilty;
+};
+
+struct lima_ctx_mgr {
+	struct mutex lock;
+	struct xarray handles;
+};
+
+int lima_ctx_create(struct lima_device *dev, struct lima_ctx_mgr *mgr, u32 *id);
+int lima_ctx_free(struct lima_ctx_mgr *mgr, u32 id);
+struct lima_ctx *lima_ctx_get(struct lima_ctx_mgr *mgr, u32 id);
+void lima_ctx_put(struct lima_ctx *ctx);
+void lima_ctx_mgr_init(struct lima_ctx_mgr *mgr);
+void lima_ctx_mgr_fini(struct lima_ctx_mgr *mgr);
+
+#endif
diff --git a/drivers/gpu/drm/lima/lima_device.c b/drivers/gpu/drm/lima/lima_device.c
new file mode 100644
index 000000000000..570d0e93f9a9
--- /dev/null
+++ b/drivers/gpu/drm/lima/lima_device.c
@@ -0,0 +1,385 @@
+// SPDX-License-Identifier: GPL-2.0 OR MIT
+/* Copyright 2017-2019 Qiang Yu <yuq825@gmail.com> */
+
+#include <linux/regulator/consumer.h>
+#include <linux/reset.h>
+#include <linux/clk.h>
+#include <linux/dma-mapping.h>
+#include <linux/platform_device.h>
+
+#include "lima_device.h"
+#include "lima_gp.h"
+#include "lima_pp.h"
+#include "lima_mmu.h"
+#include "lima_pmu.h"
+#include "lima_l2_cache.h"
+#include "lima_dlbu.h"
+#include "lima_bcast.h"
+#include "lima_vm.h"
+
+struct lima_ip_desc {
+	char *name;
+	char *irq_name;
+	bool must_have[lima_gpu_num];
+	int offset[lima_gpu_num];
+
+	int (*init)(struct lima_ip *ip);
+	void (*fini)(struct lima_ip *ip);
+};
+
+#define LIMA_IP_DESC(ipname, mst0, mst1, off0, off1, func, irq) \
+	[lima_ip_##ipname] = { \
+		.name = #ipname, \
+		.irq_name = irq, \
+		.must_have = { \
+			[lima_gpu_mali400] = mst0, \
+			[lima_gpu_mali450] = mst1, \
+		}, \
+		.offset = { \
+			[lima_gpu_mali400] = off0, \
+			[lima_gpu_mali450] = off1, \
+		}, \
+		.init = lima_##func##_init, \
+		.fini = lima_##func##_fini, \
+	}
+
+static struct lima_ip_desc lima_ip_desc[lima_ip_num] = {
+	LIMA_IP_DESC(pmu,         false, false, 0x02000, 0x02000, pmu,      "pmu"),
+	LIMA_IP_DESC(l2_cache0,   true,  true,  0x01000, 0x10000, l2_cache, NULL),
+	LIMA_IP_DESC(l2_cache1,   false, true,  -1,      0x01000, l2_cache, NULL),
+	LIMA_IP_DESC(l2_cache2,   false, false, -1,      0x11000, l2_cache, NULL),
+	LIMA_IP_DESC(gp,          true,  true,  0x00000, 0x00000, gp,       "gp"),
+	LIMA_IP_DESC(pp0,         true,  true,  0x08000, 0x08000, pp,       "pp0"),
+	LIMA_IP_DESC(pp1,         false, false, 0x0A000, 0x0A000, pp,       "pp1"),
+	LIMA_IP_DESC(pp2,         false, false, 0x0C000, 0x0C000, pp,       "pp2"),
+	LIMA_IP_DESC(pp3,         false, false, 0x0E000, 0x0E000, pp,       "pp3"),
+	LIMA_IP_DESC(pp4,         false, false, -1,      0x28000, pp,       "pp4"),
+	LIMA_IP_DESC(pp5,         false, false, -1,      0x2A000, pp,       "pp5"),
+	LIMA_IP_DESC(pp6,         false, false, -1,      0x2C000, pp,       "pp6"),
+	LIMA_IP_DESC(pp7,         false, false, -1,      0x2E000, pp,       "pp7"),
+	LIMA_IP_DESC(gpmmu,       true,  true,  0x03000, 0x03000, mmu,      "gpmmu"),
+	LIMA_IP_DESC(ppmmu0,      true,  true,  0x04000, 0x04000, mmu,      "ppmmu0"),
+	LIMA_IP_DESC(ppmmu1,      false, false, 0x05000, 0x05000, mmu,      "ppmmu1"),
+	LIMA_IP_DESC(ppmmu2,      false, false, 0x06000, 0x06000, mmu,      "ppmmu2"),
+	LIMA_IP_DESC(ppmmu3,      false, false, 0x07000, 0x07000, mmu,      "ppmmu3"),
+	LIMA_IP_DESC(ppmmu4,      false, false, -1,      0x1C000, mmu,      "ppmmu4"),
+	LIMA_IP_DESC(ppmmu5,      false, false, -1,      0x1D000, mmu,      "ppmmu5"),
+	LIMA_IP_DESC(ppmmu6,      false, false, -1,      0x1E000, mmu,      "ppmmu6"),
+	LIMA_IP_DESC(ppmmu7,      false, false, -1,      0x1F000, mmu,      "ppmmu7"),
+	LIMA_IP_DESC(dlbu,        false, true,  -1,      0x14000, dlbu,     NULL),
+	LIMA_IP_DESC(bcast,       false, true,  -1,      0x13000, bcast,    NULL),
+	LIMA_IP_DESC(pp_bcast,    false, true,  -1,      0x16000, pp_bcast, "pp"),
+	LIMA_IP_DESC(ppmmu_bcast, false, true,  -1,      0x15000, mmu,      NULL),
+};
+
+const char *lima_ip_name(struct lima_ip *ip)
+{
+	return lima_ip_desc[ip->id].name;
+}
+
+static int lima_clk_init(struct lima_device *dev)
+{
+	int err;
+	unsigned long bus_rate, gpu_rate;
+
+	dev->clk_bus = devm_clk_get(dev->dev, "bus");
+	if (IS_ERR(dev->clk_bus)) {
+		dev_err(dev->dev, "get bus clk failed %ld\n", PTR_ERR(dev->clk_bus));
+		return PTR_ERR(dev->clk_bus);
+	}
+
+	dev->clk_gpu = devm_clk_get(dev->dev, "core");
+	if (IS_ERR(dev->clk_gpu)) {
+		dev_err(dev->dev, "get core clk failed %ld\n", PTR_ERR(dev->clk_gpu));
+		return PTR_ERR(dev->clk_gpu);
+	}
+
+	bus_rate = clk_get_rate(dev->clk_bus);
+	dev_info(dev->dev, "bus rate = %lu\n", bus_rate);
+
+	gpu_rate = clk_get_rate(dev->clk_gpu);
+	dev_info(dev->dev, "mod rate = %lu", gpu_rate);
+
+	err = clk_prepare_enable(dev->clk_bus);
+	if (err)
+		return err;
+
+	err = clk_prepare_enable(dev->clk_gpu);
+	if (err)
+		goto error_out0;
+
+	dev->reset = devm_reset_control_get_optional(dev->dev, NULL);
+	if (IS_ERR(dev->reset)) {
+		err = PTR_ERR(dev->reset);
+		goto error_out1;
+	} else if (dev->reset != NULL) {
+		err = reset_control_deassert(dev->reset);
+		if (err)
+			goto error_out1;
+	}
+
+	return 0;
+
+error_out1:
+	clk_disable_unprepare(dev->clk_gpu);
+error_out0:
+	clk_disable_unprepare(dev->clk_bus);
+	return err;
+}
+
+static void lima_clk_fini(struct lima_device *dev)
+{
+	if (dev->reset != NULL)
+		reset_control_assert(dev->reset);
+	clk_disable_unprepare(dev->clk_gpu);
+	clk_disable_unprepare(dev->clk_bus);
+}
+
+static int lima_regulator_init(struct lima_device *dev)
+{
+	int ret;
+
+	dev->regulator = devm_regulator_get_optional(dev->dev, "mali");
+	if (IS_ERR(dev->regulator)) {
+		ret = PTR_ERR(dev->regulator);
+		dev->regulator = NULL;
+		if (ret == -ENODEV)
+			return 0;
+		dev_err(dev->dev, "failed to get regulator: %d\n", ret);
+		return ret;
+	}
+
+	ret = regulator_enable(dev->regulator);
+	if (ret < 0) {
+		dev_err(dev->dev, "failed to enable regulator: %d\n", ret);
+		return ret;
+	}
+
+	return 0;
+}
+
+static void lima_regulator_fini(struct lima_device *dev)
+{
+	if (dev->regulator)
+		regulator_disable(dev->regulator);
+}
+
+static int lima_init_ip(struct lima_device *dev, int index)
+{
+	struct lima_ip_desc *desc = lima_ip_desc + index;
+	struct lima_ip *ip = dev->ip + index;
+	int offset = desc->offset[dev->id];
+	bool must = desc->must_have[dev->id];
+	int err;
+
+	if (offset < 0)
+		return 0;
+
+	ip->dev = dev;
+	ip->id = index;
+	ip->iomem = dev->iomem + offset;
+	if (desc->irq_name) {
+		err = platform_get_irq_byname(dev->pdev, desc->irq_name);
+		if (err < 0)
+			goto out;
+		ip->irq = err;
+	}
+
+	err = desc->init(ip);
+	if (!err) {
+		ip->present = true;
+		return 0;
+	}
+
+out:
+	return must ? err : 0;
+}
+
+static void lima_fini_ip(struct lima_device *ldev, int index)
+{
+	struct lima_ip_desc *desc = lima_ip_desc + index;
+	struct lima_ip *ip = ldev->ip + index;
+
+	if (ip->present)
+		desc->fini(ip);
+}
+
+static int lima_init_gp_pipe(struct lima_device *dev)
+{
+	struct lima_sched_pipe *pipe = dev->pipe + lima_pipe_gp;
+	int err;
+
+	err = lima_sched_pipe_init(pipe, "gp");
+	if (err)
+		return err;
+
+	pipe->l2_cache[pipe->num_l2_cache++] = dev->ip + lima_ip_l2_cache0;
+	pipe->mmu[pipe->num_mmu++] = dev->ip + lima_ip_gpmmu;
+	pipe->processor[pipe->num_processor++] = dev->ip + lima_ip_gp;
+
+	err = lima_gp_pipe_init(dev);
+	if (err) {
+		lima_sched_pipe_fini(pipe);
+		return err;
+	}
+
+	return 0;
+}
+
+static void lima_fini_gp_pipe(struct lima_device *dev)
+{
+	struct lima_sched_pipe *pipe = dev->pipe + lima_pipe_gp;
+
+	lima_gp_pipe_fini(dev);
+	lima_sched_pipe_fini(pipe);
+}
+
+static int lima_init_pp_pipe(struct lima_device *dev)
+{
+	struct lima_sched_pipe *pipe = dev->pipe + lima_pipe_pp;
+	int err, i;
+
+	err = lima_sched_pipe_init(pipe, "pp");
+	if (err)
+		return err;
+
+	for (i = 0; i < LIMA_SCHED_PIPE_MAX_PROCESSOR; i++) {
+		struct lima_ip *pp = dev->ip + lima_ip_pp0 + i;
+		struct lima_ip *ppmmu = dev->ip + lima_ip_ppmmu0 + i;
+		struct lima_ip *l2_cache;
+
+		if (dev->id == lima_gpu_mali400)
+			l2_cache = dev->ip + lima_ip_l2_cache0;
+		else
+			l2_cache = dev->ip + lima_ip_l2_cache1 + (i >> 2);
+
+		if (pp->present && ppmmu->present && l2_cache->present) {
+			pipe->mmu[pipe->num_mmu++] = ppmmu;
+			pipe->processor[pipe->num_processor++] = pp;
+			if (!pipe->l2_cache[i >> 2])
+				pipe->l2_cache[pipe->num_l2_cache++] = l2_cache;
+		}
+	}
+
+	if (dev->ip[lima_ip_bcast].present) {
+		pipe->bcast_processor = dev->ip + lima_ip_pp_bcast;
+		pipe->bcast_mmu = dev->ip + lima_ip_ppmmu_bcast;
+	}
+
+	err = lima_pp_pipe_init(dev);
+	if (err) {
+		lima_sched_pipe_fini(pipe);
+		return err;
+	}
+
+	return 0;
+}
+
+static void lima_fini_pp_pipe(struct lima_device *dev)
+{
+	struct lima_sched_pipe *pipe = dev->pipe + lima_pipe_pp;
+
+	lima_pp_pipe_fini(dev);
+	lima_sched_pipe_fini(pipe);
+}
+
+int lima_device_init(struct lima_device *ldev)
+{
+	int err, i;
+	struct resource *res;
+
+	dma_set_coherent_mask(ldev->dev, DMA_BIT_MASK(32));
+
+	err = lima_clk_init(ldev);
+	if (err) {
+		dev_err(ldev->dev, "clk init fail %d\n", err);
+		return err;
+	}
+
+	err = lima_regulator_init(ldev);
+	if (err) {
+		dev_err(ldev->dev, "regulator init fail %d\n", err);
+		goto err_out0;
+	}
+
+	ldev->empty_vm = lima_vm_create(ldev);
+	if (!ldev->empty_vm) {
+		err = -ENOMEM;
+		goto err_out1;
+	}
+
+	ldev->va_start = 0;
+	if (ldev->id == lima_gpu_mali450) {
+		ldev->va_end = LIMA_VA_RESERVE_START;
+		ldev->dlbu_cpu = dma_alloc_wc(
+			ldev->dev, LIMA_PAGE_SIZE,
+			&ldev->dlbu_dma, GFP_KERNEL);
+		if (!ldev->dlbu_cpu) {
+			err = -ENOMEM;
+			goto err_out2;
+		}
+	} else
+		ldev->va_end = LIMA_VA_RESERVE_END;
+
+	res = platform_get_resource(ldev->pdev, IORESOURCE_MEM, 0);
+	ldev->iomem = devm_ioremap_resource(ldev->dev, res);
+	if (IS_ERR(ldev->iomem)) {
+		dev_err(ldev->dev, "fail to ioremap iomem\n");
+		err = PTR_ERR(ldev->iomem);
+		goto err_out3;
+	}
+
+	for (i = 0; i < lima_ip_num; i++) {
+		err = lima_init_ip(ldev, i);
+		if (err)
+			goto err_out4;
+	}
+
+	err = lima_init_gp_pipe(ldev);
+	if (err)
+		goto err_out4;
+
+	err = lima_init_pp_pipe(ldev);
+	if (err)
+		goto err_out5;
+
+	return 0;
+
+err_out5:
+	lima_fini_gp_pipe(ldev);
+err_out4:
+	while (--i >= 0)
+		lima_fini_ip(ldev, i);
+err_out3:
+	if (ldev->dlbu_cpu)
+		dma_free_wc(ldev->dev, LIMA_PAGE_SIZE,
+			    ldev->dlbu_cpu, ldev->dlbu_dma);
+err_out2:
+	lima_vm_put(ldev->empty_vm);
+err_out1:
+	lima_regulator_fini(ldev);
+err_out0:
+	lima_clk_fini(ldev);
+	return err;
+}
+
+void lima_device_fini(struct lima_device *ldev)
+{
+	int i;
+
+	lima_fini_pp_pipe(ldev);
+	lima_fini_gp_pipe(ldev);
+
+	for (i = lima_ip_num - 1; i >= 0; i--)
+		lima_fini_ip(ldev, i);
+
+	if (ldev->dlbu_cpu)
+		dma_free_wc(ldev->dev, LIMA_PAGE_SIZE,
+			    ldev->dlbu_cpu, ldev->dlbu_dma);
+
+	lima_vm_put(ldev->empty_vm);
+
+	lima_regulator_fini(ldev);
+
+	lima_clk_fini(ldev);
+}
diff --git a/drivers/gpu/drm/lima/lima_device.h b/drivers/gpu/drm/lima/lima_device.h
new file mode 100644
index 000000000000..31158d86271c
--- /dev/null
+++ b/drivers/gpu/drm/lima/lima_device.h
@@ -0,0 +1,131 @@
+/* SPDX-License-Identifier: GPL-2.0 OR MIT */
+/* Copyright 2018-2019 Qiang Yu <yuq825@gmail.com> */
+
+#ifndef __LIMA_DEVICE_H__
+#define __LIMA_DEVICE_H__
+
+#include <drm/drm_device.h>
+#include <linux/delay.h>
+
+#include "lima_sched.h"
+
+enum lima_gpu_id {
+	lima_gpu_mali400 = 0,
+	lima_gpu_mali450,
+	lima_gpu_num,
+};
+
+enum lima_ip_id {
+	lima_ip_pmu,
+	lima_ip_gpmmu,
+	lima_ip_ppmmu0,
+	lima_ip_ppmmu1,
+	lima_ip_ppmmu2,
+	lima_ip_ppmmu3,
+	lima_ip_ppmmu4,
+	lima_ip_ppmmu5,
+	lima_ip_ppmmu6,
+	lima_ip_ppmmu7,
+	lima_ip_gp,
+	lima_ip_pp0,
+	lima_ip_pp1,
+	lima_ip_pp2,
+	lima_ip_pp3,
+	lima_ip_pp4,
+	lima_ip_pp5,
+	lima_ip_pp6,
+	lima_ip_pp7,
+	lima_ip_l2_cache0,
+	lima_ip_l2_cache1,
+	lima_ip_l2_cache2,
+	lima_ip_dlbu,
+	lima_ip_bcast,
+	lima_ip_pp_bcast,
+	lima_ip_ppmmu_bcast,
+	lima_ip_num,
+};
+
+struct lima_device;
+
+struct lima_ip {
+	struct lima_device *dev;
+	enum lima_ip_id id;
+	bool present;
+
+	void __iomem *iomem;
+	int irq;
+
+	union {
+		/* gp/pp */
+		bool async_reset;
+		/* l2 cache */
+		spinlock_t lock;
+	} data;
+};
+
+enum lima_pipe_id {
+	lima_pipe_gp,
+	lima_pipe_pp,
+	lima_pipe_num,
+};
+
+struct lima_device {
+	struct device *dev;
+	struct drm_device *ddev;
+	struct platform_device *pdev;
+
+	enum lima_gpu_id id;
+	u32 gp_version;
+	u32 pp_version;
+	int num_pp;
+
+	void __iomem *iomem;
+	struct clk *clk_bus;
+	struct clk *clk_gpu;
+	struct reset_control *reset;
+	struct regulator *regulator;
+
+	struct lima_ip ip[lima_ip_num];
+	struct lima_sched_pipe pipe[lima_pipe_num];
+
+	struct lima_vm *empty_vm;
+	uint64_t va_start;
+	uint64_t va_end;
+
+	u32 *dlbu_cpu;
+	dma_addr_t dlbu_dma;
+};
+
+static inline struct lima_device *
+to_lima_dev(struct drm_device *dev)
+{
+	return dev->dev_private;
+}
+
+int lima_device_init(struct lima_device *ldev);
+void lima_device_fini(struct lima_device *ldev);
+
+const char *lima_ip_name(struct lima_ip *ip);
+
+typedef int (*lima_poll_func_t)(struct lima_ip *);
+
+static inline int lima_poll_timeout(struct lima_ip *ip, lima_poll_func_t func,
+				    int sleep_us, int timeout_us)
+{
+	ktime_t timeout = ktime_add_us(ktime_get(), timeout_us);
+
+	might_sleep_if(sleep_us);
+	while (1) {
+		if (func(ip))
+			return 0;
+
+		if (timeout_us && ktime_compare(ktime_get(), timeout) > 0)
+			return -ETIMEDOUT;
+
+		if (sleep_us)
+			usleep_range((sleep_us >> 2) + 1, sleep_us);
+	}
+	return 0;
+}
+
+#endif
diff --git a/drivers/gpu/drm/lima/lima_dlbu.c b/drivers/gpu/drm/lima/lima_dlbu.c
new file mode 100644
index 000000000000..8399ceffb94b
--- /dev/null
+++ b/drivers/gpu/drm/lima/lima_dlbu.c
@@ -0,0 +1,58 @@
+// SPDX-License-Identifier: GPL-2.0 OR MIT
+/* Copyright 2018-2019 Qiang Yu <yuq825@gmail.com> */
+
+#include <linux/io.h>
+#include <linux/device.h>
+
+#include "lima_device.h"
+#include "lima_dlbu.h"
+#include "lima_vm.h"
+#include "lima_regs.h"
+
+#define dlbu_write(reg, data) writel(data, ip->iomem + reg)
+#define dlbu_read(reg) readl(ip->iomem + reg)
+
+void lima_dlbu_enable(struct lima_device *dev, int num_pp)
+{
+	struct lima_sched_pipe *pipe = dev->pipe + lima_pipe_pp;
+	struct lima_ip *ip = dev->ip + lima_ip_dlbu;
+	int i, mask = 0;
+
+	for (i = 0; i < num_pp; i++) {
+		struct lima_ip *pp = pipe->processor[i];
+
+		mask |= 1 << (pp->id - lima_ip_pp0);
+	}
+
+	dlbu_write(LIMA_DLBU_PP_ENABLE_MASK, mask);
+}
+
+void lima_dlbu_disable(struct lima_device *dev)
+{
+	struct lima_ip *ip = dev->ip + lima_ip_dlbu;
+
+	dlbu_write(LIMA_DLBU_PP_ENABLE_MASK, 0);
+}
+
+void lima_dlbu_set_reg(struct lima_ip *ip, u32 *reg)
+{
+	dlbu_write(LIMA_DLBU_TLLIST_VBASEADDR, reg[0]);
+	dlbu_write(LIMA_DLBU_FB_DIM, reg[1]);
+	dlbu_write(LIMA_DLBU_TLLIST_CONF, reg[2]);
+	dlbu_write(LIMA_DLBU_START_TILE_POS, reg[3]);
+}
+
+int lima_dlbu_init(struct lima_ip *ip)
+{
+	struct lima_device *dev = ip->dev;
+
+	dlbu_write(LIMA_DLBU_MASTER_TLLIST_PHYS_ADDR, dev->dlbu_dma | 1);
+	dlbu_write(LIMA_DLBU_MASTER_TLLIST_VADDR, LIMA_VA_RESERVE_DLBU);
+
+	return 0;
+}
+
+void lima_dlbu_fini(struct lima_ip *ip)
+{
+
+}
diff --git a/drivers/gpu/drm/lima/lima_dlbu.h b/drivers/gpu/drm/lima/lima_dlbu.h
new file mode 100644
index 000000000000..16f877984466
--- /dev/null
+++ b/drivers/gpu/drm/lima/lima_dlbu.h
@@ -0,0 +1,18 @@
+/* SPDX-License-Identifier: GPL-2.0 OR MIT */
+/* Copyright 2018-2019 Qiang Yu <yuq825@gmail.com> */
+
+#ifndef __LIMA_DLBU_H__
+#define __LIMA_DLBU_H__
+
+struct lima_ip;
+struct lima_device;
+
+void lima_dlbu_enable(struct lima_device *dev, int num_pp);
+void lima_dlbu_disable(struct lima_device *dev);
+
+void lima_dlbu_set_reg(struct lima_ip *ip, u32 *reg);
+
+int lima_dlbu_init(struct lima_ip *ip);
+void lima_dlbu_fini(struct lima_ip *ip);
+
+#endif
diff --git a/drivers/gpu/drm/lima/lima_drv.c b/drivers/gpu/drm/lima/lima_drv.c
new file mode 100644
index 000000000000..f9a281a62083
--- /dev/null
+++ b/drivers/gpu/drm/lima/lima_drv.c
@@ -0,0 +1,376 @@
+// SPDX-License-Identifier: GPL-2.0 OR MIT
+/* Copyright 2017-2019 Qiang Yu <yuq825@gmail.com> */
+
+#include <linux/module.h>
+#include <linux/of_platform.h>
+#include <linux/uaccess.h>
+#include <linux/slab.h>
+#include <drm/drm_ioctl.h>
+#include <drm/drm_drv.h>
+#include <drm/drm_prime.h>
+#include <drm/lima_drm.h>
+
+#include "lima_drv.h"
+#include "lima_gem.h"
+#include "lima_gem_prime.h"
+#include "lima_vm.h"
+
+int lima_sched_timeout_ms;
+
+MODULE_PARM_DESC(sched_timeout_ms, "task run timeout in ms (0 = no timeout (default))");
+module_param_named(sched_timeout_ms, lima_sched_timeout_ms, int, 0444);
+
+static int lima_ioctl_get_param(struct drm_device *dev, void *data, struct drm_file *file)
+{
+	struct drm_lima_get_param *args = data;
+	struct lima_device *ldev = to_lima_dev(dev);
+
+	if (args->pad)
+		return -EINVAL;
+
+	switch (args->param) {
+	case DRM_LIMA_PARAM_GPU_ID:
+		switch (ldev->id) {
+		case lima_gpu_mali400:
+			args->value = DRM_LIMA_PARAM_GPU_ID_MALI400;
+			break;
+		case lima_gpu_mali450:
+			args->value = DRM_LIMA_PARAM_GPU_ID_MALI450;
+			break;
+		default:
+			args->value = DRM_LIMA_PARAM_GPU_ID_UNKNOWN;
+			break;
+		}
+		break;
+
+	case DRM_LIMA_PARAM_NUM_PP:
+		args->value = ldev->pipe[lima_pipe_pp].num_processor;
+		break;
+
+	case DRM_LIMA_PARAM_GP_VERSION:
+		args->value = ldev->gp_version;
+		break;
+
+	case DRM_LIMA_PARAM_PP_VERSION:
+		args->value = ldev->pp_version;
+		break;
+
+	default:
+		return -EINVAL;
+	}
+
+	return 0;
+}
+
+static int lima_ioctl_gem_create(struct drm_device *dev, void *data, struct drm_file *file)
+{
+	struct drm_lima_gem_create *args = data;
+
+	if (args->pad)
+		return -EINVAL;
+
+	if (args->flags)
+		return -EINVAL;
+
+	if (args->size == 0)
+		return -EINVAL;
+
+	return lima_gem_create_handle(dev, file, args->size, args->flags, &args->handle);
+}
+
+static int lima_ioctl_gem_info(struct drm_device *dev, void *data, struct drm_file *file)
+{
+	struct drm_lima_gem_info *args = data;
+
+	return lima_gem_get_info(file, args->handle, &args->va, &args->offset);
+}
+
+static int lima_ioctl_gem_submit(struct drm_device *dev, void *data, struct drm_file *file)
+{
+	struct drm_lima_gem_submit *args = data;
+	struct lima_device *ldev = to_lima_dev(dev);
+	struct lima_drm_priv *priv = file->driver_priv;
+	struct drm_lima_gem_submit_bo *bos;
+	struct lima_sched_pipe *pipe;
+	struct lima_sched_task *task;
+	struct lima_ctx *ctx;
+	struct lima_submit submit = {0};
+	size_t size;
+	int err = 0;
+
+	if (args->pipe >= lima_pipe_num || args->nr_bos == 0)
+		return -EINVAL;
+
+	if (args->flags & ~(LIMA_SUBMIT_FLAG_EXPLICIT_FENCE))
+		return -EINVAL;
+
+	pipe = ldev->pipe + args->pipe;
+	if (args->frame_size != pipe->frame_size)
+		return -EINVAL;
+
+	bos = kvcalloc(args->nr_bos, sizeof(*submit.bos) + sizeof(*submit.lbos), GFP_KERNEL);
+	if (!bos)
+		return -ENOMEM;
+
+	size = args->nr_bos * sizeof(*submit.bos);
+	if (copy_from_user(bos, u64_to_user_ptr(args->bos), size)) {
+		err = -EFAULT;
+		goto out0;
+	}
+
+	task = kmem_cache_zalloc(pipe->task_slab, GFP_KERNEL);
+	if (!task) {
+		err = -ENOMEM;
+		goto out0;
+	}
+
+	task->frame = task + 1;
+	if (copy_from_user(task->frame, u64_to_user_ptr(args->frame), args->frame_size)) {
+		err = -EFAULT;
+		goto out1;
+	}
+
+	err = pipe->task_validate(pipe, task);
+	if (err)
+		goto out1;
+
+	ctx = lima_ctx_get(&priv->ctx_mgr, args->ctx);
+	if (!ctx) {
+		err = -ENOENT;
+		goto out1;
+	}
+
+	submit.pipe = args->pipe;
+	submit.bos = bos;
+	submit.lbos = (void *)bos + size;
+	submit.nr_bos = args->nr_bos;
+	submit.task = task;
+	submit.ctx = ctx;
+	submit.flags = args->flags;
+	submit.in_sync[0] = args->in_sync[0];
+	submit.in_sync[1] = args->in_sync[1];
+	submit.out_sync = args->out_sync;
+
+	err = lima_gem_submit(file, &submit);
+
+	lima_ctx_put(ctx);
+out1:
+	if (err)
+		kmem_cache_free(pipe->task_slab, task);
+out0:
+	kvfree(bos);
+	return err;
+}
+
+static int lima_ioctl_gem_wait(struct drm_device *dev, void *data, struct drm_file *file)
+{
+	struct drm_lima_gem_wait *args = data;
+
+	if (args->op & ~(LIMA_GEM_WAIT_READ|LIMA_GEM_WAIT_WRITE))
+		return -EINVAL;
+
+	return lima_gem_wait(file, args->handle, args->op, args->timeout_ns);
+}
+
+static int lima_ioctl_ctx_create(struct drm_device *dev, void *data, struct drm_file *file)
+{
+	struct drm_lima_ctx_create *args = data;
+	struct lima_drm_priv *priv = file->driver_priv;
+	struct lima_device *ldev = to_lima_dev(dev);
+
+	if (args->_pad)
+		return -EINVAL;
+
+	return lima_ctx_create(ldev, &priv->ctx_mgr, &args->id);
+}
+
+static int lima_ioctl_ctx_free(struct drm_device *dev, void *data, struct drm_file *file)
+{
+	struct drm_lima_ctx_create *args = data;
+	struct lima_drm_priv *priv = file->driver_priv;
+
+	if (args->_pad)
+		return -EINVAL;
+
+	return lima_ctx_free(&priv->ctx_mgr, args->id);
+}
+
+static int lima_drm_driver_open(struct drm_device *dev, struct drm_file *file)
+{
+	int err;
+	struct lima_drm_priv *priv;
+	struct lima_device *ldev = to_lima_dev(dev);
+
+	priv = kzalloc(sizeof(*priv), GFP_KERNEL);
+	if (!priv)
+		return -ENOMEM;
+
+	priv->vm = lima_vm_create(ldev);
+	if (!priv->vm) {
+		err = -ENOMEM;
+		goto err_out0;
+	}
+
+	lima_ctx_mgr_init(&priv->ctx_mgr);
+
+	file->driver_priv = priv;
+	return 0;
+
+err_out0:
+	kfree(priv);
+	return err;
+}
+
+static void lima_drm_driver_postclose(struct drm_device *dev, struct drm_file *file)
+{
+	struct lima_drm_priv *priv = file->driver_priv;
+
+	lima_ctx_mgr_fini(&priv->ctx_mgr);
+	lima_vm_put(priv->vm);
+	kfree(priv);
+}
+
+static const struct drm_ioctl_desc lima_drm_driver_ioctls[] = {
+	DRM_IOCTL_DEF_DRV(LIMA_GET_PARAM, lima_ioctl_get_param, DRM_AUTH|DRM_RENDER_ALLOW),
+	DRM_IOCTL_DEF_DRV(LIMA_GEM_CREATE, lima_ioctl_gem_create, DRM_AUTH|DRM_RENDER_ALLOW),
+	DRM_IOCTL_DEF_DRV(LIMA_GEM_INFO, lima_ioctl_gem_info, DRM_AUTH|DRM_RENDER_ALLOW),
+	DRM_IOCTL_DEF_DRV(LIMA_GEM_SUBMIT, lima_ioctl_gem_submit, DRM_AUTH|DRM_RENDER_ALLOW),
+	DRM_IOCTL_DEF_DRV(LIMA_GEM_WAIT, lima_ioctl_gem_wait, DRM_AUTH|DRM_RENDER_ALLOW),
+	DRM_IOCTL_DEF_DRV(LIMA_CTX_CREATE, lima_ioctl_ctx_create, DRM_AUTH|DRM_RENDER_ALLOW),
+	DRM_IOCTL_DEF_DRV(LIMA_CTX_FREE, lima_ioctl_ctx_free, DRM_AUTH|DRM_RENDER_ALLOW),
+};
+
+static const struct file_operations lima_drm_driver_fops = {
+	.owner              = THIS_MODULE,
+	.open               = drm_open,
+	.release            = drm_release,
+	.unlocked_ioctl     = drm_ioctl,
+#ifdef CONFIG_COMPAT
+	.compat_ioctl       = drm_compat_ioctl,
+#endif
+	.mmap               = lima_gem_mmap,
+};
+
+static struct drm_driver lima_drm_driver = {
+	.driver_features    = DRIVER_RENDER | DRIVER_GEM | DRIVER_PRIME | DRIVER_SYNCOBJ,
+	.open               = lima_drm_driver_open,
+	.postclose          = lima_drm_driver_postclose,
+	.ioctls             = lima_drm_driver_ioctls,
+	.num_ioctls         = ARRAY_SIZE(lima_drm_driver_ioctls),
+	.fops               = &lima_drm_driver_fops,
+	.gem_free_object_unlocked = lima_gem_free_object,
+	.gem_open_object    = lima_gem_object_open,
+	.gem_close_object   = lima_gem_object_close,
+	.gem_vm_ops         = &lima_gem_vm_ops,
+	.name               = "lima",
+	.desc               = "lima DRM",
+	.date               = "20190217",
+	.major              = 1,
+	.minor              = 0,
+	.patchlevel         = 0,
+
+	.prime_fd_to_handle = drm_gem_prime_fd_to_handle,
+	.gem_prime_import_sg_table = lima_gem_prime_import_sg_table,
+	.prime_handle_to_fd = drm_gem_prime_handle_to_fd,
+	.gem_prime_get_sg_table = lima_gem_prime_get_sg_table,
+	.gem_prime_mmap = lima_gem_prime_mmap,
+};
+
+static int lima_pdev_probe(struct platform_device *pdev)
+{
+	struct lima_device *ldev;
+	struct drm_device *ddev;
+	int err;
+
+	err = lima_sched_slab_init();
+	if (err)
+		return err;
+
+	ldev = devm_kzalloc(&pdev->dev, sizeof(*ldev), GFP_KERNEL);
+	if (!ldev) {
+		err = -ENOMEM;
+		goto err_out0;
+	}
+
+	ldev->pdev = pdev;
+	ldev->dev = &pdev->dev;
+	ldev->id = (enum lima_gpu_id)of_device_get_match_data(&pdev->dev);
+
+	platform_set_drvdata(pdev, ldev);
+
+	/* Allocate and initialize the DRM device. */
+	ddev = drm_dev_alloc(&lima_drm_driver, &pdev->dev);
+	if (IS_ERR(ddev))
+		return PTR_ERR(ddev);
+
+	ddev->dev_private = ldev;
+	ldev->ddev = ddev;
+
+	err = lima_device_init(ldev);
+	if (err) {
+		dev_err(&pdev->dev, "Fatal error during GPU init\n");
+		goto err_out1;
+	}
+
+	/*
+	 * Register the DRM device with the core and the connectors with
+	 * sysfs.
+	 */
+	err = drm_dev_register(ddev, 0);
+	if (err < 0)
+		goto err_out2;
+
+	return 0;
+
+err_out2:
+	lima_device_fini(ldev);
+err_out1:
+	drm_dev_put(ddev);
+err_out0:
+	lima_sched_slab_fini();
+	return err;
+}
+
+static int lima_pdev_remove(struct platform_device *pdev)
+{
+	struct lima_device *ldev = platform_get_drvdata(pdev);
+	struct drm_device *ddev = ldev->ddev;
+
+	drm_dev_unregister(ddev);
+	lima_device_fini(ldev);
+	drm_dev_put(ddev);
+	lima_sched_slab_fini();
+	return 0;
+}
+
+static const struct of_device_id dt_match[] = {
+	{ .compatible = "arm,mali-400", .data = (void *)lima_gpu_mali400 },
+	{ .compatible = "arm,mali-450", .data = (void *)lima_gpu_mali450 },
+	{}
+};
+MODULE_DEVICE_TABLE(of, dt_match);
+
+static struct platform_driver lima_platform_driver = {
+	.probe      = lima_pdev_probe,
+	.remove     = lima_pdev_remove,
+	.driver     = {
+		.name   = "lima",
+		.of_match_table = dt_match,
+	},
+};
+
+static int __init lima_init(void)
+{
+	return platform_driver_register(&lima_platform_driver);
+}
+module_init(lima_init);
+
+static void __exit lima_exit(void)
+{
+	platform_driver_unregister(&lima_platform_driver);
+}
+module_exit(lima_exit);
+
+MODULE_AUTHOR("Lima Project Developers");
+MODULE_DESCRIPTION("Lima DRM Driver");
+MODULE_LICENSE("GPL v2");
diff --git a/drivers/gpu/drm/lima/lima_drv.h b/drivers/gpu/drm/lima/lima_drv.h
new file mode 100644
index 000000000000..69c7344715c9
--- /dev/null
+++ b/drivers/gpu/drm/lima/lima_drv.h
@@ -0,0 +1,45 @@
+/* SPDX-License-Identifier: GPL-2.0 OR MIT */
+/* Copyright 2017-2019 Qiang Yu <yuq825@gmail.com> */
+
+#ifndef __LIMA_DRV_H__
+#define __LIMA_DRV_H__
+
+#include <drm/drm_file.h>
+
+#include "lima_ctx.h"
+
+extern int lima_sched_timeout_ms;
+
+struct lima_vm;
+struct lima_bo;
+struct lima_sched_task;
+
+struct drm_lima_gem_submit_bo;
+
+struct lima_drm_priv {
+	struct lima_vm *vm;
+	struct lima_ctx_mgr ctx_mgr;
+};
+
+struct lima_submit {
+	struct lima_ctx *ctx;
+	int pipe;
+	u32 flags;
+
+	struct drm_lima_gem_submit_bo *bos;
+	struct lima_bo **lbos;
+	u32 nr_bos;
+
+	u32 in_sync[2];
+	u32 out_sync;
+
+	struct lima_sched_task *task;
+};
+
+static inline struct lima_drm_priv *
+to_lima_drm_priv(struct drm_file *file)
+{
+	return file->driver_priv;
+}
+
+#endif
diff --git a/drivers/gpu/drm/lima/lima_gem.c b/drivers/gpu/drm/lima/lima_gem.c
new file mode 100644
index 000000000000..2d3cf96f6c58
--- /dev/null
+++ b/drivers/gpu/drm/lima/lima_gem.c
@@ -0,0 +1,381 @@
+// SPDX-License-Identifier: GPL-2.0 OR MIT
+/* Copyright 2017-2019 Qiang Yu <yuq825@gmail.com> */
+
+#include <linux/sync_file.h>
+#include <linux/pfn_t.h>
+
+#include <drm/drm_file.h>
+#include <drm/drm_syncobj.h>
+#include <drm/drm_utils.h>
+
+#include <drm/lima_drm.h>
+
+#include "lima_drv.h"
+#include "lima_gem.h"
+#include "lima_gem_prime.h"
+#include "lima_vm.h"
+#include "lima_object.h"
+
+int lima_gem_create_handle(struct drm_device *dev, struct drm_file *file,
+			   u32 size, u32 flags, u32 *handle)
+{
+	int err;
+	struct lima_bo *bo;
+	struct lima_device *ldev = to_lima_dev(dev);
+
+	bo = lima_bo_create(ldev, size, flags, NULL, NULL);
+	if (IS_ERR(bo))
+		return PTR_ERR(bo);
+
+	err = drm_gem_handle_create(file, &bo->gem, handle);
+
+	/* drop reference from allocate - handle holds it now */
+	drm_gem_object_put_unlocked(&bo->gem);
+
+	return err;
+}
+
+void lima_gem_free_object(struct drm_gem_object *obj)
+{
+	struct lima_bo *bo = to_lima_bo(obj);
+
+	if (!list_empty(&bo->va))
+		dev_err(obj->dev->dev, "lima gem free bo still has va\n");
+
+	lima_bo_destroy(bo);
+}
+
+int lima_gem_object_open(struct drm_gem_object *obj, struct drm_file *file)
+{
+	struct lima_bo *bo = to_lima_bo(obj);
+	struct lima_drm_priv *priv = to_lima_drm_priv(file);
+	struct lima_vm *vm = priv->vm;
+
+	return lima_vm_bo_add(vm, bo, true);
+}
+
+void lima_gem_object_close(struct drm_gem_object *obj, struct drm_file *file)
+{
+	struct lima_bo *bo = to_lima_bo(obj);
+	struct lima_drm_priv *priv = to_lima_drm_priv(file);
+	struct lima_vm *vm = priv->vm;
+
+	lima_vm_bo_del(vm, bo);
+}
+
+int lima_gem_get_info(struct drm_file *file, u32 handle, u32 *va, u64 *offset)
+{
+	struct drm_gem_object *obj;
+	struct lima_bo *bo;
+	struct lima_drm_priv *priv = to_lima_drm_priv(file);
+	struct lima_vm *vm = priv->vm;
+	int err;
+
+	obj = drm_gem_object_lookup(file, handle);
+	if (!obj)
+		return -ENOENT;
+
+	bo = to_lima_bo(obj);
+
+	*va = lima_vm_get_va(vm, bo);
+
+	err = drm_gem_create_mmap_offset(obj);
+	if (!err)
+		*offset = drm_vma_node_offset_addr(&obj->vma_node);
+
+	drm_gem_object_put_unlocked(obj);
+	return err;
+}
+
+static vm_fault_t lima_gem_fault(struct vm_fault *vmf)
+{
+	struct vm_area_struct *vma = vmf->vma;
+	struct drm_gem_object *obj = vma->vm_private_data;
+	struct lima_bo *bo = to_lima_bo(obj);
+	pfn_t pfn;
+	pgoff_t pgoff;
+
+	/* We don't use vmf->pgoff since that has the fake offset: */
+	pgoff = (vmf->address - vma->vm_start) >> PAGE_SHIFT;
+	pfn = __pfn_to_pfn_t(page_to_pfn(bo->pages[pgoff]), PFN_DEV);
+
+	return vmf_insert_mixed(vma, vmf->address, pfn);
+}
+
+const struct vm_operations_struct lima_gem_vm_ops = {
+	.fault = lima_gem_fault,
+	.open = drm_gem_vm_open,
+	.close = drm_gem_vm_close,
+};
+
+void lima_set_vma_flags(struct vm_area_struct *vma)
+{
+	pgprot_t prot = vm_get_page_prot(vma->vm_flags);
+
+	vma->vm_flags |= VM_MIXEDMAP;
+	vma->vm_flags &= ~VM_PFNMAP;
+	vma->vm_page_prot = pgprot_writecombine(prot);
+}
+
+int lima_gem_mmap(struct file *filp, struct vm_area_struct *vma)
+{
+	int ret;
+
+	ret = drm_gem_mmap(filp, vma);
+	if (ret)
+		return ret;
+
+	lima_set_vma_flags(vma);
+	return 0;
+}
+
+static int lima_gem_sync_bo(struct lima_sched_task *task, struct lima_bo *bo,
+			    bool write, bool explicit)
+{
+	int err = 0;
+
+	if (!write) {
+		err = reservation_object_reserve_shared(bo->gem.resv, 1);
+		if (err)
+			return err;
+	}
+
+	/* explicit sync use user passed dep fence */
+	if (explicit)
+		return 0;
+
+	/* implicit sync use bo fence in resv obj */
+	if (write) {
+		unsigned nr_fences;
+		struct dma_fence **fences;
+		int i;
+
+		err = reservation_object_get_fences_rcu(
+			bo->gem.resv, NULL, &nr_fences, &fences);
+		if (err || !nr_fences)
+			return err;
+
+		for (i = 0; i < nr_fences; i++) {
+			err = lima_sched_task_add_dep(task, fences[i]);
+			if (err)
+				break;
+		}
+
+		/* for error case free remaining fences */
+		for ( ; i < nr_fences; i++)
+			dma_fence_put(fences[i]);
+
+		kfree(fences);
+	} else {
+		struct dma_fence *fence;
+
+		fence = reservation_object_get_excl_rcu(bo->gem.resv);
+		if (fence) {
+			err = lima_sched_task_add_dep(task, fence);
+			if (err)
+				dma_fence_put(fence);
+		}
+	}
+
+	return err;
+}
+
+static int lima_gem_lock_bos(struct lima_bo **bos, u32 nr_bos,
+			     struct ww_acquire_ctx *ctx)
+{
+	int i, ret = 0, contended, slow_locked = -1;
+
+	ww_acquire_init(ctx, &reservation_ww_class);
+
+retry:
+	for (i = 0; i < nr_bos; i++) {
+		if (i == slow_locked) {
+			slow_locked = -1;
+			continue;
+		}
+
+		ret = ww_mutex_lock_interruptible(&bos[i]->gem.resv->lock, ctx);
+		if (ret < 0) {
+			contended = i;
+			goto err;
+		}
+	}
+
+	ww_acquire_done(ctx);
+	return 0;
+
+err:
+	for (i--; i >= 0; i--)
+		ww_mutex_unlock(&bos[i]->gem.resv->lock);
+
+	if (slow_locked >= 0)
+		ww_mutex_unlock(&bos[slow_locked]->gem.resv->lock);
+
+	if (ret == -EDEADLK) {
+		/* we lost out in a seqno race, lock and retry.. */
+		ret = ww_mutex_lock_slow_interruptible(
+			&bos[contended]->gem.resv->lock, ctx);
+		if (!ret) {
+			slow_locked = contended;
+			goto retry;
+		}
+	}
+	ww_acquire_fini(ctx);
+
+	return ret;
+}
+
+static void lima_gem_unlock_bos(struct lima_bo **bos, u32 nr_bos,
+				struct ww_acquire_ctx *ctx)
+{
+	int i;
+
+	for (i = 0; i < nr_bos; i++)
+		ww_mutex_unlock(&bos[i]->gem.resv->lock);
+	ww_acquire_fini(ctx);
+}
+
+static int lima_gem_add_deps(struct drm_file *file, struct lima_submit *submit)
+{
+	int i, err;
+
+	for (i = 0; i < ARRAY_SIZE(submit->in_sync); i++) {
+		struct dma_fence *fence = NULL;
+
+		if (!submit->in_sync[i])
+			continue;
+
+		err = drm_syncobj_find_fence(file, submit->in_sync[i],
+					     0, 0, &fence);
+		if (err)
+			return err;
+
+		err = lima_sched_task_add_dep(submit->task, fence);
+		if (err) {
+			dma_fence_put(fence);
+			return err;
+		}
+	}
+
+	return 0;
+}
+
+int lima_gem_submit(struct drm_file *file, struct lima_submit *submit)
+{
+	int i, err = 0;
+	struct ww_acquire_ctx ctx;
+	struct lima_drm_priv *priv = to_lima_drm_priv(file);
+	struct lima_vm *vm = priv->vm;
+	struct drm_syncobj *out_sync = NULL;
+	struct dma_fence *fence;
+	struct lima_bo **bos = submit->lbos;
+
+	if (submit->out_sync) {
+		out_sync = drm_syncobj_find(file, submit->out_sync);
+		if (!out_sync)
+			return -ENOENT;
+	}
+
+	for (i = 0; i < submit->nr_bos; i++) {
+		struct drm_gem_object *obj;
+		struct lima_bo *bo;
+
+		obj = drm_gem_object_lookup(file, submit->bos[i].handle);
+		if (!obj) {
+			err = -ENOENT;
+			goto err_out0;
+		}
+
+		bo = to_lima_bo(obj);
+
+		/* increase refcnt of gpu va map to prevent unmapped when executing,
+		 * will be decreased when task done
+		 */
+		err = lima_vm_bo_add(vm, bo, false);
+		if (err) {
+			drm_gem_object_put_unlocked(obj);
+			goto err_out0;
+		}
+
+		bos[i] = bo;
+	}
+
+	err = lima_gem_lock_bos(bos, submit->nr_bos, &ctx);
+	if (err)
+		goto err_out0;
+
+	err = lima_sched_task_init(
+		submit->task, submit->ctx->context + submit->pipe,
+		bos, submit->nr_bos, vm);
+	if (err)
+		goto err_out1;
+
+	err = lima_gem_add_deps(file, submit);
+	if (err)
+		goto err_out2;
+
+	for (i = 0; i < submit->nr_bos; i++) {
+		err = lima_gem_sync_bo(
+			submit->task, bos[i],
+			submit->bos[i].flags & LIMA_SUBMIT_BO_WRITE,
+			submit->flags & LIMA_SUBMIT_FLAG_EXPLICIT_FENCE);
+		if (err)
+			goto err_out2;
+	}
+
+	fence = lima_sched_context_queue_task(
+		submit->ctx->context + submit->pipe, submit->task);
+
+	for (i = 0; i < submit->nr_bos; i++) {
+		if (submit->bos[i].flags & LIMA_SUBMIT_BO_WRITE)
+			reservation_object_add_excl_fence(bos[i]->gem.resv, fence);
+		else
+			reservation_object_add_shared_fence(bos[i]->gem.resv, fence);
+	}
+
+	lima_gem_unlock_bos(bos, submit->nr_bos, &ctx);
+
+	for (i = 0; i < submit->nr_bos; i++)
+		drm_gem_object_put_unlocked(&bos[i]->gem);
+
+	if (out_sync) {
+		drm_syncobj_replace_fence(out_sync, fence);
+		drm_syncobj_put(out_sync);
+	}
+
+	dma_fence_put(fence);
+
+	return 0;
+
+err_out2:
+	lima_sched_task_fini(submit->task);
+err_out1:
+	lima_gem_unlock_bos(bos, submit->nr_bos, &ctx);
+err_out0:
+	for (i = 0; i < submit->nr_bos; i++) {
+		if (!bos[i])
+			break;
+		lima_vm_bo_del(vm, bos[i]);
+		drm_gem_object_put_unlocked(&bos[i]->gem);
+	}
+	if (out_sync)
+		drm_syncobj_put(out_sync);
+	return err;
+}
+
+int lima_gem_wait(struct drm_file *file, u32 handle, u32 op, s64 timeout_ns)
+{
+	bool write = op & LIMA_GEM_WAIT_WRITE;
+	long ret, timeout;
+
+	if (!op)
+		return 0;
+
+	timeout = drm_timeout_abs_to_jiffies(timeout_ns);
+
+	ret = drm_gem_reservation_object_wait(file, handle, write, timeout);
+	if (ret == 0)
+		ret = timeout ? -ETIMEDOUT : -EBUSY;
+
+	return ret;
+}
diff --git a/drivers/gpu/drm/lima/lima_gem.h b/drivers/gpu/drm/lima/lima_gem.h
new file mode 100644
index 000000000000..556111a01135
--- /dev/null
+++ b/drivers/gpu/drm/lima/lima_gem.h
@@ -0,0 +1,25 @@
+/* SPDX-License-Identifier: GPL-2.0 OR MIT */
+/* Copyright 2017-2019 Qiang Yu <yuq825@gmail.com> */
+
+#ifndef __LIMA_GEM_H__
+#define __LIMA_GEM_H__
+
+struct lima_bo;
+struct lima_submit;
+
+extern const struct vm_operations_struct lima_gem_vm_ops;
+
+struct lima_bo *lima_gem_create_bo(struct drm_device *dev, u32 size, u32 flags);
+int lima_gem_create_handle(struct drm_device *dev, struct drm_file *file,
+			   u32 size, u32 flags, u32 *handle);
+void lima_gem_free_object(struct drm_gem_object *obj);
+int lima_gem_object_open(struct drm_gem_object *obj, struct drm_file *file);
+void lima_gem_object_close(struct drm_gem_object *obj, struct drm_file *file);
+int lima_gem_get_info(struct drm_file *file, u32 handle, u32 *va, u64 *offset);
+int lima_gem_mmap(struct file *filp, struct vm_area_struct *vma);
+int lima_gem_submit(struct drm_file *file, struct lima_submit *submit);
+int lima_gem_wait(struct drm_file *file, u32 handle, u32 op, s64 timeout_ns);
+
+void lima_set_vma_flags(struct vm_area_struct *vma);
+
+#endif
diff --git a/drivers/gpu/drm/lima/lima_gem_prime.c b/drivers/gpu/drm/lima/lima_gem_prime.c
new file mode 100644
index 000000000000..9c6d9f1dba55
--- /dev/null
+++ b/drivers/gpu/drm/lima/lima_gem_prime.c
@@ -0,0 +1,47 @@
+// SPDX-License-Identifier: GPL-2.0 OR MIT
+/* Copyright 2018-2019 Qiang Yu <yuq825@gmail.com> */
+
+#include <linux/dma-buf.h>
+#include <drm/drm_prime.h>
+#include <drm/drm_drv.h>
+#include <drm/drm_file.h>
+
+#include "lima_device.h"
+#include "lima_object.h"
+#include "lima_gem.h"
+#include "lima_gem_prime.h"
+
+struct drm_gem_object *lima_gem_prime_import_sg_table(
+	struct drm_device *dev, struct dma_buf_attachment *attach,
+	struct sg_table *sgt)
+{
+	struct lima_device *ldev = to_lima_dev(dev);
+	struct lima_bo *bo;
+
+	bo = lima_bo_create(ldev, attach->dmabuf->size, 0, sgt,
+			    attach->dmabuf->resv);
+	if (IS_ERR(bo))
+		return ERR_CAST(bo);
+
+	return &bo->gem;
+}
+
+struct sg_table *lima_gem_prime_get_sg_table(struct drm_gem_object *obj)
+{
+	struct lima_bo *bo = to_lima_bo(obj);
+	int npages = obj->size >> PAGE_SHIFT;
+
+	return drm_prime_pages_to_sg(bo->pages, npages);
+}
+
+int lima_gem_prime_mmap(struct drm_gem_object *obj, struct vm_area_struct *vma)
+{
+	int ret;
+
+	ret = drm_gem_mmap_obj(obj, obj->size, vma);
+	if (ret)
+		return ret;
+
+	lima_set_vma_flags(vma);
+	return 0;
+}
diff --git a/drivers/gpu/drm/lima/lima_gem_prime.h b/drivers/gpu/drm/lima/lima_gem_prime.h
new file mode 100644
index 000000000000..34b4d35c21e3
--- /dev/null
+++ b/drivers/gpu/drm/lima/lima_gem_prime.h
@@ -0,0 +1,13 @@
+/* SPDX-License-Identifier: GPL-2.0 OR MIT */
+/* Copyright 2018-2019 Qiang Yu <yuq825@gmail.com> */
+
+#ifndef __LIMA_GEM_PRIME_H__
+#define __LIMA_GEM_PRIME_H__
+
+struct drm_gem_object *lima_gem_prime_import_sg_table(
+	struct drm_device *dev, struct dma_buf_attachment *attach,
+	struct sg_table *sgt);
+struct sg_table *lima_gem_prime_get_sg_table(struct drm_gem_object *obj);
+int lima_gem_prime_mmap(struct drm_gem_object *obj, struct vm_area_struct *vma);
+
+#endif
diff --git a/drivers/gpu/drm/lima/lima_gp.c b/drivers/gpu/drm/lima/lima_gp.c
new file mode 100644
index 000000000000..ccf49faedebf
--- /dev/null
+++ b/drivers/gpu/drm/lima/lima_gp.c
@@ -0,0 +1,283 @@
+// SPDX-License-Identifier: GPL-2.0 OR MIT
+/* Copyright 2017-2019 Qiang Yu <yuq825@gmail.com> */
+
+#include <linux/interrupt.h>
+#include <linux/iopoll.h>
+#include <linux/device.h>
+#include <linux/slab.h>
+
+#include <drm/lima_drm.h>
+
+#include "lima_device.h"
+#include "lima_gp.h"
+#include "lima_regs.h"
+
+#define gp_write(reg, data) writel(data, ip->iomem + reg)
+#define gp_read(reg) readl(ip->iomem + reg)
+
+static irqreturn_t lima_gp_irq_handler(int irq, void *data)
+{
+	struct lima_ip *ip = data;
+	struct lima_device *dev = ip->dev;
+	struct lima_sched_pipe *pipe = dev->pipe + lima_pipe_gp;
+	u32 state = gp_read(LIMA_GP_INT_STAT);
+	u32 status = gp_read(LIMA_GP_STATUS);
+	bool done = false;
+
+	/* for shared irq case */
+	if (!state)
+		return IRQ_NONE;
+
+	if (state & LIMA_GP_IRQ_MASK_ERROR) {
+		dev_err(dev->dev, "gp error irq state=%x status=%x\n",
+			state, status);
+
+		/* mask all interrupts before hard reset */
+		gp_write(LIMA_GP_INT_MASK, 0);
+
+		pipe->error = true;
+		done = true;
+	} else {
+		bool valid = state & (LIMA_GP_IRQ_VS_END_CMD_LST |
+				      LIMA_GP_IRQ_PLBU_END_CMD_LST);
+		bool active = status & (LIMA_GP_STATUS_VS_ACTIVE |
+					LIMA_GP_STATUS_PLBU_ACTIVE);
+		done = valid && !active;
+	}
+
+	gp_write(LIMA_GP_INT_CLEAR, state);
+
+	if (done)
+		lima_sched_pipe_task_done(pipe);
+
+	return IRQ_HANDLED;
+}
+
+static void lima_gp_soft_reset_async(struct lima_ip *ip)
+{
+	if (ip->data.async_reset)
+		return;
+
+	gp_write(LIMA_GP_INT_MASK, 0);
+	gp_write(LIMA_GP_INT_CLEAR, LIMA_GP_IRQ_RESET_COMPLETED);
+	gp_write(LIMA_GP_CMD, LIMA_GP_CMD_SOFT_RESET);
+	ip->data.async_reset = true;
+}
+
+static int lima_gp_soft_reset_async_wait(struct lima_ip *ip)
+{
+	struct lima_device *dev = ip->dev;
+	int err;
+	u32 v;
+
+	if (!ip->data.async_reset)
+		return 0;
+
+	err = readl_poll_timeout(ip->iomem + LIMA_GP_INT_RAWSTAT, v,
+				 v & LIMA_GP_IRQ_RESET_COMPLETED,
+				 0, 100);
+	if (err) {
+		dev_err(dev->dev, "gp soft reset time out\n");
+		return err;
+	}
+
+	gp_write(LIMA_GP_INT_CLEAR, LIMA_GP_IRQ_MASK_ALL);
+	gp_write(LIMA_GP_INT_MASK, LIMA_GP_IRQ_MASK_USED);
+
+	ip->data.async_reset = false;
+	return 0;
+}
+
+static int lima_gp_task_validate(struct lima_sched_pipe *pipe,
+				 struct lima_sched_task *task)
+{
+	struct drm_lima_gp_frame *frame = task->frame;
+	u32 *f = frame->frame;
+	(void)pipe;
+
+	if (f[LIMA_GP_VSCL_START_ADDR >> 2] >
+	    f[LIMA_GP_VSCL_END_ADDR >> 2] ||
+	    f[LIMA_GP_PLBUCL_START_ADDR >> 2] >
+	    f[LIMA_GP_PLBUCL_END_ADDR >> 2] ||
+	    f[LIMA_GP_PLBU_ALLOC_START_ADDR >> 2] >
+	    f[LIMA_GP_PLBU_ALLOC_END_ADDR >> 2])
+		return -EINVAL;
+
+	if (f[LIMA_GP_VSCL_START_ADDR >> 2] ==
+	    f[LIMA_GP_VSCL_END_ADDR >> 2] &&
+	    f[LIMA_GP_PLBUCL_START_ADDR >> 2] ==
+	    f[LIMA_GP_PLBUCL_END_ADDR >> 2])
+		return -EINVAL;
+
+	return 0;
+}
+
+static void lima_gp_task_run(struct lima_sched_pipe *pipe,
+			     struct lima_sched_task *task)
+{
+	struct lima_ip *ip = pipe->processor[0];
+	struct drm_lima_gp_frame *frame = task->frame;
+	u32 *f = frame->frame;
+	u32 cmd = 0;
+	int i;
+
+	if (f[LIMA_GP_VSCL_START_ADDR >> 2] !=
+	    f[LIMA_GP_VSCL_END_ADDR >> 2])
+		cmd |= LIMA_GP_CMD_START_VS;
+	if (f[LIMA_GP_PLBUCL_START_ADDR >> 2] !=
+	    f[LIMA_GP_PLBUCL_END_ADDR >> 2])
+		cmd |= LIMA_GP_CMD_START_PLBU;
+
+	/* before any hw ops, wait last success task async soft reset */
+	lima_gp_soft_reset_async_wait(ip);
+
+	for (i = 0; i < LIMA_GP_FRAME_REG_NUM; i++)
+		writel(f[i], ip->iomem + LIMA_GP_VSCL_START_ADDR + i * 4);
+
+	gp_write(LIMA_GP_CMD, LIMA_GP_CMD_UPDATE_PLBU_ALLOC);
+	gp_write(LIMA_GP_CMD, cmd);
+}
+
+static int lima_gp_hard_reset_poll(struct lima_ip *ip)
+{
+	gp_write(LIMA_GP_PERF_CNT_0_LIMIT, 0xC01A0000);
+	return gp_read(LIMA_GP_PERF_CNT_0_LIMIT) == 0xC01A0000;
+}
+
+static int lima_gp_hard_reset(struct lima_ip *ip)
+{
+	struct lima_device *dev = ip->dev;
+	int ret;
+
+	gp_write(LIMA_GP_PERF_CNT_0_LIMIT, 0xC0FFE000);
+	gp_write(LIMA_GP_INT_MASK, 0);
+	gp_write(LIMA_GP_CMD, LIMA_GP_CMD_RESET);
+	ret = lima_poll_timeout(ip, lima_gp_hard_reset_poll, 10, 100);
+	if (ret) {
+		dev_err(dev->dev, "gp hard reset timeout\n");
+		return ret;
+	}
+
+	gp_write(LIMA_GP_PERF_CNT_0_LIMIT, 0);
+	gp_write(LIMA_GP_INT_CLEAR, LIMA_GP_IRQ_MASK_ALL);
+	gp_write(LIMA_GP_INT_MASK, LIMA_GP_IRQ_MASK_USED);
+	return 0;
+}
+
+static void lima_gp_task_fini(struct lima_sched_pipe *pipe)
+{
+	lima_gp_soft_reset_async(pipe->processor[0]);
+}
+
+static void lima_gp_task_error(struct lima_sched_pipe *pipe)
+{
+	struct lima_ip *ip = pipe->processor[0];
+
+	dev_err(ip->dev->dev, "gp task error int_state=%x status=%x\n",
+		gp_read(LIMA_GP_INT_STAT), gp_read(LIMA_GP_STATUS));
+
+	lima_gp_hard_reset(ip);
+}
+
+static void lima_gp_task_mmu_error(struct lima_sched_pipe *pipe)
+{
+	lima_sched_pipe_task_done(pipe);
+}
+
+static void lima_gp_print_version(struct lima_ip *ip)
+{
+	u32 version, major, minor;
+	char *name;
+
+	version = gp_read(LIMA_GP_VERSION);
+	major = (version >> 8) & 0xFF;
+	minor = version & 0xFF;
+	switch (version >> 16) {
+	case 0xA07:
+	    name = "mali200";
+		break;
+	case 0xC07:
+		name = "mali300";
+		break;
+	case 0xB07:
+		name = "mali400";
+		break;
+	case 0xD07:
+		name = "mali450";
+		break;
+	default:
+		name = "unknown";
+		break;
+	}
+	dev_info(ip->dev->dev, "%s - %s version major %d minor %d\n",
+		 lima_ip_name(ip), name, major, minor);
+}
+
+static struct kmem_cache *lima_gp_task_slab;
+static int lima_gp_task_slab_refcnt;
+
+int lima_gp_init(struct lima_ip *ip)
+{
+	struct lima_device *dev = ip->dev;
+	int err;
+
+	lima_gp_print_version(ip);
+
+	ip->data.async_reset = false;
+	lima_gp_soft_reset_async(ip);
+	err = lima_gp_soft_reset_async_wait(ip);
+	if (err)
+		return err;
+
+	err = devm_request_irq(dev->dev, ip->irq, lima_gp_irq_handler,
+			       IRQF_SHARED, lima_ip_name(ip), ip);
+	if (err) {
+		dev_err(dev->dev, "gp %s fail to request irq\n",
+			lima_ip_name(ip));
+		return err;
+	}
+
+	dev->gp_version = gp_read(LIMA_GP_VERSION);
+
+	return 0;
+}
+
+void lima_gp_fini(struct lima_ip *ip)
+{
+
+}
+
+int lima_gp_pipe_init(struct lima_device *dev)
+{
+	int frame_size = sizeof(struct drm_lima_gp_frame);
+	struct lima_sched_pipe *pipe = dev->pipe + lima_pipe_gp;
+
+	if (!lima_gp_task_slab) {
+		lima_gp_task_slab = kmem_cache_create_usercopy(
+			"lima_gp_task", sizeof(struct lima_sched_task) + frame_size,
+			0, SLAB_HWCACHE_ALIGN, sizeof(struct lima_sched_task),
+			frame_size, NULL);
+		if (!lima_gp_task_slab)
+			return -ENOMEM;
+	}
+	lima_gp_task_slab_refcnt++;
+
+	pipe->frame_size = frame_size;
+	pipe->task_slab = lima_gp_task_slab;
+
+	pipe->task_validate = lima_gp_task_validate;
+	pipe->task_run = lima_gp_task_run;
+	pipe->task_fini = lima_gp_task_fini;
+	pipe->task_error = lima_gp_task_error;
+	pipe->task_mmu_error = lima_gp_task_mmu_error;
+
+	return 0;
+}
+
+void lima_gp_pipe_fini(struct lima_device *dev)
+{
+	if (!--lima_gp_task_slab_refcnt) {
+		kmem_cache_destroy(lima_gp_task_slab);
+		lima_gp_task_slab = NULL;
+	}
+}
diff --git a/drivers/gpu/drm/lima/lima_gp.h b/drivers/gpu/drm/lima/lima_gp.h
new file mode 100644
index 000000000000..516e5c1babbb
--- /dev/null
+++ b/drivers/gpu/drm/lima/lima_gp.h
@@ -0,0 +1,16 @@
+/* SPDX-License-Identifier: GPL-2.0 OR MIT */
+/* Copyright 2017-2019 Qiang Yu <yuq825@gmail.com> */
+
+#ifndef __LIMA_GP_H__
+#define __LIMA_GP_H__
+
+struct lima_ip;
+struct lima_device;
+
+int lima_gp_init(struct lima_ip *ip);
+void lima_gp_fini(struct lima_ip *ip);
+
+int lima_gp_pipe_init(struct lima_device *dev);
+void lima_gp_pipe_fini(struct lima_device *dev);
+
+#endif
diff --git a/drivers/gpu/drm/lima/lima_l2_cache.c b/drivers/gpu/drm/lima/lima_l2_cache.c
new file mode 100644
index 000000000000..6873a7af5a5c
--- /dev/null
+++ b/drivers/gpu/drm/lima/lima_l2_cache.c
@@ -0,0 +1,80 @@
+// SPDX-License-Identifier: GPL-2.0 OR MIT
+/* Copyright 2017-2019 Qiang Yu <yuq825@gmail.com> */
+
+#include <linux/iopoll.h>
+#include <linux/device.h>
+
+#include "lima_device.h"
+#include "lima_l2_cache.h"
+#include "lima_regs.h"
+
+#define l2_cache_write(reg, data) writel(data, ip->iomem + reg)
+#define l2_cache_read(reg) readl(ip->iomem + reg)
+
+static int lima_l2_cache_wait_idle(struct lima_ip *ip)
+{
+	struct lima_device *dev = ip->dev;
+	int err;
+	u32 v;
+
+	err = readl_poll_timeout(ip->iomem + LIMA_L2_CACHE_STATUS, v,
+				 !(v & LIMA_L2_CACHE_STATUS_COMMAND_BUSY),
+				 0, 1000);
+	if (err) {
+		dev_err(dev->dev, "l2 cache wait command timeout\n");
+		return err;
+	}
+	return 0;
+}
+
+int lima_l2_cache_flush(struct lima_ip *ip)
+{
+	int ret;
+
+	spin_lock(&ip->data.lock);
+	l2_cache_write(LIMA_L2_CACHE_COMMAND, LIMA_L2_CACHE_COMMAND_CLEAR_ALL);
+	ret = lima_l2_cache_wait_idle(ip);
+	spin_unlock(&ip->data.lock);
+	return ret;
+}
+
+int lima_l2_cache_init(struct lima_ip *ip)
+{
+	int i, err;
+	u32 size;
+	struct lima_device *dev = ip->dev;
+
+	/* l2_cache2 only exists when one of PP4-7 present */
+	if (ip->id == lima_ip_l2_cache2) {
+		for (i = lima_ip_pp4; i <= lima_ip_pp7; i++) {
+			if (dev->ip[i].present)
+				break;
+		}
+		if (i > lima_ip_pp7)
+			return -ENODEV;
+	}
+
+	spin_lock_init(&ip->data.lock);
+
+	size = l2_cache_read(LIMA_L2_CACHE_SIZE);
+	dev_info(dev->dev, "l2 cache %uK, %u-way, %ubyte cache line, %ubit external bus\n",
+		 1 << (((size >> 16) & 0xff) - 10),
+		 1 << ((size >> 8) & 0xff),
+		 1 << (size & 0xff),
+		 1 << ((size >> 24) & 0xff));
+
+	err = lima_l2_cache_flush(ip);
+	if (err)
+		return err;
+
+	l2_cache_write(LIMA_L2_CACHE_ENABLE,
+		       LIMA_L2_CACHE_ENABLE_ACCESS|LIMA_L2_CACHE_ENABLE_READ_ALLOCATE);
+	l2_cache_write(LIMA_L2_CACHE_MAX_READS, 0x1c);
+
+	return 0;
+}
+
+void lima_l2_cache_fini(struct lima_ip *ip)
+{
+
+}
diff --git a/drivers/gpu/drm/lima/lima_l2_cache.h b/drivers/gpu/drm/lima/lima_l2_cache.h
new file mode 100644
index 000000000000..c63fb676ff14
--- /dev/null
+++ b/drivers/gpu/drm/lima/lima_l2_cache.h
@@ -0,0 +1,14 @@
+/* SPDX-License-Identifier: GPL-2.0 OR MIT */
+/* Copyright 2017-2019 Qiang Yu <yuq825@gmail.com> */
+
+#ifndef __LIMA_L2_CACHE_H__
+#define __LIMA_L2_CACHE_H__
+
+struct lima_ip;
+
+int lima_l2_cache_init(struct lima_ip *ip);
+void lima_l2_cache_fini(struct lima_ip *ip);
+
+int lima_l2_cache_flush(struct lima_ip *ip);
+
+#endif
diff --git a/drivers/gpu/drm/lima/lima_mmu.c b/drivers/gpu/drm/lima/lima_mmu.c
new file mode 100644
index 000000000000..8e1651d6a61f
--- /dev/null
+++ b/drivers/gpu/drm/lima/lima_mmu.c
@@ -0,0 +1,142 @@
+// SPDX-License-Identifier: GPL-2.0 OR MIT
+/* Copyright 2017-2019 Qiang Yu <yuq825@gmail.com> */
+
+#include <linux/interrupt.h>
+#include <linux/iopoll.h>
+#include <linux/device.h>
+
+#include "lima_device.h"
+#include "lima_mmu.h"
+#include "lima_vm.h"
+#include "lima_object.h"
+#include "lima_regs.h"
+
+#define mmu_write(reg, data) writel(data, ip->iomem + reg)
+#define mmu_read(reg) readl(ip->iomem + reg)
+
+#define lima_mmu_send_command(cmd, addr, val, cond)	     \
+({							     \
+	int __ret;					     \
+							     \
+	mmu_write(LIMA_MMU_COMMAND, cmd);		     \
+	__ret = readl_poll_timeout(ip->iomem + (addr), val,  \
+				  cond, 0, 100);	     \
+	if (__ret)					     \
+		dev_err(dev->dev,			     \
+			"mmu command %x timeout\n", cmd);    \
+	__ret;						     \
+})
+
+static irqreturn_t lima_mmu_irq_handler(int irq, void *data)
+{
+	struct lima_ip *ip = data;
+	struct lima_device *dev = ip->dev;
+	u32 status = mmu_read(LIMA_MMU_INT_STATUS);
+	struct lima_sched_pipe *pipe;
+
+	/* for shared irq case */
+	if (!status)
+		return IRQ_NONE;
+
+	if (status & LIMA_MMU_INT_PAGE_FAULT) {
+		u32 fault = mmu_read(LIMA_MMU_PAGE_FAULT_ADDR);
+
+		dev_err(dev->dev, "mmu page fault at 0x%x from bus id %d of type %s on %s\n",
+			fault, LIMA_MMU_STATUS_BUS_ID(status),
+			status & LIMA_MMU_STATUS_PAGE_FAULT_IS_WRITE ? "write" : "read",
+			lima_ip_name(ip));
+	}
+
+	if (status & LIMA_MMU_INT_READ_BUS_ERROR)
+		dev_err(dev->dev, "mmu %s irq bus error\n", lima_ip_name(ip));
+
+	/* mask all interrupts before resume */
+	mmu_write(LIMA_MMU_INT_MASK, 0);
+	mmu_write(LIMA_MMU_INT_CLEAR, status);
+
+	pipe = dev->pipe + (ip->id == lima_ip_gpmmu ? lima_pipe_gp : lima_pipe_pp);
+	lima_sched_pipe_mmu_error(pipe);
+
+	return IRQ_HANDLED;
+}
+
+int lima_mmu_init(struct lima_ip *ip)
+{
+	struct lima_device *dev = ip->dev;
+	int err;
+	u32 v;
+
+	if (ip->id == lima_ip_ppmmu_bcast)
+		return 0;
+
+	mmu_write(LIMA_MMU_DTE_ADDR, 0xCAFEBABE);
+	if (mmu_read(LIMA_MMU_DTE_ADDR) != 0xCAFEB000) {
+		dev_err(dev->dev, "mmu %s dte write test fail\n", lima_ip_name(ip));
+		return -EIO;
+	}
+
+	mmu_write(LIMA_MMU_COMMAND, LIMA_MMU_COMMAND_HARD_RESET);
+	err = lima_mmu_send_command(LIMA_MMU_COMMAND_HARD_RESET,
+				    LIMA_MMU_DTE_ADDR, v, v == 0);
+	if (err)
+		return err;
+
+	err = devm_request_irq(dev->dev, ip->irq, lima_mmu_irq_handler,
+			       IRQF_SHARED, lima_ip_name(ip), ip);
+	if (err) {
+		dev_err(dev->dev, "mmu %s fail to request irq\n", lima_ip_name(ip));
+		return err;
+	}
+
+	mmu_write(LIMA_MMU_INT_MASK, LIMA_MMU_INT_PAGE_FAULT | LIMA_MMU_INT_READ_BUS_ERROR);
+	mmu_write(LIMA_MMU_DTE_ADDR, dev->empty_vm->pd.dma);
+	return lima_mmu_send_command(LIMA_MMU_COMMAND_ENABLE_PAGING,
+				     LIMA_MMU_STATUS, v,
+				     v & LIMA_MMU_STATUS_PAGING_ENABLED);
+}
+
+void lima_mmu_fini(struct lima_ip *ip)
+{
+
+}
+
+void lima_mmu_switch_vm(struct lima_ip *ip, struct lima_vm *vm)
+{
+	struct lima_device *dev = ip->dev;
+	u32 v;
+
+	lima_mmu_send_command(LIMA_MMU_COMMAND_ENABLE_STALL,
+			      LIMA_MMU_STATUS, v,
+			      v & LIMA_MMU_STATUS_STALL_ACTIVE);
+
+	if (vm)
+		mmu_write(LIMA_MMU_DTE_ADDR, vm->pd.dma);
+
+	/* flush the TLB */
+	mmu_write(LIMA_MMU_COMMAND, LIMA_MMU_COMMAND_ZAP_CACHE);
+
+	lima_mmu_send_command(LIMA_MMU_COMMAND_DISABLE_STALL,
+			      LIMA_MMU_STATUS, v,
+			      !(v & LIMA_MMU_STATUS_STALL_ACTIVE));
+}
+
+void lima_mmu_page_fault_resume(struct lima_ip *ip)
+{
+	struct lima_device *dev = ip->dev;
+	u32 status = mmu_read(LIMA_MMU_STATUS);
+	u32 v;
+
+	if (status & LIMA_MMU_STATUS_PAGE_FAULT_ACTIVE) {
+		dev_info(dev->dev, "mmu resume\n");
+
+		mmu_write(LIMA_MMU_INT_MASK, 0);
+		mmu_write(LIMA_MMU_DTE_ADDR, 0xCAFEBABE);
+		lima_mmu_send_command(LIMA_MMU_COMMAND_HARD_RESET,
+				      LIMA_MMU_DTE_ADDR, v, v == 0);
+		mmu_write(LIMA_MMU_INT_MASK, LIMA_MMU_INT_PAGE_FAULT | LIMA_MMU_INT_READ_BUS_ERROR);
+		mmu_write(LIMA_MMU_DTE_ADDR, dev->empty_vm->pd.dma);
+		lima_mmu_send_command(LIMA_MMU_COMMAND_ENABLE_PAGING,
+				      LIMA_MMU_STATUS, v,
+				      v & LIMA_MMU_STATUS_PAGING_ENABLED);
+	}
+}
diff --git a/drivers/gpu/drm/lima/lima_mmu.h b/drivers/gpu/drm/lima/lima_mmu.h
new file mode 100644
index 000000000000..8c78319bcc8e
--- /dev/null
+++ b/drivers/gpu/drm/lima/lima_mmu.h
@@ -0,0 +1,16 @@
+/* SPDX-License-Identifier: GPL-2.0 OR MIT */
+/* Copyright 2017-2019 Qiang Yu <yuq825@gmail.com> */
+
+#ifndef __LIMA_MMU_H__
+#define __LIMA_MMU_H__
+
+struct lima_ip;
+struct lima_vm;
+
+int lima_mmu_init(struct lima_ip *ip);
+void lima_mmu_fini(struct lima_ip *ip);
+
+void lima_mmu_switch_vm(struct lima_ip *ip, struct lima_vm *vm);
+void lima_mmu_page_fault_resume(struct lima_ip *ip);
+
+#endif
diff --git a/drivers/gpu/drm/lima/lima_object.c b/drivers/gpu/drm/lima/lima_object.c
new file mode 100644
index 000000000000..5c41f859a72f
--- /dev/null
+++ b/drivers/gpu/drm/lima/lima_object.c
@@ -0,0 +1,122 @@
+// SPDX-License-Identifier: GPL-2.0 OR MIT
+/* Copyright 2018-2019 Qiang Yu <yuq825@gmail.com> */
+
+#include <drm/drm_prime.h>
+#include <linux/pagemap.h>
+#include <linux/dma-mapping.h>
+
+#include "lima_object.h"
+
+void lima_bo_destroy(struct lima_bo *bo)
+{
+	if (bo->sgt) {
+		kfree(bo->pages);
+		drm_prime_gem_destroy(&bo->gem, bo->sgt);
+	} else {
+		if (bo->pages_dma_addr) {
+			int i, npages = bo->gem.size >> PAGE_SHIFT;
+
+			for (i = 0; i < npages; i++) {
+				if (bo->pages_dma_addr[i])
+					dma_unmap_page(bo->gem.dev->dev,
+						       bo->pages_dma_addr[i],
+						       PAGE_SIZE, DMA_BIDIRECTIONAL);
+			}
+		}
+
+		if (bo->pages)
+			drm_gem_put_pages(&bo->gem, bo->pages, true, true);
+	}
+
+	kfree(bo->pages_dma_addr);
+	drm_gem_object_release(&bo->gem);
+	kfree(bo);
+}
+
+static struct lima_bo *lima_bo_create_struct(struct lima_device *dev, u32 size, u32 flags,
+					     struct reservation_object *resv)
+{
+	struct lima_bo *bo;
+	int err;
+
+	size = PAGE_ALIGN(size);
+
+	bo = kzalloc(sizeof(*bo), GFP_KERNEL);
+	if (!bo)
+		return ERR_PTR(-ENOMEM);
+
+	mutex_init(&bo->lock);
+	INIT_LIST_HEAD(&bo->va);
+	bo->gem.resv = resv;
+
+	err = drm_gem_object_init(dev->ddev, &bo->gem, size);
+	if (err) {
+		kfree(bo);
+		return ERR_PTR(err);
+	}
+
+	return bo;
+}
+
+struct lima_bo *lima_bo_create(struct lima_device *dev, u32 size,
+			       u32 flags, struct sg_table *sgt,
+			       struct reservation_object *resv)
+{
+	int i, err;
+	size_t npages;
+	struct lima_bo *bo, *ret;
+
+	bo = lima_bo_create_struct(dev, size, flags, resv);
+	if (IS_ERR(bo))
+		return bo;
+
+	npages = bo->gem.size >> PAGE_SHIFT;
+
+	bo->pages_dma_addr = kcalloc(npages, sizeof(dma_addr_t), GFP_KERNEL);
+	if (!bo->pages_dma_addr) {
+		ret = ERR_PTR(-ENOMEM);
+		goto err_out;
+	}
+
+	if (sgt) {
+		bo->sgt = sgt;
+
+		bo->pages = kcalloc(npages, sizeof(*bo->pages), GFP_KERNEL);
+		if (!bo->pages) {
+			ret = ERR_PTR(-ENOMEM);
+			goto err_out;
+		}
+
+		err = drm_prime_sg_to_page_addr_arrays(
+			sgt, bo->pages, bo->pages_dma_addr, npages);
+		if (err) {
+			ret = ERR_PTR(err);
+			goto err_out;
+		}
+	} else {
+		mapping_set_gfp_mask(bo->gem.filp->f_mapping, GFP_DMA32);
+		bo->pages = drm_gem_get_pages(&bo->gem);
+		if (IS_ERR(bo->pages)) {
+			ret = ERR_CAST(bo->pages);
+			bo->pages = NULL;
+			goto err_out;
+		}
+
+		for (i = 0; i < npages; i++) {
+			dma_addr_t addr = dma_map_page(dev->dev, bo->pages[i], 0,
+						       PAGE_SIZE, DMA_BIDIRECTIONAL);
+			if (dma_mapping_error(dev->dev, addr)) {
+				ret = ERR_PTR(-EFAULT);
+				goto err_out;
+			}
+			bo->pages_dma_addr[i] = addr;
+		}
+
+	}
+
+	return bo;
+
+err_out:
+	lima_bo_destroy(bo);
+	return ret;
+}
diff --git a/drivers/gpu/drm/lima/lima_object.h b/drivers/gpu/drm/lima/lima_object.h
new file mode 100644
index 000000000000..6738724afb7b
--- /dev/null
+++ b/drivers/gpu/drm/lima/lima_object.h
@@ -0,0 +1,36 @@
+/* SPDX-License-Identifier: GPL-2.0 OR MIT */
+/* Copyright 2018-2019 Qiang Yu <yuq825@gmail.com> */
+
+#ifndef __LIMA_OBJECT_H__
+#define __LIMA_OBJECT_H__
+
+#include <drm/drm_gem.h>
+
+#include "lima_device.h"
+
+struct lima_bo {
+	struct drm_gem_object gem;
+
+	struct page **pages;
+	dma_addr_t *pages_dma_addr;
+	struct sg_table *sgt;
+	void *vaddr;
+
+	struct mutex lock;
+	struct list_head va;
+};
+
+static inline struct lima_bo *
+to_lima_bo(struct drm_gem_object *obj)
+{
+	return container_of(obj, struct lima_bo, gem);
+}
+
+struct lima_bo *lima_bo_create(struct lima_device *dev, u32 size,
+			       u32 flags, struct sg_table *sgt,
+			       struct reservation_object *resv);
+void lima_bo_destroy(struct lima_bo *bo);
+void *lima_bo_vmap(struct lima_bo *bo);
+void lima_bo_vunmap(struct lima_bo *bo);
+
+#endif
diff --git a/drivers/gpu/drm/lima/lima_pmu.c b/drivers/gpu/drm/lima/lima_pmu.c
new file mode 100644
index 000000000000..571f6d661581
--- /dev/null
+++ b/drivers/gpu/drm/lima/lima_pmu.c
@@ -0,0 +1,60 @@
+// SPDX-License-Identifier: GPL-2.0 OR MIT
+/* Copyright 2017-2019 Qiang Yu <yuq825@gmail.com> */
+
+#include <linux/iopoll.h>
+#include <linux/device.h>
+
+#include "lima_device.h"
+#include "lima_pmu.h"
+#include "lima_regs.h"
+
+#define pmu_write(reg, data) writel(data, ip->iomem + reg)
+#define pmu_read(reg) readl(ip->iomem + reg)
+
+static int lima_pmu_wait_cmd(struct lima_ip *ip)
+{
+	struct lima_device *dev = ip->dev;
+	int err;
+	u32 v;
+
+	err = readl_poll_timeout(ip->iomem + LIMA_PMU_INT_RAWSTAT,
+				 v, v & LIMA_PMU_INT_CMD_MASK,
+				 100, 100000);
+	if (err) {
+		dev_err(dev->dev, "timeout wait pmd cmd\n");
+		return err;
+	}
+
+	pmu_write(LIMA_PMU_INT_CLEAR, LIMA_PMU_INT_CMD_MASK);
+	return 0;
+}
+
+int lima_pmu_init(struct lima_ip *ip)
+{
+	int err;
+	u32 stat;
+
+	pmu_write(LIMA_PMU_INT_MASK, 0);
+
+	/* If this value is too low, when in high GPU clk freq,
+	 * GPU will be in unstable state.
+	 */
+	pmu_write(LIMA_PMU_SW_DELAY, 0xffff);
+
+	/* status reg 1=off 0=on */
+	stat = pmu_read(LIMA_PMU_STATUS);
+
+	/* power up all ip */
+	if (stat) {
+		pmu_write(LIMA_PMU_POWER_UP, stat);
+		err = lima_pmu_wait_cmd(ip);
+		if (err)
+			return err;
+	}
+	return 0;
+}
+
+void lima_pmu_fini(struct lima_ip *ip)
+{
+
+}
diff --git a/drivers/gpu/drm/lima/lima_pmu.h b/drivers/gpu/drm/lima/lima_pmu.h
new file mode 100644
index 000000000000..a2a18775eb07
--- /dev/null
+++ b/drivers/gpu/drm/lima/lima_pmu.h
@@ -0,0 +1,12 @@
+/* SPDX-License-Identifier: GPL-2.0 OR MIT */
+/* Copyright 2017-2019 Qiang Yu <yuq825@gmail.com> */
+
+#ifndef __LIMA_PMU_H__
+#define __LIMA_PMU_H__
+
+struct lima_ip;
+
+int lima_pmu_init(struct lima_ip *ip);
+void lima_pmu_fini(struct lima_ip *ip);
+
+#endif
diff --git a/drivers/gpu/drm/lima/lima_pp.c b/drivers/gpu/drm/lima/lima_pp.c
new file mode 100644
index 000000000000..d29721e177bf
--- /dev/null
+++ b/drivers/gpu/drm/lima/lima_pp.c
@@ -0,0 +1,427 @@
+// SPDX-License-Identifier: GPL-2.0 OR MIT
+/* Copyright 2017-2019 Qiang Yu <yuq825@gmail.com> */
+
+#include <linux/interrupt.h>
+#include <linux/io.h>
+#include <linux/device.h>
+#include <linux/slab.h>
+
+#include <drm/lima_drm.h>
+
+#include "lima_device.h"
+#include "lima_pp.h"
+#include "lima_dlbu.h"
+#include "lima_bcast.h"
+#include "lima_vm.h"
+#include "lima_regs.h"
+
+#define pp_write(reg, data) writel(data, ip->iomem + reg)
+#define pp_read(reg) readl(ip->iomem + reg)
+
+static void lima_pp_handle_irq(struct lima_ip *ip, u32 state)
+{
+	struct lima_device *dev = ip->dev;
+	struct lima_sched_pipe *pipe = dev->pipe + lima_pipe_pp;
+
+	if (state & LIMA_PP_IRQ_MASK_ERROR) {
+		u32 status = pp_read(LIMA_PP_STATUS);
+
+		dev_err(dev->dev, "pp error irq state=%x status=%x\n",
+			state, status);
+
+		pipe->error = true;
+
+		/* mask all interrupts before hard reset */
+		pp_write(LIMA_PP_INT_MASK, 0);
+	}
+
+	pp_write(LIMA_PP_INT_CLEAR, state);
+}
+
+static irqreturn_t lima_pp_irq_handler(int irq, void *data)
+{
+	struct lima_ip *ip = data;
+	struct lima_device *dev = ip->dev;
+	struct lima_sched_pipe *pipe = dev->pipe + lima_pipe_pp;
+	u32 state = pp_read(LIMA_PP_INT_STATUS);
+
+	/* for shared irq case */
+	if (!state)
+		return IRQ_NONE;
+
+	lima_pp_handle_irq(ip, state);
+
+	if (atomic_dec_and_test(&pipe->task))
+		lima_sched_pipe_task_done(pipe);
+
+	return IRQ_HANDLED;
+}
+
+static irqreturn_t lima_pp_bcast_irq_handler(int irq, void *data)
+{
+	int i;
+	irqreturn_t ret = IRQ_NONE;
+	struct lima_ip *pp_bcast = data;
+	struct lima_device *dev = pp_bcast->dev;
+	struct lima_sched_pipe *pipe = dev->pipe + lima_pipe_pp;
+	struct drm_lima_m450_pp_frame *frame = pipe->current_task->frame;
+
+	for (i = 0; i < frame->num_pp; i++) {
+		struct lima_ip *ip = pipe->processor[i];
+		u32 status, state;
+
+		if (pipe->done & (1 << i))
+			continue;
+
+		/* status read first in case int state change in the middle
+		 * which may miss the interrupt handling
+		 */
+		status = pp_read(LIMA_PP_STATUS);
+		state = pp_read(LIMA_PP_INT_STATUS);
+
+		if (state) {
+			lima_pp_handle_irq(ip, state);
+			ret = IRQ_HANDLED;
+		} else {
+			if (status & LIMA_PP_STATUS_RENDERING_ACTIVE)
+				continue;
+		}
+
+		pipe->done |= (1 << i);
+		if (atomic_dec_and_test(&pipe->task))
+			lima_sched_pipe_task_done(pipe);
+	}
+
+	return ret;
+}
+
+static void lima_pp_soft_reset_async(struct lima_ip *ip)
+{
+	if (ip->data.async_reset)
+		return;
+
+	pp_write(LIMA_PP_INT_MASK, 0);
+	pp_write(LIMA_PP_INT_RAWSTAT, LIMA_PP_IRQ_MASK_ALL);
+	pp_write(LIMA_PP_CTRL, LIMA_PP_CTRL_SOFT_RESET);
+	ip->data.async_reset = true;
+}
+
+static int lima_pp_soft_reset_poll(struct lima_ip *ip)
+{
+	return !(pp_read(LIMA_PP_STATUS) & LIMA_PP_STATUS_RENDERING_ACTIVE) &&
+		pp_read(LIMA_PP_INT_RAWSTAT) == LIMA_PP_IRQ_RESET_COMPLETED;
+}
+
+static int lima_pp_soft_reset_async_wait_one(struct lima_ip *ip)
+{
+	struct lima_device *dev = ip->dev;
+	int ret;
+
+	ret = lima_poll_timeout(ip, lima_pp_soft_reset_poll, 0, 100);
+	if (ret) {
+		dev_err(dev->dev, "pp %s reset time out\n", lima_ip_name(ip));
+		return ret;
+	}
+
+	pp_write(LIMA_PP_INT_CLEAR, LIMA_PP_IRQ_MASK_ALL);
+	pp_write(LIMA_PP_INT_MASK, LIMA_PP_IRQ_MASK_USED);
+	return 0;
+}
+
+static int lima_pp_soft_reset_async_wait(struct lima_ip *ip)
+{
+	int i, err = 0;
+
+	if (!ip->data.async_reset)
+		return 0;
+
+	if (ip->id == lima_ip_pp_bcast) {
+		struct lima_device *dev = ip->dev;
+		struct lima_sched_pipe *pipe = dev->pipe + lima_pipe_pp;
+		struct drm_lima_m450_pp_frame *frame = pipe->current_task->frame;
+
+		for (i = 0; i < frame->num_pp; i++)
+			err |= lima_pp_soft_reset_async_wait_one(pipe->processor[i]);
+	} else
+		err = lima_pp_soft_reset_async_wait_one(ip);
+
+	ip->data.async_reset = false;
+	return err;
+}
+
+static void lima_pp_write_frame(struct lima_ip *ip, u32 *frame, u32 *wb)
+{
+	int i, j, n = 0;
+
+	for (i = 0; i < LIMA_PP_FRAME_REG_NUM; i++)
+		writel(frame[i], ip->iomem + LIMA_PP_FRAME + i * 4);
+
+	for (i = 0; i < 3; i++) {
+		for (j = 0; j < LIMA_PP_WB_REG_NUM; j++)
+			writel(wb[n++], ip->iomem + LIMA_PP_WB(i) + j * 4);
+	}
+}
+
+static int lima_pp_hard_reset_poll(struct lima_ip *ip)
+{
+	pp_write(LIMA_PP_PERF_CNT_0_LIMIT, 0xC01A0000);
+	return pp_read(LIMA_PP_PERF_CNT_0_LIMIT) == 0xC01A0000;
+}
+
+static int lima_pp_hard_reset(struct lima_ip *ip)
+{
+	struct lima_device *dev = ip->dev;
+	int ret;
+
+	pp_write(LIMA_PP_PERF_CNT_0_LIMIT, 0xC0FFE000);
+	pp_write(LIMA_PP_INT_MASK, 0);
+	pp_write(LIMA_PP_CTRL, LIMA_PP_CTRL_FORCE_RESET);
+	ret = lima_poll_timeout(ip, lima_pp_hard_reset_poll, 10, 100);
+	if (ret) {
+		dev_err(dev->dev, "pp hard reset timeout\n");
+		return ret;
+	}
+
+	pp_write(LIMA_PP_PERF_CNT_0_LIMIT, 0);
+	pp_write(LIMA_PP_INT_CLEAR, LIMA_PP_IRQ_MASK_ALL);
+	pp_write(LIMA_PP_INT_MASK, LIMA_PP_IRQ_MASK_USED);
+	return 0;
+}
+
+static void lima_pp_print_version(struct lima_ip *ip)
+{
+	u32 version, major, minor;
+	char *name;
+
+	version = pp_read(LIMA_PP_VERSION);
+	major = (version >> 8) & 0xFF;
+	minor = version & 0xFF;
+	switch (version >> 16) {
+	case 0xC807:
+	    name = "mali200";
+		break;
+	case 0xCE07:
+		name = "mali300";
+		break;
+	case 0xCD07:
+		name = "mali400";
+		break;
+	case 0xCF07:
+		name = "mali450";
+		break;
+	default:
+		name = "unknown";
+		break;
+	}
+	dev_info(ip->dev->dev, "%s - %s version major %d minor %d\n",
+		 lima_ip_name(ip), name, major, minor);
+}
+
+int lima_pp_init(struct lima_ip *ip)
+{
+	struct lima_device *dev = ip->dev;
+	int err;
+
+	lima_pp_print_version(ip);
+
+	ip->data.async_reset = false;
+	lima_pp_soft_reset_async(ip);
+	err = lima_pp_soft_reset_async_wait(ip);
+	if (err)
+		return err;
+
+	err = devm_request_irq(dev->dev, ip->irq, lima_pp_irq_handler,
+			       IRQF_SHARED, lima_ip_name(ip), ip);
+	if (err) {
+		dev_err(dev->dev, "pp %s fail to request irq\n",
+			lima_ip_name(ip));
+		return err;
+	}
+
+	dev->pp_version = pp_read(LIMA_PP_VERSION);
+
+	return 0;
+}
+
+void lima_pp_fini(struct lima_ip *ip)
+{
+
+}
+
+int lima_pp_bcast_init(struct lima_ip *ip)
+{
+	struct lima_device *dev = ip->dev;
+	int err;
+
+	err = devm_request_irq(dev->dev, ip->irq, lima_pp_bcast_irq_handler,
+			       IRQF_SHARED, lima_ip_name(ip), ip);
+	if (err) {
+		dev_err(dev->dev, "pp %s fail to request irq\n",
+			lima_ip_name(ip));
+		return err;
+	}
+
+	return 0;
+}
+
+void lima_pp_bcast_fini(struct lima_ip *ip)
+{
+
+}
+
+static int lima_pp_task_validate(struct lima_sched_pipe *pipe,
+				 struct lima_sched_task *task)
+{
+	u32 num_pp;
+
+	if (pipe->bcast_processor) {
+		struct drm_lima_m450_pp_frame *f = task->frame;
+
+		num_pp = f->num_pp;
+
+		if (f->_pad)
+			return -EINVAL;
+	} else {
+		struct drm_lima_m400_pp_frame *f = task->frame;
+
+		num_pp = f->num_pp;
+	}
+
+	if (num_pp == 0 || num_pp > pipe->num_processor)
+		return -EINVAL;
+
+	return 0;
+}
+
+static void lima_pp_task_run(struct lima_sched_pipe *pipe,
+			     struct lima_sched_task *task)
+{
+	if (pipe->bcast_processor) {
+		struct drm_lima_m450_pp_frame *frame = task->frame;
+		struct lima_device *dev = pipe->bcast_processor->dev;
+		struct lima_ip *ip = pipe->bcast_processor;
+		int i;
+
+		pipe->done = 0;
+		atomic_set(&pipe->task, frame->num_pp);
+
+		if (frame->use_dlbu) {
+			lima_dlbu_enable(dev, frame->num_pp);
+
+			frame->frame[LIMA_PP_FRAME >> 2] = LIMA_VA_RESERVE_DLBU;
+			lima_dlbu_set_reg(dev->ip + lima_ip_dlbu, frame->dlbu_regs);
+		} else
+			lima_dlbu_disable(dev);
+
+		lima_bcast_enable(dev, frame->num_pp);
+
+		lima_pp_soft_reset_async_wait(ip);
+
+		lima_pp_write_frame(ip, frame->frame, frame->wb);
+
+		for (i = 0; i < frame->num_pp; i++) {
+			struct lima_ip *ip = pipe->processor[i];
+
+			pp_write(LIMA_PP_STACK, frame->fragment_stack_address[i]);
+			if (!frame->use_dlbu)
+				pp_write(LIMA_PP_FRAME, frame->plbu_array_address[i]);
+		}
+
+		pp_write(LIMA_PP_CTRL, LIMA_PP_CTRL_START_RENDERING);
+	} else {
+		struct drm_lima_m400_pp_frame *frame = task->frame;
+		int i;
+
+		atomic_set(&pipe->task, frame->num_pp);
+
+		for (i = 0; i < frame->num_pp; i++) {
+			struct lima_ip *ip = pipe->processor[i];
+
+			frame->frame[LIMA_PP_FRAME >> 2] =
+				frame->plbu_array_address[i];
+			frame->frame[LIMA_PP_STACK >> 2] =
+				frame->fragment_stack_address[i];
+
+			lima_pp_soft_reset_async_wait(ip);
+
+			lima_pp_write_frame(ip, frame->frame, frame->wb);
+
+			pp_write(LIMA_PP_CTRL, LIMA_PP_CTRL_START_RENDERING);
+		}
+	}
+}
+
+static void lima_pp_task_fini(struct lima_sched_pipe *pipe)
+{
+	if (pipe->bcast_processor)
+		lima_pp_soft_reset_async(pipe->bcast_processor);
+	else {
+		int i;
+
+		for (i = 0; i < pipe->num_processor; i++)
+			lima_pp_soft_reset_async(pipe->processor[i]);
+	}
+}
+
+static void lima_pp_task_error(struct lima_sched_pipe *pipe)
+{
+	int i;
+
+	for (i = 0; i < pipe->num_processor; i++) {
+		struct lima_ip *ip = pipe->processor[i];
+
+		dev_err(ip->dev->dev, "pp task error %d int_state=%x status=%x\n",
+			i, pp_read(LIMA_PP_INT_STATUS), pp_read(LIMA_PP_STATUS));
+
+		lima_pp_hard_reset(ip);
+	}
+}
+
+static void lima_pp_task_mmu_error(struct lima_sched_pipe *pipe)
+{
+	if (atomic_dec_and_test(&pipe->task))
+		lima_sched_pipe_task_done(pipe);
+}
+
+static struct kmem_cache *lima_pp_task_slab;
+static int lima_pp_task_slab_refcnt;
+
+int lima_pp_pipe_init(struct lima_device *dev)
+{
+	int frame_size;
+	struct lima_sched_pipe *pipe = dev->pipe + lima_pipe_pp;
+
+	if (dev->id == lima_gpu_mali400)
+		frame_size = sizeof(struct drm_lima_m400_pp_frame);
+	else
+		frame_size = sizeof(struct drm_lima_m450_pp_frame);
+
+	if (!lima_pp_task_slab) {
+		lima_pp_task_slab = kmem_cache_create_usercopy(
+			"lima_pp_task", sizeof(struct lima_sched_task) + frame_size,
+			0, SLAB_HWCACHE_ALIGN, sizeof(struct lima_sched_task),
+			frame_size, NULL);
+		if (!lima_pp_task_slab)
+			return -ENOMEM;
+	}
+	lima_pp_task_slab_refcnt++;
+
+	pipe->frame_size = frame_size;
+	pipe->task_slab = lima_pp_task_slab;
+
+	pipe->task_validate = lima_pp_task_validate;
+	pipe->task_run = lima_pp_task_run;
+	pipe->task_fini = lima_pp_task_fini;
+	pipe->task_error = lima_pp_task_error;
+	pipe->task_mmu_error = lima_pp_task_mmu_error;
+
+	return 0;
+}
+
+void lima_pp_pipe_fini(struct lima_device *dev)
+{
+	if (!--lima_pp_task_slab_refcnt) {
+		kmem_cache_destroy(lima_pp_task_slab);
+		lima_pp_task_slab = NULL;
+	}
+}
diff --git a/drivers/gpu/drm/lima/lima_pp.h b/drivers/gpu/drm/lima/lima_pp.h
new file mode 100644
index 000000000000..bf60c77b2633
--- /dev/null
+++ b/drivers/gpu/drm/lima/lima_pp.h
@@ -0,0 +1,19 @@
+/* SPDX-License-Identifier: GPL-2.0 OR MIT */
+/* Copyright 2017-2019 Qiang Yu <yuq825@gmail.com> */
+
+#ifndef __LIMA_PP_H__
+#define __LIMA_PP_H__
+
+struct lima_ip;
+struct lima_device;
+
+int lima_pp_init(struct lima_ip *ip);
+void lima_pp_fini(struct lima_ip *ip);
+
+int lima_pp_bcast_init(struct lima_ip *ip);
+void lima_pp_bcast_fini(struct lima_ip *ip);
+
+int lima_pp_pipe_init(struct lima_device *dev);
+void lima_pp_pipe_fini(struct lima_device *dev);
+
+#endif
diff --git a/drivers/gpu/drm/lima/lima_regs.h b/drivers/gpu/drm/lima/lima_regs.h
new file mode 100644
index 000000000000..ace8ecefbe90
--- /dev/null
+++ b/drivers/gpu/drm/lima/lima_regs.h
@@ -0,0 +1,298 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/* Copyright 2010-2017 ARM Limited. All rights reserved.
+ * Copyright 2017-2019 Qiang Yu <yuq825@gmail.com>
+ */
+
+#ifndef __LIMA_REGS_H__
+#define __LIMA_REGS_H__
+
+/* This file's register definition is collected from the
+ * official ARM Mali Utgard GPU kernel driver source code
+ */
+
+/* PMU regs */
+#define LIMA_PMU_POWER_UP                  0x00
+#define LIMA_PMU_POWER_DOWN                0x04
+#define   LIMA_PMU_POWER_GP0_MASK          BIT(0)
+#define   LIMA_PMU_POWER_L2_MASK           BIT(1)
+#define   LIMA_PMU_POWER_PP_MASK(i)        BIT(2 + i)
+
+/*
+ * On Mali450 each block automatically starts up its corresponding L2
+ * and the PPs are not fully independent controllable.
+ * Instead PP0, PP1-3 and PP4-7 can be turned on or off.
+ */
+#define   LIMA450_PMU_POWER_PP0_MASK       BIT(1)
+#define   LIMA450_PMU_POWER_PP13_MASK      BIT(2)
+#define   LIMA450_PMU_POWER_PP47_MASK      BIT(3)
+
+#define LIMA_PMU_STATUS                    0x08
+#define LIMA_PMU_INT_MASK                  0x0C
+#define LIMA_PMU_INT_RAWSTAT               0x10
+#define LIMA_PMU_INT_CLEAR                 0x18
+#define   LIMA_PMU_INT_CMD_MASK            BIT(0)
+#define LIMA_PMU_SW_DELAY                  0x1C
+
+/* L2 cache regs */
+#define LIMA_L2_CACHE_SIZE                   0x0004
+#define LIMA_L2_CACHE_STATUS                 0x0008
+#define   LIMA_L2_CACHE_STATUS_COMMAND_BUSY  BIT(0)
+#define   LIMA_L2_CACHE_STATUS_DATA_BUSY     BIT(1)
+#define LIMA_L2_CACHE_COMMAND                0x0010
+#define   LIMA_L2_CACHE_COMMAND_CLEAR_ALL    BIT(0)
+#define LIMA_L2_CACHE_CLEAR_PAGE             0x0014
+#define LIMA_L2_CACHE_MAX_READS              0x0018
+#define LIMA_L2_CACHE_ENABLE                 0x001C
+#define   LIMA_L2_CACHE_ENABLE_ACCESS        BIT(0)
+#define   LIMA_L2_CACHE_ENABLE_READ_ALLOCATE BIT(1)
+#define LIMA_L2_CACHE_PERFCNT_SRC0           0x0020
+#define LIMA_L2_CACHE_PERFCNT_VAL0           0x0024
+#define LIMA_L2_CACHE_PERFCNT_SRC1           0x0028
+#define LIMA_L2_CACHE_ERFCNT_VAL1            0x002C
+
+/* GP regs */
+#define LIMA_GP_VSCL_START_ADDR                0x00
+#define LIMA_GP_VSCL_END_ADDR                  0x04
+#define LIMA_GP_PLBUCL_START_ADDR              0x08
+#define LIMA_GP_PLBUCL_END_ADDR                0x0c
+#define LIMA_GP_PLBU_ALLOC_START_ADDR          0x10
+#define LIMA_GP_PLBU_ALLOC_END_ADDR            0x14
+#define LIMA_GP_CMD                            0x20
+#define   LIMA_GP_CMD_START_VS                 BIT(0)
+#define   LIMA_GP_CMD_START_PLBU               BIT(1)
+#define   LIMA_GP_CMD_UPDATE_PLBU_ALLOC        BIT(4)
+#define   LIMA_GP_CMD_RESET                    BIT(5)
+#define   LIMA_GP_CMD_FORCE_HANG               BIT(6)
+#define   LIMA_GP_CMD_STOP_BUS                 BIT(9)
+#define   LIMA_GP_CMD_SOFT_RESET               BIT(10)
+#define LIMA_GP_INT_RAWSTAT                    0x24
+#define LIMA_GP_INT_CLEAR                      0x28
+#define LIMA_GP_INT_MASK                       0x2C
+#define LIMA_GP_INT_STAT                       0x30
+#define   LIMA_GP_IRQ_VS_END_CMD_LST           BIT(0)
+#define   LIMA_GP_IRQ_PLBU_END_CMD_LST         BIT(1)
+#define   LIMA_GP_IRQ_PLBU_OUT_OF_MEM          BIT(2)
+#define   LIMA_GP_IRQ_VS_SEM_IRQ               BIT(3)
+#define   LIMA_GP_IRQ_PLBU_SEM_IRQ             BIT(4)
+#define   LIMA_GP_IRQ_HANG                     BIT(5)
+#define   LIMA_GP_IRQ_FORCE_HANG               BIT(6)
+#define   LIMA_GP_IRQ_PERF_CNT_0_LIMIT         BIT(7)
+#define   LIMA_GP_IRQ_PERF_CNT_1_LIMIT         BIT(8)
+#define   LIMA_GP_IRQ_WRITE_BOUND_ERR          BIT(9)
+#define   LIMA_GP_IRQ_SYNC_ERROR               BIT(10)
+#define   LIMA_GP_IRQ_AXI_BUS_ERROR            BIT(11)
+#define   LIMA_GP_IRQ_AXI_BUS_STOPPED          BIT(12)
+#define   LIMA_GP_IRQ_VS_INVALID_CMD           BIT(13)
+#define   LIMA_GP_IRQ_PLB_INVALID_CMD          BIT(14)
+#define   LIMA_GP_IRQ_RESET_COMPLETED          BIT(19)
+#define   LIMA_GP_IRQ_SEMAPHORE_UNDERFLOW      BIT(20)
+#define   LIMA_GP_IRQ_SEMAPHORE_OVERFLOW       BIT(21)
+#define   LIMA_GP_IRQ_PTR_ARRAY_OUT_OF_BOUNDS  BIT(22)
+#define LIMA_GP_WRITE_BOUND_LOW                0x34
+#define LIMA_GP_PERF_CNT_0_ENABLE              0x3C
+#define LIMA_GP_PERF_CNT_1_ENABLE              0x40
+#define LIMA_GP_PERF_CNT_0_SRC                 0x44
+#define LIMA_GP_PERF_CNT_1_SRC                 0x48
+#define LIMA_GP_PERF_CNT_0_VALUE               0x4C
+#define LIMA_GP_PERF_CNT_1_VALUE               0x50
+#define LIMA_GP_PERF_CNT_0_LIMIT               0x54
+#define LIMA_GP_STATUS                         0x68
+#define   LIMA_GP_STATUS_VS_ACTIVE             BIT(1)
+#define   LIMA_GP_STATUS_BUS_STOPPED           BIT(2)
+#define   LIMA_GP_STATUS_PLBU_ACTIVE           BIT(3)
+#define   LIMA_GP_STATUS_BUS_ERROR             BIT(6)
+#define   LIMA_GP_STATUS_WRITE_BOUND_ERR       BIT(8)
+#define LIMA_GP_VERSION                        0x6C
+#define LIMA_GP_VSCL_START_ADDR_READ           0x80
+#define LIMA_GP_PLBCL_START_ADDR_READ          0x84
+#define LIMA_GP_CONTR_AXI_BUS_ERROR_STAT       0x94
+
+#define LIMA_GP_IRQ_MASK_ALL		   \
+	(				   \
+	 LIMA_GP_IRQ_VS_END_CMD_LST      | \
+	 LIMA_GP_IRQ_PLBU_END_CMD_LST    | \
+	 LIMA_GP_IRQ_PLBU_OUT_OF_MEM     | \
+	 LIMA_GP_IRQ_VS_SEM_IRQ          | \
+	 LIMA_GP_IRQ_PLBU_SEM_IRQ        | \
+	 LIMA_GP_IRQ_HANG                | \
+	 LIMA_GP_IRQ_FORCE_HANG          | \
+	 LIMA_GP_IRQ_PERF_CNT_0_LIMIT    | \
+	 LIMA_GP_IRQ_PERF_CNT_1_LIMIT    | \
+	 LIMA_GP_IRQ_WRITE_BOUND_ERR     | \
+	 LIMA_GP_IRQ_SYNC_ERROR          | \
+	 LIMA_GP_IRQ_AXI_BUS_ERROR       | \
+	 LIMA_GP_IRQ_AXI_BUS_STOPPED     | \
+	 LIMA_GP_IRQ_VS_INVALID_CMD      | \
+	 LIMA_GP_IRQ_PLB_INVALID_CMD     | \
+	 LIMA_GP_IRQ_RESET_COMPLETED     | \
+	 LIMA_GP_IRQ_SEMAPHORE_UNDERFLOW | \
+	 LIMA_GP_IRQ_SEMAPHORE_OVERFLOW  | \
+	 LIMA_GP_IRQ_PTR_ARRAY_OUT_OF_BOUNDS)
+
+#define LIMA_GP_IRQ_MASK_ERROR             \
+	(                                  \
+	 LIMA_GP_IRQ_PLBU_OUT_OF_MEM     | \
+	 LIMA_GP_IRQ_FORCE_HANG          | \
+	 LIMA_GP_IRQ_WRITE_BOUND_ERR     | \
+	 LIMA_GP_IRQ_SYNC_ERROR          | \
+	 LIMA_GP_IRQ_AXI_BUS_ERROR       | \
+	 LIMA_GP_IRQ_VS_INVALID_CMD      | \
+	 LIMA_GP_IRQ_PLB_INVALID_CMD     | \
+	 LIMA_GP_IRQ_SEMAPHORE_UNDERFLOW | \
+	 LIMA_GP_IRQ_SEMAPHORE_OVERFLOW  | \
+	 LIMA_GP_IRQ_PTR_ARRAY_OUT_OF_BOUNDS)
+
+#define LIMA_GP_IRQ_MASK_USED		   \
+	(				   \
+	 LIMA_GP_IRQ_VS_END_CMD_LST      | \
+	 LIMA_GP_IRQ_PLBU_END_CMD_LST    | \
+	 LIMA_GP_IRQ_MASK_ERROR)
+
+/* PP regs */
+#define LIMA_PP_FRAME                        0x0000
+#define LIMA_PP_RSW			     0x0004
+#define LIMA_PP_STACK			     0x0030
+#define LIMA_PP_STACK_SIZE		     0x0034
+#define LIMA_PP_ORIGIN_OFFSET_X	             0x0040
+#define LIMA_PP_WB(i)                        (0x0100 * (i + 1))
+#define   LIMA_PP_WB_SOURCE_SELECT           0x0000
+#define	  LIMA_PP_WB_SOURCE_ADDR             0x0004
+
+#define LIMA_PP_VERSION                      0x1000
+#define LIMA_PP_CURRENT_REND_LIST_ADDR       0x1004
+#define LIMA_PP_STATUS                       0x1008
+#define   LIMA_PP_STATUS_RENDERING_ACTIVE    BIT(0)
+#define   LIMA_PP_STATUS_BUS_STOPPED         BIT(4)
+#define LIMA_PP_CTRL                         0x100c
+#define   LIMA_PP_CTRL_STOP_BUS              BIT(0)
+#define   LIMA_PP_CTRL_FLUSH_CACHES          BIT(3)
+#define   LIMA_PP_CTRL_FORCE_RESET           BIT(5)
+#define   LIMA_PP_CTRL_START_RENDERING       BIT(6)
+#define   LIMA_PP_CTRL_SOFT_RESET            BIT(7)
+#define LIMA_PP_INT_RAWSTAT                  0x1020
+#define LIMA_PP_INT_CLEAR                    0x1024
+#define LIMA_PP_INT_MASK                     0x1028
+#define LIMA_PP_INT_STATUS                   0x102c
+#define   LIMA_PP_IRQ_END_OF_FRAME           BIT(0)
+#define   LIMA_PP_IRQ_END_OF_TILE            BIT(1)
+#define   LIMA_PP_IRQ_HANG                   BIT(2)
+#define   LIMA_PP_IRQ_FORCE_HANG             BIT(3)
+#define   LIMA_PP_IRQ_BUS_ERROR              BIT(4)
+#define   LIMA_PP_IRQ_BUS_STOP               BIT(5)
+#define   LIMA_PP_IRQ_CNT_0_LIMIT            BIT(6)
+#define   LIMA_PP_IRQ_CNT_1_LIMIT            BIT(7)
+#define   LIMA_PP_IRQ_WRITE_BOUNDARY_ERROR   BIT(8)
+#define   LIMA_PP_IRQ_INVALID_PLIST_COMMAND  BIT(9)
+#define   LIMA_PP_IRQ_CALL_STACK_UNDERFLOW   BIT(10)
+#define   LIMA_PP_IRQ_CALL_STACK_OVERFLOW    BIT(11)
+#define   LIMA_PP_IRQ_RESET_COMPLETED        BIT(12)
+#define LIMA_PP_WRITE_BOUNDARY_LOW           0x1044
+#define LIMA_PP_BUS_ERROR_STATUS             0x1050
+#define LIMA_PP_PERF_CNT_0_ENABLE            0x1080
+#define LIMA_PP_PERF_CNT_0_SRC               0x1084
+#define LIMA_PP_PERF_CNT_0_LIMIT             0x1088
+#define LIMA_PP_PERF_CNT_0_VALUE             0x108c
+#define LIMA_PP_PERF_CNT_1_ENABLE            0x10a0
+#define LIMA_PP_PERF_CNT_1_SRC               0x10a4
+#define LIMA_PP_PERF_CNT_1_LIMIT             0x10a8
+#define LIMA_PP_PERF_CNT_1_VALUE             0x10ac
+#define LIMA_PP_PERFMON_CONTR                0x10b0
+#define LIMA_PP_PERFMON_BASE                 0x10b4
+
+#define LIMA_PP_IRQ_MASK_ALL                 \
+	(                                    \
+	 LIMA_PP_IRQ_END_OF_FRAME          | \
+	 LIMA_PP_IRQ_END_OF_TILE           | \
+	 LIMA_PP_IRQ_HANG                  | \
+	 LIMA_PP_IRQ_FORCE_HANG            | \
+	 LIMA_PP_IRQ_BUS_ERROR             | \
+	 LIMA_PP_IRQ_BUS_STOP              | \
+	 LIMA_PP_IRQ_CNT_0_LIMIT           | \
+	 LIMA_PP_IRQ_CNT_1_LIMIT           | \
+	 LIMA_PP_IRQ_WRITE_BOUNDARY_ERROR  | \
+	 LIMA_PP_IRQ_INVALID_PLIST_COMMAND | \
+	 LIMA_PP_IRQ_CALL_STACK_UNDERFLOW  | \
+	 LIMA_PP_IRQ_CALL_STACK_OVERFLOW   | \
+	 LIMA_PP_IRQ_RESET_COMPLETED)
+
+#define LIMA_PP_IRQ_MASK_ERROR               \
+	(                                    \
+	 LIMA_PP_IRQ_FORCE_HANG            | \
+	 LIMA_PP_IRQ_BUS_ERROR             | \
+	 LIMA_PP_IRQ_WRITE_BOUNDARY_ERROR  | \
+	 LIMA_PP_IRQ_INVALID_PLIST_COMMAND | \
+	 LIMA_PP_IRQ_CALL_STACK_UNDERFLOW  | \
+	 LIMA_PP_IRQ_CALL_STACK_OVERFLOW)
+
+#define LIMA_PP_IRQ_MASK_USED                \
+	(                                    \
+	 LIMA_PP_IRQ_END_OF_FRAME          | \
+	 LIMA_PP_IRQ_MASK_ERROR)
+
+/* MMU regs */
+#define LIMA_MMU_DTE_ADDR                     0x0000
+#define LIMA_MMU_STATUS                       0x0004
+#define   LIMA_MMU_STATUS_PAGING_ENABLED      BIT(0)
+#define   LIMA_MMU_STATUS_PAGE_FAULT_ACTIVE   BIT(1)
+#define   LIMA_MMU_STATUS_STALL_ACTIVE        BIT(2)
+#define   LIMA_MMU_STATUS_IDLE                BIT(3)
+#define   LIMA_MMU_STATUS_REPLAY_BUFFER_EMPTY BIT(4)
+#define   LIMA_MMU_STATUS_PAGE_FAULT_IS_WRITE BIT(5)
+#define   LIMA_MMU_STATUS_BUS_ID(x)           ((x >> 6) & 0x1F)
+#define LIMA_MMU_COMMAND                      0x0008
+#define   LIMA_MMU_COMMAND_ENABLE_PAGING      0x00
+#define   LIMA_MMU_COMMAND_DISABLE_PAGING     0x01
+#define   LIMA_MMU_COMMAND_ENABLE_STALL       0x02
+#define   LIMA_MMU_COMMAND_DISABLE_STALL      0x03
+#define   LIMA_MMU_COMMAND_ZAP_CACHE          0x04
+#define   LIMA_MMU_COMMAND_PAGE_FAULT_DONE    0x05
+#define   LIMA_MMU_COMMAND_HARD_RESET         0x06
+#define LIMA_MMU_PAGE_FAULT_ADDR              0x000C
+#define LIMA_MMU_ZAP_ONE_LINE                 0x0010
+#define LIMA_MMU_INT_RAWSTAT                  0x0014
+#define LIMA_MMU_INT_CLEAR                    0x0018
+#define LIMA_MMU_INT_MASK                     0x001C
+#define   LIMA_MMU_INT_PAGE_FAULT             BIT(0)
+#define   LIMA_MMU_INT_READ_BUS_ERROR         BIT(1)
+#define LIMA_MMU_INT_STATUS                   0x0020
+
+#define LIMA_VM_FLAG_PRESENT          BIT(0)
+#define LIMA_VM_FLAG_READ_PERMISSION  BIT(1)
+#define LIMA_VM_FLAG_WRITE_PERMISSION BIT(2)
+#define LIMA_VM_FLAG_OVERRIDE_CACHE   BIT(3)
+#define LIMA_VM_FLAG_WRITE_CACHEABLE  BIT(4)
+#define LIMA_VM_FLAG_WRITE_ALLOCATE   BIT(5)
+#define LIMA_VM_FLAG_WRITE_BUFFERABLE BIT(6)
+#define LIMA_VM_FLAG_READ_CACHEABLE   BIT(7)
+#define LIMA_VM_FLAG_READ_ALLOCATE    BIT(8)
+#define LIMA_VM_FLAG_MASK             0x1FF
+
+#define LIMA_VM_FLAGS_CACHE (			 \
+		LIMA_VM_FLAG_PRESENT |		 \
+		LIMA_VM_FLAG_READ_PERMISSION |	 \
+		LIMA_VM_FLAG_WRITE_PERMISSION |	 \
+		LIMA_VM_FLAG_OVERRIDE_CACHE |	 \
+		LIMA_VM_FLAG_WRITE_CACHEABLE |	 \
+		LIMA_VM_FLAG_WRITE_BUFFERABLE |	 \
+		LIMA_VM_FLAG_READ_CACHEABLE |	 \
+		LIMA_VM_FLAG_READ_ALLOCATE)
+
+#define LIMA_VM_FLAGS_UNCACHE (			\
+		LIMA_VM_FLAG_PRESENT |		\
+		LIMA_VM_FLAG_READ_PERMISSION |	\
+		LIMA_VM_FLAG_WRITE_PERMISSION)
+
+/* DLBU regs */
+#define LIMA_DLBU_MASTER_TLLIST_PHYS_ADDR  0x0000
+#define	LIMA_DLBU_MASTER_TLLIST_VADDR      0x0004
+#define	LIMA_DLBU_TLLIST_VBASEADDR         0x0008
+#define	LIMA_DLBU_FB_DIM                   0x000C
+#define	LIMA_DLBU_TLLIST_CONF              0x0010
+#define	LIMA_DLBU_START_TILE_POS           0x0014
+#define	LIMA_DLBU_PP_ENABLE_MASK           0x0018
+
+/* BCAST regs */
+#define LIMA_BCAST_BROADCAST_MASK    0x0
+#define LIMA_BCAST_INTERRUPT_MASK    0x4
+
+#endif
diff --git a/drivers/gpu/drm/lima/lima_sched.c b/drivers/gpu/drm/lima/lima_sched.c
new file mode 100644
index 000000000000..97bd9c1deb87
--- /dev/null
+++ b/drivers/gpu/drm/lima/lima_sched.c
@@ -0,0 +1,404 @@
+// SPDX-License-Identifier: GPL-2.0 OR MIT
+/* Copyright 2017-2019 Qiang Yu <yuq825@gmail.com> */
+
+#include <linux/kthread.h>
+#include <linux/slab.h>
+
+#include "lima_drv.h"
+#include "lima_sched.h"
+#include "lima_vm.h"
+#include "lima_mmu.h"
+#include "lima_l2_cache.h"
+#include "lima_object.h"
+
+struct lima_fence {
+	struct dma_fence base;
+	struct lima_sched_pipe *pipe;
+};
+
+static struct kmem_cache *lima_fence_slab;
+static int lima_fence_slab_refcnt;
+
+int lima_sched_slab_init(void)
+{
+	if (!lima_fence_slab) {
+		lima_fence_slab = kmem_cache_create(
+			"lima_fence", sizeof(struct lima_fence), 0,
+			SLAB_HWCACHE_ALIGN, NULL);
+		if (!lima_fence_slab)
+			return -ENOMEM;
+	}
+
+	lima_fence_slab_refcnt++;
+	return 0;
+}
+
+void lima_sched_slab_fini(void)
+{
+	if (!--lima_fence_slab_refcnt) {
+		kmem_cache_destroy(lima_fence_slab);
+		lima_fence_slab = NULL;
+	}
+}
+
+static inline struct lima_fence *to_lima_fence(struct dma_fence *fence)
+{
+	return container_of(fence, struct lima_fence, base);
+}
+
+static const char *lima_fence_get_driver_name(struct dma_fence *fence)
+{
+	return "lima";
+}
+
+static const char *lima_fence_get_timeline_name(struct dma_fence *fence)
+{
+	struct lima_fence *f = to_lima_fence(fence);
+
+	return f->pipe->base.name;
+}
+
+static void lima_fence_release_rcu(struct rcu_head *rcu)
+{
+	struct dma_fence *f = container_of(rcu, struct dma_fence, rcu);
+	struct lima_fence *fence = to_lima_fence(f);
+
+	kmem_cache_free(lima_fence_slab, fence);
+}
+
+static void lima_fence_release(struct dma_fence *fence)
+{
+	struct lima_fence *f = to_lima_fence(fence);
+
+	call_rcu(&f->base.rcu, lima_fence_release_rcu);
+}
+
+static const struct dma_fence_ops lima_fence_ops = {
+	.get_driver_name = lima_fence_get_driver_name,
+	.get_timeline_name = lima_fence_get_timeline_name,
+	.release = lima_fence_release,
+};
+
+static struct lima_fence *lima_fence_create(struct lima_sched_pipe *pipe)
+{
+	struct lima_fence *fence;
+
+	fence = kmem_cache_zalloc(lima_fence_slab, GFP_KERNEL);
+	if (!fence)
+		return NULL;
+
+	fence->pipe = pipe;
+	dma_fence_init(&fence->base, &lima_fence_ops, &pipe->fence_lock,
+		       pipe->fence_context, ++pipe->fence_seqno);
+
+	return fence;
+}
+
+static inline struct lima_sched_task *to_lima_task(struct drm_sched_job *job)
+{
+	return container_of(job, struct lima_sched_task, base);
+}
+
+static inline struct lima_sched_pipe *to_lima_pipe(struct drm_gpu_scheduler *sched)
+{
+	return container_of(sched, struct lima_sched_pipe, base);
+}
+
+int lima_sched_task_init(struct lima_sched_task *task,
+			 struct lima_sched_context *context,
+			 struct lima_bo **bos, int num_bos,
+			 struct lima_vm *vm)
+{
+	int err, i;
+
+	task->bos = kmemdup(bos, sizeof(*bos) * num_bos, GFP_KERNEL);
+	if (!task->bos)
+		return -ENOMEM;
+
+	for (i = 0; i < num_bos; i++)
+		drm_gem_object_get(&bos[i]->gem);
+
+	err = drm_sched_job_init(&task->base, &context->base, vm);
+	if (err) {
+		kfree(task->bos);
+		return err;
+	}
+
+	task->num_bos = num_bos;
+	task->vm = lima_vm_get(vm);
+	return 0;
+}
+
+void lima_sched_task_fini(struct lima_sched_task *task)
+{
+	int i;
+
+	drm_sched_job_cleanup(&task->base);
+
+	for (i = 0; i < task->num_dep; i++)
+		dma_fence_put(task->dep[i]);
+
+	kfree(task->dep);
+
+	if (task->bos) {
+		for (i = 0; i < task->num_bos; i++)
+			drm_gem_object_put_unlocked(&task->bos[i]->gem);
+		kfree(task->bos);
+	}
+
+	lima_vm_put(task->vm);
+}
+
+int lima_sched_task_add_dep(struct lima_sched_task *task, struct dma_fence *fence)
+{
+	int i, new_dep = 4;
+
+	/* same context's fence is definitly earlier then this task */
+	if (fence->context == task->base.s_fence->finished.context) {
+		dma_fence_put(fence);
+		return 0;
+	}
+
+	if (task->dep && task->num_dep == task->max_dep)
+		new_dep = task->max_dep * 2;
+
+	if (task->max_dep < new_dep) {
+		void *dep = krealloc(task->dep, sizeof(*task->dep) * new_dep, GFP_KERNEL);
+
+		if (!dep)
+			return -ENOMEM;
+
+		task->max_dep = new_dep;
+		task->dep = dep;
+	}
+
+	for (i = 0; i < task->num_dep; i++) {
+		if (task->dep[i]->context == fence->context &&
+		    dma_fence_is_later(fence, task->dep[i])) {
+			dma_fence_put(task->dep[i]);
+			task->dep[i] = fence;
+			return 0;
+		}
+	}
+
+	task->dep[task->num_dep++] = fence;
+	return 0;
+}
+
+int lima_sched_context_init(struct lima_sched_pipe *pipe,
+			    struct lima_sched_context *context,
+			    atomic_t *guilty)
+{
+	struct drm_sched_rq *rq = pipe->base.sched_rq + DRM_SCHED_PRIORITY_NORMAL;
+
+	return drm_sched_entity_init(&context->base, &rq, 1, guilty);
+}
+
+void lima_sched_context_fini(struct lima_sched_pipe *pipe,
+			     struct lima_sched_context *context)
+{
+	drm_sched_entity_fini(&context->base);
+}
+
+struct dma_fence *lima_sched_context_queue_task(struct lima_sched_context *context,
+						struct lima_sched_task *task)
+{
+	struct dma_fence *fence = dma_fence_get(&task->base.s_fence->finished);
+
+	drm_sched_entity_push_job(&task->base, &context->base);
+	return fence;
+}
+
+static struct dma_fence *lima_sched_dependency(struct drm_sched_job *job,
+					       struct drm_sched_entity *entity)
+{
+	struct lima_sched_task *task = to_lima_task(job);
+	int i;
+
+	for (i = 0; i < task->num_dep; i++) {
+		struct dma_fence *fence = task->dep[i];
+
+		if (!task->dep[i])
+			continue;
+
+		task->dep[i] = NULL;
+
+		if (!dma_fence_is_signaled(fence))
+			return fence;
+
+		dma_fence_put(fence);
+	}
+
+	return NULL;
+}
+
+static struct dma_fence *lima_sched_run_job(struct drm_sched_job *job)
+{
+	struct lima_sched_task *task = to_lima_task(job);
+	struct lima_sched_pipe *pipe = to_lima_pipe(job->sched);
+	struct lima_fence *fence;
+	struct dma_fence *ret;
+	struct lima_vm *vm = NULL, *last_vm = NULL;
+	int i;
+
+	/* after GPU reset */
+	if (job->s_fence->finished.error < 0)
+		return NULL;
+
+	fence = lima_fence_create(pipe);
+	if (!fence)
+		return NULL;
+	task->fence = &fence->base;
+
+	/* for caller usage of the fence, otherwise irq handler
+	 * may consume the fence before caller use it
+	 */
+	ret = dma_fence_get(task->fence);
+
+	pipe->current_task = task;
+
+	/* this is needed for MMU to work correctly, otherwise GP/PP
+	 * will hang or page fault for unknown reason after running for
+	 * a while.
+	 *
+	 * Need to investigate:
+	 * 1. is it related to TLB
+	 * 2. how much performance will be affected by L2 cache flush
+	 * 3. can we reduce the calling of this function because all
+	 *    GP/PP use the same L2 cache on mali400
+	 *
+	 * TODO:
+	 * 1. move this to task fini to save some wait time?
+	 * 2. when GP/PP use different l2 cache, need PP wait GP l2
+	 *    cache flush?
+	 */
+	for (i = 0; i < pipe->num_l2_cache; i++)
+		lima_l2_cache_flush(pipe->l2_cache[i]);
+
+	if (task->vm != pipe->current_vm) {
+		vm = lima_vm_get(task->vm);
+		last_vm = pipe->current_vm;
+		pipe->current_vm = task->vm;
+	}
+
+	if (pipe->bcast_mmu)
+		lima_mmu_switch_vm(pipe->bcast_mmu, vm);
+	else {
+		for (i = 0; i < pipe->num_mmu; i++)
+			lima_mmu_switch_vm(pipe->mmu[i], vm);
+	}
+
+	if (last_vm)
+		lima_vm_put(last_vm);
+
+	pipe->error = false;
+	pipe->task_run(pipe, task);
+
+	return task->fence;
+}
+
+static void lima_sched_handle_error_task(struct lima_sched_pipe *pipe,
+					 struct lima_sched_task *task)
+{
+	drm_sched_stop(&pipe->base);
+
+	if (task)
+		drm_sched_increase_karma(&task->base);
+
+	pipe->task_error(pipe);
+
+	if (pipe->bcast_mmu)
+		lima_mmu_page_fault_resume(pipe->bcast_mmu);
+	else {
+		int i;
+
+		for (i = 0; i < pipe->num_mmu; i++)
+			lima_mmu_page_fault_resume(pipe->mmu[i]);
+	}
+
+	if (pipe->current_vm)
+		lima_vm_put(pipe->current_vm);
+
+	pipe->current_vm = NULL;
+	pipe->current_task = NULL;
+
+	drm_sched_resubmit_jobs(&pipe->base);
+	drm_sched_start(&pipe->base, true);
+}
+
+static void lima_sched_timedout_job(struct drm_sched_job *job)
+{
+	struct lima_sched_pipe *pipe = to_lima_pipe(job->sched);
+	struct lima_sched_task *task = to_lima_task(job);
+
+	DRM_ERROR("lima job timeout\n");
+
+	lima_sched_handle_error_task(pipe, task);
+}
+
+static void lima_sched_free_job(struct drm_sched_job *job)
+{
+	struct lima_sched_task *task = to_lima_task(job);
+	struct lima_sched_pipe *pipe = to_lima_pipe(job->sched);
+	struct lima_vm *vm = task->vm;
+	struct lima_bo **bos = task->bos;
+	int i;
+
+	dma_fence_put(task->fence);
+
+	for (i = 0; i < task->num_bos; i++)
+		lima_vm_bo_del(vm, bos[i]);
+
+	lima_sched_task_fini(task);
+	kmem_cache_free(pipe->task_slab, task);
+}
+
+const struct drm_sched_backend_ops lima_sched_ops = {
+	.dependency = lima_sched_dependency,
+	.run_job = lima_sched_run_job,
+	.timedout_job = lima_sched_timedout_job,
+	.free_job = lima_sched_free_job,
+};
+
+static void lima_sched_error_work(struct work_struct *work)
+{
+	struct lima_sched_pipe *pipe =
+		container_of(work, struct lima_sched_pipe, error_work);
+	struct lima_sched_task *task = pipe->current_task;
+
+	lima_sched_handle_error_task(pipe, task);
+}
+
+int lima_sched_pipe_init(struct lima_sched_pipe *pipe, const char *name)
+{
+	long timeout;
+
+	if (lima_sched_timeout_ms <= 0)
+		timeout = MAX_SCHEDULE_TIMEOUT;
+	else
+		timeout = msecs_to_jiffies(lima_sched_timeout_ms);
+
+	pipe->fence_context = dma_fence_context_alloc(1);
+	spin_lock_init(&pipe->fence_lock);
+
+	INIT_WORK(&pipe->error_work, lima_sched_error_work);
+
+	return drm_sched_init(&pipe->base, &lima_sched_ops, 1, 0, timeout, name);
+}
+
+void lima_sched_pipe_fini(struct lima_sched_pipe *pipe)
+{
+	drm_sched_fini(&pipe->base);
+}
+
+void lima_sched_pipe_task_done(struct lima_sched_pipe *pipe)
+{
+	if (pipe->error)
+		schedule_work(&pipe->error_work);
+	else {
+		struct lima_sched_task *task = pipe->current_task;
+
+		pipe->task_fini(pipe);
+		dma_fence_signal(task->fence);
+	}
+}
diff --git a/drivers/gpu/drm/lima/lima_sched.h b/drivers/gpu/drm/lima/lima_sched.h
new file mode 100644
index 000000000000..b017cfa7e327
--- /dev/null
+++ b/drivers/gpu/drm/lima/lima_sched.h
@@ -0,0 +1,104 @@
+/* SPDX-License-Identifier: GPL-2.0 OR MIT */
+/* Copyright 2017-2019 Qiang Yu <yuq825@gmail.com> */
+
+#ifndef __LIMA_SCHED_H__
+#define __LIMA_SCHED_H__
+
+#include <drm/gpu_scheduler.h>
+
+struct lima_vm;
+
+struct lima_sched_task {
+	struct drm_sched_job base;
+
+	struct lima_vm *vm;
+	void *frame;
+
+	struct dma_fence **dep;
+	int num_dep;
+	int max_dep;
+
+	struct lima_bo **bos;
+	int num_bos;
+
+	/* pipe fence */
+	struct dma_fence *fence;
+};
+
+struct lima_sched_context {
+	struct drm_sched_entity base;
+};
+
+#define LIMA_SCHED_PIPE_MAX_MMU       8
+#define LIMA_SCHED_PIPE_MAX_L2_CACHE  2
+#define LIMA_SCHED_PIPE_MAX_PROCESSOR 8
+
+struct lima_ip;
+
+struct lima_sched_pipe {
+	struct drm_gpu_scheduler base;
+
+	u64 fence_context;
+	u32 fence_seqno;
+	spinlock_t fence_lock;
+
+	struct lima_sched_task *current_task;
+	struct lima_vm *current_vm;
+
+	struct lima_ip *mmu[LIMA_SCHED_PIPE_MAX_MMU];
+	int num_mmu;
+
+	struct lima_ip *l2_cache[LIMA_SCHED_PIPE_MAX_L2_CACHE];
+	int num_l2_cache;
+
+	struct lima_ip *processor[LIMA_SCHED_PIPE_MAX_PROCESSOR];
+	int num_processor;
+
+	struct lima_ip *bcast_processor;
+	struct lima_ip *bcast_mmu;
+
+	u32 done;
+	bool error;
+	atomic_t task;
+
+	int frame_size;
+	struct kmem_cache *task_slab;
+
+	int (*task_validate)(struct lima_sched_pipe *pipe, struct lima_sched_task *task);
+	void (*task_run)(struct lima_sched_pipe *pipe, struct lima_sched_task *task);
+	void (*task_fini)(struct lima_sched_pipe *pipe);
+	void (*task_error)(struct lima_sched_pipe *pipe);
+	void (*task_mmu_error)(struct lima_sched_pipe *pipe);
+
+	struct work_struct error_work;
+};
+
+int lima_sched_task_init(struct lima_sched_task *task,
+			 struct lima_sched_context *context,
+			 struct lima_bo **bos, int num_bos,
+			 struct lima_vm *vm);
+void lima_sched_task_fini(struct lima_sched_task *task);
+int lima_sched_task_add_dep(struct lima_sched_task *task, struct dma_fence *fence);
+
+int lima_sched_context_init(struct lima_sched_pipe *pipe,
+			    struct lima_sched_context *context,
+			    atomic_t *guilty);
+void lima_sched_context_fini(struct lima_sched_pipe *pipe,
+			     struct lima_sched_context *context);
+struct dma_fence *lima_sched_context_queue_task(struct lima_sched_context *context,
+						struct lima_sched_task *task);
+
+int lima_sched_pipe_init(struct lima_sched_pipe *pipe, const char *name);
+void lima_sched_pipe_fini(struct lima_sched_pipe *pipe);
+void lima_sched_pipe_task_done(struct lima_sched_pipe *pipe);
+
+static inline void lima_sched_pipe_mmu_error(struct lima_sched_pipe *pipe)
+{
+	pipe->error = true;
+	pipe->task_mmu_error(pipe);
+}
+
+int lima_sched_slab_init(void);
+void lima_sched_slab_fini(void);
+
+#endif
diff --git a/drivers/gpu/drm/lima/lima_vm.c b/drivers/gpu/drm/lima/lima_vm.c
new file mode 100644
index 000000000000..19e88ca16527
--- /dev/null
+++ b/drivers/gpu/drm/lima/lima_vm.c
@@ -0,0 +1,282 @@
+// SPDX-License-Identifier: GPL-2.0 OR MIT
+/* Copyright 2017-2019 Qiang Yu <yuq825@gmail.com> */
+
+#include <linux/slab.h>
+#include <linux/dma-mapping.h>
+
+#include "lima_device.h"
+#include "lima_vm.h"
+#include "lima_object.h"
+#include "lima_regs.h"
+
+struct lima_bo_va {
+	struct list_head list;
+	unsigned int ref_count;
+
+	struct drm_mm_node node;
+
+	struct lima_vm *vm;
+};
+
+#define LIMA_VM_PD_SHIFT 22
+#define LIMA_VM_PT_SHIFT 12
+#define LIMA_VM_PB_SHIFT (LIMA_VM_PD_SHIFT + LIMA_VM_NUM_PT_PER_BT_SHIFT)
+#define LIMA_VM_BT_SHIFT LIMA_VM_PT_SHIFT
+
+#define LIMA_VM_PT_MASK ((1 << LIMA_VM_PD_SHIFT) - 1)
+#define LIMA_VM_BT_MASK ((1 << LIMA_VM_PB_SHIFT) - 1)
+
+#define LIMA_PDE(va) (va >> LIMA_VM_PD_SHIFT)
+#define LIMA_PTE(va) ((va & LIMA_VM_PT_MASK) >> LIMA_VM_PT_SHIFT)
+#define LIMA_PBE(va) (va >> LIMA_VM_PB_SHIFT)
+#define LIMA_BTE(va) ((va & LIMA_VM_BT_MASK) >> LIMA_VM_BT_SHIFT)
+
+
+static void lima_vm_unmap_page_table(struct lima_vm *vm, u32 start, u32 end)
+{
+	u32 addr;
+
+	for (addr = start; addr <= end; addr += LIMA_PAGE_SIZE) {
+		u32 pbe = LIMA_PBE(addr);
+		u32 bte = LIMA_BTE(addr);
+
+		vm->bts[pbe].cpu[bte] = 0;
+	}
+}
+
+static int lima_vm_map_page_table(struct lima_vm *vm, dma_addr_t *dma,
+				  u32 start, u32 end)
+{
+	u64 addr;
+	int i = 0;
+
+	for (addr = start; addr <= end; addr += LIMA_PAGE_SIZE) {
+		u32 pbe = LIMA_PBE(addr);
+		u32 bte = LIMA_BTE(addr);
+
+		if (!vm->bts[pbe].cpu) {
+			dma_addr_t pts;
+			u32 *pd;
+			int j;
+
+			vm->bts[pbe].cpu = dma_alloc_wc(
+				vm->dev->dev, LIMA_PAGE_SIZE << LIMA_VM_NUM_PT_PER_BT_SHIFT,
+				&vm->bts[pbe].dma, GFP_KERNEL | __GFP_ZERO);
+			if (!vm->bts[pbe].cpu) {
+				if (addr != start)
+					lima_vm_unmap_page_table(vm, start, addr - 1);
+				return -ENOMEM;
+			}
+
+			pts = vm->bts[pbe].dma;
+			pd = vm->pd.cpu + (pbe << LIMA_VM_NUM_PT_PER_BT_SHIFT);
+			for (j = 0; j < LIMA_VM_NUM_PT_PER_BT; j++) {
+				pd[j] = pts | LIMA_VM_FLAG_PRESENT;
+				pts += LIMA_PAGE_SIZE;
+			}
+		}
+
+		vm->bts[pbe].cpu[bte] = dma[i++] | LIMA_VM_FLAGS_CACHE;
+	}
+
+	return 0;
+}
+
+static struct lima_bo_va *
+lima_vm_bo_find(struct lima_vm *vm, struct lima_bo *bo)
+{
+	struct lima_bo_va *bo_va, *ret = NULL;
+
+	list_for_each_entry(bo_va, &bo->va, list) {
+		if (bo_va->vm == vm) {
+			ret = bo_va;
+			break;
+		}
+	}
+
+	return ret;
+}
+
+int lima_vm_bo_add(struct lima_vm *vm, struct lima_bo *bo, bool create)
+{
+	struct lima_bo_va *bo_va;
+	int err;
+
+	mutex_lock(&bo->lock);
+
+	bo_va = lima_vm_bo_find(vm, bo);
+	if (bo_va) {
+		bo_va->ref_count++;
+		mutex_unlock(&bo->lock);
+		return 0;
+	}
+
+	/* should not create new bo_va if not asked by caller */
+	if (!create) {
+		mutex_unlock(&bo->lock);
+		return -ENOENT;
+	}
+
+	bo_va = kzalloc(sizeof(*bo_va), GFP_KERNEL);
+	if (!bo_va) {
+		err = -ENOMEM;
+		goto err_out0;
+	}
+
+	bo_va->vm = vm;
+	bo_va->ref_count = 1;
+
+	mutex_lock(&vm->lock);
+
+	err = drm_mm_insert_node(&vm->mm, &bo_va->node, bo->gem.size);
+	if (err)
+		goto err_out1;
+
+	err = lima_vm_map_page_table(vm, bo->pages_dma_addr, bo_va->node.start,
+				     bo_va->node.start + bo_va->node.size - 1);
+	if (err)
+		goto err_out2;
+
+	mutex_unlock(&vm->lock);
+
+	list_add_tail(&bo_va->list, &bo->va);
+
+	mutex_unlock(&bo->lock);
+	return 0;
+
+err_out2:
+	drm_mm_remove_node(&bo_va->node);
+err_out1:
+	mutex_unlock(&vm->lock);
+	kfree(bo_va);
+err_out0:
+	mutex_unlock(&bo->lock);
+	return err;
+}
+
+void lima_vm_bo_del(struct lima_vm *vm, struct lima_bo *bo)
+{
+	struct lima_bo_va *bo_va;
+
+	mutex_lock(&bo->lock);
+
+	bo_va = lima_vm_bo_find(vm, bo);
+	if (--bo_va->ref_count > 0) {
+		mutex_unlock(&bo->lock);
+		return;
+	}
+
+	mutex_lock(&vm->lock);
+
+	lima_vm_unmap_page_table(vm, bo_va->node.start,
+				 bo_va->node.start + bo_va->node.size - 1);
+
+	drm_mm_remove_node(&bo_va->node);
+
+	mutex_unlock(&vm->lock);
+
+	list_del(&bo_va->list);
+
+	mutex_unlock(&bo->lock);
+
+	kfree(bo_va);
+}
+
+u32 lima_vm_get_va(struct lima_vm *vm, struct lima_bo *bo)
+{
+	struct lima_bo_va *bo_va;
+	u32 ret;
+
+	mutex_lock(&bo->lock);
+
+	bo_va = lima_vm_bo_find(vm, bo);
+	ret = bo_va->node.start;
+
+	mutex_unlock(&bo->lock);
+
+	return ret;
+}
+
+struct lima_vm *lima_vm_create(struct lima_device *dev)
+{
+	struct lima_vm *vm;
+
+	vm = kzalloc(sizeof(*vm), GFP_KERNEL);
+	if (!vm)
+		return NULL;
+
+	vm->dev = dev;
+	mutex_init(&vm->lock);
+	kref_init(&vm->refcount);
+
+	vm->pd.cpu = dma_alloc_wc(dev->dev, LIMA_PAGE_SIZE, &vm->pd.dma,
+				  GFP_KERNEL | __GFP_ZERO);
+	if (!vm->pd.cpu)
+		goto err_out0;
+
+	if (dev->dlbu_cpu) {
+		int err = lima_vm_map_page_table(
+			vm, &dev->dlbu_dma, LIMA_VA_RESERVE_DLBU,
+			LIMA_VA_RESERVE_DLBU + LIMA_PAGE_SIZE - 1);
+		if (err)
+			goto err_out1;
+	}
+
+	drm_mm_init(&vm->mm, dev->va_start, dev->va_end - dev->va_start);
+
+	return vm;
+
+err_out1:
+	dma_free_wc(dev->dev, LIMA_PAGE_SIZE, vm->pd.cpu, vm->pd.dma);
+err_out0:
+	kfree(vm);
+	return NULL;
+}
+
+void lima_vm_release(struct kref *kref)
+{
+	struct lima_vm *vm = container_of(kref, struct lima_vm, refcount);
+	int i;
+
+	drm_mm_takedown(&vm->mm);
+
+	for (i = 0; i < LIMA_VM_NUM_BT; i++) {
+		if (vm->bts[i].cpu)
+			dma_free_wc(vm->dev->dev, LIMA_PAGE_SIZE << LIMA_VM_NUM_PT_PER_BT_SHIFT,
+				    vm->bts[i].cpu, vm->bts[i].dma);
+	}
+
+	if (vm->pd.cpu)
+		dma_free_wc(vm->dev->dev, LIMA_PAGE_SIZE, vm->pd.cpu, vm->pd.dma);
+
+	kfree(vm);
+}
+
+void lima_vm_print(struct lima_vm *vm)
+{
+	int i, j, k;
+	u32 *pd, *pt;
+
+	if (!vm->pd.cpu)
+		return;
+
+	pd = vm->pd.cpu;
+	for (i = 0; i < LIMA_VM_NUM_BT; i++) {
+		if (!vm->bts[i].cpu)
+			continue;
+
+		pt = vm->bts[i].cpu;
+		for (j = 0; j < LIMA_VM_NUM_PT_PER_BT; j++) {
+			int idx = (i << LIMA_VM_NUM_PT_PER_BT_SHIFT) + j;
+
+			printk(KERN_INFO "lima vm pd %03x:%08x\n", idx, pd[idx]);
+
+			for (k = 0; k < LIMA_PAGE_ENT_NUM; k++) {
+				u32 pte = *pt++;
+
+				if (pte)
+					printk(KERN_INFO "  pt %03x:%08x\n", k, pte);
+			}
+		}
+	}
+}
diff --git a/drivers/gpu/drm/lima/lima_vm.h b/drivers/gpu/drm/lima/lima_vm.h
new file mode 100644
index 000000000000..caee2f8a29b4
--- /dev/null
+++ b/drivers/gpu/drm/lima/lima_vm.h
@@ -0,0 +1,62 @@
+/* SPDX-License-Identifier: GPL-2.0 OR MIT */
+/* Copyright 2017-2019 Qiang Yu <yuq825@gmail.com> */
+
+#ifndef __LIMA_VM_H__
+#define __LIMA_VM_H__
+
+#include <drm/drm_mm.h>
+#include <linux/kref.h>
+
+#define LIMA_PAGE_SIZE    4096
+#define LIMA_PAGE_MASK    (LIMA_PAGE_SIZE - 1)
+#define LIMA_PAGE_ENT_NUM (LIMA_PAGE_SIZE / sizeof(u32))
+
+#define LIMA_VM_NUM_PT_PER_BT_SHIFT 3
+#define LIMA_VM_NUM_PT_PER_BT (1 << LIMA_VM_NUM_PT_PER_BT_SHIFT)
+#define LIMA_VM_NUM_BT (LIMA_PAGE_ENT_NUM >> LIMA_VM_NUM_PT_PER_BT_SHIFT)
+
+#define LIMA_VA_RESERVE_START  0xFFF00000
+#define LIMA_VA_RESERVE_DLBU   LIMA_VA_RESERVE_START
+#define LIMA_VA_RESERVE_END    0x100000000
+
+struct lima_device;
+
+struct lima_vm_page {
+	u32 *cpu;
+	dma_addr_t dma;
+};
+
+struct lima_vm {
+	struct mutex lock;
+	struct kref refcount;
+
+	struct drm_mm mm;
+
+	struct lima_device *dev;
+
+	struct lima_vm_page pd;
+	struct lima_vm_page bts[LIMA_VM_NUM_BT];
+};
+
+int lima_vm_bo_add(struct lima_vm *vm, struct lima_bo *bo, bool create);
+void lima_vm_bo_del(struct lima_vm *vm, struct lima_bo *bo);
+
+u32 lima_vm_get_va(struct lima_vm *vm, struct lima_bo *bo);
+
+struct lima_vm *lima_vm_create(struct lima_device *dev);
+void lima_vm_release(struct kref *kref);
+
+static inline struct lima_vm *lima_vm_get(struct lima_vm *vm)
+{
+	kref_get(&vm->refcount);
+	return vm;
+}
+
+static inline void lima_vm_put(struct lima_vm *vm)
+{
+	kref_put(&vm->refcount, lima_vm_release);
+}
+
+void lima_vm_print(struct lima_vm *vm);
+
+#endif
diff --git a/drivers/gpu/drm/panel/Kconfig b/drivers/gpu/drm/panel/Kconfig
index f53f817356db..e36dbb4df867 100644
--- a/drivers/gpu/drm/panel/Kconfig
+++ b/drivers/gpu/drm/panel/Kconfig
@@ -38,6 +38,15 @@ config DRM_PANEL_SIMPLE
 	  that it can be automatically turned off when the panel goes into a
 	  low power state.
 
+config DRM_PANEL_FEIYANG_FY07024DI26A30D
+	tristate "Feiyang FY07024DI26A30-D MIPI-DSI LCD panel"
+	depends on OF
+	depends on DRM_MIPI_DSI
+	depends on BACKLIGHT_CLASS_DEVICE
+	help
+	  Say Y if you want to enable support for panels based on the
+	  Feiyang FY07024DI26A30-D MIPI-DSI interface.
+
 config DRM_PANEL_ILITEK_IL9322
 	tristate "Ilitek ILI9322 320x240 QVGA panels"
 	depends on OF && SPI
@@ -149,6 +158,19 @@ config DRM_PANEL_RAYDIUM_RM68200
 	  Say Y here if you want to enable support for Raydium RM68200
 	  720x1280 DSI video mode panel.
 
+config DRM_PANEL_ROCKTECH_JH057N00900
+	tristate "Rocktech JH057N00900 MIPI touchscreen panel"
+	depends on OF
+	depends on DRM_MIPI_DSI
+	depends on BACKLIGHT_CLASS_DEVICE
+	help
+	  Say Y here if you want to enable support for Rocktech JH057N00900
+	  MIPI DSI panel as e.g. used in the Librem 5 devkit. It has a
+	  resolution of 720x1440 pixels, a built in backlight and touch
+	  controller.
+	  Touch input support is provided by the goodix driver and needs to be
+	  selected separately.
+
 config DRM_PANEL_RONBO_RB070D30
 	tristate "Ronbo Electronics RB070D30 panel"
 	depends on OF
diff --git a/drivers/gpu/drm/panel/Makefile b/drivers/gpu/drm/panel/Makefile
index 7834947a53b0..78e3dc376bdd 100644
--- a/drivers/gpu/drm/panel/Makefile
+++ b/drivers/gpu/drm/panel/Makefile
@@ -2,6 +2,7 @@
 obj-$(CONFIG_DRM_PANEL_ARM_VERSATILE) += panel-arm-versatile.o
 obj-$(CONFIG_DRM_PANEL_LVDS) += panel-lvds.o
 obj-$(CONFIG_DRM_PANEL_SIMPLE) += panel-simple.o
+obj-$(CONFIG_DRM_PANEL_FEIYANG_FY07024DI26A30D) += panel-feiyang-fy07024di26a30d.o
 obj-$(CONFIG_DRM_PANEL_ILITEK_IL9322) += panel-ilitek-ili9322.o
 obj-$(CONFIG_DRM_PANEL_ILITEK_ILI9881C) += panel-ilitek-ili9881c.o
 obj-$(CONFIG_DRM_PANEL_INNOLUX_P079ZCA) += panel-innolux-p079zca.o
@@ -13,6 +14,7 @@ obj-$(CONFIG_DRM_PANEL_ORISETECH_OTM8009A) += panel-orisetech-otm8009a.o
 obj-$(CONFIG_DRM_PANEL_PANASONIC_VVX10F034N00) += panel-panasonic-vvx10f034n00.o
 obj-$(CONFIG_DRM_PANEL_RASPBERRYPI_TOUCHSCREEN) += panel-raspberrypi-touchscreen.o
 obj-$(CONFIG_DRM_PANEL_RAYDIUM_RM68200) += panel-raydium-rm68200.o
+obj-$(CONFIG_DRM_PANEL_ROCKTECH_JH057N00900) += panel-rocktech-jh057n00900.o
 obj-$(CONFIG_DRM_PANEL_RONBO_RB070D30) += panel-ronbo-rb070d30.o
 obj-$(CONFIG_DRM_PANEL_SAMSUNG_LD9040) += panel-samsung-ld9040.o
 obj-$(CONFIG_DRM_PANEL_SAMSUNG_S6D16D0) += panel-samsung-s6d16d0.o
diff --git a/drivers/gpu/drm/panel/panel-feiyang-fy07024di26a30d.c b/drivers/gpu/drm/panel/panel-feiyang-fy07024di26a30d.c
new file mode 100644
index 000000000000..dabf59e0f56f
--- /dev/null
+++ b/drivers/gpu/drm/panel/panel-feiyang-fy07024di26a30d.c
@@ -0,0 +1,272 @@
+// SPDX-License-Identifier: GPL-2.0+
+/*
+ * Copyright (C) 2018 Amarula Solutions
+ * Author: Jagan Teki <jagan@amarulasolutions.com>
+ */
+
+#include <drm/drm_mipi_dsi.h>
+#include <drm/drm_modes.h>
+#include <drm/drm_panel.h>
+#include <drm/drm_print.h>
+
+#include <linux/backlight.h>
+#include <linux/gpio/consumer.h>
+#include <linux/delay.h>
+#include <linux/module.h>
+#include <linux/of_device.h>
+#include <linux/regulator/consumer.h>
+
+#define FEIYANG_INIT_CMD_LEN	2
+
+struct feiyang {
+	struct drm_panel	panel;
+	struct mipi_dsi_device	*dsi;
+
+	struct backlight_device	*backlight;
+	struct regulator	*dvdd;
+	struct regulator	*avdd;
+	struct gpio_desc	*reset;
+};
+
+static inline struct feiyang *panel_to_feiyang(struct drm_panel *panel)
+{
+	return container_of(panel, struct feiyang, panel);
+}
+
+struct feiyang_init_cmd {
+	u8 data[FEIYANG_INIT_CMD_LEN];
+};
+
+static const struct feiyang_init_cmd feiyang_init_cmds[] = {
+	{ .data = { 0x80, 0x58 } },
+	{ .data = { 0x81, 0x47 } },
+	{ .data = { 0x82, 0xD4 } },
+	{ .data = { 0x83, 0x88 } },
+	{ .data = { 0x84, 0xA9 } },
+	{ .data = { 0x85, 0xC3 } },
+	{ .data = { 0x86, 0x82 } },
+};
+
+static int feiyang_prepare(struct drm_panel *panel)
+{
+	struct feiyang *ctx = panel_to_feiyang(panel);
+	struct mipi_dsi_device *dsi = ctx->dsi;
+	unsigned int i;
+	int ret;
+
+	ret = regulator_enable(ctx->dvdd);
+	if (ret)
+		return ret;
+
+	/* T1 (dvdd start + dvdd rise) 0 < T1 <= 10ms */
+	msleep(10);
+
+	ret = regulator_enable(ctx->avdd);
+	if (ret)
+		return ret;
+
+	/* T3 (dvdd rise + avdd start + avdd rise) T3 >= 20ms */
+	msleep(20);
+
+	gpiod_set_value(ctx->reset, 0);
+
+	/*
+	 * T5 + T6 (avdd rise + video & logic signal rise)
+	 * T5 >= 10ms, 0 < T6 <= 10ms
+	 */
+	msleep(20);
+
+	gpiod_set_value(ctx->reset, 1);
+
+	/* T12 (video & logic signal rise + backlight rise) T12 >= 200ms */
+	msleep(200);
+
+	for (i = 0; i < ARRAY_SIZE(feiyang_init_cmds); i++) {
+		const struct feiyang_init_cmd *cmd =
+						&feiyang_init_cmds[i];
+
+		ret = mipi_dsi_dcs_write_buffer(dsi, cmd->data,
+						FEIYANG_INIT_CMD_LEN);
+		if (ret < 0)
+			return ret;
+	}
+
+	return 0;
+}
+
+static int feiyang_enable(struct drm_panel *panel)
+{
+	struct feiyang *ctx = panel_to_feiyang(panel);
+
+	/* T12 (video & logic signal rise + backlight rise) T12 >= 200ms */
+	msleep(200);
+
+	mipi_dsi_dcs_set_display_on(ctx->dsi);
+	backlight_enable(ctx->backlight);
+
+	return 0;
+}
+
+static int feiyang_disable(struct drm_panel *panel)
+{
+	struct feiyang *ctx = panel_to_feiyang(panel);
+
+	backlight_disable(ctx->backlight);
+	return mipi_dsi_dcs_set_display_off(ctx->dsi);
+}
+
+static int feiyang_unprepare(struct drm_panel *panel)
+{
+	struct feiyang *ctx = panel_to_feiyang(panel);
+	int ret;
+
+	ret = mipi_dsi_dcs_set_display_off(ctx->dsi);
+	if (ret < 0)
+		DRM_DEV_ERROR(panel->dev, "failed to set display off: %d\n",
+			      ret);
+
+	ret = mipi_dsi_dcs_enter_sleep_mode(ctx->dsi);
+	if (ret < 0)
+		DRM_DEV_ERROR(panel->dev, "failed to enter sleep mode: %d\n",
+			      ret);
+
+	/* T13 (backlight fall + video & logic signal fall) T13 >= 200ms */
+	msleep(200);
+
+	gpiod_set_value(ctx->reset, 0);
+
+	regulator_disable(ctx->avdd);
+
+	/* T11 (dvdd rise to fall) 0 < T11 <= 10ms  */
+	msleep(10);
+
+	regulator_disable(ctx->dvdd);
+
+	return 0;
+}
+
+static const struct drm_display_mode feiyang_default_mode = {
+	.clock		= 55000,
+
+	.hdisplay	= 1024,
+	.hsync_start	= 1024 + 310,
+	.hsync_end	= 1024 + 310 + 20,
+	.htotal		= 1024 + 310 + 20 + 90,
+
+	.vdisplay	= 600,
+	.vsync_start	= 600 + 12,
+	.vsync_end	= 600 + 12 + 2,
+	.vtotal		= 600 + 12 + 2 + 21,
+	.vrefresh	= 60,
+
+	.type = DRM_MODE_TYPE_DRIVER | DRM_MODE_TYPE_PREFERRED,
+};
+
+static int feiyang_get_modes(struct drm_panel *panel)
+{
+	struct drm_connector *connector = panel->connector;
+	struct feiyang *ctx = panel_to_feiyang(panel);
+	struct drm_display_mode *mode;
+
+	mode = drm_mode_duplicate(panel->drm, &feiyang_default_mode);
+	if (!mode) {
+		DRM_DEV_ERROR(&ctx->dsi->dev, "failed to add mode %ux%ux@%u\n",
+			      feiyang_default_mode.hdisplay,
+			      feiyang_default_mode.vdisplay,
+			      feiyang_default_mode.vrefresh);
+		return -ENOMEM;
+	}
+
+	drm_mode_set_name(mode);
+
+	drm_mode_probed_add(connector, mode);
+
+	return 1;
+}
+
+static const struct drm_panel_funcs feiyang_funcs = {
+	.disable = feiyang_disable,
+	.unprepare = feiyang_unprepare,
+	.prepare = feiyang_prepare,
+	.enable = feiyang_enable,
+	.get_modes = feiyang_get_modes,
+};
+
+static int feiyang_dsi_probe(struct mipi_dsi_device *dsi)
+{
+	struct feiyang *ctx;
+	int ret;
+
+	ctx = devm_kzalloc(&dsi->dev, sizeof(*ctx), GFP_KERNEL);
+	if (!ctx)
+		return -ENOMEM;
+
+	mipi_dsi_set_drvdata(dsi, ctx);
+	ctx->dsi = dsi;
+
+	drm_panel_init(&ctx->panel);
+	ctx->panel.dev = &dsi->dev;
+	ctx->panel.funcs = &feiyang_funcs;
+
+	ctx->dvdd = devm_regulator_get(&dsi->dev, "dvdd");
+	if (IS_ERR(ctx->dvdd)) {
+		DRM_DEV_ERROR(&dsi->dev, "Couldn't get dvdd regulator\n");
+		return PTR_ERR(ctx->dvdd);
+	}
+
+	ctx->avdd = devm_regulator_get(&dsi->dev, "avdd");
+	if (IS_ERR(ctx->avdd)) {
+		DRM_DEV_ERROR(&dsi->dev, "Couldn't get avdd regulator\n");
+		return PTR_ERR(ctx->avdd);
+	}
+
+	ctx->reset = devm_gpiod_get(&dsi->dev, "reset", GPIOD_OUT_LOW);
+	if (IS_ERR(ctx->reset)) {
+		DRM_DEV_ERROR(&dsi->dev, "Couldn't get our reset GPIO\n");
+		return PTR_ERR(ctx->reset);
+	}
+
+	ctx->backlight = devm_of_find_backlight(&dsi->dev);
+	if (IS_ERR(ctx->backlight))
+		return PTR_ERR(ctx->backlight);
+
+	ret = drm_panel_add(&ctx->panel);
+	if (ret < 0)
+		return ret;
+
+	dsi->mode_flags = MIPI_DSI_MODE_VIDEO_BURST;
+	dsi->format = MIPI_DSI_FMT_RGB888;
+	dsi->lanes = 4;
+
+	return mipi_dsi_attach(dsi);
+}
+
+static int feiyang_dsi_remove(struct mipi_dsi_device *dsi)
+{
+	struct feiyang *ctx = mipi_dsi_get_drvdata(dsi);
+
+	mipi_dsi_detach(dsi);
+	drm_panel_remove(&ctx->panel);
+
+	return 0;
+}
+
+static const struct of_device_id feiyang_of_match[] = {
+	{ .compatible = "feiyang,fy07024di26a30d", },
+	{ /* sentinel */ }
+};
+MODULE_DEVICE_TABLE(of, feiyang_of_match);
+
+static struct mipi_dsi_driver feiyang_driver = {
+	.probe = feiyang_dsi_probe,
+	.remove = feiyang_dsi_remove,
+	.driver = {
+		.name = "feiyang-fy07024di26a30d",
+		.of_match_table = feiyang_of_match,
+	},
+};
+module_mipi_dsi_driver(feiyang_driver);
+
+MODULE_AUTHOR("Jagan Teki <jagan@amarulasolutions.com>");
+MODULE_DESCRIPTION("Feiyang FY07024DI26A30-D MIPI-DSI LCD panel");
+MODULE_LICENSE("GPL");
diff --git a/drivers/gpu/drm/panel/panel-orisetech-otm8009a.c b/drivers/gpu/drm/panel/panel-orisetech-otm8009a.c
index 87fa316e1d7b..f27a7e426574 100644
--- a/drivers/gpu/drm/panel/panel-orisetech-otm8009a.c
+++ b/drivers/gpu/drm/panel/panel-orisetech-otm8009a.c
@@ -67,15 +67,15 @@ struct otm8009a {
 };
 
 static const struct drm_display_mode default_mode = {
-	.clock = 32729,
+	.clock = 29700,
 	.hdisplay = 480,
-	.hsync_start = 480 + 120,
-	.hsync_end = 480 + 120 + 63,
-	.htotal = 480 + 120 + 63 + 120,
+	.hsync_start = 480 + 98,
+	.hsync_end = 480 + 98 + 32,
+	.htotal = 480 + 98 + 32 + 98,
 	.vdisplay = 800,
-	.vsync_start = 800 + 12,
-	.vsync_end = 800 + 12 + 12,
-	.vtotal = 800 + 12 + 12 + 12,
+	.vsync_start = 800 + 15,
+	.vsync_end = 800 + 15 + 10,
+	.vtotal = 800 + 15 + 10 + 14,
 	.vrefresh = 50,
 	.flags = 0,
 	.width_mm = 52,
@@ -248,6 +248,9 @@ static int otm8009a_init_sequence(struct otm8009a *ctx)
 	/* Send Command GRAM memory write (no parameters) */
 	dcs_write_seq(ctx, MIPI_DCS_WRITE_MEMORY_START);
 
+	/* Wait a short while to let the panel be ready before the 1st frame */
+	mdelay(10);
+
 	return 0;
 }
 
@@ -433,7 +436,8 @@ static int otm8009a_probe(struct mipi_dsi_device *dsi)
 	ctx->supply = devm_regulator_get(dev, "power");
 	if (IS_ERR(ctx->supply)) {
 		ret = PTR_ERR(ctx->supply);
-		dev_err(dev, "failed to request regulator: %d\n", ret);
+		if (ret != -EPROBE_DEFER)
+			dev_err(dev, "failed to request regulator: %d\n", ret);
 		return ret;
 	}
 
diff --git a/drivers/gpu/drm/panel/panel-raydium-rm68200.c b/drivers/gpu/drm/panel/panel-raydium-rm68200.c
index 77593533abcd..14186827e591 100644
--- a/drivers/gpu/drm/panel/panel-raydium-rm68200.c
+++ b/drivers/gpu/drm/panel/panel-raydium-rm68200.c
@@ -383,7 +383,8 @@ static int rm68200_probe(struct mipi_dsi_device *dsi)
 	ctx->supply = devm_regulator_get(dev, "power");
 	if (IS_ERR(ctx->supply)) {
 		ret = PTR_ERR(ctx->supply);
-		dev_err(dev, "cannot get regulator: %d\n", ret);
+		if (ret != -EPROBE_DEFER)
+			dev_err(dev, "cannot get regulator: %d\n", ret);
 		return ret;
 	}
 
diff --git a/drivers/gpu/drm/panel/panel-rocktech-jh057n00900.c b/drivers/gpu/drm/panel/panel-rocktech-jh057n00900.c
new file mode 100644
index 000000000000..158a6d548068
--- /dev/null
+++ b/drivers/gpu/drm/panel/panel-rocktech-jh057n00900.c
@@ -0,0 +1,386 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Rockteck jh057n00900 5.5" MIPI-DSI panel driver
+ *
+ * Copyright (C) Purism SPC 2019
+ */
+
+#include <drm/drm_mipi_dsi.h>
+#include <drm/drm_modes.h>
+#include <drm/drm_panel.h>
+#include <drm/drm_print.h>
+#include <linux/backlight.h>
+#include <linux/debugfs.h>
+#include <linux/delay.h>
+#include <linux/gpio/consumer.h>
+#include <linux/media-bus-format.h>
+#include <linux/module.h>
+#include <video/display_timing.h>
+#include <video/mipi_display.h>
+
+#define DRV_NAME "panel-rocktech-jh057n00900"
+
+/* Manufacturer specific Commands send via DSI */
+#define ST7703_CMD_ALL_PIXEL_OFF 0x22
+#define ST7703_CMD_ALL_PIXEL_ON	 0x23
+#define ST7703_CMD_SETDISP	 0xB2
+#define ST7703_CMD_SETRGBIF	 0xB3
+#define ST7703_CMD_SETCYC	 0xB4
+#define ST7703_CMD_SETBGP	 0xB5
+#define ST7703_CMD_SETVCOM	 0xB6
+#define ST7703_CMD_SETOTP	 0xB7
+#define ST7703_CMD_SETPOWER_EXT	 0xB8
+#define ST7703_CMD_SETEXTC	 0xB9
+#define ST7703_CMD_SETMIPI	 0xBA
+#define ST7703_CMD_SETVDC	 0xBC
+#define ST7703_CMD_SETSCR	 0xC0
+#define ST7703_CMD_SETPOWER	 0xC1
+#define ST7703_CMD_SETPANEL	 0xCC
+#define ST7703_CMD_SETGAMMA	 0xE0
+#define ST7703_CMD_SETEQ	 0xE3
+#define ST7703_CMD_SETGIP1	 0xE9
+#define ST7703_CMD_SETGIP2	 0xEA
+
+struct jh057n {
+	struct device *dev;
+	struct drm_panel panel;
+	struct gpio_desc *reset_gpio;
+	struct backlight_device *backlight;
+	bool prepared;
+
+	struct dentry *debugfs;
+};
+
+static inline struct jh057n *panel_to_jh057n(struct drm_panel *panel)
+{
+	return container_of(panel, struct jh057n, panel);
+}
+
+#define dsi_generic_write_seq(dsi, seq...) do {				\
+		static const u8 d[] = { seq };				\
+		int ret;						\
+		ret = mipi_dsi_generic_write(dsi, d, ARRAY_SIZE(d));	\
+		if (ret < 0)						\
+			return ret;					\
+	} while (0)
+
+static int jh057n_init_sequence(struct jh057n *ctx)
+{
+	struct mipi_dsi_device *dsi = to_mipi_dsi_device(ctx->dev);
+	struct device *dev = ctx->dev;
+	int ret;
+
+	/*
+	 * Init sequence was supplied by the panel vendor. Most of the commands
+	 * resemble the ST7703 but the number of parameters often don't match
+	 * so it's likely a clone.
+	 */
+	dsi_generic_write_seq(dsi, ST7703_CMD_SETEXTC,
+			      0xF1, 0x12, 0x83);
+	dsi_generic_write_seq(dsi, ST7703_CMD_SETRGBIF,
+			      0x10, 0x10, 0x05, 0x05, 0x03, 0xFF, 0x00, 0x00,
+			      0x00, 0x00);
+	dsi_generic_write_seq(dsi, ST7703_CMD_SETSCR,
+			      0x73, 0x73, 0x50, 0x50, 0x00, 0x00, 0x08, 0x70,
+			      0x00);
+	dsi_generic_write_seq(dsi, ST7703_CMD_SETVDC, 0x4E);
+	dsi_generic_write_seq(dsi, ST7703_CMD_SETPANEL, 0x0B);
+	dsi_generic_write_seq(dsi, ST7703_CMD_SETCYC, 0x80);
+	dsi_generic_write_seq(dsi, ST7703_CMD_SETDISP, 0xF0, 0x12, 0x30);
+	dsi_generic_write_seq(dsi, ST7703_CMD_SETEQ,
+			      0x07, 0x07, 0x0B, 0x0B, 0x03, 0x0B, 0x00, 0x00,
+			      0x00, 0x00, 0xFF, 0x00, 0xC0, 0x10);
+	dsi_generic_write_seq(dsi, ST7703_CMD_SETBGP, 0x08, 0x08);
+	msleep(20);
+
+	dsi_generic_write_seq(dsi, ST7703_CMD_SETVCOM, 0x3F, 0x3F);
+	dsi_generic_write_seq(dsi, 0xBF, 0x02, 0x11, 0x00);
+	dsi_generic_write_seq(dsi, ST7703_CMD_SETGIP1,
+			      0x82, 0x10, 0x06, 0x05, 0x9E, 0x0A, 0xA5, 0x12,
+			      0x31, 0x23, 0x37, 0x83, 0x04, 0xBC, 0x27, 0x38,
+			      0x0C, 0x00, 0x03, 0x00, 0x00, 0x00, 0x0C, 0x00,
+			      0x03, 0x00, 0x00, 0x00, 0x75, 0x75, 0x31, 0x88,
+			      0x88, 0x88, 0x88, 0x88, 0x88, 0x13, 0x88, 0x64,
+			      0x64, 0x20, 0x88, 0x88, 0x88, 0x88, 0x88, 0x88,
+			      0x02, 0x88, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+			      0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00);
+	dsi_generic_write_seq(dsi, ST7703_CMD_SETGIP2,
+			      0x02, 0x21, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+			      0x00, 0x00, 0x00, 0x00, 0x02, 0x46, 0x02, 0x88,
+			      0x88, 0x88, 0x88, 0x88, 0x88, 0x64, 0x88, 0x13,
+			      0x57, 0x13, 0x88, 0x88, 0x88, 0x88, 0x88, 0x88,
+			      0x75, 0x88, 0x23, 0x14, 0x00, 0x00, 0x02, 0x00,
+			      0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+			      0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x30, 0x0A,
+			      0xA5, 0x00, 0x00, 0x00, 0x00);
+	dsi_generic_write_seq(dsi, ST7703_CMD_SETGAMMA,
+			      0x00, 0x09, 0x0E, 0x29, 0x2D, 0x3C, 0x41, 0x37,
+			      0x07, 0x0B, 0x0D, 0x10, 0x11, 0x0F, 0x10, 0x11,
+			      0x18, 0x00, 0x09, 0x0E, 0x29, 0x2D, 0x3C, 0x41,
+			      0x37, 0x07, 0x0B, 0x0D, 0x10, 0x11, 0x0F, 0x10,
+			      0x11, 0x18);
+	msleep(20);
+
+	ret = mipi_dsi_dcs_exit_sleep_mode(dsi);
+	if (ret < 0) {
+		DRM_DEV_ERROR(dev, "Failed to exit sleep mode");
+		return ret;
+	}
+	/* Panel is operational 120 msec after reset */
+	msleep(60);
+	ret = mipi_dsi_dcs_set_display_on(dsi);
+	if (ret)
+		return ret;
+
+	DRM_DEV_DEBUG_DRIVER(dev, "Panel init sequence done");
+	return 0;
+}
+
+static int jh057n_enable(struct drm_panel *panel)
+{
+	struct jh057n *ctx = panel_to_jh057n(panel);
+
+	return backlight_enable(ctx->backlight);
+}
+
+static int jh057n_disable(struct drm_panel *panel)
+{
+	struct jh057n *ctx = panel_to_jh057n(panel);
+
+	return backlight_disable(ctx->backlight);
+}
+
+static int jh057n_unprepare(struct drm_panel *panel)
+{
+	struct jh057n *ctx = panel_to_jh057n(panel);
+	struct mipi_dsi_device *dsi = to_mipi_dsi_device(ctx->dev);
+
+	if (!ctx->prepared)
+		return 0;
+
+	mipi_dsi_dcs_set_display_off(dsi);
+	ctx->prepared = false;
+
+	return 0;
+}
+
+static int jh057n_prepare(struct drm_panel *panel)
+{
+	struct jh057n *ctx = panel_to_jh057n(panel);
+	int ret;
+
+	if (ctx->prepared)
+		return 0;
+
+	DRM_DEV_DEBUG_DRIVER(ctx->dev, "Resetting the panel.");
+	gpiod_set_value_cansleep(ctx->reset_gpio, 1);
+	usleep_range(20, 40);
+	gpiod_set_value_cansleep(ctx->reset_gpio, 0);
+	msleep(20);
+
+	ret = jh057n_init_sequence(ctx);
+	if (ret < 0) {
+		DRM_DEV_ERROR(ctx->dev, "Panel init sequence failed: %d", ret);
+		return ret;
+	}
+
+	ctx->prepared = true;
+
+	return 0;
+}
+
+static const struct drm_display_mode default_mode = {
+	.hdisplay    = 720,
+	.hsync_start = 720 + 90,
+	.hsync_end   = 720 + 90 + 20,
+	.htotal	     = 720 + 90 + 20 + 20,
+	.vdisplay    = 1440,
+	.vsync_start = 1440 + 20,
+	.vsync_end   = 1440 + 20 + 4,
+	.vtotal	     = 1440 + 20 + 4 + 12,
+	.vrefresh    = 60,
+	.clock	     = 75276,
+	.flags	     = DRM_MODE_FLAG_NHSYNC | DRM_MODE_FLAG_NVSYNC,
+	.width_mm    = 65,
+	.height_mm   = 130,
+};
+
+static int jh057n_get_modes(struct drm_panel *panel)
+{
+	struct jh057n *ctx = panel_to_jh057n(panel);
+	struct drm_display_mode *mode;
+
+	mode = drm_mode_duplicate(panel->drm, &default_mode);
+	if (!mode) {
+		DRM_DEV_ERROR(ctx->dev, "Failed to add mode %ux%u@%u",
+			      default_mode.hdisplay, default_mode.vdisplay,
+			      default_mode.vrefresh);
+		return -ENOMEM;
+	}
+
+	drm_mode_set_name(mode);
+
+	mode->type = DRM_MODE_TYPE_DRIVER | DRM_MODE_TYPE_PREFERRED;
+	panel->connector->display_info.width_mm = mode->width_mm;
+	panel->connector->display_info.height_mm = mode->height_mm;
+	drm_mode_probed_add(panel->connector, mode);
+
+	return 1;
+}
+
+static const struct drm_panel_funcs jh057n_drm_funcs = {
+	.disable   = jh057n_disable,
+	.unprepare = jh057n_unprepare,
+	.prepare   = jh057n_prepare,
+	.enable	   = jh057n_enable,
+	.get_modes = jh057n_get_modes,
+};
+
+static int allpixelson_set(void *data, u64 val)
+{
+	struct jh057n *ctx = data;
+	struct mipi_dsi_device *dsi = to_mipi_dsi_device(ctx->dev);
+
+	DRM_DEV_DEBUG_DRIVER(ctx->dev, "Setting all pixels on");
+	dsi_generic_write_seq(dsi, ST7703_CMD_ALL_PIXEL_ON);
+	msleep(val * 1000);
+	/* Reset the panel to get video back */
+	drm_panel_disable(&ctx->panel);
+	drm_panel_unprepare(&ctx->panel);
+	drm_panel_prepare(&ctx->panel);
+	drm_panel_enable(&ctx->panel);
+
+	return 0;
+}
+
+DEFINE_SIMPLE_ATTRIBUTE(allpixelson_fops, NULL,
+			allpixelson_set, "%llu\n");
+
+static int jh057n_debugfs_init(struct jh057n *ctx)
+{
+	struct dentry *f;
+
+	ctx->debugfs = debugfs_create_dir(DRV_NAME, NULL);
+	if (!ctx->debugfs)
+		return -ENOMEM;
+
+	f = debugfs_create_file("allpixelson", 0600,
+				ctx->debugfs, ctx, &allpixelson_fops);
+	if (!f)
+		return -ENOMEM;
+
+	return 0;
+}
+
+static void jh057n_debugfs_remove(struct jh057n *ctx)
+{
+	debugfs_remove_recursive(ctx->debugfs);
+	ctx->debugfs = NULL;
+}
+
+static int jh057n_probe(struct mipi_dsi_device *dsi)
+{
+	struct device *dev = &dsi->dev;
+	struct jh057n *ctx;
+	int ret;
+
+	ctx = devm_kzalloc(dev, sizeof(*ctx), GFP_KERNEL);
+	if (!ctx)
+		return -ENOMEM;
+
+	ctx->reset_gpio = devm_gpiod_get(dev, "reset", GPIOD_OUT_LOW);
+	if (IS_ERR(ctx->reset_gpio)) {
+		DRM_DEV_ERROR(dev, "cannot get reset gpio");
+		return PTR_ERR(ctx->reset_gpio);
+	}
+
+	mipi_dsi_set_drvdata(dsi, ctx);
+
+	ctx->dev = dev;
+
+	dsi->lanes = 4;
+	dsi->format = MIPI_DSI_FMT_RGB888;
+	dsi->mode_flags = MIPI_DSI_MODE_VIDEO |
+		MIPI_DSI_MODE_VIDEO_BURST | MIPI_DSI_MODE_VIDEO_SYNC_PULSE;
+
+	ctx->backlight = devm_of_find_backlight(dev);
+	if (IS_ERR(ctx->backlight))
+		return PTR_ERR(ctx->backlight);
+
+	drm_panel_init(&ctx->panel);
+	ctx->panel.dev = dev;
+	ctx->panel.funcs = &jh057n_drm_funcs;
+
+	drm_panel_add(&ctx->panel);
+
+	ret = mipi_dsi_attach(dsi);
+	if (ret < 0) {
+		DRM_DEV_ERROR(dev, "mipi_dsi_attach failed. Is host ready?");
+		drm_panel_remove(&ctx->panel);
+		return ret;
+	}
+
+	DRM_DEV_INFO(dev, "%ux%u@%u %ubpp dsi %udl - ready",
+		     default_mode.hdisplay, default_mode.vdisplay,
+		     default_mode.vrefresh,
+		     mipi_dsi_pixel_format_to_bpp(dsi->format), dsi->lanes);
+
+	jh057n_debugfs_init(ctx);
+	return 0;
+}
+
+static void jh057n_shutdown(struct mipi_dsi_device *dsi)
+{
+	struct jh057n *ctx = mipi_dsi_get_drvdata(dsi);
+	int ret;
+
+	ret = jh057n_unprepare(&ctx->panel);
+	if (ret < 0)
+		DRM_DEV_ERROR(&dsi->dev, "Failed to unprepare panel: %d\n",
+			      ret);
+
+	ret = jh057n_disable(&ctx->panel);
+	if (ret < 0)
+		DRM_DEV_ERROR(&dsi->dev, "Failed to disable panel: %d\n",
+			      ret);
+}
+
+static int jh057n_remove(struct mipi_dsi_device *dsi)
+{
+	struct jh057n *ctx = mipi_dsi_get_drvdata(dsi);
+	int ret;
+
+	jh057n_shutdown(dsi);
+
+	ret = mipi_dsi_detach(dsi);
+	if (ret < 0)
+		DRM_DEV_ERROR(&dsi->dev, "Failed to detach from DSI host: %d\n",
+			      ret);
+
+	drm_panel_remove(&ctx->panel);
+
+	jh057n_debugfs_remove(ctx);
+
+	return 0;
+}
+
+static const struct of_device_id jh057n_of_match[] = {
+	{ .compatible = "rocktech,jh057n00900" },
+	{ /* sentinel */ }
+};
+MODULE_DEVICE_TABLE(of, jh057n_of_match);
+
+static struct mipi_dsi_driver jh057n_driver = {
+	.probe	= jh057n_probe,
+	.remove = jh057n_remove,
+	.shutdown = jh057n_shutdown,
+	.driver = {
+		.name = DRV_NAME,
+		.of_match_table = jh057n_of_match,
+	},
+};
+module_mipi_dsi_driver(jh057n_driver);
+
+MODULE_AUTHOR("Guido Günther <agx@sigxcpu.org>");
+MODULE_DESCRIPTION("DRM driver for Rocktech JH057N00900 MIPI DSI panel");
+MODULE_LICENSE("GPL v2");
diff --git a/drivers/gpu/drm/rockchip/Kconfig b/drivers/gpu/drm/rockchip/Kconfig
index 1e75196f9659..2cdf3b62d559 100644
--- a/drivers/gpu/drm/rockchip/Kconfig
+++ b/drivers/gpu/drm/rockchip/Kconfig
@@ -77,4 +77,12 @@ config ROCKCHIP_RGB
 	  Some Rockchip CRTCs, like rv1108, can directly output parallel
 	  and serial RGB format to panel or connect to a conversion chip.
 	  say Y to enable its driver.
+
+config ROCKCHIP_RK3066_HDMI
+	bool "Rockchip specific extensions for RK3066 HDMI"
+	depends on DRM_ROCKCHIP
+	help
+	  This selects support for Rockchip SoC specific extensions
+	  for the RK3066 HDMI driver. If you want to enable
+	  HDMI on RK3066 based SoC, you should select this option.
 endif
diff --git a/drivers/gpu/drm/rockchip/Makefile b/drivers/gpu/drm/rockchip/Makefile
index f6fc9d5dd0ad..524684ba7f6a 100644
--- a/drivers/gpu/drm/rockchip/Makefile
+++ b/drivers/gpu/drm/rockchip/Makefile
@@ -15,5 +15,6 @@ rockchipdrm-$(CONFIG_ROCKCHIP_DW_MIPI_DSI) += dw-mipi-dsi-rockchip.o
 rockchipdrm-$(CONFIG_ROCKCHIP_INNO_HDMI) += inno_hdmi.o
 rockchipdrm-$(CONFIG_ROCKCHIP_LVDS) += rockchip_lvds.o
 rockchipdrm-$(CONFIG_ROCKCHIP_RGB) += rockchip_rgb.o
+rockchipdrm-$(CONFIG_ROCKCHIP_RK3066_HDMI) += rk3066_hdmi.o
 
 obj-$(CONFIG_DRM_ROCKCHIP) += rockchipdrm.o
diff --git a/drivers/gpu/drm/rockchip/rk3066_hdmi.c b/drivers/gpu/drm/rockchip/rk3066_hdmi.c
new file mode 100644
index 000000000000..85fc5f01f761
--- /dev/null
+++ b/drivers/gpu/drm/rockchip/rk3066_hdmi.c
@@ -0,0 +1,876 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Copyright (C) Fuzhou Rockchip Electronics Co.Ltd
+ *    Zheng Yang <zhengyang@rock-chips.com>
+ */
+
+#include <drm/drm_of.h>
+#include <drm/drm_probe_helper.h>
+
+#include <linux/clk.h>
+#include <linux/mfd/syscon.h>
+#include <linux/platform_device.h>
+#include <linux/regmap.h>
+
+#include "rk3066_hdmi.h"
+
+#include "rockchip_drm_drv.h"
+#include "rockchip_drm_vop.h"
+
+#define DEFAULT_PLLA_RATE 30000000
+
+struct hdmi_data_info {
+	int vic; /* The CEA Video ID (VIC) of the current drm display mode. */
+	bool sink_is_hdmi;
+	unsigned int enc_out_format;
+	unsigned int colorimetry;
+};
+
+struct rk3066_hdmi_i2c {
+	struct i2c_adapter adap;
+
+	u8 ddc_addr;
+	u8 segment_addr;
+	u8 stat;
+
+	struct mutex i2c_lock; /* For i2c operation. */
+	struct completion cmpltn;
+};
+
+struct rk3066_hdmi {
+	struct device *dev;
+	struct drm_device *drm_dev;
+	struct regmap *grf_regmap;
+	int irq;
+	struct clk *hclk;
+	void __iomem *regs;
+
+	struct drm_connector connector;
+	struct drm_encoder encoder;
+
+	struct rk3066_hdmi_i2c *i2c;
+	struct i2c_adapter *ddc;
+
+	unsigned int tmdsclk;
+
+	struct hdmi_data_info hdmi_data;
+	struct drm_display_mode previous_mode;
+};
+
+#define to_rk3066_hdmi(x) container_of(x, struct rk3066_hdmi, x)
+
+static inline u8 hdmi_readb(struct rk3066_hdmi *hdmi, u16 offset)
+{
+	return readl_relaxed(hdmi->regs + offset);
+}
+
+static inline void hdmi_writeb(struct rk3066_hdmi *hdmi, u16 offset, u32 val)
+{
+	writel_relaxed(val, hdmi->regs + offset);
+}
+
+static inline void hdmi_modb(struct rk3066_hdmi *hdmi, u16 offset,
+			     u32 msk, u32 val)
+{
+	u8 temp = hdmi_readb(hdmi, offset) & ~msk;
+
+	temp |= val & msk;
+	hdmi_writeb(hdmi, offset, temp);
+}
+
+static void rk3066_hdmi_i2c_init(struct rk3066_hdmi *hdmi)
+{
+	int ddc_bus_freq;
+
+	ddc_bus_freq = (hdmi->tmdsclk >> 2) / HDMI_SCL_RATE;
+
+	hdmi_writeb(hdmi, HDMI_DDC_BUS_FREQ_L, ddc_bus_freq & 0xFF);
+	hdmi_writeb(hdmi, HDMI_DDC_BUS_FREQ_H, (ddc_bus_freq >> 8) & 0xFF);
+
+	/* Clear the EDID interrupt flag and mute the interrupt. */
+	hdmi_modb(hdmi, HDMI_INTR_MASK1, HDMI_INTR_EDID_MASK, 0);
+	hdmi_writeb(hdmi, HDMI_INTR_STATUS1, HDMI_INTR_EDID_MASK);
+}
+
+static inline u8 rk3066_hdmi_get_power_mode(struct rk3066_hdmi *hdmi)
+{
+	return hdmi_readb(hdmi, HDMI_SYS_CTRL) & HDMI_SYS_POWER_MODE_MASK;
+}
+
+static void rk3066_hdmi_set_power_mode(struct rk3066_hdmi *hdmi, int mode)
+{
+	u8 current_mode, next_mode;
+	u8 i = 0;
+
+	current_mode = rk3066_hdmi_get_power_mode(hdmi);
+
+	DRM_DEV_DEBUG(hdmi->dev, "mode         :%d\n", mode);
+	DRM_DEV_DEBUG(hdmi->dev, "current_mode :%d\n", current_mode);
+
+	if (current_mode == mode)
+		return;
+
+	do {
+		if (current_mode > mode) {
+			next_mode = current_mode / 2;
+		} else {
+			if (current_mode < HDMI_SYS_POWER_MODE_A)
+				next_mode = HDMI_SYS_POWER_MODE_A;
+			else
+				next_mode = current_mode * 2;
+		}
+
+		DRM_DEV_DEBUG(hdmi->dev, "%d: next_mode :%d\n", i, next_mode);
+
+		if (next_mode != HDMI_SYS_POWER_MODE_D) {
+			hdmi_modb(hdmi, HDMI_SYS_CTRL,
+				  HDMI_SYS_POWER_MODE_MASK, next_mode);
+		} else {
+			hdmi_writeb(hdmi, HDMI_SYS_CTRL,
+				    HDMI_SYS_POWER_MODE_D |
+				    HDMI_SYS_PLL_RESET_MASK);
+			usleep_range(90, 100);
+			hdmi_writeb(hdmi, HDMI_SYS_CTRL,
+				    HDMI_SYS_POWER_MODE_D |
+				    HDMI_SYS_PLLB_RESET);
+			usleep_range(90, 100);
+			hdmi_writeb(hdmi, HDMI_SYS_CTRL,
+				    HDMI_SYS_POWER_MODE_D);
+		}
+		current_mode = next_mode;
+		i = i + 1;
+	} while ((next_mode != mode) && (i < 5));
+
+	/*
+	 * When the IP controller isn't configured with accurate video timing,
+	 * DDC_CLK should be equal to the PLLA frequency, which is 30MHz,
+	 * so we need to init the TMDS rate to the PCLK rate and reconfigure
+	 * the DDC clock.
+	 */
+	if (mode < HDMI_SYS_POWER_MODE_D)
+		hdmi->tmdsclk = DEFAULT_PLLA_RATE;
+}
+
+static int
+rk3066_hdmi_upload_frame(struct rk3066_hdmi *hdmi, int setup_rc,
+			 union hdmi_infoframe *frame, u32 frame_index,
+			 u32 mask, u32 disable, u32 enable)
+{
+	if (mask)
+		hdmi_modb(hdmi, HDMI_CP_AUTO_SEND_CTRL, mask, disable);
+
+	hdmi_writeb(hdmi, HDMI_CP_BUF_INDEX, frame_index);
+
+	if (setup_rc >= 0) {
+		u8 packed_frame[HDMI_MAXIMUM_INFO_FRAME_SIZE];
+		ssize_t rc, i;
+
+		rc = hdmi_infoframe_pack(frame, packed_frame,
+					 sizeof(packed_frame));
+		if (rc < 0)
+			return rc;
+
+		for (i = 0; i < rc; i++)
+			hdmi_writeb(hdmi, HDMI_CP_BUF_ACC_HB0 + i * 4,
+				    packed_frame[i]);
+
+		if (mask)
+			hdmi_modb(hdmi, HDMI_CP_AUTO_SEND_CTRL, mask, enable);
+	}
+
+	return setup_rc;
+}
+
+static int rk3066_hdmi_config_avi(struct rk3066_hdmi *hdmi,
+				  struct drm_display_mode *mode)
+{
+	union hdmi_infoframe frame;
+	int rc;
+
+	rc = drm_hdmi_avi_infoframe_from_display_mode(&frame.avi,
+						      &hdmi->connector, mode);
+
+	if (hdmi->hdmi_data.enc_out_format == HDMI_COLORSPACE_YUV444)
+		frame.avi.colorspace = HDMI_COLORSPACE_YUV444;
+	else if (hdmi->hdmi_data.enc_out_format == HDMI_COLORSPACE_YUV422)
+		frame.avi.colorspace = HDMI_COLORSPACE_YUV422;
+	else
+		frame.avi.colorspace = HDMI_COLORSPACE_RGB;
+
+	frame.avi.colorimetry = hdmi->hdmi_data.colorimetry;
+	frame.avi.scan_mode = HDMI_SCAN_MODE_NONE;
+
+	return rk3066_hdmi_upload_frame(hdmi, rc, &frame,
+					HDMI_INFOFRAME_AVI, 0, 0, 0);
+}
+
+static int rk3066_hdmi_config_video_timing(struct rk3066_hdmi *hdmi,
+					   struct drm_display_mode *mode)
+{
+	int value, vsync_offset;
+
+	/* Set the details for the external polarity and interlace mode. */
+	value = HDMI_EXT_VIDEO_SET_EN;
+	value |= mode->flags & DRM_MODE_FLAG_PHSYNC ?
+		 HDMI_VIDEO_HSYNC_ACTIVE_HIGH : HDMI_VIDEO_HSYNC_ACTIVE_LOW;
+	value |= mode->flags & DRM_MODE_FLAG_PVSYNC ?
+		 HDMI_VIDEO_VSYNC_ACTIVE_HIGH : HDMI_VIDEO_VSYNC_ACTIVE_LOW;
+	value |= mode->flags & DRM_MODE_FLAG_INTERLACE ?
+		 HDMI_VIDEO_MODE_INTERLACE : HDMI_VIDEO_MODE_PROGRESSIVE;
+
+	if (hdmi->hdmi_data.vic == 2 || hdmi->hdmi_data.vic == 3)
+		vsync_offset = 6;
+	else
+		vsync_offset = 0;
+
+	value |= vsync_offset << HDMI_VIDEO_VSYNC_OFFSET_SHIFT;
+	hdmi_writeb(hdmi, HDMI_EXT_VIDEO_PARA, value);
+
+	/* Set the details for the external video timing. */
+	value = mode->htotal;
+	hdmi_writeb(hdmi, HDMI_EXT_HTOTAL_L, value & 0xFF);
+	hdmi_writeb(hdmi, HDMI_EXT_HTOTAL_H, (value >> 8) & 0xFF);
+
+	value = mode->htotal - mode->hdisplay;
+	hdmi_writeb(hdmi, HDMI_EXT_HBLANK_L, value & 0xFF);
+	hdmi_writeb(hdmi, HDMI_EXT_HBLANK_H, (value >> 8) & 0xFF);
+
+	value = mode->htotal - mode->hsync_start;
+	hdmi_writeb(hdmi, HDMI_EXT_HDELAY_L, value & 0xFF);
+	hdmi_writeb(hdmi, HDMI_EXT_HDELAY_H, (value >> 8) & 0xFF);
+
+	value = mode->hsync_end - mode->hsync_start;
+	hdmi_writeb(hdmi, HDMI_EXT_HDURATION_L, value & 0xFF);
+	hdmi_writeb(hdmi, HDMI_EXT_HDURATION_H, (value >> 8) & 0xFF);
+
+	value = mode->vtotal;
+	hdmi_writeb(hdmi, HDMI_EXT_VTOTAL_L, value & 0xFF);
+	hdmi_writeb(hdmi, HDMI_EXT_VTOTAL_H, (value >> 8) & 0xFF);
+
+	value = mode->vtotal - mode->vdisplay;
+	hdmi_writeb(hdmi, HDMI_EXT_VBLANK_L, value & 0xFF);
+
+	value = mode->vtotal - mode->vsync_start + vsync_offset;
+	hdmi_writeb(hdmi, HDMI_EXT_VDELAY, value & 0xFF);
+
+	value = mode->vsync_end - mode->vsync_start;
+	hdmi_writeb(hdmi, HDMI_EXT_VDURATION, value & 0xFF);
+
+	return 0;
+}
+
+static void
+rk3066_hdmi_phy_write(struct rk3066_hdmi *hdmi, u16 offset, u8 value)
+{
+	hdmi_writeb(hdmi, offset, value);
+	hdmi_modb(hdmi, HDMI_SYS_CTRL,
+		  HDMI_SYS_PLL_RESET_MASK, HDMI_SYS_PLL_RESET);
+	usleep_range(90, 100);
+	hdmi_modb(hdmi, HDMI_SYS_CTRL, HDMI_SYS_PLL_RESET_MASK, 0);
+	usleep_range(900, 1000);
+}
+
+static void rk3066_hdmi_config_phy(struct rk3066_hdmi *hdmi)
+{
+	/* TMDS uses the same frequency as dclk. */
+	hdmi_writeb(hdmi, HDMI_DEEP_COLOR_MODE, 0x22);
+
+	/*
+	 * The semi-public documentation does not describe the hdmi registers
+	 * used by the function rk3066_hdmi_phy_write(), so we keep using
+	 * these magic values for now.
+	 */
+	if (hdmi->tmdsclk > 100000000) {
+		rk3066_hdmi_phy_write(hdmi, 0x158, 0x0E);
+		rk3066_hdmi_phy_write(hdmi, 0x15c, 0x00);
+		rk3066_hdmi_phy_write(hdmi, 0x160, 0x60);
+		rk3066_hdmi_phy_write(hdmi, 0x164, 0x00);
+		rk3066_hdmi_phy_write(hdmi, 0x168, 0xDA);
+		rk3066_hdmi_phy_write(hdmi, 0x16c, 0xA1);
+		rk3066_hdmi_phy_write(hdmi, 0x170, 0x0e);
+		rk3066_hdmi_phy_write(hdmi, 0x174, 0x22);
+		rk3066_hdmi_phy_write(hdmi, 0x178, 0x00);
+	} else if (hdmi->tmdsclk > 50000000) {
+		rk3066_hdmi_phy_write(hdmi, 0x158, 0x06);
+		rk3066_hdmi_phy_write(hdmi, 0x15c, 0x00);
+		rk3066_hdmi_phy_write(hdmi, 0x160, 0x60);
+		rk3066_hdmi_phy_write(hdmi, 0x164, 0x00);
+		rk3066_hdmi_phy_write(hdmi, 0x168, 0xCA);
+		rk3066_hdmi_phy_write(hdmi, 0x16c, 0xA3);
+		rk3066_hdmi_phy_write(hdmi, 0x170, 0x0e);
+		rk3066_hdmi_phy_write(hdmi, 0x174, 0x20);
+		rk3066_hdmi_phy_write(hdmi, 0x178, 0x00);
+	} else {
+		rk3066_hdmi_phy_write(hdmi, 0x158, 0x02);
+		rk3066_hdmi_phy_write(hdmi, 0x15c, 0x00);
+		rk3066_hdmi_phy_write(hdmi, 0x160, 0x60);
+		rk3066_hdmi_phy_write(hdmi, 0x164, 0x00);
+		rk3066_hdmi_phy_write(hdmi, 0x168, 0xC2);
+		rk3066_hdmi_phy_write(hdmi, 0x16c, 0xA2);
+		rk3066_hdmi_phy_write(hdmi, 0x170, 0x0e);
+		rk3066_hdmi_phy_write(hdmi, 0x174, 0x20);
+		rk3066_hdmi_phy_write(hdmi, 0x178, 0x00);
+	}
+}
+
+static int rk3066_hdmi_setup(struct rk3066_hdmi *hdmi,
+			     struct drm_display_mode *mode)
+{
+	hdmi->hdmi_data.vic = drm_match_cea_mode(mode);
+	hdmi->hdmi_data.enc_out_format = HDMI_COLORSPACE_RGB;
+
+	if (hdmi->hdmi_data.vic == 6 || hdmi->hdmi_data.vic == 7 ||
+	    hdmi->hdmi_data.vic == 21 || hdmi->hdmi_data.vic == 22 ||
+	    hdmi->hdmi_data.vic == 2 || hdmi->hdmi_data.vic == 3 ||
+	    hdmi->hdmi_data.vic == 17 || hdmi->hdmi_data.vic == 18)
+		hdmi->hdmi_data.colorimetry = HDMI_COLORIMETRY_ITU_601;
+	else
+		hdmi->hdmi_data.colorimetry = HDMI_COLORIMETRY_ITU_709;
+
+	hdmi->tmdsclk = mode->clock * 1000;
+
+	/* Mute video and audio output. */
+	hdmi_modb(hdmi, HDMI_VIDEO_CTRL2, HDMI_VIDEO_AUDIO_DISABLE_MASK,
+		  HDMI_AUDIO_DISABLE | HDMI_VIDEO_DISABLE);
+
+	/* Set power state to mode B. */
+	if (rk3066_hdmi_get_power_mode(hdmi) != HDMI_SYS_POWER_MODE_B)
+		rk3066_hdmi_set_power_mode(hdmi, HDMI_SYS_POWER_MODE_B);
+
+	/* Input video mode is RGB 24 bit. Use external data enable signal. */
+	hdmi_modb(hdmi, HDMI_AV_CTRL1,
+		  HDMI_VIDEO_DE_MASK, HDMI_VIDEO_EXTERNAL_DE);
+	hdmi_writeb(hdmi, HDMI_VIDEO_CTRL1,
+		    HDMI_VIDEO_OUTPUT_RGB444 |
+		    HDMI_VIDEO_INPUT_DATA_DEPTH_8BIT |
+		    HDMI_VIDEO_INPUT_COLOR_RGB);
+	hdmi_writeb(hdmi, HDMI_DEEP_COLOR_MODE, 0x20);
+
+	rk3066_hdmi_config_video_timing(hdmi, mode);
+
+	if (hdmi->hdmi_data.sink_is_hdmi) {
+		hdmi_modb(hdmi, HDMI_HDCP_CTRL, HDMI_VIDEO_MODE_MASK,
+			  HDMI_VIDEO_MODE_HDMI);
+		rk3066_hdmi_config_avi(hdmi, mode);
+	} else {
+		hdmi_modb(hdmi, HDMI_HDCP_CTRL, HDMI_VIDEO_MODE_MASK, 0);
+	}
+
+	rk3066_hdmi_config_phy(hdmi);
+
+	rk3066_hdmi_set_power_mode(hdmi, HDMI_SYS_POWER_MODE_E);
+
+	/*
+	 * When the IP controller is configured with accurate video
+	 * timing, the TMDS clock source should be switched to
+	 * DCLK_LCDC, so we need to init the TMDS rate to the pixel mode
+	 * clock rate and reconfigure the DDC clock.
+	 */
+	rk3066_hdmi_i2c_init(hdmi);
+
+	/* Unmute video output. */
+	hdmi_modb(hdmi, HDMI_VIDEO_CTRL2,
+		  HDMI_VIDEO_AUDIO_DISABLE_MASK, HDMI_AUDIO_DISABLE);
+	return 0;
+}
+
+static void
+rk3066_hdmi_encoder_mode_set(struct drm_encoder *encoder,
+			     struct drm_display_mode *mode,
+			     struct drm_display_mode *adj_mode)
+{
+	struct rk3066_hdmi *hdmi = to_rk3066_hdmi(encoder);
+
+	/* Store the display mode for plugin/DPMS poweron events. */
+	memcpy(&hdmi->previous_mode, adj_mode, sizeof(hdmi->previous_mode));
+}
+
+static void rk3066_hdmi_encoder_enable(struct drm_encoder *encoder)
+{
+	struct rk3066_hdmi *hdmi = to_rk3066_hdmi(encoder);
+	int mux, val;
+
+	mux = drm_of_encoder_active_endpoint_id(hdmi->dev->of_node, encoder);
+	if (mux)
+		val = (HDMI_VIDEO_SEL << 16) | HDMI_VIDEO_SEL;
+	else
+		val = HDMI_VIDEO_SEL << 16;
+
+	regmap_write(hdmi->grf_regmap, GRF_SOC_CON0, val);
+
+	DRM_DEV_DEBUG(hdmi->dev, "hdmi encoder enable select: vop%s\n",
+		      (mux) ? "1" : "0");
+
+	rk3066_hdmi_setup(hdmi, &hdmi->previous_mode);
+}
+
+static void rk3066_hdmi_encoder_disable(struct drm_encoder *encoder)
+{
+	struct rk3066_hdmi *hdmi = to_rk3066_hdmi(encoder);
+
+	DRM_DEV_DEBUG(hdmi->dev, "hdmi encoder disable\n");
+
+	if (rk3066_hdmi_get_power_mode(hdmi) == HDMI_SYS_POWER_MODE_E) {
+		hdmi_writeb(hdmi, HDMI_VIDEO_CTRL2,
+			    HDMI_VIDEO_AUDIO_DISABLE_MASK);
+		hdmi_modb(hdmi, HDMI_VIDEO_CTRL2,
+			  HDMI_AUDIO_CP_LOGIC_RESET_MASK,
+			  HDMI_AUDIO_CP_LOGIC_RESET);
+		usleep_range(500, 510);
+	}
+	rk3066_hdmi_set_power_mode(hdmi, HDMI_SYS_POWER_MODE_A);
+}
+
+static bool
+rk3066_hdmi_encoder_mode_fixup(struct drm_encoder *encoder,
+			       const struct drm_display_mode *mode,
+			       struct drm_display_mode *adj_mode)
+{
+	return true;
+}
+
+static int
+rk3066_hdmi_encoder_atomic_check(struct drm_encoder *encoder,
+				 struct drm_crtc_state *crtc_state,
+				 struct drm_connector_state *conn_state)
+{
+	struct rockchip_crtc_state *s = to_rockchip_crtc_state(crtc_state);
+
+	s->output_mode = ROCKCHIP_OUT_MODE_P888;
+	s->output_type = DRM_MODE_CONNECTOR_HDMIA;
+
+	return 0;
+}
+
+static const
+struct drm_encoder_helper_funcs rk3066_hdmi_encoder_helper_funcs = {
+	.enable       = rk3066_hdmi_encoder_enable,
+	.disable      = rk3066_hdmi_encoder_disable,
+	.mode_fixup   = rk3066_hdmi_encoder_mode_fixup,
+	.mode_set     = rk3066_hdmi_encoder_mode_set,
+	.atomic_check = rk3066_hdmi_encoder_atomic_check,
+};
+
+static const struct drm_encoder_funcs rk3066_hdmi_encoder_funcs = {
+	.destroy = drm_encoder_cleanup,
+};
+
+static enum drm_connector_status
+rk3066_hdmi_connector_detect(struct drm_connector *connector, bool force)
+{
+	struct rk3066_hdmi *hdmi = to_rk3066_hdmi(connector);
+
+	return (hdmi_readb(hdmi, HDMI_HPG_MENS_STA) & HDMI_HPG_IN_STATUS_HIGH) ?
+		connector_status_connected : connector_status_disconnected;
+}
+
+static int rk3066_hdmi_connector_get_modes(struct drm_connector *connector)
+{
+	struct rk3066_hdmi *hdmi = to_rk3066_hdmi(connector);
+	struct edid *edid;
+	int ret = 0;
+
+	if (!hdmi->ddc)
+		return 0;
+
+	edid = drm_get_edid(connector, hdmi->ddc);
+	if (edid) {
+		hdmi->hdmi_data.sink_is_hdmi = drm_detect_hdmi_monitor(edid);
+		drm_connector_update_edid_property(connector, edid);
+		ret = drm_add_edid_modes(connector, edid);
+		kfree(edid);
+	}
+
+	return ret;
+}
+
+static enum drm_mode_status
+rk3066_hdmi_connector_mode_valid(struct drm_connector *connector,
+				 struct drm_display_mode *mode)
+{
+	u32 vic = drm_match_cea_mode(mode);
+
+	if (vic > 1)
+		return MODE_OK;
+	else
+		return MODE_BAD;
+}
+
+static struct drm_encoder *
+rk3066_hdmi_connector_best_encoder(struct drm_connector *connector)
+{
+	struct rk3066_hdmi *hdmi = to_rk3066_hdmi(connector);
+
+	return &hdmi->encoder;
+}
+
+static int
+rk3066_hdmi_probe_single_connector_modes(struct drm_connector *connector,
+					 uint32_t maxX, uint32_t maxY)
+{
+	if (maxX > 1920)
+		maxX = 1920;
+	if (maxY > 1080)
+		maxY = 1080;
+
+	return drm_helper_probe_single_connector_modes(connector, maxX, maxY);
+}
+
+static void rk3066_hdmi_connector_destroy(struct drm_connector *connector)
+{
+	drm_connector_unregister(connector);
+	drm_connector_cleanup(connector);
+}
+
+static const struct drm_connector_funcs rk3066_hdmi_connector_funcs = {
+	.fill_modes = rk3066_hdmi_probe_single_connector_modes,
+	.detect = rk3066_hdmi_connector_detect,
+	.destroy = rk3066_hdmi_connector_destroy,
+	.reset = drm_atomic_helper_connector_reset,
+	.atomic_duplicate_state = drm_atomic_helper_connector_duplicate_state,
+	.atomic_destroy_state = drm_atomic_helper_connector_destroy_state,
+};
+
+static const
+struct drm_connector_helper_funcs rk3066_hdmi_connector_helper_funcs = {
+	.get_modes = rk3066_hdmi_connector_get_modes,
+	.mode_valid = rk3066_hdmi_connector_mode_valid,
+	.best_encoder = rk3066_hdmi_connector_best_encoder,
+};
+
+static int
+rk3066_hdmi_register(struct drm_device *drm, struct rk3066_hdmi *hdmi)
+{
+	struct drm_encoder *encoder = &hdmi->encoder;
+	struct device *dev = hdmi->dev;
+
+	encoder->possible_crtcs =
+		drm_of_find_possible_crtcs(drm, dev->of_node);
+
+	/*
+	 * If we failed to find the CRTC(s) which this encoder is
+	 * supposed to be connected to, it's because the CRTC has
+	 * not been registered yet.  Defer probing, and hope that
+	 * the required CRTC is added later.
+	 */
+	if (encoder->possible_crtcs == 0)
+		return -EPROBE_DEFER;
+
+	drm_encoder_helper_add(encoder, &rk3066_hdmi_encoder_helper_funcs);
+	drm_encoder_init(drm, encoder, &rk3066_hdmi_encoder_funcs,
+			 DRM_MODE_ENCODER_TMDS, NULL);
+
+	hdmi->connector.polled = DRM_CONNECTOR_POLL_HPD;
+
+	drm_connector_helper_add(&hdmi->connector,
+				 &rk3066_hdmi_connector_helper_funcs);
+	drm_connector_init(drm, &hdmi->connector,
+			   &rk3066_hdmi_connector_funcs,
+			   DRM_MODE_CONNECTOR_HDMIA);
+
+	drm_connector_attach_encoder(&hdmi->connector, encoder);
+
+	return 0;
+}
+
+static irqreturn_t rk3066_hdmi_hardirq(int irq, void *dev_id)
+{
+	struct rk3066_hdmi *hdmi = dev_id;
+	irqreturn_t ret = IRQ_NONE;
+	u8 interrupt;
+
+	if (rk3066_hdmi_get_power_mode(hdmi) == HDMI_SYS_POWER_MODE_A)
+		hdmi_writeb(hdmi, HDMI_SYS_CTRL, HDMI_SYS_POWER_MODE_B);
+
+	interrupt = hdmi_readb(hdmi, HDMI_INTR_STATUS1);
+	if (interrupt)
+		hdmi_writeb(hdmi, HDMI_INTR_STATUS1, interrupt);
+
+	if (interrupt & HDMI_INTR_EDID_MASK) {
+		hdmi->i2c->stat = interrupt;
+		complete(&hdmi->i2c->cmpltn);
+	}
+
+	if (interrupt & (HDMI_INTR_HOTPLUG | HDMI_INTR_MSENS))
+		ret = IRQ_WAKE_THREAD;
+
+	return ret;
+}
+
+static irqreturn_t rk3066_hdmi_irq(int irq, void *dev_id)
+{
+	struct rk3066_hdmi *hdmi = dev_id;
+
+	drm_helper_hpd_irq_event(hdmi->connector.dev);
+
+	return IRQ_HANDLED;
+}
+
+static int rk3066_hdmi_i2c_read(struct rk3066_hdmi *hdmi, struct i2c_msg *msgs)
+{
+	int length = msgs->len;
+	u8 *buf = msgs->buf;
+	int ret;
+
+	ret = wait_for_completion_timeout(&hdmi->i2c->cmpltn, HZ / 10);
+	if (!ret || hdmi->i2c->stat & HDMI_INTR_EDID_ERR)
+		return -EAGAIN;
+
+	while (length--)
+		*buf++ = hdmi_readb(hdmi, HDMI_DDC_READ_FIFO_ADDR);
+
+	return 0;
+}
+
+static int rk3066_hdmi_i2c_write(struct rk3066_hdmi *hdmi, struct i2c_msg *msgs)
+{
+	/*
+	 * The DDC module only supports read EDID message, so
+	 * we assume that each word write to this i2c adapter
+	 * should be the offset of the EDID word address.
+	 */
+	if (msgs->len != 1 ||
+	    (msgs->addr != DDC_ADDR && msgs->addr != DDC_SEGMENT_ADDR))
+		return -EINVAL;
+
+	reinit_completion(&hdmi->i2c->cmpltn);
+
+	if (msgs->addr == DDC_SEGMENT_ADDR)
+		hdmi->i2c->segment_addr = msgs->buf[0];
+	if (msgs->addr == DDC_ADDR)
+		hdmi->i2c->ddc_addr = msgs->buf[0];
+
+	/* Set edid word address 0x00/0x80. */
+	hdmi_writeb(hdmi, HDMI_EDID_WORD_ADDR, hdmi->i2c->ddc_addr);
+
+	/* Set edid segment pointer. */
+	hdmi_writeb(hdmi, HDMI_EDID_SEGMENT_POINTER, hdmi->i2c->segment_addr);
+
+	return 0;
+}
+
+static int rk3066_hdmi_i2c_xfer(struct i2c_adapter *adap,
+				struct i2c_msg *msgs, int num)
+{
+	struct rk3066_hdmi *hdmi = i2c_get_adapdata(adap);
+	struct rk3066_hdmi_i2c *i2c = hdmi->i2c;
+	int i, ret = 0;
+
+	mutex_lock(&i2c->i2c_lock);
+
+	rk3066_hdmi_i2c_init(hdmi);
+
+	/* Unmute HDMI EDID interrupt. */
+	hdmi_modb(hdmi, HDMI_INTR_MASK1,
+		  HDMI_INTR_EDID_MASK, HDMI_INTR_EDID_MASK);
+	i2c->stat = 0;
+
+	for (i = 0; i < num; i++) {
+		DRM_DEV_DEBUG(hdmi->dev,
+			      "xfer: num: %d/%d, len: %d, flags: %#x\n",
+			      i + 1, num, msgs[i].len, msgs[i].flags);
+
+		if (msgs[i].flags & I2C_M_RD)
+			ret = rk3066_hdmi_i2c_read(hdmi, &msgs[i]);
+		else
+			ret = rk3066_hdmi_i2c_write(hdmi, &msgs[i]);
+
+		if (ret < 0)
+			break;
+	}
+
+	if (!ret)
+		ret = num;
+
+	/* Mute HDMI EDID interrupt. */
+	hdmi_modb(hdmi, HDMI_INTR_MASK1, HDMI_INTR_EDID_MASK, 0);
+
+	mutex_unlock(&i2c->i2c_lock);
+
+	return ret;
+}
+
+static u32 rk3066_hdmi_i2c_func(struct i2c_adapter *adapter)
+{
+	return I2C_FUNC_I2C | I2C_FUNC_SMBUS_EMUL;
+}
+
+static const struct i2c_algorithm rk3066_hdmi_algorithm = {
+	.master_xfer   = rk3066_hdmi_i2c_xfer,
+	.functionality = rk3066_hdmi_i2c_func,
+};
+
+static struct i2c_adapter *rk3066_hdmi_i2c_adapter(struct rk3066_hdmi *hdmi)
+{
+	struct i2c_adapter *adap;
+	struct rk3066_hdmi_i2c *i2c;
+	int ret;
+
+	i2c = devm_kzalloc(hdmi->dev, sizeof(*i2c), GFP_KERNEL);
+	if (!i2c)
+		return ERR_PTR(-ENOMEM);
+
+	mutex_init(&i2c->i2c_lock);
+	init_completion(&i2c->cmpltn);
+
+	adap = &i2c->adap;
+	adap->class = I2C_CLASS_DDC;
+	adap->owner = THIS_MODULE;
+	adap->dev.parent = hdmi->dev;
+	adap->dev.of_node = hdmi->dev->of_node;
+	adap->algo = &rk3066_hdmi_algorithm;
+	strlcpy(adap->name, "RK3066 HDMI", sizeof(adap->name));
+	i2c_set_adapdata(adap, hdmi);
+
+	ret = i2c_add_adapter(adap);
+	if (ret) {
+		DRM_DEV_ERROR(hdmi->dev, "cannot add %s I2C adapter\n",
+			      adap->name);
+		devm_kfree(hdmi->dev, i2c);
+		return ERR_PTR(ret);
+	}
+
+	hdmi->i2c = i2c;
+
+	DRM_DEV_DEBUG(hdmi->dev, "registered %s I2C bus driver\n", adap->name);
+
+	return adap;
+}
+
+static int rk3066_hdmi_bind(struct device *dev, struct device *master,
+			    void *data)
+{
+	struct platform_device *pdev = to_platform_device(dev);
+	struct drm_device *drm = data;
+	struct rk3066_hdmi *hdmi;
+	struct resource *iores;
+	int irq;
+	int ret;
+
+	hdmi = devm_kzalloc(dev, sizeof(*hdmi), GFP_KERNEL);
+	if (!hdmi)
+		return -ENOMEM;
+
+	hdmi->dev = dev;
+	hdmi->drm_dev = drm;
+
+	iores = platform_get_resource(pdev, IORESOURCE_MEM, 0);
+	if (!iores)
+		return -ENXIO;
+
+	hdmi->regs = devm_ioremap_resource(dev, iores);
+	if (IS_ERR(hdmi->regs))
+		return PTR_ERR(hdmi->regs);
+
+	irq = platform_get_irq(pdev, 0);
+	if (irq < 0)
+		return irq;
+
+	hdmi->hclk = devm_clk_get(dev, "hclk");
+	if (IS_ERR(hdmi->hclk)) {
+		DRM_DEV_ERROR(dev, "unable to get HDMI hclk clock\n");
+		return PTR_ERR(hdmi->hclk);
+	}
+
+	ret = clk_prepare_enable(hdmi->hclk);
+	if (ret) {
+		DRM_DEV_ERROR(dev, "cannot enable HDMI hclk clock: %d\n", ret);
+		return ret;
+	}
+
+	hdmi->grf_regmap = syscon_regmap_lookup_by_phandle(dev->of_node,
+							   "rockchip,grf");
+	if (IS_ERR(hdmi->grf_regmap)) {
+		DRM_DEV_ERROR(dev, "unable to get rockchip,grf\n");
+		ret = PTR_ERR(hdmi->grf_regmap);
+		goto err_disable_hclk;
+	}
+
+	/* internal hclk = hdmi_hclk / 25 */
+	hdmi_writeb(hdmi, HDMI_INTERNAL_CLK_DIVIDER, 25);
+
+	hdmi->ddc = rk3066_hdmi_i2c_adapter(hdmi);
+	if (IS_ERR(hdmi->ddc)) {
+		ret = PTR_ERR(hdmi->ddc);
+		hdmi->ddc = NULL;
+		goto err_disable_hclk;
+	}
+
+	rk3066_hdmi_set_power_mode(hdmi, HDMI_SYS_POWER_MODE_B);
+	usleep_range(999, 1000);
+	hdmi_writeb(hdmi, HDMI_INTR_MASK1, HDMI_INTR_HOTPLUG);
+	hdmi_writeb(hdmi, HDMI_INTR_MASK2, 0);
+	hdmi_writeb(hdmi, HDMI_INTR_MASK3, 0);
+	hdmi_writeb(hdmi, HDMI_INTR_MASK4, 0);
+	rk3066_hdmi_set_power_mode(hdmi, HDMI_SYS_POWER_MODE_A);
+
+	ret = rk3066_hdmi_register(drm, hdmi);
+	if (ret)
+		goto err_disable_i2c;
+
+	dev_set_drvdata(dev, hdmi);
+
+	ret = devm_request_threaded_irq(dev, irq, rk3066_hdmi_hardirq,
+					rk3066_hdmi_irq, IRQF_SHARED,
+					dev_name(dev), hdmi);
+	if (ret) {
+		DRM_DEV_ERROR(dev, "failed to request hdmi irq: %d\n", ret);
+		goto err_cleanup_hdmi;
+	}
+
+	return 0;
+
+err_cleanup_hdmi:
+	hdmi->connector.funcs->destroy(&hdmi->connector);
+	hdmi->encoder.funcs->destroy(&hdmi->encoder);
+err_disable_i2c:
+	i2c_put_adapter(hdmi->ddc);
+err_disable_hclk:
+	clk_disable_unprepare(hdmi->hclk);
+
+	return ret;
+}
+
+static void rk3066_hdmi_unbind(struct device *dev, struct device *master,
+			       void *data)
+{
+	struct rk3066_hdmi *hdmi = dev_get_drvdata(dev);
+
+	hdmi->connector.funcs->destroy(&hdmi->connector);
+	hdmi->encoder.funcs->destroy(&hdmi->encoder);
+
+	i2c_put_adapter(hdmi->ddc);
+	clk_disable_unprepare(hdmi->hclk);
+}
+
+static const struct component_ops rk3066_hdmi_ops = {
+	.bind   = rk3066_hdmi_bind,
+	.unbind = rk3066_hdmi_unbind,
+};
+
+static int rk3066_hdmi_probe(struct platform_device *pdev)
+{
+	return component_add(&pdev->dev, &rk3066_hdmi_ops);
+}
+
+static int rk3066_hdmi_remove(struct platform_device *pdev)
+{
+	component_del(&pdev->dev, &rk3066_hdmi_ops);
+
+	return 0;
+}
+
+static const struct of_device_id rk3066_hdmi_dt_ids[] = {
+	{ .compatible = "rockchip,rk3066-hdmi" },
+	{ /* sentinel */ },
+};
+MODULE_DEVICE_TABLE(of, rk3066_hdmi_dt_ids);
+
+struct platform_driver rk3066_hdmi_driver = {
+	.probe  = rk3066_hdmi_probe,
+	.remove = rk3066_hdmi_remove,
+	.driver = {
+		.name = "rockchip-rk3066-hdmi",
+		.of_match_table = rk3066_hdmi_dt_ids,
+	},
+};
diff --git a/drivers/gpu/drm/rockchip/rk3066_hdmi.h b/drivers/gpu/drm/rockchip/rk3066_hdmi.h
new file mode 100644
index 000000000000..39a31c62a428
--- /dev/null
+++ b/drivers/gpu/drm/rockchip/rk3066_hdmi.h
@@ -0,0 +1,229 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * Copyright (C) Fuzhou Rockchip Electronics Co.Ltd
+ *    Zheng Yang <zhengyang@rock-chips.com>
+ */
+
+#ifndef __RK3066_HDMI_H__
+#define __RK3066_HDMI_H__
+
+#define GRF_SOC_CON0				0x150
+#define HDMI_VIDEO_SEL				BIT(14)
+
+#define DDC_SEGMENT_ADDR			0x30
+#define HDMI_SCL_RATE				(50 * 1000)
+#define HDMI_MAXIMUM_INFO_FRAME_SIZE		0x11
+
+#define N_32K					0x1000
+#define N_441K					0x1880
+#define N_882K					0x3100
+#define N_1764K					0x6200
+#define N_48K					0x1800
+#define N_96K					0x3000
+#define N_192K					0x6000
+
+#define HDMI_SYS_CTRL				0x000
+#define HDMI_LR_SWAP_N3				0x004
+#define HDMI_N2					0x008
+#define HDMI_N1					0x00c
+#define HDMI_SPDIF_FS_CTS_INT3			0x010
+#define HDMI_CTS_INT2				0x014
+#define HDMI_CTS_INT1				0x018
+#define HDMI_CTS_EXT3				0x01c
+#define HDMI_CTS_EXT2				0x020
+#define HDMI_CTS_EXT1				0x024
+#define HDMI_AUDIO_CTRL1			0x028
+#define HDMI_AUDIO_CTRL2			0x02c
+#define HDMI_I2S_AUDIO_CTRL			0x030
+#define HDMI_I2S_SWAP				0x040
+#define HDMI_AUDIO_STA_BIT_CTRL1		0x044
+#define HDMI_AUDIO_STA_BIT_CTRL2		0x048
+#define HDMI_AUDIO_SRC_NUM_AND_LENGTH		0x050
+#define HDMI_AV_CTRL1				0x054
+#define HDMI_VIDEO_CTRL1			0x058
+#define HDMI_DEEP_COLOR_MODE			0x05c
+
+#define HDMI_EXT_VIDEO_PARA			0x0c0
+#define HDMI_EXT_HTOTAL_L			0x0c4
+#define HDMI_EXT_HTOTAL_H			0x0c8
+#define HDMI_EXT_HBLANK_L			0x0cc
+#define HDMI_EXT_HBLANK_H			0x0d0
+#define HDMI_EXT_HDELAY_L			0x0d4
+#define HDMI_EXT_HDELAY_H			0x0d8
+#define HDMI_EXT_HDURATION_L			0x0dc
+#define HDMI_EXT_HDURATION_H			0x0e0
+#define HDMI_EXT_VTOTAL_L			0x0e4
+#define HDMI_EXT_VTOTAL_H			0x0e8
+#define HDMI_AV_CTRL2				0x0ec
+#define HDMI_EXT_VBLANK_L			0x0f4
+#define HDMI_EXT_VBLANK_H			0x10c
+#define HDMI_EXT_VDELAY				0x0f8
+#define HDMI_EXT_VDURATION			0x0fc
+
+#define HDMI_CP_MANU_SEND_CTRL			0x100
+#define HDMI_CP_AUTO_SEND_CTRL			0x104
+#define HDMI_AUTO_CHECKSUM_OPT			0x108
+
+#define HDMI_VIDEO_CTRL2			0x114
+
+#define HDMI_PHY_OPTION				0x144
+
+#define HDMI_CP_BUF_INDEX			0x17c
+#define HDMI_CP_BUF_ACC_HB0			0x180
+#define HDMI_CP_BUF_ACC_HB1			0x184
+#define HDMI_CP_BUF_ACC_HB2			0x188
+#define HDMI_CP_BUF_ACC_PB0			0x18c
+
+#define HDMI_DDC_READ_FIFO_ADDR			0x200
+#define HDMI_DDC_BUS_FREQ_L			0x204
+#define HDMI_DDC_BUS_FREQ_H			0x208
+#define HDMI_DDC_BUS_CTRL			0x2dc
+#define HDMI_DDC_I2C_LEN			0x278
+#define HDMI_DDC_I2C_OFFSET			0x280
+#define HDMI_DDC_I2C_CTRL			0x284
+#define HDMI_DDC_I2C_READ_BUF0			0x288
+#define HDMI_DDC_I2C_READ_BUF1			0x28c
+#define HDMI_DDC_I2C_READ_BUF2			0x290
+#define HDMI_DDC_I2C_READ_BUF3			0x294
+#define HDMI_DDC_I2C_WRITE_BUF0			0x298
+#define HDMI_DDC_I2C_WRITE_BUF1			0x29c
+#define HDMI_DDC_I2C_WRITE_BUF2			0x2a0
+#define HDMI_DDC_I2C_WRITE_BUF3			0x2a4
+#define HDMI_DDC_I2C_WRITE_BUF4			0x2ac
+#define HDMI_DDC_I2C_WRITE_BUF5			0x2b0
+#define HDMI_DDC_I2C_WRITE_BUF6			0x2b4
+
+#define HDMI_INTR_MASK1				0x248
+#define HDMI_INTR_MASK2				0x24c
+#define HDMI_INTR_STATUS1			0x250
+#define HDMI_INTR_STATUS2			0x254
+#define HDMI_INTR_MASK3				0x258
+#define HDMI_INTR_MASK4				0x25c
+#define HDMI_INTR_STATUS3			0x260
+#define HDMI_INTR_STATUS4			0x264
+
+#define HDMI_HDCP_CTRL				0x2bc
+
+#define HDMI_EDID_SEGMENT_POINTER		0x310
+#define HDMI_EDID_WORD_ADDR			0x314
+#define HDMI_EDID_FIFO_ADDR			0x318
+
+#define HDMI_HPG_MENS_STA			0x37c
+
+#define HDMI_INTERNAL_CLK_DIVIDER		0x800
+
+enum {
+	/* HDMI_SYS_CTRL */
+	HDMI_SYS_POWER_MODE_MASK = 0xf0,
+	HDMI_SYS_POWER_MODE_A = 0x10,
+	HDMI_SYS_POWER_MODE_B = 0x20,
+	HDMI_SYS_POWER_MODE_D = 0x40,
+	HDMI_SYS_POWER_MODE_E = 0x80,
+	HDMI_SYS_PLL_RESET_MASK = 0x0c,
+	HDMI_SYS_PLL_RESET = 0x0c,
+	HDMI_SYS_PLLB_RESET = 0x08,
+
+	/* HDMI_LR_SWAP_N3 */
+	HDMI_AUDIO_LR_SWAP_MASK = 0xf0,
+	HDMI_AUDIO_LR_SWAP_SUBPACKET0 = 0x10,
+	HDMI_AUDIO_LR_SWAP_SUBPACKET1 = 0x20,
+	HDMI_AUDIO_LR_SWAP_SUBPACKET2 = 0x40,
+	HDMI_AUDIO_LR_SWAP_SUBPACKET3 = 0x80,
+	HDMI_AUDIO_N_19_16_MASK = 0x0f,
+
+	/* HDMI_AUDIO_CTRL1 */
+	HDMI_AUDIO_EXTERNAL_CTS = BIT(7),
+	HDMI_AUDIO_INPUT_IIS = 0,
+	HDMI_AUDIO_INPUT_SPDIF = 0x08,
+	HDMI_AUDIO_INPUT_MCLK_ACTIVE = 0x04,
+	HDMI_AUDIO_INPUT_MCLK_DEACTIVE = 0,
+	HDMI_AUDIO_INPUT_MCLK_RATE_128X = 0,
+	HDMI_AUDIO_INPUT_MCLK_RATE_256X = 1,
+	HDMI_AUDIO_INPUT_MCLK_RATE_384X = 2,
+	HDMI_AUDIO_INPUT_MCLK_RATE_512X = 3,
+
+	/* HDMI_I2S_AUDIO_CTRL */
+	HDMI_AUDIO_I2S_FORMAT_STANDARD = 0,
+	HDMI_AUDIO_I2S_CHANNEL_1_2 = 0x04,
+	HDMI_AUDIO_I2S_CHANNEL_3_4 = 0x0c,
+	HDMI_AUDIO_I2S_CHANNEL_5_6 = 0x1c,
+	HDMI_AUDIO_I2S_CHANNEL_7_8 = 0x3c,
+
+	/* HDMI_AV_CTRL1 */
+	HDMI_AUDIO_SAMPLE_FRE_MASK = 0xf0,
+	HDMI_AUDIO_SAMPLE_FRE_32000 = 0x30,
+	HDMI_AUDIO_SAMPLE_FRE_44100 = 0,
+	HDMI_AUDIO_SAMPLE_FRE_48000 = 0x20,
+	HDMI_AUDIO_SAMPLE_FRE_88200 = 0x80,
+	HDMI_AUDIO_SAMPLE_FRE_96000 = 0xa0,
+	HDMI_AUDIO_SAMPLE_FRE_176400 = 0xc0,
+	HDMI_AUDIO_SAMPLE_FRE_192000 = 0xe0,
+	HDMI_AUDIO_SAMPLE_FRE_768000 = 0x90,
+
+	HDMI_VIDEO_INPUT_FORMAT_MASK = 0x0e,
+	HDMI_VIDEO_INPUT_RGB_YCBCR444 = 0,
+	HDMI_VIDEO_INPUT_YCBCR422 = 0x02,
+	HDMI_VIDEO_DE_MASK = 0x1,
+	HDMI_VIDEO_INTERNAL_DE = 0,
+	HDMI_VIDEO_EXTERNAL_DE = 0x01,
+
+	/* HDMI_VIDEO_CTRL1 */
+	HDMI_VIDEO_OUTPUT_FORMAT_MASK = 0xc0,
+	HDMI_VIDEO_OUTPUT_RGB444 = 0,
+	HDMI_VIDEO_OUTPUT_YCBCR444 = 0x40,
+	HDMI_VIDEO_OUTPUT_YCBCR422 = 0x80,
+	HDMI_VIDEO_INPUT_DATA_DEPTH_MASK = 0x30,
+	HDMI_VIDEO_INPUT_DATA_DEPTH_12BIT = 0,
+	HDMI_VIDEO_INPUT_DATA_DEPTH_10BIT = 0x10,
+	HDMI_VIDEO_INPUT_DATA_DEPTH_8BIT = 0x30,
+	HDMI_VIDEO_INPUT_COLOR_MASK = 1,
+	HDMI_VIDEO_INPUT_COLOR_RGB = 0,
+	HDMI_VIDEO_INPUT_COLOR_YCBCR = 1,
+
+	/* HDMI_EXT_VIDEO_PARA */
+	HDMI_VIDEO_VSYNC_OFFSET_SHIFT = 4,
+	HDMI_VIDEO_VSYNC_ACTIVE_HIGH = BIT(3),
+	HDMI_VIDEO_VSYNC_ACTIVE_LOW = 0,
+	HDMI_VIDEO_HSYNC_ACTIVE_HIGH = BIT(2),
+	HDMI_VIDEO_HSYNC_ACTIVE_LOW = 0,
+	HDMI_VIDEO_MODE_INTERLACE = BIT(1),
+	HDMI_VIDEO_MODE_PROGRESSIVE = 0,
+	HDMI_EXT_VIDEO_SET_EN = BIT(0),
+
+	/* HDMI_CP_AUTO_SEND_CTRL */
+
+	/* HDMI_VIDEO_CTRL2 */
+	HDMI_VIDEO_AV_MUTE_MASK = 0xc0,
+	HDMI_VIDEO_CLR_AV_MUTE = BIT(7),
+	HDMI_VIDEO_SET_AV_MUTE = BIT(6),
+	HDMI_AUDIO_CP_LOGIC_RESET_MASK = BIT(2),
+	HDMI_AUDIO_CP_LOGIC_RESET = BIT(2),
+	HDMI_VIDEO_AUDIO_DISABLE_MASK = 0x3,
+	HDMI_AUDIO_DISABLE = BIT(1),
+	HDMI_VIDEO_DISABLE = BIT(0),
+
+	/* HDMI_CP_BUF_INDEX */
+	HDMI_INFOFRAME_VSI = 0x05,
+	HDMI_INFOFRAME_AVI = 0x06,
+	HDMI_INFOFRAME_AAI = 0x08,
+
+	/* HDMI_INTR_MASK1 */
+	/* HDMI_INTR_STATUS1 */
+	HDMI_INTR_HOTPLUG = BIT(7),
+	HDMI_INTR_MSENS = BIT(6),
+	HDMI_INTR_VSYNC = BIT(5),
+	HDMI_INTR_AUDIO_FIFO_FULL = BIT(4),
+	HDMI_INTR_EDID_MASK = 0x6,
+	HDMI_INTR_EDID_READY = BIT(2),
+	HDMI_INTR_EDID_ERR = BIT(1),
+
+	/* HDMI_HDCP_CTRL */
+	HDMI_VIDEO_MODE_MASK = BIT(1),
+	HDMI_VIDEO_MODE_HDMI = BIT(1),
+
+	/* HDMI_HPG_MENS_STA */
+	HDMI_HPG_IN_STATUS_HIGH = BIT(7),
+	HDMI_MSENS_IN_STATUS_HIGH = BIT(6),
+};
+
+#endif /* __RK3066_HDMI_H__ */
diff --git a/drivers/gpu/drm/rockchip/rockchip_drm_drv.c b/drivers/gpu/drm/rockchip/rockchip_drm_drv.c
index d7fa17f12769..8d7a634c12c2 100644
--- a/drivers/gpu/drm/rockchip/rockchip_drm_drv.c
+++ b/drivers/gpu/drm/rockchip/rockchip_drm_drv.c
@@ -486,6 +486,8 @@ static int __init rockchip_drm_init(void)
 	ADD_ROCKCHIP_SUB_DRIVER(dw_mipi_dsi_rockchip_driver,
 				CONFIG_ROCKCHIP_DW_MIPI_DSI);
 	ADD_ROCKCHIP_SUB_DRIVER(inno_hdmi_driver, CONFIG_ROCKCHIP_INNO_HDMI);
+	ADD_ROCKCHIP_SUB_DRIVER(rk3066_hdmi_driver,
+				CONFIG_ROCKCHIP_RK3066_HDMI);
 
 	ret = platform_register_drivers(rockchip_sub_drivers,
 					num_rockchip_sub_drivers);
diff --git a/drivers/gpu/drm/rockchip/rockchip_drm_drv.h b/drivers/gpu/drm/rockchip/rockchip_drm_drv.h
index ce48568ec8a0..e4bc4322bc3f 100644
--- a/drivers/gpu/drm/rockchip/rockchip_drm_drv.h
+++ b/drivers/gpu/drm/rockchip/rockchip_drm_drv.h
@@ -73,4 +73,5 @@ extern struct platform_driver inno_hdmi_driver;
 extern struct platform_driver rockchip_dp_driver;
 extern struct platform_driver rockchip_lvds_driver;
 extern struct platform_driver vop_platform_driver;
+extern struct platform_driver rk3066_hdmi_driver;
 #endif /* _ROCKCHIP_DRM_DRV_H_ */
diff --git a/drivers/gpu/drm/rockchip/rockchip_drm_vop.c b/drivers/gpu/drm/rockchip/rockchip_drm_vop.c
index c7d4c6073ea5..a7cbf6c9a153 100644
--- a/drivers/gpu/drm/rockchip/rockchip_drm_vop.c
+++ b/drivers/gpu/drm/rockchip/rockchip_drm_vop.c
@@ -1029,6 +1029,7 @@ static void vop_crtc_atomic_enable(struct drm_crtc *crtc,
 	u16 vact_st = adjusted_mode->vtotal - adjusted_mode->vsync_start;
 	u16 vact_end = vact_st + vdisplay;
 	uint32_t pin_pol, val;
+	int dither_bpc = s->output_bpc ? s->output_bpc : 10;
 	int ret;
 
 	mutex_lock(&vop->vop_lock);
@@ -1086,11 +1087,19 @@ static void vop_crtc_atomic_enable(struct drm_crtc *crtc,
 	    !(vop_data->feature & VOP_FEATURE_OUTPUT_RGB10))
 		s->output_mode = ROCKCHIP_OUT_MODE_P888;
 
-	if (s->output_mode == ROCKCHIP_OUT_MODE_AAAA && s->output_bpc == 8)
+	if (s->output_mode == ROCKCHIP_OUT_MODE_AAAA && dither_bpc <= 8)
 		VOP_REG_SET(vop, common, pre_dither_down, 1);
 	else
 		VOP_REG_SET(vop, common, pre_dither_down, 0);
 
+	if (dither_bpc == 6) {
+		VOP_REG_SET(vop, common, dither_down_sel, DITHER_DOWN_ALLEGRO);
+		VOP_REG_SET(vop, common, dither_down_mode, RGB888_TO_RGB666);
+		VOP_REG_SET(vop, common, dither_down_en, 1);
+	} else {
+		VOP_REG_SET(vop, common, dither_down_en, 0);
+	}
+
 	VOP_REG_SET(vop, common, out_mode, s->output_mode);
 
 	VOP_REG_SET(vop, modeset, htotal_pw, (htotal << 16) | hsync_len);
diff --git a/drivers/gpu/drm/rockchip/rockchip_drm_vop.h b/drivers/gpu/drm/rockchip/rockchip_drm_vop.h
index 04ed401d2325..e64351dab610 100644
--- a/drivers/gpu/drm/rockchip/rockchip_drm_vop.h
+++ b/drivers/gpu/drm/rockchip/rockchip_drm_vop.h
@@ -71,7 +71,9 @@ struct vop_common {
 	struct vop_reg dsp_blank;
 	struct vop_reg data_blank;
 	struct vop_reg pre_dither_down;
-	struct vop_reg dither_down;
+	struct vop_reg dither_down_sel;
+	struct vop_reg dither_down_mode;
+	struct vop_reg dither_down_en;
 	struct vop_reg dither_up;
 	struct vop_reg gate_en;
 	struct vop_reg mmu_en;
@@ -287,6 +289,16 @@ enum scale_down_mode {
 	SCALE_DOWN_AVG = 0x1
 };
 
+enum dither_down_mode {
+	RGB888_TO_RGB565 = 0x0,
+	RGB888_TO_RGB666 = 0x1
+};
+
+enum dither_down_mode_sel {
+	DITHER_DOWN_ALLEGRO = 0x0,
+	DITHER_DOWN_FRC = 0x1
+};
+
 enum vop_pol {
 	HSYNC_POSITIVE = 0,
 	VSYNC_POSITIVE = 1,
diff --git a/drivers/gpu/drm/rockchip/rockchip_vop_reg.c b/drivers/gpu/drm/rockchip/rockchip_vop_reg.c
index bd76328c0fdb..e732b73033c8 100644
--- a/drivers/gpu/drm/rockchip/rockchip_vop_reg.c
+++ b/drivers/gpu/drm/rockchip/rockchip_vop_reg.c
@@ -137,6 +137,9 @@ static const struct vop_common rk3036_common = {
 	.standby = VOP_REG_SYNC(RK3036_SYS_CTRL, 0x1, 30),
 	.out_mode = VOP_REG(RK3036_DSP_CTRL0, 0xf, 0),
 	.dsp_blank = VOP_REG(RK3036_DSP_CTRL1, 0x1, 24),
+	.dither_down_sel = VOP_REG(RK3036_DSP_CTRL0, 0x1, 27),
+	.dither_down_en = VOP_REG(RK3036_DSP_CTRL0, 0x1, 11),
+	.dither_down_mode = VOP_REG(RK3036_DSP_CTRL0, 0x1, 10),
 	.cfg_done = VOP_REG_SYNC(RK3036_REG_CFG_DONE, 0x1, 0),
 };
 
@@ -200,6 +203,9 @@ static const struct vop_common px30_common = {
 	.standby = VOP_REG_SYNC(PX30_SYS_CTRL2, 0x1, 1),
 	.out_mode = VOP_REG(PX30_DSP_CTRL2, 0xf, 16),
 	.dsp_blank = VOP_REG(PX30_DSP_CTRL2, 0x1, 14),
+	.dither_down_en = VOP_REG(PX30_DSP_CTRL2, 0x1, 8),
+	.dither_down_sel = VOP_REG(PX30_DSP_CTRL2, 0x1, 7),
+	.dither_down_mode = VOP_REG(PX30_DSP_CTRL2, 0x1, 6),
 	.cfg_done = VOP_REG_SYNC(PX30_REG_CFG_DONE, 0x1, 0),
 };
 
@@ -365,6 +371,8 @@ static const struct vop_common rk3066_common = {
 	.standby = VOP_REG(RK3066_SYS_CTRL0, 0x1, 1),
 	.out_mode = VOP_REG(RK3066_DSP_CTRL0, 0xf, 0),
 	.cfg_done = VOP_REG(RK3066_REG_CFG_DONE, 0x1, 0),
+	.dither_down_en = VOP_REG(RK3066_DSP_CTRL0, 0x1, 11),
+	.dither_down_mode = VOP_REG(RK3066_DSP_CTRL0, 0x1, 10),
 	.dsp_blank = VOP_REG(RK3066_DSP_CTRL1, 0x1, 24),
 };
 
@@ -458,6 +466,9 @@ static const struct vop_common rk3188_common = {
 	.standby = VOP_REG(RK3188_SYS_CTRL, 0x1, 30),
 	.out_mode = VOP_REG(RK3188_DSP_CTRL0, 0xf, 0),
 	.cfg_done = VOP_REG(RK3188_REG_CFG_DONE, 0x1, 0),
+	.dither_down_sel = VOP_REG(RK3188_DSP_CTRL0, 0x1, 27),
+	.dither_down_en = VOP_REG(RK3188_DSP_CTRL0, 0x1, 11),
+	.dither_down_mode = VOP_REG(RK3188_DSP_CTRL0, 0x1, 10),
 	.dsp_blank = VOP_REG(RK3188_DSP_CTRL1, 0x3, 24),
 };
 
@@ -585,8 +596,10 @@ static const struct vop_common rk3288_common = {
 	.standby = VOP_REG_SYNC(RK3288_SYS_CTRL, 0x1, 22),
 	.gate_en = VOP_REG(RK3288_SYS_CTRL, 0x1, 23),
 	.mmu_en = VOP_REG(RK3288_SYS_CTRL, 0x1, 20),
+	.dither_down_sel = VOP_REG(RK3288_DSP_CTRL1, 0x1, 4),
+	.dither_down_mode = VOP_REG(RK3288_DSP_CTRL1, 0x1, 3),
+	.dither_down_en = VOP_REG(RK3288_DSP_CTRL1, 0x1, 2),
 	.pre_dither_down = VOP_REG(RK3288_DSP_CTRL1, 0x1, 1),
-	.dither_down = VOP_REG(RK3288_DSP_CTRL1, 0xf, 1),
 	.dither_up = VOP_REG(RK3288_DSP_CTRL1, 0x1, 6),
 	.data_blank = VOP_REG(RK3288_DSP_CTRL0, 0x1, 19),
 	.dsp_blank = VOP_REG(RK3288_DSP_CTRL0, 0x3, 18),
@@ -878,7 +891,10 @@ static const struct vop_misc rk3328_misc = {
 
 static const struct vop_common rk3328_common = {
 	.standby = VOP_REG_SYNC(RK3328_SYS_CTRL, 0x1, 22),
-	.dither_down = VOP_REG(RK3328_DSP_CTRL1, 0xf, 1),
+	.dither_down_sel = VOP_REG(RK3328_DSP_CTRL1, 0x1, 4),
+	.dither_down_mode = VOP_REG(RK3328_DSP_CTRL1, 0x1, 3),
+	.dither_down_en = VOP_REG(RK3328_DSP_CTRL1, 0x1, 2),
+	.pre_dither_down = VOP_REG(RK3328_DSP_CTRL1, 0x1, 1),
 	.dither_up = VOP_REG(RK3328_DSP_CTRL1, 0x1, 6),
 	.dsp_blank = VOP_REG(RK3328_DSP_CTRL0, 0x3, 18),
 	.out_mode = VOP_REG(RK3328_DSP_CTRL0, 0xf, 0),
diff --git a/drivers/gpu/drm/stm/drv.c b/drivers/gpu/drm/stm/drv.c
index 0a7f933ab007..5834ef56fbaa 100644
--- a/drivers/gpu/drm/stm/drv.c
+++ b/drivers/gpu/drm/stm/drv.c
@@ -129,6 +129,40 @@ static void drv_unload(struct drm_device *ddev)
 	drm_mode_config_cleanup(ddev);
 }
 
+static __maybe_unused int drv_suspend(struct device *dev)
+{
+	struct drm_device *ddev = dev_get_drvdata(dev);
+	struct ltdc_device *ldev = ddev->dev_private;
+	struct drm_atomic_state *state;
+
+	drm_kms_helper_poll_disable(ddev);
+	state = drm_atomic_helper_suspend(ddev);
+	if (IS_ERR(state)) {
+		drm_kms_helper_poll_enable(ddev);
+		return PTR_ERR(state);
+	}
+	ldev->suspend_state = state;
+	ltdc_suspend(ddev);
+
+	return 0;
+}
+
+static __maybe_unused int drv_resume(struct device *dev)
+{
+	struct drm_device *ddev = dev_get_drvdata(dev);
+	struct ltdc_device *ldev = ddev->dev_private;
+
+	ltdc_resume(ddev);
+	drm_atomic_helper_resume(ddev, ldev->suspend_state);
+	drm_kms_helper_poll_enable(ddev);
+
+	return 0;
+}
+
+static const struct dev_pm_ops drv_pm_ops = {
+	SET_SYSTEM_SLEEP_PM_OPS(drv_suspend, drv_resume)
+};
+
 static int stm_drm_platform_probe(struct platform_device *pdev)
 {
 	struct device *dev = &pdev->dev;
@@ -186,6 +220,7 @@ static struct platform_driver stm_drm_platform_driver = {
 	.driver = {
 		.name = "stm32-display",
 		.of_match_table = drv_dt_ids,
+		.pm = &drv_pm_ops,
 	},
 };
 
diff --git a/drivers/gpu/drm/stm/dw_mipi_dsi-stm.c b/drivers/gpu/drm/stm/dw_mipi_dsi-stm.c
index a672b59a2226..1bef73e8c8fe 100644
--- a/drivers/gpu/drm/stm/dw_mipi_dsi-stm.c
+++ b/drivers/gpu/drm/stm/dw_mipi_dsi-stm.c
@@ -356,12 +356,40 @@ static int dw_mipi_dsi_stm_remove(struct platform_device *pdev)
 	return 0;
 }
 
+static int __maybe_unused dw_mipi_dsi_stm_suspend(struct device *dev)
+{
+	struct dw_mipi_dsi_stm *dsi = dw_mipi_dsi_stm_plat_data.priv_data;
+
+	DRM_DEBUG_DRIVER("\n");
+
+	clk_disable_unprepare(dsi->pllref_clk);
+
+	return 0;
+}
+
+static int __maybe_unused dw_mipi_dsi_stm_resume(struct device *dev)
+{
+	struct dw_mipi_dsi_stm *dsi = dw_mipi_dsi_stm_plat_data.priv_data;
+
+	DRM_DEBUG_DRIVER("\n");
+
+	clk_prepare_enable(dsi->pllref_clk);
+
+	return 0;
+}
+
+static const struct dev_pm_ops dw_mipi_dsi_stm_pm_ops = {
+	SET_SYSTEM_SLEEP_PM_OPS(dw_mipi_dsi_stm_suspend,
+				dw_mipi_dsi_stm_resume)
+};
+
 static struct platform_driver dw_mipi_dsi_stm_driver = {
 	.probe		= dw_mipi_dsi_stm_probe,
 	.remove		= dw_mipi_dsi_stm_remove,
 	.driver		= {
 		.of_match_table = dw_mipi_dsi_stm_dt_ids,
 		.name	= "stm32-display-dsi",
+		.pm = &dw_mipi_dsi_stm_pm_ops,
 	},
 };
 
diff --git a/drivers/gpu/drm/stm/ltdc.c b/drivers/gpu/drm/stm/ltdc.c
index b1741a9d5be2..32fd6a3b37fb 100644
--- a/drivers/gpu/drm/stm/ltdc.c
+++ b/drivers/gpu/drm/stm/ltdc.c
@@ -1062,6 +1062,30 @@ static int ltdc_get_caps(struct drm_device *ddev)
 	return 0;
 }
 
+void ltdc_suspend(struct drm_device *ddev)
+{
+	struct ltdc_device *ldev = ddev->dev_private;
+
+	DRM_DEBUG_DRIVER("\n");
+	clk_disable_unprepare(ldev->pixel_clk);
+}
+
+int ltdc_resume(struct drm_device *ddev)
+{
+	struct ltdc_device *ldev = ddev->dev_private;
+	int ret;
+
+	DRM_DEBUG_DRIVER("\n");
+
+	ret = clk_prepare_enable(ldev->pixel_clk);
+	if (ret) {
+		DRM_ERROR("failed to enable pixel clock (%d)\n", ret);
+		return ret;
+	}
+
+	return 0;
+}
+
 int ltdc_load(struct drm_device *ddev)
 {
 	struct platform_device *pdev = to_platform_device(ddev->dev);
diff --git a/drivers/gpu/drm/stm/ltdc.h b/drivers/gpu/drm/stm/ltdc.h
index e46f477a8494..a1ad0ae3b006 100644
--- a/drivers/gpu/drm/stm/ltdc.h
+++ b/drivers/gpu/drm/stm/ltdc.h
@@ -36,6 +36,7 @@ struct ltdc_device {
 	u32 error_status;
 	u32 irq_status;
 	struct fps_info plane_fpsi[LTDC_MAX_LAYER];
+	struct drm_atomic_state *suspend_state;
 };
 
 bool ltdc_crtc_scanoutpos(struct drm_device *dev, unsigned int pipe,
@@ -45,5 +46,7 @@ bool ltdc_crtc_scanoutpos(struct drm_device *dev, unsigned int pipe,
 
 int ltdc_load(struct drm_device *ddev);
 void ltdc_unload(struct drm_device *ddev);
+void ltdc_suspend(struct drm_device *ddev);
+int ltdc_resume(struct drm_device *ddev);
 
 #endif
diff --git a/drivers/gpu/drm/sun4i/sun4i_hdmi.h b/drivers/gpu/drm/sun4i/sun4i_hdmi.h
index b685ee11623d..b08c4453d47c 100644
--- a/drivers/gpu/drm/sun4i/sun4i_hdmi.h
+++ b/drivers/gpu/drm/sun4i/sun4i_hdmi.h
@@ -269,6 +269,7 @@ struct sun4i_hdmi {
 	struct clk		*tmds_clk;
 
 	struct i2c_adapter	*i2c;
+	struct i2c_adapter	*ddc_i2c;
 
 	/* Regmap fields for I2C adapter */
 	struct regmap_field	*field_ddc_en;
diff --git a/drivers/gpu/drm/sun4i/sun4i_hdmi_enc.c b/drivers/gpu/drm/sun4i/sun4i_hdmi_enc.c
index d18862629301..8c122e637697 100644
--- a/drivers/gpu/drm/sun4i/sun4i_hdmi_enc.c
+++ b/drivers/gpu/drm/sun4i/sun4i_hdmi_enc.c
@@ -217,7 +217,7 @@ static int sun4i_hdmi_get_modes(struct drm_connector *connector)
 	struct edid *edid;
 	int ret;
 
-	edid = drm_get_edid(connector, hdmi->i2c);
+	edid = drm_get_edid(connector, hdmi->ddc_i2c ?: hdmi->i2c);
 	if (!edid)
 		return 0;
 
@@ -233,6 +233,28 @@ static int sun4i_hdmi_get_modes(struct drm_connector *connector)
 	return ret;
 }
 
+static struct i2c_adapter *sun4i_hdmi_get_ddc(struct device *dev)
+{
+	struct device_node *phandle, *remote;
+	struct i2c_adapter *ddc;
+
+	remote = of_graph_get_remote_node(dev->of_node, 1, -1);
+	if (!remote)
+		return ERR_PTR(-EINVAL);
+
+	phandle = of_parse_phandle(remote, "ddc-i2c-bus", 0);
+	of_node_put(remote);
+	if (!phandle)
+		return ERR_PTR(-ENODEV);
+
+	ddc = of_get_i2c_adapter_by_node(phandle);
+	of_node_put(phandle);
+	if (!ddc)
+		return ERR_PTR(-EPROBE_DEFER);
+
+	return ddc;
+}
+
 static const struct drm_connector_helper_funcs sun4i_hdmi_connector_helper_funcs = {
 	.get_modes	= sun4i_hdmi_get_modes,
 };
@@ -580,6 +602,15 @@ static int sun4i_hdmi_bind(struct device *dev, struct device *master,
 		goto err_disable_mod_clk;
 	}
 
+	hdmi->ddc_i2c = sun4i_hdmi_get_ddc(dev);
+	if (IS_ERR(hdmi->ddc_i2c)) {
+		ret = PTR_ERR(hdmi->ddc_i2c);
+		if (ret == -ENODEV)
+			hdmi->ddc_i2c = NULL;
+		else
+			goto err_del_i2c_adapter;
+	}
+
 	drm_encoder_helper_add(&hdmi->encoder,
 			       &sun4i_hdmi_helper_funcs);
 	ret = drm_encoder_init(drm,
@@ -589,14 +620,14 @@ static int sun4i_hdmi_bind(struct device *dev, struct device *master,
 			       NULL);
 	if (ret) {
 		dev_err(dev, "Couldn't initialise the HDMI encoder\n");
-		goto err_del_i2c_adapter;
+		goto err_put_ddc_i2c;
 	}
 
 	hdmi->encoder.possible_crtcs = drm_of_find_possible_crtcs(drm,
 								  dev->of_node);
 	if (!hdmi->encoder.possible_crtcs) {
 		ret = -EPROBE_DEFER;
-		goto err_del_i2c_adapter;
+		goto err_put_ddc_i2c;
 	}
 
 #ifdef CONFIG_DRM_SUN4I_HDMI_CEC
@@ -635,6 +666,8 @@ static int sun4i_hdmi_bind(struct device *dev, struct device *master,
 err_cleanup_connector:
 	cec_delete_adapter(hdmi->cec_adap);
 	drm_encoder_cleanup(&hdmi->encoder);
+err_put_ddc_i2c:
+	i2c_put_adapter(hdmi->ddc_i2c);
 err_del_i2c_adapter:
 	i2c_del_adapter(hdmi->i2c);
 err_disable_mod_clk:
@@ -655,6 +688,7 @@ static void sun4i_hdmi_unbind(struct device *dev, struct device *master,
 	drm_connector_cleanup(&hdmi->connector);
 	drm_encoder_cleanup(&hdmi->encoder);
 	i2c_del_adapter(hdmi->i2c);
+	i2c_put_adapter(hdmi->ddc_i2c);
 	clk_disable_unprepare(hdmi->mod_clk);
 	clk_disable_unprepare(hdmi->bus_clk);
 }
diff --git a/drivers/gpu/drm/v3d/v3d_bo.c b/drivers/gpu/drm/v3d/v3d_bo.c
index c0219ebb4284..a22b75a3a533 100644
--- a/drivers/gpu/drm/v3d/v3d_bo.c
+++ b/drivers/gpu/drm/v3d/v3d_bo.c
@@ -130,8 +130,8 @@ struct v3d_bo *v3d_bo_create(struct drm_device *dev, struct drm_file *file_priv,
 	int ret;
 
 	shmem_obj = drm_gem_shmem_create(dev, unaligned_size);
-	if (!shmem_obj)
-		return NULL;
+	if (IS_ERR(shmem_obj))
+		return ERR_CAST(shmem_obj);
 	bo = to_v3d_bo(&shmem_obj->base);
 
 	ret = v3d_bo_create_finish(&shmem_obj->base);
diff --git a/drivers/gpu/drm/v3d/v3d_drv.c b/drivers/gpu/drm/v3d/v3d_drv.c
index d600628bb5c1..a06b05f714a5 100644
--- a/drivers/gpu/drm/v3d/v3d_drv.c
+++ b/drivers/gpu/drm/v3d/v3d_drv.c
@@ -102,6 +102,8 @@ static int v3d_get_param_ioctl(struct drm_device *dev, void *data,
 			return -EINVAL;
 
 		ret = pm_runtime_get_sync(v3d->dev);
+		if (ret < 0)
+			return ret;
 		if (args->param >= DRM_V3D_PARAM_V3D_CORE0_IDENT0 &&
 		    args->param <= DRM_V3D_PARAM_V3D_CORE0_IDENT2) {
 			args->value = V3D_CORE_READ(0, offset);
diff --git a/drivers/gpu/drm/v3d/v3d_drv.h b/drivers/gpu/drm/v3d/v3d_drv.h
index 7b0fe6240f7d..e9d4a2fdcf44 100644
--- a/drivers/gpu/drm/v3d/v3d_drv.h
+++ b/drivers/gpu/drm/v3d/v3d_drv.h
@@ -163,7 +163,7 @@ struct v3d_job {
 	struct dma_fence *in_fence;
 
 	/* v3d fence to be signaled by IRQ handler when the job is complete. */
-	struct dma_fence *done_fence;
+	struct dma_fence *irq_fence;
 
 	/* GPU virtual addresses of the start/end of the CL job. */
 	u32 start, end;
@@ -210,7 +210,7 @@ struct v3d_tfu_job {
 	struct dma_fence *in_fence;
 
 	/* v3d fence to be signaled by IRQ handler when the job is complete. */
-	struct dma_fence *done_fence;
+	struct dma_fence *irq_fence;
 
 	struct v3d_dev *v3d;
 
diff --git a/drivers/gpu/drm/v3d/v3d_gem.c b/drivers/gpu/drm/v3d/v3d_gem.c
index b84d89c7b3fb..93ff8fcbe475 100644
--- a/drivers/gpu/drm/v3d/v3d_gem.c
+++ b/drivers/gpu/drm/v3d/v3d_gem.c
@@ -340,8 +340,8 @@ v3d_exec_cleanup(struct kref *ref)
 	dma_fence_put(exec->bin.in_fence);
 	dma_fence_put(exec->render.in_fence);
 
-	dma_fence_put(exec->bin.done_fence);
-	dma_fence_put(exec->render.done_fence);
+	dma_fence_put(exec->bin.irq_fence);
+	dma_fence_put(exec->render.irq_fence);
 
 	dma_fence_put(exec->bin_done_fence);
 	dma_fence_put(exec->render_done_fence);
@@ -374,7 +374,7 @@ v3d_tfu_job_cleanup(struct kref *ref)
 	unsigned int i;
 
 	dma_fence_put(job->in_fence);
-	dma_fence_put(job->done_fence);
+	dma_fence_put(job->irq_fence);
 
 	for (i = 0; i < ARRAY_SIZE(job->bo); i++) {
 		if (job->bo[i])
diff --git a/drivers/gpu/drm/v3d/v3d_irq.c b/drivers/gpu/drm/v3d/v3d_irq.c
index b4d6ae81186d..aa0a180ae700 100644
--- a/drivers/gpu/drm/v3d/v3d_irq.c
+++ b/drivers/gpu/drm/v3d/v3d_irq.c
@@ -87,7 +87,8 @@ v3d_irq(int irq, void *arg)
 	if (intsts & V3D_INT_OUTOMEM) {
 		/* Note that the OOM status is edge signaled, so the
 		 * interrupt won't happen again until the we actually
-		 * add more memory.
+		 * add more memory.  Also, as of V3D 4.1, FLDONE won't
+		 * be reported until any OOM state has been cleared.
 		 */
 		schedule_work(&v3d->overflow_mem_work);
 		status = IRQ_HANDLED;
@@ -95,7 +96,7 @@ v3d_irq(int irq, void *arg)
 
 	if (intsts & V3D_INT_FLDONE) {
 		struct v3d_fence *fence =
-			to_v3d_fence(v3d->bin_job->bin.done_fence);
+			to_v3d_fence(v3d->bin_job->bin.irq_fence);
 
 		trace_v3d_bcl_irq(&v3d->drm, fence->seqno);
 		dma_fence_signal(&fence->base);
@@ -104,7 +105,7 @@ v3d_irq(int irq, void *arg)
 
 	if (intsts & V3D_INT_FRDONE) {
 		struct v3d_fence *fence =
-			to_v3d_fence(v3d->render_job->render.done_fence);
+			to_v3d_fence(v3d->render_job->render.irq_fence);
 
 		trace_v3d_rcl_irq(&v3d->drm, fence->seqno);
 		dma_fence_signal(&fence->base);
@@ -140,7 +141,7 @@ v3d_hub_irq(int irq, void *arg)
 
 	if (intsts & V3D_HUB_INT_TFUC) {
 		struct v3d_fence *fence =
-			to_v3d_fence(v3d->tfu_job->done_fence);
+			to_v3d_fence(v3d->tfu_job->irq_fence);
 
 		trace_v3d_tfu_irq(&v3d->drm, fence->seqno);
 		dma_fence_signal(&fence->base);
diff --git a/drivers/gpu/drm/v3d/v3d_sched.c b/drivers/gpu/drm/v3d/v3d_sched.c
index d0c68b7c8b41..e740f3b99aa5 100644
--- a/drivers/gpu/drm/v3d/v3d_sched.c
+++ b/drivers/gpu/drm/v3d/v3d_sched.c
@@ -156,9 +156,9 @@ static struct dma_fence *v3d_job_run(struct drm_sched_job *sched_job)
 	if (IS_ERR(fence))
 		return NULL;
 
-	if (job->done_fence)
-		dma_fence_put(job->done_fence);
-	job->done_fence = dma_fence_get(fence);
+	if (job->irq_fence)
+		dma_fence_put(job->irq_fence);
+	job->irq_fence = dma_fence_get(fence);
 
 	trace_v3d_submit_cl(dev, q == V3D_RENDER, to_v3d_fence(fence)->seqno,
 			    job->start, job->end);
@@ -199,9 +199,9 @@ v3d_tfu_job_run(struct drm_sched_job *sched_job)
 		return NULL;
 
 	v3d->tfu_job = job;
-	if (job->done_fence)
-		dma_fence_put(job->done_fence);
-	job->done_fence = dma_fence_get(fence);
+	if (job->irq_fence)
+		dma_fence_put(job->irq_fence);
+	job->irq_fence = dma_fence_get(fence);
 
 	trace_v3d_submit_tfu(dev, to_v3d_fence(fence)->seqno);
 
diff --git a/drivers/gpu/drm/vc4/vc4_bo.c b/drivers/gpu/drm/vc4/vc4_bo.c
index 92e3f98d8478..88ebd681d7eb 100644
--- a/drivers/gpu/drm/vc4/vc4_bo.c
+++ b/drivers/gpu/drm/vc4/vc4_bo.c
@@ -40,7 +40,7 @@ static bool is_user_label(int label)
 	return label >= VC4_BO_TYPE_COUNT;
 }
 
-static void vc4_bo_stats_dump(struct vc4_dev *vc4)
+static void vc4_bo_stats_print(struct drm_printer *p, struct vc4_dev *vc4)
 {
 	int i;
 
@@ -48,58 +48,35 @@ static void vc4_bo_stats_dump(struct vc4_dev *vc4)
 		if (!vc4->bo_labels[i].num_allocated)
 			continue;
 
-		DRM_INFO("%30s: %6dkb BOs (%d)\n",
-			 vc4->bo_labels[i].name,
-			 vc4->bo_labels[i].size_allocated / 1024,
-			 vc4->bo_labels[i].num_allocated);
+		drm_printf(p, "%30s: %6dkb BOs (%d)\n",
+			   vc4->bo_labels[i].name,
+			   vc4->bo_labels[i].size_allocated / 1024,
+			   vc4->bo_labels[i].num_allocated);
 	}
 
 	mutex_lock(&vc4->purgeable.lock);
 	if (vc4->purgeable.num)
-		DRM_INFO("%30s: %6zdkb BOs (%d)\n", "userspace BO cache",
-			 vc4->purgeable.size / 1024, vc4->purgeable.num);
+		drm_printf(p, "%30s: %6zdkb BOs (%d)\n", "userspace BO cache",
+			   vc4->purgeable.size / 1024, vc4->purgeable.num);
 
 	if (vc4->purgeable.purged_num)
-		DRM_INFO("%30s: %6zdkb BOs (%d)\n", "total purged BO",
-			 vc4->purgeable.purged_size / 1024,
-			 vc4->purgeable.purged_num);
+		drm_printf(p, "%30s: %6zdkb BOs (%d)\n", "total purged BO",
+			   vc4->purgeable.purged_size / 1024,
+			   vc4->purgeable.purged_num);
 	mutex_unlock(&vc4->purgeable.lock);
 }
 
-#ifdef CONFIG_DEBUG_FS
-int vc4_bo_stats_debugfs(struct seq_file *m, void *unused)
+static int vc4_bo_stats_debugfs(struct seq_file *m, void *unused)
 {
 	struct drm_info_node *node = (struct drm_info_node *)m->private;
 	struct drm_device *dev = node->minor->dev;
 	struct vc4_dev *vc4 = to_vc4_dev(dev);
-	int i;
+	struct drm_printer p = drm_seq_file_printer(m);
 
-	mutex_lock(&vc4->bo_lock);
-	for (i = 0; i < vc4->num_labels; i++) {
-		if (!vc4->bo_labels[i].num_allocated)
-			continue;
-
-		seq_printf(m, "%30s: %6dkb BOs (%d)\n",
-			   vc4->bo_labels[i].name,
-			   vc4->bo_labels[i].size_allocated / 1024,
-			   vc4->bo_labels[i].num_allocated);
-	}
-	mutex_unlock(&vc4->bo_lock);
-
-	mutex_lock(&vc4->purgeable.lock);
-	if (vc4->purgeable.num)
-		seq_printf(m, "%30s: %6zdkb BOs (%d)\n", "userspace BO cache",
-			   vc4->purgeable.size / 1024, vc4->purgeable.num);
-
-	if (vc4->purgeable.purged_num)
-		seq_printf(m, "%30s: %6zdkb BOs (%d)\n", "total purged BO",
-			   vc4->purgeable.purged_size / 1024,
-			   vc4->purgeable.purged_num);
-	mutex_unlock(&vc4->purgeable.lock);
+	vc4_bo_stats_print(&p, vc4);
 
 	return 0;
 }
-#endif
 
 /* Takes ownership of *name and returns the appropriate slot for it in
  * the bo_labels[] array, extending it as necessary.
@@ -475,8 +452,9 @@ struct vc4_bo *vc4_bo_create(struct drm_device *dev, size_t unaligned_size,
 	}
 
 	if (IS_ERR(cma_obj)) {
+		struct drm_printer p = drm_info_printer(vc4->dev->dev);
 		DRM_ERROR("Failed to allocate from CMA:\n");
-		vc4_bo_stats_dump(vc4);
+		vc4_bo_stats_print(&p, vc4);
 		return ERR_PTR(-ENOMEM);
 	}
 	bo = to_vc4_bo(&cma_obj->base);
@@ -1025,6 +1003,8 @@ int vc4_bo_cache_init(struct drm_device *dev)
 
 	mutex_init(&vc4->bo_lock);
 
+	vc4_debugfs_add_file(dev, "bo_stats", vc4_bo_stats_debugfs, NULL);
+
 	INIT_LIST_HEAD(&vc4->bo_cache.time_list);
 
 	INIT_WORK(&vc4->bo_cache.time_work, vc4_bo_cache_time_work);
diff --git a/drivers/gpu/drm/vc4/vc4_crtc.c b/drivers/gpu/drm/vc4/vc4_crtc.c
index 64c964b7c577..b2891ca0e7f4 100644
--- a/drivers/gpu/drm/vc4/vc4_crtc.c
+++ b/drivers/gpu/drm/vc4/vc4_crtc.c
@@ -35,6 +35,7 @@
 #include <drm/drm_atomic.h>
 #include <drm/drm_atomic_helper.h>
 #include <drm/drm_atomic_uapi.h>
+#include <drm/drm_print.h>
 #include <drm/drm_probe_helper.h>
 #include <linux/clk.h>
 #include <drm/drm_fb_cma_helper.h>
@@ -67,67 +68,22 @@ to_vc4_crtc_state(struct drm_crtc_state *crtc_state)
 #define CRTC_WRITE(offset, val) writel(val, vc4_crtc->regs + (offset))
 #define CRTC_READ(offset) readl(vc4_crtc->regs + (offset))
 
-#define CRTC_REG(reg) { reg, #reg }
-static const struct {
-	u32 reg;
-	const char *name;
-} crtc_regs[] = {
-	CRTC_REG(PV_CONTROL),
-	CRTC_REG(PV_V_CONTROL),
-	CRTC_REG(PV_VSYNCD_EVEN),
-	CRTC_REG(PV_HORZA),
-	CRTC_REG(PV_HORZB),
-	CRTC_REG(PV_VERTA),
-	CRTC_REG(PV_VERTB),
-	CRTC_REG(PV_VERTA_EVEN),
-	CRTC_REG(PV_VERTB_EVEN),
-	CRTC_REG(PV_INTEN),
-	CRTC_REG(PV_INTSTAT),
-	CRTC_REG(PV_STAT),
-	CRTC_REG(PV_HACT_ACT),
+static const struct debugfs_reg32 crtc_regs[] = {
+	VC4_REG32(PV_CONTROL),
+	VC4_REG32(PV_V_CONTROL),
+	VC4_REG32(PV_VSYNCD_EVEN),
+	VC4_REG32(PV_HORZA),
+	VC4_REG32(PV_HORZB),
+	VC4_REG32(PV_VERTA),
+	VC4_REG32(PV_VERTB),
+	VC4_REG32(PV_VERTA_EVEN),
+	VC4_REG32(PV_VERTB_EVEN),
+	VC4_REG32(PV_INTEN),
+	VC4_REG32(PV_INTSTAT),
+	VC4_REG32(PV_STAT),
+	VC4_REG32(PV_HACT_ACT),
 };
 
-static void vc4_crtc_dump_regs(struct vc4_crtc *vc4_crtc)
-{
-	int i;
-
-	for (i = 0; i < ARRAY_SIZE(crtc_regs); i++) {
-		DRM_INFO("0x%04x (%s): 0x%08x\n",
-			 crtc_regs[i].reg, crtc_regs[i].name,
-			 CRTC_READ(crtc_regs[i].reg));
-	}
-}
-
-#ifdef CONFIG_DEBUG_FS
-int vc4_crtc_debugfs_regs(struct seq_file *m, void *unused)
-{
-	struct drm_info_node *node = (struct drm_info_node *)m->private;
-	struct drm_device *dev = node->minor->dev;
-	int crtc_index = (uintptr_t)node->info_ent->data;
-	struct drm_crtc *crtc;
-	struct vc4_crtc *vc4_crtc;
-	int i;
-
-	i = 0;
-	list_for_each_entry(crtc, &dev->mode_config.crtc_list, head) {
-		if (i == crtc_index)
-			break;
-		i++;
-	}
-	if (!crtc)
-		return 0;
-	vc4_crtc = to_vc4_crtc(crtc);
-
-	for (i = 0; i < ARRAY_SIZE(crtc_regs); i++) {
-		seq_printf(m, "%s (0x%04x): 0x%08x\n",
-			   crtc_regs[i].name, crtc_regs[i].reg,
-			   CRTC_READ(crtc_regs[i].reg));
-	}
-
-	return 0;
-}
-#endif
-
 bool vc4_crtc_get_scanoutpos(struct drm_device *dev, unsigned int crtc_id,
 			     bool in_vblank_irq, int *vpos, int *hpos,
 			     ktime_t *stime, ktime_t *etime,
@@ -434,8 +390,10 @@ static void vc4_crtc_mode_set_nofb(struct drm_crtc *crtc)
 	bool debug_dump_regs = false;
 
 	if (debug_dump_regs) {
-		DRM_INFO("CRTC %d regs before:\n", drm_crtc_index(crtc));
-		vc4_crtc_dump_regs(vc4_crtc);
+		struct drm_printer p = drm_info_printer(&vc4_crtc->pdev->dev);
+		dev_info(&vc4_crtc->pdev->dev, "CRTC %d regs before:\n",
+			 drm_crtc_index(crtc));
+		drm_print_regset32(&p, &vc4_crtc->regset);
 	}
 
 	if (vc4_crtc->channel == 2) {
@@ -476,8 +434,10 @@ static void vc4_crtc_mode_set_nofb(struct drm_crtc *crtc)
 	vc4_crtc_lut_load(crtc);
 
 	if (debug_dump_regs) {
-		DRM_INFO("CRTC %d regs after:\n", drm_crtc_index(crtc));
-		vc4_crtc_dump_regs(vc4_crtc);
+		struct drm_printer p = drm_info_printer(&vc4_crtc->pdev->dev);
+		dev_info(&vc4_crtc->pdev->dev, "CRTC %d regs after:\n",
+			 drm_crtc_index(crtc));
+		drm_print_regset32(&p, &vc4_crtc->regset);
 	}
 }
 
@@ -1083,6 +1043,7 @@ static const struct drm_crtc_helper_funcs vc4_crtc_helper_funcs = {
 
 static const struct vc4_crtc_data pv0_data = {
 	.hvs_channel = 0,
+	.debugfs_name = "crtc0_regs",
 	.encoder_types = {
 		[PV_CONTROL_CLK_SELECT_DSI] = VC4_ENCODER_TYPE_DSI0,
 		[PV_CONTROL_CLK_SELECT_DPI_SMI_HDMI] = VC4_ENCODER_TYPE_DPI,
@@ -1091,6 +1052,7 @@ static const struct vc4_crtc_data pv0_data = {
 
 static const struct vc4_crtc_data pv1_data = {
 	.hvs_channel = 2,
+	.debugfs_name = "crtc1_regs",
 	.encoder_types = {
 		[PV_CONTROL_CLK_SELECT_DSI] = VC4_ENCODER_TYPE_DSI1,
 		[PV_CONTROL_CLK_SELECT_DPI_SMI_HDMI] = VC4_ENCODER_TYPE_SMI,
@@ -1099,6 +1061,7 @@ static const struct vc4_crtc_data pv1_data = {
 
 static const struct vc4_crtc_data pv2_data = {
 	.hvs_channel = 1,
+	.debugfs_name = "crtc2_regs",
 	.encoder_types = {
 		[PV_CONTROL_CLK_SELECT_DPI_SMI_HDMI] = VC4_ENCODER_TYPE_HDMI,
 		[PV_CONTROL_CLK_SELECT_VEC] = VC4_ENCODER_TYPE_VEC,
@@ -1177,11 +1140,16 @@ static int vc4_crtc_bind(struct device *dev, struct device *master, void *data)
 	if (!match)
 		return -ENODEV;
 	vc4_crtc->data = match->data;
+	vc4_crtc->pdev = pdev;
 
 	vc4_crtc->regs = vc4_ioremap_regs(pdev, 0);
 	if (IS_ERR(vc4_crtc->regs))
 		return PTR_ERR(vc4_crtc->regs);
 
+	vc4_crtc->regset.base = vc4_crtc->regs;
+	vc4_crtc->regset.regs = crtc_regs;
+	vc4_crtc->regset.nregs = ARRAY_SIZE(crtc_regs);
+
 	/* For now, we create just the primary and the legacy cursor
 	 * planes.  We should be able to stack more planes on easily,
 	 * but to do that we would need to compute the bandwidth
@@ -1255,6 +1223,9 @@ static int vc4_crtc_bind(struct device *dev, struct device *master, void *data)
 
 	platform_set_drvdata(pdev, vc4_crtc);
 
+	vc4_debugfs_add_regset32(drm, vc4_crtc->data->debugfs_name,
+				 &vc4_crtc->regset);
+
 	return 0;
 
 err_destroy_planes:
diff --git a/drivers/gpu/drm/vc4/vc4_debugfs.c b/drivers/gpu/drm/vc4/vc4_debugfs.c
index 59cdad89f844..69df84bdf904 100644
--- a/drivers/gpu/drm/vc4/vc4_debugfs.c
+++ b/drivers/gpu/drm/vc4/vc4_debugfs.c
@@ -15,28 +15,20 @@
 #include "vc4_drv.h"
 #include "vc4_regs.h"
 
-static const struct drm_info_list vc4_debugfs_list[] = {
-	{"bo_stats", vc4_bo_stats_debugfs, 0},
-	{"dpi_regs", vc4_dpi_debugfs_regs, 0},
-	{"dsi1_regs", vc4_dsi_debugfs_regs, 0, (void *)(uintptr_t)1},
-	{"hdmi_regs", vc4_hdmi_debugfs_regs, 0},
-	{"vec_regs", vc4_vec_debugfs_regs, 0},
-	{"txp_regs", vc4_txp_debugfs_regs, 0},
-	{"hvs_regs", vc4_hvs_debugfs_regs, 0},
-	{"hvs_underrun",  vc4_hvs_debugfs_underrun, 0},
-	{"crtc0_regs", vc4_crtc_debugfs_regs, 0, (void *)(uintptr_t)0},
-	{"crtc1_regs", vc4_crtc_debugfs_regs, 0, (void *)(uintptr_t)1},
-	{"crtc2_regs", vc4_crtc_debugfs_regs, 0, (void *)(uintptr_t)2},
-	{"v3d_ident", vc4_v3d_debugfs_ident, 0},
-	{"v3d_regs", vc4_v3d_debugfs_regs, 0},
+struct vc4_debugfs_info_entry {
+	struct list_head link;
+	struct drm_info_list info;
 };
 
-#define VC4_DEBUGFS_ENTRIES ARRAY_SIZE(vc4_debugfs_list)
-
+/**
+ * Called at drm_dev_register() time on each of the minors registered
+ * by the DRM device, to attach the debugfs files.
+ */
 int
 vc4_debugfs_init(struct drm_minor *minor)
 {
 	struct vc4_dev *vc4 = to_vc4_dev(minor->dev);
+	struct vc4_debugfs_info_entry *entry;
 	struct dentry *dentry;
 
 	dentry = debugfs_create_bool("hvs_load_tracker", S_IRUGO | S_IWUSR,
@@ -45,6 +37,60 @@ vc4_debugfs_init(struct drm_minor *minor)
 	if (!dentry)
 		return -ENOMEM;
 
-	return drm_debugfs_create_files(vc4_debugfs_list, VC4_DEBUGFS_ENTRIES,
-					minor->debugfs_root, minor);
+	list_for_each_entry(entry, &vc4->debugfs_list, link) {
+		int ret = drm_debugfs_create_files(&entry->info, 1,
+						   minor->debugfs_root, minor);
+
+		if (ret)
+			return ret;
+	}
+
+	return 0;
+}
+
+int vc4_debugfs_regset32(struct seq_file *m, void *unused)
+{
+	struct drm_info_node *node = (struct drm_info_node *)m->private;
+	struct debugfs_regset32 *regset = node->info_ent->data;
+	struct drm_printer p = drm_seq_file_printer(m);
+
+	drm_print_regset32(&p, regset);
+
+	return 0;
+}
+
+/**
+ * Registers a debugfs file with a callback function for a vc4 component.
+ *
+ * This is like drm_debugfs_create_files(), but that can only be
+ * called a given DRM minor, while the various VC4 components want to
+ * register their debugfs files during the component bind process.  We
+ * track the request and delay it to be called on each minor during
+ * vc4_debugfs_init().
+ */
+void vc4_debugfs_add_file(struct drm_device *dev,
+			  const char *name,
+			  int (*show)(struct seq_file*, void*),
+			  void *data)
+{
+	struct vc4_dev *vc4 = to_vc4_dev(dev);
+
+	struct vc4_debugfs_info_entry *entry =
+		devm_kzalloc(dev->dev, sizeof(*entry), GFP_KERNEL);
+
+	if (!entry)
+		return;
+
+	entry->info.name = name;
+	entry->info.show = show;
+	entry->info.data = data;
+
+	list_add(&entry->link, &vc4->debugfs_list);
+}
+
+void vc4_debugfs_add_regset32(struct drm_device *drm,
+			      const char *name,
+			      struct debugfs_regset32 *regset)
+{
+	vc4_debugfs_add_file(drm, name, vc4_debugfs_regset32, regset);
 }
diff --git a/drivers/gpu/drm/vc4/vc4_dpi.c b/drivers/gpu/drm/vc4/vc4_dpi.c
index 169521e547ba..34f90ca8f479 100644
--- a/drivers/gpu/drm/vc4/vc4_dpi.c
+++ b/drivers/gpu/drm/vc4/vc4_dpi.c
@@ -101,6 +101,8 @@ struct vc4_dpi {
 
 	struct clk *pixel_clock;
 	struct clk *core_clock;
+
+	struct debugfs_regset32 regset;
 };
 
 #define DPI_READ(offset) readl(dpi->regs + (offset))
@@ -118,37 +120,11 @@ to_vc4_dpi_encoder(struct drm_encoder *encoder)
 	return container_of(encoder, struct vc4_dpi_encoder, base.base);
 }
 
-#define DPI_REG(reg) { reg, #reg }
-static const struct {
-	u32 reg;
-	const char *name;
-} dpi_regs[] = {
-	DPI_REG(DPI_C),
-	DPI_REG(DPI_ID),
+static const struct debugfs_reg32 dpi_regs[] = {
+	VC4_REG32(DPI_C),
+	VC4_REG32(DPI_ID),
 };
 
-#ifdef CONFIG_DEBUG_FS
-int vc4_dpi_debugfs_regs(struct seq_file *m, void *unused)
-{
-	struct drm_info_node *node = (struct drm_info_node *)m->private;
-	struct drm_device *dev = node->minor->dev;
-	struct vc4_dev *vc4 = to_vc4_dev(dev);
-	struct vc4_dpi *dpi = vc4->dpi;
-	int i;
-
-	if (!dpi)
-		return 0;
-
-	for (i = 0; i < ARRAY_SIZE(dpi_regs); i++) {
-		seq_printf(m, "%s (0x%04x): 0x%08x\n",
-			   dpi_regs[i].name, dpi_regs[i].reg,
-			   DPI_READ(dpi_regs[i].reg));
-	}
-
-	return 0;
-}
-#endif
-
 static const struct drm_encoder_funcs vc4_dpi_encoder_funcs = {
 	.destroy = drm_encoder_cleanup,
 };
@@ -314,6 +290,9 @@ static int vc4_dpi_bind(struct device *dev, struct device *master, void *data)
 	dpi->regs = vc4_ioremap_regs(pdev, 0);
 	if (IS_ERR(dpi->regs))
 		return PTR_ERR(dpi->regs);
+	dpi->regset.base = dpi->regs;
+	dpi->regset.regs = dpi_regs;
+	dpi->regset.nregs = ARRAY_SIZE(dpi_regs);
 
 	if (DPI_READ(DPI_ID) != DPI_ID_VALUE) {
 		dev_err(dev, "Port returned 0x%08x for ID instead of 0x%08x\n",
@@ -352,6 +331,8 @@ static int vc4_dpi_bind(struct device *dev, struct device *master, void *data)
 
 	vc4->dpi = dpi;
 
+	vc4_debugfs_add_regset32(drm, "dpi_regs", &dpi->regset);
+
 	return 0;
 
 err_destroy_encoder:
diff --git a/drivers/gpu/drm/vc4/vc4_drv.c b/drivers/gpu/drm/vc4/vc4_drv.c
index 4daf44fd4548..6d9be20a32be 100644
--- a/drivers/gpu/drm/vc4/vc4_drv.c
+++ b/drivers/gpu/drm/vc4/vc4_drv.c
@@ -72,30 +72,30 @@ static int vc4_get_param_ioctl(struct drm_device *dev, void *data,
 	if (args->pad != 0)
 		return -EINVAL;
 
+	if (!vc4->v3d)
+		return -ENODEV;
+
 	switch (args->param) {
 	case DRM_VC4_PARAM_V3D_IDENT0:
-		ret = pm_runtime_get_sync(&vc4->v3d->pdev->dev);
-		if (ret < 0)
+		ret = vc4_v3d_pm_get(vc4);
+		if (ret)
 			return ret;
 		args->value = V3D_READ(V3D_IDENT0);
-		pm_runtime_mark_last_busy(&vc4->v3d->pdev->dev);
-		pm_runtime_put_autosuspend(&vc4->v3d->pdev->dev);
+		vc4_v3d_pm_put(vc4);
 		break;
 	case DRM_VC4_PARAM_V3D_IDENT1:
-		ret = pm_runtime_get_sync(&vc4->v3d->pdev->dev);
-		if (ret < 0)
+		ret = vc4_v3d_pm_get(vc4);
+		if (ret)
 			return ret;
 		args->value = V3D_READ(V3D_IDENT1);
-		pm_runtime_mark_last_busy(&vc4->v3d->pdev->dev);
-		pm_runtime_put_autosuspend(&vc4->v3d->pdev->dev);
+		vc4_v3d_pm_put(vc4);
 		break;
 	case DRM_VC4_PARAM_V3D_IDENT2:
-		ret = pm_runtime_get_sync(&vc4->v3d->pdev->dev);
-		if (ret < 0)
+		ret = vc4_v3d_pm_get(vc4);
+		if (ret)
 			return ret;
 		args->value = V3D_READ(V3D_IDENT2);
-		pm_runtime_mark_last_busy(&vc4->v3d->pdev->dev);
-		pm_runtime_put_autosuspend(&vc4->v3d->pdev->dev);
+		vc4_v3d_pm_put(vc4);
 		break;
 	case DRM_VC4_PARAM_SUPPORTS_BRANCHES:
 	case DRM_VC4_PARAM_SUPPORTS_ETC1:
@@ -251,6 +251,7 @@ static int vc4_drm_bind(struct device *dev)
 	struct platform_device *pdev = to_platform_device(dev);
 	struct drm_device *drm;
 	struct vc4_dev *vc4;
+	struct device_node *node;
 	int ret = 0;
 
 	dev->coherent_dma_mask = DMA_BIT_MASK(32);
@@ -259,12 +260,19 @@ static int vc4_drm_bind(struct device *dev)
 	if (!vc4)
 		return -ENOMEM;
 
+	/* If VC4 V3D is missing, don't advertise render nodes. */
+	node = of_find_matching_node_and_match(NULL, vc4_v3d_dt_match, NULL);
+	if (!node || !of_device_is_available(node))
+		vc4_drm_driver.driver_features &= ~DRIVER_RENDER;
+	of_node_put(node);
+
 	drm = drm_dev_alloc(&vc4_drm_driver, dev);
 	if (IS_ERR(drm))
 		return PTR_ERR(drm);
 	platform_set_drvdata(pdev, drm);
 	vc4->dev = drm;
 	drm->dev_private = vc4;
+	INIT_LIST_HEAD(&vc4->debugfs_list);
 
 	ret = vc4_bo_cache_init(drm);
 	if (ret)
@@ -280,11 +288,13 @@ static int vc4_drm_bind(struct device *dev)
 
 	drm_fb_helper_remove_conflicting_framebuffers(NULL, "vc4drmfb", false);
 
-	ret = drm_dev_register(drm, 0);
+	ret = vc4_kms_load(drm);
 	if (ret < 0)
 		goto unbind_all;
 
-	vc4_kms_load(drm);
+	ret = drm_dev_register(drm, 0);
+	if (ret < 0)
+		goto unbind_all;
 
 	drm_fbdev_generic_setup(drm, 16);
 
diff --git a/drivers/gpu/drm/vc4/vc4_drv.h b/drivers/gpu/drm/vc4/vc4_drv.h
index 7a3c093e7443..4f13f6262491 100644
--- a/drivers/gpu/drm/vc4/vc4_drv.h
+++ b/drivers/gpu/drm/vc4/vc4_drv.h
@@ -211,6 +211,11 @@ struct vc4_dev {
 	struct drm_modeset_lock ctm_state_lock;
 	struct drm_private_obj ctm_manager;
 	struct drm_private_obj load_tracker;
+
+	/* List of vc4_debugfs_info_entry for adding to debugfs once
+	 * the minor is available (after drm_dev_register()).
+	 */
+	struct list_head debugfs_list;
 };
 
 static inline struct vc4_dev *
@@ -296,6 +301,7 @@ struct vc4_v3d {
 	struct platform_device *pdev;
 	void __iomem *regs;
 	struct clk *clk;
+	struct debugfs_regset32 regset;
 };
 
 struct vc4_hvs {
@@ -312,6 +318,7 @@ struct vc4_hvs {
 	spinlock_t mm_lock;
 
 	struct drm_mm_node mitchell_netravali_filter;
+	struct debugfs_regset32 regset;
 };
 
 struct vc4_plane {
@@ -427,10 +434,12 @@ struct vc4_crtc_data {
 	int hvs_channel;
 
 	enum vc4_encoder_type encoder_types[4];
+	const char *debugfs_name;
 };
 
 struct vc4_crtc {
 	struct drm_crtc base;
+	struct platform_device *pdev;
 	const struct vc4_crtc_data *data;
 	void __iomem *regs;
 
@@ -447,6 +456,8 @@ struct vc4_crtc {
 	u32 cob_size;
 
 	struct drm_pending_vblank_event *event;
+
+	struct debugfs_regset32 regset;
 };
 
 static inline struct vc4_crtc *
@@ -460,6 +471,8 @@ to_vc4_crtc(struct drm_crtc *crtc)
 #define HVS_READ(offset) readl(vc4->hvs->regs + offset)
 #define HVS_WRITE(offset, val) writel(val, vc4->hvs->regs + offset)
 
+#define VC4_REG32(reg) { .name = #reg, .offset = reg }
+
 struct vc4_exec_info {
 	/* Sequence number for this bin/render job. */
 	uint64_t seqno;
@@ -708,7 +721,6 @@ struct drm_gem_object *vc4_prime_import_sg_table(struct drm_device *dev,
 void *vc4_prime_vmap(struct drm_gem_object *obj);
 int vc4_bo_cache_init(struct drm_device *dev);
 void vc4_bo_cache_destroy(struct drm_device *dev);
-int vc4_bo_stats_debugfs(struct seq_file *m, void *arg);
 int vc4_bo_inc_usecnt(struct vc4_bo *bo);
 void vc4_bo_dec_usecnt(struct vc4_bo *bo);
 void vc4_bo_add_to_purgeable_pool(struct vc4_bo *bo);
@@ -716,7 +728,6 @@ void vc4_bo_remove_from_purgeable_pool(struct vc4_bo *bo);
 
 /* vc4_crtc.c */
 extern struct platform_driver vc4_crtc_driver;
-int vc4_crtc_debugfs_regs(struct seq_file *m, void *arg);
 bool vc4_crtc_get_scanoutpos(struct drm_device *dev, unsigned int crtc_id,
 			     bool in_vblank_irq, int *vpos, int *hpos,
 			     ktime_t *stime, ktime_t *etime,
@@ -729,17 +740,37 @@ void vc4_crtc_get_margins(struct drm_crtc_state *state,
 
 /* vc4_debugfs.c */
 int vc4_debugfs_init(struct drm_minor *minor);
+#ifdef CONFIG_DEBUG_FS
+void vc4_debugfs_add_file(struct drm_device *drm,
+			  const char *filename,
+			  int (*show)(struct seq_file*, void*),
+			  void *data);
+void vc4_debugfs_add_regset32(struct drm_device *drm,
+			      const char *filename,
+			      struct debugfs_regset32 *regset);
+#else
+static inline void vc4_debugfs_add_file(struct drm_device *drm,
+					const char *filename,
+					int (*show)(struct seq_file*, void*),
+					void *data)
+{
+}
+
+static inline void vc4_debugfs_add_regset32(struct drm_device *drm,
+					    const char *filename,
+					    struct debugfs_regset32 *regset)
+{
+}
+#endif
 
 /* vc4_drv.c */
 void __iomem *vc4_ioremap_regs(struct platform_device *dev, int index);
 
 /* vc4_dpi.c */
 extern struct platform_driver vc4_dpi_driver;
-int vc4_dpi_debugfs_regs(struct seq_file *m, void *unused);
 
 /* vc4_dsi.c */
 extern struct platform_driver vc4_dsi_driver;
-int vc4_dsi_debugfs_regs(struct seq_file *m, void *unused);
 
 /* vc4_fence.c */
 extern const struct dma_fence_ops vc4_fence_ops;
@@ -767,15 +798,12 @@ int vc4_gem_madvise_ioctl(struct drm_device *dev, void *data,
 
 /* vc4_hdmi.c */
 extern struct platform_driver vc4_hdmi_driver;
-int vc4_hdmi_debugfs_regs(struct seq_file *m, void *unused);
 
 /* vc4_vec.c */
 extern struct platform_driver vc4_vec_driver;
-int vc4_vec_debugfs_regs(struct seq_file *m, void *unused);
 
 /* vc4_txp.c */
 extern struct platform_driver vc4_txp_driver;
-int vc4_txp_debugfs_regs(struct seq_file *m, void *unused);
 
 /* vc4_irq.c */
 irqreturn_t vc4_irq(int irq, void *arg);
@@ -787,8 +815,6 @@ void vc4_irq_reset(struct drm_device *dev);
 /* vc4_hvs.c */
 extern struct platform_driver vc4_hvs_driver;
 void vc4_hvs_dump_state(struct drm_device *dev);
-int vc4_hvs_debugfs_regs(struct seq_file *m, void *unused);
-int vc4_hvs_debugfs_underrun(struct seq_file *m, void *unused);
 void vc4_hvs_unmask_underrun(struct drm_device *dev, int channel);
 void vc4_hvs_mask_underrun(struct drm_device *dev, int channel);
 
@@ -805,9 +831,10 @@ void vc4_plane_async_set_fb(struct drm_plane *plane,
 
 /* vc4_v3d.c */
 extern struct platform_driver vc4_v3d_driver;
-int vc4_v3d_debugfs_ident(struct seq_file *m, void *unused);
-int vc4_v3d_debugfs_regs(struct seq_file *m, void *unused);
+extern const struct of_device_id vc4_v3d_dt_match[];
 int vc4_v3d_get_bin_slot(struct vc4_dev *vc4);
+int vc4_v3d_pm_get(struct vc4_dev *vc4);
+void vc4_v3d_pm_put(struct vc4_dev *vc4);
 
 /* vc4_validate.c */
 int
diff --git a/drivers/gpu/drm/vc4/vc4_dsi.c b/drivers/gpu/drm/vc4/vc4_dsi.c
index 11702e1d9011..9412709067f5 100644
--- a/drivers/gpu/drm/vc4/vc4_dsi.c
+++ b/drivers/gpu/drm/vc4/vc4_dsi.c
@@ -545,6 +545,8 @@ struct vc4_dsi {
 
 	struct completion xfer_completion;
 	int xfer_result;
+
+	struct debugfs_regset32 regset;
 };
 
 #define host_to_dsi(host) container_of(host, struct vc4_dsi, dsi_host)
@@ -605,113 +607,56 @@ to_vc4_dsi_encoder(struct drm_encoder *encoder)
 	return container_of(encoder, struct vc4_dsi_encoder, base.base);
 }
 
-#define DSI_REG(reg) { reg, #reg }
-static const struct {
-	u32 reg;
-	const char *name;
-} dsi0_regs[] = {
-	DSI_REG(DSI0_CTRL),
-	DSI_REG(DSI0_STAT),
-	DSI_REG(DSI0_HSTX_TO_CNT),
-	DSI_REG(DSI0_LPRX_TO_CNT),
-	DSI_REG(DSI0_TA_TO_CNT),
-	DSI_REG(DSI0_PR_TO_CNT),
-	DSI_REG(DSI0_DISP0_CTRL),
-	DSI_REG(DSI0_DISP1_CTRL),
-	DSI_REG(DSI0_INT_STAT),
-	DSI_REG(DSI0_INT_EN),
-	DSI_REG(DSI0_PHYC),
-	DSI_REG(DSI0_HS_CLT0),
-	DSI_REG(DSI0_HS_CLT1),
-	DSI_REG(DSI0_HS_CLT2),
-	DSI_REG(DSI0_HS_DLT3),
-	DSI_REG(DSI0_HS_DLT4),
-	DSI_REG(DSI0_HS_DLT5),
-	DSI_REG(DSI0_HS_DLT6),
-	DSI_REG(DSI0_HS_DLT7),
-	DSI_REG(DSI0_PHY_AFEC0),
-	DSI_REG(DSI0_PHY_AFEC1),
-	DSI_REG(DSI0_ID),
+static const struct debugfs_reg32 dsi0_regs[] = {
+	VC4_REG32(DSI0_CTRL),
+	VC4_REG32(DSI0_STAT),
+	VC4_REG32(DSI0_HSTX_TO_CNT),
+	VC4_REG32(DSI0_LPRX_TO_CNT),
+	VC4_REG32(DSI0_TA_TO_CNT),
+	VC4_REG32(DSI0_PR_TO_CNT),
+	VC4_REG32(DSI0_DISP0_CTRL),
+	VC4_REG32(DSI0_DISP1_CTRL),
+	VC4_REG32(DSI0_INT_STAT),
+	VC4_REG32(DSI0_INT_EN),
+	VC4_REG32(DSI0_PHYC),
+	VC4_REG32(DSI0_HS_CLT0),
+	VC4_REG32(DSI0_HS_CLT1),
+	VC4_REG32(DSI0_HS_CLT2),
+	VC4_REG32(DSI0_HS_DLT3),
+	VC4_REG32(DSI0_HS_DLT4),
+	VC4_REG32(DSI0_HS_DLT5),
+	VC4_REG32(DSI0_HS_DLT6),
+	VC4_REG32(DSI0_HS_DLT7),
+	VC4_REG32(DSI0_PHY_AFEC0),
+	VC4_REG32(DSI0_PHY_AFEC1),
+	VC4_REG32(DSI0_ID),
 };
 
-static const struct {
-	u32 reg;
-	const char *name;
-} dsi1_regs[] = {
-	DSI_REG(DSI1_CTRL),
-	DSI_REG(DSI1_STAT),
-	DSI_REG(DSI1_HSTX_TO_CNT),
-	DSI_REG(DSI1_LPRX_TO_CNT),
-	DSI_REG(DSI1_TA_TO_CNT),
-	DSI_REG(DSI1_PR_TO_CNT),
-	DSI_REG(DSI1_DISP0_CTRL),
-	DSI_REG(DSI1_DISP1_CTRL),
-	DSI_REG(DSI1_INT_STAT),
-	DSI_REG(DSI1_INT_EN),
-	DSI_REG(DSI1_PHYC),
-	DSI_REG(DSI1_HS_CLT0),
-	DSI_REG(DSI1_HS_CLT1),
-	DSI_REG(DSI1_HS_CLT2),
-	DSI_REG(DSI1_HS_DLT3),
-	DSI_REG(DSI1_HS_DLT4),
-	DSI_REG(DSI1_HS_DLT5),
-	DSI_REG(DSI1_HS_DLT6),
-	DSI_REG(DSI1_HS_DLT7),
-	DSI_REG(DSI1_PHY_AFEC0),
-	DSI_REG(DSI1_PHY_AFEC1),
-	DSI_REG(DSI1_ID),
+static const struct debugfs_reg32 dsi1_regs[] = {
+	VC4_REG32(DSI1_CTRL),
+	VC4_REG32(DSI1_STAT),
+	VC4_REG32(DSI1_HSTX_TO_CNT),
+	VC4_REG32(DSI1_LPRX_TO_CNT),
+	VC4_REG32(DSI1_TA_TO_CNT),
+	VC4_REG32(DSI1_PR_TO_CNT),
+	VC4_REG32(DSI1_DISP0_CTRL),
+	VC4_REG32(DSI1_DISP1_CTRL),
+	VC4_REG32(DSI1_INT_STAT),
+	VC4_REG32(DSI1_INT_EN),
+	VC4_REG32(DSI1_PHYC),
+	VC4_REG32(DSI1_HS_CLT0),
+	VC4_REG32(DSI1_HS_CLT1),
+	VC4_REG32(DSI1_HS_CLT2),
+	VC4_REG32(DSI1_HS_DLT3),
+	VC4_REG32(DSI1_HS_DLT4),
+	VC4_REG32(DSI1_HS_DLT5),
+	VC4_REG32(DSI1_HS_DLT6),
+	VC4_REG32(DSI1_HS_DLT7),
+	VC4_REG32(DSI1_PHY_AFEC0),
+	VC4_REG32(DSI1_PHY_AFEC1),
+	VC4_REG32(DSI1_ID),
 };
 
-static void vc4_dsi_dump_regs(struct vc4_dsi *dsi)
-{
-	int i;
-
-	if (dsi->port == 0) {
-		for (i = 0; i < ARRAY_SIZE(dsi0_regs); i++) {
-			DRM_INFO("0x%04x (%s): 0x%08x\n",
-				 dsi0_regs[i].reg, dsi0_regs[i].name,
-				 DSI_READ(dsi0_regs[i].reg));
-		}
-	} else {
-		for (i = 0; i < ARRAY_SIZE(dsi1_regs); i++) {
-			DRM_INFO("0x%04x (%s): 0x%08x\n",
-				 dsi1_regs[i].reg, dsi1_regs[i].name,
-				 DSI_READ(dsi1_regs[i].reg));
-		}
-	}
-}
-
-#ifdef CONFIG_DEBUG_FS
-int vc4_dsi_debugfs_regs(struct seq_file *m, void *unused)
-{
-	struct drm_info_node *node = (struct drm_info_node *)m->private;
-	struct drm_device *drm = node->minor->dev;
-	struct vc4_dev *vc4 = to_vc4_dev(drm);
-	int dsi_index = (uintptr_t)node->info_ent->data;
-	struct vc4_dsi *dsi = (dsi_index == 1 ? vc4->dsi1 : NULL);
-	int i;
-
-	if (!dsi)
-		return 0;
-
-	if (dsi->port == 0) {
-		for (i = 0; i < ARRAY_SIZE(dsi0_regs); i++) {
-			seq_printf(m, "0x%04x (%s): 0x%08x\n",
-				   dsi0_regs[i].reg, dsi0_regs[i].name,
-				   DSI_READ(dsi0_regs[i].reg));
-		}
-	} else {
-		for (i = 0; i < ARRAY_SIZE(dsi1_regs); i++) {
-			seq_printf(m, "0x%04x (%s): 0x%08x\n",
-				   dsi1_regs[i].reg, dsi1_regs[i].name,
-				   DSI_READ(dsi1_regs[i].reg));
-		}
-	}
-
-	return 0;
-}
-#endif
-
 static void vc4_dsi_encoder_destroy(struct drm_encoder *encoder)
 {
 	drm_encoder_cleanup(encoder);
@@ -900,8 +845,9 @@ static void vc4_dsi_encoder_enable(struct drm_encoder *encoder)
 	}
 
 	if (debug_dump_regs) {
-		DRM_INFO("DSI regs before:\n");
-		vc4_dsi_dump_regs(dsi);
+		struct drm_printer p = drm_info_printer(&dsi->pdev->dev);
+		dev_info(&dsi->pdev->dev, "DSI regs before:\n");
+		drm_print_regset32(&p, &dsi->regset);
 	}
 
 	/* Round up the clk_set_rate() request slightly, since
@@ -1135,8 +1081,9 @@ static void vc4_dsi_encoder_enable(struct drm_encoder *encoder)
 	drm_bridge_enable(dsi->bridge);
 
 	if (debug_dump_regs) {
-		DRM_INFO("DSI regs after:\n");
-		vc4_dsi_dump_regs(dsi);
+		struct drm_printer p = drm_info_printer(&dsi->pdev->dev);
+		dev_info(&dsi->pdev->dev, "DSI regs after:\n");
+		drm_print_regset32(&p, &dsi->regset);
 	}
 }
 
@@ -1527,6 +1474,15 @@ static int vc4_dsi_bind(struct device *dev, struct device *master, void *data)
 	if (IS_ERR(dsi->regs))
 		return PTR_ERR(dsi->regs);
 
+	dsi->regset.base = dsi->regs;
+	if (dsi->port == 0) {
+		dsi->regset.regs = dsi0_regs;
+		dsi->regset.nregs = ARRAY_SIZE(dsi0_regs);
+	} else {
+		dsi->regset.regs = dsi1_regs;
+		dsi->regset.nregs = ARRAY_SIZE(dsi1_regs);
+	}
+
 	if (DSI_PORT_READ(ID) != DSI_ID_VALUE) {
 		dev_err(dev, "Port returned 0x%08x for ID instead of 0x%08x\n",
 			DSI_PORT_READ(ID), DSI_ID_VALUE);
@@ -1662,6 +1618,11 @@ static int vc4_dsi_bind(struct device *dev, struct device *master, void *data)
 	 */
 	dsi->encoder->bridge = NULL;
 
+	if (dsi->port == 0)
+		vc4_debugfs_add_regset32(drm, "dsi0_regs", &dsi->regset);
+	else
+		vc4_debugfs_add_regset32(drm, "dsi1_regs", &dsi->regset);
+
 	pm_runtime_enable(dev);
 
 	return 0;
diff --git a/drivers/gpu/drm/vc4/vc4_gem.c b/drivers/gpu/drm/vc4/vc4_gem.c
index 5ee5bf7fedf7..d9311be32a4f 100644
--- a/drivers/gpu/drm/vc4/vc4_gem.c
+++ b/drivers/gpu/drm/vc4/vc4_gem.c
@@ -74,6 +74,11 @@ vc4_get_hang_state_ioctl(struct drm_device *dev, void *data,
 	u32 i;
 	int ret = 0;
 
+	if (!vc4->v3d) {
+		DRM_DEBUG("VC4_GET_HANG_STATE with no VC4 V3D probed\n");
+		return -ENODEV;
+	}
+
 	spin_lock_irqsave(&vc4->job_lock, irqflags);
 	kernel_state = vc4->hang_state;
 	if (!kernel_state) {
@@ -964,12 +969,7 @@ vc4_complete_exec(struct drm_device *dev, struct vc4_exec_info *exec)
 	/* Release the reference we had on the perf monitor. */
 	vc4_perfmon_put(exec->perfmon);
 
-	mutex_lock(&vc4->power_lock);
-	if (--vc4->power_refcount == 0) {
-		pm_runtime_mark_last_busy(&vc4->v3d->pdev->dev);
-		pm_runtime_put_autosuspend(&vc4->v3d->pdev->dev);
-	}
-	mutex_unlock(&vc4->power_lock);
+	vc4_v3d_pm_put(vc4);
 
 	kfree(exec);
 }
@@ -1124,6 +1124,11 @@ vc4_submit_cl_ioctl(struct drm_device *dev, void *data,
 	struct dma_fence *in_fence;
 	int ret = 0;
 
+	if (!vc4->v3d) {
+		DRM_DEBUG("VC4_SUBMIT_CL with no VC4 V3D probed\n");
+		return -ENODEV;
+	}
+
 	if ((args->flags & ~(VC4_SUBMIT_CL_USE_CLEAR_COLOR |
 			     VC4_SUBMIT_CL_FIXED_RCL_ORDER |
 			     VC4_SUBMIT_CL_RCL_ORDER_INCREASING_X |
@@ -1143,17 +1148,11 @@ vc4_submit_cl_ioctl(struct drm_device *dev, void *data,
 		return -ENOMEM;
 	}
 
-	mutex_lock(&vc4->power_lock);
-	if (vc4->power_refcount++ == 0) {
-		ret = pm_runtime_get_sync(&vc4->v3d->pdev->dev);
-		if (ret < 0) {
-			mutex_unlock(&vc4->power_lock);
-			vc4->power_refcount--;
-			kfree(exec);
-			return ret;
-		}
+	ret = vc4_v3d_pm_get(vc4);
+	if (ret) {
+		kfree(exec);
+		return ret;
 	}
-	mutex_unlock(&vc4->power_lock);
 
 	exec->args = args;
 	INIT_LIST_HEAD(&exec->unref_list);
diff --git a/drivers/gpu/drm/vc4/vc4_hdmi.c b/drivers/gpu/drm/vc4/vc4_hdmi.c
index 88fd5df7e7dc..99fc8569e0f5 100644
--- a/drivers/gpu/drm/vc4/vc4_hdmi.c
+++ b/drivers/gpu/drm/vc4/vc4_hdmi.c
@@ -97,6 +97,9 @@ struct vc4_hdmi {
 
 	struct clk *pixel_clock;
 	struct clk *hsm_clock;
+
+	struct debugfs_regset32 hdmi_regset;
+	struct debugfs_regset32 hd_regset;
 };
 
 #define HDMI_READ(offset) readl(vc4->hdmi->hdmicore_regs + offset)
@@ -134,103 +137,69 @@ to_vc4_hdmi_connector(struct drm_connector *connector)
 	return container_of(connector, struct vc4_hdmi_connector, base);
 }
 
-#define HDMI_REG(reg) { reg, #reg }
-static const struct {
-	u32 reg;
-	const char *name;
-} hdmi_regs[] = {
-	HDMI_REG(VC4_HDMI_CORE_REV),
-	HDMI_REG(VC4_HDMI_SW_RESET_CONTROL),
-	HDMI_REG(VC4_HDMI_HOTPLUG_INT),
-	HDMI_REG(VC4_HDMI_HOTPLUG),
-	HDMI_REG(VC4_HDMI_MAI_CHANNEL_MAP),
-	HDMI_REG(VC4_HDMI_MAI_CONFIG),
-	HDMI_REG(VC4_HDMI_MAI_FORMAT),
-	HDMI_REG(VC4_HDMI_AUDIO_PACKET_CONFIG),
-	HDMI_REG(VC4_HDMI_RAM_PACKET_CONFIG),
-	HDMI_REG(VC4_HDMI_HORZA),
-	HDMI_REG(VC4_HDMI_HORZB),
-	HDMI_REG(VC4_HDMI_FIFO_CTL),
-	HDMI_REG(VC4_HDMI_SCHEDULER_CONTROL),
-	HDMI_REG(VC4_HDMI_VERTA0),
-	HDMI_REG(VC4_HDMI_VERTA1),
-	HDMI_REG(VC4_HDMI_VERTB0),
-	HDMI_REG(VC4_HDMI_VERTB1),
-	HDMI_REG(VC4_HDMI_TX_PHY_RESET_CTL),
-	HDMI_REG(VC4_HDMI_TX_PHY_CTL0),
-
-	HDMI_REG(VC4_HDMI_CEC_CNTRL_1),
-	HDMI_REG(VC4_HDMI_CEC_CNTRL_2),
-	HDMI_REG(VC4_HDMI_CEC_CNTRL_3),
-	HDMI_REG(VC4_HDMI_CEC_CNTRL_4),
-	HDMI_REG(VC4_HDMI_CEC_CNTRL_5),
-	HDMI_REG(VC4_HDMI_CPU_STATUS),
-	HDMI_REG(VC4_HDMI_CPU_MASK_STATUS),
-
-	HDMI_REG(VC4_HDMI_CEC_RX_DATA_1),
-	HDMI_REG(VC4_HDMI_CEC_RX_DATA_2),
-	HDMI_REG(VC4_HDMI_CEC_RX_DATA_3),
-	HDMI_REG(VC4_HDMI_CEC_RX_DATA_4),
-	HDMI_REG(VC4_HDMI_CEC_TX_DATA_1),
-	HDMI_REG(VC4_HDMI_CEC_TX_DATA_2),
-	HDMI_REG(VC4_HDMI_CEC_TX_DATA_3),
-	HDMI_REG(VC4_HDMI_CEC_TX_DATA_4),
+static const struct debugfs_reg32 hdmi_regs[] = {
+	VC4_REG32(VC4_HDMI_CORE_REV),
+	VC4_REG32(VC4_HDMI_SW_RESET_CONTROL),
+	VC4_REG32(VC4_HDMI_HOTPLUG_INT),
+	VC4_REG32(VC4_HDMI_HOTPLUG),
+	VC4_REG32(VC4_HDMI_MAI_CHANNEL_MAP),
+	VC4_REG32(VC4_HDMI_MAI_CONFIG),
+	VC4_REG32(VC4_HDMI_MAI_FORMAT),
+	VC4_REG32(VC4_HDMI_AUDIO_PACKET_CONFIG),
+	VC4_REG32(VC4_HDMI_RAM_PACKET_CONFIG),
+	VC4_REG32(VC4_HDMI_HORZA),
+	VC4_REG32(VC4_HDMI_HORZB),
+	VC4_REG32(VC4_HDMI_FIFO_CTL),
+	VC4_REG32(VC4_HDMI_SCHEDULER_CONTROL),
+	VC4_REG32(VC4_HDMI_VERTA0),
+	VC4_REG32(VC4_HDMI_VERTA1),
+	VC4_REG32(VC4_HDMI_VERTB0),
+	VC4_REG32(VC4_HDMI_VERTB1),
+	VC4_REG32(VC4_HDMI_TX_PHY_RESET_CTL),
+	VC4_REG32(VC4_HDMI_TX_PHY_CTL0),
+
+	VC4_REG32(VC4_HDMI_CEC_CNTRL_1),
+	VC4_REG32(VC4_HDMI_CEC_CNTRL_2),
+	VC4_REG32(VC4_HDMI_CEC_CNTRL_3),
+	VC4_REG32(VC4_HDMI_CEC_CNTRL_4),
+	VC4_REG32(VC4_HDMI_CEC_CNTRL_5),
+	VC4_REG32(VC4_HDMI_CPU_STATUS),
+	VC4_REG32(VC4_HDMI_CPU_MASK_STATUS),
+
+	VC4_REG32(VC4_HDMI_CEC_RX_DATA_1),
+	VC4_REG32(VC4_HDMI_CEC_RX_DATA_2),
+	VC4_REG32(VC4_HDMI_CEC_RX_DATA_3),
+	VC4_REG32(VC4_HDMI_CEC_RX_DATA_4),
+	VC4_REG32(VC4_HDMI_CEC_TX_DATA_1),
+	VC4_REG32(VC4_HDMI_CEC_TX_DATA_2),
+	VC4_REG32(VC4_HDMI_CEC_TX_DATA_3),
+	VC4_REG32(VC4_HDMI_CEC_TX_DATA_4),
 };
 
-static const struct {
-	u32 reg;
-	const char *name;
-} hd_regs[] = {
-	HDMI_REG(VC4_HD_M_CTL),
-	HDMI_REG(VC4_HD_MAI_CTL),
-	HDMI_REG(VC4_HD_MAI_THR),
-	HDMI_REG(VC4_HD_MAI_FMT),
-	HDMI_REG(VC4_HD_MAI_SMP),
-	HDMI_REG(VC4_HD_VID_CTL),
-	HDMI_REG(VC4_HD_CSC_CTL),
-	HDMI_REG(VC4_HD_FRAME_COUNT),
+static const struct debugfs_reg32 hd_regs[] = {
+	VC4_REG32(VC4_HD_M_CTL),
+	VC4_REG32(VC4_HD_MAI_CTL),
+	VC4_REG32(VC4_HD_MAI_THR),
+	VC4_REG32(VC4_HD_MAI_FMT),
+	VC4_REG32(VC4_HD_MAI_SMP),
+	VC4_REG32(VC4_HD_VID_CTL),
+	VC4_REG32(VC4_HD_CSC_CTL),
+	VC4_REG32(VC4_HD_FRAME_COUNT),
 };
 
-#ifdef CONFIG_DEBUG_FS
-int vc4_hdmi_debugfs_regs(struct seq_file *m, void *unused)
+static int vc4_hdmi_debugfs_regs(struct seq_file *m, void *unused)
 {
 	struct drm_info_node *node = (struct drm_info_node *)m->private;
 	struct drm_device *dev = node->minor->dev;
 	struct vc4_dev *vc4 = to_vc4_dev(dev);
-	int i;
-
-	for (i = 0; i < ARRAY_SIZE(hdmi_regs); i++) {
-		seq_printf(m, "%s (0x%04x): 0x%08x\n",
-			   hdmi_regs[i].name, hdmi_regs[i].reg,
-			   HDMI_READ(hdmi_regs[i].reg));
-	}
+	struct vc4_hdmi *hdmi = vc4->hdmi;
+	struct drm_printer p = drm_seq_file_printer(m);
 
-	for (i = 0; i < ARRAY_SIZE(hd_regs); i++) {
-		seq_printf(m, "%s (0x%04x): 0x%08x\n",
-			   hd_regs[i].name, hd_regs[i].reg,
-			   HD_READ(hd_regs[i].reg));
-	}
+	drm_print_regset32(&p, &hdmi->hdmi_regset);
+	drm_print_regset32(&p, &hdmi->hd_regset);
 
 	return 0;
 }
-#endif /* CONFIG_DEBUG_FS */
-
-static void vc4_hdmi_dump_regs(struct drm_device *dev)
-{
-	struct vc4_dev *vc4 = to_vc4_dev(dev);
-	int i;
-
-	for (i = 0; i < ARRAY_SIZE(hdmi_regs); i++) {
-		DRM_INFO("0x%04x (%s): 0x%08x\n",
-			 hdmi_regs[i].reg, hdmi_regs[i].name,
-			 HDMI_READ(hdmi_regs[i].reg));
-	}
-	for (i = 0; i < ARRAY_SIZE(hd_regs); i++) {
-		DRM_INFO("0x%04x (%s): 0x%08x\n",
-			 hd_regs[i].reg, hd_regs[i].name,
-			 HD_READ(hd_regs[i].reg));
-	}
-}
 
 static enum drm_connector_status
 vc4_hdmi_connector_detect(struct drm_connector *connector, bool force)
@@ -561,8 +530,11 @@ static void vc4_hdmi_encoder_enable(struct drm_encoder *encoder)
 	HDMI_WRITE(VC4_HDMI_TX_PHY_RESET_CTL, 0);
 
 	if (debug_dump_regs) {
-		DRM_INFO("HDMI regs before:\n");
-		vc4_hdmi_dump_regs(dev);
+		struct drm_printer p = drm_info_printer(&hdmi->pdev->dev);
+
+		dev_info(&hdmi->pdev->dev, "HDMI regs before:\n");
+		drm_print_regset32(&p, &hdmi->hdmi_regset);
+		drm_print_regset32(&p, &hdmi->hd_regset);
 	}
 
 	HD_WRITE(VC4_HD_VID_CTL, 0);
@@ -637,8 +609,11 @@ static void vc4_hdmi_encoder_enable(struct drm_encoder *encoder)
 	HDMI_WRITE(VC4_HDMI_FIFO_CTL, VC4_HDMI_FIFO_CTL_MASTER_SLAVE_N);
 
 	if (debug_dump_regs) {
-		DRM_INFO("HDMI regs after:\n");
-		vc4_hdmi_dump_regs(dev);
+		struct drm_printer p = drm_info_printer(&hdmi->pdev->dev);
+
+		dev_info(&hdmi->pdev->dev, "HDMI regs after:\n");
+		drm_print_regset32(&p, &hdmi->hdmi_regset);
+		drm_print_regset32(&p, &hdmi->hd_regset);
 	}
 
 	HD_WRITE(VC4_HD_VID_CTL,
@@ -1333,6 +1308,13 @@ static int vc4_hdmi_bind(struct device *dev, struct device *master, void *data)
 	if (IS_ERR(hdmi->hd_regs))
 		return PTR_ERR(hdmi->hd_regs);
 
+	hdmi->hdmi_regset.base = hdmi->hdmicore_regs;
+	hdmi->hdmi_regset.regs = hdmi_regs;
+	hdmi->hdmi_regset.nregs = ARRAY_SIZE(hdmi_regs);
+	hdmi->hd_regset.base = hdmi->hd_regs;
+	hdmi->hd_regset.regs = hd_regs;
+	hdmi->hd_regset.nregs = ARRAY_SIZE(hd_regs);
+
 	hdmi->pixel_clock = devm_clk_get(dev, "pixel");
 	if (IS_ERR(hdmi->pixel_clock)) {
 		DRM_ERROR("Failed to get pixel clock\n");
@@ -1448,6 +1430,8 @@ static int vc4_hdmi_bind(struct device *dev, struct device *master, void *data)
 	if (ret)
 		goto err_destroy_encoder;
 
+	vc4_debugfs_add_file(drm, "hdmi_regs", vc4_hdmi_debugfs_regs, hdmi);
+
 	return 0;
 
 #ifdef CONFIG_DRM_VC4_HDMI_CEC
diff --git a/drivers/gpu/drm/vc4/vc4_hvs.c b/drivers/gpu/drm/vc4/vc4_hvs.c
index 918e71256ecc..f746e9a7a88c 100644
--- a/drivers/gpu/drm/vc4/vc4_hvs.c
+++ b/drivers/gpu/drm/vc4/vc4_hvs.c
@@ -27,54 +27,47 @@
 #include "vc4_drv.h"
 #include "vc4_regs.h"
 
-#define HVS_REG(reg) { reg, #reg }
-static const struct {
-	u32 reg;
-	const char *name;
-} hvs_regs[] = {
-	HVS_REG(SCALER_DISPCTRL),
-	HVS_REG(SCALER_DISPSTAT),
-	HVS_REG(SCALER_DISPID),
-	HVS_REG(SCALER_DISPECTRL),
-	HVS_REG(SCALER_DISPPROF),
-	HVS_REG(SCALER_DISPDITHER),
-	HVS_REG(SCALER_DISPEOLN),
-	HVS_REG(SCALER_DISPLIST0),
-	HVS_REG(SCALER_DISPLIST1),
-	HVS_REG(SCALER_DISPLIST2),
-	HVS_REG(SCALER_DISPLSTAT),
-	HVS_REG(SCALER_DISPLACT0),
-	HVS_REG(SCALER_DISPLACT1),
-	HVS_REG(SCALER_DISPLACT2),
-	HVS_REG(SCALER_DISPCTRL0),
-	HVS_REG(SCALER_DISPBKGND0),
-	HVS_REG(SCALER_DISPSTAT0),
-	HVS_REG(SCALER_DISPBASE0),
-	HVS_REG(SCALER_DISPCTRL1),
-	HVS_REG(SCALER_DISPBKGND1),
-	HVS_REG(SCALER_DISPSTAT1),
-	HVS_REG(SCALER_DISPBASE1),
-	HVS_REG(SCALER_DISPCTRL2),
-	HVS_REG(SCALER_DISPBKGND2),
-	HVS_REG(SCALER_DISPSTAT2),
-	HVS_REG(SCALER_DISPBASE2),
-	HVS_REG(SCALER_DISPALPHA2),
-	HVS_REG(SCALER_OLEDOFFS),
-	HVS_REG(SCALER_OLEDCOEF0),
-	HVS_REG(SCALER_OLEDCOEF1),
-	HVS_REG(SCALER_OLEDCOEF2),
+static const struct debugfs_reg32 hvs_regs[] = {
+	VC4_REG32(SCALER_DISPCTRL),
+	VC4_REG32(SCALER_DISPSTAT),
+	VC4_REG32(SCALER_DISPID),
+	VC4_REG32(SCALER_DISPECTRL),
+	VC4_REG32(SCALER_DISPPROF),
+	VC4_REG32(SCALER_DISPDITHER),
+	VC4_REG32(SCALER_DISPEOLN),
+	VC4_REG32(SCALER_DISPLIST0),
+	VC4_REG32(SCALER_DISPLIST1),
+	VC4_REG32(SCALER_DISPLIST2),
+	VC4_REG32(SCALER_DISPLSTAT),
+	VC4_REG32(SCALER_DISPLACT0),
+	VC4_REG32(SCALER_DISPLACT1),
+	VC4_REG32(SCALER_DISPLACT2),
+	VC4_REG32(SCALER_DISPCTRL0),
+	VC4_REG32(SCALER_DISPBKGND0),
+	VC4_REG32(SCALER_DISPSTAT0),
+	VC4_REG32(SCALER_DISPBASE0),
+	VC4_REG32(SCALER_DISPCTRL1),
+	VC4_REG32(SCALER_DISPBKGND1),
+	VC4_REG32(SCALER_DISPSTAT1),
+	VC4_REG32(SCALER_DISPBASE1),
+	VC4_REG32(SCALER_DISPCTRL2),
+	VC4_REG32(SCALER_DISPBKGND2),
+	VC4_REG32(SCALER_DISPSTAT2),
+	VC4_REG32(SCALER_DISPBASE2),
+	VC4_REG32(SCALER_DISPALPHA2),
+	VC4_REG32(SCALER_OLEDOFFS),
+	VC4_REG32(SCALER_OLEDCOEF0),
+	VC4_REG32(SCALER_OLEDCOEF1),
+	VC4_REG32(SCALER_OLEDCOEF2),
 };
 
 void vc4_hvs_dump_state(struct drm_device *dev)
 {
 	struct vc4_dev *vc4 = to_vc4_dev(dev);
+	struct drm_printer p = drm_info_printer(&vc4->hvs->pdev->dev);
 	int i;
 
-	for (i = 0; i < ARRAY_SIZE(hvs_regs); i++) {
-		DRM_INFO("0x%04x (%s): 0x%08x\n",
-			 hvs_regs[i].reg, hvs_regs[i].name,
-			 HVS_READ(hvs_regs[i].reg));
-	}
+	drm_print_regset32(&p, &vc4->hvs->regset);
 
 	DRM_INFO("HVS ctx:\n");
 	for (i = 0; i < 64; i += 4) {
@@ -87,24 +80,7 @@ void vc4_hvs_dump_state(struct drm_device *dev)
 	}
 }
 
-#ifdef CONFIG_DEBUG_FS
-int vc4_hvs_debugfs_regs(struct seq_file *m, void *unused)
-{
-	struct drm_info_node *node = (struct drm_info_node *)m->private;
-	struct drm_device *dev = node->minor->dev;
-	struct vc4_dev *vc4 = to_vc4_dev(dev);
-	int i;
-
-	for (i = 0; i < ARRAY_SIZE(hvs_regs); i++) {
-		seq_printf(m, "%s (0x%04x): 0x%08x\n",
-			   hvs_regs[i].name, hvs_regs[i].reg,
-			   HVS_READ(hvs_regs[i].reg));
-	}
-
-	return 0;
-}
-
-int vc4_hvs_debugfs_underrun(struct seq_file *m, void *data)
+static int vc4_hvs_debugfs_underrun(struct seq_file *m, void *data)
 {
 	struct drm_info_node *node = m->private;
 	struct drm_device *dev = node->minor->dev;
@@ -115,7 +91,6 @@ int vc4_hvs_debugfs_underrun(struct seq_file *m, void *data)
 
 	return 0;
 }
-#endif
 
 /* The filter kernel is composed of dwords each containing 3 9-bit
  * signed integers packed next to each other.
@@ -259,6 +234,10 @@ static int vc4_hvs_bind(struct device *dev, struct device *master, void *data)
 	if (IS_ERR(hvs->regs))
 		return PTR_ERR(hvs->regs);
 
+	hvs->regset.base = hvs->regs;
+	hvs->regset.regs = hvs_regs;
+	hvs->regset.nregs = ARRAY_SIZE(hvs_regs);
+
 	hvs->dlist = hvs->regs + SCALER_DLIST_START;
 
 	spin_lock_init(&hvs->mm_lock);
@@ -323,6 +302,10 @@ static int vc4_hvs_bind(struct device *dev, struct device *master, void *data)
 	if (ret)
 		return ret;
 
+	vc4_debugfs_add_regset32(drm, "hvs_regs", &hvs->regset);
+	vc4_debugfs_add_file(drm, "hvs_underrun", vc4_hvs_debugfs_underrun,
+			     NULL);
+
 	return 0;
 }
 
diff --git a/drivers/gpu/drm/vc4/vc4_irq.c b/drivers/gpu/drm/vc4/vc4_irq.c
index 4cd2ccfe15f4..ffd0a4388752 100644
--- a/drivers/gpu/drm/vc4/vc4_irq.c
+++ b/drivers/gpu/drm/vc4/vc4_irq.c
@@ -229,6 +229,9 @@ vc4_irq_preinstall(struct drm_device *dev)
 {
 	struct vc4_dev *vc4 = to_vc4_dev(dev);
 
+	if (!vc4->v3d)
+		return;
+
 	init_waitqueue_head(&vc4->job_wait_queue);
 	INIT_WORK(&vc4->overflow_mem_work, vc4_overflow_mem_work);
 
@@ -243,6 +246,9 @@ vc4_irq_postinstall(struct drm_device *dev)
 {
 	struct vc4_dev *vc4 = to_vc4_dev(dev);
 
+	if (!vc4->v3d)
+		return 0;
+
 	/* Enable both the render done and out of memory interrupts. */
 	V3D_WRITE(V3D_INTENA, V3D_DRIVER_IRQS);
 
@@ -254,6 +260,9 @@ vc4_irq_uninstall(struct drm_device *dev)
 {
 	struct vc4_dev *vc4 = to_vc4_dev(dev);
 
+	if (!vc4->v3d)
+		return;
+
 	/* Disable sending interrupts for our driver's IRQs. */
 	V3D_WRITE(V3D_INTDIS, V3D_DRIVER_IRQS);
 
diff --git a/drivers/gpu/drm/vc4/vc4_kms.c b/drivers/gpu/drm/vc4/vc4_kms.c
index 5160cad25fce..295dacc8bcb9 100644
--- a/drivers/gpu/drm/vc4/vc4_kms.c
+++ b/drivers/gpu/drm/vc4/vc4_kms.c
@@ -525,6 +525,7 @@ int vc4_kms_load(struct drm_device *dev)
 	/* Set support for vblank irq fast disable, before drm_vblank_init() */
 	dev->vblank_disable_immediate = true;
 
+	dev->irq_enabled = true;
 	ret = drm_vblank_init(dev, dev->mode_config.num_crtc);
 	if (ret < 0) {
 		dev_err(dev->dev, "failed to initialize vblank\n");
diff --git a/drivers/gpu/drm/vc4/vc4_perfmon.c b/drivers/gpu/drm/vc4/vc4_perfmon.c
index 495150415020..f4aa75efd16b 100644
--- a/drivers/gpu/drm/vc4/vc4_perfmon.c
+++ b/drivers/gpu/drm/vc4/vc4_perfmon.c
@@ -100,12 +100,18 @@ void vc4_perfmon_close_file(struct vc4_file *vc4file)
 int vc4_perfmon_create_ioctl(struct drm_device *dev, void *data,
 			     struct drm_file *file_priv)
 {
+	struct vc4_dev *vc4 = to_vc4_dev(dev);
 	struct vc4_file *vc4file = file_priv->driver_priv;
 	struct drm_vc4_perfmon_create *req = data;
 	struct vc4_perfmon *perfmon;
 	unsigned int i;
 	int ret;
 
+	if (!vc4->v3d) {
+		DRM_DEBUG("Creating perfmon no VC4 V3D probed\n");
+		return -ENODEV;
+	}
+
 	/* Number of monitored counters cannot exceed HW limits. */
 	if (req->ncounters > DRM_VC4_MAX_PERF_COUNTERS ||
 	    !req->ncounters)
@@ -146,10 +152,16 @@ int vc4_perfmon_create_ioctl(struct drm_device *dev, void *data,
 int vc4_perfmon_destroy_ioctl(struct drm_device *dev, void *data,
 			      struct drm_file *file_priv)
 {
+	struct vc4_dev *vc4 = to_vc4_dev(dev);
 	struct vc4_file *vc4file = file_priv->driver_priv;
 	struct drm_vc4_perfmon_destroy *req = data;
 	struct vc4_perfmon *perfmon;
 
+	if (!vc4->v3d) {
+		DRM_DEBUG("Destroying perfmon no VC4 V3D probed\n");
+		return -ENODEV;
+	}
+
 	mutex_lock(&vc4file->perfmon.lock);
 	perfmon = idr_remove(&vc4file->perfmon.idr, req->id);
 	mutex_unlock(&vc4file->perfmon.lock);
@@ -164,11 +176,17 @@ int vc4_perfmon_destroy_ioctl(struct drm_device *dev, void *data,
 int vc4_perfmon_get_values_ioctl(struct drm_device *dev, void *data,
 				 struct drm_file *file_priv)
 {
+	struct vc4_dev *vc4 = to_vc4_dev(dev);
 	struct vc4_file *vc4file = file_priv->driver_priv;
 	struct drm_vc4_perfmon_get_values *req = data;
 	struct vc4_perfmon *perfmon;
 	int ret;
 
+	if (!vc4->v3d) {
+		DRM_DEBUG("Getting perfmon no VC4 V3D probed\n");
+		return -ENODEV;
+	}
+
 	mutex_lock(&vc4file->perfmon.lock);
 	perfmon = idr_find(&vc4file->perfmon.idr, req->id);
 	vc4_perfmon_get(perfmon);
diff --git a/drivers/gpu/drm/vc4/vc4_render_cl.c b/drivers/gpu/drm/vc4/vc4_render_cl.c
index 273984f71ae2..3c918eeaf56e 100644
--- a/drivers/gpu/drm/vc4/vc4_render_cl.c
+++ b/drivers/gpu/drm/vc4/vc4_render_cl.c
@@ -148,6 +148,12 @@ static void emit_tile(struct vc4_exec_info *exec,
 	}
 
 	if (setup->zs_read) {
+		if (setup->color_read) {
+			/* Exec previous load. */
+			vc4_tile_coordinates(setup, x, y);
+			vc4_store_before_load(setup);
+		}
+
 		if (args->zs_read.flags &
 		    VC4_SUBMIT_RCL_SURFACE_READ_IS_FULL_RES) {
 			rcl_u8(setup, VC4_PACKET_LOAD_FULL_RES_TILE_BUFFER);
@@ -156,12 +162,6 @@ static void emit_tile(struct vc4_exec_info *exec,
 						    &args->zs_read, x, y) |
 				VC4_LOADSTORE_FULL_RES_DISABLE_COLOR);
 		} else {
-			if (setup->color_read) {
-				/* Exec previous load. */
-				vc4_tile_coordinates(setup, x, y);
-				vc4_store_before_load(setup);
-			}
-
 			rcl_u8(setup, VC4_PACKET_LOAD_TILE_BUFFER_GENERAL);
 			rcl_u16(setup, args->zs_read.bits);
 			rcl_u32(setup, setup->zs_read->paddr +
@@ -291,16 +291,15 @@ static int vc4_create_rcl_bo(struct drm_device *dev, struct vc4_exec_info *exec,
 		}
 	}
 	if (setup->zs_read) {
+		if (setup->color_read) {
+			loop_body_size += VC4_PACKET_TILE_COORDINATES_SIZE;
+			loop_body_size += VC4_PACKET_STORE_TILE_BUFFER_GENERAL_SIZE;
+		}
+
 		if (args->zs_read.flags &
 		    VC4_SUBMIT_RCL_SURFACE_READ_IS_FULL_RES) {
 			loop_body_size += VC4_PACKET_LOAD_FULL_RES_TILE_BUFFER_SIZE;
 		} else {
-			if (setup->color_read &&
-			    !(args->color_read.flags &
-			      VC4_SUBMIT_RCL_SURFACE_READ_IS_FULL_RES)) {
-				loop_body_size += VC4_PACKET_TILE_COORDINATES_SIZE;
-				loop_body_size += VC4_PACKET_STORE_TILE_BUFFER_GENERAL_SIZE;
-			}
 			loop_body_size += VC4_PACKET_LOAD_TILE_BUFFER_GENERAL_SIZE;
 		}
 	}
diff --git a/drivers/gpu/drm/vc4/vc4_txp.c b/drivers/gpu/drm/vc4/vc4_txp.c
index cc2888dd7171..c8b89a78f9f4 100644
--- a/drivers/gpu/drm/vc4/vc4_txp.c
+++ b/drivers/gpu/drm/vc4/vc4_txp.c
@@ -148,6 +148,7 @@ struct vc4_txp {
 	struct drm_writeback_connector connector;
 
 	void __iomem *regs;
+	struct debugfs_regset32 regset;
 };
 
 static inline struct vc4_txp *encoder_to_vc4_txp(struct drm_encoder *encoder)
@@ -160,40 +161,14 @@ static inline struct vc4_txp *connector_to_vc4_txp(struct drm_connector *conn)
 	return container_of(conn, struct vc4_txp, connector.base);
 }
 
-#define TXP_REG(reg) { reg, #reg }
-static const struct {
-	u32 reg;
-	const char *name;
-} txp_regs[] = {
-	TXP_REG(TXP_DST_PTR),
-	TXP_REG(TXP_DST_PITCH),
-	TXP_REG(TXP_DIM),
-	TXP_REG(TXP_DST_CTRL),
-	TXP_REG(TXP_PROGRESS),
+static const struct debugfs_reg32 txp_regs[] = {
+	VC4_REG32(TXP_DST_PTR),
+	VC4_REG32(TXP_DST_PITCH),
+	VC4_REG32(TXP_DIM),
+	VC4_REG32(TXP_DST_CTRL),
+	VC4_REG32(TXP_PROGRESS),
 };
 
-#ifdef CONFIG_DEBUG_FS
-int vc4_txp_debugfs_regs(struct seq_file *m, void *unused)
-{
-	struct drm_info_node *node = (struct drm_info_node *)m->private;
-	struct drm_device *dev = node->minor->dev;
-	struct vc4_dev *vc4 = to_vc4_dev(dev);
-	struct vc4_txp *txp = vc4->txp;
-	int i;
-
-	if (!txp)
-		return 0;
-
-	for (i = 0; i < ARRAY_SIZE(txp_regs); i++) {
-		seq_printf(m, "%s (0x%04x): 0x%08x\n",
-			   txp_regs[i].name, txp_regs[i].reg,
-			   TXP_READ(txp_regs[i].reg));
-	}
-
-	return 0;
-}
-#endif
-
 static int vc4_txp_connector_get_modes(struct drm_connector *connector)
 {
 	struct drm_device *dev = connector->dev;
@@ -410,6 +385,9 @@ static int vc4_txp_bind(struct device *dev, struct device *master, void *data)
 	txp->regs = vc4_ioremap_regs(pdev, 0);
 	if (IS_ERR(txp->regs))
 		return PTR_ERR(txp->regs);
+	txp->regset.base = txp->regs;
+	txp->regset.regs = txp_regs;
+	txp->regset.nregs = ARRAY_SIZE(txp_regs);
 
 	drm_connector_helper_add(&txp->connector.base,
 				 &vc4_txp_connector_helper_funcs);
@@ -428,6 +406,8 @@ static int vc4_txp_bind(struct device *dev, struct device *master, void *data)
 	dev_set_drvdata(dev, txp);
 	vc4->txp = txp;
 
+	vc4_debugfs_add_regset32(drm, "txp_regs", &txp->regset);
+
 	return 0;
 }
 
diff --git a/drivers/gpu/drm/vc4/vc4_v3d.c b/drivers/gpu/drm/vc4/vc4_v3d.c
index e47e29426078..a4b6859e3af6 100644
--- a/drivers/gpu/drm/vc4/vc4_v3d.c
+++ b/drivers/gpu/drm/vc4/vc4_v3d.c
@@ -22,129 +22,145 @@
 #include "vc4_drv.h"
 #include "vc4_regs.h"
 
-#ifdef CONFIG_DEBUG_FS
-#define REGDEF(reg) { reg, #reg }
-static const struct {
-	uint32_t reg;
-	const char *name;
-} vc4_reg_defs[] = {
-	REGDEF(V3D_IDENT0),
-	REGDEF(V3D_IDENT1),
-	REGDEF(V3D_IDENT2),
-	REGDEF(V3D_SCRATCH),
-	REGDEF(V3D_L2CACTL),
-	REGDEF(V3D_SLCACTL),
-	REGDEF(V3D_INTCTL),
-	REGDEF(V3D_INTENA),
-	REGDEF(V3D_INTDIS),
-	REGDEF(V3D_CT0CS),
-	REGDEF(V3D_CT1CS),
-	REGDEF(V3D_CT0EA),
-	REGDEF(V3D_CT1EA),
-	REGDEF(V3D_CT0CA),
-	REGDEF(V3D_CT1CA),
-	REGDEF(V3D_CT00RA0),
-	REGDEF(V3D_CT01RA0),
-	REGDEF(V3D_CT0LC),
-	REGDEF(V3D_CT1LC),
-	REGDEF(V3D_CT0PC),
-	REGDEF(V3D_CT1PC),
-	REGDEF(V3D_PCS),
-	REGDEF(V3D_BFC),
-	REGDEF(V3D_RFC),
-	REGDEF(V3D_BPCA),
-	REGDEF(V3D_BPCS),
-	REGDEF(V3D_BPOA),
-	REGDEF(V3D_BPOS),
-	REGDEF(V3D_BXCF),
-	REGDEF(V3D_SQRSV0),
-	REGDEF(V3D_SQRSV1),
-	REGDEF(V3D_SQCNTL),
-	REGDEF(V3D_SRQPC),
-	REGDEF(V3D_SRQUA),
-	REGDEF(V3D_SRQUL),
-	REGDEF(V3D_SRQCS),
-	REGDEF(V3D_VPACNTL),
-	REGDEF(V3D_VPMBASE),
-	REGDEF(V3D_PCTRC),
-	REGDEF(V3D_PCTRE),
-	REGDEF(V3D_PCTR(0)),
-	REGDEF(V3D_PCTRS(0)),
-	REGDEF(V3D_PCTR(1)),
-	REGDEF(V3D_PCTRS(1)),
-	REGDEF(V3D_PCTR(2)),
-	REGDEF(V3D_PCTRS(2)),
-	REGDEF(V3D_PCTR(3)),
-	REGDEF(V3D_PCTRS(3)),
-	REGDEF(V3D_PCTR(4)),
-	REGDEF(V3D_PCTRS(4)),
-	REGDEF(V3D_PCTR(5)),
-	REGDEF(V3D_PCTRS(5)),
-	REGDEF(V3D_PCTR(6)),
-	REGDEF(V3D_PCTRS(6)),
-	REGDEF(V3D_PCTR(7)),
-	REGDEF(V3D_PCTRS(7)),
-	REGDEF(V3D_PCTR(8)),
-	REGDEF(V3D_PCTRS(8)),
-	REGDEF(V3D_PCTR(9)),
-	REGDEF(V3D_PCTRS(9)),
-	REGDEF(V3D_PCTR(10)),
-	REGDEF(V3D_PCTRS(10)),
-	REGDEF(V3D_PCTR(11)),
-	REGDEF(V3D_PCTRS(11)),
-	REGDEF(V3D_PCTR(12)),
-	REGDEF(V3D_PCTRS(12)),
-	REGDEF(V3D_PCTR(13)),
-	REGDEF(V3D_PCTRS(13)),
-	REGDEF(V3D_PCTR(14)),
-	REGDEF(V3D_PCTRS(14)),
-	REGDEF(V3D_PCTR(15)),
-	REGDEF(V3D_PCTRS(15)),
-	REGDEF(V3D_DBGE),
-	REGDEF(V3D_FDBGO),
-	REGDEF(V3D_FDBGB),
-	REGDEF(V3D_FDBGR),
-	REGDEF(V3D_FDBGS),
-	REGDEF(V3D_ERRSTAT),
+static const struct debugfs_reg32 v3d_regs[] = {
+	VC4_REG32(V3D_IDENT0),
+	VC4_REG32(V3D_IDENT1),
+	VC4_REG32(V3D_IDENT2),
+	VC4_REG32(V3D_SCRATCH),
+	VC4_REG32(V3D_L2CACTL),
+	VC4_REG32(V3D_SLCACTL),
+	VC4_REG32(V3D_INTCTL),
+	VC4_REG32(V3D_INTENA),
+	VC4_REG32(V3D_INTDIS),
+	VC4_REG32(V3D_CT0CS),
+	VC4_REG32(V3D_CT1CS),
+	VC4_REG32(V3D_CT0EA),
+	VC4_REG32(V3D_CT1EA),
+	VC4_REG32(V3D_CT0CA),
+	VC4_REG32(V3D_CT1CA),
+	VC4_REG32(V3D_CT00RA0),
+	VC4_REG32(V3D_CT01RA0),
+	VC4_REG32(V3D_CT0LC),
+	VC4_REG32(V3D_CT1LC),
+	VC4_REG32(V3D_CT0PC),
+	VC4_REG32(V3D_CT1PC),
+	VC4_REG32(V3D_PCS),
+	VC4_REG32(V3D_BFC),
+	VC4_REG32(V3D_RFC),
+	VC4_REG32(V3D_BPCA),
+	VC4_REG32(V3D_BPCS),
+	VC4_REG32(V3D_BPOA),
+	VC4_REG32(V3D_BPOS),
+	VC4_REG32(V3D_BXCF),
+	VC4_REG32(V3D_SQRSV0),
+	VC4_REG32(V3D_SQRSV1),
+	VC4_REG32(V3D_SQCNTL),
+	VC4_REG32(V3D_SRQPC),
+	VC4_REG32(V3D_SRQUA),
+	VC4_REG32(V3D_SRQUL),
+	VC4_REG32(V3D_SRQCS),
+	VC4_REG32(V3D_VPACNTL),
+	VC4_REG32(V3D_VPMBASE),
+	VC4_REG32(V3D_PCTRC),
+	VC4_REG32(V3D_PCTRE),
+	VC4_REG32(V3D_PCTR(0)),
+	VC4_REG32(V3D_PCTRS(0)),
+	VC4_REG32(V3D_PCTR(1)),
+	VC4_REG32(V3D_PCTRS(1)),
+	VC4_REG32(V3D_PCTR(2)),
+	VC4_REG32(V3D_PCTRS(2)),
+	VC4_REG32(V3D_PCTR(3)),
+	VC4_REG32(V3D_PCTRS(3)),
+	VC4_REG32(V3D_PCTR(4)),
+	VC4_REG32(V3D_PCTRS(4)),
+	VC4_REG32(V3D_PCTR(5)),
+	VC4_REG32(V3D_PCTRS(5)),
+	VC4_REG32(V3D_PCTR(6)),
+	VC4_REG32(V3D_PCTRS(6)),
+	VC4_REG32(V3D_PCTR(7)),
+	VC4_REG32(V3D_PCTRS(7)),
+	VC4_REG32(V3D_PCTR(8)),
+	VC4_REG32(V3D_PCTRS(8)),
+	VC4_REG32(V3D_PCTR(9)),
+	VC4_REG32(V3D_PCTRS(9)),
+	VC4_REG32(V3D_PCTR(10)),
+	VC4_REG32(V3D_PCTRS(10)),
+	VC4_REG32(V3D_PCTR(11)),
+	VC4_REG32(V3D_PCTRS(11)),
+	VC4_REG32(V3D_PCTR(12)),
+	VC4_REG32(V3D_PCTRS(12)),
+	VC4_REG32(V3D_PCTR(13)),
+	VC4_REG32(V3D_PCTRS(13)),
+	VC4_REG32(V3D_PCTR(14)),
+	VC4_REG32(V3D_PCTRS(14)),
+	VC4_REG32(V3D_PCTR(15)),
+	VC4_REG32(V3D_PCTRS(15)),
+	VC4_REG32(V3D_DBGE),
+	VC4_REG32(V3D_FDBGO),
+	VC4_REG32(V3D_FDBGB),
+	VC4_REG32(V3D_FDBGR),
+	VC4_REG32(V3D_FDBGS),
+	VC4_REG32(V3D_ERRSTAT),
 };
 
-int vc4_v3d_debugfs_regs(struct seq_file *m, void *unused)
+static int vc4_v3d_debugfs_ident(struct seq_file *m, void *unused)
 {
 	struct drm_info_node *node = (struct drm_info_node *)m->private;
 	struct drm_device *dev = node->minor->dev;
 	struct vc4_dev *vc4 = to_vc4_dev(dev);
-	int i;
-
-	for (i = 0; i < ARRAY_SIZE(vc4_reg_defs); i++) {
-		seq_printf(m, "%s (0x%04x): 0x%08x\n",
-			   vc4_reg_defs[i].name, vc4_reg_defs[i].reg,
-			   V3D_READ(vc4_reg_defs[i].reg));
+	int ret = vc4_v3d_pm_get(vc4);
+
+	if (ret == 0) {
+		uint32_t ident1 = V3D_READ(V3D_IDENT1);
+		uint32_t nslc = VC4_GET_FIELD(ident1, V3D_IDENT1_NSLC);
+		uint32_t tups = VC4_GET_FIELD(ident1, V3D_IDENT1_TUPS);
+		uint32_t qups = VC4_GET_FIELD(ident1, V3D_IDENT1_QUPS);
+
+		seq_printf(m, "Revision:   %d\n",
+			   VC4_GET_FIELD(ident1, V3D_IDENT1_REV));
+		seq_printf(m, "Slices:     %d\n", nslc);
+		seq_printf(m, "TMUs:       %d\n", nslc * tups);
+		seq_printf(m, "QPUs:       %d\n", nslc * qups);
+		seq_printf(m, "Semaphores: %d\n",
+			   VC4_GET_FIELD(ident1, V3D_IDENT1_NSEM));
+		vc4_v3d_pm_put(vc4);
 	}
 
 	return 0;
 }
 
-int vc4_v3d_debugfs_ident(struct seq_file *m, void *unused)
+/**
+ * Wraps pm_runtime_get_sync() in a refcount, so that we can reliably
+ * get the pm_runtime refcount to 0 in vc4_reset().
+ */
+int
+vc4_v3d_pm_get(struct vc4_dev *vc4)
 {
-	struct drm_info_node *node = (struct drm_info_node *)m->private;
-	struct drm_device *dev = node->minor->dev;
-	struct vc4_dev *vc4 = to_vc4_dev(dev);
-	uint32_t ident1 = V3D_READ(V3D_IDENT1);
-	uint32_t nslc = VC4_GET_FIELD(ident1, V3D_IDENT1_NSLC);
-	uint32_t tups = VC4_GET_FIELD(ident1, V3D_IDENT1_TUPS);
-	uint32_t qups = VC4_GET_FIELD(ident1, V3D_IDENT1_QUPS);
-
-	seq_printf(m, "Revision:   %d\n",
-		   VC4_GET_FIELD(ident1, V3D_IDENT1_REV));
-	seq_printf(m, "Slices:     %d\n", nslc);
-	seq_printf(m, "TMUs:       %d\n", nslc * tups);
-	seq_printf(m, "QPUs:       %d\n", nslc * qups);
-	seq_printf(m, "Semaphores: %d\n",
-		   VC4_GET_FIELD(ident1, V3D_IDENT1_NSEM));
+	mutex_lock(&vc4->power_lock);
+	if (vc4->power_refcount++ == 0) {
+		int ret = pm_runtime_get_sync(&vc4->v3d->pdev->dev);
+
+		if (ret < 0) {
+			vc4->power_refcount--;
+			mutex_unlock(&vc4->power_lock);
+			return ret;
+		}
+	}
+	mutex_unlock(&vc4->power_lock);
 
 	return 0;
 }
-#endif /* CONFIG_DEBUG_FS */
+
+void
+vc4_v3d_pm_put(struct vc4_dev *vc4)
+{
+	mutex_lock(&vc4->power_lock);
+	if (--vc4->power_refcount == 0) {
+		pm_runtime_mark_last_busy(&vc4->v3d->pdev->dev);
+		pm_runtime_put_autosuspend(&vc4->v3d->pdev->dev);
+	}
+	mutex_unlock(&vc4->power_lock);
+}
 
 static void vc4_v3d_init_hw(struct drm_device *dev)
 {
@@ -354,6 +370,9 @@ static int vc4_v3d_bind(struct device *dev, struct device *master, void *data)
 	v3d->regs = vc4_ioremap_regs(pdev, 0);
 	if (IS_ERR(v3d->regs))
 		return PTR_ERR(v3d->regs);
+	v3d->regset.base = v3d->regs;
+	v3d->regset.regs = v3d_regs;
+	v3d->regset.nregs = ARRAY_SIZE(v3d_regs);
 
 	vc4->v3d = v3d;
 	v3d->vc4 = vc4;
@@ -409,6 +428,9 @@ static int vc4_v3d_bind(struct device *dev, struct device *master, void *data)
 	pm_runtime_set_autosuspend_delay(dev, 40); /* a little over 2 frames. */
 	pm_runtime_enable(dev);
 
+	vc4_debugfs_add_file(drm, "v3d_ident", vc4_v3d_debugfs_ident, NULL);
+	vc4_debugfs_add_regset32(drm, "v3d_regs", &v3d->regset);
+
 	return 0;
 }
 
@@ -452,7 +474,7 @@ static int vc4_v3d_dev_remove(struct platform_device *pdev)
 	return 0;
 }
 
-static const struct of_device_id vc4_v3d_dt_match[] = {
+const struct of_device_id vc4_v3d_dt_match[] = {
 	{ .compatible = "brcm,bcm2835-v3d" },
 	{ .compatible = "brcm,cygnus-v3d" },
 	{ .compatible = "brcm,vc4-v3d" },
diff --git a/drivers/gpu/drm/vc4/vc4_vec.c b/drivers/gpu/drm/vc4/vc4_vec.c
index 858c3a483229..0a27e48fab31 100644
--- a/drivers/gpu/drm/vc4/vc4_vec.c
+++ b/drivers/gpu/drm/vc4/vc4_vec.c
@@ -176,6 +176,8 @@ struct vc4_vec {
 	struct clk *clock;
 
 	const struct vc4_vec_tv_mode *tv_mode;
+
+	struct debugfs_regset32 regset;
 };
 
 #define VEC_READ(offset) readl(vec->regs + (offset))
@@ -223,59 +225,33 @@ struct vc4_vec_tv_mode {
 	void (*mode_set)(struct vc4_vec *vec);
 };
 
-#define VEC_REG(reg) { reg, #reg }
-static const struct {
-	u32 reg;
-	const char *name;
-} vec_regs[] = {
-	VEC_REG(VEC_WSE_CONTROL),
-	VEC_REG(VEC_WSE_WSS_DATA),
-	VEC_REG(VEC_WSE_VPS_DATA1),
-	VEC_REG(VEC_WSE_VPS_CONTROL),
-	VEC_REG(VEC_REVID),
-	VEC_REG(VEC_CONFIG0),
-	VEC_REG(VEC_SCHPH),
-	VEC_REG(VEC_CLMP0_START),
-	VEC_REG(VEC_CLMP0_END),
-	VEC_REG(VEC_FREQ3_2),
-	VEC_REG(VEC_FREQ1_0),
-	VEC_REG(VEC_CONFIG1),
-	VEC_REG(VEC_CONFIG2),
-	VEC_REG(VEC_INTERRUPT_CONTROL),
-	VEC_REG(VEC_INTERRUPT_STATUS),
-	VEC_REG(VEC_FCW_SECAM_B),
-	VEC_REG(VEC_SECAM_GAIN_VAL),
-	VEC_REG(VEC_CONFIG3),
-	VEC_REG(VEC_STATUS0),
-	VEC_REG(VEC_MASK0),
-	VEC_REG(VEC_CFG),
-	VEC_REG(VEC_DAC_TEST),
-	VEC_REG(VEC_DAC_CONFIG),
-	VEC_REG(VEC_DAC_MISC),
+static const struct debugfs_reg32 vec_regs[] = {
+	VC4_REG32(VEC_WSE_CONTROL),
+	VC4_REG32(VEC_WSE_WSS_DATA),
+	VC4_REG32(VEC_WSE_VPS_DATA1),
+	VC4_REG32(VEC_WSE_VPS_CONTROL),
+	VC4_REG32(VEC_REVID),
+	VC4_REG32(VEC_CONFIG0),
+	VC4_REG32(VEC_SCHPH),
+	VC4_REG32(VEC_CLMP0_START),
+	VC4_REG32(VEC_CLMP0_END),
+	VC4_REG32(VEC_FREQ3_2),
+	VC4_REG32(VEC_FREQ1_0),
+	VC4_REG32(VEC_CONFIG1),
+	VC4_REG32(VEC_CONFIG2),
+	VC4_REG32(VEC_INTERRUPT_CONTROL),
+	VC4_REG32(VEC_INTERRUPT_STATUS),
+	VC4_REG32(VEC_FCW_SECAM_B),
+	VC4_REG32(VEC_SECAM_GAIN_VAL),
+	VC4_REG32(VEC_CONFIG3),
+	VC4_REG32(VEC_STATUS0),
+	VC4_REG32(VEC_MASK0),
+	VC4_REG32(VEC_CFG),
+	VC4_REG32(VEC_DAC_TEST),
+	VC4_REG32(VEC_DAC_CONFIG),
+	VC4_REG32(VEC_DAC_MISC),
 };
 
-#ifdef CONFIG_DEBUG_FS
-int vc4_vec_debugfs_regs(struct seq_file *m, void *unused)
-{
-	struct drm_info_node *node = (struct drm_info_node *)m->private;
-	struct drm_device *dev = node->minor->dev;
-	struct vc4_dev *vc4 = to_vc4_dev(dev);
-	struct vc4_vec *vec = vc4->vec;
-	int i;
-
-	if (!vec)
-		return 0;
-
-	for (i = 0; i < ARRAY_SIZE(vec_regs); i++) {
-		seq_printf(m, "%s (0x%04x): 0x%08x\n",
-			   vec_regs[i].name, vec_regs[i].reg,
-			   VEC_READ(vec_regs[i].reg));
-	}
-
-	return 0;
-}
-#endif
-
 static void vc4_vec_ntsc_mode_set(struct vc4_vec *vec)
 {
 	VEC_WRITE(VEC_CONFIG0, VEC_CONFIG0_NTSC_STD | VEC_CONFIG0_PDEN);
@@ -587,6 +563,9 @@ static int vc4_vec_bind(struct device *dev, struct device *master, void *data)
 	vec->regs = vc4_ioremap_regs(pdev, 0);
 	if (IS_ERR(vec->regs))
 		return PTR_ERR(vec->regs);
+	vec->regset.base = vec->regs;
+	vec->regset.regs = vec_regs;
+	vec->regset.nregs = ARRAY_SIZE(vec_regs);
 
 	vec->clock = devm_clk_get(dev, NULL);
 	if (IS_ERR(vec->clock)) {
@@ -612,6 +591,8 @@ static int vc4_vec_bind(struct device *dev, struct device *master, void *data)
 
 	vc4->vec = vec;
 
+	vc4_debugfs_add_regset32(drm, "vec_regs", &vec->regset);
+
 	return 0;
 
 err_destroy_encoder:
diff --git a/drivers/gpu/drm/virtio/virtgpu_display.c b/drivers/gpu/drm/virtio/virtgpu_display.c
index 653ec7d0bf4d..86843a4d6102 100644
--- a/drivers/gpu/drm/virtio/virtgpu_display.c
+++ b/drivers/gpu/drm/virtio/virtgpu_display.c
@@ -385,5 +385,6 @@ void virtio_gpu_modeset_fini(struct virtio_gpu_device *vgdev)
 
 	for (i = 0 ; i < vgdev->num_scanouts; ++i)
 		kfree(vgdev->outputs[i].edid);
+	drm_atomic_helper_shutdown(vgdev->ddev);
 	drm_mode_config_cleanup(vgdev->ddev);
 }
diff --git a/include/drm/drm_print.h b/include/drm/drm_print.h
index afbc3beef089..3a4247319e63 100644
--- a/include/drm/drm_print.h
+++ b/include/drm/drm_print.h
@@ -30,6 +30,7 @@
 #include <linux/printk.h>
 #include <linux/seq_file.h>
 #include <linux/device.h>
+#include <linux/debugfs.h>
 
 /**
  * DOC: print
@@ -84,6 +85,7 @@ void __drm_printfn_debug(struct drm_printer *p, struct va_format *vaf);
 __printf(2, 3)
 void drm_printf(struct drm_printer *p, const char *f, ...);
 void drm_puts(struct drm_printer *p, const char *str);
+void drm_print_regset32(struct drm_printer *p, struct debugfs_regset32 *regset);
 
 __printf(2, 0)
 /**
diff --git a/include/drm/drm_syncobj.h b/include/drm/drm_syncobj.h
index 0311c9fdbd2f..6cf7243a1dc5 100644
--- a/include/drm/drm_syncobj.h
+++ b/include/drm/drm_syncobj.h
@@ -27,6 +27,7 @@
 #define __DRM_SYNCOBJ_H__
 
 #include <linux/dma-fence.h>
+#include <linux/dma-fence-chain.h>
 
 struct drm_file;
 
@@ -112,6 +113,10 @@ drm_syncobj_fence_get(struct drm_syncobj *syncobj)
 
 struct drm_syncobj *drm_syncobj_find(struct drm_file *file_private,
 				     u32 handle);
+void drm_syncobj_add_point(struct drm_syncobj *syncobj,
+			   struct dma_fence_chain *chain,
+			   struct dma_fence *fence,
+			   uint64_t point);
 void drm_syncobj_replace_fence(struct drm_syncobj *syncobj,
 			       struct dma_fence *fence);
 int drm_syncobj_find_fence(struct drm_file *file_private,
diff --git a/include/linux/dma-fence-chain.h b/include/linux/dma-fence-chain.h
new file mode 100644
index 000000000000..934a442db8ac
--- /dev/null
+++ b/include/linux/dma-fence-chain.h
@@ -0,0 +1,81 @@
+/*
+ * fence-chain: chain fences together in a timeline
+ *
+ * Copyright (C) 2018 Advanced Micro Devices, Inc.
+ * Authors:
+ *	Christian König <christian.koenig@amd.com>
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 as published by
+ * the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+ * more details.
+ */
+
+#ifndef __LINUX_DMA_FENCE_CHAIN_H
+#define __LINUX_DMA_FENCE_CHAIN_H
+
+#include <linux/dma-fence.h>
+#include <linux/irq_work.h>
+
+/**
+ * struct dma_fence_chain - fence to represent an node of a fence chain
+ * @base: fence base class
+ * @lock: spinlock for fence handling
+ * @prev: previous fence of the chain
+ * @prev_seqno: original previous seqno before garbage collection
+ * @fence: encapsulated fence
+ * @cb: callback structure for signaling
+ * @work: irq work item for signaling
+ */
+struct dma_fence_chain {
+	struct dma_fence base;
+	spinlock_t lock;
+	struct dma_fence __rcu *prev;
+	u64 prev_seqno;
+	struct dma_fence *fence;
+	struct dma_fence_cb cb;
+	struct irq_work work;
+};
+
+extern const struct dma_fence_ops dma_fence_chain_ops;
+
+/**
+ * to_dma_fence_chain - cast a fence to a dma_fence_chain
+ * @fence: fence to cast to a dma_fence_array
+ *
+ * Returns NULL if the fence is not a dma_fence_chain,
+ * or the dma_fence_chain otherwise.
+ */
+static inline struct dma_fence_chain *
+to_dma_fence_chain(struct dma_fence *fence)
+{
+	if (!fence || fence->ops != &dma_fence_chain_ops)
+		return NULL;
+
+	return container_of(fence, struct dma_fence_chain, base);
+}
+
+/**
+ * dma_fence_chain_for_each - iterate over all fences in chain
+ * @iter: current fence
+ * @head: starting point
+ *
+ * Iterate over all fences in the chain. We keep a reference to the current
+ * fence while inside the loop which must be dropped when breaking out.
+ */
+#define dma_fence_chain_for_each(iter, head)	\
+	for (iter = dma_fence_get(head); iter; \
+	     iter = dma_fence_chain_walk(iter))
+
+struct dma_fence *dma_fence_chain_walk(struct dma_fence *fence);
+int dma_fence_chain_find_seqno(struct dma_fence **pfence, uint64_t seqno);
+void dma_fence_chain_init(struct dma_fence_chain *chain,
+			  struct dma_fence *prev,
+			  struct dma_fence *fence,
+			  uint64_t seqno);
+
+#endif /* __LINUX_DMA_FENCE_CHAIN_H */
diff --git a/include/uapi/drm/drm.h b/include/uapi/drm/drm.h
index 300f336633f2..236b01a1fabf 100644
--- a/include/uapi/drm/drm.h
+++ b/include/uapi/drm/drm.h
@@ -735,8 +735,18 @@ struct drm_syncobj_handle {
 	__u32 pad;
 };
 
+struct drm_syncobj_transfer {
+	__u32 src_handle;
+	__u32 dst_handle;
+	__u64 src_point;
+	__u64 dst_point;
+	__u32 flags;
+	__u32 pad;
+};
+
 #define DRM_SYNCOBJ_WAIT_FLAGS_WAIT_ALL (1 << 0)
 #define DRM_SYNCOBJ_WAIT_FLAGS_WAIT_FOR_SUBMIT (1 << 1)
+#define DRM_SYNCOBJ_WAIT_FLAGS_WAIT_AVAILABLE (1 << 2) /* wait for time point to become available */
 struct drm_syncobj_wait {
 	__u64 handles;
 	/* absolute timeout */
@@ -747,12 +757,33 @@ struct drm_syncobj_wait {
 	__u32 pad;
 };
 
+struct drm_syncobj_timeline_wait {
+	__u64 handles;
+	/* wait on specific timeline point for every handles*/
+	__u64 points;
+	/* absolute timeout */
+	__s64 timeout_nsec;
+	__u32 count_handles;
+	__u32 flags;
+	__u32 first_signaled; /* only valid when not waiting all */
+	__u32 pad;
+};
+
+
 struct drm_syncobj_array {
 	__u64 handles;
 	__u32 count_handles;
 	__u32 pad;
 };
 
+struct drm_syncobj_timeline_array {
+	__u64 handles;
+	__u64 points;
+	__u32 count_handles;
+	__u32 pad;
+};
+
+
 /* Query current scanout sequence number */
 struct drm_crtc_get_sequence {
 	__u32 crtc_id;		/* requested crtc_id */
@@ -909,6 +940,11 @@ extern "C" {
 #define DRM_IOCTL_MODE_GET_LEASE	DRM_IOWR(0xC8, struct drm_mode_get_lease)
 #define DRM_IOCTL_MODE_REVOKE_LEASE	DRM_IOWR(0xC9, struct drm_mode_revoke_lease)
 
+#define DRM_IOCTL_SYNCOBJ_TIMELINE_WAIT	DRM_IOWR(0xCA, struct drm_syncobj_timeline_wait)
+#define DRM_IOCTL_SYNCOBJ_QUERY		DRM_IOWR(0xCB, struct drm_syncobj_timeline_array)
+#define DRM_IOCTL_SYNCOBJ_TRANSFER	DRM_IOWR(0xCC, struct drm_syncobj_transfer)
+#define DRM_IOCTL_SYNCOBJ_TIMELINE_SIGNAL	DRM_IOWR(0xCD, struct drm_syncobj_timeline_array)
+
 /**
  * Device specific ioctls should only be in their respective headers
  * The device specific ioctl range is from 0x40 to 0x9f.
diff --git a/include/uapi/drm/drm_mode.h b/include/uapi/drm/drm_mode.h
index 09d72966899a..83cd1636b9be 100644
--- a/include/uapi/drm/drm_mode.h
+++ b/include/uapi/drm/drm_mode.h
@@ -621,7 +621,8 @@ struct drm_color_ctm {
 
 struct drm_color_lut {
 	/*
-	 * Data is U0.16 fixed point format.
+	 * Values are mapped linearly to 0.0 - 1.0 range, with 0x0 == 0.0 and
+	 * 0xffff == 1.0.
 	 */
 	__u16 red;
 	__u16 green;
diff --git a/include/uapi/drm/lima_drm.h b/include/uapi/drm/lima_drm.h
new file mode 100644
index 000000000000..95a00fb867e6
--- /dev/null
+++ b/include/uapi/drm/lima_drm.h
@@ -0,0 +1,169 @@
+/* SPDX-License-Identifier: (GPL-2.0 WITH Linux-syscall-note) OR MIT */
+/* Copyright 2017-2018 Qiang Yu <yuq825@gmail.com> */
+
+#ifndef __LIMA_DRM_H__
+#define __LIMA_DRM_H__
+
+#include "drm.h"
+
+#if defined(__cplusplus)
+extern "C" {
+#endif
+
+enum drm_lima_param_gpu_id {
+	DRM_LIMA_PARAM_GPU_ID_UNKNOWN,
+	DRM_LIMA_PARAM_GPU_ID_MALI400,
+	DRM_LIMA_PARAM_GPU_ID_MALI450,
+};
+
+enum drm_lima_param {
+	DRM_LIMA_PARAM_GPU_ID,
+	DRM_LIMA_PARAM_NUM_PP,
+	DRM_LIMA_PARAM_GP_VERSION,
+	DRM_LIMA_PARAM_PP_VERSION,
+};
+
+/**
+ * get various information of the GPU
+ */
+struct drm_lima_get_param {
+	__u32 param; /* in, value in enum drm_lima_param */
+	__u32 pad;   /* pad, must be zero */
+	__u64 value; /* out, parameter value */
+};
+
+/**
+ * create a buffer for used by GPU
+ */
+struct drm_lima_gem_create {
+	__u32 size;    /* in, buffer size */
+	__u32 flags;   /* in, currently no flags, must be zero */
+	__u32 handle;  /* out, GEM buffer handle */
+	__u32 pad;     /* pad, must be zero */
+};
+
+/**
+ * get information of a buffer
+ */
+struct drm_lima_gem_info {
+	__u32 handle;  /* in, GEM buffer handle */
+	__u32 va;      /* out, virtual address mapped into GPU MMU */
+	__u64 offset;  /* out, used to mmap this buffer to CPU */
+};
+
+#define LIMA_SUBMIT_BO_READ   0x01
+#define LIMA_SUBMIT_BO_WRITE  0x02
+
+/* buffer information used by one task */
+struct drm_lima_gem_submit_bo {
+	__u32 handle;  /* in, GEM buffer handle */
+	__u32 flags;   /* in, buffer read/write by GPU */
+};
+
+#define LIMA_GP_FRAME_REG_NUM 6
+
+/* frame used to setup GP for each task */
+struct drm_lima_gp_frame {
+	__u32 frame[LIMA_GP_FRAME_REG_NUM];
+};
+
+#define LIMA_PP_FRAME_REG_NUM 23
+#define LIMA_PP_WB_REG_NUM 12
+
+/* frame used to setup mali400 GPU PP for each task */
+struct drm_lima_m400_pp_frame {
+	__u32 frame[LIMA_PP_FRAME_REG_NUM];
+	__u32 num_pp;
+	__u32 wb[3 * LIMA_PP_WB_REG_NUM];
+	__u32 plbu_array_address[4];
+	__u32 fragment_stack_address[4];
+};
+
+/* frame used to setup mali450 GPU PP for each task */
+struct drm_lima_m450_pp_frame {
+	__u32 frame[LIMA_PP_FRAME_REG_NUM];
+	__u32 num_pp;
+	__u32 wb[3 * LIMA_PP_WB_REG_NUM];
+	__u32 use_dlbu;
+	__u32 _pad;
+	union {
+		__u32 plbu_array_address[8];
+		__u32 dlbu_regs[4];
+	};
+	__u32 fragment_stack_address[8];
+};
+
+#define LIMA_PIPE_GP  0x00
+#define LIMA_PIPE_PP  0x01
+
+#define LIMA_SUBMIT_FLAG_EXPLICIT_FENCE (1 << 0)
+
+/**
+ * submit a task to GPU
+ *
+ * User can always merge multi sync_file and drm_syncobj
+ * into one drm_syncobj as in_sync[0], but we reserve
+ * in_sync[1] for another task's out_sync to avoid the
+ * export/import/merge pass when explicit sync.
+ */
+struct drm_lima_gem_submit {
+	__u32 ctx;         /* in, context handle task is submitted to */
+	__u32 pipe;        /* in, which pipe to use, GP/PP */
+	__u32 nr_bos;      /* in, array length of bos field */
+	__u32 frame_size;  /* in, size of frame field */
+	__u64 bos;         /* in, array of drm_lima_gem_submit_bo */
+	__u64 frame;       /* in, GP/PP frame */
+	__u32 flags;       /* in, submit flags */
+	__u32 out_sync;    /* in, drm_syncobj handle used to wait task finish after submission */
+	__u32 in_sync[2];  /* in, drm_syncobj handle used to wait before start this task */
+};
+
+#define LIMA_GEM_WAIT_READ   0x01
+#define LIMA_GEM_WAIT_WRITE  0x02
+
+/**
+ * wait pending GPU task finish of a buffer
+ */
+struct drm_lima_gem_wait {
+	__u32 handle;      /* in, GEM buffer handle */
+	__u32 op;          /* in, CPU want to read/write this buffer */
+	__s64 timeout_ns;  /* in, wait timeout in absulute time */
+};
+
+/**
+ * create a context
+ */
+struct drm_lima_ctx_create {
+	__u32 id;          /* out, context handle */
+	__u32 _pad;        /* pad, must be zero */
+};
+
+/**
+ * free a context
+ */
+struct drm_lima_ctx_free {
+	__u32 id;          /* in, context handle */
+	__u32 _pad;        /* pad, must be zero */
+};
+
+#define DRM_LIMA_GET_PARAM   0x00
+#define DRM_LIMA_GEM_CREATE  0x01
+#define DRM_LIMA_GEM_INFO    0x02
+#define DRM_LIMA_GEM_SUBMIT  0x03
+#define DRM_LIMA_GEM_WAIT    0x04
+#define DRM_LIMA_CTX_CREATE  0x05
+#define DRM_LIMA_CTX_FREE    0x06
+
+#define DRM_IOCTL_LIMA_GET_PARAM DRM_IOWR(DRM_COMMAND_BASE + DRM_LIMA_GET_PARAM, struct drm_lima_get_param)
+#define DRM_IOCTL_LIMA_GEM_CREATE DRM_IOWR(DRM_COMMAND_BASE + DRM_LIMA_GEM_CREATE, struct drm_lima_gem_create)
+#define DRM_IOCTL_LIMA_GEM_INFO DRM_IOWR(DRM_COMMAND_BASE + DRM_LIMA_GEM_INFO, struct drm_lima_gem_info)
+#define DRM_IOCTL_LIMA_GEM_SUBMIT DRM_IOW(DRM_COMMAND_BASE + DRM_LIMA_GEM_SUBMIT, struct drm_lima_gem_submit)
+#define DRM_IOCTL_LIMA_GEM_WAIT DRM_IOW(DRM_COMMAND_BASE + DRM_LIMA_GEM_WAIT, struct drm_lima_gem_wait)
+#define DRM_IOCTL_LIMA_CTX_CREATE DRM_IOR(DRM_COMMAND_BASE + DRM_LIMA_CTX_CREATE, struct drm_lima_ctx_create)
+#define DRM_IOCTL_LIMA_CTX_FREE DRM_IOW(DRM_COMMAND_BASE + DRM_LIMA_CTX_FREE, struct drm_lima_ctx_free)
+
+#if defined(__cplusplus)
+}
+#endif
+
+#endif /* __LIMA_DRM_H__ */