From ff58a15a502a900c35ff2f20182249b65719d6d7 Mon Sep 17 00:00:00 2001
From: Thierry Reding <treding@nvidia.com>
Date: Fri, 1 Sep 2017 16:49:54 +0200
Subject: drm/vc4: Use correct path to trace include
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

The header comment in include/trace/define_trace.h specifies that the
TRACE_INCLUDE_PATH needs to be relative to the define_trace.h header
rather than the trace file including it. Most instances get that wrong
and work around it by adding the $(src) directory to the include path.

While this works, it is preferable to refer to the correct path to the
trace file in the first place and avoid any workaround.

Acked-by: Christian König <christian.koenig@amd.com>
Acked-by: Daniel Vetter <daniel.vetter@ffwll.ch>
Signed-off-by: Thierry Reding <treding@nvidia.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20170901144954.19620-6-thierry.reding@gmail.com
---
 drivers/gpu/drm/vc4/Makefile    | 2 --
 drivers/gpu/drm/vc4/vc4_trace.h | 2 +-
 2 files changed, 1 insertion(+), 3 deletions(-)

(limited to 'drivers/gpu/drm/vc4')

diff --git a/drivers/gpu/drm/vc4/Makefile b/drivers/gpu/drm/vc4/Makefile
index 25bd5d30415d..719a771f3d5c 100644
--- a/drivers/gpu/drm/vc4/Makefile
+++ b/drivers/gpu/drm/vc4/Makefile
@@ -24,5 +24,3 @@ vc4-y := \
 vc4-$(CONFIG_DEBUG_FS) += vc4_debugfs.o
 
 obj-$(CONFIG_DRM_VC4)  += vc4.o
-
-CFLAGS_vc4_trace_points.o := -I$(src)
diff --git a/drivers/gpu/drm/vc4/vc4_trace.h b/drivers/gpu/drm/vc4/vc4_trace.h
index ad7b1ea720c2..deafb32923e1 100644
--- a/drivers/gpu/drm/vc4/vc4_trace.h
+++ b/drivers/gpu/drm/vc4/vc4_trace.h
@@ -59,5 +59,5 @@ TRACE_EVENT(vc4_wait_for_seqno_end,
 
 /* This part must be outside protection */
 #undef TRACE_INCLUDE_PATH
-#define TRACE_INCLUDE_PATH .
+#define TRACE_INCLUDE_PATH ../../drivers/gpu/drm/vc4
 #include <trace/define_trace.h>
-- 
cgit v1.2.3


From 5663077a56804890506c913b3ca9fee78764f8b3 Mon Sep 17 00:00:00 2001
From: Colin Ian King <colin.king@canonical.com>
Date: Fri, 8 Sep 2017 15:05:04 +0100
Subject: drm/vc4: clean up error handling on devm_kzalloc failure

The current error handling when devm_kzalloc fails performs a
non-null check on connector which is redundant because connector
is null at that failure point.  Once this is removed, make the
failure path into a trivial -ENOMEM return to clean up the
error handling. Also remove need to initialize connector to NULL.

Detected by CoverityScan CID#1339527 ("Logically dead code")
Signed-off-by: Colin Ian King <colin.king@canonical.com>
Signed-off-by: Eric Anholt <eric@anholt.net>
Reviewed-by: Eric Anholt <eric@anholt.net>
Link: https://patchwork.freedesktop.org/patch/msgid/20170908140504.1340-1-colin.king@canonical.com
---
 drivers/gpu/drm/vc4/vc4_hdmi.c | 15 +++------------
 1 file changed, 3 insertions(+), 12 deletions(-)

(limited to 'drivers/gpu/drm/vc4')

diff --git a/drivers/gpu/drm/vc4/vc4_hdmi.c b/drivers/gpu/drm/vc4/vc4_hdmi.c
index 937da8dd65b8..fa37a1c07cf6 100644
--- a/drivers/gpu/drm/vc4/vc4_hdmi.c
+++ b/drivers/gpu/drm/vc4/vc4_hdmi.c
@@ -309,16 +309,13 @@ static const struct drm_connector_helper_funcs vc4_hdmi_connector_helper_funcs =
 static struct drm_connector *vc4_hdmi_connector_init(struct drm_device *dev,
 						     struct drm_encoder *encoder)
 {
-	struct drm_connector *connector = NULL;
+	struct drm_connector *connector;
 	struct vc4_hdmi_connector *hdmi_connector;
-	int ret = 0;
 
 	hdmi_connector = devm_kzalloc(dev->dev, sizeof(*hdmi_connector),
 				      GFP_KERNEL);
-	if (!hdmi_connector) {
-		ret = -ENOMEM;
-		goto fail;
-	}
+	if (!hdmi_connector)
+		return ERR_PTR(-ENOMEM);
 	connector = &hdmi_connector->base;
 
 	hdmi_connector->encoder = encoder;
@@ -336,12 +333,6 @@ static struct drm_connector *vc4_hdmi_connector_init(struct drm_device *dev,
 	drm_mode_connector_attach_encoder(connector, encoder);
 
 	return connector;
-
- fail:
-	if (connector)
-		vc4_hdmi_connector_destroy(connector);
-
-	return ERR_PTR(ret);
 }
 
 static void vc4_hdmi_encoder_destroy(struct drm_encoder *encoder)
-- 
cgit v1.2.3


From af2eca53206c59ce9308a4f5f46c4a104a179b6b Mon Sep 17 00:00:00 2001
From: Eric Anholt <eric@anholt.net>
Date: Tue, 15 Aug 2017 16:47:19 -0700
Subject: drm/vc4: Avoid using vrefresh==0 mode in DSI htotal math.

The incoming mode might have a missing vrefresh field if it came from
drmModeSetCrtc(), which the kernel is supposed to calculate using
drm_mode_vrefresh().  We could either use that or the adjusted_mode's
original vrefresh value.

However, we can maintain a more exact vrefresh value (not just the
integer approximation), by scaling by the ratio of our clocks.

v2: Use math suggested by Andrzej Hajda instead.
v3: Simplify math now that adjusted_mode->clock isn't padded.
v4: Drop some parens.

Signed-off-by: Eric Anholt <eric@anholt.net>
Link: https://patchwork.freedesktop.org/patch/msgid/20170815234722.20700-2-eric@anholt.net
Reviewed-by: Andrzej Hajda <a.hajda@samsung.com>
---
 drivers/gpu/drm/vc4/vc4_dsi.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

(limited to 'drivers/gpu/drm/vc4')

diff --git a/drivers/gpu/drm/vc4/vc4_dsi.c b/drivers/gpu/drm/vc4/vc4_dsi.c
index d1e0dc908048..04796d7d0fdb 100644
--- a/drivers/gpu/drm/vc4/vc4_dsi.c
+++ b/drivers/gpu/drm/vc4/vc4_dsi.c
@@ -866,7 +866,8 @@ static bool vc4_dsi_encoder_mode_fixup(struct drm_encoder *encoder,
 	adjusted_mode->clock = pixel_clock_hz / 1000 + 1;
 
 	/* Given the new pixel clock, adjust HFP to keep vrefresh the same. */
-	adjusted_mode->htotal = pixel_clock_hz / (mode->vrefresh * mode->vtotal);
+	adjusted_mode->htotal = adjusted_mode->clock * mode->htotal /
+				mode->clock;
 	adjusted_mode->hsync_end += adjusted_mode->htotal - mode->htotal;
 	adjusted_mode->hsync_start += adjusted_mode->htotal - mode->htotal;
 
-- 
cgit v1.2.3


From 32ad958d85b92ee7068aa1de5917777be080cc3c Mon Sep 17 00:00:00 2001
From: Eric Anholt <eric@anholt.net>
Date: Tue, 15 Aug 2017 16:47:20 -0700
Subject: drm/vc4: Set up the DSI host at pdev probe time, not component bind.

We need the following things to happen in sequence:

DSI host creation
DSI device creation in the panel driver (needs DSI host)
DSI device attach from panel to host.
DSI drm_panel_add()
DSI encoder creation
DSI encoder's DRM panel/bridge attach

Unless we allow device creation while the host isn't up yet, we need
to break the -EPROBE_DEFER deadlock between the panel driver looking
up the host and the host driver looking up the panel.  We can do so by
moving the DSI host creation outside of the component bind loop, and
the panel/bridge lookup/attach into the component bind process.

Signed-off-by: Eric Anholt <eric@anholt.net>
Link: https://patchwork.freedesktop.org/patch/msgid/20170815234722.20700-3-eric@anholt.net
Reviewed-by: Archit Taneja <architt@codeaurora.org>
---
 drivers/gpu/drm/vc4/vc4_dsi.c | 97 +++++++++++++++++++++++++------------------
 1 file changed, 57 insertions(+), 40 deletions(-)

(limited to 'drivers/gpu/drm/vc4')

diff --git a/drivers/gpu/drm/vc4/vc4_dsi.c b/drivers/gpu/drm/vc4/vc4_dsi.c
index 04796d7d0fdb..925c726ac694 100644
--- a/drivers/gpu/drm/vc4/vc4_dsi.c
+++ b/drivers/gpu/drm/vc4/vc4_dsi.c
@@ -33,6 +33,7 @@
 #include <drm/drm_crtc_helper.h>
 #include <drm/drm_edid.h>
 #include <drm/drm_mipi_dsi.h>
+#include <drm/drm_of.h>
 #include <drm/drm_panel.h>
 #include <linux/clk.h>
 #include <linux/clk-provider.h>
@@ -504,7 +505,6 @@ struct vc4_dsi {
 	struct mipi_dsi_host dsi_host;
 	struct drm_encoder *encoder;
 	struct drm_bridge *bridge;
-	bool is_panel_bridge;
 
 	void __iomem *regs;
 
@@ -1289,7 +1289,6 @@ static int vc4_dsi_host_attach(struct mipi_dsi_host *host,
 			       struct mipi_dsi_device *device)
 {
 	struct vc4_dsi *dsi = host_to_dsi(host);
-	int ret = 0;
 
 	dsi->lanes = device->lanes;
 	dsi->channel = device->channel;
@@ -1324,34 +1323,12 @@ static int vc4_dsi_host_attach(struct mipi_dsi_host *host,
 		return 0;
 	}
 
-	dsi->bridge = of_drm_find_bridge(device->dev.of_node);
-	if (!dsi->bridge) {
-		struct drm_panel *panel =
-			of_drm_find_panel(device->dev.of_node);
-
-		dsi->bridge = drm_panel_bridge_add(panel,
-						   DRM_MODE_CONNECTOR_DSI);
-		if (IS_ERR(dsi->bridge)) {
-			ret = PTR_ERR(dsi->bridge);
-			dsi->bridge = NULL;
-			return ret;
-		}
-		dsi->is_panel_bridge = true;
-	}
-
-	return drm_bridge_attach(dsi->encoder, dsi->bridge, NULL);
+	return 0;
 }
 
 static int vc4_dsi_host_detach(struct mipi_dsi_host *host,
 			       struct mipi_dsi_device *device)
 {
-	struct vc4_dsi *dsi = host_to_dsi(host);
-
-	if (dsi->is_panel_bridge) {
-		drm_panel_bridge_remove(dsi->bridge);
-		dsi->bridge = NULL;
-	}
-
 	return 0;
 }
 
@@ -1493,16 +1470,13 @@ static int vc4_dsi_bind(struct device *dev, struct device *master, void *data)
 	struct platform_device *pdev = to_platform_device(dev);
 	struct drm_device *drm = dev_get_drvdata(master);
 	struct vc4_dev *vc4 = to_vc4_dev(drm);
-	struct vc4_dsi *dsi;
+	struct vc4_dsi *dsi = dev_get_drvdata(dev);
 	struct vc4_dsi_encoder *vc4_dsi_encoder;
+	struct drm_panel *panel;
 	const struct of_device_id *match;
 	dma_cap_mask_t dma_mask;
 	int ret;
 
-	dsi = devm_kzalloc(dev, sizeof(*dsi), GFP_KERNEL);
-	if (!dsi)
-		return -ENOMEM;
-
 	match = of_match_device(vc4_dsi_dt_match, dev);
 	if (!match)
 		return -ENODEV;
@@ -1517,7 +1491,6 @@ static int vc4_dsi_bind(struct device *dev, struct device *master, void *data)
 	vc4_dsi_encoder->dsi = dsi;
 	dsi->encoder = &vc4_dsi_encoder->base.base;
 
-	dsi->pdev = pdev;
 	dsi->regs = vc4_ioremap_regs(pdev, 0);
 	if (IS_ERR(dsi->regs))
 		return PTR_ERR(dsi->regs);
@@ -1598,6 +1571,18 @@ static int vc4_dsi_bind(struct device *dev, struct device *master, void *data)
 		return ret;
 	}
 
+	ret = drm_of_find_panel_or_bridge(dev->of_node, 0, 0,
+					  &panel, &dsi->bridge);
+	if (ret)
+		return ret;
+
+	if (panel) {
+		dsi->bridge = devm_drm_panel_bridge_add(dev, panel,
+							DRM_MODE_CONNECTOR_DSI);
+		if (IS_ERR(dsi->bridge))
+			return PTR_ERR(dsi->bridge);
+	}
+
 	/* The esc clock rate is supposed to always be 100Mhz. */
 	ret = clk_set_rate(dsi->escape_clock, 100 * 1000000);
 	if (ret) {
@@ -1616,12 +1601,11 @@ static int vc4_dsi_bind(struct device *dev, struct device *master, void *data)
 			 DRM_MODE_ENCODER_DSI, NULL);
 	drm_encoder_helper_add(dsi->encoder, &vc4_dsi_encoder_helper_funcs);
 
-	dsi->dsi_host.ops = &vc4_dsi_host_ops;
-	dsi->dsi_host.dev = dev;
-
-	mipi_dsi_host_register(&dsi->dsi_host);
-
-	dev_set_drvdata(dev, dsi);
+	ret = drm_bridge_attach(dsi->encoder, dsi->bridge, NULL);
+	if (ret) {
+		dev_err(dev, "bridge attach failed: %d\n", ret);
+		return ret;
+	}
 
 	pm_runtime_enable(dev);
 
@@ -1639,8 +1623,6 @@ static void vc4_dsi_unbind(struct device *dev, struct device *master,
 
 	vc4_dsi_encoder_destroy(dsi->encoder);
 
-	mipi_dsi_host_unregister(&dsi->dsi_host);
-
 	if (dsi->port == 1)
 		vc4->dsi1 = NULL;
 }
@@ -1652,12 +1634,47 @@ static const struct component_ops vc4_dsi_ops = {
 
 static int vc4_dsi_dev_probe(struct platform_device *pdev)
 {
-	return component_add(&pdev->dev, &vc4_dsi_ops);
+	struct device *dev = &pdev->dev;
+	struct vc4_dsi *dsi;
+	int ret;
+
+	dsi = devm_kzalloc(dev, sizeof(*dsi), GFP_KERNEL);
+	if (!dsi)
+		return -ENOMEM;
+	dev_set_drvdata(dev, dsi);
+
+	dsi->pdev = pdev;
+
+	/* Note, the initialization sequence for DSI and panels is
+	 * tricky.  The component bind above won't get past its
+	 * -EPROBE_DEFER until the panel/bridge probes.  The
+	 * panel/bridge will return -EPROBE_DEFER until it has a
+	 * mipi_dsi_host to register its device to.  So, we register
+	 * the host during pdev probe time, so vc4 as a whole can then
+	 * -EPROBE_DEFER its component bind process until the panel
+	 * successfully attaches.
+	 */
+	dsi->dsi_host.ops = &vc4_dsi_host_ops;
+	dsi->dsi_host.dev = dev;
+	mipi_dsi_host_register(&dsi->dsi_host);
+
+	ret = component_add(&pdev->dev, &vc4_dsi_ops);
+	if (ret) {
+		mipi_dsi_host_unregister(&dsi->dsi_host);
+		return ret;
+	}
+
+	return 0;
 }
 
 static int vc4_dsi_dev_remove(struct platform_device *pdev)
 {
+	struct device *dev = &pdev->dev;
+	struct vc4_dsi *dsi = dev_get_drvdata(dev);
+
 	component_del(&pdev->dev, &vc4_dsi_ops);
+	mipi_dsi_host_unregister(&dsi->dsi_host);
+
 	return 0;
 }
 
-- 
cgit v1.2.3


From 8f6b06c11ca4ff0160df54b53299bf94bfd05645 Mon Sep 17 00:00:00 2001
From: "benjamin.gaignard@linaro.org" <benjamin.gaignard@linaro.org>
Date: Mon, 2 Oct 2017 11:34:47 +0200
Subject: drm/vc4: remove bridge from driver internal structure

With a call to drm_of_panel_bridge_remove() we could remove
the bridge without store it in vc4_dpi internal driver structure.

Signed-off-by: Benjamin Gaignard <benjamin.gaignard@linaro.org>
Reviewed-by: Eric Anholt <eric@anholt.net>
Link: https://patchwork.freedesktop.org/patch/msgid/1506936888-23844-5-git-send-email-benjamin.gaignard@linaro.org
---
 drivers/gpu/drm/vc4/vc4_dpi.c | 17 ++++++-----------
 1 file changed, 6 insertions(+), 11 deletions(-)

(limited to 'drivers/gpu/drm/vc4')

diff --git a/drivers/gpu/drm/vc4/vc4_dpi.c b/drivers/gpu/drm/vc4/vc4_dpi.c
index 519cefef800d..72c9dbd81d7f 100644
--- a/drivers/gpu/drm/vc4/vc4_dpi.c
+++ b/drivers/gpu/drm/vc4/vc4_dpi.c
@@ -97,8 +97,6 @@ struct vc4_dpi {
 
 	struct drm_encoder *encoder;
 	struct drm_connector *connector;
-	struct drm_bridge *bridge;
-	bool is_panel_bridge;
 
 	void __iomem *regs;
 
@@ -251,10 +249,11 @@ static int vc4_dpi_init_bridge(struct vc4_dpi *dpi)
 {
 	struct device *dev = &dpi->pdev->dev;
 	struct drm_panel *panel;
+	struct drm_bridge *bridge;
 	int ret;
 
 	ret = drm_of_find_panel_or_bridge(dev->of_node, 0, 0,
-					  &panel, &dpi->bridge);
+					  &panel, &bridge);
 	if (ret) {
 		/* If nothing was connected in the DT, that's not an
 		 * error.
@@ -265,13 +264,10 @@ static int vc4_dpi_init_bridge(struct vc4_dpi *dpi)
 			return ret;
 	}
 
-	if (panel) {
-		dpi->bridge = drm_panel_bridge_add(panel,
-						   DRM_MODE_CONNECTOR_DPI);
-		dpi->is_panel_bridge = true;
-	}
+	if (panel)
+		bridge = drm_panel_bridge_add(panel, DRM_MODE_CONNECTOR_DPI);
 
-	return drm_bridge_attach(dpi->encoder, dpi->bridge, NULL);
+	return drm_bridge_attach(dpi->encoder, bridge, NULL);
 }
 
 static int vc4_dpi_bind(struct device *dev, struct device *master, void *data)
@@ -352,8 +348,7 @@ static void vc4_dpi_unbind(struct device *dev, struct device *master,
 	struct vc4_dev *vc4 = to_vc4_dev(drm);
 	struct vc4_dpi *dpi = dev_get_drvdata(dev);
 
-	if (dpi->is_panel_bridge)
-		drm_panel_bridge_remove(dpi->bridge);
+	drm_of_panel_bridge_remove(dev->of_node, 0, 0);
 
 	drm_encoder_cleanup(dpi->encoder);
 
-- 
cgit v1.2.3


From d409eeafa9ba1c0f2eb75a2619fc787808a545e4 Mon Sep 17 00:00:00 2001
From: Eric Anholt <eric@anholt.net>
Date: Tue, 15 Aug 2017 16:47:18 -0700
Subject: drm/vc4: Move the DSI clock divider workaround closer to the clock
 call.

We want the adjusted_mode->clock to be the actual clock we're
expecting to program, so that consumers see the right values for clock
and vrefresh.

Signed-off-by: Eric Anholt <eric@anholt.net>
Link: https://patchwork.freedesktop.org/patch/msgid/20170815234722.20700-1-eric@anholt.net
Reviewed-by: Boris Brezillon <boris.brezillon@free-electrons.com>
---
 drivers/gpu/drm/vc4/vc4_dsi.c | 12 ++++++------
 1 file changed, 6 insertions(+), 6 deletions(-)

(limited to 'drivers/gpu/drm/vc4')

diff --git a/drivers/gpu/drm/vc4/vc4_dsi.c b/drivers/gpu/drm/vc4/vc4_dsi.c
index 925c726ac694..554605af344e 100644
--- a/drivers/gpu/drm/vc4/vc4_dsi.c
+++ b/drivers/gpu/drm/vc4/vc4_dsi.c
@@ -859,11 +859,7 @@ static bool vc4_dsi_encoder_mode_fixup(struct drm_encoder *encoder,
 	pll_clock = parent_rate / divider;
 	pixel_clock_hz = pll_clock / dsi->divider;
 
-	/* Round up the clk_set_rate() request slightly, since
-	 * PLLD_DSI1 is an integer divider and its rate selection will
-	 * never round up.
-	 */
-	adjusted_mode->clock = pixel_clock_hz / 1000 + 1;
+	adjusted_mode->clock = pixel_clock_hz / 1000;
 
 	/* Given the new pixel clock, adjust HFP to keep vrefresh the same. */
 	adjusted_mode->htotal = adjusted_mode->clock * mode->htotal /
@@ -901,7 +897,11 @@ static void vc4_dsi_encoder_enable(struct drm_encoder *encoder)
 		vc4_dsi_dump_regs(dsi);
 	}
 
-	phy_clock = pixel_clock_hz * dsi->divider;
+	/* Round up the clk_set_rate() request slightly, since
+	 * PLLD_DSI1 is an integer divider and its rate selection will
+	 * never round up.
+	 */
+	phy_clock = (pixel_clock_hz + 1000) * dsi->divider;
 	ret = clk_set_rate(dsi->pll_phy_clock, phy_clock);
 	if (ret) {
 		dev_err(&dsi->pdev->dev,
-- 
cgit v1.2.3


From 652badb9458b41a24b156146a73a5bfbc4356f29 Mon Sep 17 00:00:00 2001
From: Eric Anholt <eric@anholt.net>
Date: Wed, 27 Sep 2017 12:32:09 -0700
Subject: drm/vc4: Fix pitch setup for T-format scanout.

The documentation said to use src_w here, and I didn't consider that
we actually needed to be using pitch somewhere in our setup.  Fixes
scanout on my DSI panel when X11 does initial setup with 1920x1080
HDMI and 800x480 DSI both at 0,0 of the same framebuffer.

v2: Add some comments requested by Boris

Signed-off-by: Eric Anholt <eric@anholt.net>
Fixes: 98830d91da08 ("drm/vc4: Add T-format scanout support.")
Link: https://patchwork.freedesktop.org/patch/msgid/20170927193209.11870-1-eric@anholt.net
Reviewed-by: Boris Brezillon <boris.brezillon@free-electrons.com>
---
 drivers/gpu/drm/vc4/vc4_plane.c | 20 +++++++++++++++-----
 1 file changed, 15 insertions(+), 5 deletions(-)

(limited to 'drivers/gpu/drm/vc4')

diff --git a/drivers/gpu/drm/vc4/vc4_plane.c b/drivers/gpu/drm/vc4/vc4_plane.c
index 2968b3ebb895..3a767a038f72 100644
--- a/drivers/gpu/drm/vc4/vc4_plane.c
+++ b/drivers/gpu/drm/vc4/vc4_plane.c
@@ -547,14 +547,24 @@ static int vc4_plane_mode_set(struct drm_plane *plane,
 		tiling = SCALER_CTL0_TILING_LINEAR;
 		pitch0 = VC4_SET_FIELD(fb->pitches[0], SCALER_SRC_PITCH);
 		break;
-	case DRM_FORMAT_MOD_BROADCOM_VC4_T_TILED:
+
+	case DRM_FORMAT_MOD_BROADCOM_VC4_T_TILED: {
+		/* For T-tiled, the FB pitch is "how many bytes from
+		 * one row to the next, such that pitch * tile_h ==
+		 * tile_size * tiles_per_row."
+		 */
+		u32 tile_size_shift = 12; /* T tiles are 4kb */
+		u32 tile_h_shift = 5; /* 16 and 32bpp are 32 pixels high */
+		u32 tiles_w = fb->pitches[0] >> (tile_size_shift - tile_h_shift);
+
 		tiling = SCALER_CTL0_TILING_256B_OR_T;
 
-		pitch0 = (VC4_SET_FIELD(0, SCALER_PITCH0_TILE_Y_OFFSET),
-			  VC4_SET_FIELD(0, SCALER_PITCH0_TILE_WIDTH_L),
-			  VC4_SET_FIELD((vc4_state->src_w[0] + 31) >> 5,
-					SCALER_PITCH0_TILE_WIDTH_R));
+		pitch0 = (VC4_SET_FIELD(0, SCALER_PITCH0_TILE_Y_OFFSET) |
+			  VC4_SET_FIELD(0, SCALER_PITCH0_TILE_WIDTH_L) |
+			  VC4_SET_FIELD(tiles_w, SCALER_PITCH0_TILE_WIDTH_R));
 		break;
+	}
+
 	default:
 		DRM_DEBUG_KMS("Unsupported FB tiling flag 0x%16llx",
 			      (long long)fb->modifier);
-- 
cgit v1.2.3


From b9f19259b84dc648f207a46f3581d15eeaedf4b6 Mon Sep 17 00:00:00 2001
From: Boris Brezillon <boris.brezillon@free-electrons.com>
Date: Thu, 19 Oct 2017 14:57:48 +0200
Subject: drm/vc4: Add the DRM_IOCTL_VC4_GEM_MADVISE ioctl

This ioctl will allow us to purge inactive userspace buffers when the
system is running out of contiguous memory.

For now, the purge logic is rather dumb in that it does not try to
release only the amount of BO needed to meet the last CMA alloc request
but instead purges all objects placed in the purgeable pool as soon as
we experience a CMA allocation failure.

Note that the in-kernel BO cache is always purged before the purgeable
cache because those objects are known to be unused while objects marked
as purgeable by a userspace application/library might have to be
restored when they are marked back as unpurgeable, which can be
expensive.

Signed-off-by: Boris Brezillon <boris.brezillon@free-electrons.com>
Signed-off-by: Eric Anholt <eric@anholt.net>
Reviewed-by: Eric Anholt <eric@anholt.net>
Link: https://patchwork.freedesktop.org/patch/msgid/20171019125748.3152-1-boris.brezillon@free-electrons.com
---
 drivers/gpu/drm/vc4/vc4_bo.c    | 287 ++++++++++++++++++++++++++++++++++++++--
 drivers/gpu/drm/vc4/vc4_drv.c   |  10 +-
 drivers/gpu/drm/vc4/vc4_drv.h   |  30 +++++
 drivers/gpu/drm/vc4/vc4_gem.c   | 156 +++++++++++++++++++++-
 drivers/gpu/drm/vc4/vc4_plane.c |  20 +++
 include/uapi/drm/vc4_drm.h      |  19 +++
 6 files changed, 507 insertions(+), 15 deletions(-)

(limited to 'drivers/gpu/drm/vc4')

diff --git a/drivers/gpu/drm/vc4/vc4_bo.c b/drivers/gpu/drm/vc4/vc4_bo.c
index 3afdbf4bc10b..01a53ba304f8 100644
--- a/drivers/gpu/drm/vc4/vc4_bo.c
+++ b/drivers/gpu/drm/vc4/vc4_bo.c
@@ -53,6 +53,17 @@ static void vc4_bo_stats_dump(struct vc4_dev *vc4)
 			 vc4->bo_labels[i].size_allocated / 1024,
 			 vc4->bo_labels[i].num_allocated);
 	}
+
+	mutex_lock(&vc4->purgeable.lock);
+	if (vc4->purgeable.num)
+		DRM_INFO("%30s: %6zdkb BOs (%d)\n", "userspace BO cache",
+			 vc4->purgeable.size / 1024, vc4->purgeable.num);
+
+	if (vc4->purgeable.purged_num)
+		DRM_INFO("%30s: %6zdkb BOs (%d)\n", "total purged BO",
+			 vc4->purgeable.purged_size / 1024,
+			 vc4->purgeable.purged_num);
+	mutex_unlock(&vc4->purgeable.lock);
 }
 
 #ifdef CONFIG_DEBUG_FS
@@ -75,6 +86,17 @@ int vc4_bo_stats_debugfs(struct seq_file *m, void *unused)
 	}
 	mutex_unlock(&vc4->bo_lock);
 
+	mutex_lock(&vc4->purgeable.lock);
+	if (vc4->purgeable.num)
+		seq_printf(m, "%30s: %6dkb BOs (%d)\n", "userspace BO cache",
+			   vc4->purgeable.size / 1024, vc4->purgeable.num);
+
+	if (vc4->purgeable.purged_num)
+		seq_printf(m, "%30s: %6dkb BOs (%d)\n", "total purged BO",
+			   vc4->purgeable.purged_size / 1024,
+			   vc4->purgeable.purged_num);
+	mutex_unlock(&vc4->purgeable.lock);
+
 	return 0;
 }
 #endif
@@ -247,6 +269,109 @@ static void vc4_bo_cache_purge(struct drm_device *dev)
 	mutex_unlock(&vc4->bo_lock);
 }
 
+void vc4_bo_add_to_purgeable_pool(struct vc4_bo *bo)
+{
+	struct vc4_dev *vc4 = to_vc4_dev(bo->base.base.dev);
+
+	mutex_lock(&vc4->purgeable.lock);
+	list_add_tail(&bo->size_head, &vc4->purgeable.list);
+	vc4->purgeable.num++;
+	vc4->purgeable.size += bo->base.base.size;
+	mutex_unlock(&vc4->purgeable.lock);
+}
+
+static void vc4_bo_remove_from_purgeable_pool_locked(struct vc4_bo *bo)
+{
+	struct vc4_dev *vc4 = to_vc4_dev(bo->base.base.dev);
+
+	/* list_del_init() is used here because the caller might release
+	 * the purgeable lock in order to acquire the madv one and update the
+	 * madv status.
+	 * During this short period of time a user might decide to mark
+	 * the BO as unpurgeable, and if bo->madv is set to
+	 * VC4_MADV_DONTNEED it will try to remove the BO from the
+	 * purgeable list which will fail if the ->next/prev fields
+	 * are set to LIST_POISON1/LIST_POISON2 (which is what
+	 * list_del() does).
+	 * Re-initializing the list element guarantees that list_del()
+	 * will work correctly even if it's a NOP.
+	 */
+	list_del_init(&bo->size_head);
+	vc4->purgeable.num--;
+	vc4->purgeable.size -= bo->base.base.size;
+}
+
+void vc4_bo_remove_from_purgeable_pool(struct vc4_bo *bo)
+{
+	struct vc4_dev *vc4 = to_vc4_dev(bo->base.base.dev);
+
+	mutex_lock(&vc4->purgeable.lock);
+	vc4_bo_remove_from_purgeable_pool_locked(bo);
+	mutex_unlock(&vc4->purgeable.lock);
+}
+
+static void vc4_bo_purge(struct drm_gem_object *obj)
+{
+	struct vc4_bo *bo = to_vc4_bo(obj);
+	struct drm_device *dev = obj->dev;
+
+	WARN_ON(!mutex_is_locked(&bo->madv_lock));
+	WARN_ON(bo->madv != VC4_MADV_DONTNEED);
+
+	drm_vma_node_unmap(&obj->vma_node, dev->anon_inode->i_mapping);
+
+	dma_free_wc(dev->dev, obj->size, bo->base.vaddr, bo->base.paddr);
+	bo->base.vaddr = NULL;
+	bo->madv = __VC4_MADV_PURGED;
+}
+
+static void vc4_bo_userspace_cache_purge(struct drm_device *dev)
+{
+	struct vc4_dev *vc4 = to_vc4_dev(dev);
+
+	mutex_lock(&vc4->purgeable.lock);
+	while (!list_empty(&vc4->purgeable.list)) {
+		struct vc4_bo *bo = list_first_entry(&vc4->purgeable.list,
+						     struct vc4_bo, size_head);
+		struct drm_gem_object *obj = &bo->base.base;
+		size_t purged_size = 0;
+
+		vc4_bo_remove_from_purgeable_pool_locked(bo);
+
+		/* Release the purgeable lock while we're purging the BO so
+		 * that other people can continue inserting things in the
+		 * purgeable pool without having to wait for all BOs to be
+		 * purged.
+		 */
+		mutex_unlock(&vc4->purgeable.lock);
+		mutex_lock(&bo->madv_lock);
+
+		/* Since we released the purgeable pool lock before acquiring
+		 * the BO madv one, the user may have marked the BO as WILLNEED
+		 * and re-used it in the meantime.
+		 * Before purging the BO we need to make sure
+		 * - it is still marked as DONTNEED
+		 * - it has not been re-inserted in the purgeable list
+		 * - it is not used by HW blocks
+		 * If one of these conditions is not met, just skip the entry.
+		 */
+		if (bo->madv == VC4_MADV_DONTNEED &&
+		    list_empty(&bo->size_head) &&
+		    !refcount_read(&bo->usecnt)) {
+			purged_size = bo->base.base.size;
+			vc4_bo_purge(obj);
+		}
+		mutex_unlock(&bo->madv_lock);
+		mutex_lock(&vc4->purgeable.lock);
+
+		if (purged_size) {
+			vc4->purgeable.purged_size += purged_size;
+			vc4->purgeable.purged_num++;
+		}
+	}
+	mutex_unlock(&vc4->purgeable.lock);
+}
+
 static struct vc4_bo *vc4_bo_get_from_cache(struct drm_device *dev,
 					    uint32_t size,
 					    enum vc4_kernel_bo_type type)
@@ -293,6 +418,9 @@ struct drm_gem_object *vc4_create_object(struct drm_device *dev, size_t size)
 	if (!bo)
 		return ERR_PTR(-ENOMEM);
 
+	bo->madv = VC4_MADV_WILLNEED;
+	refcount_set(&bo->usecnt, 0);
+	mutex_init(&bo->madv_lock);
 	mutex_lock(&vc4->bo_lock);
 	bo->label = VC4_BO_TYPE_KERNEL;
 	vc4->bo_labels[VC4_BO_TYPE_KERNEL].num_allocated++;
@@ -330,16 +458,38 @@ struct vc4_bo *vc4_bo_create(struct drm_device *dev, size_t unaligned_size,
 		 * CMA allocations we've got laying around and try again.
 		 */
 		vc4_bo_cache_purge(dev);
+		cma_obj = drm_gem_cma_create(dev, size);
+	}
 
+	if (IS_ERR(cma_obj)) {
+		/*
+		 * Still not enough CMA memory, purge the userspace BO
+		 * cache and retry.
+		 * This is sub-optimal since we purge the whole userspace
+		 * BO cache which forces user that want to re-use the BO to
+		 * restore its initial content.
+		 * Ideally, we should purge entries one by one and retry
+		 * after each to see if CMA allocation succeeds. Or even
+		 * better, try to find an entry with at least the same
+		 * size.
+		 */
+		vc4_bo_userspace_cache_purge(dev);
 		cma_obj = drm_gem_cma_create(dev, size);
-		if (IS_ERR(cma_obj)) {
-			DRM_ERROR("Failed to allocate from CMA:\n");
-			vc4_bo_stats_dump(vc4);
-			return ERR_PTR(-ENOMEM);
-		}
+	}
+
+	if (IS_ERR(cma_obj)) {
+		DRM_ERROR("Failed to allocate from CMA:\n");
+		vc4_bo_stats_dump(vc4);
+		return ERR_PTR(-ENOMEM);
 	}
 	bo = to_vc4_bo(&cma_obj->base);
 
+	/* By default, BOs do not support the MADV ioctl. This will be enabled
+	 * only on BOs that are exposed to userspace (V3D, V3D_SHADER and DUMB
+	 * BOs).
+	 */
+	bo->madv = __VC4_MADV_NOTSUPP;
+
 	mutex_lock(&vc4->bo_lock);
 	vc4_bo_set_label(&cma_obj->base, type);
 	mutex_unlock(&vc4->bo_lock);
@@ -365,6 +515,8 @@ int vc4_dumb_create(struct drm_file *file_priv,
 	if (IS_ERR(bo))
 		return PTR_ERR(bo);
 
+	bo->madv = VC4_MADV_WILLNEED;
+
 	ret = drm_gem_handle_create(file_priv, &bo->base.base, &args->handle);
 	drm_gem_object_put_unlocked(&bo->base.base);
 
@@ -403,6 +555,12 @@ void vc4_free_object(struct drm_gem_object *gem_bo)
 	struct vc4_bo *bo = to_vc4_bo(gem_bo);
 	struct list_head *cache_list;
 
+	/* Remove the BO from the purgeable list. */
+	mutex_lock(&bo->madv_lock);
+	if (bo->madv == VC4_MADV_DONTNEED && !refcount_read(&bo->usecnt))
+		vc4_bo_remove_from_purgeable_pool(bo);
+	mutex_unlock(&bo->madv_lock);
+
 	mutex_lock(&vc4->bo_lock);
 	/* If the object references someone else's memory, we can't cache it.
 	 */
@@ -418,7 +576,8 @@ void vc4_free_object(struct drm_gem_object *gem_bo)
 	}
 
 	/* If this object was partially constructed but CMA allocation
-	 * had failed, just free it.
+	 * had failed, just free it. Can also happen when the BO has been
+	 * purged.
 	 */
 	if (!bo->base.vaddr) {
 		vc4_bo_destroy(bo);
@@ -437,6 +596,10 @@ void vc4_free_object(struct drm_gem_object *gem_bo)
 		bo->validated_shader = NULL;
 	}
 
+	/* Reset madv and usecnt before adding the BO to the cache. */
+	bo->madv = __VC4_MADV_NOTSUPP;
+	refcount_set(&bo->usecnt, 0);
+
 	bo->t_format = false;
 	bo->free_time = jiffies;
 	list_add(&bo->size_head, cache_list);
@@ -461,6 +624,56 @@ static void vc4_bo_cache_time_work(struct work_struct *work)
 	mutex_unlock(&vc4->bo_lock);
 }
 
+int vc4_bo_inc_usecnt(struct vc4_bo *bo)
+{
+	int ret;
+
+	/* Fast path: if the BO is already retained by someone, no need to
+	 * check the madv status.
+	 */
+	if (refcount_inc_not_zero(&bo->usecnt))
+		return 0;
+
+	mutex_lock(&bo->madv_lock);
+	switch (bo->madv) {
+	case VC4_MADV_WILLNEED:
+		refcount_inc(&bo->usecnt);
+		ret = 0;
+		break;
+	case VC4_MADV_DONTNEED:
+		/* We shouldn't use a BO marked as purgeable if at least
+		 * someone else retained its content by incrementing usecnt.
+		 * Luckily the BO hasn't been purged yet, but something wrong
+		 * is happening here. Just throw an error instead of
+		 * authorizing this use case.
+		 */
+	case __VC4_MADV_PURGED:
+		/* We can't use a purged BO. */
+	default:
+		/* Invalid madv value. */
+		ret = -EINVAL;
+		break;
+	}
+	mutex_unlock(&bo->madv_lock);
+
+	return ret;
+}
+
+void vc4_bo_dec_usecnt(struct vc4_bo *bo)
+{
+	/* Fast path: if the BO is still retained by someone, no need to test
+	 * the madv value.
+	 */
+	if (refcount_dec_not_one(&bo->usecnt))
+		return;
+
+	mutex_lock(&bo->madv_lock);
+	if (refcount_dec_and_test(&bo->usecnt) &&
+	    bo->madv == VC4_MADV_DONTNEED)
+		vc4_bo_add_to_purgeable_pool(bo);
+	mutex_unlock(&bo->madv_lock);
+}
+
 static void vc4_bo_cache_time_timer(unsigned long data)
 {
 	struct drm_device *dev = (struct drm_device *)data;
@@ -480,18 +693,52 @@ struct dma_buf *
 vc4_prime_export(struct drm_device *dev, struct drm_gem_object *obj, int flags)
 {
 	struct vc4_bo *bo = to_vc4_bo(obj);
+	struct dma_buf *dmabuf;
+	int ret;
 
 	if (bo->validated_shader) {
 		DRM_DEBUG("Attempting to export shader BO\n");
 		return ERR_PTR(-EINVAL);
 	}
 
-	return drm_gem_prime_export(dev, obj, flags);
+	/* Note: as soon as the BO is exported it becomes unpurgeable, because
+	 * noone ever decrements the usecnt even if the reference held by the
+	 * exported BO is released. This shouldn't be a problem since we don't
+	 * expect exported BOs to be marked as purgeable.
+	 */
+	ret = vc4_bo_inc_usecnt(bo);
+	if (ret) {
+		DRM_ERROR("Failed to increment BO usecnt\n");
+		return ERR_PTR(ret);
+	}
+
+	dmabuf = drm_gem_prime_export(dev, obj, flags);
+	if (IS_ERR(dmabuf))
+		vc4_bo_dec_usecnt(bo);
+
+	return dmabuf;
+}
+
+int vc4_fault(struct vm_fault *vmf)
+{
+	struct vm_area_struct *vma = vmf->vma;
+	struct drm_gem_object *obj = vma->vm_private_data;
+	struct vc4_bo *bo = to_vc4_bo(obj);
+
+	/* The only reason we would end up here is when user-space accesses
+	 * BO's memory after it's been purged.
+	 */
+	mutex_lock(&bo->madv_lock);
+	WARN_ON(bo->madv != __VC4_MADV_PURGED);
+	mutex_unlock(&bo->madv_lock);
+
+	return VM_FAULT_SIGBUS;
 }
 
 int vc4_mmap(struct file *filp, struct vm_area_struct *vma)
 {
 	struct drm_gem_object *gem_obj;
+	unsigned long vm_pgoff;
 	struct vc4_bo *bo;
 	int ret;
 
@@ -507,16 +754,36 @@ int vc4_mmap(struct file *filp, struct vm_area_struct *vma)
 		return -EINVAL;
 	}
 
+	if (bo->madv != VC4_MADV_WILLNEED) {
+		DRM_DEBUG("mmaping of %s BO not allowed\n",
+			  bo->madv == VC4_MADV_DONTNEED ?
+			  "purgeable" : "purged");
+		return -EINVAL;
+	}
+
 	/*
 	 * Clear the VM_PFNMAP flag that was set by drm_gem_mmap(), and set the
 	 * vm_pgoff (used as a fake buffer offset by DRM) to 0 as we want to map
 	 * the whole buffer.
 	 */
 	vma->vm_flags &= ~VM_PFNMAP;
-	vma->vm_pgoff = 0;
 
+	/* This ->vm_pgoff dance is needed to make all parties happy:
+	 * - dma_mmap_wc() uses ->vm_pgoff as an offset within the allocated
+	 *   mem-region, hence the need to set it to zero (the value set by
+	 *   the DRM core is a virtual offset encoding the GEM object-id)
+	 * - the mmap() core logic needs ->vm_pgoff to be restored to its
+	 *   initial value before returning from this function because it
+	 *   encodes the  offset of this GEM in the dev->anon_inode pseudo-file
+	 *   and this information will be used when we invalidate userspace
+	 *   mappings  with drm_vma_node_unmap() (called from vc4_gem_purge()).
+	 */
+	vm_pgoff = vma->vm_pgoff;
+	vma->vm_pgoff = 0;
 	ret = dma_mmap_wc(bo->base.base.dev->dev, vma, bo->base.vaddr,
 			  bo->base.paddr, vma->vm_end - vma->vm_start);
+	vma->vm_pgoff = vm_pgoff;
+
 	if (ret)
 		drm_gem_vm_close(vma);
 
@@ -580,6 +847,8 @@ int vc4_create_bo_ioctl(struct drm_device *dev, void *data,
 	if (IS_ERR(bo))
 		return PTR_ERR(bo);
 
+	bo->madv = VC4_MADV_WILLNEED;
+
 	ret = drm_gem_handle_create(file_priv, &bo->base.base, &args->handle);
 	drm_gem_object_put_unlocked(&bo->base.base);
 
@@ -633,6 +902,8 @@ vc4_create_shader_bo_ioctl(struct drm_device *dev, void *data,
 	if (IS_ERR(bo))
 		return PTR_ERR(bo);
 
+	bo->madv = VC4_MADV_WILLNEED;
+
 	if (copy_from_user(bo->base.vaddr,
 			     (void __user *)(uintptr_t)args->data,
 			     args->size)) {
diff --git a/drivers/gpu/drm/vc4/vc4_drv.c b/drivers/gpu/drm/vc4/vc4_drv.c
index 1c96edcb302b..e3c29729da2e 100644
--- a/drivers/gpu/drm/vc4/vc4_drv.c
+++ b/drivers/gpu/drm/vc4/vc4_drv.c
@@ -100,6 +100,7 @@ static int vc4_get_param_ioctl(struct drm_device *dev, void *data,
 	case DRM_VC4_PARAM_SUPPORTS_ETC1:
 	case DRM_VC4_PARAM_SUPPORTS_THREADED_FS:
 	case DRM_VC4_PARAM_SUPPORTS_FIXED_RCL_ORDER:
+	case DRM_VC4_PARAM_SUPPORTS_MADVISE:
 		args->value = true;
 		break;
 	default:
@@ -117,6 +118,12 @@ static void vc4_lastclose(struct drm_device *dev)
 	drm_fbdev_cma_restore_mode(vc4->fbdev);
 }
 
+static const struct vm_operations_struct vc4_vm_ops = {
+	.fault = vc4_fault,
+	.open = drm_gem_vm_open,
+	.close = drm_gem_vm_close,
+};
+
 static const struct file_operations vc4_drm_fops = {
 	.owner = THIS_MODULE,
 	.open = drm_open,
@@ -142,6 +149,7 @@ static const struct drm_ioctl_desc vc4_drm_ioctls[] = {
 	DRM_IOCTL_DEF_DRV(VC4_SET_TILING, vc4_set_tiling_ioctl, DRM_RENDER_ALLOW),
 	DRM_IOCTL_DEF_DRV(VC4_GET_TILING, vc4_get_tiling_ioctl, DRM_RENDER_ALLOW),
 	DRM_IOCTL_DEF_DRV(VC4_LABEL_BO, vc4_label_bo_ioctl, DRM_RENDER_ALLOW),
+	DRM_IOCTL_DEF_DRV(VC4_GEM_MADVISE, vc4_gem_madvise_ioctl, DRM_RENDER_ALLOW),
 };
 
 static struct drm_driver vc4_drm_driver = {
@@ -166,7 +174,7 @@ static struct drm_driver vc4_drm_driver = {
 
 	.gem_create_object = vc4_create_object,
 	.gem_free_object_unlocked = vc4_free_object,
-	.gem_vm_ops = &drm_gem_cma_vm_ops,
+	.gem_vm_ops = &vc4_vm_ops,
 
 	.prime_handle_to_fd = drm_gem_prime_handle_to_fd,
 	.prime_fd_to_handle = drm_gem_prime_fd_to_handle,
diff --git a/drivers/gpu/drm/vc4/vc4_drv.h b/drivers/gpu/drm/vc4/vc4_drv.h
index 87f2d8e5c134..9c0d380c96f2 100644
--- a/drivers/gpu/drm/vc4/vc4_drv.h
+++ b/drivers/gpu/drm/vc4/vc4_drv.h
@@ -74,6 +74,19 @@ struct vc4_dev {
 	/* Protects bo_cache and bo_labels. */
 	struct mutex bo_lock;
 
+	/* Purgeable BO pool. All BOs in this pool can have their memory
+	 * reclaimed if the driver is unable to allocate new BOs. We also
+	 * keep stats related to the purge mechanism here.
+	 */
+	struct {
+		struct list_head list;
+		unsigned int num;
+		size_t size;
+		unsigned int purged_num;
+		size_t purged_size;
+		struct mutex lock;
+	} purgeable;
+
 	uint64_t dma_fence_context;
 
 	/* Sequence number for the last job queued in bin_job_list.
@@ -192,6 +205,16 @@ struct vc4_bo {
 	 * for user-allocated labels.
 	 */
 	int label;
+
+	/* Count the number of active users. This is needed to determine
+	 * whether we can move the BO to the purgeable list or not (when the BO
+	 * is used by the GPU or the display engine we can't purge it).
+	 */
+	refcount_t usecnt;
+
+	/* Store purgeable/purged state here */
+	u32 madv;
+	struct mutex madv_lock;
 };
 
 static inline struct vc4_bo *
@@ -503,6 +526,7 @@ int vc4_get_hang_state_ioctl(struct drm_device *dev, void *data,
 			     struct drm_file *file_priv);
 int vc4_label_bo_ioctl(struct drm_device *dev, void *data,
 		       struct drm_file *file_priv);
+int vc4_fault(struct vm_fault *vmf);
 int vc4_mmap(struct file *filp, struct vm_area_struct *vma);
 struct reservation_object *vc4_prime_res_obj(struct drm_gem_object *obj);
 int vc4_prime_mmap(struct drm_gem_object *obj, struct vm_area_struct *vma);
@@ -513,6 +537,10 @@ void *vc4_prime_vmap(struct drm_gem_object *obj);
 int vc4_bo_cache_init(struct drm_device *dev);
 void vc4_bo_cache_destroy(struct drm_device *dev);
 int vc4_bo_stats_debugfs(struct seq_file *m, void *arg);
+int vc4_bo_inc_usecnt(struct vc4_bo *bo);
+void vc4_bo_dec_usecnt(struct vc4_bo *bo);
+void vc4_bo_add_to_purgeable_pool(struct vc4_bo *bo);
+void vc4_bo_remove_from_purgeable_pool(struct vc4_bo *bo);
 
 /* vc4_crtc.c */
 extern struct platform_driver vc4_crtc_driver;
@@ -557,6 +585,8 @@ void vc4_job_handle_completed(struct vc4_dev *vc4);
 int vc4_queue_seqno_cb(struct drm_device *dev,
 		       struct vc4_seqno_cb *cb, uint64_t seqno,
 		       void (*func)(struct vc4_seqno_cb *cb));
+int vc4_gem_madvise_ioctl(struct drm_device *dev, void *data,
+			  struct drm_file *file_priv);
 
 /* vc4_hdmi.c */
 extern struct platform_driver vc4_hdmi_driver;
diff --git a/drivers/gpu/drm/vc4/vc4_gem.c b/drivers/gpu/drm/vc4/vc4_gem.c
index d0c6bfb68c4e..e00ac2f3a264 100644
--- a/drivers/gpu/drm/vc4/vc4_gem.c
+++ b/drivers/gpu/drm/vc4/vc4_gem.c
@@ -188,11 +188,22 @@ vc4_save_hang_state(struct drm_device *dev)
 			continue;
 
 		for (j = 0; j < exec[i]->bo_count; j++) {
+			bo = to_vc4_bo(&exec[i]->bo[j]->base);
+
+			/* Retain BOs just in case they were marked purgeable.
+			 * This prevents the BO from being purged before
+			 * someone had a chance to dump the hang state.
+			 */
+			WARN_ON(!refcount_read(&bo->usecnt));
+			refcount_inc(&bo->usecnt);
 			drm_gem_object_get(&exec[i]->bo[j]->base);
 			kernel_state->bo[j + prev_idx] = &exec[i]->bo[j]->base;
 		}
 
 		list_for_each_entry(bo, &exec[i]->unref_list, unref_head) {
+			/* No need to retain BOs coming from the ->unref_list
+			 * because they are naturally unpurgeable.
+			 */
 			drm_gem_object_get(&bo->base.base);
 			kernel_state->bo[j + prev_idx] = &bo->base.base;
 			j++;
@@ -233,6 +244,26 @@ vc4_save_hang_state(struct drm_device *dev)
 	state->fdbgs = V3D_READ(V3D_FDBGS);
 	state->errstat = V3D_READ(V3D_ERRSTAT);
 
+	/* We need to turn purgeable BOs into unpurgeable ones so that
+	 * userspace has a chance to dump the hang state before the kernel
+	 * decides to purge those BOs.
+	 * Note that BO consistency at dump time cannot be guaranteed. For
+	 * example, if the owner of these BOs decides to re-use them or mark
+	 * them purgeable again there's nothing we can do to prevent it.
+	 */
+	for (i = 0; i < kernel_state->user_state.bo_count; i++) {
+		struct vc4_bo *bo = to_vc4_bo(kernel_state->bo[i]);
+
+		if (bo->madv == __VC4_MADV_NOTSUPP)
+			continue;
+
+		mutex_lock(&bo->madv_lock);
+		if (!WARN_ON(bo->madv == __VC4_MADV_PURGED))
+			bo->madv = VC4_MADV_WILLNEED;
+		refcount_dec(&bo->usecnt);
+		mutex_unlock(&bo->madv_lock);
+	}
+
 	spin_lock_irqsave(&vc4->job_lock, irqflags);
 	if (vc4->hang_state) {
 		spin_unlock_irqrestore(&vc4->job_lock, irqflags);
@@ -639,9 +670,6 @@ vc4_queue_submit(struct drm_device *dev, struct vc4_exec_info *exec,
  * The command validator needs to reference BOs by their index within
  * the submitted job's BO list.  This does the validation of the job's
  * BO list and reference counting for the lifetime of the job.
- *
- * Note that this function doesn't need to unreference the BOs on
- * failure, because that will happen at vc4_complete_exec() time.
  */
 static int
 vc4_cl_lookup_bos(struct drm_device *dev,
@@ -693,16 +721,47 @@ vc4_cl_lookup_bos(struct drm_device *dev,
 			DRM_DEBUG("Failed to look up GEM BO %d: %d\n",
 				  i, handles[i]);
 			ret = -EINVAL;
-			spin_unlock(&file_priv->table_lock);
-			goto fail;
+			break;
 		}
+
 		drm_gem_object_get(bo);
 		exec->bo[i] = (struct drm_gem_cma_object *)bo;
 	}
 	spin_unlock(&file_priv->table_lock);
 
+	if (ret)
+		goto fail_put_bo;
+
+	for (i = 0; i < exec->bo_count; i++) {
+		ret = vc4_bo_inc_usecnt(to_vc4_bo(&exec->bo[i]->base));
+		if (ret)
+			goto fail_dec_usecnt;
+	}
+
+	kvfree(handles);
+	return 0;
+
+fail_dec_usecnt:
+	/* Decrease usecnt on acquired objects.
+	 * We cannot rely on  vc4_complete_exec() to release resources here,
+	 * because vc4_complete_exec() has no information about which BO has
+	 * had its ->usecnt incremented.
+	 * To make things easier we just free everything explicitly and set
+	 * exec->bo to NULL so that vc4_complete_exec() skips the 'BO release'
+	 * step.
+	 */
+	for (i-- ; i >= 0; i--)
+		vc4_bo_dec_usecnt(to_vc4_bo(&exec->bo[i]->base));
+
+fail_put_bo:
+	/* Release any reference to acquired objects. */
+	for (i = 0; i < exec->bo_count && exec->bo[i]; i++)
+		drm_gem_object_put_unlocked(&exec->bo[i]->base);
+
 fail:
 	kvfree(handles);
+	kvfree(exec->bo);
+	exec->bo = NULL;
 	return ret;
 }
 
@@ -833,8 +892,12 @@ vc4_complete_exec(struct drm_device *dev, struct vc4_exec_info *exec)
 		dma_fence_signal(exec->fence);
 
 	if (exec->bo) {
-		for (i = 0; i < exec->bo_count; i++)
+		for (i = 0; i < exec->bo_count; i++) {
+			struct vc4_bo *bo = to_vc4_bo(&exec->bo[i]->base);
+
+			vc4_bo_dec_usecnt(bo);
 			drm_gem_object_put_unlocked(&exec->bo[i]->base);
+		}
 		kvfree(exec->bo);
 	}
 
@@ -1098,6 +1161,9 @@ vc4_gem_init(struct drm_device *dev)
 	INIT_WORK(&vc4->job_done_work, vc4_job_done_work);
 
 	mutex_init(&vc4->power_lock);
+
+	INIT_LIST_HEAD(&vc4->purgeable.list);
+	mutex_init(&vc4->purgeable.lock);
 }
 
 void
@@ -1121,3 +1187,81 @@ vc4_gem_destroy(struct drm_device *dev)
 	if (vc4->hang_state)
 		vc4_free_hang_state(dev, vc4->hang_state);
 }
+
+int vc4_gem_madvise_ioctl(struct drm_device *dev, void *data,
+			  struct drm_file *file_priv)
+{
+	struct drm_vc4_gem_madvise *args = data;
+	struct drm_gem_object *gem_obj;
+	struct vc4_bo *bo;
+	int ret;
+
+	switch (args->madv) {
+	case VC4_MADV_DONTNEED:
+	case VC4_MADV_WILLNEED:
+		break;
+	default:
+		return -EINVAL;
+	}
+
+	if (args->pad != 0)
+		return -EINVAL;
+
+	gem_obj = drm_gem_object_lookup(file_priv, args->handle);
+	if (!gem_obj) {
+		DRM_DEBUG("Failed to look up GEM BO %d\n", args->handle);
+		return -ENOENT;
+	}
+
+	bo = to_vc4_bo(gem_obj);
+
+	/* Only BOs exposed to userspace can be purged. */
+	if (bo->madv == __VC4_MADV_NOTSUPP) {
+		DRM_DEBUG("madvise not supported on this BO\n");
+		ret = -EINVAL;
+		goto out_put_gem;
+	}
+
+	/* Not sure it's safe to purge imported BOs. Let's just assume it's
+	 * not until proven otherwise.
+	 */
+	if (gem_obj->import_attach) {
+		DRM_DEBUG("madvise not supported on imported BOs\n");
+		ret = -EINVAL;
+		goto out_put_gem;
+	}
+
+	mutex_lock(&bo->madv_lock);
+
+	if (args->madv == VC4_MADV_DONTNEED && bo->madv == VC4_MADV_WILLNEED &&
+	    !refcount_read(&bo->usecnt)) {
+		/* If the BO is about to be marked as purgeable, is not used
+		 * and is not already purgeable or purged, add it to the
+		 * purgeable list.
+		 */
+		vc4_bo_add_to_purgeable_pool(bo);
+	} else if (args->madv == VC4_MADV_WILLNEED &&
+		   bo->madv == VC4_MADV_DONTNEED &&
+		   !refcount_read(&bo->usecnt)) {
+		/* The BO has not been purged yet, just remove it from
+		 * the purgeable list.
+		 */
+		vc4_bo_remove_from_purgeable_pool(bo);
+	}
+
+	/* Save the purged state. */
+	args->retained = bo->madv != __VC4_MADV_PURGED;
+
+	/* Update internal madv state only if the bo was not purged. */
+	if (bo->madv != __VC4_MADV_PURGED)
+		bo->madv = args->madv;
+
+	mutex_unlock(&bo->madv_lock);
+
+	ret = 0;
+
+out_put_gem:
+	drm_gem_object_put_unlocked(gem_obj);
+
+	return ret;
+}
diff --git a/drivers/gpu/drm/vc4/vc4_plane.c b/drivers/gpu/drm/vc4/vc4_plane.c
index 3a767a038f72..423a23ed8fc2 100644
--- a/drivers/gpu/drm/vc4/vc4_plane.c
+++ b/drivers/gpu/drm/vc4/vc4_plane.c
@@ -23,6 +23,7 @@
 #include <drm/drm_fb_cma_helper.h>
 #include <drm/drm_plane_helper.h>
 
+#include "uapi/drm/vc4_drm.h"
 #include "vc4_drv.h"
 #include "vc4_regs.h"
 
@@ -774,21 +775,40 @@ static int vc4_prepare_fb(struct drm_plane *plane,
 {
 	struct vc4_bo *bo;
 	struct dma_fence *fence;
+	int ret;
 
 	if ((plane->state->fb == state->fb) || !state->fb)
 		return 0;
 
 	bo = to_vc4_bo(&drm_fb_cma_get_gem_obj(state->fb, 0)->base);
+
+	ret = vc4_bo_inc_usecnt(bo);
+	if (ret)
+		return ret;
+
 	fence = reservation_object_get_excl_rcu(bo->resv);
 	drm_atomic_set_fence_for_plane(state, fence);
 
 	return 0;
 }
 
+static void vc4_cleanup_fb(struct drm_plane *plane,
+			   struct drm_plane_state *state)
+{
+	struct vc4_bo *bo;
+
+	if (plane->state->fb == state->fb || !state->fb)
+		return;
+
+	bo = to_vc4_bo(&drm_fb_cma_get_gem_obj(state->fb, 0)->base);
+	vc4_bo_dec_usecnt(bo);
+}
+
 static const struct drm_plane_helper_funcs vc4_plane_helper_funcs = {
 	.atomic_check = vc4_plane_atomic_check,
 	.atomic_update = vc4_plane_atomic_update,
 	.prepare_fb = vc4_prepare_fb,
+	.cleanup_fb = vc4_cleanup_fb,
 };
 
 static void vc4_plane_destroy(struct drm_plane *plane)
diff --git a/include/uapi/drm/vc4_drm.h b/include/uapi/drm/vc4_drm.h
index afae87004963..52263b575bdc 100644
--- a/include/uapi/drm/vc4_drm.h
+++ b/include/uapi/drm/vc4_drm.h
@@ -41,6 +41,7 @@ extern "C" {
 #define DRM_VC4_SET_TILING                        0x08
 #define DRM_VC4_GET_TILING                        0x09
 #define DRM_VC4_LABEL_BO                          0x0a
+#define DRM_VC4_GEM_MADVISE                       0x0b
 
 #define DRM_IOCTL_VC4_SUBMIT_CL           DRM_IOWR(DRM_COMMAND_BASE + DRM_VC4_SUBMIT_CL, struct drm_vc4_submit_cl)
 #define DRM_IOCTL_VC4_WAIT_SEQNO          DRM_IOWR(DRM_COMMAND_BASE + DRM_VC4_WAIT_SEQNO, struct drm_vc4_wait_seqno)
@@ -53,6 +54,7 @@ extern "C" {
 #define DRM_IOCTL_VC4_SET_TILING          DRM_IOWR(DRM_COMMAND_BASE + DRM_VC4_SET_TILING, struct drm_vc4_set_tiling)
 #define DRM_IOCTL_VC4_GET_TILING          DRM_IOWR(DRM_COMMAND_BASE + DRM_VC4_GET_TILING, struct drm_vc4_get_tiling)
 #define DRM_IOCTL_VC4_LABEL_BO            DRM_IOWR(DRM_COMMAND_BASE + DRM_VC4_LABEL_BO, struct drm_vc4_label_bo)
+#define DRM_IOCTL_VC4_GEM_MADVISE         DRM_IOWR(DRM_COMMAND_BASE + DRM_VC4_GEM_MADVISE, struct drm_vc4_gem_madvise)
 
 struct drm_vc4_submit_rcl_surface {
 	__u32 hindex; /* Handle index, or ~0 if not present. */
@@ -305,6 +307,7 @@ struct drm_vc4_get_hang_state {
 #define DRM_VC4_PARAM_SUPPORTS_ETC1		4
 #define DRM_VC4_PARAM_SUPPORTS_THREADED_FS	5
 #define DRM_VC4_PARAM_SUPPORTS_FIXED_RCL_ORDER	6
+#define DRM_VC4_PARAM_SUPPORTS_MADVISE		7
 
 struct drm_vc4_get_param {
 	__u32 param;
@@ -333,6 +336,22 @@ struct drm_vc4_label_bo {
 	__u64 name;
 };
 
+/*
+ * States prefixed with '__' are internal states and cannot be passed to the
+ * DRM_IOCTL_VC4_GEM_MADVISE ioctl.
+ */
+#define VC4_MADV_WILLNEED			0
+#define VC4_MADV_DONTNEED			1
+#define __VC4_MADV_PURGED			2
+#define __VC4_MADV_NOTSUPP			3
+
+struct drm_vc4_gem_madvise {
+	__u32 handle;
+	__u32 madv;
+	__u32 retained;
+	__u32 pad;
+};
+
 #if defined(__cplusplus)
 }
 #endif
-- 
cgit v1.2.3


From af0c8c10564aac5b6d67308129ec09c4ad5db476 Mon Sep 17 00:00:00 2001
From: Eric Anholt <eric@anholt.net>
Date: Fri, 13 Oct 2017 17:12:55 -0700
Subject: drm/vc4: Fix sleeps during the IRQ handler for DSI transactions.

VC4's DSI1 has a bug where the AXI connection is broken for 32-bit
writes from the CPU, so we use the DMA engine to DMA 32-bit values
into registers instead.  That sleeps, so we can't do it from the top
half.

As a solution, use an interrupt thread so that all our writes happen
when sleeping is is allowed.

v2: Use IRQF_ONESHOT (suggested by Boris)
v3: Style nitpicks.

Signed-off-by: Eric Anholt <eric@anholt.net>
Link: https://patchwork.freedesktop.org/patch/msgid/20171014001255.32005-1-eric@anholt.net
Reviewed-by: Boris Brezillon <boris.brezillon@free-electrons.com> (v2)
---
 drivers/gpu/drm/vc4/vc4_dsi.c | 32 ++++++++++++++++++++++++++++++--
 1 file changed, 30 insertions(+), 2 deletions(-)

(limited to 'drivers/gpu/drm/vc4')

diff --git a/drivers/gpu/drm/vc4/vc4_dsi.c b/drivers/gpu/drm/vc4/vc4_dsi.c
index 554605af344e..94085f8bcd68 100644
--- a/drivers/gpu/drm/vc4/vc4_dsi.c
+++ b/drivers/gpu/drm/vc4/vc4_dsi.c
@@ -1360,6 +1360,27 @@ static void dsi_handle_error(struct vc4_dsi *dsi,
 	*ret = IRQ_HANDLED;
 }
 
+/*
+ * Initial handler for port 1 where we need the reg_dma workaround.
+ * The register DMA writes sleep, so we can't do it in the top half.
+ * Instead we use IRQF_ONESHOT so that the IRQ gets disabled in the
+ * parent interrupt contrller until our interrupt thread is done.
+ */
+static irqreturn_t vc4_dsi_irq_defer_to_thread_handler(int irq, void *data)
+{
+	struct vc4_dsi *dsi = data;
+	u32 stat = DSI_PORT_READ(INT_STAT);
+
+	if (!stat)
+		return IRQ_NONE;
+
+	return IRQ_WAKE_THREAD;
+}
+
+/*
+ * Normal IRQ handler for port 0, or the threaded IRQ handler for port
+ * 1 where we need the reg_dma workaround.
+ */
 static irqreturn_t vc4_dsi_irq_handler(int irq, void *data)
 {
 	struct vc4_dsi *dsi = data;
@@ -1539,8 +1560,15 @@ static int vc4_dsi_bind(struct device *dev, struct device *master, void *data)
 	/* Clear any existing interrupt state. */
 	DSI_PORT_WRITE(INT_STAT, DSI_PORT_READ(INT_STAT));
 
-	ret = devm_request_irq(dev, platform_get_irq(pdev, 0),
-			       vc4_dsi_irq_handler, 0, "vc4 dsi", dsi);
+	if (dsi->reg_dma_mem)
+		ret = devm_request_threaded_irq(dev, platform_get_irq(pdev, 0),
+						vc4_dsi_irq_defer_to_thread_handler,
+						vc4_dsi_irq_handler,
+						IRQF_ONESHOT,
+						"vc4 dsi", dsi);
+	else
+		ret = devm_request_irq(dev, platform_get_irq(pdev, 0),
+				       vc4_dsi_irq_handler, 0, "vc4 dsi", dsi);
 	if (ret) {
 		if (ret != -EPROBE_DEFER)
 			dev_err(dev, "Failed to get interrupt: %d\n", ret);
-- 
cgit v1.2.3


From e073db5c5d7ad311efa8f4192a2047b006bbc5f3 Mon Sep 17 00:00:00 2001
From: Boris BREZILLON <boris.brezillon@free-electrons.com>
Date: Wed, 1 Nov 2017 10:57:31 +0100
Subject: drm/vc4: Fix wrong printk format in vc4_bo_stats_debugfs()

vc4->purgeable.size and vc4->purgeable.purged_size are size_t fields
and should be printed with a %zd specifier.

Fixes: b9f19259b84d ("drm/vc4: Add the DRM_IOCTL_VC4_GEM_MADVISE ioctl")
Signed-off-by: Boris Brezillon <boris.brezillon@free-electrons.com>
Reviewed-by: Gustavo Padovan <gustavo.padovan@collabora.com>
Reviewed-by: Eric Anholt <eric@anholt.net>
Link: https://patchwork.freedesktop.org/patch/msgid/20171101095731.14878-1-boris.brezillon@free-electrons.com
(cherry picked from commit 50f365cde4ffb5ae70c3f02384bbb46698aba65c)
Signed-off-by: Eric Anholt <eric@anholt.net>
---
 drivers/gpu/drm/vc4/vc4_bo.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

(limited to 'drivers/gpu/drm/vc4')

diff --git a/drivers/gpu/drm/vc4/vc4_bo.c b/drivers/gpu/drm/vc4/vc4_bo.c
index 01a53ba304f8..98a6cb9f44fc 100644
--- a/drivers/gpu/drm/vc4/vc4_bo.c
+++ b/drivers/gpu/drm/vc4/vc4_bo.c
@@ -88,11 +88,11 @@ int vc4_bo_stats_debugfs(struct seq_file *m, void *unused)
 
 	mutex_lock(&vc4->purgeable.lock);
 	if (vc4->purgeable.num)
-		seq_printf(m, "%30s: %6dkb BOs (%d)\n", "userspace BO cache",
+		seq_printf(m, "%30s: %6zdkb BOs (%d)\n", "userspace BO cache",
 			   vc4->purgeable.size / 1024, vc4->purgeable.num);
 
 	if (vc4->purgeable.purged_num)
-		seq_printf(m, "%30s: %6dkb BOs (%d)\n", "total purged BO",
+		seq_printf(m, "%30s: %6zdkb BOs (%d)\n", "total purged BO",
 			   vc4->purgeable.purged_size / 1024,
 			   vc4->purgeable.purged_num);
 	mutex_unlock(&vc4->purgeable.lock);
-- 
cgit v1.2.3