From 91e9f352cd1b79772b4883fd31e04655d4a97318 Mon Sep 17 00:00:00 2001 From: Deepak Rawat Date: Tue, 16 Jan 2018 08:24:17 +0100 Subject: drm/vmwgfx: Avoid iterating over display unit if crtc is available In case of page flip there is no need to iterate over all display units in the function "vmw_kms_helper_dirty". If crtc is available then dirty commands are performed on that crtc only. Signed-off-by: Deepak Rawat Reviewed-by: Sinclair Yeh Signed-off-by: Thomas Hellstrom --- drivers/gpu/drm/vmwgfx/vmwgfx_kms.c | 31 +++++++++++++++++++------------ drivers/gpu/drm/vmwgfx/vmwgfx_kms.h | 17 ++++++++++++----- drivers/gpu/drm/vmwgfx/vmwgfx_scrn.c | 19 ++++++++++++++----- drivers/gpu/drm/vmwgfx/vmwgfx_stdu.c | 15 +++++++++++---- 4 files changed, 56 insertions(+), 26 deletions(-) diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_kms.c b/drivers/gpu/drm/vmwgfx/vmwgfx_kms.c index 34ecc27fc30a..53392d64ca03 100644 --- a/drivers/gpu/drm/vmwgfx/vmwgfx_kms.c +++ b/drivers/gpu/drm/vmwgfx/vmwgfx_kms.c @@ -888,11 +888,11 @@ static int vmw_framebuffer_surface_dirty(struct drm_framebuffer *framebuffer, if (dev_priv->active_display_unit == vmw_du_screen_object) ret = vmw_kms_sou_do_surface_dirty(dev_priv, &vfbs->base, clips, NULL, NULL, 0, 0, - num_clips, inc, NULL); + num_clips, inc, NULL, NULL); else ret = vmw_kms_stdu_surface_dirty(dev_priv, &vfbs->base, clips, NULL, NULL, 0, 0, - num_clips, inc, NULL); + num_clips, inc, NULL, NULL); vmw_fifo_flush(dev_priv, false); ttm_read_unlock(&dev_priv->reservation_sem); @@ -928,11 +928,12 @@ int vmw_kms_readback(struct vmw_private *dev_priv, switch (dev_priv->active_display_unit) { case vmw_du_screen_object: return vmw_kms_sou_readback(dev_priv, file_priv, vfb, - user_fence_rep, vclips, num_clips); + user_fence_rep, vclips, num_clips, + NULL); case vmw_du_screen_target: return vmw_kms_stdu_dma(dev_priv, file_priv, vfb, user_fence_rep, NULL, vclips, num_clips, - 1, false, true); + 1, false, true, NULL); default: WARN_ONCE(true, "Readback called with 
invalid display system.\n"); @@ -1090,12 +1091,12 @@ static int vmw_framebuffer_dmabuf_dirty(struct drm_framebuffer *framebuffer, case vmw_du_screen_target: ret = vmw_kms_stdu_dma(dev_priv, NULL, &vfbd->base, NULL, clips, NULL, num_clips, increment, - true, true); + true, true, NULL); break; case vmw_du_screen_object: ret = vmw_kms_sou_do_dmabuf_dirty(dev_priv, &vfbd->base, clips, NULL, num_clips, - increment, true, NULL); + increment, true, NULL, NULL); break; case vmw_du_legacy: ret = vmw_kms_ldu_do_dmabuf_dirty(dev_priv, &vfbd->base, 0, 0, @@ -1581,7 +1582,7 @@ static int vmw_kms_generic_present(struct vmw_private *dev_priv, { return vmw_kms_sou_do_surface_dirty(dev_priv, vfb, NULL, clips, &surface->res, destX, destY, - num_clips, 1, NULL); + num_clips, 1, NULL, NULL); } @@ -1600,7 +1601,7 @@ int vmw_kms_present(struct vmw_private *dev_priv, case vmw_du_screen_target: ret = vmw_kms_stdu_surface_dirty(dev_priv, vfb, NULL, clips, &surface->res, destX, destY, - num_clips, 1, NULL); + num_clips, 1, NULL, NULL); break; case vmw_du_screen_object: ret = vmw_kms_generic_present(dev_priv, file_priv, vfb, surface, @@ -2328,10 +2329,16 @@ int vmw_kms_helper_dirty(struct vmw_private *dev_priv, dirty->dev_priv = dev_priv; - list_for_each_entry(crtc, &dev_priv->dev->mode_config.crtc_list, head) { - if (crtc->primary->fb != &framebuffer->base) - continue; - units[num_units++] = vmw_crtc_to_du(crtc); + /* If crtc is passed, no need to iterate over other display units */ + if (dirty->crtc) { + units[num_units++] = vmw_crtc_to_du(dirty->crtc); + } else { + list_for_each_entry(crtc, &dev_priv->dev->mode_config.crtc_list, + head) { + if (crtc->primary->fb != &framebuffer->base) + continue; + units[num_units++] = vmw_crtc_to_du(crtc); + } } for (k = 0; k < num_units; k++) { diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_kms.h b/drivers/gpu/drm/vmwgfx/vmwgfx_kms.h index cd9da2dd79af..42b0f1589d3f 100644 --- a/drivers/gpu/drm/vmwgfx/vmwgfx_kms.h +++ b/drivers/gpu/drm/vmwgfx/vmwgfx_kms.h 
@@ -50,6 +50,7 @@ * @unit: The current display unit. Set up by the helper before a call to @clip. * @cmd: The allocated fifo space. Set up by the helper before the first @clip * call. + * @crtc: The crtc for which to build dirty commands. * @num_hits: Number of clip rect commands for this display unit. * Cleared by the helper before the first @clip call. Updated by the @clip * callback. @@ -71,6 +72,7 @@ struct vmw_kms_dirty { struct vmw_private *dev_priv; struct vmw_display_unit *unit; void *cmd; + struct drm_crtc *crtc; u32 num_hits; s32 fb_x; s32 fb_y; @@ -398,20 +400,23 @@ int vmw_kms_sou_do_surface_dirty(struct vmw_private *dev_priv, s32 dest_x, s32 dest_y, unsigned num_clips, int inc, - struct vmw_fence_obj **out_fence); + struct vmw_fence_obj **out_fence, + struct drm_crtc *crtc); int vmw_kms_sou_do_dmabuf_dirty(struct vmw_private *dev_priv, struct vmw_framebuffer *framebuffer, struct drm_clip_rect *clips, struct drm_vmw_rect *vclips, unsigned num_clips, int increment, bool interruptible, - struct vmw_fence_obj **out_fence); + struct vmw_fence_obj **out_fence, + struct drm_crtc *crtc); int vmw_kms_sou_readback(struct vmw_private *dev_priv, struct drm_file *file_priv, struct vmw_framebuffer *vfb, struct drm_vmw_fence_rep __user *user_fence_rep, struct drm_vmw_rect *vclips, - uint32_t num_clips); + uint32_t num_clips, + struct drm_crtc *crtc); /* * Screen Target Display Unit functions - vmwgfx_stdu.c @@ -425,7 +430,8 @@ int vmw_kms_stdu_surface_dirty(struct vmw_private *dev_priv, s32 dest_x, s32 dest_y, unsigned num_clips, int inc, - struct vmw_fence_obj **out_fence); + struct vmw_fence_obj **out_fence, + struct drm_crtc *crtc); int vmw_kms_stdu_dma(struct vmw_private *dev_priv, struct drm_file *file_priv, struct vmw_framebuffer *vfb, @@ -435,7 +441,8 @@ int vmw_kms_stdu_dma(struct vmw_private *dev_priv, uint32_t num_clips, int increment, bool to_surface, - bool interruptible); + bool interruptible, + struct drm_crtc *crtc); int vmw_kms_set_config(struct 
drm_mode_set *set, struct drm_modeset_acquire_ctx *ctx); diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_scrn.c b/drivers/gpu/drm/vmwgfx/vmwgfx_scrn.c index 63a4cd794b73..a17f6c70fc38 100644 --- a/drivers/gpu/drm/vmwgfx/vmwgfx_scrn.c +++ b/drivers/gpu/drm/vmwgfx/vmwgfx_scrn.c @@ -341,11 +341,11 @@ static int vmw_sou_crtc_page_flip(struct drm_crtc *crtc, if (vfb->dmabuf) ret = vmw_kms_sou_do_dmabuf_dirty(dev_priv, vfb, NULL, &vclips, 1, 1, - true, &fence); + true, &fence, crtc); else ret = vmw_kms_sou_do_surface_dirty(dev_priv, vfb, NULL, &vclips, NULL, - 0, 0, 1, 1, &fence); + 0, 0, 1, 1, &fence, crtc); if (ret != 0) @@ -892,6 +892,7 @@ static void vmw_sou_surface_clip(struct vmw_kms_dirty *dirty) * @out_fence: If non-NULL, will return a ref-counted pointer to a * struct vmw_fence_obj. The returned fence pointer may be NULL in which * case the device has already synchronized. + * @crtc: If crtc is passed, perform surface dirty on that crtc only. * * Returns 0 on success, negative error code on failure. -ERESTARTSYS if * interrupted. @@ -904,7 +905,8 @@ int vmw_kms_sou_do_surface_dirty(struct vmw_private *dev_priv, s32 dest_x, s32 dest_y, unsigned num_clips, int inc, - struct vmw_fence_obj **out_fence) + struct vmw_fence_obj **out_fence, + struct drm_crtc *crtc) { struct vmw_framebuffer_surface *vfbs = container_of(framebuffer, typeof(*vfbs), base); @@ -923,6 +925,7 @@ int vmw_kms_sou_do_surface_dirty(struct vmw_private *dev_priv, sdirty.base.dev_priv = dev_priv; sdirty.base.fifo_reserve_size = sizeof(struct vmw_kms_sou_dirty_cmd) + sizeof(SVGASignedRect) * num_clips; + sdirty.base.crtc = crtc; sdirty.sid = srf->id; sdirty.left = sdirty.top = S32_MAX; @@ -994,6 +997,7 @@ static void vmw_sou_dmabuf_clip(struct vmw_kms_dirty *dirty) * @out_fence: If non-NULL, will return a ref-counted pointer to a * struct vmw_fence_obj. The returned fence pointer may be NULL in which * case the device has already synchronized. 
+ * @crtc: If crtc is passed, perform dmabuf dirty on that crtc only. * * Returns 0 on success, negative error code on failure. -ERESTARTSYS if * interrupted. @@ -1004,7 +1008,8 @@ int vmw_kms_sou_do_dmabuf_dirty(struct vmw_private *dev_priv, struct drm_vmw_rect *vclips, unsigned num_clips, int increment, bool interruptible, - struct vmw_fence_obj **out_fence) + struct vmw_fence_obj **out_fence, + struct drm_crtc *crtc) { struct vmw_dma_buffer *buf = container_of(framebuffer, struct vmw_framebuffer_dmabuf, @@ -1021,6 +1026,7 @@ int vmw_kms_sou_do_dmabuf_dirty(struct vmw_private *dev_priv, if (unlikely(ret != 0)) goto out_revert; + dirty.crtc = crtc; dirty.fifo_commit = vmw_sou_dmabuf_fifo_commit; dirty.clip = vmw_sou_dmabuf_clip; dirty.fifo_reserve_size = sizeof(struct vmw_kms_sou_dmabuf_blit) * @@ -1092,6 +1098,7 @@ static void vmw_sou_readback_clip(struct vmw_kms_dirty *dirty) * Must be set to non-NULL if @file_priv is non-NULL. * @vclips: Array of clip rects. * @num_clips: Number of clip rects in @vclips. + * @crtc: If crtc is passed, readback on that crtc only. * * Returns 0 on success, negative error code on failure. -ERESTARTSYS if * interrupted. 
@@ -1101,7 +1108,8 @@ int vmw_kms_sou_readback(struct vmw_private *dev_priv, struct vmw_framebuffer *vfb, struct drm_vmw_fence_rep __user *user_fence_rep, struct drm_vmw_rect *vclips, - uint32_t num_clips) + uint32_t num_clips, + struct drm_crtc *crtc) { struct vmw_dma_buffer *buf = container_of(vfb, struct vmw_framebuffer_dmabuf, base)->buffer; @@ -1116,6 +1124,7 @@ int vmw_kms_sou_readback(struct vmw_private *dev_priv, if (unlikely(ret != 0)) goto out_revert; + dirty.crtc = crtc; dirty.fifo_commit = vmw_sou_readback_fifo_commit; dirty.clip = vmw_sou_readback_clip; dirty.fifo_reserve_size = sizeof(struct vmw_kms_sou_readback_blit) * diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_stdu.c b/drivers/gpu/drm/vmwgfx/vmwgfx_stdu.c index b68d74888ab1..c22bf80d4ddd 100644 --- a/drivers/gpu/drm/vmwgfx/vmwgfx_stdu.c +++ b/drivers/gpu/drm/vmwgfx/vmwgfx_stdu.c @@ -530,10 +530,10 @@ static int vmw_stdu_crtc_page_flip(struct drm_crtc *crtc, if (vfb->dmabuf) ret = vmw_kms_stdu_dma(dev_priv, NULL, vfb, NULL, NULL, &vclips, - 1, 1, true, false); + 1, 1, true, false, crtc); else ret = vmw_kms_stdu_surface_dirty(dev_priv, vfb, NULL, &vclips, - NULL, 0, 0, 1, 1, NULL); + NULL, 0, 0, 1, 1, NULL, crtc); if (ret) { DRM_ERROR("Page flip update error %d.\n", ret); return ret; @@ -802,6 +802,7 @@ out_cleanup: * @to_surface: Whether to DMA to the screen target system as opposed to * from the screen target system. * @interruptible: Whether to perform waits interruptible if possible. + * @crtc: If crtc is passed, perform stdu dma on that crtc only. 
* * If DMA-ing till the screen target system, the function will also notify * the screen target system that a bounding box of the cliprects has been @@ -818,7 +819,8 @@ int vmw_kms_stdu_dma(struct vmw_private *dev_priv, uint32_t num_clips, int increment, bool to_surface, - bool interruptible) + bool interruptible, + struct drm_crtc *crtc) { struct vmw_dma_buffer *buf = container_of(vfb, struct vmw_framebuffer_dmabuf, base)->buffer; @@ -852,6 +854,8 @@ int vmw_kms_stdu_dma(struct vmw_private *dev_priv, ddirty.base.fifo_reserve_size = 0; } + ddirty.base.crtc = crtc; + ret = vmw_kms_helper_dirty(dev_priv, vfb, clips, vclips, 0, 0, num_clips, increment, &ddirty.base); vmw_kms_helper_buffer_finish(dev_priv, file_priv, buf, NULL, @@ -963,6 +967,7 @@ static void vmw_kms_stdu_surface_fifo_commit(struct vmw_kms_dirty *dirty) * @out_fence: If non-NULL, will return a ref-counted pointer to a * struct vmw_fence_obj. The returned fence pointer may be NULL in which * case the device has already synchronized. + * @crtc: If crtc is passed, perform surface dirty on that crtc only. * * Returns 0 on success, negative error code on failure. -ERESTARTSYS if * interrupted. 
@@ -975,7 +980,8 @@ int vmw_kms_stdu_surface_dirty(struct vmw_private *dev_priv, s32 dest_x, s32 dest_y, unsigned num_clips, int inc, - struct vmw_fence_obj **out_fence) + struct vmw_fence_obj **out_fence, + struct drm_crtc *crtc) { struct vmw_framebuffer_surface *vfbs = container_of(framebuffer, typeof(*vfbs), base); @@ -1000,6 +1006,7 @@ int vmw_kms_stdu_surface_dirty(struct vmw_private *dev_priv, sdirty.base.fifo_reserve_size = sizeof(struct vmw_stdu_surface_copy) + sizeof(SVGA3dCopyBox) * num_clips + sizeof(struct vmw_stdu_update); + sdirty.base.crtc = crtc; sdirty.sid = srf->id; sdirty.left = sdirty.top = S32_MAX; sdirty.right = sdirty.bottom = S32_MIN; -- cgit v1.2.3 From 4e2f9fa7ffb5324adfc62fa3da6e1e36827fd5ad Mon Sep 17 00:00:00 2001 From: Deepak Rawat Date: Tue, 16 Jan 2018 08:25:55 +0100 Subject: drm/vmwgfx: Move surface copy cmd to atomic function When display surface is different than the framebuffer surface, atomic path do not copy the surface data. This commit moved the code to copy surface from legacy to atomic path. 
Signed-off-by: Deepak Rawat Reviewed-by: Sinclair Yeh Signed-off-by: Thomas Hellstrom --- drivers/gpu/drm/vmwgfx/vmwgfx_stdu.c | 47 +++++++++++++++++------------------- 1 file changed, 22 insertions(+), 25 deletions(-) diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_stdu.c b/drivers/gpu/drm/vmwgfx/vmwgfx_stdu.c index c22bf80d4ddd..dfbcd41303ad 100644 --- a/drivers/gpu/drm/vmwgfx/vmwgfx_stdu.c +++ b/drivers/gpu/drm/vmwgfx/vmwgfx_stdu.c @@ -492,8 +492,6 @@ static int vmw_stdu_crtc_page_flip(struct drm_crtc *crtc, { struct vmw_private *dev_priv = vmw_priv(crtc->dev); struct vmw_screen_target_display_unit *stdu = vmw_crtc_to_stdu(crtc); - struct vmw_framebuffer *vfb = vmw_framebuffer_to_vfb(new_fb); - struct drm_vmw_rect vclips; int ret; dev_priv = vmw_priv(crtc->dev); @@ -519,26 +517,6 @@ static int vmw_stdu_crtc_page_flip(struct drm_crtc *crtc, if (stdu->base.is_implicit) vmw_kms_update_implicit_fb(dev_priv, crtc); - /* - * Now that we've bound a new surface to the screen target, - * update the contents. 
- */ - vclips.x = crtc->x; - vclips.y = crtc->y; - vclips.w = crtc->mode.hdisplay; - vclips.h = crtc->mode.vdisplay; - - if (vfb->dmabuf) - ret = vmw_kms_stdu_dma(dev_priv, NULL, vfb, NULL, NULL, &vclips, - 1, 1, true, false, crtc); - else - ret = vmw_kms_stdu_surface_dirty(dev_priv, vfb, NULL, &vclips, - NULL, 0, 0, 1, 1, NULL, crtc); - if (ret) { - DRM_ERROR("Page flip update error %d.\n", ret); - return ret; - } - if (event) { struct vmw_fence_obj *fence = NULL; struct drm_file *file_priv = event->base.file_priv; @@ -1333,6 +1311,7 @@ vmw_stdu_primary_plane_atomic_update(struct drm_plane *plane, struct vmw_screen_target_display_unit *stdu; struct vmw_plane_state *vps = vmw_plane_state_to_vps(plane->state); struct drm_crtc *crtc = plane->state->crtc ?: old_state->crtc; + struct vmw_framebuffer *vfb = NULL; int ret; stdu = vmw_crtc_to_stdu(crtc); @@ -1346,9 +1325,10 @@ vmw_stdu_primary_plane_atomic_update(struct drm_plane *plane, if (!stdu->defined) return; - if (plane->state->fb) + if (plane->state->fb) { + vfb = vmw_framebuffer_to_vfb(plane->state->fb); ret = vmw_stdu_bind_st(dev_priv, stdu, &stdu->display_srf->res); - else + } else ret = vmw_stdu_bind_st(dev_priv, stdu, NULL); /* @@ -1360,7 +1340,24 @@ vmw_stdu_primary_plane_atomic_update(struct drm_plane *plane, else crtc->primary->fb = plane->state->fb; - ret = vmw_stdu_update_st(dev_priv, stdu); + if (vfb) { + struct drm_vmw_rect vclips; + + vclips.x = crtc->x; + vclips.y = crtc->y; + vclips.w = crtc->mode.hdisplay; + vclips.h = crtc->mode.vdisplay; + + if (vfb->dmabuf) + ret = vmw_kms_stdu_dma(dev_priv, NULL, vfb, NULL, NULL, + &vclips, 1, 1, true, false, + crtc); + else + ret = vmw_kms_stdu_surface_dirty(dev_priv, vfb, NULL, + &vclips, NULL, 0, 0, + 1, 1, NULL, crtc); + } else + ret = vmw_stdu_update_st(dev_priv, stdu); if (ret) DRM_ERROR("Failed to update STDU.\n"); -- cgit v1.2.3 From 3cbe87fcf026e4cdae6719511267ef020256bf5c Mon Sep 17 00:00:00 2001 From: Deepak Rawat Date: Tue, 16 Jan 2018 08:27:17 
+0100 Subject: drm/vmwgfx: Remove drm_crtc_arm_vblank_event from atomic flush The function drm_crtc_arm_vblank_event should be used by drivers which have vblank interrupt support. In the case of vmwgfx we do not have a vblank interrupt. Signed-off-by: Deepak Rawat Reviewed-by: Sinclair Yeh Signed-off-by: Thomas Hellstrom --- drivers/gpu/drm/vmwgfx/vmwgfx_kms.c | 6 +----- 1 file changed, 1 insertion(+), 5 deletions(-) diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_kms.c b/drivers/gpu/drm/vmwgfx/vmwgfx_kms.c index 53392d64ca03..8e3a44dee633 100644 --- a/drivers/gpu/drm/vmwgfx/vmwgfx_kms.c +++ b/drivers/gpu/drm/vmwgfx/vmwgfx_kms.c @@ -566,13 +566,9 @@ void vmw_du_crtc_atomic_flush(struct drm_crtc *crtc, crtc->state->event = NULL; spin_lock_irq(&crtc->dev->event_lock); - if (drm_crtc_vblank_get(crtc) == 0) - drm_crtc_arm_vblank_event(crtc, event); - else - drm_crtc_send_vblank_event(crtc, event); + drm_crtc_send_vblank_event(crtc, event); spin_unlock_irq(&crtc->dev->event_lock); } - } -- cgit v1.2.3 From aa64b3f18aeb2cc4b74e69115df434147f1ed96c Mon Sep 17 00:00:00 2001 From: Deepak Rawat Date: Tue, 16 Jan 2018 08:31:04 +0100 Subject: drm/vmwgfx: Move screen object page flip to atomic function The dmabuf_dirty/surface_dirty in case of screen object is moved to plane atomic update, so that page flip in atomic ioctl also works. vmwgfx does not support DRM_MODE_PAGE_FLIP_ASYNC, so this flag is never expected. 
Signed-off-by: Deepak Rawat Reviewed-by: Sinclair Yeh Signed-off-by: Thomas Hellstrom --- drivers/gpu/drm/vmwgfx/vmwgfx_scrn.c | 114 ++++++++++++++++++++--------------- 1 file changed, 64 insertions(+), 50 deletions(-) diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_scrn.c b/drivers/gpu/drm/vmwgfx/vmwgfx_scrn.c index a17f6c70fc38..9f6deced4244 100644 --- a/drivers/gpu/drm/vmwgfx/vmwgfx_scrn.c +++ b/drivers/gpu/drm/vmwgfx/vmwgfx_scrn.c @@ -316,69 +316,21 @@ static int vmw_sou_crtc_page_flip(struct drm_crtc *crtc, struct drm_modeset_acquire_ctx *ctx) { struct vmw_private *dev_priv = vmw_priv(crtc->dev); - struct drm_framebuffer *old_fb = crtc->primary->fb; - struct vmw_framebuffer *vfb = vmw_framebuffer_to_vfb(new_fb); - struct vmw_fence_obj *fence = NULL; - struct drm_vmw_rect vclips; int ret; if (!vmw_kms_crtc_flippable(dev_priv, crtc)) return -EINVAL; - flags &= ~DRM_MODE_PAGE_FLIP_ASYNC; - ret = drm_atomic_helper_page_flip(crtc, new_fb, NULL, flags, ctx); + ret = drm_atomic_helper_page_flip(crtc, new_fb, event, flags, ctx); if (ret) { DRM_ERROR("Page flip error %d.\n", ret); return ret; } - /* do a full screen dirty update */ - vclips.x = crtc->x; - vclips.y = crtc->y; - vclips.w = crtc->mode.hdisplay; - vclips.h = crtc->mode.vdisplay; - - if (vfb->dmabuf) - ret = vmw_kms_sou_do_dmabuf_dirty(dev_priv, vfb, - NULL, &vclips, 1, 1, - true, &fence, crtc); - else - ret = vmw_kms_sou_do_surface_dirty(dev_priv, vfb, - NULL, &vclips, NULL, - 0, 0, 1, 1, &fence, crtc); - - - if (ret != 0) - goto out_no_fence; - if (!fence) { - ret = -EINVAL; - goto out_no_fence; - } - - if (event) { - struct drm_file *file_priv = event->base.file_priv; - - ret = vmw_event_fence_action_queue(file_priv, fence, - &event->base, - &event->event.vbl.tv_sec, - &event->event.vbl.tv_usec, - true); - } - - /* - * No need to hold on to this now. The only cleanup - * we need to do if we fail is unref the fence. 
- */ - vmw_fence_obj_unreference(&fence); - if (vmw_crtc_to_du(crtc)->is_implicit) vmw_kms_update_implicit_fb(dev_priv, crtc); return ret; - -out_no_fence: - drm_atomic_set_fb_for_plane(crtc->primary->state, old_fb); - return ret; } static const struct drm_crtc_funcs vmw_screen_object_crtc_funcs = { @@ -530,9 +482,71 @@ vmw_sou_primary_plane_atomic_update(struct drm_plane *plane, struct drm_plane_state *old_state) { struct drm_crtc *crtc = plane->state->crtc; + struct drm_pending_vblank_event *event = NULL; + struct vmw_fence_obj *fence = NULL; + int ret; + + if (crtc && plane->state->fb) { + struct vmw_private *dev_priv = vmw_priv(crtc->dev); + struct vmw_framebuffer *vfb = + vmw_framebuffer_to_vfb(plane->state->fb); + struct drm_vmw_rect vclips; + + vclips.x = crtc->x; + vclips.y = crtc->y; + vclips.w = crtc->mode.hdisplay; + vclips.h = crtc->mode.vdisplay; + + if (vfb->dmabuf) + ret = vmw_kms_sou_do_dmabuf_dirty(dev_priv, vfb, NULL, + &vclips, 1, 1, true, + &fence, crtc); + else + ret = vmw_kms_sou_do_surface_dirty(dev_priv, vfb, NULL, + &vclips, NULL, 0, 0, + 1, 1, &fence, crtc); + + /* + * We cannot really fail this function, so if we do, then output + * an error and maintain consistent atomic state. + */ + if (ret != 0) + DRM_ERROR("Failed to update screen.\n"); - if (crtc) crtc->primary->fb = plane->state->fb; + } else { + /* + * When disabling a plane, CRTC and FB should always be NULL + * together, otherwise it's an error. + * Here primary plane is being disable so should really blank + * the screen object display unit, if not already done. + */ + return; + } + + event = crtc->state->event; + /* + * In case of failure and other cases, vblank event will be sent in + * vmw_du_crtc_atomic_flush. 
+ */ + if (event && fence) { + struct drm_file *file_priv = event->base.file_priv; + + ret = vmw_event_fence_action_queue(file_priv, + fence, + &event->base, + &event->event.vbl.tv_sec, + &event->event.vbl.tv_usec, + true); + + if (unlikely(ret != 0)) + DRM_ERROR("Failed to queue event on fence.\n"); + else + crtc->state->event = NULL; + } + + if (fence) + vmw_fence_obj_unreference(&fence); } -- cgit v1.2.3 From ac3069e67f5659131d7ac5f54d966005bbc40af8 Mon Sep 17 00:00:00 2001 From: Deepak Rawat Date: Tue, 16 Jan 2018 08:44:42 +0100 Subject: drm/vmwgfx: Move the stdu vblank event to atomic function Atomic ioctl can also send the same page flip flags as legacy ioctl. In those cases also need to send the vblank event to userspace. vmwgfx does not support flag DRM_MODE_PAGE_FLIP_ASYNC, so this flag is never expected. Signed-off-by: Deepak Rawat Reviewed-by: Sinclair Yeh Signed-off-by: Thomas Hellstrom --- drivers/gpu/drm/vmwgfx/vmwgfx_stdu.c | 141 +++++++++++++++++++---------------- 1 file changed, 77 insertions(+), 64 deletions(-) diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_stdu.c b/drivers/gpu/drm/vmwgfx/vmwgfx_stdu.c index dfbcd41303ad..6de28746f6f7 100644 --- a/drivers/gpu/drm/vmwgfx/vmwgfx_stdu.c +++ b/drivers/gpu/drm/vmwgfx/vmwgfx_stdu.c @@ -494,47 +494,15 @@ static int vmw_stdu_crtc_page_flip(struct drm_crtc *crtc, struct vmw_screen_target_display_unit *stdu = vmw_crtc_to_stdu(crtc); int ret; - dev_priv = vmw_priv(crtc->dev); - stdu = vmw_crtc_to_stdu(crtc); - if (!stdu->defined || !vmw_kms_crtc_flippable(dev_priv, crtc)) return -EINVAL; - /* - * We're always async, but the helper doesn't know how to set async - * so lie to the helper. Also, the helper expects someone - * to pick the event up from the crtc state, and if nobody does, - * it will free it. Since we handle the event in this function, - * don't hand it to the helper. 
- */ - flags &= ~DRM_MODE_PAGE_FLIP_ASYNC; - ret = drm_atomic_helper_page_flip(crtc, new_fb, NULL, flags, ctx); + ret = drm_atomic_helper_page_flip(crtc, new_fb, event, flags, ctx); if (ret) { DRM_ERROR("Page flip error %d.\n", ret); return ret; } - if (stdu->base.is_implicit) - vmw_kms_update_implicit_fb(dev_priv, crtc); - - if (event) { - struct vmw_fence_obj *fence = NULL; - struct drm_file *file_priv = event->base.file_priv; - - vmw_execbuf_fence_commands(NULL, dev_priv, &fence, NULL); - if (!fence) - return -ENOMEM; - - ret = vmw_event_fence_action_queue(file_priv, fence, - &event->base, - &event->event.vbl.tv_sec, - &event->event.vbl.tv_usec, - true); - vmw_fence_obj_unreference(&fence); - } else { - (void) vmw_fifo_flush(dev_priv, false); - } - return 0; } @@ -1307,47 +1275,38 @@ static void vmw_stdu_primary_plane_atomic_update(struct drm_plane *plane, struct drm_plane_state *old_state) { - struct vmw_private *dev_priv; - struct vmw_screen_target_display_unit *stdu; struct vmw_plane_state *vps = vmw_plane_state_to_vps(plane->state); - struct drm_crtc *crtc = plane->state->crtc ?: old_state->crtc; - struct vmw_framebuffer *vfb = NULL; + struct drm_crtc *crtc = plane->state->crtc; + struct vmw_screen_target_display_unit *stdu; + struct drm_pending_vblank_event *event; + struct vmw_private *dev_priv; int ret; - stdu = vmw_crtc_to_stdu(crtc); - dev_priv = vmw_priv(crtc->dev); - - stdu->display_srf = vps->surf; - stdu->content_fb_type = vps->content_fb_type; - stdu->cpp = vps->cpp; - memcpy(&stdu->host_map, &vps->host_map, sizeof(vps->host_map)); - - if (!stdu->defined) - return; - - if (plane->state->fb) { - vfb = vmw_framebuffer_to_vfb(plane->state->fb); - ret = vmw_stdu_bind_st(dev_priv, stdu, &stdu->display_srf->res); - } else - ret = vmw_stdu_bind_st(dev_priv, stdu, NULL); - /* * We cannot really fail this function, so if we do, then output an - * error and quit + * error and maintain consistent atomic state. 
*/ - if (ret) - DRM_ERROR("Failed to bind surface to STDU.\n"); - else - crtc->primary->fb = plane->state->fb; - - if (vfb) { + if (crtc && plane->state->fb) { + struct vmw_framebuffer *vfb = + vmw_framebuffer_to_vfb(plane->state->fb); struct drm_vmw_rect vclips; + stdu = vmw_crtc_to_stdu(crtc); + dev_priv = vmw_priv(crtc->dev); + + stdu->display_srf = vps->surf; + stdu->content_fb_type = vps->content_fb_type; + stdu->cpp = vps->cpp; + memcpy(&stdu->host_map, &vps->host_map, sizeof(vps->host_map)); vclips.x = crtc->x; vclips.y = crtc->y; vclips.w = crtc->mode.hdisplay; vclips.h = crtc->mode.vdisplay; + ret = vmw_stdu_bind_st(dev_priv, stdu, &stdu->display_srf->res); + if (ret) + DRM_ERROR("Failed to bind surface to STDU.\n"); + if (vfb->dmabuf) ret = vmw_kms_stdu_dma(dev_priv, NULL, vfb, NULL, NULL, &vclips, 1, 1, true, false, @@ -1356,11 +1315,65 @@ vmw_stdu_primary_plane_atomic_update(struct drm_plane *plane, ret = vmw_kms_stdu_surface_dirty(dev_priv, vfb, NULL, &vclips, NULL, 0, 0, 1, 1, NULL, crtc); - } else + if (ret) + DRM_ERROR("Failed to update STDU.\n"); + + crtc->primary->fb = plane->state->fb; + } else { + crtc = old_state->crtc; + stdu = vmw_crtc_to_stdu(crtc); + dev_priv = vmw_priv(crtc->dev); + + /* + * When disabling a plane, CRTC and FB should always be NULL + * together, otherwise it's an error. + * Here primary plane is being disable so blank the screen + * target display unit, if not already done. + */ + if (!stdu->defined) + return; + + ret = vmw_stdu_bind_st(dev_priv, stdu, NULL); + if (ret) + DRM_ERROR("Failed to blank STDU\n"); + ret = vmw_stdu_update_st(dev_priv, stdu); + if (ret) + DRM_ERROR("Failed to update STDU.\n"); - if (ret) - DRM_ERROR("Failed to update STDU.\n"); + return; + } + + event = crtc->state->event; + /* + * In case of failure and other cases, vblank event will be sent in + * vmw_du_crtc_atomic_flush. 
+ */ + if (event && (ret == 0)) { + struct vmw_fence_obj *fence = NULL; + struct drm_file *file_priv = event->base.file_priv; + + vmw_execbuf_fence_commands(NULL, dev_priv, &fence, NULL); + + /* + * If fence is NULL, then already sync. + */ + if (fence) { + ret = vmw_event_fence_action_queue( + file_priv, fence, &event->base, + &event->event.vbl.tv_sec, + &event->event.vbl.tv_usec, + true); + if (ret) + DRM_ERROR("Failed to queue event on fence.\n"); + else + crtc->state->event = NULL; + + vmw_fence_obj_unreference(&fence); + } + } else { + (void) vmw_fifo_flush(dev_priv, false); + } } -- cgit v1.2.3 From 904efd9e3f4c8f288b1279a316eed8e177190c8f Mon Sep 17 00:00:00 2001 From: Deepak Rawat Date: Tue, 16 Jan 2018 08:48:09 +0100 Subject: drm/vmwgfx: Send the correct nonblock option for atomic_commit Page flip can be slow for vmwgfx in some cases, like need to do surface copy to different surface or waiting for IN_FENCE_FD. Enabling nonblocking commits for vmwgfx in case userspace request it. Signed-off-by: Deepak Rawat Reviewed-by: Sinclair Yeh Signed-off-by: Thomas Hellstrom --- drivers/gpu/drm/vmwgfx/vmwgfx_kms.c | 27 +-------------------------- 1 file changed, 1 insertion(+), 26 deletions(-) diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_kms.c b/drivers/gpu/drm/vmwgfx/vmwgfx_kms.c index 8e3a44dee633..f26a2aca3611 100644 --- a/drivers/gpu/drm/vmwgfx/vmwgfx_kms.c +++ b/drivers/gpu/drm/vmwgfx/vmwgfx_kms.c @@ -1536,35 +1536,10 @@ vmw_kms_atomic_check_modeset(struct drm_device *dev, return drm_atomic_helper_check(dev, state); } - -/** - * vmw_kms_atomic_commit - Perform an atomic state commit - * - * @dev: DRM device - * @state: the driver state object - * @nonblock: Whether nonblocking behaviour is requested - * - * This is a simple wrapper around drm_atomic_helper_commit() for - * us to clear the nonblocking value. - * - * Nonblocking commits currently cause synchronization issues - * for vmwgfx. - * - * RETURNS - * Zero for success or negative error code on failure. 
- */ -int vmw_kms_atomic_commit(struct drm_device *dev, - struct drm_atomic_state *state, - bool nonblock) -{ - return drm_atomic_helper_commit(dev, state, false); -} - - static const struct drm_mode_config_funcs vmw_kms_funcs = { .fb_create = vmw_kms_fb_create, .atomic_check = vmw_kms_atomic_check_modeset, - .atomic_commit = vmw_kms_atomic_commit, + .atomic_commit = drm_atomic_helper_commit, }; static int vmw_kms_generic_present(struct vmw_private *dev_priv, -- cgit v1.2.3 From 25db875401c8aaac31a6650cb80a56cc78852694 Mon Sep 17 00:00:00 2001 From: Thomas Hellstrom Date: Tue, 16 Jan 2018 08:54:30 +0100 Subject: drm/vmwgfx: Cursor update fixes Use drm_plane_helper_check_update also for the cursor plane. Some applications, like gdm on gnome shell, still use cursor front-buffer like rendering without notifying the kernel. We do need some kind of notification, but work around this for now by updating the cursor image on every cursor move. Signed-off-by: Thomas Hellstrom Reviewed-by: Brian Paul --- drivers/gpu/drm/vmwgfx/vmwgfx_kms.c | 15 +++++++++++++-- 1 file changed, 13 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_kms.c b/drivers/gpu/drm/vmwgfx/vmwgfx_kms.c index f26a2aca3611..0f9c9cd78d83 100644 --- a/drivers/gpu/drm/vmwgfx/vmwgfx_kms.c +++ b/drivers/gpu/drm/vmwgfx/vmwgfx_kms.c @@ -393,13 +393,13 @@ vmw_du_cursor_plane_atomic_update(struct drm_plane *plane, du->cursor_surface = vps->surf; du->cursor_dmabuf = vps->dmabuf; - /* setup new image */ if (vps->surf) { du->cursor_age = du->cursor_surface->snooper.age; ret = vmw_cursor_update_image(dev_priv, vps->surf->snooper.image, - 64, 64, hotspot_x, hotspot_y); + 64, 64, hotspot_x, + hotspot_y); } else if (vps->dmabuf) { ret = vmw_cursor_update_dmabuf(dev_priv, vps->dmabuf, plane->state->crtc_w, @@ -497,11 +497,22 @@ int vmw_du_cursor_plane_atomic_check(struct drm_plane *plane, struct vmw_surface *surface = NULL; struct drm_framebuffer *fb = new_state->fb; + struct drm_rect src = 
drm_plane_state_src(new_state); + struct drm_rect dest = drm_plane_state_dest(new_state); /* Turning off */ if (!fb) return ret; + ret = drm_plane_helper_check_update(plane, new_state->crtc, fb, + &src, &dest, + DRM_MODE_ROTATE_0, + DRM_PLANE_HELPER_NO_SCALING, + DRM_PLANE_HELPER_NO_SCALING, + true, true, &new_state->visible); + if (!ret) + return ret; + /* A lot of the code assumes this */ if (new_state->crtc_w != 64 || new_state->crtc_h != 64) { DRM_ERROR("Invalid cursor dimensions (%d, %d)\n", -- cgit v1.2.3 From 403c1826a456441ee983acefbd03ce7d73d1ff00 Mon Sep 17 00:00:00 2001 From: Thomas Hellstrom Date: Tue, 16 Jan 2018 09:02:03 +0100 Subject: drm/ttm: Clean up kmap_atomic_prot selection code MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Use helpers to perform the kmap_atomic_prot() functionality to a) Avoid in-function ifdefs that violate the kernel coding policy, b) Facilitate exporting the functionality. This commit should not change any functionality. 
Reviewed-by: Christian König Signed-off-by: Thomas Hellstrom --- drivers/gpu/drm/ttm/ttm_bo_util.c | 64 +++++++++++++++++++-------------------- 1 file changed, 31 insertions(+), 33 deletions(-) diff --git a/drivers/gpu/drm/ttm/ttm_bo_util.c b/drivers/gpu/drm/ttm/ttm_bo_util.c index 1f730b3f18e5..11f27db58abe 100644 --- a/drivers/gpu/drm/ttm/ttm_bo_util.c +++ b/drivers/gpu/drm/ttm/ttm_bo_util.c @@ -255,6 +255,33 @@ static int ttm_copy_io_page(void *dst, void *src, unsigned long page) return 0; } +#ifdef CONFIG_X86 +#define __ttm_kmap_atomic_prot(__page, __prot) kmap_atomic_prot(__page, __prot) +#define __ttm_kunmap_atomic(__addr) kunmap_atomic(__addr) +#else +#define __ttm_kmap_atomic_prot(__page, __prot) vmap(&__page, 1, 0, __prot) +#define __ttm_kunmap_atomic(__addr) vunmap(__addr) +#endif + +static void *ttm_kmap_atomic_prot(struct page *page, + pgprot_t prot) +{ + if (pgprot_val(prot) == pgprot_val(PAGE_KERNEL)) + return kmap_atomic(page); + else + return __ttm_kmap_atomic_prot(page, prot); +} + + +static void ttm_kunmap_atomic_prot(void *addr, + pgprot_t prot) +{ + if (pgprot_val(prot) == pgprot_val(PAGE_KERNEL)) + kunmap_atomic(addr); + else + __ttm_kunmap_atomic(addr); +} + static int ttm_copy_io_ttm_page(struct ttm_tt *ttm, void *src, unsigned long page, pgprot_t prot) @@ -266,28 +293,13 @@ static int ttm_copy_io_ttm_page(struct ttm_tt *ttm, void *src, return -ENOMEM; src = (void *)((unsigned long)src + (page << PAGE_SHIFT)); - -#ifdef CONFIG_X86 - dst = kmap_atomic_prot(d, prot); -#else - if (pgprot_val(prot) != pgprot_val(PAGE_KERNEL)) - dst = vmap(&d, 1, 0, prot); - else - dst = kmap(d); -#endif + dst = ttm_kmap_atomic_prot(d, prot); if (!dst) return -ENOMEM; memcpy_fromio(dst, src, PAGE_SIZE); -#ifdef CONFIG_X86 - kunmap_atomic(dst); -#else - if (pgprot_val(prot) != pgprot_val(PAGE_KERNEL)) - vunmap(dst); - else - kunmap(d); -#endif + ttm_kunmap_atomic_prot(dst, prot); return 0; } @@ -303,27 +315,13 @@ static int ttm_copy_ttm_io_page(struct ttm_tt *ttm, 
void *dst, return -ENOMEM; dst = (void *)((unsigned long)dst + (page << PAGE_SHIFT)); -#ifdef CONFIG_X86 - src = kmap_atomic_prot(s, prot); -#else - if (pgprot_val(prot) != pgprot_val(PAGE_KERNEL)) - src = vmap(&s, 1, 0, prot); - else - src = kmap(s); -#endif + src = ttm_kmap_atomic_prot(s, prot); if (!src) return -ENOMEM; memcpy_toio(dst, src, PAGE_SIZE); -#ifdef CONFIG_X86 - kunmap_atomic(src); -#else - if (pgprot_val(prot) != pgprot_val(PAGE_KERNEL)) - vunmap(src); - else - kunmap(s); -#endif + ttm_kunmap_atomic_prot(src, prot); return 0; } -- cgit v1.2.3 From 9c11fcf1a74d338774faa059b0aea24264c95658 Mon Sep 17 00:00:00 2001 From: Thomas Hellstrom Date: Tue, 16 Jan 2018 09:12:05 +0100 Subject: drm/ttm: Export the ttm_k[un]map_atomic_prot API. MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit It will be used by vmwgfx cpu blit. Reviewed-by: Christian König Reviewed-by: Brian Paul Signed-off-by: Thomas Hellstrom --- drivers/gpu/drm/ttm/ttm_bo_util.c | 31 ++++++++++++++++++++++++++----- include/drm/ttm/ttm_bo_api.h | 4 ++++ 2 files changed, 30 insertions(+), 5 deletions(-) diff --git a/drivers/gpu/drm/ttm/ttm_bo_util.c b/drivers/gpu/drm/ttm/ttm_bo_util.c index 11f27db58abe..2ebbae6067ab 100644 --- a/drivers/gpu/drm/ttm/ttm_bo_util.c +++ b/drivers/gpu/drm/ttm/ttm_bo_util.c @@ -263,24 +263,45 @@ static int ttm_copy_io_page(void *dst, void *src, unsigned long page) #define __ttm_kunmap_atomic(__addr) vunmap(__addr) #endif -static void *ttm_kmap_atomic_prot(struct page *page, - pgprot_t prot) + +/** + * ttm_kmap_atomic_prot - Efficient kernel map of a single page with + * specified page protection. + * + * @page: The page to map. + * @prot: The page protection. + * + * This function maps a TTM page using the kmap_atomic api if available, + * otherwise falls back to vmap. 
The user must make sure that the + * specified page does not have an aliased mapping with a different caching + * policy unless the architecture explicitly allows it. Also mapping and + * unmapping using this api must be correctly nested. Unmapping should + * occur in the reverse order of mapping. + */ +void *ttm_kmap_atomic_prot(struct page *page, pgprot_t prot) { if (pgprot_val(prot) == pgprot_val(PAGE_KERNEL)) return kmap_atomic(page); else return __ttm_kmap_atomic_prot(page, prot); } +EXPORT_SYMBOL(ttm_kmap_atomic_prot); - -static void ttm_kunmap_atomic_prot(void *addr, - pgprot_t prot) +/** + * ttm_kunmap_atomic_prot - Unmap a page that was mapped using + * ttm_kmap_atomic_prot. + * + * @addr: The virtual address from the map. + * @prot: The page protection. + */ +void ttm_kunmap_atomic_prot(void *addr, pgprot_t prot) { if (pgprot_val(prot) == pgprot_val(PAGE_KERNEL)) kunmap_atomic(addr); else __ttm_kunmap_atomic(addr); } +EXPORT_SYMBOL(ttm_kunmap_atomic_prot); static int ttm_copy_io_ttm_page(struct ttm_tt *ttm, void *src, unsigned long page, diff --git a/include/drm/ttm/ttm_bo_api.h b/include/drm/ttm/ttm_bo_api.h index 8e2fb1ac4e0c..c67977aa1a0e 100644 --- a/include/drm/ttm/ttm_bo_api.h +++ b/include/drm/ttm/ttm_bo_api.h @@ -709,6 +709,10 @@ int ttm_fbdev_mmap(struct vm_area_struct *vma, struct ttm_buffer_object *bo); int ttm_bo_mmap(struct file *filp, struct vm_area_struct *vma, struct ttm_bo_device *bdev); +void *ttm_kmap_atomic_prot(struct page *page, pgprot_t prot); + +void ttm_kunmap_atomic_prot(void *addr, pgprot_t prot); + /** * ttm_bo_io * -- cgit v1.2.3 From 79273e1b7eb0e2007c5a9cae71af31075df8e6c5 Mon Sep 17 00:00:00 2001 From: Thomas Hellstrom Date: Tue, 16 Jan 2018 09:33:27 +0100 Subject: drm/vmwgfx: Add a cpu blit utility that can be used for page-backed bos The utility uses kmap_atomic() instead of vmapping the whole buffer object. 
As a result there will be more book-keeping but on some architectures this will help avoid exhausting vmalloc space and also avoid expensive TLB flushes. The blit utility also adds a provision to compute a bounding box of changed content, which is very useful to optimize presentation speed of ill-behaved applications that don't supply proper damage regions, and for page-flips. The cost of computing the bounding box is not that expensive when done in a cpu-blit utility like this. Signed-off-by: Thomas Hellstrom Reviewed-by: Brian Paul --- drivers/gpu/drm/vmwgfx/Makefile | 2 +- drivers/gpu/drm/vmwgfx/vmwgfx_blit.c | 506 +++++++++++++++++++++++++++++++++++ drivers/gpu/drm/vmwgfx/vmwgfx_drv.h | 48 ++++ 3 files changed, 555 insertions(+), 1 deletion(-) create mode 100644 drivers/gpu/drm/vmwgfx/vmwgfx_blit.c diff --git a/drivers/gpu/drm/vmwgfx/Makefile b/drivers/gpu/drm/vmwgfx/Makefile index ad80211e1098..794cc9d5c9b0 100644 --- a/drivers/gpu/drm/vmwgfx/Makefile +++ b/drivers/gpu/drm/vmwgfx/Makefile @@ -7,6 +7,6 @@ vmwgfx-y := vmwgfx_execbuf.o vmwgfx_gmr.o vmwgfx_kms.o vmwgfx_drv.o \ vmwgfx_surface.o vmwgfx_prime.o vmwgfx_mob.o vmwgfx_shader.o \ vmwgfx_cmdbuf_res.o vmwgfx_cmdbuf.o vmwgfx_stdu.o \ vmwgfx_cotable.o vmwgfx_so.o vmwgfx_binding.o vmwgfx_msg.o \ - vmwgfx_simple_resource.o vmwgfx_va.o + vmwgfx_simple_resource.o vmwgfx_va.o vmwgfx_blit.o obj-$(CONFIG_DRM_VMWGFX) := vmwgfx.o diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_blit.c b/drivers/gpu/drm/vmwgfx/vmwgfx_blit.c new file mode 100644 index 000000000000..e8c94b19db7b --- /dev/null +++ b/drivers/gpu/drm/vmwgfx/vmwgfx_blit.c @@ -0,0 +1,506 @@ +/************************************************************************** + * + * Copyright © 2017 VMware, Inc., Palo Alto, CA., USA + * All Rights Reserved. 
+ * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDERS, AUTHORS AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM, + * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR + * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE + * USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + +#include "vmwgfx_drv.h" + +/* + * Template that implements find_first_diff() for a generic + * unsigned integer type. @size and return value are in bytes. + */ +#define VMW_FIND_FIRST_DIFF(_type) \ +static size_t vmw_find_first_diff_ ## _type \ + (const _type * dst, const _type * src, size_t size)\ +{ \ + size_t i; \ + \ + for (i = 0; i < size; i += sizeof(_type)) { \ + if (*dst++ != *src++) \ + break; \ + } \ + \ + return i; \ +} + + +/* + * Template that implements find_last_diff() for a generic + * unsigned integer type. Pointers point to the item following the + * *end* of the area to be examined. @size and return value are in + * bytes. 
+ */ +#define VMW_FIND_LAST_DIFF(_type) \ +static ssize_t vmw_find_last_diff_ ## _type( \ + const _type * dst, const _type * src, size_t size) \ +{ \ + while (size) { \ + if (*--dst != *--src) \ + break; \ + \ + size -= sizeof(_type); \ + } \ + return size; \ +} + + +/* + * Instantiate find diff functions for relevant unsigned integer sizes, + * assuming that wider integers are faster (including aligning) up to the + * architecture native width, which is assumed to be 32 bit unless + * CONFIG_64BIT is defined. + */ +VMW_FIND_FIRST_DIFF(u8); +VMW_FIND_LAST_DIFF(u8); + +VMW_FIND_FIRST_DIFF(u16); +VMW_FIND_LAST_DIFF(u16); + +VMW_FIND_FIRST_DIFF(u32); +VMW_FIND_LAST_DIFF(u32); + +#ifdef CONFIG_64BIT +VMW_FIND_FIRST_DIFF(u64); +VMW_FIND_LAST_DIFF(u64); +#endif + + +/* We use size aligned copies. This computes (addr - align(addr)) */ +#define SPILL(_var, _type) ((unsigned long) _var & (sizeof(_type) - 1)) + + +/* + * Template to compute find_first_diff() for a certain integer type + * including a head copy for alignment, and adjustment of parameters + * for tail find or increased resolution find using an unsigned integer find + * of smaller width. If finding is complete, and resolution is sufficient, + * the macro executes a return statement. Otherwise it falls through. 
+ */ +#define VMW_TRY_FIND_FIRST_DIFF(_type) \ +do { \ + unsigned int spill = SPILL(dst, _type); \ + size_t diff_offs; \ + \ + if (spill && spill == SPILL(src, _type) && \ + sizeof(_type) - spill <= size) { \ + spill = sizeof(_type) - spill; \ + diff_offs = vmw_find_first_diff_u8(dst, src, spill); \ + if (diff_offs < spill) \ + return round_down(offset + diff_offs, granularity); \ + \ + dst += spill; \ + src += spill; \ + size -= spill; \ + offset += spill; \ + spill = 0; \ + } \ + if (!spill && !SPILL(src, _type)) { \ + size_t to_copy = size & ~(sizeof(_type) - 1); \ + \ + diff_offs = vmw_find_first_diff_ ## _type \ + ((_type *) dst, (_type *) src, to_copy); \ + if (diff_offs >= size || granularity == sizeof(_type)) \ + return (offset + diff_offs); \ + \ + dst += diff_offs; \ + src += diff_offs; \ + size -= diff_offs; \ + offset += diff_offs; \ + } \ +} while (0) \ + + +/** + * vmw_find_first_diff - find the first difference between dst and src + * + * @dst: The destination address + * @src: The source address + * @size: Number of bytes to compare + * @granularity: The granularity needed for the return value in bytes. + * return: The offset from find start where the first difference was + * encountered in bytes. If no difference was found, the function returns + * a value >= @size. + */ +static size_t vmw_find_first_diff(const u8 *dst, const u8 *src, size_t size, + size_t granularity) +{ + size_t offset = 0; + + /* + * Try finding with large integers if alignment allows, or we can + * fix it. Fall through if we need better resolution or alignment + * was bad. 
+ */ +#ifdef CONFIG_64BIT + VMW_TRY_FIND_FIRST_DIFF(u64); +#endif + VMW_TRY_FIND_FIRST_DIFF(u32); + VMW_TRY_FIND_FIRST_DIFF(u16); + + return round_down(offset + vmw_find_first_diff_u8(dst, src, size), + granularity); +} + + +/* + * Template to compute find_last_diff() for a certain integer type + * including a tail copy for alignment, and adjustment of parameters + * for head find or increased resolution find using an unsigned integer find + * of smaller width. If finding is complete, and resolution is sufficient, + * the macro executes a return statement. Otherwise it falls through. + */ +#define VMW_TRY_FIND_LAST_DIFF(_type) \ +do { \ + unsigned int spill = SPILL(dst, _type); \ + ssize_t location; \ + ssize_t diff_offs; \ + \ + if (spill && spill <= size && spill == SPILL(src, _type)) { \ + diff_offs = vmw_find_last_diff_u8(dst, src, spill); \ + if (diff_offs) { \ + location = size - spill + diff_offs - 1; \ + return round_down(location, granularity); \ + } \ + \ + dst -= spill; \ + src -= spill; \ + size -= spill; \ + spill = 0; \ + } \ + if (!spill && !SPILL(src, _type)) { \ + size_t to_copy = round_down(size, sizeof(_type)); \ + \ + diff_offs = vmw_find_last_diff_ ## _type \ + ((_type *) dst, (_type *) src, to_copy); \ + location = size - to_copy + diff_offs - sizeof(_type); \ + if (location < 0 || granularity == sizeof(_type)) \ + return location; \ + \ + dst -= to_copy - diff_offs; \ + src -= to_copy - diff_offs; \ + size -= to_copy - diff_offs; \ + } \ +} while (0) + + +/** + * vmw_find_last_diff - find the last difference between dst and src + * + * @dst: The destination address + * @src: The source address + * @size: Number of bytes to compare + * @granularity: The granularity needed for the return value in bytes. + * return: The offset from find start where the last difference was + * encountered in bytes, or a negative value if no difference was found. 
+ */ +static ssize_t vmw_find_last_diff(const u8 *dst, const u8 *src, size_t size, + size_t granularity) +{ + dst += size; + src += size; + +#ifdef CONFIG_64BIT + VMW_TRY_FIND_LAST_DIFF(u64); +#endif + VMW_TRY_FIND_LAST_DIFF(u32); + VMW_TRY_FIND_LAST_DIFF(u16); + + return round_down(vmw_find_last_diff_u8(dst, src, size) - 1, + granularity); +} + + +/** + * vmw_memcpy - A wrapper around kernel memcpy with allowing to plug it into a + * struct vmw_diff_cpy. + * + * @diff: The struct vmw_diff_cpy closure argument (unused). + * @dest: The copy destination. + * @src: The copy source. + * @n: Number of bytes to copy. + */ +void vmw_memcpy(struct vmw_diff_cpy *diff, u8 *dest, const u8 *src, size_t n) +{ + memcpy(dest, src, n); +} + + +/** + * vmw_adjust_rect - Adjust rectangle coordinates for newly found difference + * + * @diff: The struct vmw_diff_cpy used to track the modified bounding box. + * @diff_offs: The offset from @diff->line_offset where the difference was + * found. + */ +static void vmw_adjust_rect(struct vmw_diff_cpy *diff, size_t diff_offs) +{ + size_t offs = (diff_offs + diff->line_offset) / diff->cpp; + struct drm_rect *rect = &diff->rect; + + rect->x1 = min_t(int, rect->x1, offs); + rect->x2 = max_t(int, rect->x2, offs + 1); + rect->y1 = min_t(int, rect->y1, diff->line); + rect->y2 = max_t(int, rect->y2, diff->line + 1); +} + +/** + * vmw_diff_memcpy - memcpy that creates a bounding box of modified content. + * + * @diff: The struct vmw_diff_cpy used to track the modified bounding box. + * @dest: The copy destination. + * @src: The copy source. + * @n: Number of bytes to copy. 
+ * + * In order to correctly track the modified content, the field @diff->line must + * be pre-loaded with the current line number, the field @diff->line_offset must + * be pre-loaded with the line offset in bytes where the copy starts, and + * finally the field @diff->cpp needs to be pre-loaded with the number of bytes + * per unit in the horizontal direction of the area we're examining. + * Typically bytes per pixel. + * This is needed to know the needed granularity of the difference computing + * operations. A higher cpp generally leads to faster execution at the cost of + * bounding box width precision. + */ +void vmw_diff_memcpy(struct vmw_diff_cpy *diff, u8 *dest, const u8 *src, + size_t n) +{ + ssize_t csize, byte_len; + + if (WARN_ON_ONCE(round_down(n, diff->cpp) != n)) + return; + + /* TODO: Possibly use a single vmw_find_first_diff per line? */ + csize = vmw_find_first_diff(dest, src, n, diff->cpp); + if (csize < n) { + vmw_adjust_rect(diff, csize); + byte_len = diff->cpp; + + /* + * Starting from where first difference was found, find + * location of last difference, and then copy. + */ + diff->line_offset += csize; + dest += csize; + src += csize; + n -= csize; + csize = vmw_find_last_diff(dest, src, n, diff->cpp); + if (csize >= 0) { + byte_len += csize; + vmw_adjust_rect(diff, csize); + } + memcpy(dest, src, byte_len); + } + diff->line_offset += n; +} + +/** + * struct vmw_bo_blit_line_data - Convenience argument to vmw_bo_cpu_blit_line + * + * @mapped_dst: Already mapped destination page index in @dst_pages. + * @dst_addr: Kernel virtual address of mapped destination page. + * @dst_pages: Array of destination bo pages. + * @dst_num_pages: Number of destination bo pages. + * @dst_prot: Destination bo page protection. + * @mapped_src: Already mapped source page index in @src_pages. + * @src_addr: Kernel virtual address of mapped source page. + * @src_pages: Array of source bo pages. + * @src_num_pages: Number of source bo pages.
+ * @src_prot: Source bo page protection. + * @diff: Struct vmw_diff_cpy, in the end forwarded to the memcpy routine. + */ +struct vmw_bo_blit_line_data { + u32 mapped_dst; + u8 *dst_addr; + struct page **dst_pages; + u32 dst_num_pages; + pgprot_t dst_prot; + u32 mapped_src; + u8 *src_addr; + struct page **src_pages; + u32 src_num_pages; + pgprot_t src_prot; + struct vmw_diff_cpy *diff; +}; + +/** + * vmw_bo_cpu_blit_line - Blit part of a line from one bo to another. + * + * @d: Blit data as described above. + * @dst_offset: Destination copy start offset from start of bo. + * @src_offset: Source copy start offset from start of bo. + * @bytes_to_copy: Number of bytes to copy in this line. + */ +static int vmw_bo_cpu_blit_line(struct vmw_bo_blit_line_data *d, + u32 dst_offset, + u32 src_offset, + u32 bytes_to_copy) +{ + struct vmw_diff_cpy *diff = d->diff; + + while (bytes_to_copy) { + u32 copy_size = bytes_to_copy; + u32 dst_page = dst_offset >> PAGE_SHIFT; + u32 src_page = src_offset >> PAGE_SHIFT; + u32 dst_page_offset = dst_offset & ~PAGE_MASK; + u32 src_page_offset = src_offset & ~PAGE_MASK; + bool unmap_dst = d->dst_addr && dst_page != d->mapped_dst; + bool unmap_src = d->src_addr && (src_page != d->mapped_src || + unmap_dst); + + copy_size = min_t(u32, copy_size, PAGE_SIZE - dst_page_offset); + copy_size = min_t(u32, copy_size, PAGE_SIZE - src_page_offset); + + if (unmap_src) { + ttm_kunmap_atomic_prot(d->src_addr, d->src_prot); + d->src_addr = NULL; + } + + if (unmap_dst) { + ttm_kunmap_atomic_prot(d->dst_addr, d->dst_prot); + d->dst_addr = NULL; + } + + if (!d->dst_addr) { + if (WARN_ON_ONCE(dst_page >= d->dst_num_pages)) + return -EINVAL; + + d->dst_addr = + ttm_kmap_atomic_prot(d->dst_pages[dst_page], + d->dst_prot); + if (!d->dst_addr) + return -ENOMEM; + + d->mapped_dst = dst_page; + } + + if (!d->src_addr) { + if (WARN_ON_ONCE(src_page >= d->src_num_pages)) + return -EINVAL; + + d->src_addr = + ttm_kmap_atomic_prot(d->src_pages[src_page], + 
d->src_prot); + if (!d->src_addr) + return -ENOMEM; + + d->mapped_src = src_page; + } + diff->do_cpy(diff, d->dst_addr + dst_page_offset, + d->src_addr + src_page_offset, copy_size); + + bytes_to_copy -= copy_size; + dst_offset += copy_size; + src_offset += copy_size; + } + + return 0; +} + +/** + * vmw_bo_cpu_blit - in-kernel cpu blit. + * + * @dst: Destination buffer object. + * @dst_offset: Destination offset of blit start in bytes. + * @dst_stride: Destination stride in bytes. + * @src: Source buffer object. + * @src_offset: Source offset of blit start in bytes. + * @src_stride: Source stride in bytes. + * @w: Width of blit in bytes. + * @h: Height of blit. + * return: Zero on success. Negative error value on failure. Will print out + * kernel warnings on caller bugs. + * + * Performs a CPU blit from one buffer object to another avoiding a full + * bo vmap which may exhaust- or fragment vmalloc space. + * On supported architectures (x86), we're using kmap_atomic which avoids + * cross-processor TLB- and cache flushes and may, on non-HIGHMEM systems + * reference already set-up mappings. + * + * Neither of the buffer objects may be placed in PCI memory + * (Fixed memory in TTM terminology) when using this function.
+ */ +int vmw_bo_cpu_blit(struct ttm_buffer_object *dst, + u32 dst_offset, u32 dst_stride, + struct ttm_buffer_object *src, + u32 src_offset, u32 src_stride, + u32 w, u32 h, + struct vmw_diff_cpy *diff) +{ + struct ttm_operation_ctx ctx = { + .interruptible = false, + .no_wait_gpu = false + }; + u32 j, initial_line = dst_offset / dst_stride; + struct vmw_bo_blit_line_data d; + int ret = 0; + + /* Buffer objects need to be either pinned or reserved: */ + if (!(dst->mem.placement & TTM_PL_FLAG_NO_EVICT)) + lockdep_assert_held(&dst->resv->lock.base); + if (!(src->mem.placement & TTM_PL_FLAG_NO_EVICT)) + lockdep_assert_held(&src->resv->lock.base); + + if (dst->ttm->state == tt_unpopulated) { + ret = dst->ttm->bdev->driver->ttm_tt_populate(dst->ttm, &ctx); + if (ret) + return ret; + } + + if (src->ttm->state == tt_unpopulated) { + ret = src->ttm->bdev->driver->ttm_tt_populate(src->ttm, &ctx); + if (ret) + return ret; + } + + d.mapped_dst = 0; + d.mapped_src = 0; + d.dst_addr = NULL; + d.src_addr = NULL; + d.dst_pages = dst->ttm->pages; + d.src_pages = src->ttm->pages; + d.dst_num_pages = dst->num_pages; + d.src_num_pages = src->num_pages; + d.dst_prot = ttm_io_prot(dst->mem.placement, PAGE_KERNEL); + d.src_prot = ttm_io_prot(src->mem.placement, PAGE_KERNEL); + d.diff = diff; + + for (j = 0; j < h; ++j) { + diff->line = j + initial_line; + diff->line_offset = dst_offset % dst_stride; + ret = vmw_bo_cpu_blit_line(&d, dst_offset, src_offset, w); + if (ret) + goto out; + + dst_offset += dst_stride; + src_offset += src_stride; + } +out: + if (d.src_addr) + ttm_kunmap_atomic_prot(d.src_addr, d.src_prot); + if (d.dst_addr) + ttm_kunmap_atomic_prot(d.dst_addr, d.dst_prot); + + return ret; +} diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_drv.h b/drivers/gpu/drm/vmwgfx/vmwgfx_drv.h index d08753e8fd94..053418adf6a0 100644 --- a/drivers/gpu/drm/vmwgfx/vmwgfx_drv.h +++ b/drivers/gpu/drm/vmwgfx/vmwgfx_drv.h @@ -678,6 +678,7 @@ extern void vmw_fence_single_bo(struct ttm_buffer_object 
*bo, struct vmw_fence_obj *fence); extern void vmw_resource_evict_all(struct vmw_private *dev_priv); + /** * DMA buffer helper routines - vmwgfx_dmabuf.c */ @@ -1165,6 +1166,53 @@ extern int vmw_cmdbuf_cur_flush(struct vmw_cmdbuf_man *man, bool interruptible); extern void vmw_cmdbuf_irqthread(struct vmw_cmdbuf_man *man); +/* CPU blit utilities - vmwgfx_blit.c */ + +/** + * struct vmw_diff_cpy - CPU blit information structure + * + * @rect: The output bounding box rectangle. + * @line: The current line of the blit. + * @line_offset: Offset of the current line segment. + * @cpp: Bytes per pixel (granularity information). + * @do_cpy: Which memcpy function to use. + */ +struct vmw_diff_cpy { + struct drm_rect rect; + size_t line; + size_t line_offset; + int cpp; + void (*do_cpy)(struct vmw_diff_cpy *diff, u8 *dest, const u8 *src, + size_t n); +}; + +#define VMW_CPU_BLIT_INITIALIZER { \ + .do_cpy = vmw_memcpy, \ +} + +#define VMW_CPU_BLIT_DIFF_INITIALIZER(_cpp) { \ + .line = 0, \ + .line_offset = 0, \ + .rect = { .x1 = INT_MAX/2, \ + .y1 = INT_MAX/2, \ + .x2 = INT_MIN/2, \ + .y2 = INT_MIN/2 \ + }, \ + .cpp = _cpp, \ + .do_cpy = vmw_diff_memcpy, \ +} + +void vmw_diff_memcpy(struct vmw_diff_cpy *diff, u8 *dest, const u8 *src, + size_t n); + +void vmw_memcpy(struct vmw_diff_cpy *diff, u8 *dest, const u8 *src, size_t n); + +int vmw_bo_cpu_blit(struct ttm_buffer_object *dst, + u32 dst_offset, u32 dst_stride, + struct ttm_buffer_object *src, + u32 src_offset, u32 src_stride, + u32 w, u32 h, + struct vmw_diff_cpy *diff); /** * Inline helper functions -- cgit v1.2.3 From ef86cfee7d74baf2e3b883871087a684acecb595 Mon Sep 17 00:00:00 2001 From: Thomas Hellstrom Date: Tue, 16 Jan 2018 11:07:30 +0100 Subject: drm/vmwgfx: Use the cpu blit utility for framebuffer to screen target blits This blit was previously performed using two large vmaps, one of which was torn down and remapped on each blit. Use the more resource- conserving TTM cpu blit instead.
The blit is used in boundary-box computing mode which makes it possible to minimize the bounding box used in host operations. Signed-off-by: Thomas Hellstrom Reviewed-by: Brian Paul --- drivers/gpu/drm/vmwgfx/vmwgfx_buffer.c | 23 ++++++++ drivers/gpu/drm/vmwgfx/vmwgfx_drv.h | 1 + drivers/gpu/drm/vmwgfx/vmwgfx_kms.c | 50 ++++++++++------ drivers/gpu/drm/vmwgfx/vmwgfx_kms.h | 4 +- drivers/gpu/drm/vmwgfx/vmwgfx_scrn.c | 5 +- drivers/gpu/drm/vmwgfx/vmwgfx_stdu.c | 105 +++++++++++---------------------- 6 files changed, 97 insertions(+), 91 deletions(-) diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_buffer.c b/drivers/gpu/drm/vmwgfx/vmwgfx_buffer.c index 7177eecb8c9f..ddf71bef5359 100644 --- a/drivers/gpu/drm/vmwgfx/vmwgfx_buffer.c +++ b/drivers/gpu/drm/vmwgfx/vmwgfx_buffer.c @@ -185,6 +185,22 @@ static const struct ttm_place evictable_placement_flags[] = { } }; +static const struct ttm_place nonfixed_placement_flags[] = { + { + .fpfn = 0, + .lpfn = 0, + .flags = TTM_PL_FLAG_SYSTEM | TTM_PL_FLAG_CACHED + }, { + .fpfn = 0, + .lpfn = 0, + .flags = VMW_PL_FLAG_GMR | TTM_PL_FLAG_CACHED + }, { + .fpfn = 0, + .lpfn = 0, + .flags = VMW_PL_FLAG_MOB | TTM_PL_FLAG_CACHED + } +}; + struct ttm_placement vmw_evictable_placement = { .num_placement = 4, .placement = evictable_placement_flags, @@ -213,6 +229,13 @@ struct ttm_placement vmw_mob_ne_placement = { .busy_placement = &mob_ne_placement_flags }; +struct ttm_placement vmw_nonfixed_placement = { + .num_placement = 3, + .placement = nonfixed_placement_flags, + .num_busy_placement = 1, + .busy_placement = &sys_placement_flags +}; + struct vmw_ttm_tt { struct ttm_dma_tt dma_ttm; struct vmw_private *dev_priv; diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_drv.h b/drivers/gpu/drm/vmwgfx/vmwgfx_drv.h index 053418adf6a0..714c794df566 100644 --- a/drivers/gpu/drm/vmwgfx/vmwgfx_drv.h +++ b/drivers/gpu/drm/vmwgfx/vmwgfx_drv.h @@ -767,6 +767,7 @@ extern struct ttm_placement vmw_evictable_placement; extern struct ttm_placement vmw_srf_placement; 
extern struct ttm_placement vmw_mob_placement; extern struct ttm_placement vmw_mob_ne_placement; +extern struct ttm_placement vmw_nonfixed_placement; extern struct ttm_bo_driver vmw_bo_driver; extern int vmw_dma_quiescent(struct drm_device *dev); extern int vmw_bo_map_dma(struct ttm_buffer_object *bo); diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_kms.c b/drivers/gpu/drm/vmwgfx/vmwgfx_kms.c index 0f9c9cd78d83..63159674bf92 100644 --- a/drivers/gpu/drm/vmwgfx/vmwgfx_kms.c +++ b/drivers/gpu/drm/vmwgfx/vmwgfx_kms.c @@ -682,9 +682,6 @@ vmw_du_plane_duplicate_state(struct drm_plane *plane) return NULL; vps->pinned = 0; - - /* Mapping is managed by prepare_fb/cleanup_fb */ - memset(&vps->host_map, 0, sizeof(vps->host_map)); vps->cpp = 0; /* Each ref counted resource needs to be acquired again */ @@ -746,11 +743,6 @@ vmw_du_plane_destroy_state(struct drm_plane *plane, /* Should have been freed by cleanup_fb */ - if (vps->host_map.virtual) { - DRM_ERROR("Host mapping not freed\n"); - ttm_bo_kunmap(&vps->host_map); - } - if (vps->surf) vmw_surface_unreference(&vps->surf); @@ -1129,12 +1121,14 @@ static const struct drm_framebuffer_funcs vmw_framebuffer_dmabuf_funcs = { }; /** - * Pin the dmabuffer to the start of vram. + * Pin the dmabuffer in a location suitable for access by the + * display system. */ static int vmw_framebuffer_pin(struct vmw_framebuffer *vfb) { struct vmw_private *dev_priv = vmw_priv(vfb->base.dev); struct vmw_dma_buffer *buf; + struct ttm_placement *placement; int ret; buf = vfb->dmabuf ? vmw_framebuffer_to_vfbd(&vfb->base)->buffer : @@ -1151,12 +1145,24 @@ static int vmw_framebuffer_pin(struct vmw_framebuffer *vfb) break; case vmw_du_screen_object: case vmw_du_screen_target: - if (vfb->dmabuf) - return vmw_dmabuf_pin_in_vram_or_gmr(dev_priv, buf, - false); + if (vfb->dmabuf) { + if (dev_priv->capabilities & SVGA_CAP_3D) { + /* + * Use surface DMA to get content to + * screen target surface.
+ */ + placement = &vmw_vram_gmr_placement; + } else { + /* Use CPU blit. */ + placement = &vmw_sys_placement; + } + } else { + /* Use surface / image update */ + placement = &vmw_mob_placement; + } - return vmw_dmabuf_pin_in_placement(dev_priv, buf, - &vmw_mob_placement, false); + return vmw_dmabuf_pin_in_placement(dev_priv, buf, placement, + false); default: return -EINVAL; } @@ -2419,14 +2425,21 @@ int vmw_kms_helper_dirty(struct vmw_private *dev_priv, int vmw_kms_helper_buffer_prepare(struct vmw_private *dev_priv, struct vmw_dma_buffer *buf, bool interruptible, - bool validate_as_mob) + bool validate_as_mob, + bool for_cpu_blit) { + struct ttm_operation_ctx ctx = { + .interruptible = interruptible, + .no_wait_gpu = false}; struct ttm_buffer_object *bo = &buf->base; int ret; ttm_bo_reserve(bo, false, false, NULL); - ret = vmw_validate_single_buffer(dev_priv, bo, interruptible, - validate_as_mob); + if (for_cpu_blit) + ret = ttm_bo_validate(bo, &vmw_nonfixed_placement, &ctx); + else + ret = vmw_validate_single_buffer(dev_priv, bo, interruptible, + validate_as_mob); if (ret) ttm_bo_unreserve(bo); @@ -2538,7 +2551,8 @@ int vmw_kms_helper_resource_prepare(struct vmw_resource *res, if (res->backup) { ret = vmw_kms_helper_buffer_prepare(res->dev_priv, res->backup, interruptible, - res->dev_priv->has_mob); + res->dev_priv->has_mob, + false); if (ret) goto out_unreserve; } diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_kms.h b/drivers/gpu/drm/vmwgfx/vmwgfx_kms.h index 42b0f1589d3f..4e8749a8717e 100644 --- a/drivers/gpu/drm/vmwgfx/vmwgfx_kms.h +++ b/drivers/gpu/drm/vmwgfx/vmwgfx_kms.h @@ -177,7 +177,6 @@ struct vmw_plane_state { int pinned; /* For CPU Blit */ - struct ttm_bo_kmap_obj host_map; unsigned int cpp; }; @@ -289,7 +288,8 @@ int vmw_kms_helper_dirty(struct vmw_private *dev_priv, int vmw_kms_helper_buffer_prepare(struct vmw_private *dev_priv, struct vmw_dma_buffer *buf, bool interruptible, - bool validate_as_mob); + bool validate_as_mob, + bool for_cpu_blit); void 
vmw_kms_helper_buffer_revert(struct vmw_dma_buffer *buf); void vmw_kms_helper_buffer_finish(struct vmw_private *dev_priv, struct drm_file *file_priv, diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_scrn.c b/drivers/gpu/drm/vmwgfx/vmwgfx_scrn.c index 9f6deced4244..3b7bf7ca18b9 100644 --- a/drivers/gpu/drm/vmwgfx/vmwgfx_scrn.c +++ b/drivers/gpu/drm/vmwgfx/vmwgfx_scrn.c @@ -1032,7 +1032,7 @@ int vmw_kms_sou_do_dmabuf_dirty(struct vmw_private *dev_priv, int ret; ret = vmw_kms_helper_buffer_prepare(dev_priv, buf, interruptible, - false); + false, false); if (ret) return ret; @@ -1130,7 +1130,8 @@ int vmw_kms_sou_readback(struct vmw_private *dev_priv, struct vmw_kms_dirty dirty; int ret; - ret = vmw_kms_helper_buffer_prepare(dev_priv, buf, true, false); + ret = vmw_kms_helper_buffer_prepare(dev_priv, buf, true, false, + false); if (ret) return ret; diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_stdu.c b/drivers/gpu/drm/vmwgfx/vmwgfx_stdu.c index 6de28746f6f7..8eec88920851 100644 --- a/drivers/gpu/drm/vmwgfx/vmwgfx_stdu.c +++ b/drivers/gpu/drm/vmwgfx/vmwgfx_stdu.c @@ -114,7 +114,6 @@ struct vmw_screen_target_display_unit { bool defined; /* For CPU Blit */ - struct ttm_bo_kmap_obj host_map; unsigned int cpp; }; @@ -639,10 +638,9 @@ static void vmw_stdu_dmabuf_cpu_commit(struct vmw_kms_dirty *dirty) container_of(dirty->unit, typeof(*stdu), base); s32 width, height; s32 src_pitch, dst_pitch; - u8 *src, *dst; - bool not_used; - struct ttm_bo_kmap_obj guest_map; - int ret; + struct ttm_buffer_object *src_bo, *dst_bo; + u32 src_offset, dst_offset; + struct vmw_diff_cpy diff = VMW_CPU_BLIT_DIFF_INITIALIZER(stdu->cpp); if (!dirty->num_hits) return; @@ -653,57 +651,38 @@ static void vmw_stdu_dmabuf_cpu_commit(struct vmw_kms_dirty *dirty) if (width == 0 || height == 0) return; - ret = ttm_bo_kmap(&ddirty->buf->base, 0, ddirty->buf->base.num_pages, - &guest_map); - if (ret) { - DRM_ERROR("Failed mapping framebuffer for blit: %d\n", - ret); - goto out_cleanup; - } - - /* Assume we are 
blitting from Host (display_srf) to Guest (dmabuf) */ - src_pitch = stdu->display_srf->base_size.width * stdu->cpp; - src = ttm_kmap_obj_virtual(&stdu->host_map, &not_used); - src += ddirty->top * src_pitch + ddirty->left * stdu->cpp; - - dst_pitch = ddirty->pitch; - dst = ttm_kmap_obj_virtual(&guest_map, &not_used); - dst += ddirty->fb_top * dst_pitch + ddirty->fb_left * stdu->cpp; + /* Assume we are blitting from Guest (dmabuf) to Host (display_srf) */ + dst_pitch = stdu->display_srf->base_size.width * stdu->cpp; + dst_bo = &stdu->display_srf->res.backup->base; + dst_offset = ddirty->top * dst_pitch + ddirty->left * stdu->cpp; + src_pitch = ddirty->pitch; + src_bo = &ddirty->buf->base; + src_offset = ddirty->fb_top * src_pitch + ddirty->fb_left * stdu->cpp; - /* Figure out the real direction */ - if (ddirty->transfer == SVGA3D_WRITE_HOST_VRAM) { - u8 *tmp; - s32 tmp_pitch; - - tmp = src; - tmp_pitch = src_pitch; - - src = dst; - src_pitch = dst_pitch; - - dst = tmp; - dst_pitch = tmp_pitch; + /* Swap src and dst if the assumption was wrong.
*/ + if (ddirty->transfer != SVGA3D_WRITE_HOST_VRAM) { + swap(dst_pitch, src_pitch); + swap(dst_bo, src_bo); + swap(src_offset, dst_offset); } - /* CPU Blit */ - while (height-- > 0) { - memcpy(dst, src, width * stdu->cpp); - dst += dst_pitch; - src += src_pitch; - } + (void) vmw_bo_cpu_blit(dst_bo, dst_offset, dst_pitch, + src_bo, src_offset, src_pitch, + width * stdu->cpp, height, &diff); - if (ddirty->transfer == SVGA3D_WRITE_HOST_VRAM) { + if (ddirty->transfer == SVGA3D_WRITE_HOST_VRAM && + drm_rect_visible(&diff.rect)) { struct vmw_private *dev_priv; struct vmw_stdu_update *cmd; struct drm_clip_rect region; int ret; /* We are updating the actual surface, not a proxy */ - region.x1 = ddirty->left; - region.x2 = ddirty->right; - region.y1 = ddirty->top; - region.y2 = ddirty->bottom; + region.x1 = diff.rect.x1; + region.x2 = diff.rect.x2; + region.y1 = diff.rect.y1; + region.y2 = diff.rect.y2; ret = vmw_kms_update_proxy( (struct vmw_resource *) &stdu->display_srf->res, (const struct drm_clip_rect *) &region, 1, 1); @@ -720,13 +699,12 @@ static void vmw_stdu_dmabuf_cpu_commit(struct vmw_kms_dirty *dirty) } vmw_stdu_populate_update(cmd, stdu->base.unit, - ddirty->left, ddirty->right, - ddirty->top, ddirty->bottom); + region.x1, region.x2, + region.y1, region.y2); vmw_fifo_commit(dev_priv, sizeof(*cmd)); } - ttm_bo_kunmap(&guest_map); out_cleanup: ddirty->left = ddirty->top = ddirty->fb_left = ddirty->fb_top = S32_MAX; ddirty->right = ddirty->bottom = S32_MIN; @@ -772,9 +750,15 @@ int vmw_kms_stdu_dma(struct vmw_private *dev_priv, container_of(vfb, struct vmw_framebuffer_dmabuf, base)->buffer; struct vmw_stdu_dirty ddirty; int ret; + bool cpu_blit = !(dev_priv->capabilities & SVGA_CAP_3D); + /* + * VMs without 3D support don't have the surface DMA command and + * we'll be using a CPU blit, and the framebuffer should be moved out + * of VRAM.
+ */ ret = vmw_kms_helper_buffer_prepare(dev_priv, buf, interruptible, - false); + false, cpu_blit); if (ret) return ret; @@ -793,8 +777,8 @@ int vmw_kms_stdu_dma(struct vmw_private *dev_priv, if (to_surface) ddirty.base.fifo_reserve_size += sizeof(struct vmw_stdu_update); - /* 2D VMs cannot use SVGA_3D_CMD_SURFACE_DMA so do CPU blit instead */ - if (!(dev_priv->capabilities & SVGA_CAP_3D)) { + + if (cpu_blit) { ddirty.base.fifo_commit = vmw_stdu_dmabuf_cpu_commit; ddirty.base.clip = vmw_stdu_dmabuf_cpu_clip; ddirty.base.fifo_reserve_size = 0; @@ -1071,9 +1055,6 @@ vmw_stdu_primary_plane_cleanup_fb(struct drm_plane *plane, { struct vmw_plane_state *vps = vmw_plane_state_to_vps(old_state); - if (vps->host_map.virtual) - ttm_bo_kunmap(&vps->host_map); - if (vps->surf) WARN_ON(!vps->pinned); @@ -1235,24 +1216,11 @@ vmw_stdu_primary_plane_prepare_fb(struct drm_plane *plane, * so cache these mappings */ if (vps->content_fb_type == SEPARATE_DMA && - !(dev_priv->capabilities & SVGA_CAP_3D)) { - ret = ttm_bo_kmap(&vps->surf->res.backup->base, 0, - vps->surf->res.backup->base.num_pages, - &vps->host_map); - if (ret) { - DRM_ERROR("Failed to map display buffer to CPU\n"); - goto out_srf_unpin; - } - + !(dev_priv->capabilities & SVGA_CAP_3D)) vps->cpp = new_fb->pitches[0] / new_fb->width; - } return 0; -out_srf_unpin: - vmw_resource_unpin(&vps->surf->res); - vps->pinned--; - out_srf_unref: vmw_surface_unreference(&vps->surf); return ret; @@ -1296,7 +1264,6 @@ vmw_stdu_primary_plane_atomic_update(struct drm_plane *plane, stdu->display_srf = vps->surf; stdu->content_fb_type = vps->content_fb_type; stdu->cpp = vps->cpp; - memcpy(&stdu->host_map, &vps->host_map, sizeof(vps->host_map)); vclips.x = crtc->x; vclips.y = crtc->y; -- cgit v1.2.3 From dc366364c4ef809dccd063919314301f8ba01ac2 Mon Sep 17 00:00:00 2001 From: Thomas Hellstrom Date: Thu, 22 Mar 2018 10:15:23 +0100 Subject: drm/vmwgfx: Fix multiple command buffer context use The start / stop and preempt commands don't honor 
the context argument but rather act on all available contexts. Also add detection for context 1 availability. Note that currently there's no driver interface for submitting buffers using the high-priority command queue (context 1). Testing done: Change the default context for command submission to 1 instead of 0, verify basic desktop functionality including faulty command injection and recovery. Signed-off-by: Thomas Hellstrom Reviewed-by: Brian Paul Reviewed-by: Deepak Rawat --- drivers/gpu/drm/vmwgfx/device_include/svga_reg.h | 12 ++++- drivers/gpu/drm/vmwgfx/vmwgfx_cmdbuf.c | 57 +++++++++++------------- drivers/gpu/drm/vmwgfx/vmwgfx_drv.c | 2 + 3 files changed, 38 insertions(+), 33 deletions(-) diff --git a/drivers/gpu/drm/vmwgfx/device_include/svga_reg.h b/drivers/gpu/drm/vmwgfx/device_include/svga_reg.h index 6e0ccb70a700..88e72bf9a534 100644 --- a/drivers/gpu/drm/vmwgfx/device_include/svga_reg.h +++ b/drivers/gpu/drm/vmwgfx/device_include/svga_reg.h @@ -372,6 +372,14 @@ SVGAGuestPtr; * PA, not biased by the offset. When the command buffer is finished * the guest should not read the offset field as there is no guarantee * what it will set to. + * + * When the SVGA_CAP_HP_CMD_QUEUE cap bit is set a new command queue + * SVGA_CB_CONTEXT_1 is available. Commands submitted to this queue + * will be executed as quickly as possible by the SVGA device + * potentially before already queued commands on SVGA_CB_CONTEXT_0. + * The SVGA device guarantees that any command buffers submitted to + * SVGA_CB_CONTEXT_0 will be executed after any _already_ submitted + * command buffers to SVGA_CB_CONTEXT_1. 
*/ #define SVGA_CB_MAX_SIZE (512 * 1024) /* 512 KB */ @@ -382,7 +390,8 @@ SVGAGuestPtr; typedef enum { SVGA_CB_CONTEXT_DEVICE = 0x3f, SVGA_CB_CONTEXT_0 = 0x0, - SVGA_CB_CONTEXT_MAX = 0x1, + SVGA_CB_CONTEXT_1 = 0x1, /* Supported with SVGA_CAP_HP_CMD_QUEUE */ + SVGA_CB_CONTEXT_MAX = 0x2, } SVGACBContext; @@ -689,6 +698,7 @@ SVGASignedPoint; #define SVGA_CAP_CMD_BUFFERS_2 0x04000000 #define SVGA_CAP_GBOBJECTS 0x08000000 #define SVGA_CAP_DX 0x10000000 +#define SVGA_CAP_HP_CMD_QUEUE 0x20000000 #define SVGA_CAP_CMD_RESERVED 0x80000000 diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_cmdbuf.c b/drivers/gpu/drm/vmwgfx/vmwgfx_cmdbuf.c index f283324ce598..9f45d5004cae 100644 --- a/drivers/gpu/drm/vmwgfx/vmwgfx_cmdbuf.c +++ b/drivers/gpu/drm/vmwgfx/vmwgfx_cmdbuf.c @@ -101,6 +101,7 @@ struct vmw_cmdbuf_context { * @handle: DMA address handle for the command buffer space if @using_mob is * false. Immutable. * @size: The size of the command buffer space. Immutable. + * @num_contexts: Number of contexts actually enabled. */ struct vmw_cmdbuf_man { struct mutex cur_mutex; @@ -128,6 +129,7 @@ struct vmw_cmdbuf_man { bool has_pool; dma_addr_t handle; size_t size; + u32 num_contexts; }; /** @@ -185,7 +187,7 @@ struct vmw_cmdbuf_alloc_info { /* Loop over each context in the command buffer manager. 
*/ #define for_each_cmdbuf_ctx(_man, _i, _ctx) \ - for (_i = 0, _ctx = &(_man)->ctx[0]; (_i) < SVGA_CB_CONTEXT_MAX; \ + for (_i = 0, _ctx = &(_man)->ctx[0]; (_i) < (_man)->num_contexts; \ ++(_i), ++(_ctx)) static int vmw_cmdbuf_startstop(struct vmw_cmdbuf_man *man, u32 context, @@ -514,6 +516,7 @@ static void vmw_cmdbuf_work_func(struct work_struct *work) struct list_head restart_head[SVGA_CB_CONTEXT_MAX]; int i; struct vmw_cmdbuf_context *ctx; + bool global_block = false; for_each_cmdbuf_ctx(man, i, ctx) { INIT_LIST_HEAD(&restart_head[i]); @@ -531,6 +534,7 @@ static void vmw_cmdbuf_work_func(struct work_struct *work) list_del_init(&entry->list); restart[entry->cb_context] = true; + global_block = true; if (!vmw_cmd_describe(header, &error_cmd_size, &cmd_name)) { DRM_ERROR("Unknown command causing device error.\n"); @@ -564,23 +568,21 @@ static void vmw_cmdbuf_work_func(struct work_struct *work) cb_hdr->length -= new_start_offset; cb_hdr->errorOffset = 0; cb_hdr->offset = 0; + list_add_tail(&entry->list, &restart_head[entry->cb_context]); - man->ctx[entry->cb_context].block_submission = true; } + + for_each_cmdbuf_ctx(man, i, ctx) + man->ctx[i].block_submission = true; + spin_unlock(&man->lock); - /* Preempt all contexts with errors */ - for_each_cmdbuf_ctx(man, i, ctx) { - if (ctx->block_submission && vmw_cmdbuf_preempt(man, i)) - DRM_ERROR("Failed preempting command buffer " - "context %u.\n", i); - } + /* Preempt all contexts */ + if (global_block && vmw_cmdbuf_preempt(man, 0)) + DRM_ERROR("Failed preempting command buffer contexts\n"); spin_lock(&man->lock); for_each_cmdbuf_ctx(man, i, ctx) { - if (!ctx->block_submission) - continue; - /* Move preempted command buffers to the preempted queue. */ vmw_cmdbuf_ctx_process(man, ctx, &dummy); @@ -594,19 +596,16 @@ static void vmw_cmdbuf_work_func(struct work_struct *work) * Finally add all command buffers first in the submitted * queue, to rerun them. 
*/ - list_splice_init(&restart_head[i], &ctx->submitted); ctx->block_submission = false; + list_splice_init(&restart_head[i], &ctx->submitted); } vmw_cmdbuf_man_process(man); spin_unlock(&man->lock); - for_each_cmdbuf_ctx(man, i, ctx) { - if (restart[i] && vmw_cmdbuf_startstop(man, i, true)) - DRM_ERROR("Failed restarting command buffer " - "context %u.\n", i); - } + if (global_block && vmw_cmdbuf_startstop(man, 0, true)) + DRM_ERROR("Failed restarting command buffer contexts\n"); /* Send a new fence in case one was removed */ if (send_fence) { @@ -1307,6 +1306,8 @@ struct vmw_cmdbuf_man *vmw_cmdbuf_man_create(struct vmw_private *dev_priv) if (!man) return ERR_PTR(-ENOMEM); + man->num_contexts = (dev_priv->capabilities & SVGA_CAP_HP_CMD_QUEUE) ? + 2 : 1; man->headers = dma_pool_create("vmwgfx cmdbuf", &dev_priv->dev->pdev->dev, sizeof(SVGACBHeader), @@ -1341,14 +1342,11 @@ struct vmw_cmdbuf_man *vmw_cmdbuf_man_create(struct vmw_private *dev_priv) INIT_WORK(&man->work, &vmw_cmdbuf_work_func); vmw_generic_waiter_add(dev_priv, SVGA_IRQFLAG_ERROR, &dev_priv->error_waiters); - for_each_cmdbuf_ctx(man, i, ctx) { - ret = vmw_cmdbuf_startstop(man, i, true); - if (ret) { - DRM_ERROR("Failed starting command buffer " - "context %u.\n", i); - vmw_cmdbuf_man_destroy(man); - return ERR_PTR(ret); - } + ret = vmw_cmdbuf_startstop(man, 0, true); + if (ret) { + DRM_ERROR("Failed starting command buffer contexts\n"); + vmw_cmdbuf_man_destroy(man); + return ERR_PTR(ret); } return man; @@ -1398,16 +1396,11 @@ void vmw_cmdbuf_remove_pool(struct vmw_cmdbuf_man *man) */ void vmw_cmdbuf_man_destroy(struct vmw_cmdbuf_man *man) { - struct vmw_cmdbuf_context *ctx; - unsigned int i; - WARN_ON_ONCE(man->has_pool); (void) vmw_cmdbuf_idle(man, false, 10*HZ); - for_each_cmdbuf_ctx(man, i, ctx) - if (vmw_cmdbuf_startstop(man, i, false)) - DRM_ERROR("Failed stopping command buffer " - "context %u.\n", i); + if (vmw_cmdbuf_startstop(man, 0, false)) + DRM_ERROR("Failed stopping command buffer 
contexts.\n"); vmw_generic_waiter_remove(man->dev_priv, SVGA_IRQFLAG_ERROR, &man->dev_priv->error_waiters); diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_drv.c b/drivers/gpu/drm/vmwgfx/vmwgfx_drv.c index 184340d486c3..5055e5f68c4f 100644 --- a/drivers/gpu/drm/vmwgfx/vmwgfx_drv.c +++ b/drivers/gpu/drm/vmwgfx/vmwgfx_drv.c @@ -301,6 +301,8 @@ static void vmw_print_capabilities(uint32_t capabilities) DRM_INFO(" Guest Backed Resources.\n"); if (capabilities & SVGA_CAP_DX) DRM_INFO(" DX Features.\n"); + if (capabilities & SVGA_CAP_HP_CMD_QUEUE) + DRM_INFO(" HP Command Queue.\n"); } /** -- cgit v1.2.3 From bf833fd36f9bdc2c86e1fdc90318e4c99b452472 Mon Sep 17 00:00:00 2001 From: Thomas Hellstrom Date: Thu, 22 Mar 2018 10:19:01 +0100 Subject: drm/vmwgfx: Avoid pinning fbdev framebuffers fbdev framebuffers were previously pinned to be able to keep them mapped across updates. This commit introduces a mechanism that instead revalidates the map on each update, keeping the map cached across updates. The cached map is torn down if the underlying pages change. Typically on buffer object moves and swapouts. This should be nicer to the system when we have resource contention. Testing done: Basic fbdev functionality under Fedora 27. 
Signed-off-by: Thomas Hellstrom Signed-off-by: Sinclair Yeh Reviewed-by: Brian Paul Reviewed-by: Deepak Rawat --- drivers/gpu/drm/vmwgfx/vmwgfx_buffer.c | 1 + drivers/gpu/drm/vmwgfx/vmwgfx_dmabuf.c | 51 ++++++++++++++++++++++++ drivers/gpu/drm/vmwgfx/vmwgfx_drv.h | 5 +++ drivers/gpu/drm/vmwgfx/vmwgfx_fb.c | 67 +++++++++----------------------- drivers/gpu/drm/vmwgfx/vmwgfx_resource.c | 23 +++++++++++ 5 files changed, 98 insertions(+), 49 deletions(-) diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_buffer.c b/drivers/gpu/drm/vmwgfx/vmwgfx_buffer.c index ddf71bef5359..21111fd091f9 100644 --- a/drivers/gpu/drm/vmwgfx/vmwgfx_buffer.c +++ b/drivers/gpu/drm/vmwgfx/vmwgfx_buffer.c @@ -864,6 +864,7 @@ static void vmw_move_notify(struct ttm_buffer_object *bo, */ static void vmw_swap_notify(struct ttm_buffer_object *bo) { + vmw_resource_swap_notify(bo); (void) ttm_bo_wait(bo, false, false); } diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_dmabuf.c b/drivers/gpu/drm/vmwgfx/vmwgfx_dmabuf.c index d45d2caffa5a..d59d9dd16ebc 100644 --- a/drivers/gpu/drm/vmwgfx/vmwgfx_dmabuf.c +++ b/drivers/gpu/drm/vmwgfx/vmwgfx_dmabuf.c @@ -323,3 +323,54 @@ void vmw_bo_pin_reserved(struct vmw_dma_buffer *vbo, bool pin) BUG_ON(ret != 0 || bo->mem.mem_type != old_mem_type); } + + +/* + * vmw_dma_buffer_unmap - Tear down a cached buffer object map. + * + * @vbo: The buffer object whose map we are tearing down. + * + * This function tears down a cached map set up using + * vmw_dma_buffer_map_and_cache(). + */ +void vmw_dma_buffer_unmap(struct vmw_dma_buffer *vbo) +{ + if (vbo->map.bo == NULL) + return; + + ttm_bo_kunmap(&vbo->map); +} + + +/* + * vmw_dma_buffer_map_and_cache - Map a buffer object and cache the map + * + * @vbo: The buffer object to map + * Return: A kernel virtual address or NULL if mapping failed. + * + * This function maps a buffer object into the kernel address space, or + * returns the virtual kernel address of an already existing map. 
The virtual + * address remains valid as long as the buffer object is pinned or reserved. + * The cached map is torn down on either + * 1) Buffer object move + * 2) Buffer object swapout + * 3) Buffer object destruction + * + */ +void *vmw_dma_buffer_map_and_cache(struct vmw_dma_buffer *vbo) +{ + struct ttm_buffer_object *bo = &vbo->base; + bool not_used; + void *virtual; + int ret; + + virtual = ttm_kmap_obj_virtual(&vbo->map, &not_used); + if (virtual) + return virtual; + + ret = ttm_bo_kmap(bo, 0, bo->num_pages, &vbo->map); + if (ret) + DRM_ERROR("Buffer object map failed: %d.\n", ret); + + return ttm_kmap_obj_virtual(&vbo->map, &not_used); +} diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_drv.h b/drivers/gpu/drm/vmwgfx/vmwgfx_drv.h index 714c794df566..5c2a36ae1bbe 100644 --- a/drivers/gpu/drm/vmwgfx/vmwgfx_drv.h +++ b/drivers/gpu/drm/vmwgfx/vmwgfx_drv.h @@ -92,6 +92,8 @@ struct vmw_dma_buffer { s32 pin_count; /* Not ref-counted. Protected by binding_mutex */ struct vmw_resource *dx_query_ctx; + /* Protected by reservation */ + struct ttm_bo_kmap_obj map; }; /** @@ -673,6 +675,7 @@ extern void vmw_resource_move_notify(struct ttm_buffer_object *bo, struct ttm_mem_reg *mem); extern void vmw_query_move_notify(struct ttm_buffer_object *bo, struct ttm_mem_reg *mem); +extern void vmw_resource_swap_notify(struct ttm_buffer_object *bo); extern int vmw_query_readback_all(struct vmw_dma_buffer *dx_query_mob); extern void vmw_fence_single_bo(struct ttm_buffer_object *bo, struct vmw_fence_obj *fence); @@ -701,6 +704,8 @@ extern int vmw_dmabuf_unpin(struct vmw_private *vmw_priv, extern void vmw_bo_get_guest_ptr(const struct ttm_buffer_object *buf, SVGAGuestPtr *ptr); extern void vmw_bo_pin_reserved(struct vmw_dma_buffer *bo, bool pin); +extern void *vmw_dma_buffer_map_and_cache(struct vmw_dma_buffer *vbo); +extern void vmw_dma_buffer_unmap(struct vmw_dma_buffer *vbo); /** * Misc Ioctl functionality - vmwgfx_ioctl.c diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_fb.c 
b/drivers/gpu/drm/vmwgfx/vmwgfx_fb.c index d23a18aae476..fb4e59ee26c7 100644 --- a/drivers/gpu/drm/vmwgfx/vmwgfx_fb.c +++ b/drivers/gpu/drm/vmwgfx/vmwgfx_fb.c @@ -43,8 +43,6 @@ struct vmw_fb_par { struct mutex bo_mutex; struct vmw_dma_buffer *vmw_bo; - struct ttm_bo_kmap_obj map; - void *bo_ptr; unsigned bo_size; struct drm_framebuffer *set_fb; struct drm_display_mode *set_mode; @@ -174,11 +172,13 @@ static void vmw_fb_dirty_flush(struct work_struct *work) struct vmw_private *vmw_priv = par->vmw_priv; struct fb_info *info = vmw_priv->fb_info; unsigned long irq_flags; - s32 dst_x1, dst_x2, dst_y1, dst_y2, w, h; + s32 dst_x1, dst_x2, dst_y1, dst_y2, w = 0, h = 0; u32 cpp, max_x, max_y; struct drm_clip_rect clip; struct drm_framebuffer *cur_fb; u8 *src_ptr, *dst_ptr; + struct vmw_dma_buffer *vbo = par->vmw_bo; + void *virtual; if (vmw_priv->suspended) return; @@ -188,10 +188,16 @@ static void vmw_fb_dirty_flush(struct work_struct *work) if (!cur_fb) goto out_unlock; + (void) ttm_read_lock(&vmw_priv->reservation_sem, false); + (void) ttm_bo_reserve(&vbo->base, false, false, NULL); + virtual = vmw_dma_buffer_map_and_cache(vbo); + if (!virtual) + goto out_unreserve; + spin_lock_irqsave(&par->dirty.lock, irq_flags); if (!par->dirty.active) { spin_unlock_irqrestore(&par->dirty.lock, irq_flags); - goto out_unlock; + goto out_unreserve; } /* @@ -221,7 +227,7 @@ static void vmw_fb_dirty_flush(struct work_struct *work) spin_unlock_irqrestore(&par->dirty.lock, irq_flags); if (w && h) { - dst_ptr = (u8 *)par->bo_ptr + + dst_ptr = (u8 *)virtual + (dst_y1 * par->set_fb->pitches[0] + dst_x1 * cpp); src_ptr = (u8 *)par->vmalloc + ((dst_y1 + par->fb_y) * info->fix.line_length + @@ -237,7 +243,12 @@ static void vmw_fb_dirty_flush(struct work_struct *work) clip.x2 = dst_x2; clip.y1 = dst_y1; clip.y2 = dst_y2; + } +out_unreserve: + ttm_bo_unreserve(&vbo->base); + ttm_read_unlock(&vmw_priv->reservation_sem); + if (w && h) { WARN_ON_ONCE(par->set_fb->funcs->dirty(cur_fb, NULL, 0, 0, 
&clip, 1)); vmw_fifo_flush(vmw_priv, false); @@ -504,18 +515,8 @@ static int vmw_fb_kms_detach(struct vmw_fb_par *par, par->set_fb = NULL; } - if (par->vmw_bo && detach_bo) { - struct vmw_private *vmw_priv = par->vmw_priv; - - if (par->bo_ptr) { - ttm_bo_kunmap(&par->map); - par->bo_ptr = NULL; - } - if (unref_bo) - vmw_dmabuf_unreference(&par->vmw_bo); - else if (vmw_priv->active_display_unit != vmw_du_legacy) - vmw_dmabuf_unpin(par->vmw_priv, par->vmw_bo, false); - } + if (par->vmw_bo && detach_bo && unref_bo) + vmw_dmabuf_unreference(&par->vmw_bo); return 0; } @@ -636,38 +637,6 @@ static int vmw_fb_set_par(struct fb_info *info) if (ret) goto out_unlock; - if (!par->bo_ptr) { - struct vmw_framebuffer *vfb = vmw_framebuffer_to_vfb(set.fb); - - /* - * Pin before mapping. Since we don't know in what placement - * to pin, call into KMS to do it for us. LDU doesn't require - * additional pinning because set_config() would've pinned - * it already - */ - if (vmw_priv->active_display_unit != vmw_du_legacy) { - ret = vfb->pin(vfb); - if (ret) { - DRM_ERROR("Could not pin the fbdev " - "framebuffer.\n"); - goto out_unlock; - } - } - - ret = ttm_bo_kmap(&par->vmw_bo->base, 0, - par->vmw_bo->base.num_pages, &par->map); - if (ret) { - if (vmw_priv->active_display_unit != vmw_du_legacy) - vfb->unpin(vfb); - - DRM_ERROR("Could not map the fbdev framebuffer.\n"); - goto out_unlock; - } - - par->bo_ptr = ttm_kmap_obj_virtual(&par->map, &par->bo_iowrite); - } - - vmw_fb_dirty_mark(par, par->fb_x, par->fb_y, par->set_fb->width, par->set_fb->height); diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_resource.c b/drivers/gpu/drm/vmwgfx/vmwgfx_resource.c index 9e101450cc4d..6b3a942b18df 100644 --- a/drivers/gpu/drm/vmwgfx/vmwgfx_resource.c +++ b/drivers/gpu/drm/vmwgfx/vmwgfx_resource.c @@ -354,6 +354,7 @@ void vmw_dmabuf_bo_free(struct ttm_buffer_object *bo) { struct vmw_dma_buffer *vmw_bo = vmw_dma_buffer(bo); + vmw_dma_buffer_unmap(vmw_bo); kfree(vmw_bo); } @@ -361,6 +362,7 @@ static void 
vmw_user_dmabuf_destroy(struct ttm_buffer_object *bo) { struct vmw_user_dma_buffer *vmw_user_bo = vmw_user_dma_buffer(bo); + vmw_dma_buffer_unmap(&vmw_user_bo->dma); ttm_prime_object_kfree(vmw_user_bo, prime); } @@ -1239,6 +1241,12 @@ void vmw_resource_move_notify(struct ttm_buffer_object *bo, dma_buf = container_of(bo, struct vmw_dma_buffer, base); + /* + * Kill any cached kernel maps before move. An optimization could + * be to do this iff source or destination memory type is VRAM. + */ + vmw_dma_buffer_unmap(dma_buf); + if (mem->mem_type != VMW_PL_MOB) { struct vmw_resource *res, *n; struct ttm_validate_buffer val_buf; @@ -1262,6 +1270,21 @@ void vmw_resource_move_notify(struct ttm_buffer_object *bo, } +/** + * vmw_resource_swap_notify - swapout notify callback. + * + * @bo: The buffer object to be swapped out. + */ +void vmw_resource_swap_notify(struct ttm_buffer_object *bo) +{ + if (bo->destroy != vmw_dmabuf_bo_free && + bo->destroy != vmw_user_dmabuf_destroy) + return; + + /* Kill any cached kernel maps before swapout */ + vmw_dma_buffer_unmap(vmw_dma_buffer(bo)); +} + /** * vmw_query_readback_all - Read back cached query states -- cgit v1.2.3 From c3b9b165734492b7e42bdd898aba93e1120f9084 Mon Sep 17 00:00:00 2001 From: Thomas Hellstrom Date: Thu, 22 Mar 2018 10:26:37 +0100 Subject: drm/vmwgfx: Improve on hibernation Make it possible to hibernate also with masters that don't switch VT at hibernation time. We save and restore modesetting state unless fbdev is active and enabled at hibernation time. 
Signed-off-by: Thomas Hellstrom Reviewed-by: Sinclair Yeh --- drivers/gpu/drm/vmwgfx/vmwgfx_drv.c | 75 +++++++++++++++++++++++++------------ drivers/gpu/drm/vmwgfx/vmwgfx_drv.h | 5 +++ drivers/gpu/drm/vmwgfx/vmwgfx_fb.c | 35 ++++++++++++----- drivers/gpu/drm/vmwgfx/vmwgfx_kms.c | 48 ++++++++++++++++++++++++ 4 files changed, 130 insertions(+), 33 deletions(-) diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_drv.c b/drivers/gpu/drm/vmwgfx/vmwgfx_drv.c index 5055e5f68c4f..c66f32a6a9d9 100644 --- a/drivers/gpu/drm/vmwgfx/vmwgfx_drv.c +++ b/drivers/gpu/drm/vmwgfx/vmwgfx_drv.c @@ -1279,8 +1279,7 @@ static void vmw_master_drop(struct drm_device *dev, ttm_lock_set_kill(&dev_priv->fbdev_master.lock, false, SIGTERM); ttm_vt_unlock(&dev_priv->fbdev_master.lock); - if (dev_priv->enable_fb) - vmw_fb_on(dev_priv); + vmw_fb_refresh(dev_priv); } /** @@ -1370,28 +1369,23 @@ static int vmwgfx_pm_notifier(struct notifier_block *nb, unsigned long val, switch (val) { case PM_HIBERNATION_PREPARE: - if (dev_priv->enable_fb) - vmw_fb_off(dev_priv); - ttm_suspend_lock(&dev_priv->reservation_sem); - /* - * This empties VRAM and unbinds all GMR bindings. - * Buffer contents is moved to swappable memory. + * Take the reservation sem in write mode, which will make sure + * there are no other processes holding a buffer object + * reservation, meaning we should be able to evict all buffer + * objects if needed. + * Once user-space processes have been frozen, we can release + * the lock again. 
*/ - vmw_execbuf_release_pinned_bo(dev_priv); - vmw_resource_evict_all(dev_priv); - vmw_release_device_early(dev_priv); - ttm_bo_swapout_all(&dev_priv->bdev); - vmw_fence_fifo_down(dev_priv->fman); + ttm_suspend_lock(&dev_priv->reservation_sem); + dev_priv->suspend_locked = true; break; case PM_POST_HIBERNATION: case PM_POST_RESTORE: - vmw_fence_fifo_up(dev_priv->fman); - ttm_suspend_unlock(&dev_priv->reservation_sem); - if (dev_priv->enable_fb) - vmw_fb_on(dev_priv); - break; - case PM_RESTORE_PREPARE: + if (READ_ONCE(dev_priv->suspend_locked)) { + dev_priv->suspend_locked = false; + ttm_suspend_unlock(&dev_priv->reservation_sem); + } break; default: break; @@ -1442,25 +1436,50 @@ static int vmw_pm_freeze(struct device *kdev) struct pci_dev *pdev = to_pci_dev(kdev); struct drm_device *dev = pci_get_drvdata(pdev); struct vmw_private *dev_priv = vmw_priv(dev); + int ret; + /* + * Unlock for vmw_kms_suspend. + * No user-space processes should be running now. + */ + ttm_suspend_unlock(&dev_priv->reservation_sem); + ret = vmw_kms_suspend(dev_priv->dev); + if (ret) { + ttm_suspend_lock(&dev_priv->reservation_sem); + DRM_ERROR("Failed to freeze modesetting.\n"); + return ret; + } dev_priv->suspended = true; if (dev_priv->enable_fb) - vmw_fifo_resource_dec(dev_priv); + vmw_fb_off(dev_priv); + ttm_suspend_lock(&dev_priv->reservation_sem); + vmw_execbuf_release_pinned_bo(dev_priv); + vmw_resource_evict_all(dev_priv); + vmw_release_device_early(dev_priv); + ttm_bo_swapout_all(&dev_priv->bdev); + if (dev_priv->enable_fb) + vmw_fifo_resource_dec(dev_priv); if (atomic_read(&dev_priv->num_fifo_resources) != 0) { DRM_ERROR("Can't hibernate while 3D resources are active.\n"); if (dev_priv->enable_fb) vmw_fifo_resource_inc(dev_priv); WARN_ON(vmw_request_device_late(dev_priv)); + dev_priv->suspend_locked = false; + ttm_suspend_unlock(&dev_priv->reservation_sem); + if (dev_priv->suspend_state) + vmw_kms_resume(dev); + if (dev_priv->enable_fb) + vmw_fb_on(dev_priv); 
dev_priv->suspended = false; + vmw_fb_refresh(dev_priv); return -EBUSY; } - if (dev_priv->enable_fb) - __vmw_svga_disable(dev_priv); + vmw_fence_fifo_down(dev_priv->fman); + __vmw_svga_disable(dev_priv); vmw_release_device_late(dev_priv); - return 0; } @@ -1484,7 +1503,17 @@ static int vmw_pm_restore(struct device *kdev) if (dev_priv->enable_fb) __vmw_svga_enable(dev_priv); + vmw_fence_fifo_up(dev_priv->fman); + dev_priv->suspend_locked = false; + ttm_suspend_unlock(&dev_priv->reservation_sem); + if (dev_priv->suspend_state) + vmw_kms_resume(dev_priv->dev); + + if (dev_priv->enable_fb) + vmw_fb_on(dev_priv); + dev_priv->suspended = false; + vmw_fb_refresh(dev_priv); return 0; } diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_drv.h b/drivers/gpu/drm/vmwgfx/vmwgfx_drv.h index 5c2a36ae1bbe..0bf28a6528bf 100644 --- a/drivers/gpu/drm/vmwgfx/vmwgfx_drv.h +++ b/drivers/gpu/drm/vmwgfx/vmwgfx_drv.h @@ -425,6 +425,7 @@ struct vmw_private { struct vmw_framebuffer *implicit_fb; struct mutex global_kms_state_mutex; spinlock_t cursor_lock; + struct drm_atomic_state *suspend_state; /* * Context and surface management. 
@@ -498,6 +499,7 @@ struct vmw_private { struct notifier_block pm_nb; bool suspended; bool refuse_hibernation; + bool suspend_locked; struct mutex release_mutex; atomic_t num_fifo_resources; @@ -909,6 +911,7 @@ int vmw_fb_init(struct vmw_private *vmw_priv); int vmw_fb_close(struct vmw_private *dev_priv); int vmw_fb_off(struct vmw_private *vmw_priv); int vmw_fb_on(struct vmw_private *vmw_priv); +void vmw_fb_refresh(struct vmw_private *vmw_priv); /** * Kernel modesetting - vmwgfx_kms.c @@ -945,6 +948,8 @@ int vmw_kms_present(struct vmw_private *dev_priv, int vmw_kms_update_layout_ioctl(struct drm_device *dev, void *data, struct drm_file *file_priv); void vmw_kms_legacy_hotspot_clear(struct vmw_private *dev_priv); +int vmw_kms_suspend(struct drm_device *dev); +int vmw_kms_resume(struct drm_device *dev); int vmw_dumb_create(struct drm_file *file_priv, struct drm_device *dev, diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_fb.c b/drivers/gpu/drm/vmwgfx/vmwgfx_fb.c index fb4e59ee26c7..e85c1868ef12 100644 --- a/drivers/gpu/drm/vmwgfx/vmwgfx_fb.c +++ b/drivers/gpu/drm/vmwgfx/vmwgfx_fb.c @@ -161,10 +161,17 @@ static int vmw_fb_blank(int blank, struct fb_info *info) return 0; } -/* - * Dirty code +/** + * vmw_fb_dirty_flush - flush dirty regions to the kms framebuffer + * + * @work: The struct work_struct associated with this task. + * + * This function flushes the dirty regions of the vmalloc framebuffer to the + * kms framebuffer, and if the kms framebuffer is visible, also updated the + * corresponding displays. Note that this function runs even if the kms + * framebuffer is not bound to a crtc and thus not visible, but it's turned + * off during hibernation using the par->dirty.active bool. 
*/ - static void vmw_fb_dirty_flush(struct work_struct *work) { struct vmw_fb_par *par = container_of(work, struct vmw_fb_par, @@ -852,12 +859,6 @@ int vmw_fb_off(struct vmw_private *vmw_priv) flush_delayed_work(&info->deferred_work); flush_delayed_work(&par->local_work); - mutex_lock(&par->bo_mutex); - drm_modeset_lock_all(vmw_priv->dev); - (void) vmw_fb_kms_detach(par, true, false); - drm_modeset_unlock_all(vmw_priv->dev); - mutex_unlock(&par->bo_mutex); - return 0; } @@ -873,10 +874,24 @@ int vmw_fb_on(struct vmw_private *vmw_priv) info = vmw_priv->fb_info; par = info->par; - vmw_fb_set_par(info); spin_lock_irqsave(&par->dirty.lock, flags); par->dirty.active = true; spin_unlock_irqrestore(&par->dirty.lock, flags); return 0; } + +/** + * vmw_fb_refresh - Refresh fb display + * + * @vmw_priv: Pointer to device private + * + * Call into kms to show the fbdev display(s). + */ +void vmw_fb_refresh(struct vmw_private *vmw_priv) +{ + if (!vmw_priv->fb_info) + return; + + vmw_fb_set_par(vmw_priv->fb_info); +} diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_kms.c b/drivers/gpu/drm/vmwgfx/vmwgfx_kms.c index 63159674bf92..3628a9fe705f 100644 --- a/drivers/gpu/drm/vmwgfx/vmwgfx_kms.c +++ b/drivers/gpu/drm/vmwgfx/vmwgfx_kms.c @@ -2848,3 +2848,51 @@ int vmw_kms_set_config(struct drm_mode_set *set, return drm_atomic_helper_set_config(set, ctx); } + + +/** + * vmw_kms_suspend - Save modesetting state and turn modesetting off. + * + * @dev: Pointer to the drm device + * Return: 0 on success. Negative error code on failure. 
+ */ +int vmw_kms_suspend(struct drm_device *dev) +{ + struct vmw_private *dev_priv = vmw_priv(dev); + + dev_priv->suspend_state = drm_atomic_helper_suspend(dev); + if (IS_ERR(dev_priv->suspend_state)) { + int ret = PTR_ERR(dev_priv->suspend_state); + + DRM_ERROR("Failed kms suspend: %d\n", ret); + dev_priv->suspend_state = NULL; + + return ret; + } + + return 0; +} + + +/** + * vmw_kms_resume - Re-enable modesetting and restore state + * + * @dev: Pointer to the drm device + * Return: 0 on success. Negative error code on failure. + * + * State is resumed from a previous vmw_kms_suspend(). It's illegal + * to call this function without a previous vmw_kms_suspend(). + */ +int vmw_kms_resume(struct drm_device *dev) +{ + struct vmw_private *dev_priv = vmw_priv(dev); + int ret; + + if (WARN_ON(!dev_priv->suspend_state)) + return 0; + + ret = drm_atomic_helper_resume(dev, dev_priv->suspend_state); + dev_priv->suspend_state = NULL; + + return ret; +} -- cgit v1.2.3 From 4e3e733b45df457147441b79cf2cb654c82ec402 Mon Sep 17 00:00:00 2001 From: Thomas Hellstrom Date: Thu, 22 Mar 2018 10:30:19 +0100 Subject: drm/vmwgfx: Get rid of the device-private suspended member It was used to early block fbdev dirty processing. Replace it with an unprotected check of the par->dirty.active field. While this might race with the vmw_fb_off() function, we do a protected check later so the race will at worst lead to grabbing and releasing a couple of locks. 
Signed-off-by: Thomas Hellstrom Reviewed-by: Brian Paul Reviewed-by: Sinclair Yeh --- drivers/gpu/drm/vmwgfx/vmwgfx_drv.c | 3 --- drivers/gpu/drm/vmwgfx/vmwgfx_drv.h | 1 - drivers/gpu/drm/vmwgfx/vmwgfx_fb.c | 2 +- 3 files changed, 1 insertion(+), 5 deletions(-) diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_drv.c b/drivers/gpu/drm/vmwgfx/vmwgfx_drv.c index c66f32a6a9d9..61a03ac90f8c 100644 --- a/drivers/gpu/drm/vmwgfx/vmwgfx_drv.c +++ b/drivers/gpu/drm/vmwgfx/vmwgfx_drv.c @@ -1449,7 +1449,6 @@ static int vmw_pm_freeze(struct device *kdev) DRM_ERROR("Failed to freeze modesetting.\n"); return ret; } - dev_priv->suspended = true; if (dev_priv->enable_fb) vmw_fb_off(dev_priv); @@ -1471,7 +1470,6 @@ static int vmw_pm_freeze(struct device *kdev) vmw_kms_resume(dev); if (dev_priv->enable_fb) vmw_fb_on(dev_priv); - dev_priv->suspended = false; vmw_fb_refresh(dev_priv); return -EBUSY; } @@ -1512,7 +1510,6 @@ static int vmw_pm_restore(struct device *kdev) if (dev_priv->enable_fb) vmw_fb_on(dev_priv); - dev_priv->suspended = false; vmw_fb_refresh(dev_priv); return 0; diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_drv.h b/drivers/gpu/drm/vmwgfx/vmwgfx_drv.h index 0bf28a6528bf..fb2f3276ce82 100644 --- a/drivers/gpu/drm/vmwgfx/vmwgfx_drv.h +++ b/drivers/gpu/drm/vmwgfx/vmwgfx_drv.h @@ -497,7 +497,6 @@ struct vmw_private { struct vmw_master *active_master; struct vmw_master fbdev_master; struct notifier_block pm_nb; - bool suspended; bool refuse_hibernation; bool suspend_locked; diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_fb.c b/drivers/gpu/drm/vmwgfx/vmwgfx_fb.c index e85c1868ef12..be40cff3e1f6 100644 --- a/drivers/gpu/drm/vmwgfx/vmwgfx_fb.c +++ b/drivers/gpu/drm/vmwgfx/vmwgfx_fb.c @@ -187,7 +187,7 @@ static void vmw_fb_dirty_flush(struct work_struct *work) struct vmw_dma_buffer *vbo = par->vmw_bo; void *virtual; - if (vmw_priv->suspended) + if (!READ_ONCE(par->dirty.active)) return; mutex_lock(&par->bo_mutex); -- cgit v1.2.3 From 6073a09210e06f39adabd682c282b3ee14c3d33d Mon Sep 17 
00:00:00 2001 From: Himanshu Jha Date: Thu, 22 Mar 2018 10:33:03 +0100 Subject: drm/vmwgfx: Use kasprintf Use kasprintf instead of combination of kmalloc and sprintf. Also, remove the local variables used for storing the string length as they are not required now. Signed-off-by: Himanshu Jha Reviewed-by: Sinclair Yeh Signed-off-by: Thomas Hellstrom --- drivers/gpu/drm/vmwgfx/vmwgfx_msg.c | 13 +++---------- 1 file changed, 3 insertions(+), 10 deletions(-) diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_msg.c b/drivers/gpu/drm/vmwgfx/vmwgfx_msg.c index 97000996b8dc..cdff99211602 100644 --- a/drivers/gpu/drm/vmwgfx/vmwgfx_msg.c +++ b/drivers/gpu/drm/vmwgfx/vmwgfx_msg.c @@ -328,7 +328,7 @@ int vmw_host_get_guestinfo(const char *guest_info_param, { struct rpc_channel channel; char *msg, *reply = NULL; - size_t msg_len, reply_len = 0; + size_t reply_len = 0; int ret = 0; @@ -338,15 +338,12 @@ int vmw_host_get_guestinfo(const char *guest_info_param, if (!guest_info_param || !length) return -EINVAL; - msg_len = strlen(guest_info_param) + strlen("info-get ") + 1; - msg = kzalloc(msg_len, GFP_KERNEL); + msg = kasprintf(GFP_KERNEL, "info-get %s", guest_info_param); if (!msg) { DRM_ERROR("Cannot allocate memory to get %s", guest_info_param); return -ENOMEM; } - sprintf(msg, "info-get %s", guest_info_param); - if (vmw_open_channel(&channel, RPCI_PROTOCOL_NUM) || vmw_send_msg(&channel, msg) || vmw_recv_msg(&channel, (void *) &reply, &reply_len) || @@ -388,7 +385,6 @@ int vmw_host_log(const char *log) { struct rpc_channel channel; char *msg; - int msg_len; int ret = 0; @@ -398,15 +394,12 @@ int vmw_host_log(const char *log) if (!log) return ret; - msg_len = strlen(log) + strlen("log ") + 1; - msg = kzalloc(msg_len, GFP_KERNEL); + msg = kasprintf(GFP_KERNEL, "log %s", log); if (!msg) { DRM_ERROR("Cannot allocate memory for log message\n"); return -ENOMEM; } - sprintf(msg, "log %s", log); - if (vmw_open_channel(&channel, RPCI_PROTOCOL_NUM) || vmw_send_msg(&channel, msg) || 
vmw_close_channel(&channel)) { -- cgit v1.2.3 From 89dc15b76fd3b57d0b7d3bd3556bd6fa514e0257 Mon Sep 17 00:00:00 2001 From: Thomas Hellstrom Date: Thu, 22 Mar 2018 10:34:00 +0100 Subject: drm/vmwgfx: Stricter count of legacy surface device resources For legacy surfaces, they were previously registered as device resources when the driver resources were created. Since they are evictable we instead register them as device resources once they are created on the device, just like for guest-backed surfaces. This has implications during hibernation where we can't hibernate with device resources active. Signed-off-by: Thomas Hellstrom Reviewed-by: Brian Paul Reviewed-by: Deepak Rawat Reviewed-by: Sinclair Yeh --- drivers/gpu/drm/vmwgfx/vmwgfx_surface.c | 8 +++----- 1 file changed, 3 insertions(+), 5 deletions(-) diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_surface.c b/drivers/gpu/drm/vmwgfx/vmwgfx_surface.c index db1bb166845e..b236c48bf265 100644 --- a/drivers/gpu/drm/vmwgfx/vmwgfx_surface.c +++ b/drivers/gpu/drm/vmwgfx/vmwgfx_surface.c @@ -345,7 +345,6 @@ static void vmw_hw_surface_destroy(struct vmw_resource *res) dev_priv->used_memory_size -= res->backup_size; mutex_unlock(&dev_priv->cmdbuf_mutex); } - vmw_fifo_resource_dec(dev_priv); } /** @@ -407,6 +406,8 @@ static int vmw_legacy_srf_create(struct vmw_resource *res) vmw_surface_define_encode(srf, cmd); vmw_fifo_commit(dev_priv, submit_size); + vmw_fifo_resource_inc(dev_priv); + /* * Surface memory usage accounting. */ @@ -558,6 +559,7 @@ static int vmw_legacy_srf_destroy(struct vmw_resource *res) */ vmw_resource_release_id(res); + vmw_fifo_resource_dec(dev_priv); return 0; } @@ -579,15 +581,11 @@ static int vmw_surface_init(struct vmw_private *dev_priv, struct vmw_resource *res = &srf->res; BUG_ON(!res_free); - if (!dev_priv->has_mob) - vmw_fifo_resource_inc(dev_priv); ret = vmw_resource_init(dev_priv, res, true, res_free, (dev_priv->has_mob) ? 
&vmw_gb_surface_func : &vmw_legacy_surface_func); if (unlikely(ret != 0)) { - if (!dev_priv->has_mob) - vmw_fifo_resource_dec(dev_priv); res_free(res); return ret; } -- cgit v1.2.3 From 20fb5a635a0c8478ac98f15cfafc2ea83df29565 Mon Sep 17 00:00:00 2001 From: Thomas Hellstrom Date: Thu, 22 Mar 2018 10:35:18 +0100 Subject: drm/vmwgfx: Unpin the screen object backup buffer when not used We were relying on the pinned screen object backup buffer to be destroyed when not used. But if we hold a copy of the atomic state, like when hibernating, the backup buffer might not be destroyed since it's refcounted by the atomic state. This causes us to hibernate with a buffer pinned in VRAM. Fix this by only having the buffer pinned when it is actually used by a screen object. Signed-off-by: Thomas Hellstrom Reviewed-by: Brian Paul Reviewed-by: Sinclair Yeh --- drivers/gpu/drm/vmwgfx/vmwgfx_scrn.c | 29 +++++++++++++++++++++-------- 1 file changed, 21 insertions(+), 8 deletions(-) diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_scrn.c b/drivers/gpu/drm/vmwgfx/vmwgfx_scrn.c index 3b7bf7ca18b9..419185f60278 100644 --- a/drivers/gpu/drm/vmwgfx/vmwgfx_scrn.c +++ b/drivers/gpu/drm/vmwgfx/vmwgfx_scrn.c @@ -405,7 +405,11 @@ vmw_sou_primary_plane_cleanup_fb(struct drm_plane *plane, struct drm_plane_state *old_state) { struct vmw_plane_state *vps = vmw_plane_state_to_vps(old_state); + struct drm_crtc *crtc = plane->state->crtc ? + plane->state->crtc : old_state->crtc; + if (vps->dmabuf) + vmw_dmabuf_unpin(vmw_priv(crtc->dev), vps->dmabuf, false); vmw_dmabuf_unreference(&vps->dmabuf); vps->dmabuf_size = 0; @@ -443,10 +447,17 @@ vmw_sou_primary_plane_prepare_fb(struct drm_plane *plane, } size = new_state->crtc_w * new_state->crtc_h * 4; + dev_priv = vmw_priv(crtc->dev); if (vps->dmabuf) { - if (vps->dmabuf_size == size) - return 0; + if (vps->dmabuf_size == size) { + /* + * Note that this might temporarily up the pin-count + * to 2, until cleanup_fb() is called. 
+ */ + return vmw_dmabuf_pin_in_vram(dev_priv, vps->dmabuf, + true); + } vmw_dmabuf_unreference(&vps->dmabuf); vps->dmabuf_size = 0; @@ -456,7 +467,6 @@ vmw_sou_primary_plane_prepare_fb(struct drm_plane *plane, if (!vps->dmabuf) return -ENOMEM; - dev_priv = vmw_priv(crtc->dev); vmw_svga_enable(dev_priv); /* After we have alloced the backing store might not be able to @@ -467,13 +477,16 @@ vmw_sou_primary_plane_prepare_fb(struct drm_plane *plane, &vmw_vram_ne_placement, false, &vmw_dmabuf_bo_free); vmw_overlay_resume_all(dev_priv); - - if (ret != 0) + if (ret) { vps->dmabuf = NULL; /* vmw_dmabuf_init frees on error */ - else - vps->dmabuf_size = size; + return ret; + } - return ret; + /* + * TTM already thinks the buffer is pinned, but make sure the + * pin_count is upped. + */ + return vmw_dmabuf_pin_in_vram(dev_priv, vps->dmabuf, true); } -- cgit v1.2.3 From 37efe80ce85f76b3b30d7b4ea40550e6a5a5b71a Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Tue, 16 Jan 2018 18:18:43 +0100 Subject: drm/vmwgfx: use monotonic event timestamps DRM_VMW_EVENT_FENCE_SIGNALED (struct drm_vmw_event_fence) and DRM_EVENT_VBLANK (struct drm_event_vblank) pass timestamps in 32-bit seconds/microseconds format. As of commit c61eef726a78 ("drm: add support for monotonic vblank timestamps"), other DRM drivers use monotonic times for drm_event_vblank, but vmwgfx still uses CLOCK_REALTIME for both events, which suffers from the y2038/y2106 overflow as well as time jumps. For consistency, this changes vmwgfx to use ktime_get_ts64 as well, which solves those problems and avoids the deprecated do_gettimeofday() function. This should be transparent to user space, as long as it doesn't compare the time against the result of gettimeofday(). 
Signed-off-by: Arnd Bergmann Signed-off-by: Thomas Hellstrom --- drivers/gpu/drm/vmwgfx/vmwgfx_fence.c | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_fence.c b/drivers/gpu/drm/vmwgfx/vmwgfx_fence.c index 6c5c75cf5e6c..9ed544f8958f 100644 --- a/drivers/gpu/drm/vmwgfx/vmwgfx_fence.c +++ b/drivers/gpu/drm/vmwgfx/vmwgfx_fence.c @@ -901,11 +901,12 @@ static void vmw_event_fence_action_seq_passed(struct vmw_fence_action *action) spin_lock_irq(&dev->event_lock); if (likely(eaction->tv_sec != NULL)) { - struct timeval tv; + struct timespec64 ts; - do_gettimeofday(&tv); - *eaction->tv_sec = tv.tv_sec; - *eaction->tv_usec = tv.tv_usec; + ktime_get_ts64(&ts); + /* monotonic time, so no y2038 overflow */ + *eaction->tv_sec = ts.tv_sec; + *eaction->tv_usec = ts.tv_nsec / NSEC_PER_USEC; } drm_send_event_locked(dev, eaction->event); -- cgit v1.2.3 From 43bfefedd0281ef476f8154397cd283a710d8baf Mon Sep 17 00:00:00 2001 From: Thomas Hellstrom Date: Thu, 22 Mar 2018 11:14:34 +0100 Subject: drm/vmwgfx: Bump version patchlevel and date Signed-off-by: Thomas Hellstrom --- drivers/gpu/drm/vmwgfx/vmwgfx_drv.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_drv.h b/drivers/gpu/drm/vmwgfx/vmwgfx_drv.h index fb2f3276ce82..9e60de95b863 100644 --- a/drivers/gpu/drm/vmwgfx/vmwgfx_drv.h +++ b/drivers/gpu/drm/vmwgfx/vmwgfx_drv.h @@ -43,10 +43,10 @@ #include #define VMWGFX_DRIVER_NAME "vmwgfx" -#define VMWGFX_DRIVER_DATE "20170612" +#define VMWGFX_DRIVER_DATE "20180322" #define VMWGFX_DRIVER_MAJOR 2 #define VMWGFX_DRIVER_MINOR 14 -#define VMWGFX_DRIVER_PATCHLEVEL 0 +#define VMWGFX_DRIVER_PATCHLEVEL 1 #define VMWGFX_FILE_PAGE_OFFSET 0x00100000 #define VMWGFX_FIFO_STATIC_SIZE (1024*1024) #define VMWGFX_MAX_RELOCATIONS 2048 -- cgit v1.2.3