Blob Blame History Raw
From: Thomas Hellstrom <thellstrom@vmware.com>
Date: Tue, 16 Jan 2018 11:07:30 +0100
Subject: drm/vmwgfx: Use the cpu blit utility for framebuffer to screen target
 blits
Git-commit: ef86cfee7d74baf2e3b883871087a684acecb595
Patch-mainline: v4.17-rc1
References: FATE#326289 FATE#326079 FATE#326049 FATE#322398 FATE#326166

This blit was previously performed using two large vmaps, one of which
was teared down and remapped on each blit. Use the more resource-
conserving TTM cpu blit instead.

The blit is used in boundary-box computing mode which makes it possible
to minimize the bounding box used in host operations.

Signed-off-by: Thomas Hellstrom <thellstrom@vmware.com>
Reviewed-by: Brian Paul <brianp@vmware.com>
Acked-by: Petr Tesarik <ptesarik@suse.com>
---
 drivers/gpu/drm/vmwgfx/vmwgfx_buffer.c |   23 ++++++
 drivers/gpu/drm/vmwgfx/vmwgfx_drv.h    |    1 
 drivers/gpu/drm/vmwgfx/vmwgfx_kms.c    |   50 +++++++++-----
 drivers/gpu/drm/vmwgfx/vmwgfx_kms.h    |    4 -
 drivers/gpu/drm/vmwgfx/vmwgfx_scrn.c   |    5 -
 drivers/gpu/drm/vmwgfx/vmwgfx_stdu.c   |  113 +++++++++++----------------------
 6 files changed, 101 insertions(+), 95 deletions(-)

--- a/drivers/gpu/drm/vmwgfx/vmwgfx_buffer.c
+++ b/drivers/gpu/drm/vmwgfx/vmwgfx_buffer.c
@@ -185,6 +185,22 @@ static const struct ttm_place evictable_
 	}
 };
 
+static const struct ttm_place nonfixed_placement_flags[] = {
+	{
+		.fpfn = 0,
+		.lpfn = 0,
+		.flags = TTM_PL_FLAG_SYSTEM | TTM_PL_FLAG_CACHED
+	}, {
+		.fpfn = 0,
+		.lpfn = 0,
+		.flags = VMW_PL_FLAG_GMR | TTM_PL_FLAG_CACHED
+	}, {
+		.fpfn = 0,
+		.lpfn = 0,
+		.flags = VMW_PL_FLAG_MOB | TTM_PL_FLAG_CACHED
+	}
+};
+
 struct ttm_placement vmw_evictable_placement = {
 	.num_placement = 4,
 	.placement = evictable_placement_flags,
@@ -213,6 +229,13 @@ struct ttm_placement vmw_mob_ne_placemen
 	.busy_placement = &mob_ne_placement_flags
 };
 
+struct ttm_placement vmw_nonfixed_placement = {
+	.num_placement = 3,
+	.placement = nonfixed_placement_flags,
+	.num_busy_placement = 1,
+	.busy_placement = &sys_placement_flags
+};
+
 struct vmw_ttm_tt {
 	struct ttm_dma_tt dma_ttm;
 	struct vmw_private *dev_priv;
--- a/drivers/gpu/drm/vmwgfx/vmwgfx_drv.h
+++ b/drivers/gpu/drm/vmwgfx/vmwgfx_drv.h
@@ -767,6 +767,7 @@ extern struct ttm_placement vmw_evictabl
 extern struct ttm_placement vmw_srf_placement;
 extern struct ttm_placement vmw_mob_placement;
 extern struct ttm_placement vmw_mob_ne_placement;
+extern struct ttm_placement vmw_nonfixed_placement;
 extern struct ttm_bo_driver vmw_bo_driver;
 extern int vmw_dma_quiescent(struct drm_device *dev);
 extern int vmw_bo_map_dma(struct ttm_buffer_object *bo);
--- a/drivers/gpu/drm/vmwgfx/vmwgfx_kms.c
+++ b/drivers/gpu/drm/vmwgfx/vmwgfx_kms.c
@@ -681,9 +681,6 @@ vmw_du_plane_duplicate_state(struct drm_
 		return NULL;
 
 	vps->pinned = 0;
-
-	/* Mapping is managed by prepare_fb/cleanup_fb */
-	memset(&vps->host_map, 0, sizeof(vps->host_map));
 	vps->cpp = 0;
 
 	/* Each ref counted resource needs to be acquired again */
@@ -745,11 +742,6 @@ vmw_du_plane_destroy_state(struct drm_pl
 
 
 	/* Should have been freed by cleanup_fb */
-	if (vps->host_map.virtual) {
-		DRM_ERROR("Host mapping not freed\n");
-		ttm_bo_kunmap(&vps->host_map);
-	}
-
 	if (vps->surf)
 		vmw_surface_unreference(&vps->surf);
 
@@ -1128,12 +1120,14 @@ static const struct drm_framebuffer_func
 };
 
 /**
- * Pin the dmabuffer to the start of vram.
+ * Pin the dmabuffer in a location suitable for access by the
+ * display system.
  */
 static int vmw_framebuffer_pin(struct vmw_framebuffer *vfb)
 {
 	struct vmw_private *dev_priv = vmw_priv(vfb->base.dev);
 	struct vmw_dma_buffer *buf;
+	struct ttm_placement *placement;
 	int ret;
 
 	buf = vfb->dmabuf ?  vmw_framebuffer_to_vfbd(&vfb->base)->buffer :
@@ -1150,12 +1144,24 @@ static int vmw_framebuffer_pin(struct vm
 		break;
 	case vmw_du_screen_object:
 	case vmw_du_screen_target:
-		if (vfb->dmabuf)
-			return vmw_dmabuf_pin_in_vram_or_gmr(dev_priv, buf,
-							     false);
+		if (vfb->dmabuf) {
+			if (dev_priv->capabilities & SVGA_CAP_3D) {
+				/*
+				 * Use surface DMA to get content to
+				 * sreen target surface.
+				 */
+				placement = &vmw_vram_gmr_placement;
+			} else {
+				/* Use CPU blit. */
+				placement = &vmw_sys_placement;
+			}
+		} else {
+			/* Use surface / image update */
+			placement = &vmw_mob_placement;
+		}
 
-		return vmw_dmabuf_pin_in_placement(dev_priv, buf,
-						   &vmw_mob_placement, false);
+		return vmw_dmabuf_pin_in_placement(dev_priv, buf, placement,
+						   false);
 	default:
 		return -EINVAL;
 	}
@@ -2418,14 +2424,21 @@ int vmw_kms_helper_dirty(struct vmw_priv
 int vmw_kms_helper_buffer_prepare(struct vmw_private *dev_priv,
 				  struct vmw_dma_buffer *buf,
 				  bool interruptible,
-				  bool validate_as_mob)
+				  bool validate_as_mob,
+				  bool for_cpu_blit)
 {
+	struct ttm_operation_ctx ctx = {
+		.interruptible = interruptible,
+		.no_wait_gpu = false};
 	struct ttm_buffer_object *bo = &buf->base;
 	int ret;
 
 	ttm_bo_reserve(bo, false, false, NULL);
-	ret = vmw_validate_single_buffer(dev_priv, bo, interruptible,
-					 validate_as_mob);
+	if (for_cpu_blit)
+		ret = ttm_bo_validate(bo, &vmw_nonfixed_placement, &ctx);
+	else
+		ret = vmw_validate_single_buffer(dev_priv, bo, interruptible,
+						 validate_as_mob);
 	if (ret)
 		ttm_bo_unreserve(bo);
 
@@ -2544,7 +2557,8 @@ int vmw_kms_helper_resource_prepare(stru
 	if (res->backup) {
 		ret = vmw_kms_helper_buffer_prepare(res->dev_priv, res->backup,
 						    interruptible,
-						    res->dev_priv->has_mob);
+						    res->dev_priv->has_mob,
+						    false);
 		if (ret)
 			goto out_unreserve;
 
--- a/drivers/gpu/drm/vmwgfx/vmwgfx_kms.h
+++ b/drivers/gpu/drm/vmwgfx/vmwgfx_kms.h
@@ -177,7 +177,6 @@ struct vmw_plane_state {
 	int pinned;
 
 	/* For CPU Blit */
-	struct ttm_bo_kmap_obj host_map;
 	unsigned int cpp;
 };
 
@@ -294,7 +293,8 @@ int vmw_kms_helper_dirty(struct vmw_priv
 int vmw_kms_helper_buffer_prepare(struct vmw_private *dev_priv,
 				  struct vmw_dma_buffer *buf,
 				  bool interruptible,
-				  bool validate_as_mob);
+				  bool validate_as_mob,
+				  bool for_cpu_blit);
 void vmw_kms_helper_buffer_revert(struct vmw_dma_buffer *buf);
 void vmw_kms_helper_buffer_finish(struct vmw_private *dev_priv,
 				  struct drm_file *file_priv,
--- a/drivers/gpu/drm/vmwgfx/vmwgfx_scrn.c
+++ b/drivers/gpu/drm/vmwgfx/vmwgfx_scrn.c
@@ -1033,7 +1033,7 @@ int vmw_kms_sou_do_dmabuf_dirty(struct v
 	int ret;
 
 	ret = vmw_kms_helper_buffer_prepare(dev_priv, buf, interruptible,
-					    false);
+					    false, false);
 	if (ret)
 		return ret;
 
@@ -1131,7 +1131,8 @@ int vmw_kms_sou_readback(struct vmw_priv
 	struct vmw_kms_dirty dirty;
 	int ret;
 
-	ret = vmw_kms_helper_buffer_prepare(dev_priv, buf, true, false);
+	ret = vmw_kms_helper_buffer_prepare(dev_priv, buf, true, false,
+					    false);
 	if (ret)
 		return ret;
 
--- a/drivers/gpu/drm/vmwgfx/vmwgfx_stdu.c
+++ b/drivers/gpu/drm/vmwgfx/vmwgfx_stdu.c
@@ -114,7 +114,6 @@ struct vmw_screen_target_display_unit {
 	bool defined;
 
 	/* For CPU Blit */
-	struct ttm_bo_kmap_obj host_map;
 	unsigned int cpp;
 };
 
@@ -639,10 +638,9 @@ static void vmw_stdu_dmabuf_cpu_commit(s
 		container_of(dirty->unit, typeof(*stdu), base);
 	s32 width, height;
 	s32 src_pitch, dst_pitch;
-	u8 *src, *dst;
-	bool not_used;
-	struct ttm_bo_kmap_obj guest_map;
-	int ret;
+	struct ttm_buffer_object *src_bo, *dst_bo;
+	u32 src_offset, dst_offset;
+	struct vmw_diff_cpy diff = VMW_CPU_BLIT_DIFF_INITIALIZER(stdu->cpp);
 
 	if (!dirty->num_hits)
 		return;
@@ -653,57 +651,38 @@ static void vmw_stdu_dmabuf_cpu_commit(s
 	if (width == 0 || height == 0)
 		return;
 
-	ret = ttm_bo_kmap(&ddirty->buf->base, 0, ddirty->buf->base.num_pages,
-			  &guest_map);
-	if (ret) {
-		DRM_ERROR("Failed mapping framebuffer for blit: %d\n",
-			  ret);
-		goto out_cleanup;
-	}
-
-	/* Assume we are blitting from Host (display_srf) to Guest (dmabuf) */
-	src_pitch = stdu->display_srf->base_size.width * stdu->cpp;
-	src = ttm_kmap_obj_virtual(&stdu->host_map, &not_used);
-	src += ddirty->top * src_pitch + ddirty->left * stdu->cpp;
+	/* Assume we are blitting from Guest (dmabuf) to Host (display_srf) */
+	dst_pitch = stdu->display_srf->base_size.width * stdu->cpp;
+	dst_bo = &stdu->display_srf->res.backup->base;
+	dst_offset = ddirty->top * dst_pitch + ddirty->left * stdu->cpp;
+
+	src_pitch = ddirty->pitch;
+	src_bo = &ddirty->buf->base;
+	src_offset = ddirty->fb_top * src_pitch + ddirty->fb_left * stdu->cpp;
+
+	/* Swap src and dst if the assumption was wrong. */
+	if (ddirty->transfer != SVGA3D_WRITE_HOST_VRAM) {
+		swap(dst_pitch, src_pitch);
+		swap(dst_bo, src_bo);
+		swap(src_offset, dst_offset);
+	}
+
+	(void) vmw_bo_cpu_blit(dst_bo, dst_offset, dst_pitch,
+			       src_bo, src_offset, src_pitch,
+			       width * stdu->cpp, height, &diff);
 
-	dst_pitch = ddirty->pitch;
-	dst = ttm_kmap_obj_virtual(&guest_map, &not_used);
-	dst += ddirty->fb_top * dst_pitch + ddirty->fb_left * stdu->cpp;
-
-
-	/* Figure out the real direction */
-	if (ddirty->transfer == SVGA3D_WRITE_HOST_VRAM) {
-		u8 *tmp;
-		s32 tmp_pitch;
-
-		tmp = src;
-		tmp_pitch = src_pitch;
-
-		src = dst;
-		src_pitch = dst_pitch;
-
-		dst = tmp;
-		dst_pitch = tmp_pitch;
-	}
-
-	/* CPU Blit */
-	while (height-- > 0) {
-		memcpy(dst, src, width * stdu->cpp);
-		dst += dst_pitch;
-		src += src_pitch;
-	}
-
-	if (ddirty->transfer == SVGA3D_WRITE_HOST_VRAM) {
+	if (ddirty->transfer == SVGA3D_WRITE_HOST_VRAM &&
+	    drm_rect_visible(&diff.rect)) {
 		struct vmw_private *dev_priv;
 		struct vmw_stdu_update *cmd;
 		struct drm_clip_rect region;
 		int ret;
 
 		/* We are updating the actual surface, not a proxy */
-		region.x1 = ddirty->left;
-		region.x2 = ddirty->right;
-		region.y1 = ddirty->top;
-		region.y2 = ddirty->bottom;
+		region.x1 = diff.rect.x1;
+		region.x2 = diff.rect.x2;
+		region.y1 = diff.rect.y1;
+		region.y2 = diff.rect.y2;
 		ret = vmw_kms_update_proxy(
 			(struct vmw_resource *) &stdu->display_srf->res,
 			(const struct drm_clip_rect *) &region, 1, 1);
@@ -720,13 +699,12 @@ static void vmw_stdu_dmabuf_cpu_commit(s
 		}
 
 		vmw_stdu_populate_update(cmd, stdu->base.unit,
-					 ddirty->left, ddirty->right,
-					 ddirty->top, ddirty->bottom);
+					 region.x1, region.x2,
+					 region.y1, region.y2);
 
 		vmw_fifo_commit(dev_priv, sizeof(*cmd));
 	}
 
-	ttm_bo_kunmap(&guest_map);
 out_cleanup:
 	ddirty->left = ddirty->top = ddirty->fb_left = ddirty->fb_top = S32_MAX;
 	ddirty->right = ddirty->bottom = S32_MIN;
@@ -772,9 +750,15 @@ int vmw_kms_stdu_dma(struct vmw_private
 		container_of(vfb, struct vmw_framebuffer_dmabuf, base)->buffer;
 	struct vmw_stdu_dirty ddirty;
 	int ret;
+	bool cpu_blit = !(dev_priv->capabilities & SVGA_CAP_3D);
 
+	/*
+	 * VMs without 3D support don't have the surface DMA command and
+	 * we'll be using a CPU blit, and the framebuffer should be moved out
+	 * of VRAM.
+	 */
 	ret = vmw_kms_helper_buffer_prepare(dev_priv, buf, interruptible,
-					    false);
+					    false, cpu_blit);
 	if (ret)
 		return ret;
 
@@ -793,8 +777,8 @@ int vmw_kms_stdu_dma(struct vmw_private
 	if (to_surface)
 		ddirty.base.fifo_reserve_size += sizeof(struct vmw_stdu_update);
 
-	/* 2D VMs cannot use SVGA_3D_CMD_SURFACE_DMA so do CPU blit instead */
-	if (!(dev_priv->capabilities & SVGA_CAP_3D)) {
+
+	if (cpu_blit) {
 		ddirty.base.fifo_commit = vmw_stdu_dmabuf_cpu_commit;
 		ddirty.base.clip = vmw_stdu_dmabuf_cpu_clip;
 		ddirty.base.fifo_reserve_size = 0;
@@ -1072,9 +1056,6 @@ vmw_stdu_primary_plane_cleanup_fb(struct
 {
 	struct vmw_plane_state *vps = vmw_plane_state_to_vps(old_state);
 
-	if (vps->host_map.virtual)
-		ttm_bo_kunmap(&vps->host_map);
-
 	if (vps->surf)
 		WARN_ON(!vps->pinned);
 
@@ -1236,24 +1217,11 @@ vmw_stdu_primary_plane_prepare_fb(struct
 	 * so cache these mappings
 	 */
 	if (vps->content_fb_type == SEPARATE_DMA &&
-	    !(dev_priv->capabilities & SVGA_CAP_3D)) {
-		ret = ttm_bo_kmap(&vps->surf->res.backup->base, 0,
-				  vps->surf->res.backup->base.num_pages,
-				  &vps->host_map);
-		if (ret) {
-			DRM_ERROR("Failed to map display buffer to CPU\n");
-			goto out_srf_unpin;
-		}
-
+	    !(dev_priv->capabilities & SVGA_CAP_3D))
 		vps->cpp = new_fb->pitches[0] / new_fb->width;
-	}
 
 	return 0;
 
-out_srf_unpin:
-	vmw_resource_unpin(&vps->surf->res);
-	vps->pinned--;
-
 out_srf_unref:
 	vmw_surface_unreference(&vps->surf);
 	return ret;
@@ -1297,7 +1265,6 @@ vmw_stdu_primary_plane_atomic_update(str
 		stdu->display_srf = vps->surf;
 		stdu->content_fb_type = vps->content_fb_type;
 		stdu->cpp = vps->cpp;
-		memcpy(&stdu->host_map, &vps->host_map, sizeof(vps->host_map));
 
 		vclips.x = crtc->x;
 		vclips.y = crtc->y;