Blob Blame History Raw
From 1a2d68dee9d2011beaca6303743cda7069d02797 Mon Sep 17 00:00:00 2001
From: Yongqiang Sun <yongqiang.sun@amd.com>
Date: Mon, 16 Dec 2019 17:21:10 -0500
Subject: drm/amd/display: programing surface flip by dmcub.
Git-commit: 8c0192533c39660ae229d7b80adeeb3bc63a3eba
Patch-mainline: v5.6-rc1
References: jsc#SLE-12680, jsc#SLE-12880, jsc#SLE-12882, jsc#SLE-12883, jsc#SLE-13496, jsc#SLE-15322

Programming surface flip addresses via dmcub uC for optimizing the data
flush.

Signed-off-by: Yongqiang Sun <yongqiang.sun@amd.com>
Reviewed-by: Tony Cheng <Tony.Cheng@amd.com>
Acked-by: Harry Wentland <harry.wentland@amd.com>
Acked-by: Rodrigo Siqueira <Rodrigo.Siqueira@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
Signed-off-by: Patrik Jakobsson <pjakobsson@suse.de>
---
 drivers/gpu/drm/amd/display/dc/dc.h           |   1 +
 .../gpu/drm/amd/display/dc/dcn21/dcn21_hubp.c | 116 ++++++++++++++++++
 .../gpu/drm/amd/display/dmub/inc/dmub_cmd.h   |  36 +++++-
 .../gpu/drm/amd/display/dmub/inc/dmub_rb.h    |  18 ++-
 4 files changed, 165 insertions(+), 6 deletions(-)

diff --git a/drivers/gpu/drm/amd/display/dc/dc.h b/drivers/gpu/drm/amd/display/dc/dc.h
index dfc66954a24b..ecd2257de80b 100644
--- a/drivers/gpu/drm/amd/display/dc/dc.h
+++ b/drivers/gpu/drm/amd/display/dc/dc.h
@@ -420,6 +420,7 @@ struct dc_debug_options {
 	bool nv12_iflip_vm_wa;
 	bool disable_dram_clock_change_vactive_support;
 	bool validate_dml_output;
+	bool enable_dmcub_surface_flip;
 };
 
 struct dc_debug_data {
diff --git a/drivers/gpu/drm/amd/display/dc/dcn21/dcn21_hubp.c b/drivers/gpu/drm/amd/display/dc/dcn21/dcn21_hubp.c
index 4d2564f79395..1f4e2cd08d4c 100644
--- a/drivers/gpu/drm/amd/display/dc/dcn21/dcn21_hubp.c
+++ b/drivers/gpu/drm/amd/display/dc/dcn21/dcn21_hubp.c
@@ -29,6 +29,8 @@
 #include "dm_services.h"
 #include "reg_helper.h"
 
+#include "dc_dmub_srv.h"
+
 #define DC_LOGGER_INIT(logger)
 
 #define REG(reg)\
@@ -688,6 +690,113 @@ void hubp21_validate_dml_output(struct hubp *hubp,
 				dml_dlg_attr->refcyc_per_meta_chunk_flip_l, dlg_attr.refcyc_per_meta_chunk_flip_l);
 }
 
+static void program_video_progressive_dmcub(
+		struct dc_dmub_srv *dmcub,
+		struct hubp *hubp,
+		const struct dc_plane_address *address,
+		bool flip_immediate)
+{
+	struct dcn21_hubp *hubp21 = TO_DCN21_HUBP(hubp);
+	struct dmub_rb_cmd_flip surface_flip = { 0 };
+
+	surface_flip.header.type = DMUB_CMD__SURFACE_FLIP;
+
+	surface_flip.flip.addr_type = address->type;
+	surface_flip.flip.immediate = flip_immediate;
+	surface_flip.flip.vmid = address->vmid;
+
+	surface_flip.flip.hubp_inst = hubp->inst;
+	surface_flip.flip.tmz_surface = address->tmz_surface;
+
+	switch (address->type) {
+	case PLN_ADDR_TYPE_GRAPHICS:
+		if (address->grph.addr.quad_part == 0)
+			return;
+
+		if (address->grph.meta_addr.quad_part != 0) {
+			surface_flip.flip.DCSURF_PRIMARY_META_SURFACE_ADDRESS =
+					address->grph.meta_addr.low_part;
+			surface_flip.flip.DCSURF_PRIMARY_META_SURFACE_ADDRESS_HIGH =
+					address->grph.meta_addr.high_part;
+		}
+
+		surface_flip.flip.DCSURF_PRIMARY_SURFACE_ADDRESS =
+				address->grph.addr.low_part;
+		surface_flip.flip.DCSURF_PRIMARY_SURFACE_ADDRESS_HIGH =
+				address->grph.addr.high_part;
+		break;
+	case PLN_ADDR_TYPE_VIDEO_PROGRESSIVE:
+		if (address->video_progressive.luma_addr.quad_part == 0
+				|| address->video_progressive.chroma_addr.quad_part == 0)
+			return;
+
+		if (address->video_progressive.luma_meta_addr.quad_part != 0) {
+			surface_flip.flip.DCSURF_PRIMARY_META_SURFACE_ADDRESS =
+					address->video_progressive.luma_meta_addr.low_part;
+			surface_flip.flip.DCSURF_PRIMARY_META_SURFACE_ADDRESS_HIGH =
+					address->video_progressive.luma_meta_addr.high_part;
+
+			surface_flip.flip.DCSURF_PRIMARY_META_SURFACE_ADDRESS_C =
+					address->video_progressive.chroma_meta_addr.low_part;
+			surface_flip.flip.DCSURF_PRIMARY_META_SURFACE_ADDRESS_HIGH_C =
+					address->video_progressive.chroma_meta_addr.high_part;
+		}
+
+		surface_flip.flip.DCSURF_PRIMARY_SURFACE_ADDRESS =
+				address->video_progressive.luma_addr.low_part;
+		surface_flip.flip.DCSURF_PRIMARY_SURFACE_ADDRESS_HIGH =
+				address->video_progressive.luma_addr.high_part;
+
+		surface_flip.flip.DCSURF_PRIMARY_SURFACE_ADDRESS_C =
+				address->video_progressive.chroma_addr.low_part;
+		surface_flip.flip.DCSURF_PRIMARY_SURFACE_ADDRESS_HIGH_C =
+				address->video_progressive.chroma_addr.high_part;
+
+		break;
+	case PLN_ADDR_TYPE_GRPH_STEREO:
+		if (address->grph_stereo.left_addr.quad_part == 0)
+			return;
+		if (address->grph_stereo.right_addr.quad_part == 0)
+			return;
+
+		surface_flip.flip.grph_stereo = true;
+
+		if (address->grph_stereo.right_meta_addr.quad_part != 0) {
+			surface_flip.flip.DCSURF_SECONDARY_META_SURFACE_ADDRESS =
+					address->grph_stereo.right_meta_addr.low_part;
+			surface_flip.flip.DCSURF_SECONDARY_META_SURFACE_ADDRESS_HIGH =
+					address->grph_stereo.right_meta_addr.high_part;
+		}
+
+		if (address->grph_stereo.left_meta_addr.quad_part != 0) {
+			surface_flip.flip.DCSURF_PRIMARY_META_SURFACE_ADDRESS =
+					address->grph_stereo.left_meta_addr.low_part;
+			surface_flip.flip.DCSURF_PRIMARY_META_SURFACE_ADDRESS_HIGH =
+					address->grph_stereo.left_meta_addr.high_part;
+		}
+
+		surface_flip.flip.DCSURF_PRIMARY_SURFACE_ADDRESS =
+				address->grph_stereo.left_addr.low_part;
+		surface_flip.flip.DCSURF_PRIMARY_SURFACE_ADDRESS_HIGH =
+				address->grph_stereo.left_addr.high_part;
+
+		surface_flip.flip.DCSURF_SECONDARY_SURFACE_ADDRESS =
+				address->grph_stereo.right_addr.low_part;
+		surface_flip.flip.DCSURF_SECONDARY_SURFACE_ADDRESS_HIGH =
+				address->grph_stereo.right_addr.high_part;
+
+		break;
+
+	}
+
+	PERF_TRACE();  // TODO: remove after performance is stable.
+	dc_dmub_srv_cmd_queue(dmcub, &surface_flip.header);
+	PERF_TRACE();  // TODO: remove after performance is stable.
+	dc_dmub_srv_cmd_execute(dmcub);
+	PERF_TRACE();  // TODO: remove after performance is stable.
+	dc_dmub_srv_wait_idle(dmcub);
+}
+
 bool hubp21_program_surface_flip_and_addr(
 	struct hubp *hubp,
 	const struct dc_plane_address *address,
@@ -696,6 +805,13 @@ bool hubp21_program_surface_flip_and_addr(
 	struct dcn21_hubp *hubp21 = TO_DCN21_HUBP(hubp);
 	struct dc_debug_options *debug = &hubp->ctx->dc->debug;
 
+
+	if (hubp->ctx->dc->debug.enable_dmcub_surface_flip) {
+		program_video_progressive_dmcub(hubp->ctx->dmub_srv, hubp, address, flip_immediate);
+		hubp->request_address = *address;
+		return true;
+	}
+
 	//program flip type
 	REG_UPDATE(DCSURF_FLIP_CONTROL,
 			SURFACE_FLIP_TYPE, flip_immediate);
diff --git a/drivers/gpu/drm/amd/display/dmub/inc/dmub_cmd.h b/drivers/gpu/drm/amd/display/dmub/inc/dmub_cmd.h
index d8fdf7e76ded..919323257edb 100644
--- a/drivers/gpu/drm/amd/display/dmub/inc/dmub_cmd.h
+++ b/drivers/gpu/drm/amd/display/dmub/inc/dmub_cmd.h
@@ -30,12 +30,13 @@
 #include "dmub_cmd_dal.h"
 #include "dmub_cmd_vbios.h"
 #include "atomfirmware.h"
-
+#include "dc_hw_types.h"
 #define DMUB_RB_CMD_SIZE 64
 #define DMUB_RB_MAX_ENTRY 128
 #define DMUB_RB_SIZE (DMUB_RB_CMD_SIZE * DMUB_RB_MAX_ENTRY)
 #define REG_SET_MASK 0xFFFF
 
+
 /*
  * Command IDs should be treated as stable ABI.
  * Do not reuse or modify IDs.
@@ -47,6 +48,7 @@ enum dmub_cmd_type {
 	DMUB_CMD__REG_SEQ_FIELD_UPDATE_SEQ = 2,
 	DMUB_CMD__REG_SEQ_BURST_WRITE = 3,
 	DMUB_CMD__REG_REG_WAIT = 4,
+	DMUB_CMD__SURFACE_FLIP = 5,
 	DMUB_CMD__PSR = 64,
 	DMUB_CMD__VBIOS = 128,
 };
@@ -145,6 +147,37 @@ struct dmub_rb_cmd_reg_wait {
 	struct dmub_cmd_reg_wait_data reg_wait;
 };
 
+#ifndef PHYSICAL_ADDRESS_LOC
+#define PHYSICAL_ADDRESS_LOC union large_integer
+#endif
+
+struct dmub_cmd_surface_flip {
+	uint32_t DCSURF_SURFACE_CONTROL;
+	uint32_t DCSURF_PRIMARY_META_SURFACE_ADDRESS_HIGH;
+	uint32_t DCSURF_PRIMARY_META_SURFACE_ADDRESS;
+	uint32_t DCSURF_PRIMARY_SURFACE_ADDRESS_HIGH;
+	uint32_t DCSURF_PRIMARY_SURFACE_ADDRESS;
+	uint32_t DCSURF_PRIMARY_META_SURFACE_ADDRESS_HIGH_C;
+	uint32_t DCSURF_PRIMARY_META_SURFACE_ADDRESS_C;
+	uint32_t DCSURF_PRIMARY_SURFACE_ADDRESS_HIGH_C;
+	uint32_t DCSURF_PRIMARY_SURFACE_ADDRESS_C;
+	uint32_t DCSURF_SECONDARY_META_SURFACE_ADDRESS_HIGH;
+	uint32_t DCSURF_SECONDARY_META_SURFACE_ADDRESS;
+	uint32_t DCSURF_SECONDARY_SURFACE_ADDRESS_HIGH;
+	uint32_t DCSURF_SECONDARY_SURFACE_ADDRESS;
+	enum dc_plane_addr_type addr_type;
+	uint8_t hubp_inst;
+	bool tmz_surface;
+	bool immediate;
+	uint8_t vmid;
+	bool grph_stereo;
+};
+
+struct dmub_rb_cmd_flip {
+	struct dmub_cmd_header header;
+	struct dmub_cmd_surface_flip flip;
+};
+
 struct dmub_cmd_digx_encoder_control_data {
 	union dig_encoder_control_parameters_v1_5 dig;
 };
@@ -262,6 +295,7 @@ union dmub_rb_cmd {
 	struct dmub_rb_cmd_psr_enable psr_enable;
 	struct dmub_rb_cmd_psr_copy_settings psr_copy_settings;
 	struct dmub_rb_cmd_psr_set_level psr_set_level;
+	struct dmub_rb_cmd_flip surface_flip;
 };
 
 #pragma pack(pop)
diff --git a/drivers/gpu/drm/amd/display/dmub/inc/dmub_rb.h b/drivers/gpu/drm/amd/display/dmub/inc/dmub_rb.h
index ade688fd32f0..df875fdd2ab0 100644
--- a/drivers/gpu/drm/amd/display/dmub/inc/dmub_rb.h
+++ b/drivers/gpu/drm/amd/display/dmub/inc/dmub_rb.h
@@ -73,12 +73,17 @@ static inline bool dmub_rb_full(struct dmub_rb *rb)
 static inline bool dmub_rb_push_front(struct dmub_rb *rb,
 				      const struct dmub_cmd_header *cmd)
 {
-	uint8_t *wt_ptr = (uint8_t *)(rb->base_address) + rb->wrpt;
+	uint64_t volatile *dst = (uint64_t volatile *)(rb->base_address) + rb->wrpt / sizeof(uint64_t);
+	const uint64_t *src = (const uint64_t *)cmd;
+	int i;
 
 	if (dmub_rb_full(rb))
 		return false;
 
-	dmub_memcpy(wt_ptr, cmd, DMUB_RB_CMD_SIZE);
+	// copying data
+	for (i = 0; i < DMUB_RB_CMD_SIZE / sizeof(uint64_t); i++)
+		*dst++ = *src++;
+
 	rb->wrpt += DMUB_RB_CMD_SIZE;
 
 	if (rb->wrpt >= rb->capacity)
@@ -115,14 +120,17 @@ static inline bool dmub_rb_pop_front(struct dmub_rb *rb)
 
 static inline void dmub_rb_flush_pending(const struct dmub_rb *rb)
 {
-	uint8_t buf[DMUB_RB_CMD_SIZE];
 	uint32_t rptr = rb->rptr;
 	uint32_t wptr = rb->wrpt;
 
 	while (rptr != wptr) {
-		const uint8_t *data = (const uint8_t *)rb->base_address + rptr;
+		uint64_t volatile *data = (uint64_t volatile *)rb->base_address + rptr / sizeof(uint64_t);
+		//uint64_t volatile *p = (uint64_t volatile *)data;
+		uint64_t temp;
+		int i;
 
-		dmub_memcpy(buf, data, DMUB_RB_CMD_SIZE);
+		for (i = 0; i < DMUB_RB_CMD_SIZE / sizeof(uint64_t); i++)
+			temp = *data++;
 
 		rptr += DMUB_RB_CMD_SIZE;
 		if (rptr >= rb->capacity)
-- 
2.28.0