From 676d756c6d35401fb1aa2828ca423d82fa6004d1 Mon Sep 17 00:00:00 2001
From: Jun Lei <jun.lei@amd.com>
Date: Fri, 24 Jun 2022 16:28:50 -0400
Subject: drm/amd/display: Update DML logic for unbounded req handling
Git-commit: f4b4e41a2e05270cd90c5817ab514ace95555874
Patch-mainline: v6.0-rc1
References: jsc#PED-1166 jsc#PED-1168 jsc#PED-1170 jsc#PED-1218 jsc#PED-1220 jsc#PED-1222 jsc#PED-1223 jsc#PED-1225 jsc#PED-2849

[why]
Unbounded request logic in resource/DML has some issues where unbounded
request is being enabled incorrectly. SW today enables unbounded request
unconditionally in hardware, on the assumption that HW can always
support it in single pipe scenarios.

This worked until now because DML makes the same assumption. However,
there are single pipe scenarios where unbounded request cannot be
enabled. Fixing this requires porting in a new DML update and fixing
the dcn32 resource logic to match.

[how]
First, dcn32_resource should program unbounded req in HW according to
the unbounded req enablement output from DML, as opposed to the DML input.

Second, port in the DML update which disables unbounded req in some
scenarios to fix an issue with poor stutter performance.

Tested-by: Daniel Wheeler <daniel.wheeler@amd.com>
Reviewed-by: Rodrigo Siqueira <Rodrigo.Siqueira@amd.com>
Signed-off-by: Jun Lei <jun.lei@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
Acked-by: Patrik Jakobsson <pjakobsson@suse.de>
---
 .../drm/amd/display/dc/dcn32/dcn32_resource.c | 11 +++-
 .../dc/dml/dcn32/display_mode_vba_32.c        | 44 +++++++++++++---
 .../dc/dml/dcn32/display_mode_vba_util_32.c   | 51 ++++++++++++++++---
 .../dc/dml/dcn32/display_mode_vba_util_32.h   | 10 +++-
 .../drm/amd/display/dc/dml/display_mode_vba.c |  1 +
 5 files changed, 103 insertions(+), 14 deletions(-)

diff --git a/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_resource.c b/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_resource.c
index 32da47e24839..39214a0dcdf2 100644
--- a/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_resource.c
+++ b/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_resource.c
@@ -3322,6 +3322,7 @@ void dcn32_calculate_dlg_params(struct dc *dc, struct dc_state *context, display
 {
 	int i, pipe_idx;
 	bool usr_retraining_support = false;
+	bool unbounded_req_enabled = false;
 
 	/* Writeback MCIF_WB arbitration parameters */
 	dc->res_pool->funcs->set_mcif_arb_params(dc, context, pipes, pipe_cnt);
@@ -3357,6 +3358,14 @@ void dcn32_calculate_dlg_params(struct dc *dc, struct dc_state *context, display
 	if (context->bw_ctx.bw.dcn.clk.dispclk_khz < dc->debug.min_disp_clk_khz)
 		context->bw_ctx.bw.dcn.clk.dispclk_khz = dc->debug.min_disp_clk_khz;
 
+	unbounded_req_enabled = get_unbounded_request_enabled(&context->bw_ctx.dml, pipes, pipe_cnt);
+
+	if (unbounded_req_enabled && pipe_cnt > 1) {
+		// Unbounded requesting should not ever be used when more than 1 pipe is enabled.
+		ASSERT(false);
+		unbounded_req_enabled = false;
+	}
+
 	for (i = 0, pipe_idx = 0; i < dc->res_pool->pipe_count; i++) {
 		if (!context->res_ctx.pipe_ctx[i].stream)
 			continue;
@@ -3375,7 +3384,7 @@ void dcn32_calculate_dlg_params(struct dc *dc, struct dc_state *context, display
 		} else {
 			context->res_ctx.pipe_ctx[i].det_buffer_size_kb = get_det_buffer_size_kbytes(&context->bw_ctx.dml, pipes, pipe_cnt,
 							pipe_idx);
-			context->res_ctx.pipe_ctx[i].unbounded_req = pipes[pipe_idx].pipe.src.unbounded_req_mode;
+			context->res_ctx.pipe_ctx[i].unbounded_req = unbounded_req_enabled;
 		}
 		if (context->bw_ctx.bw.dcn.clk.dppclk_khz < pipes[pipe_idx].clks_cfg.dppclk_mhz * 1000)
 			context->bw_ctx.bw.dcn.clk.dppclk_khz = pipes[pipe_idx].clks_cfg.dppclk_mhz * 1000;
diff --git a/drivers/gpu/drm/amd/display/dc/dml/dcn32/display_mode_vba_32.c b/drivers/gpu/drm/amd/display/dc/dml/dcn32/display_mode_vba_32.c
index 1efce9f5eae3..e9204c711cb9 100644
--- a/drivers/gpu/drm/amd/display/dc/dml/dcn32/display_mode_vba_32.c
+++ b/drivers/gpu/drm/amd/display/dc/dml/dcn32/display_mode_vba_32.c
@@ -224,6 +224,9 @@ static void DISPCLKDPPCLKDCFCLKDeepSleepPrefetchParametersWatermarksAndPerforman
 				mode_lib->vba.NumberOfActiveSurfaces,
 				mode_lib->vba.nomDETInKByte,
 				mode_lib->vba.UseUnboundedRequesting,
+				mode_lib->vba.DisableUnboundRequestIfCompBufReservedSpaceNeedAdjustment,
+				mode_lib->vba.ip.pixel_chunk_size_kbytes,
+				mode_lib->vba.ip.rob_buffer_size_kbytes,
 				mode_lib->vba.CompressedBufferSegmentSizeInkByteFinal,
 				v->dummy_vars
 					.DISPCLKDPPCLKDCFCLKDeepSleepPrefetchParametersWatermarksAndPerformanceCalculation
@@ -285,6 +288,10 @@ static void DISPCLKDPPCLKDCFCLKDeepSleepPrefetchParametersWatermarksAndPerforman
 				mode_lib->vba.DETBufferSizeC,
 				&v->UnboundedRequestEnabled,
 				&v->CompressedBufferSizeInkByte,
+				&v->CompBufReservedSpaceKBytes,
+				&v->dummy_vars
+					.DISPCLKDPPCLKDCFCLKDeepSleepPrefetchParametersWatermarksAndPerformanceCalculation
+					.dummy_boolean,       /* bool *CompBufReservedSpaceNeedAdjustment */
 				v->dummy_vars
 					.DISPCLKDPPCLKDCFCLKDeepSleepPrefetchParametersWatermarksAndPerformanceCalculation
 					.dummy_boolean_array, /* bool ViewportSizeSupportPerSurface[] */
@@ -293,6 +300,9 @@ static void DISPCLKDPPCLKDCFCLKDeepSleepPrefetchParametersWatermarksAndPerforman
 					 .dummy_boolean); /* bool *ViewportSizeSupport */
 	}
 
+	v->CompBufReservedSpaceZs     = v->CompBufReservedSpaceKBytes * 1024.0 / 256.0;
+	v->CompBufReservedSpace64B    = v->CompBufReservedSpaceKBytes * 1024.0 / 64.0;
+
 	// DCFCLK Deep Sleep
 	dml32_CalculateDCFCLKDeepSleep(
 			mode_lib->vba.NumberOfActiveSurfaces,
@@ -1530,8 +1540,8 @@ static void DISPCLKDPPCLKDCFCLKDeepSleepPrefetchParametersWatermarksAndPerforman
 			v->TotalDataReadBandwidth,
 			mode_lib->vba.DCFCLK,
 			mode_lib->vba.ReturnBW,
-			mode_lib->vba.CompbufReservedSpace64B,
-			mode_lib->vba.CompbufReservedSpaceZs,
+			v->CompbufReservedSpace64B,
+			v->CompbufReservedSpaceZs,
 			mode_lib->vba.SRExitTime,
 			mode_lib->vba.SRExitZ8Time,
 			mode_lib->vba.SynchronizeTimingsFinal,
@@ -1596,8 +1606,8 @@ static void DISPCLKDPPCLKDCFCLKDeepSleepPrefetchParametersWatermarksAndPerforman
 				v->TotalDataReadBandwidth,
 				mode_lib->vba.DCFCLK,
 				mode_lib->vba.ReturnBW,
-				0, //mode_lib->vba.CompbufReservedSpace64B,
-				0, //mode_lib->vba.CompbufReservedSpaceZs,
+				0, //CompbufReservedSpace64B,
+				0, //CompbufReservedSpaceZs,
 				mode_lib->vba.SRExitTime,
 				mode_lib->vba.SRExitZ8Time,
 				mode_lib->vba.SynchronizeTimingsFinal,
@@ -1659,6 +1669,7 @@ static void DISPCLKDPPCLKDCFCLKDeepSleepPrefetchParametersWatermarksAndPerforman
 void dml32_ModeSupportAndSystemConfigurationFull(struct display_mode_lib *mode_lib)
 {
 	unsigned int dummy_integer[4];
+	bool dummy_boolean[2];
 	bool MPCCombineMethodAsNeededForPStateChangeAndVoltage;
 	bool MPCCombineMethodAsPossible;
 	enum odm_combine_mode dummy_odm_mode[DC__NUM_DPP__MAX];
@@ -1673,6 +1684,8 @@ void dml32_ModeSupportAndSystemConfigurationFull(struct display_mode_lib *mode_l
 	bool SubViewportMALLPStateMethod;
 	bool PhantomPipeMALLPStateMethod;
 	unsigned int MaximumMPCCombine;
+	bool CompBufReservedSpaceNeedAdjustment;
+	bool CompBufReservedSpaceNeedAdjustmentSingleDPP;
 
 #ifdef __DML_VBA_DEBUG__
 	dml_print("DML::%s: called\n", __func__);
@@ -1905,6 +1918,9 @@ void dml32_ModeSupportAndSystemConfigurationFull(struct display_mode_lib *mode_l
 			mode_lib->vba.NumberOfActiveSurfaces,
 			mode_lib->vba.nomDETInKByte,
 			mode_lib->vba.UseUnboundedRequesting,
+			mode_lib->vba.DisableUnboundRequestIfCompBufReservedSpaceNeedAdjustment,
+			mode_lib->vba.ip.pixel_chunk_size_kbytes,
+			mode_lib->vba.ip.rob_buffer_size_kbytes,
 			mode_lib->vba.CompressedBufferSegmentSizeInkByteFinal,
 			mode_lib->vba.Output,
 			mode_lib->vba.ReadBandwidthLuma,
@@ -1952,6 +1968,8 @@ void dml32_ModeSupportAndSystemConfigurationFull(struct display_mode_lib *mode_l
 			v->dummy_vars.dml32_ModeSupportAndSystemConfigurationFull.dummy_integer_array[7], /* Long            DETBufferSizeC[]  */
 			&v->dummy_vars.dml32_ModeSupportAndSystemConfigurationFull.dummy_boolean_array[0][0], /* bool           *UnboundedRequestEnabled  */
 			&v->dummy_vars.dml32_ModeSupportAndSystemConfigurationFull.dummy_integer_array[0][0], /* Long           *CompressedBufferSizeInkByte  */
+			&v->dummy_vars.dml32_ModeSupportAndSystemConfigurationFull.dummy_integer_array[1][0], /* Long           *CompBufReservedSpaceKBytes */
+			&CompBufReservedSpaceNeedAdjustmentSingleDPP,
 			mode_lib->vba.SingleDPPViewportSizeSupportPerSurface,/* bool ViewportSizeSupportPerSurface[] */
 			&v->dummy_vars.dml32_ModeSupportAndSystemConfigurationFull.dummy_boolean_array[1][0]); /* bool           *ViewportSizeSupport */
 
@@ -2120,9 +2138,18 @@ void dml32_ModeSupportAndSystemConfigurationFull(struct display_mode_lib *mode_l
 				}
 			}
 
+			// if TotalNumberOfActiveDPP is > 1, then there should be no unbounded req mode (hw limitation), the comp buf reserved adjustment is not needed regardless
+			// if TotalNumberOfActiveDPP is == 1, then will use the SingleDPP version of unbounded_req for the decision
+			CompBufReservedSpaceNeedAdjustment = (mode_lib->vba.TotalNumberOfActiveDPP[i][j] > 1) ? 0 : CompBufReservedSpaceNeedAdjustmentSingleDPP;
+
+
+
 			if (j == 1 && !dml32_UnboundedRequest(mode_lib->vba.UseUnboundedRequesting,
-							mode_lib->vba.TotalNumberOfActiveDPP[i][j], NoChroma,
-							mode_lib->vba.Output[0])) {
+					mode_lib->vba.TotalNumberOfActiveDPP[i][j], NoChroma,
+					mode_lib->vba.Output[0],
+					mode_lib->vba.SurfaceTiling[0],
+					CompBufReservedSpaceNeedAdjustment,
+					mode_lib->vba.DisableUnboundRequestIfCompBufReservedSpaceNeedAdjustment)) {
 				while (!(mode_lib->vba.TotalNumberOfActiveDPP[i][j] >= mode_lib->vba.MaxNumDPP
 						|| mode_lib->vba.TotalNumberOfSingleDPPSurfaces[i][j] == 0)) {
 					double BWOfNonCombinedSurfaceOfMaximumBandwidth = 0;
@@ -2500,6 +2527,9 @@ void dml32_ModeSupportAndSystemConfigurationFull(struct display_mode_lib *mode_l
 					mode_lib->vba.NumberOfActiveSurfaces,
 					mode_lib->vba.nomDETInKByte,
 					mode_lib->vba.UseUnboundedRequesting,
+					mode_lib->vba.DisableUnboundRequestIfCompBufReservedSpaceNeedAdjustment,
+					mode_lib->vba.ip.pixel_chunk_size_kbytes,
+					mode_lib->vba.ip.rob_buffer_size_kbytes,
 					mode_lib->vba.CompressedBufferSegmentSizeInkByteFinal,
 					mode_lib->vba.Output,
 					mode_lib->vba.ReadBandwidthLuma,
@@ -2546,6 +2576,8 @@ void dml32_ModeSupportAndSystemConfigurationFull(struct display_mode_lib *mode_l
 					mode_lib->vba.DETBufferSizeCThisState,
 					&mode_lib->vba.UnboundedRequestEnabledThisState,
 					&mode_lib->vba.CompressedBufferSizeInkByteThisState,
+					&dummy_integer[0], /* Long CompBufReservedSpaceKBytes */
+					&dummy_boolean[0], /* bool CompBufReservedSpaceNeedAdjustment */
 					v->dummy_vars.dml32_ModeSupportAndSystemConfigurationFull.dummy_boolean_array[0],
 					&mode_lib->vba.ViewportSizeSupport[i][j]);
 
diff --git a/drivers/gpu/drm/amd/display/dc/dml/dcn32/display_mode_vba_util_32.c b/drivers/gpu/drm/amd/display/dc/dml/dcn32/display_mode_vba_util_32.c
index 67cbc7923652..c8a3f367d622 100644
--- a/drivers/gpu/drm/amd/display/dc/dml/dcn32/display_mode_vba_util_32.c
+++ b/drivers/gpu/drm/amd/display/dc/dml/dcn32/display_mode_vba_util_32.c
@@ -400,6 +400,9 @@ void dml32_CalculateSwathAndDETConfiguration(
 		unsigned int NumberOfActiveSurfaces,
 		unsigned int nomDETInKByte,
 		enum unbounded_requesting_policy UseUnboundedRequestingFinal,
+		bool DisableUnboundRequestIfCompBufReservedSpaceNeedAdjustment,
+		unsigned int PixelChunkSizeKBytes,
+		unsigned int ROBSizeKBytes,
 		unsigned int CompressedBufferSegmentSizeInkByteFinal,
 		enum output_encoder_class Output[],
 		double ReadBandwidthLuma[],
@@ -447,6 +450,8 @@ void dml32_CalculateSwathAndDETConfiguration(
 		unsigned int DETBufferSizeC[],
 		bool *UnboundedRequestEnabled,
 		unsigned int *CompressedBufferSizeInkByte,
+		unsigned int *CompBufReservedSpaceKBytes,
+		bool *CompBufReservedSpaceNeedAdjustment,
 		bool ViewportSizeSupportPerSurface[],
 		bool *ViewportSizeSupport)
 {
@@ -465,6 +470,8 @@ void dml32_CalculateSwathAndDETConfiguration(
 
 #ifdef __DML_VBA_DEBUG__
 	dml_print("DML::%s: ForceSingleDPP = %d\n", __func__, ForceSingleDPP);
+	dml_print("DML::%s: ROBSizeKBytes = %d\n", __func__, ROBSizeKBytes);
+	dml_print("DML::%s: PixelChunkSizeKBytes = %d\n", __func__, PixelChunkSizeKBytes);
 #endif
 	dml32_CalculateSwathWidth(ForceSingleDPP,
 			NumberOfActiveSurfaces,
@@ -534,8 +541,24 @@ void dml32_CalculateSwathAndDETConfiguration(
 		}
 	}
 
-	*UnboundedRequestEnabled = dml32_UnboundedRequest(UseUnboundedRequestingFinal, TotalActiveDPP,
-			NoChromaSurfaces, Output[0]);
+	// By default, just set the reserved space to 2 pixel chunks size
+	*CompBufReservedSpaceKBytes = PixelChunkSizeKBytes * 2;
+
+	// if unbounded req is enabled, program reserved space such that the ROB will not hold more than 8 swaths worth of data
+	// - assume worst-case compression rate of 4. [ROB size - 8 * swath_size / max_compression ratio]
+	// - assume for "narrow" vp case in which the ROB can fit 8 swaths, the DET should be big enough to do full size req
+	*CompBufReservedSpaceNeedAdjustment = ((int) ROBSizeKBytes - (int) *CompBufReservedSpaceKBytes) > (int) (RoundedUpMaxSwathSizeBytesY[0]/512);
+
+	if (*CompBufReservedSpaceNeedAdjustment == 1) {
+		*CompBufReservedSpaceKBytes = ROBSizeKBytes - RoundedUpMaxSwathSizeBytesY[0]/512;
+	}
+
+	#ifdef __DML_VBA_DEBUG__
+		dml_print("DML::%s: CompBufReservedSpaceKBytes          = %d\n",  __func__, *CompBufReservedSpaceKBytes);
+		dml_print("DML::%s: CompBufReservedSpaceNeedAdjustment  = %d\n",  __func__, *CompBufReservedSpaceNeedAdjustment);
+	#endif
+
+	*UnboundedRequestEnabled = dml32_UnboundedRequest(UseUnboundedRequestingFinal, TotalActiveDPP, NoChromaSurfaces, Output[0], SurfaceTiling[0], *CompBufReservedSpaceNeedAdjustment, DisableUnboundRequestIfCompBufReservedSpaceNeedAdjustment);
 
 	dml32_CalculateDETBufferSize(DETSizeOverride,
 			UseMALLForPStateChange,
@@ -853,9 +876,12 @@ void dml32_CalculateSwathWidth(
 } // CalculateSwathWidth
 
 bool dml32_UnboundedRequest(enum unbounded_requesting_policy UseUnboundedRequestingFinal,
-		unsigned int TotalNumberOfActiveDPP,
-		bool NoChroma,
-		enum output_encoder_class Output)
+			unsigned int TotalNumberOfActiveDPP,
+			bool NoChroma,
+			enum output_encoder_class Output,
+			enum dm_swizzle_mode SurfaceTiling,
+			bool CompBufReservedSpaceNeedAdjustment,
+			bool DisableUnboundRequestIfCompBufReservedSpaceNeedAdjustment)
 {
 	bool ret_val = false;
 
@@ -863,7 +889,20 @@ bool dml32_UnboundedRequest(enum unbounded_requesting_policy UseUnboundedRequest
 			TotalNumberOfActiveDPP == 1 && NoChroma);
 	if (UseUnboundedRequestingFinal == dm_unbounded_requesting_edp_only && Output != dm_edp)
 		ret_val = false;
-	return ret_val;
+
+	if (SurfaceTiling == dm_sw_linear)
+		ret_val = false;
+
+	if (CompBufReservedSpaceNeedAdjustment == 1 && DisableUnboundRequestIfCompBufReservedSpaceNeedAdjustment)
+		ret_val = false;
+
+#ifdef __DML_VBA_DEBUG__
+	dml_print("DML::%s: CompBufReservedSpaceNeedAdjustment  = %d\n",  __func__, CompBufReservedSpaceNeedAdjustment);
+	dml_print("DML::%s: DisableUnboundRequestIfCompBufReservedSpaceNeedAdjustment  = %d\n",  __func__, DisableUnboundRequestIfCompBufReservedSpaceNeedAdjustment);
+	dml_print("DML::%s: ret_val = %d\n",  __func__, ret_val);
+#endif
+
+	return (ret_val);
 }
 
 void dml32_CalculateDETBufferSize(
diff --git a/drivers/gpu/drm/amd/display/dc/dml/dcn32/display_mode_vba_util_32.h b/drivers/gpu/drm/amd/display/dc/dml/dcn32/display_mode_vba_util_32.h
index 72461b934ee0..d293856ba906 100644
--- a/drivers/gpu/drm/amd/display/dc/dml/dcn32/display_mode_vba_util_32.h
+++ b/drivers/gpu/drm/amd/display/dc/dml/dcn32/display_mode_vba_util_32.h
@@ -90,6 +90,9 @@ void dml32_CalculateSwathAndDETConfiguration(
 		unsigned int NumberOfActiveSurfaces,
 		unsigned int nomDETInKByte,
 		enum unbounded_requesting_policy UseUnboundedRequestingFinal,
+		bool DisableUnboundRequestIfCompBufReservedSpaceNeedAdjustment,
+		unsigned int PixelChunkSizeKBytes,
+		unsigned int ROBSizeKBytes,
 		unsigned int CompressedBufferSegmentSizeInkByteFinal,
 		enum output_encoder_class Output[],
 		double ReadBandwidthLuma[],
@@ -137,6 +140,8 @@ void dml32_CalculateSwathAndDETConfiguration(
 		unsigned int DETBufferSizeC[],
 		bool *UnboundedRequestEnabled,
 		unsigned int *CompressedBufferSizeInkByte,
+		unsigned int *CompBufReservedSpaceKBytes,
+		bool *CompBufReservedSpaceNeedAdjustment,
 		bool ViewportSizeSupportPerSurface[],
 		bool *ViewportSizeSupport);
 
@@ -181,7 +186,10 @@ void dml32_CalculateSwathWidth(
 bool dml32_UnboundedRequest(enum unbounded_requesting_policy UseUnboundedRequestingFinal,
 		unsigned int TotalNumberOfActiveDPP,
 		bool NoChroma,
-		enum output_encoder_class Output);
+		enum output_encoder_class Output,
+		enum dm_swizzle_mode SurfaceTiling,
+		bool CompBufReservedSpaceNeedAdjustment,
+		bool DisableUnboundRequestIfCompBufReservedSpaceNeedAdjustment);
 
 void dml32_CalculateDETBufferSize(
 		unsigned int DETSizeOverride[],
diff --git a/drivers/gpu/drm/amd/display/dc/dml/display_mode_vba.c b/drivers/gpu/drm/amd/display/dc/dml/display_mode_vba.c
index de78bb8489cb..503e7d984ff0 100644
--- a/drivers/gpu/drm/amd/display/dc/dml/display_mode_vba.c
+++ b/drivers/gpu/drm/amd/display/dc/dml/display_mode_vba.c
@@ -111,6 +111,7 @@ dml_get_attr_func(tcalc, mode_lib->vba.TCalc);
 dml_get_attr_func(fraction_of_urgent_bandwidth, mode_lib->vba.FractionOfUrgentBandwidth);
 dml_get_attr_func(fraction_of_urgent_bandwidth_imm_flip, mode_lib->vba.FractionOfUrgentBandwidthImmediateFlip);
 
+
 dml_get_attr_func(cstate_max_cap_mode, mode_lib->vba.DCHUBBUB_ARB_CSTATE_MAX_CAP_MODE);
 dml_get_attr_func(comp_buffer_size_kbytes, mode_lib->vba.CompressedBufferSizeInkByte);
 dml_get_attr_func(pixel_chunk_size_in_kbyte, mode_lib->vba.PixelChunkSizeInKByte);
-- 
2.38.1