Blob Blame History Raw
From a2692b6c0ac444eddb6411e9e3ff9e50c26229fb Mon Sep 17 00:00:00 2001
From: Krunoslav Kovac <krunoslav.kovac@amd.com>
Date: Tue, 8 Aug 2023 14:33:42 -0400
Subject: drm/amd/display: PQ tail accuracy
Git-commit: 4d5fd3d08ea9926fb2031ff7cfb4d72b7c950301
Patch-mainline: v6.7-rc1
References: jsc#PED-3527 jsc#PED-5475 jsc#PED-6068 jsc#PED-6070 jsc#PED-6116 jsc#PED-6120 jsc#PED-5065 jsc#PED-5477 jsc#PED-5511 jsc#PED-6041 jsc#PED-6069 jsc#PED-6071

[WHY & HOW]
HW LUTs changed slightly in DCN3: 256 base+slope pairs were replaced by
257 bases. Code was still calculating all 256 base+slope and then
creating 257th pt as last base + last slope.
This was done in wrong format, and then "fixed" it by making the last
two points the same thus making the last slope=0.
However, this also created some precision problems near the end that
are not visible but they do show up with capture cards.

Solution is to calculate 257 and remove deltas since we no longer have
those HW registers.

Reviewed-by: Anthony Koo <anthony.koo@amd.com>
Acked-by: Wayne Lin <wayne.lin@amd.com>
Signed-off-by: Krunoslav Kovac <krunoslav.kovac@amd.com>
Tested-by: Daniel Wheeler <daniel.wheeler@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
Acked-by: Patrik Jakobsson <pjakobsson@suse.de>
---
 .../amd/display/dc/dcn30/dcn30_cm_common.c    | 104 ++++++------------
 .../gpu/drm/amd/display/dc/dcn30/dcn30_mpc.c  |  17 ---
 2 files changed, 33 insertions(+), 88 deletions(-)

diff --git a/drivers/gpu/drm/amd/display/dc/dcn30/dcn30_cm_common.c b/drivers/gpu/drm/amd/display/dc/dcn30/dcn30_cm_common.c
index e0df9b0065f9..ddb344056d40 100644
--- a/drivers/gpu/drm/amd/display/dc/dcn30/dcn30_cm_common.c
+++ b/drivers/gpu/drm/amd/display/dc/dcn30/dcn30_cm_common.c
@@ -114,7 +114,6 @@ bool cm3_helper_translate_curve_to_hw_format(
 	struct pwl_result_data *rgb;
 	struct pwl_result_data *rgb_plus_1;
 	struct pwl_result_data *rgb_minus_1;
-	struct fixed31_32 end_value;
 
 	int32_t region_start, region_end;
 	int32_t i;
@@ -176,7 +175,7 @@ bool cm3_helper_translate_curve_to_hw_format(
 				NUMBER_SW_SEGMENTS;
 		for (i = start_index; i < start_index + NUMBER_SW_SEGMENTS;
 				i += increment) {
-			if (j == hw_points - 1)
+			if (j == hw_points)
 				break;
 			rgb_resulted[j].red = output_tf->tf_pts.red[i];
 			rgb_resulted[j].green = output_tf->tf_pts.green[i];
@@ -187,13 +186,13 @@ bool cm3_helper_translate_curve_to_hw_format(
 
 	/* last point */
 	start_index = (region_end + MAX_LOW_POINT) * NUMBER_SW_SEGMENTS;
-	rgb_resulted[hw_points - 1].red = output_tf->tf_pts.red[start_index];
-	rgb_resulted[hw_points - 1].green = output_tf->tf_pts.green[start_index];
-	rgb_resulted[hw_points - 1].blue = output_tf->tf_pts.blue[start_index];
+	rgb_resulted[hw_points].red = output_tf->tf_pts.red[start_index];
+	rgb_resulted[hw_points].green = output_tf->tf_pts.green[start_index];
+	rgb_resulted[hw_points].blue = output_tf->tf_pts.blue[start_index];
 
-	rgb_resulted[hw_points].red = rgb_resulted[hw_points - 1].red;
-	rgb_resulted[hw_points].green = rgb_resulted[hw_points - 1].green;
-	rgb_resulted[hw_points].blue = rgb_resulted[hw_points - 1].blue;
+	rgb_resulted[hw_points+1].red = rgb_resulted[hw_points].red;
+	rgb_resulted[hw_points+1].green = rgb_resulted[hw_points].green;
+	rgb_resulted[hw_points+1].blue = rgb_resulted[hw_points].blue;
 
 	// All 3 color channels have same x
 	corner_points[0].red.x = dc_fixpt_pow(dc_fixpt_from_int(2),
@@ -220,34 +219,16 @@ bool cm3_helper_translate_curve_to_hw_format(
 	/* see comment above, m_arrPoints[1].y should be the Y value for the
 	 * region end (m_numOfHwPoints), not last HW point(m_numOfHwPoints - 1)
 	 */
-	corner_points[1].red.y = rgb_resulted[hw_points - 1].red;
-	corner_points[1].green.y = rgb_resulted[hw_points - 1].green;
-	corner_points[1].blue.y = rgb_resulted[hw_points - 1].blue;
+	corner_points[1].red.y = rgb_resulted[hw_points].red;
+	corner_points[1].green.y = rgb_resulted[hw_points].green;
+	corner_points[1].blue.y = rgb_resulted[hw_points].blue;
 	corner_points[1].red.slope = dc_fixpt_zero;
 	corner_points[1].green.slope = dc_fixpt_zero;
 	corner_points[1].blue.slope = dc_fixpt_zero;
 
-	if (output_tf->tf == TRANSFER_FUNCTION_PQ || output_tf->tf == TRANSFER_FUNCTION_HLG) {
-		/* for PQ/HLG, we want to have a straight line from last HW X point,
-		 * and the slope to be such that we hit 1.0 at 10000/1000 nits.
-		 */
-
-		if (output_tf->tf == TRANSFER_FUNCTION_PQ)
-			end_value = dc_fixpt_from_int(125);
-		else
-			end_value = dc_fixpt_from_fraction(125, 10);
-
-		corner_points[1].red.slope = dc_fixpt_div(
-			dc_fixpt_sub(dc_fixpt_one, corner_points[1].red.y),
-			dc_fixpt_sub(end_value, corner_points[1].red.x));
-		corner_points[1].green.slope = dc_fixpt_div(
-			dc_fixpt_sub(dc_fixpt_one, corner_points[1].green.y),
-			dc_fixpt_sub(end_value, corner_points[1].green.x));
-		corner_points[1].blue.slope = dc_fixpt_div(
-			dc_fixpt_sub(dc_fixpt_one, corner_points[1].blue.y),
-			dc_fixpt_sub(end_value, corner_points[1].blue.x));
-	}
-	lut_params->hw_points_num = hw_points;
+	// DCN3+ have 257 pts in lieu of no separate slope registers
+	// Prior HW had 256 base+slope pairs
+	lut_params->hw_points_num = hw_points + 1;
 
 	k = 0;
 	for (i = 1; i < MAX_REGIONS_NUMBER; i++) {
@@ -267,38 +248,37 @@ bool cm3_helper_translate_curve_to_hw_format(
 	rgb_plus_1 = rgb_resulted + 1;
 	rgb_minus_1 = rgb;
 
-	i = 1;
-	while (i != hw_points + 1) {
-		if (i >= hw_points - 1) {
-			if (dc_fixpt_lt(rgb_plus_1->red, rgb->red))
-				rgb_plus_1->red = dc_fixpt_add(rgb->red, rgb_minus_1->delta_red);
-			if (dc_fixpt_lt(rgb_plus_1->green, rgb->green))
-				rgb_plus_1->green = dc_fixpt_add(rgb->green, rgb_minus_1->delta_green);
-			if (dc_fixpt_lt(rgb_plus_1->blue, rgb->blue))
-				rgb_plus_1->blue = dc_fixpt_add(rgb->blue, rgb_minus_1->delta_blue);
-		}
-
-		rgb->delta_red   = dc_fixpt_sub(rgb_plus_1->red,   rgb->red);
-		rgb->delta_green = dc_fixpt_sub(rgb_plus_1->green, rgb->green);
-		rgb->delta_blue  = dc_fixpt_sub(rgb_plus_1->blue,  rgb->blue);
+	if (fixpoint == true) {
+		i = 1;
+		while (i != hw_points + 2) {
+			if (i >= hw_points) {
+				if (dc_fixpt_lt(rgb_plus_1->red, rgb->red))
+					rgb_plus_1->red = dc_fixpt_add(rgb->red,
+							rgb_minus_1->delta_red);
+				if (dc_fixpt_lt(rgb_plus_1->green, rgb->green))
+					rgb_plus_1->green = dc_fixpt_add(rgb->green,
+							rgb_minus_1->delta_green);
+				if (dc_fixpt_lt(rgb_plus_1->blue, rgb->blue))
+					rgb_plus_1->blue = dc_fixpt_add(rgb->blue,
+							rgb_minus_1->delta_blue);
+			}
 
-		if (fixpoint == true) {
 			rgb->delta_red_reg   = dc_fixpt_clamp_u0d10(rgb->delta_red);
 			rgb->delta_green_reg = dc_fixpt_clamp_u0d10(rgb->delta_green);
 			rgb->delta_blue_reg  = dc_fixpt_clamp_u0d10(rgb->delta_blue);
 			rgb->red_reg         = dc_fixpt_clamp_u0d14(rgb->red);
 			rgb->green_reg       = dc_fixpt_clamp_u0d14(rgb->green);
 			rgb->blue_reg        = dc_fixpt_clamp_u0d14(rgb->blue);
-		}
 
-		++rgb_plus_1;
-		rgb_minus_1 = rgb;
-		++rgb;
-		++i;
+			++rgb_plus_1;
+			rgb_minus_1 = rgb;
+			++rgb;
+			++i;
+		}
 	}
 	cm3_helper_convert_to_custom_float(rgb_resulted,
 						lut_params->corner_points,
-						hw_points, fixpoint);
+						hw_points+1, fixpoint);
 
 	return true;
 }
@@ -603,24 +583,6 @@ bool cm3_helper_convert_to_custom_float(
 			return false;
 		}
 
-		if (!convert_to_custom_float_format(rgb->delta_red, &fmt,
-						    &rgb->delta_red_reg)) {
-			BREAK_TO_DEBUGGER();
-			return false;
-		}
-
-		if (!convert_to_custom_float_format(rgb->delta_green, &fmt,
-						    &rgb->delta_green_reg)) {
-			BREAK_TO_DEBUGGER();
-			return false;
-		}
-
-		if (!convert_to_custom_float_format(rgb->delta_blue, &fmt,
-						    &rgb->delta_blue_reg)) {
-			BREAK_TO_DEBUGGER();
-			return false;
-		}
-
 		++rgb;
 		++i;
 	}
diff --git a/drivers/gpu/drm/amd/display/dc/dcn30/dcn30_mpc.c b/drivers/gpu/drm/amd/display/dc/dcn30/dcn30_mpc.c
index 6cf40c1332bc..d1500b223858 100644
--- a/drivers/gpu/drm/amd/display/dc/dcn30/dcn30_mpc.c
+++ b/drivers/gpu/drm/amd/display/dc/dcn30/dcn30_mpc.c
@@ -278,22 +278,10 @@ static void mpc3_program_ogam_pwl(
 {
 	uint32_t i;
 	struct dcn30_mpc *mpc30 = TO_DCN30_MPC(mpc);
-	uint32_t last_base_value_red = rgb[num-1].red_reg + rgb[num-1].delta_red_reg;
-	uint32_t last_base_value_green = rgb[num-1].green_reg + rgb[num-1].delta_green_reg;
-	uint32_t last_base_value_blue = rgb[num-1].blue_reg + rgb[num-1].delta_blue_reg;
-
-	/*the entries of DCN3AG gamma LUTs take 18bit base values as opposed to
-	 *38 base+delta values per entry in earlier DCN architectures
-	 *last base value for our lut is compute by adding the last base value
-	 *in our data + last delta
-	 */
 
 	if (is_rgb_equal(rgb,  num)) {
 		for (i = 0 ; i < num; i++)
 			REG_SET(MPCC_OGAM_LUT_DATA[mpcc_id], 0, MPCC_OGAM_LUT_DATA, rgb[i].red_reg);
-
-		REG_SET(MPCC_OGAM_LUT_DATA[mpcc_id], 0, MPCC_OGAM_LUT_DATA, last_base_value_red);
-
 	} else {
 
 		REG_UPDATE(MPCC_OGAM_LUT_CONTROL[mpcc_id],
@@ -302,8 +290,6 @@ static void mpc3_program_ogam_pwl(
 		for (i = 0 ; i < num; i++)
 			REG_SET(MPCC_OGAM_LUT_DATA[mpcc_id], 0, MPCC_OGAM_LUT_DATA, rgb[i].red_reg);
 
-		REG_SET(MPCC_OGAM_LUT_DATA[mpcc_id], 0, MPCC_OGAM_LUT_DATA, last_base_value_red);
-
 		REG_SET(MPCC_OGAM_LUT_INDEX[mpcc_id], 0, MPCC_OGAM_LUT_INDEX, 0);
 
 		REG_UPDATE(MPCC_OGAM_LUT_CONTROL[mpcc_id],
@@ -312,8 +298,6 @@ static void mpc3_program_ogam_pwl(
 		for (i = 0 ; i < num; i++)
 			REG_SET(MPCC_OGAM_LUT_DATA[mpcc_id], 0, MPCC_OGAM_LUT_DATA, rgb[i].green_reg);
 
-		REG_SET(MPCC_OGAM_LUT_DATA[mpcc_id], 0, MPCC_OGAM_LUT_DATA, last_base_value_green);
-
 		REG_SET(MPCC_OGAM_LUT_INDEX[mpcc_id], 0, MPCC_OGAM_LUT_INDEX, 0);
 
 		REG_UPDATE(MPCC_OGAM_LUT_CONTROL[mpcc_id],
@@ -322,7 +306,6 @@ static void mpc3_program_ogam_pwl(
 		for (i = 0 ; i < num; i++)
 			REG_SET(MPCC_OGAM_LUT_DATA[mpcc_id], 0, MPCC_OGAM_LUT_DATA, rgb[i].blue_reg);
 
-		REG_SET(MPCC_OGAM_LUT_DATA[mpcc_id], 0, MPCC_OGAM_LUT_DATA, last_base_value_blue);
 	}
 
 }
-- 
2.43.0