From 7e452fdbfca85cd279ecb0d8e9ab6fdd1e8c97fc Mon Sep 17 00:00:00 2001 From: "Kumar, Mahesh" Date: Thu, 17 Aug 2017 19:15:23 +0530 Subject: [PATCH] drm/i915/skl+: Optimize WM calculation Git-commit: 7e452fdbfca85cd279ecb0d8e9ab6fdd1e8c97fc Patch-mainline: v4.15-rc1 References: FATE#322643 bsc#1055900 Plane configuration parameters doesn't change for each WM-level calculation. Currently we compute same parameters 8 times for each wm-level. This patch optimizes it by calculating these parameters in beginning & reuse during each level-wm calculation. Changes since V1: - rebase on top of Rodrigo's series for CNL Signed-off-by: Mahesh Kumar Acked-by: Maarten Lankhorst Reviewed-by: Maarten Lankhorst Signed-off-by: Maarten Lankhorst Link: https://patchwork.freedesktop.org/patch/msgid/20170817134529.2839-3-mahesh1.kumar@intel.com Acked-by: Takashi Iwai --- drivers/gpu/drm/i915/i915_drv.h | 14 ++ drivers/gpu/drm/i915/intel_pm.c | 190 ++++++++++++++++++++++------------------ 2 files changed, 119 insertions(+), 85 deletions(-) --- a/drivers/gpu/drm/i915/i915_drv.h +++ b/drivers/gpu/drm/i915/i915_drv.h @@ -1836,6 +1836,20 @@ struct skl_wm_level { uint8_t plane_res_l; }; +/* Stores plane specific WM parameters */ +struct skl_wm_params { + bool x_tiled, y_tiled; + bool rc_surface; + uint32_t width; + uint8_t cpp; + uint32_t plane_pixel_rate; + uint32_t y_min_scanlines; + uint32_t plane_bytes_per_line; + uint_fixed_16_16_t plane_blocks_per_line; + uint_fixed_16_16_t y_tile_minimum; + uint32_t linetime_us; +}; + /* * This struct helps tracking the state needed for runtime PM, which puts the * device in PCI D3 state. Notice that when this happens, nothing on the --- a/drivers/gpu/drm/i915/intel_pm.c +++ b/drivers/gpu/drm/i915/intel_pm.c @@ -4367,134 +4367,146 @@ skl_adjusted_plane_pixel_rate(const stru downscale_amount); } -static int skl_compute_plane_wm(const struct drm_i915_private *dev_priv, - struct intel_crtc_state *cstate, - const struct intel_plane_state *intel_pstate, - uint16_t ddb_allocation, - int level, - uint16_t *out_blocks, /* out */ - uint8_t *out_lines, /* out */ - bool *enabled /* out */) +static int +skl_compute_plane_wm_params(const struct drm_i915_private *dev_priv, + struct intel_crtc_state *cstate, + const struct intel_plane_state *intel_pstate, + struct skl_wm_params *wp) { struct intel_plane *plane = to_intel_plane(intel_pstate->base.plane); const struct drm_plane_state *pstate = &intel_pstate->base; const struct drm_framebuffer *fb = pstate->fb; - uint32_t latency = dev_priv->wm.skl_latency[level]; - uint_fixed_16_16_t method1, method2; - uint_fixed_16_16_t plane_blocks_per_line; - uint_fixed_16_16_t selected_result; uint32_t interm_pbpl; - uint32_t plane_bytes_per_line; - uint32_t res_blocks, res_lines; - uint8_t cpp; - uint32_t width = 0; - uint32_t plane_pixel_rate; - uint_fixed_16_16_t y_tile_minimum; - uint32_t y_min_scanlines; struct intel_atomic_state *state = to_intel_atomic_state(cstate->base.state); bool apply_memory_bw_wa = skl_needs_memory_bw_wa(state); - bool y_tiled, x_tiled; - if (latency == 0 || - !intel_wm_plane_visible(cstate, intel_pstate)) { - *enabled = false; + if (!intel_wm_plane_visible(cstate, intel_pstate)) return 0; - } - - y_tiled = fb->modifier == I915_FORMAT_MOD_Y_TILED || - fb->modifier == I915_FORMAT_MOD_Yf_TILED || - fb->modifier == I915_FORMAT_MOD_Y_TILED_CCS || - fb->modifier == I915_FORMAT_MOD_Yf_TILED_CCS; - x_tiled = fb->modifier == I915_FORMAT_MOD_X_TILED; - - /* Display WA #1141: kbl,cfl */ - if ((IS_KABYLAKE(dev_priv) || IS_COFFEELAKE(dev_priv)) && - dev_priv->ipc_enabled) - latency += 4; - if (apply_memory_bw_wa && x_tiled) - latency += 15; + wp->y_tiled = fb->modifier == I915_FORMAT_MOD_Y_TILED || + fb->modifier == I915_FORMAT_MOD_Yf_TILED || + fb->modifier == I915_FORMAT_MOD_Y_TILED_CCS || + fb->modifier == I915_FORMAT_MOD_Yf_TILED_CCS; + wp->x_tiled = fb->modifier == I915_FORMAT_MOD_X_TILED; + wp->rc_surface = fb->modifier == I915_FORMAT_MOD_Y_TILED_CCS || + fb->modifier == I915_FORMAT_MOD_Yf_TILED_CCS; if (plane->id == PLANE_CURSOR) { - width = intel_pstate->base.crtc_w; + wp->width = intel_pstate->base.crtc_w; } else { /* * Src coordinates are already rotated by 270 degrees for * the 90/270 degree plane rotation cases (to match the * GTT mapping), hence no need to account for rotation here. */ - width = drm_rect_width(&intel_pstate->base.src) >> 16; + wp->width = drm_rect_width(&intel_pstate->base.src) >> 16; } - cpp = (fb->format->format == DRM_FORMAT_NV12) ? fb->format->cpp[1] : - fb->format->cpp[0]; - plane_pixel_rate = skl_adjusted_plane_pixel_rate(cstate, intel_pstate); + wp->cpp = (fb->format->format == DRM_FORMAT_NV12) ? fb->format->cpp[1] : + fb->format->cpp[0]; + wp->plane_pixel_rate = skl_adjusted_plane_pixel_rate(cstate, + intel_pstate); if (drm_rotation_90_or_270(pstate->rotation)) { - switch (cpp) { + switch (wp->cpp) { case 1: - y_min_scanlines = 16; + wp->y_min_scanlines = 16; break; case 2: - y_min_scanlines = 8; + wp->y_min_scanlines = 8; break; case 4: - y_min_scanlines = 4; + wp->y_min_scanlines = 4; break; default: - MISSING_CASE(cpp); + MISSING_CASE(wp->cpp); return -EINVAL; } } else { - y_min_scanlines = 4; + wp->y_min_scanlines = 4; } if (apply_memory_bw_wa) - y_min_scanlines *= 2; + wp->y_min_scanlines *= 2; - plane_bytes_per_line = width * cpp; - if (y_tiled) { - interm_pbpl = DIV_ROUND_UP(plane_bytes_per_line * - y_min_scanlines, 512); + wp->plane_bytes_per_line = wp->width * wp->cpp; + if (wp->y_tiled) { + interm_pbpl = DIV_ROUND_UP(wp->plane_bytes_per_line * + wp->y_min_scanlines, 512); if (INTEL_GEN(dev_priv) >= 10) interm_pbpl++; - plane_blocks_per_line = div_fixed16(interm_pbpl, - y_min_scanlines); - } else if (x_tiled && INTEL_GEN(dev_priv) == 9) { - interm_pbpl = DIV_ROUND_UP(plane_bytes_per_line, 512); - plane_blocks_per_line = u32_to_fixed16(interm_pbpl); + wp->plane_blocks_per_line = div_fixed16(interm_pbpl, + wp->y_min_scanlines); + } else if (wp->x_tiled && IS_GEN9(dev_priv)) { + interm_pbpl = DIV_ROUND_UP(wp->plane_bytes_per_line, 512); + wp->plane_blocks_per_line = u32_to_fixed16(interm_pbpl); } else { - interm_pbpl = DIV_ROUND_UP(plane_bytes_per_line, 512) + 1; - plane_blocks_per_line = u32_to_fixed16(interm_pbpl); + interm_pbpl = DIV_ROUND_UP(wp->plane_bytes_per_line, 512) + 1; + wp->plane_blocks_per_line = u32_to_fixed16(interm_pbpl); + } + + wp->y_tile_minimum = mul_u32_fixed16(wp->y_min_scanlines, + wp->plane_blocks_per_line); + wp->linetime_us = fixed16_to_u32_round_up( + intel_get_linetime_us(cstate)); + + return 0; +} + +static int skl_compute_plane_wm(const struct drm_i915_private *dev_priv, + struct intel_crtc_state *cstate, + const struct intel_plane_state *intel_pstate, + uint16_t ddb_allocation, + int level, + const struct skl_wm_params *wp, + uint16_t *out_blocks, /* out */ + uint8_t *out_lines, /* out */ + bool *enabled /* out */) +{ + const struct drm_plane_state *pstate = &intel_pstate->base; + uint32_t latency = dev_priv->wm.skl_latency[level]; + uint_fixed_16_16_t method1, method2; + uint_fixed_16_16_t selected_result; + uint32_t res_blocks, res_lines; + struct intel_atomic_state *state = + to_intel_atomic_state(cstate->base.state); + bool apply_memory_bw_wa = skl_needs_memory_bw_wa(state); + + if (latency == 0 || + !intel_wm_plane_visible(cstate, intel_pstate)) { + *enabled = false; + return 0; } - method1 = skl_wm_method1(dev_priv, plane_pixel_rate, cpp, latency); - method2 = skl_wm_method2(plane_pixel_rate, + /* Display WA #1141: kbl,cfl */ + if ((IS_KABYLAKE(dev_priv) || IS_COFFEELAKE(dev_priv)) && + dev_priv->ipc_enabled) + latency += 4; + + if (apply_memory_bw_wa && wp->x_tiled) + latency += 15; + + method1 = skl_wm_method1(dev_priv, wp->plane_pixel_rate, + wp->cpp, latency); + method2 = skl_wm_method2(wp->plane_pixel_rate, cstate->base.adjusted_mode.crtc_htotal, latency, - plane_blocks_per_line); - - y_tile_minimum = mul_u32_fixed16(y_min_scanlines, - plane_blocks_per_line); + wp->plane_blocks_per_line); - if (y_tiled) { - selected_result = max_fixed16(method2, y_tile_minimum); + if (wp->y_tiled) { + selected_result = max_fixed16(method2, wp->y_tile_minimum); } else { - uint32_t linetime_us; - - linetime_us = fixed16_to_u32_round_up( - intel_get_linetime_us(cstate)); - if ((cpp * cstate->base.adjusted_mode.crtc_htotal / 512 < 1) && - (plane_bytes_per_line / 512 < 1)) + if ((wp->cpp * cstate->base.adjusted_mode.crtc_htotal / + 512 < 1) && (wp->plane_bytes_per_line / 512 < 1)) selected_result = method2; else if (ddb_allocation >= - fixed16_to_u32_round_up(plane_blocks_per_line)) + fixed16_to_u32_round_up(wp->plane_blocks_per_line)) selected_result = min_fixed16(method1, method2); - else if (latency >= linetime_us) + else if (latency >= wp->linetime_us) selected_result = min_fixed16(method1, method2); else selected_result = method1; @@ -4502,19 +4514,18 @@ static int skl_compute_plane_wm(const st res_blocks = fixed16_to_u32_round_up(selected_result) + 1; res_lines = div_round_up_fixed16(selected_result, - plane_blocks_per_line); + wp->plane_blocks_per_line); /* Display WA #1125: skl,bxt,kbl,glk */ - if (level == 0 && - (fb->modifier == I915_FORMAT_MOD_Y_TILED_CCS || - fb->modifier == I915_FORMAT_MOD_Yf_TILED_CCS)) - res_blocks += fixed16_to_u32_round_up(y_tile_minimum); + if (level == 0 && wp->rc_surface) + res_blocks += fixed16_to_u32_round_up(wp->y_tile_minimum); /* Display WA #1126: skl,bxt,kbl,glk */ if (level >= 1 && level <= 7) { - if (y_tiled) { - res_blocks += fixed16_to_u32_round_up(y_tile_minimum); - res_lines += y_min_scanlines; + if (wp->y_tiled) { + res_blocks += fixed16_to_u32_round_up( + wp->y_tile_minimum); + res_lines += wp->y_min_scanlines; } else { res_blocks++; } @@ -4552,6 +4563,7 @@ skl_compute_wm_levels(const struct drm_i struct skl_ddb_allocation *ddb, struct intel_crtc_state *cstate, const struct intel_plane_state *intel_pstate, + const struct skl_wm_params *wm_params, struct skl_plane_wm *wm) { struct intel_crtc *intel_crtc = to_intel_crtc(cstate->base.crtc); @@ -4575,6 +4587,7 @@ skl_compute_wm_levels(const struct drm_i intel_pstate, ddb_blocks, level, + wm_params, &result->plane_res_b, &result->plane_res_l, &result->plane_en); @@ -4639,11 +4652,18 @@ static int skl_build_pipe_wm(struct inte const struct intel_plane_state *intel_pstate = to_intel_plane_state(pstate); enum plane_id plane_id = to_intel_plane(plane)->id; + struct skl_wm_params wm_params; wm = &pipe_wm->planes[plane_id]; + memset(&wm_params, 0, sizeof(struct skl_wm_params)); + + ret = skl_compute_plane_wm_params(dev_priv, cstate, + intel_pstate, &wm_params); + if (ret) + return ret; ret = skl_compute_wm_levels(dev_priv, ddb, cstate, - intel_pstate, wm); + intel_pstate, &wm_params, wm); if (ret) return ret; skl_compute_transition_wm(cstate, &wm->trans_wm);