From: Ben Skeggs Date: Tue, 8 May 2018 20:39:47 +1000 Subject: drm/nouveau/gr/gp102-: setup stencil zbc Git-commit: 4b2c71edf0d7832ef4d2fe5b17402d1130b415dc Patch-mainline: v4.18-rc1 References: FATE#326289 FATE#326079 FATE#326049 FATE#322398 FATE#326166 Signed-off-by: Ben Skeggs Acked-by: Petr Tesarik --- drivers/gpu/drm/nouveau/include/nvkm/subdev/ltc.h | 3 + drivers/gpu/drm/nouveau/nvkm/engine/device/base.c | 12 ++-- drivers/gpu/drm/nouveau/nvkm/engine/gr/gf100.c | 12 ++++ drivers/gpu/drm/nouveau/nvkm/engine/gr/gf100.h | 12 ++++ drivers/gpu/drm/nouveau/nvkm/engine/gr/gp100.c | 2 drivers/gpu/drm/nouveau/nvkm/engine/gr/gp102.c | 58 +++++++++++++++++++++- drivers/gpu/drm/nouveau/nvkm/engine/gr/gp104.c | 2 drivers/gpu/drm/nouveau/nvkm/engine/gr/gp107.c | 2 drivers/gpu/drm/nouveau/nvkm/engine/gr/gp10b.c | 2 drivers/gpu/drm/nouveau/nvkm/subdev/ltc/Kbuild | 1 drivers/gpu/drm/nouveau/nvkm/subdev/ltc/base.c | 10 +++ drivers/gpu/drm/nouveau/nvkm/subdev/ltc/gp100.c | 6 +- drivers/gpu/drm/nouveau/nvkm/subdev/ltc/gp102.c | 51 +++++++++++++++++++ drivers/gpu/drm/nouveau/nvkm/subdev/ltc/priv.h | 5 + 14 files changed, 162 insertions(+), 16 deletions(-) --- a/drivers/gpu/drm/nouveau/include/nvkm/subdev/ltc.h +++ b/drivers/gpu/drm/nouveau/include/nvkm/subdev/ltc.h @@ -20,12 +20,14 @@ struct nvkm_ltc { int zbc_max; u32 zbc_color[NVKM_LTC_MAX_ZBC_CNT][4]; u32 zbc_depth[NVKM_LTC_MAX_ZBC_CNT]; + u32 zbc_stencil[NVKM_LTC_MAX_ZBC_CNT]; }; void nvkm_ltc_tags_clear(struct nvkm_device *, u32 first, u32 count); int nvkm_ltc_zbc_color_get(struct nvkm_ltc *, int index, const u32[4]); int nvkm_ltc_zbc_depth_get(struct nvkm_ltc *, int index, const u32); +int nvkm_ltc_zbc_stencil_get(struct nvkm_ltc *, int index, const u32); void nvkm_ltc_invalidate(struct nvkm_ltc *); void nvkm_ltc_flush(struct nvkm_ltc *); @@ -36,4 +38,5 @@ int gk20a_ltc_new(struct nvkm_device *, int gm107_ltc_new(struct nvkm_device *, int, struct nvkm_ltc **); int gm200_ltc_new(struct nvkm_device *, int, struct nvkm_ltc **); int gp100_ltc_new(struct nvkm_device *, int, struct nvkm_ltc **); +int gp102_ltc_new(struct nvkm_device *, int, struct nvkm_ltc **); #endif --- a/drivers/gpu/drm/nouveau/nvkm/engine/device/base.c +++ b/drivers/gpu/drm/nouveau/nvkm/engine/device/base.c @@ -2204,7 +2204,7 @@ nv132_chipset = { .i2c = gm200_i2c_new, .ibus = gm200_ibus_new, .imem = nv50_instmem_new, - .ltc = gp100_ltc_new, + .ltc = gp102_ltc_new, .mc = gp100_mc_new, .mmu = gp100_mmu_new, .therm = gp100_therm_new, @@ -2240,7 +2240,7 @@ nv134_chipset = { .i2c = gm200_i2c_new, .ibus = gm200_ibus_new, .imem = nv50_instmem_new, - .ltc = gp100_ltc_new, + .ltc = gp102_ltc_new, .mc = gp100_mc_new, .mmu = gp100_mmu_new, .therm = gp100_therm_new, @@ -2276,7 +2276,7 @@ nv136_chipset = { .i2c = gm200_i2c_new, .ibus = gm200_ibus_new, .imem = nv50_instmem_new, - .ltc = gp100_ltc_new, + .ltc = gp102_ltc_new, .mc = gp100_mc_new, .mmu = gp100_mmu_new, .therm = gp100_therm_new, @@ -2312,7 +2312,7 @@ nv137_chipset = { .i2c = gm200_i2c_new, .ibus = gm200_ibus_new, .imem = nv50_instmem_new, - .ltc = gp100_ltc_new, + .ltc = gp102_ltc_new, .mc = gp100_mc_new, .mmu = gp100_mmu_new, .therm = gp100_therm_new, @@ -2348,7 +2348,7 @@ nv138_chipset = { .i2c = gm200_i2c_new, .ibus = gm200_ibus_new, .imem = nv50_instmem_new, - .ltc = gp100_ltc_new, + .ltc = gp102_ltc_new, .mc = gp100_mc_new, .mmu = gp100_mmu_new, .therm = gp100_therm_new, @@ -2380,7 +2380,7 @@ nv13b_chipset = { .fuse = gm107_fuse_new, .ibus = gp10b_ibus_new, .imem = gk20a_instmem_new, - .ltc = gp100_ltc_new, + .ltc = gp102_ltc_new, .mc = gp10b_mc_new, .mmu = gp10b_mmu_new, .secboot = gp10b_secboot_new, --- a/drivers/gpu/drm/nouveau/nvkm/engine/gr/gf100.c +++ b/drivers/gpu/drm/nouveau/nvkm/engine/gr/gf100.c @@ -750,7 +750,7 @@ gf100_gr_zbc_init(struct gf100_gr *gr) const u32 f32_1[] = { 0x3f800000, 0x3f800000, 0x3f800000, 0x3f800000, 0x3f800000, 0x3f800000, 0x3f800000, 0x3f800000 }; struct nvkm_ltc *ltc = gr->base.engine.subdev.device->ltc; - int index, c = ltc->zbc_min, d = ltc->zbc_min; + int index, c = ltc->zbc_min, d = ltc->zbc_min, s = ltc->zbc_min; if (!gr->zbc_color[0].format) { gf100_gr_zbc_color_get(gr, 1, & zero[0], &zero[4]); c++; @@ -759,12 +759,22 @@ gf100_gr_zbc_init(struct gf100_gr *gr) gf100_gr_zbc_color_get(gr, 4, &f32_1[0], &f32_1[4]); c++; gf100_gr_zbc_depth_get(gr, 1, 0x00000000, 0x00000000); d++; gf100_gr_zbc_depth_get(gr, 1, 0x3f800000, 0x3f800000); d++; + if (gr->func->zbc->stencil_get) { + gr->func->zbc->stencil_get(gr, 1, 0x00, 0x00); s++; + gr->func->zbc->stencil_get(gr, 1, 0x01, 0x01); s++; + gr->func->zbc->stencil_get(gr, 1, 0xff, 0xff); s++; + } } for (index = c; index <= ltc->zbc_max; index++) gr->func->zbc->clear_color(gr, index); for (index = d; index <= ltc->zbc_max; index++) gr->func->zbc->clear_depth(gr, index); + + if (gr->func->zbc->clear_stencil) { + for (index = s; index <= ltc->zbc_max; index++) + gr->func->zbc->clear_stencil(gr, index); + } } /** --- a/drivers/gpu/drm/nouveau/nvkm/engine/gr/gf100.h +++ b/drivers/gpu/drm/nouveau/nvkm/engine/gr/gf100.h @@ -72,6 +72,12 @@ struct gf100_gr_zbc_depth { u32 l2; }; +struct gf100_gr_zbc_stencil { + u32 format; + u32 ds; + u32 l2; +}; + struct gf100_gr { const struct gf100_gr_func *func; struct nvkm_gr base; @@ -95,6 +101,7 @@ struct gf100_gr { struct gf100_gr_zbc_color zbc_color[NVKM_LTC_MAX_ZBC_CNT]; struct gf100_gr_zbc_depth zbc_depth[NVKM_LTC_MAX_ZBC_CNT]; + struct gf100_gr_zbc_stencil zbc_stencil[NVKM_LTC_MAX_ZBC_CNT]; u8 rop_nr; u8 gpc_nr; @@ -132,6 +139,9 @@ void *gf100_gr_dtor(struct nvkm_gr *); struct gf100_gr_func_zbc { void (*clear_color)(struct gf100_gr *, int zbc); void (*clear_depth)(struct gf100_gr *, int zbc); + int (*stencil_get)(struct gf100_gr *, int format, + const u32 ds, const u32 l2); + void (*clear_stencil)(struct gf100_gr *, int zbc); }; struct gf100_gr_func { @@ -219,11 +229,11 @@ void gm200_gr_init_ds_hww_esr_2(struct g void gp100_gr_init_rop_active_fbps(struct gf100_gr *); void gp100_gr_init_fecs_exceptions(struct gf100_gr *); void gp100_gr_init_shader_exceptions(struct gf100_gr *, int, int); -extern const struct gf100_gr_func_zbc gp100_gr_zbc; void gp100_gr_zbc_clear_color(struct gf100_gr *, int); void gp100_gr_zbc_clear_depth(struct gf100_gr *, int); void gp102_gr_init_swdx_pes_mask(struct gf100_gr *); +extern const struct gf100_gr_func_zbc gp102_gr_zbc; #define gf100_gr_chan(p) container_of((p), struct gf100_gr_chan, object) #include --- a/drivers/gpu/drm/nouveau/nvkm/engine/gr/gp100.c +++ b/drivers/gpu/drm/nouveau/nvkm/engine/gr/gp100.c @@ -62,7 +62,7 @@ gp100_gr_zbc_clear_depth(struct gf100_gr gr->zbc_depth[zbc].format << ((znum % 4) * 7)); } -const struct gf100_gr_func_zbc +static const struct gf100_gr_func_zbc gp100_gr_zbc = { .clear_color = gp100_gr_zbc_clear_color, .clear_depth = gp100_gr_zbc_clear_depth, --- a/drivers/gpu/drm/nouveau/nvkm/engine/gr/gp102.c +++ b/drivers/gpu/drm/nouveau/nvkm/engine/gr/gp102.c @@ -26,6 +26,62 @@ #include +static void +gp102_gr_zbc_clear_stencil(struct gf100_gr *gr, int zbc) +{ + struct nvkm_device *device = gr->base.engine.subdev.device; + const int znum = zbc - 1; + const u32 zoff = znum * 4; + + if (gr->zbc_stencil[zbc].format) + nvkm_wr32(device, 0x41815c + zoff, gr->zbc_stencil[zbc].ds); + nvkm_mask(device, 0x418198 + ((znum / 4) * 4), + 0x0000007f << ((znum % 4) * 7), + gr->zbc_stencil[zbc].format << ((znum % 4) * 7)); +} + +static int +gp102_gr_zbc_stencil_get(struct gf100_gr *gr, int format, + const u32 ds, const u32 l2) +{ + struct nvkm_ltc *ltc = gr->base.engine.subdev.device->ltc; + int zbc = -ENOSPC, i; + + for (i = ltc->zbc_min; i <= ltc->zbc_max; i++) { + if (gr->zbc_stencil[i].format) { + if (gr->zbc_stencil[i].format != format) + continue; + if (gr->zbc_stencil[i].ds != ds) + continue; + if (gr->zbc_stencil[i].l2 != l2) { + WARN_ON(1); + return -EINVAL; + } + return i; + } else { + zbc = (zbc < 0) ? i : zbc; + } + } + + if (zbc < 0) + return zbc; + + gr->zbc_stencil[zbc].format = format; + gr->zbc_stencil[zbc].ds = ds; + gr->zbc_stencil[zbc].l2 = l2; + nvkm_ltc_zbc_stencil_get(ltc, zbc, l2); + gr->func->zbc->clear_stencil(gr, zbc); + return zbc; +} + +const struct gf100_gr_func_zbc +gp102_gr_zbc = { + .clear_color = gp100_gr_zbc_clear_color, + .clear_depth = gp100_gr_zbc_clear_depth, + .stencil_get = gp102_gr_zbc_stencil_get, + .clear_stencil = gp102_gr_zbc_clear_stencil, +}; + void gp102_gr_init_swdx_pes_mask(struct gf100_gr *gr) { @@ -65,7 +121,7 @@ gp102_gr = { .tpc_nr = 5, .ppc_nr = 3, .grctx = &gp102_grctx, - .zbc = &gp100_gr_zbc, + .zbc = &gp102_gr_zbc, .sclass = { { -1, -1, FERMI_TWOD_A }, { -1, -1, KEPLER_INLINE_TO_MEMORY_B }, --- a/drivers/gpu/drm/nouveau/nvkm/engine/gr/gp104.c +++ b/drivers/gpu/drm/nouveau/nvkm/engine/gr/gp104.c @@ -49,7 +49,7 @@ gp104_gr = { .tpc_nr = 5, .ppc_nr = 3, .grctx = &gp104_grctx, - .zbc = &gp100_gr_zbc, + .zbc = &gp102_gr_zbc, .sclass = { { -1, -1, FERMI_TWOD_A }, { -1, -1, KEPLER_INLINE_TO_MEMORY_B }, --- a/drivers/gpu/drm/nouveau/nvkm/engine/gr/gp107.c +++ b/drivers/gpu/drm/nouveau/nvkm/engine/gr/gp107.c @@ -51,7 +51,7 @@ gp107_gr = { .tpc_nr = 3, .ppc_nr = 1, .grctx = &gp107_grctx, - .zbc = &gp100_gr_zbc, + .zbc = &gp102_gr_zbc, .sclass = { { -1, -1, FERMI_TWOD_A }, { -1, -1, KEPLER_INLINE_TO_MEMORY_B }, --- a/drivers/gpu/drm/nouveau/nvkm/engine/gr/gp10b.c +++ b/drivers/gpu/drm/nouveau/nvkm/engine/gr/gp10b.c @@ -49,7 +49,7 @@ gp10b_gr = { .tpc_nr = 2, .ppc_nr = 1, .grctx = &gp102_grctx, - .zbc = &gp100_gr_zbc, + .zbc = &gp102_gr_zbc, .sclass = { { -1, -1, FERMI_TWOD_A }, { -1, -1, KEPLER_INLINE_TO_MEMORY_B }, --- a/drivers/gpu/drm/nouveau/nvkm/subdev/ltc/Kbuild +++ b/drivers/gpu/drm/nouveau/nvkm/subdev/ltc/Kbuild @@ -4,3 +4,4 @@ nvkm-y += nvkm/subdev/ltc/gk104.o nvkm-y += nvkm/subdev/ltc/gm107.o nvkm-y += nvkm/subdev/ltc/gm200.o nvkm-y += nvkm/subdev/ltc/gp100.o +nvkm-y += nvkm/subdev/ltc/gp102.o --- a/drivers/gpu/drm/nouveau/nvkm/subdev/ltc/base.c +++ b/drivers/gpu/drm/nouveau/nvkm/subdev/ltc/base.c @@ -55,6 +55,14 @@ nvkm_ltc_zbc_depth_get(struct nvkm_ltc * return index; } +int +nvkm_ltc_zbc_stencil_get(struct nvkm_ltc *ltc, int index, const u32 stencil) +{ + ltc->zbc_stencil[index] = stencil; + ltc->func->zbc_clear_stencil(ltc, index, stencil); + return index; +} + void nvkm_ltc_invalidate(struct nvkm_ltc *ltc) { @@ -92,6 +100,8 @@ nvkm_ltc_init(struct nvkm_subdev *subdev for (i = ltc->zbc_min; i <= ltc->zbc_max; i++) { ltc->func->zbc_clear_color(ltc, i, ltc->zbc_color[i]); ltc->func->zbc_clear_depth(ltc, i, ltc->zbc_depth[i]); + if (ltc->func->zbc_clear_stencil) + ltc->func->zbc_clear_stencil(ltc, i, ltc->zbc_stencil[i]); } ltc->func->init(ltc); --- a/drivers/gpu/drm/nouveau/nvkm/subdev/ltc/gp100.c +++ b/drivers/gpu/drm/nouveau/nvkm/subdev/ltc/gp100.c @@ -23,7 +23,7 @@ */ #include "priv.h" -static void +void gp100_ltc_intr(struct nvkm_ltc *ltc) { struct nvkm_device *device = ltc->subdev.device; @@ -38,7 +38,7 @@ gp100_ltc_intr(struct nvkm_ltc *ltc) } } -static int +int gp100_ltc_oneinit(struct nvkm_ltc *ltc) { struct nvkm_device *device = ltc->subdev.device; @@ -48,7 +48,7 @@ gp100_ltc_oneinit(struct nvkm_ltc *ltc) return 0; } -static void +void gp100_ltc_init(struct nvkm_ltc *ltc) { /*XXX: PMU LS call to setup tagram address */ --- /dev/null +++ b/drivers/gpu/drm/nouveau/nvkm/subdev/ltc/gp102.c @@ -0,0 +1,51 @@ +/* + * Copyright 2018 Red Hat Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + */ +#include "priv.h" + +void +gp102_ltc_zbc_clear_stencil(struct nvkm_ltc *ltc, int i, const u32 stencil) +{ + struct nvkm_device *device = ltc->subdev.device; + nvkm_mask(device, 0x17e338, 0x0000000f, i); + nvkm_wr32(device, 0x17e204, stencil); +} + +static const struct nvkm_ltc_func +gp102_ltc = { + .oneinit = gp100_ltc_oneinit, + .init = gp100_ltc_init, + .intr = gp100_ltc_intr, + .cbc_clear = gm107_ltc_cbc_clear, + .cbc_wait = gm107_ltc_cbc_wait, + .zbc = 16, + .zbc_clear_color = gm107_ltc_zbc_clear_color, + .zbc_clear_depth = gm107_ltc_zbc_clear_depth, + .zbc_clear_stencil = gp102_ltc_zbc_clear_stencil, + .invalidate = gf100_ltc_invalidate, + .flush = gf100_ltc_flush, +}; + +int +gp102_ltc_new(struct nvkm_device *device, int index, struct nvkm_ltc **pltc) +{ + return nvkm_ltc_new_(&gp102_ltc, device, index, pltc); +} --- a/drivers/gpu/drm/nouveau/nvkm/subdev/ltc/priv.h +++ b/drivers/gpu/drm/nouveau/nvkm/subdev/ltc/priv.h @@ -18,6 +18,7 @@ struct nvkm_ltc_func { int zbc; void (*zbc_clear_color)(struct nvkm_ltc *, int, const u32[4]); void (*zbc_clear_depth)(struct nvkm_ltc *, int, const u32); + void (*zbc_clear_stencil)(struct nvkm_ltc *, int, const u32); void (*invalidate)(struct nvkm_ltc *); void (*flush)(struct nvkm_ltc *); @@ -40,4 +41,8 @@ void gm107_ltc_cbc_clear(struct nvkm_ltc void gm107_ltc_cbc_wait(struct nvkm_ltc *); void gm107_ltc_zbc_clear_color(struct nvkm_ltc *, int, const u32[4]); void gm107_ltc_zbc_clear_depth(struct nvkm_ltc *, int, const u32); + +int gp100_ltc_oneinit(struct nvkm_ltc *); +void gp100_ltc_init(struct nvkm_ltc *); +void gp100_ltc_intr(struct nvkm_ltc *); #endif