From: Ben Skeggs Date: Tue, 8 May 2018 20:39:46 +1000 Subject: drm/nouveau/gr/gf100-: virtualise rop_mapping Git-commit: ff209c235de9c3437e131b39eb976ff4bcc4c516 Patch-mainline: v4.18-rc1 References: FATE#326289 FATE#326079 FATE#326049 FATE#322398 FATE#326166 Signed-off-by: Ben Skeggs Acked-by: Petr Tesarik --- drivers/gpu/drm/nouveau/nvkm/engine/gr/ctxgf100.c | 6 +- drivers/gpu/drm/nouveau/nvkm/engine/gr/ctxgf100.h | 5 + drivers/gpu/drm/nouveau/nvkm/engine/gr/ctxgf104.c | 1 drivers/gpu/drm/nouveau/nvkm/engine/gr/ctxgf108.c | 1 drivers/gpu/drm/nouveau/nvkm/engine/gr/ctxgf110.c | 1 drivers/gpu/drm/nouveau/nvkm/engine/gr/ctxgf117.c | 61 ++++++++++++++++++++- drivers/gpu/drm/nouveau/nvkm/engine/gr/ctxgf119.c | 1 drivers/gpu/drm/nouveau/nvkm/engine/gr/ctxgk104.c | 61 --------------------- drivers/gpu/drm/nouveau/nvkm/engine/gr/ctxgk110.c | 1 drivers/gpu/drm/nouveau/nvkm/engine/gr/ctxgk110b.c | 1 drivers/gpu/drm/nouveau/nvkm/engine/gr/ctxgk208.c | 1 drivers/gpu/drm/nouveau/nvkm/engine/gr/ctxgk20a.c | 2 drivers/gpu/drm/nouveau/nvkm/engine/gr/ctxgm107.c | 2 drivers/gpu/drm/nouveau/nvkm/engine/gr/ctxgm200.c | 2 drivers/gpu/drm/nouveau/nvkm/engine/gr/ctxgm20b.c | 2 drivers/gpu/drm/nouveau/nvkm/engine/gr/ctxgp100.c | 2 drivers/gpu/drm/nouveau/nvkm/engine/gr/ctxgp102.c | 1 drivers/gpu/drm/nouveau/nvkm/engine/gr/ctxgp107.c | 1 18 files changed, 82 insertions(+), 70 deletions(-) --- a/drivers/gpu/drm/nouveau/nvkm/engine/gr/ctxgf100.c +++ b/drivers/gpu/drm/nouveau/nvkm/engine/gr/ctxgf100.c @@ -1105,7 +1105,7 @@ gf100_grctx_generate_r4060a8(struct gf10 } void -gf100_grctx_generate_r418bb8(struct gf100_gr *gr) +gf100_grctx_generate_rop_mapping(struct gf100_gr *gr) { struct nvkm_device *device = gr->base.engine.subdev.device; u32 data[6] = {}, data2[2] = {}; @@ -1241,6 +1241,8 @@ gf100_grctx_generate_floorsweep(struct g if (func->r4060a8) func->r4060a8(gr); + + func->rop_mapping(gr); } void @@ -1272,7 +1274,6 @@ gf100_grctx_generate_main(struct gf100_g grctx->unkn(gr); gf100_grctx_generate_floorsweep(gr); - gf100_grctx_generate_r418bb8(gr); gf100_grctx_generate_r406800(gr); gf100_gr_icmd(gr, grctx->icmd); @@ -1424,4 +1425,5 @@ gf100_grctx = { .sm_id = gf100_grctx_generate_sm_id, .tpc_nr = gf100_grctx_generate_tpc_nr, .r4060a8 = gf100_grctx_generate_r4060a8, + .rop_mapping = gf100_grctx_generate_rop_mapping, }; --- a/drivers/gpu/drm/nouveau/nvkm/engine/gr/ctxgf100.h +++ b/drivers/gpu/drm/nouveau/nvkm/engine/gr/ctxgf100.h @@ -53,6 +53,7 @@ struct gf100_grctx_func { void (*sm_id)(struct gf100_gr *, int gpc, int tpc, int sm); void (*tpc_nr)(struct gf100_gr *, int gpc); void (*r4060a8)(struct gf100_gr *); + void (*rop_mapping)(struct gf100_gr *); }; extern const struct gf100_grctx_func gf100_grctx; @@ -63,11 +64,11 @@ void gf100_grctx_generate_pagepool(struc void gf100_grctx_generate_attrib(struct gf100_grctx *); void gf100_grctx_generate_unkn(struct gf100_gr *); void gf100_grctx_generate_floorsweep(struct gf100_gr *); -void gf100_grctx_generate_r418bb8(struct gf100_gr *); void gf100_grctx_generate_r406800(struct gf100_gr *); void gf100_grctx_generate_sm_id(struct gf100_gr *, int, int, int); void gf100_grctx_generate_tpc_nr(struct gf100_gr *, int); void gf100_grctx_generate_r4060a8(struct gf100_gr *); +void gf100_grctx_generate_rop_mapping(struct gf100_gr *); extern const struct gf100_grctx_func gf108_grctx; void gf108_grctx_generate_attrib(struct gf100_grctx *); @@ -78,6 +79,7 @@ extern const struct gf100_grctx_func gf1 extern const struct gf100_grctx_func gf117_grctx; void gf117_grctx_generate_attrib(struct gf100_grctx *); +void gf117_grctx_generate_rop_mapping(struct gf100_gr *); extern const struct gf100_grctx_func gf119_grctx; @@ -88,7 +90,6 @@ void gk104_grctx_generate_bundle(struct void gk104_grctx_generate_pagepool(struct gf100_grctx *); void gk104_grctx_generate_patch_ltc(struct gf100_grctx *); void gk104_grctx_generate_unkn(struct gf100_gr *); -void gk104_grctx_generate_r418bb8(struct gf100_gr *); void gm107_grctx_generate_bundle(struct gf100_grctx *); void gm107_grctx_generate_pagepool(struct gf100_grctx *); --- a/drivers/gpu/drm/nouveau/nvkm/engine/gr/ctxgf104.c +++ b/drivers/gpu/drm/nouveau/nvkm/engine/gr/ctxgf104.c @@ -99,4 +99,5 @@ gf104_grctx = { .sm_id = gf100_grctx_generate_sm_id, .tpc_nr = gf100_grctx_generate_tpc_nr, .r4060a8 = gf100_grctx_generate_r4060a8, + .rop_mapping = gf100_grctx_generate_rop_mapping, }; --- a/drivers/gpu/drm/nouveau/nvkm/engine/gr/ctxgf108.c +++ b/drivers/gpu/drm/nouveau/nvkm/engine/gr/ctxgf108.c @@ -797,4 +797,5 @@ gf108_grctx = { .sm_id = gf100_grctx_generate_sm_id, .tpc_nr = gf100_grctx_generate_tpc_nr, .r4060a8 = gf100_grctx_generate_r4060a8, + .rop_mapping = gf100_grctx_generate_rop_mapping, }; --- a/drivers/gpu/drm/nouveau/nvkm/engine/gr/ctxgf110.c +++ b/drivers/gpu/drm/nouveau/nvkm/engine/gr/ctxgf110.c @@ -350,4 +350,5 @@ gf110_grctx = { .sm_id = gf100_grctx_generate_sm_id, .tpc_nr = gf100_grctx_generate_tpc_nr, .r4060a8 = gf100_grctx_generate_r4060a8, + .rop_mapping = gf100_grctx_generate_rop_mapping, }; --- a/drivers/gpu/drm/nouveau/nvkm/engine/gr/ctxgf117.c +++ b/drivers/gpu/drm/nouveau/nvkm/engine/gr/ctxgf117.c @@ -180,6 +180,65 @@ gf117_grctx_pack_ppc[] = { ******************************************************************************/ void +gf117_grctx_generate_rop_mapping(struct gf100_gr *gr) +{ + struct nvkm_device *device = gr->base.engine.subdev.device; + u32 data[6] = {}, data2[2] = {}; + u8 tpcnr[GPC_MAX]; + u8 shift, ntpcv; + int gpc, tpc, i; + + /* calculate first set of magics */ + memcpy(tpcnr, gr->tpc_nr, sizeof(gr->tpc_nr)); + + gpc = -1; + for (tpc = 0; tpc < gr->tpc_total; tpc++) { + do { + gpc = (gpc + 1) % gr->gpc_nr; + } while (!tpcnr[gpc]); + tpcnr[gpc]--; + + data[tpc / 6] |= gpc << ((tpc % 6) * 5); + } + + for (; tpc < 32; tpc++) + data[tpc / 6] |= 7 << ((tpc % 6) * 5); + + /* and the second... */ + shift = 0; + ntpcv = gr->tpc_total; + while (!(ntpcv & (1 << 4))) { + ntpcv <<= 1; + shift++; + } + + data2[0] = (ntpcv << 16); + data2[0] |= (shift << 21); + data2[0] |= (((1 << (0 + 5)) % ntpcv) << 24); + for (i = 1; i < 7; i++) + data2[1] |= ((1 << (i + 5)) % ntpcv) << ((i - 1) * 5); + + /* GPC_BROADCAST */ + nvkm_wr32(device, 0x418bb8, (gr->tpc_total << 8) | + gr->screen_tile_row_offset); + for (i = 0; i < 6; i++) + nvkm_wr32(device, 0x418b08 + (i * 4), data[i]); + + /* GPC_BROADCAST.TP_BROADCAST */ + nvkm_wr32(device, 0x41bfd0, (gr->tpc_total << 8) | + gr->screen_tile_row_offset | data2[0]); + nvkm_wr32(device, 0x41bfe4, data2[1]); + for (i = 0; i < 6; i++) + nvkm_wr32(device, 0x41bf00 + (i * 4), data[i]); + + /* UNK78xx */ + nvkm_wr32(device, 0x4078bc, (gr->tpc_total << 8) | + gr->screen_tile_row_offset); + for (i = 0; i < 6; i++) + nvkm_wr32(device, 0x40780c + (i * 4), data[i]); +} + +void gf117_grctx_generate_attrib(struct gf100_grctx *info) { struct gf100_gr *gr = info->gr; @@ -241,7 +300,6 @@ gf117_grctx_generate_main(struct gf100_g grctx->unkn(gr); gf100_grctx_generate_floorsweep(gr); - gk104_grctx_generate_r418bb8(gr); gf100_grctx_generate_r406800(gr); for (i = 0; i < 8; i++) @@ -276,4 +334,5 @@ gf117_grctx = { .sm_id = gf100_grctx_generate_sm_id, .tpc_nr = gf100_grctx_generate_tpc_nr, .r4060a8 = gf100_grctx_generate_r4060a8, + .rop_mapping = gf117_grctx_generate_rop_mapping, }; --- a/drivers/gpu/drm/nouveau/nvkm/engine/gr/ctxgf119.c +++ b/drivers/gpu/drm/nouveau/nvkm/engine/gr/ctxgf119.c @@ -520,4 +520,5 @@ gf119_grctx = { .sm_id = gf100_grctx_generate_sm_id, .tpc_nr = gf100_grctx_generate_tpc_nr, .r4060a8 = gf100_grctx_generate_r4060a8, + .rop_mapping = gf100_grctx_generate_rop_mapping, }; --- a/drivers/gpu/drm/nouveau/nvkm/engine/gr/ctxgk104.c +++ b/drivers/gpu/drm/nouveau/nvkm/engine/gr/ctxgk104.c @@ -893,65 +893,6 @@ gk104_grctx_generate_unkn(struct gf100_g } void -gk104_grctx_generate_r418bb8(struct gf100_gr *gr) -{ - struct nvkm_device *device = gr->base.engine.subdev.device; - u32 data[6] = {}, data2[2] = {}; - u8 tpcnr[GPC_MAX]; - u8 shift, ntpcv; - int gpc, tpc, i; - - /* calculate first set of magics */ - memcpy(tpcnr, gr->tpc_nr, sizeof(gr->tpc_nr)); - - gpc = -1; - for (tpc = 0; tpc < gr->tpc_total; tpc++) { - do { - gpc = (gpc + 1) % gr->gpc_nr; - } while (!tpcnr[gpc]); - tpcnr[gpc]--; - - data[tpc / 6] |= gpc << ((tpc % 6) * 5); - } - - for (; tpc < 32; tpc++) - data[tpc / 6] |= 7 << ((tpc % 6) * 5); - - /* and the second... */ - shift = 0; - ntpcv = gr->tpc_total; - while (!(ntpcv & (1 << 4))) { - ntpcv <<= 1; - shift++; - } - - data2[0] = (ntpcv << 16); - data2[0] |= (shift << 21); - data2[0] |= (((1 << (0 + 5)) % ntpcv) << 24); - for (i = 1; i < 7; i++) - data2[1] |= ((1 << (i + 5)) % ntpcv) << ((i - 1) * 5); - - /* GPC_BROADCAST */ - nvkm_wr32(device, 0x418bb8, (gr->tpc_total << 8) | - gr->screen_tile_row_offset); - for (i = 0; i < 6; i++) - nvkm_wr32(device, 0x418b08 + (i * 4), data[i]); - - /* GPC_BROADCAST.TP_BROADCAST */ - nvkm_wr32(device, 0x41bfd0, (gr->tpc_total << 8) | - gr->screen_tile_row_offset | data2[0]); - nvkm_wr32(device, 0x41bfe4, data2[1]); - for (i = 0; i < 6; i++) - nvkm_wr32(device, 0x41bf00 + (i * 4), data[i]); - - /* UNK78xx */ - nvkm_wr32(device, 0x4078bc, (gr->tpc_total << 8) | - gr->screen_tile_row_offset); - for (i = 0; i < 6; i++) - nvkm_wr32(device, 0x40780c + (i * 4), data[i]); -} - -void gk104_grctx_generate_main(struct gf100_gr *gr, struct gf100_grctx *info) { struct nvkm_device *device = gr->base.engine.subdev.device; @@ -975,7 +916,6 @@ gk104_grctx_generate_main(struct gf100_g grctx->unkn(gr); gf100_grctx_generate_floorsweep(gr); - gk104_grctx_generate_r418bb8(gr); gf100_grctx_generate_r406800(gr); for (i = 0; i < 8; i++) @@ -1018,4 +958,5 @@ gk104_grctx = { .patch_ltc = gk104_grctx_generate_patch_ltc, .sm_id = gf100_grctx_generate_sm_id, .tpc_nr = gf100_grctx_generate_tpc_nr, + .rop_mapping = gf117_grctx_generate_rop_mapping, }; --- a/drivers/gpu/drm/nouveau/nvkm/engine/gr/ctxgk110.c +++ b/drivers/gpu/drm/nouveau/nvkm/engine/gr/ctxgk110.c @@ -833,4 +833,5 @@ gk110_grctx = { .patch_ltc = gk104_grctx_generate_patch_ltc, .sm_id = gf100_grctx_generate_sm_id, .tpc_nr = gf100_grctx_generate_tpc_nr, + .rop_mapping = gf117_grctx_generate_rop_mapping, }; --- a/drivers/gpu/drm/nouveau/nvkm/engine/gr/ctxgk110b.c +++ b/drivers/gpu/drm/nouveau/nvkm/engine/gr/ctxgk110b.c @@ -94,4 +94,5 @@ gk110b_grctx = { .patch_ltc = gk104_grctx_generate_patch_ltc, .sm_id = gf100_grctx_generate_sm_id, .tpc_nr = gf100_grctx_generate_tpc_nr, + .rop_mapping = gf117_grctx_generate_rop_mapping, }; --- a/drivers/gpu/drm/nouveau/nvkm/engine/gr/ctxgk208.c +++ b/drivers/gpu/drm/nouveau/nvkm/engine/gr/ctxgk208.c @@ -555,4 +555,5 @@ gk208_grctx = { .patch_ltc = gk104_grctx_generate_patch_ltc, .sm_id = gf100_grctx_generate_sm_id, .tpc_nr = gf100_grctx_generate_tpc_nr, + .rop_mapping = gf117_grctx_generate_rop_mapping, }; --- a/drivers/gpu/drm/nouveau/nvkm/engine/gr/ctxgk20a.c +++ b/drivers/gpu/drm/nouveau/nvkm/engine/gr/ctxgk20a.c @@ -43,7 +43,6 @@ gk20a_grctx_generate_main(struct gf100_g grctx->unkn(gr); gf100_grctx_generate_floorsweep(gr); - gk104_grctx_generate_r418bb8(gr); gf100_grctx_generate_r406800(gr); for (i = 0; i < 8; i++) @@ -83,4 +82,5 @@ gk20a_grctx = { .alpha_nr = 0x648, .sm_id = gf100_grctx_generate_sm_id, .tpc_nr = gf100_grctx_generate_tpc_nr, + .rop_mapping = gf117_grctx_generate_rop_mapping, }; --- a/drivers/gpu/drm/nouveau/nvkm/engine/gr/ctxgm107.c +++ b/drivers/gpu/drm/nouveau/nvkm/engine/gr/ctxgm107.c @@ -961,7 +961,6 @@ gm107_grctx_generate_main(struct gf100_g grctx->unkn(gr); gf100_grctx_generate_floorsweep(gr); - gk104_grctx_generate_r418bb8(gr); gf100_grctx_generate_r406800(gr); nvkm_wr32(device, 0x4064d0, 0x00000001); @@ -1005,4 +1004,5 @@ gm107_grctx = { .alpha_nr = 0x1000, .sm_id = gm107_grctx_generate_sm_id, .tpc_nr = gf100_grctx_generate_tpc_nr, + .rop_mapping = gf117_grctx_generate_rop_mapping, }; --- a/drivers/gpu/drm/nouveau/nvkm/engine/gr/ctxgm200.c +++ b/drivers/gpu/drm/nouveau/nvkm/engine/gr/ctxgm200.c @@ -77,7 +77,6 @@ gm200_grctx_generate_main(struct gf100_g grctx->unkn(gr); gf100_grctx_generate_floorsweep(gr); - gk104_grctx_generate_r418bb8(gr); for (i = 0; i < 8; i++) nvkm_wr32(device, 0x4064d0 + (i * 0x04), 0x00000000); @@ -115,4 +114,5 @@ gm200_grctx = { .alpha_nr_max = 0x1800, .alpha_nr = 0x1000, .sm_id = gm107_grctx_generate_sm_id, + .rop_mapping = gf117_grctx_generate_rop_mapping, }; --- a/drivers/gpu/drm/nouveau/nvkm/engine/gr/ctxgm20b.c +++ b/drivers/gpu/drm/nouveau/nvkm/engine/gr/ctxgm20b.c @@ -40,7 +40,6 @@ gm20b_grctx_generate_main(struct gf100_g grctx->unkn(gr); gf100_grctx_generate_floorsweep(gr); - gk104_grctx_generate_r418bb8(gr); for (i = 0; i < 8; i++) nvkm_wr32(device, 0x4064d0 + (i * 0x04), 0x00000000); @@ -84,4 +83,5 @@ gm20b_grctx = { .alpha_nr_max = 0xc00, .alpha_nr = 0x800, .sm_id = gm107_grctx_generate_sm_id, + .rop_mapping = gf117_grctx_generate_rop_mapping, }; --- a/drivers/gpu/drm/nouveau/nvkm/engine/gr/ctxgp100.c +++ b/drivers/gpu/drm/nouveau/nvkm/engine/gr/ctxgp100.c @@ -139,7 +139,6 @@ gp100_grctx_generate_main(struct gf100_g grctx->unkn(gr); gf100_grctx_generate_floorsweep(gr); - gk104_grctx_generate_r418bb8(gr); for (i = 0; i < 8; i++) nvkm_wr32(device, 0x4064d0 + (i * 0x04), 0x00000000); @@ -174,4 +173,5 @@ gp100_grctx = { .alpha_nr_max = 0xc00, .alpha_nr = 0x800, .sm_id = gm107_grctx_generate_sm_id, + .rop_mapping = gf117_grctx_generate_rop_mapping, }; --- a/drivers/gpu/drm/nouveau/nvkm/engine/gr/ctxgp102.c +++ b/drivers/gpu/drm/nouveau/nvkm/engine/gr/ctxgp102.c @@ -95,4 +95,5 @@ gp102_grctx = { .alpha_nr_max = 0xc00, .alpha_nr = 0x800, .sm_id = gm107_grctx_generate_sm_id, + .rop_mapping = gf117_grctx_generate_rop_mapping, }; --- a/drivers/gpu/drm/nouveau/nvkm/engine/gr/ctxgp107.c +++ b/drivers/gpu/drm/nouveau/nvkm/engine/gr/ctxgp107.c @@ -45,4 +45,5 @@ gp107_grctx = { .alpha_nr_max = 0xc00, .alpha_nr = 0x800, .sm_id = gm107_grctx_generate_sm_id, + .rop_mapping = gf117_grctx_generate_rop_mapping, };