Blob Blame History Raw
From: Ben Skeggs <bskeggs@redhat.com>
Date: Tue, 8 May 2018 20:39:46 +1000
Subject: drm/nouveau/gr/gf100-: virtualise dist_skip_table + improve algorithm
Git-commit: 60770fa28bd7d69097d3a186fe8cfa1ec21c9c1d
Patch-mainline: v4.18-rc1
References: FATE#326289 FATE#326079 FATE#326049 FATE#322398 FATE#326166

The algorithm for GM200 and newer matches RM for all the boards I have, but
I don't have enough data to try and figure something out for earlier boards,
so these will still write zeroes to the table as we did before.

The code in NVGPU isn't helpful here, it appears to handle specific cases.

Signed-off-by: Ben Skeggs <bskeggs@redhat.com>
Acked-by: Petr Tesarik <ptesarik@suse.com>
---
 drivers/gpu/drm/nouveau/nvkm/engine/gr/ctxgf100.c  |    2 +
 drivers/gpu/drm/nouveau/nvkm/engine/gr/ctxgf100.h  |    3 ++
 drivers/gpu/drm/nouveau/nvkm/engine/gr/ctxgf117.c  |   15 +++++++++---
 drivers/gpu/drm/nouveau/nvkm/engine/gr/ctxgk104.c  |    5 ----
 drivers/gpu/drm/nouveau/nvkm/engine/gr/ctxgk110.c  |    1 
 drivers/gpu/drm/nouveau/nvkm/engine/gr/ctxgk110b.c |    1 
 drivers/gpu/drm/nouveau/nvkm/engine/gr/ctxgk208.c  |    1 
 drivers/gpu/drm/nouveau/nvkm/engine/gr/ctxgm107.c  |    5 ----
 drivers/gpu/drm/nouveau/nvkm/engine/gr/ctxgm200.c  |   25 +++++++++++++++++++--
 drivers/gpu/drm/nouveau/nvkm/engine/gr/ctxgp100.c  |    3 --
 drivers/gpu/drm/nouveau/nvkm/engine/gr/ctxgp102.c  |    1 
 drivers/gpu/drm/nouveau/nvkm/engine/gr/ctxgp107.c  |    1 
 drivers/gpu/drm/nouveau/nvkm/engine/gr/gf100.c     |    3 ++
 drivers/gpu/drm/nouveau/nvkm/engine/gr/gf100.h     |    1 
 14 files changed, 51 insertions(+), 16 deletions(-)

--- a/drivers/gpu/drm/nouveau/nvkm/engine/gr/ctxgf100.c
+++ b/drivers/gpu/drm/nouveau/nvkm/engine/gr/ctxgf100.c
@@ -1360,6 +1360,8 @@ gf100_grctx_generate_floorsweep(struct g
 		func->alpha_beta_tables(gr);
 	if (func->max_ways_evict)
 		func->max_ways_evict(gr);
+	if (func->dist_skip_table)
+		func->dist_skip_table(gr);
 }
 
 void
--- a/drivers/gpu/drm/nouveau/nvkm/engine/gr/ctxgf100.h
+++ b/drivers/gpu/drm/nouveau/nvkm/engine/gr/ctxgf100.h
@@ -56,6 +56,7 @@ struct gf100_grctx_func {
 	void (*rop_mapping)(struct gf100_gr *);
 	void (*alpha_beta_tables)(struct gf100_gr *);
 	void (*max_ways_evict)(struct gf100_gr *);
+	void (*dist_skip_table)(struct gf100_gr *);
 };
 
 extern const struct gf100_grctx_func gf100_grctx;
@@ -83,6 +84,7 @@ extern const struct gf100_grctx_func gf1
 extern const struct gf100_grctx_func gf117_grctx;
 void gf117_grctx_generate_attrib(struct gf100_grctx *);
 void gf117_grctx_generate_rop_mapping(struct gf100_gr *);
+void gf117_grctx_generate_dist_skip_table(struct gf100_gr *);
 
 extern const struct gf100_grctx_func gf119_grctx;
 
@@ -111,6 +113,7 @@ void gm107_grctx_generate_pagepool(struc
 void gm107_grctx_generate_attrib(struct gf100_grctx *);
 
 extern const struct gf100_grctx_func gm200_grctx;
+void gm200_grctx_generate_dist_skip_table(struct gf100_gr *);
 void gm200_grctx_generate_405b60(struct gf100_gr *);
 
 extern const struct gf100_grctx_func gm20b_grctx;
--- a/drivers/gpu/drm/nouveau/nvkm/engine/gr/ctxgf117.c
+++ b/drivers/gpu/drm/nouveau/nvkm/engine/gr/ctxgf117.c
@@ -180,6 +180,16 @@ gf117_grctx_pack_ppc[] = {
  ******************************************************************************/
 
 void
+gf117_grctx_generate_dist_skip_table(struct gf100_gr *gr)
+{
+	struct nvkm_device *device = gr->base.engine.subdev.device;
+	int i;
+
+	for (i = 0; i < 8; i++)
+		nvkm_wr32(device, 0x4064d0 + (i * 0x04), 0x00000000);
+}
+
+void
 gf117_grctx_generate_rop_mapping(struct gf100_gr *gr)
 {
 	struct nvkm_device *device = gr->base.engine.subdev.device;
@@ -282,7 +292,6 @@ gf117_grctx_generate_main(struct gf100_g
 	struct nvkm_device *device = gr->base.engine.subdev.device;
 	const struct gf100_grctx_func *grctx = gr->func->grctx;
 	u32 idle_timeout;
-	int i;
 
 	nvkm_mc_unk260(device, 0);
 
@@ -301,9 +310,6 @@ gf117_grctx_generate_main(struct gf100_g
 
 	gf100_grctx_generate_floorsweep(gr);
 
-	for (i = 0; i < 8; i++)
-		nvkm_wr32(device, 0x4064d0 + (i * 0x04), 0x00000000);
-
 	gf100_gr_icmd(gr, grctx->icmd);
 	nvkm_wr32(device, 0x404154, idle_timeout);
 	gf100_gr_mthd(gr, grctx->mthd);
@@ -336,4 +342,5 @@ gf117_grctx = {
 	.rop_mapping = gf117_grctx_generate_rop_mapping,
 	.alpha_beta_tables = gf100_grctx_generate_alpha_beta_tables,
 	.max_ways_evict = gf100_grctx_generate_max_ways_evict,
+	.dist_skip_table = gf117_grctx_generate_dist_skip_table,
 };
--- a/drivers/gpu/drm/nouveau/nvkm/engine/gr/ctxgk104.c
+++ b/drivers/gpu/drm/nouveau/nvkm/engine/gr/ctxgk104.c
@@ -898,7 +898,6 @@ gk104_grctx_generate_main(struct gf100_g
 	struct nvkm_device *device = gr->base.engine.subdev.device;
 	const struct gf100_grctx_func *grctx = gr->func->grctx;
 	u32 idle_timeout;
-	int i;
 
 	nvkm_mc_unk260(device, 0);
 
@@ -917,9 +916,6 @@ gk104_grctx_generate_main(struct gf100_g
 
 	gf100_grctx_generate_floorsweep(gr);
 
-	for (i = 0; i < 8; i++)
-		nvkm_wr32(device, 0x4064d0 + (i * 0x04), 0x00000000);
-
 	nvkm_wr32(device, 0x405b00, (gr->tpc_total << 8) | gr->gpc_nr);
 	nvkm_mask(device, 0x419f78, 0x00000001, 0x00000000);
 
@@ -1006,4 +1002,5 @@ gk104_grctx = {
 	.tpc_nr = gf100_grctx_generate_tpc_nr,
 	.rop_mapping = gf117_grctx_generate_rop_mapping,
 	.alpha_beta_tables = gk104_grctx_generate_alpha_beta_tables,
+	.dist_skip_table = gf117_grctx_generate_dist_skip_table,
 };
--- a/drivers/gpu/drm/nouveau/nvkm/engine/gr/ctxgk110.c
+++ b/drivers/gpu/drm/nouveau/nvkm/engine/gr/ctxgk110.c
@@ -835,4 +835,5 @@ gk110_grctx = {
 	.tpc_nr = gf100_grctx_generate_tpc_nr,
 	.rop_mapping = gf117_grctx_generate_rop_mapping,
 	.alpha_beta_tables = gk104_grctx_generate_alpha_beta_tables,
+	.dist_skip_table = gf117_grctx_generate_dist_skip_table,
 };
--- a/drivers/gpu/drm/nouveau/nvkm/engine/gr/ctxgk110b.c
+++ b/drivers/gpu/drm/nouveau/nvkm/engine/gr/ctxgk110b.c
@@ -96,4 +96,5 @@ gk110b_grctx = {
 	.tpc_nr = gf100_grctx_generate_tpc_nr,
 	.rop_mapping = gf117_grctx_generate_rop_mapping,
 	.alpha_beta_tables = gk104_grctx_generate_alpha_beta_tables,
+	.dist_skip_table = gf117_grctx_generate_dist_skip_table,
 };
--- a/drivers/gpu/drm/nouveau/nvkm/engine/gr/ctxgk208.c
+++ b/drivers/gpu/drm/nouveau/nvkm/engine/gr/ctxgk208.c
@@ -557,4 +557,5 @@ gk208_grctx = {
 	.tpc_nr = gf100_grctx_generate_tpc_nr,
 	.rop_mapping = gf117_grctx_generate_rop_mapping,
 	.alpha_beta_tables = gk104_grctx_generate_alpha_beta_tables,
+	.dist_skip_table = gf117_grctx_generate_dist_skip_table,
 };
--- a/drivers/gpu/drm/nouveau/nvkm/engine/gr/ctxgm107.c
+++ b/drivers/gpu/drm/nouveau/nvkm/engine/gr/ctxgm107.c
@@ -945,7 +945,6 @@ gm107_grctx_generate_main(struct gf100_g
 	struct nvkm_device *device = gr->base.engine.subdev.device;
 	const struct gf100_grctx_func *grctx = gr->func->grctx;
 	u32 idle_timeout;
-	int i;
 
 	gf100_gr_mmio(gr, grctx->hub);
 	gf100_gr_mmio(gr, grctx->gpc);
@@ -962,9 +961,6 @@ gm107_grctx_generate_main(struct gf100_g
 
 	gf100_grctx_generate_floorsweep(gr);
 
-	nvkm_wr32(device, 0x4064d0, 0x00000001);
-	for (i = 1; i < 8; i++)
-		nvkm_wr32(device, 0x4064d0 + (i * 0x04), 0x00000000);
 	nvkm_wr32(device, 0x406500, 0x00000001);
 
 	nvkm_wr32(device, 0x405b00, (gr->tpc_total << 8) | gr->gpc_nr);
@@ -1005,4 +1001,5 @@ gm107_grctx = {
 	.tpc_nr = gf100_grctx_generate_tpc_nr,
 	.rop_mapping = gf117_grctx_generate_rop_mapping,
 	.alpha_beta_tables = gk104_grctx_generate_alpha_beta_tables,
+	.dist_skip_table = gf117_grctx_generate_dist_skip_table,
 };
--- a/drivers/gpu/drm/nouveau/nvkm/engine/gr/ctxgm200.c
+++ b/drivers/gpu/drm/nouveau/nvkm/engine/gr/ctxgm200.c
@@ -78,8 +78,6 @@ gm200_grctx_generate_main(struct gf100_g
 
 	gf100_grctx_generate_floorsweep(gr);
 
-	for (i = 0; i < 8; i++)
-		nvkm_wr32(device, 0x4064d0 + (i * 0x04), 0x00000000);
 	nvkm_wr32(device, 0x406500, 0x00000000);
 
 	nvkm_wr32(device, 0x405b00, (gr->tpc_total << 8) | gr->gpc_nr);
@@ -98,6 +96,28 @@ gm200_grctx_generate_main(struct gf100_g
 	nvkm_mask(device, 0x418e4c, 0xffffffff, 0x70000000);
 }
 
+void
+gm200_grctx_generate_dist_skip_table(struct gf100_gr *gr)
+{
+	struct nvkm_device *device = gr->base.engine.subdev.device;
+	u32 data[8] = {};
+	int gpc, ppc, i;
+
+	for (gpc = 0; gpc < gr->gpc_nr; gpc++) {
+		for (ppc = 0; ppc < gr->ppc_nr[gpc]; ppc++) {
+			u8 ppc_tpcs = gr->ppc_tpc_nr[gpc][ppc];
+			u8 ppc_tpcm = gr->ppc_tpc_mask[gpc][ppc];
+			while (ppc_tpcs-- > gr->ppc_tpc_min)
+				ppc_tpcm &= ppc_tpcm - 1;
+			ppc_tpcm ^= gr->ppc_tpc_mask[gpc][ppc];
+			((u8 *)data)[gpc] |= ppc_tpcm;
+		}
+	}
+
+	for (i = 0; i < ARRAY_SIZE(data); i++)
+		nvkm_wr32(device, 0x4064d0 + (i * 0x04), data[i]);
+}
+
 const struct gf100_grctx_func
 gm200_grctx = {
 	.main  = gm200_grctx_generate_main,
@@ -115,4 +135,5 @@ gm200_grctx = {
 	.alpha_nr = 0x1000,
 	.sm_id = gm107_grctx_generate_sm_id,
 	.rop_mapping = gf117_grctx_generate_rop_mapping,
+	.dist_skip_table = gm200_grctx_generate_dist_skip_table,
 };
--- a/drivers/gpu/drm/nouveau/nvkm/engine/gr/ctxgp100.c
+++ b/drivers/gpu/drm/nouveau/nvkm/engine/gr/ctxgp100.c
@@ -140,8 +140,6 @@ gp100_grctx_generate_main(struct gf100_g
 
 	gf100_grctx_generate_floorsweep(gr);
 
-	for (i = 0; i < 8; i++)
-		nvkm_wr32(device, 0x4064d0 + (i * 0x04), 0x00000000);
 	nvkm_wr32(device, 0x406500, 0x00000000);
 
 	nvkm_wr32(device, 0x405b00, (gr->tpc_total << 8) | gr->gpc_nr);
@@ -174,4 +172,5 @@ gp100_grctx = {
 	.alpha_nr = 0x800,
 	.sm_id = gm107_grctx_generate_sm_id,
 	.rop_mapping = gf117_grctx_generate_rop_mapping,
+	.dist_skip_table = gm200_grctx_generate_dist_skip_table,
 };
--- a/drivers/gpu/drm/nouveau/nvkm/engine/gr/ctxgp102.c
+++ b/drivers/gpu/drm/nouveau/nvkm/engine/gr/ctxgp102.c
@@ -96,4 +96,5 @@ gp102_grctx = {
 	.alpha_nr = 0x800,
 	.sm_id = gm107_grctx_generate_sm_id,
 	.rop_mapping = gf117_grctx_generate_rop_mapping,
+	.dist_skip_table = gm200_grctx_generate_dist_skip_table,
 };
--- a/drivers/gpu/drm/nouveau/nvkm/engine/gr/ctxgp107.c
+++ b/drivers/gpu/drm/nouveau/nvkm/engine/gr/ctxgp107.c
@@ -46,4 +46,5 @@ gp107_grctx = {
 	.alpha_nr = 0x800,
 	.sm_id = gm107_grctx_generate_sm_id,
 	.rop_mapping = gf117_grctx_generate_rop_mapping,
+	.dist_skip_table = gm200_grctx_generate_dist_skip_table,
 };
--- a/drivers/gpu/drm/nouveau/nvkm/engine/gr/gf100.c
+++ b/drivers/gpu/drm/nouveau/nvkm/engine/gr/gf100.c
@@ -1685,6 +1685,9 @@ gf100_gr_oneinit(struct nvkm_gr *base)
 				continue;
 			gr->ppc_mask[i] |= (1 << j);
 			gr->ppc_tpc_nr[i][j] = hweight8(gr->ppc_tpc_mask[i][j]);
+			if (gr->ppc_tpc_min == 0 ||
+			    gr->ppc_tpc_min > gr->ppc_tpc_nr[i][j])
+				gr->ppc_tpc_min = gr->ppc_tpc_nr[i][j];
 		}
 	}
 
--- a/drivers/gpu/drm/nouveau/nvkm/engine/gr/gf100.h
+++ b/drivers/gpu/drm/nouveau/nvkm/engine/gr/gf100.h
@@ -105,6 +105,7 @@ struct gf100_gr {
 	u8 ppc_mask[GPC_MAX];
 	u8 ppc_tpc_mask[GPC_MAX][4];
 	u8 ppc_tpc_nr[GPC_MAX][4];
+	u8 ppc_tpc_min;
 
 	struct gf100_gr_data mmio_data[4];
 	struct gf100_gr_mmio mmio_list[4096/8];