Blob Blame History Raw
From: Ben Skeggs <bskeggs@redhat.com>
Date: Tue, 8 May 2018 20:39:46 +1000
Subject: drm/nouveau/gr/gf100-: virtualise tpc_per_gpc
Git-commit: e51f75d5012e88c90b3a05b6706475d83cb7a6eb
Patch-mainline: v4.18-rc1
References: FATE#326289 FATE#326079 FATE#326049 FATE#322398 FATE#326166

GM20B now also shares the same code, as NVGPU shows it doesn't need
special treatment.

Signed-off-by: Ben Skeggs <bskeggs@redhat.com>
Acked-by: Petr Tesarik <ptesarik@suse.com>
---
 drivers/gpu/drm/nouveau/nvkm/engine/gr/ctxgf100.c |   25 ++++++++--------------
 drivers/gpu/drm/nouveau/nvkm/engine/gr/ctxgf100.h |    1 
 drivers/gpu/drm/nouveau/nvkm/engine/gr/ctxgf117.c |    1 
 drivers/gpu/drm/nouveau/nvkm/engine/gr/ctxgk104.c |    1 
 drivers/gpu/drm/nouveau/nvkm/engine/gr/ctxgk20a.c |    1 
 drivers/gpu/drm/nouveau/nvkm/engine/gr/ctxgm107.c |    1 
 drivers/gpu/drm/nouveau/nvkm/engine/gr/ctxgm200.c |    1 
 drivers/gpu/drm/nouveau/nvkm/engine/gr/ctxgm20b.c |   15 -------------
 drivers/gpu/drm/nouveau/nvkm/engine/gr/ctxgp100.c |    1 
 9 files changed, 10 insertions(+), 37 deletions(-)

--- a/drivers/gpu/drm/nouveau/nvkm/engine/gr/ctxgf100.c
+++ b/drivers/gpu/drm/nouveau/nvkm/engine/gr/ctxgf100.c
@@ -1080,19 +1080,6 @@ gf100_grctx_generate_unkn(struct gf100_g
 }
 
 void
-gf100_grctx_generate_r406028(struct gf100_gr *gr)
-{
-	struct nvkm_device *device = gr->base.engine.subdev.device;
-	u32 tmp[GPC_MAX / 8] = {}, i = 0;
-	for (i = 0; i < gr->gpc_nr; i++)
-		tmp[i / 8] |= gr->tpc_nr[i] << ((i % 8) * 4);
-	for (i = 0; i < 4; i++) {
-		nvkm_wr32(device, 0x406028 + (i * 4), tmp[i]);
-		nvkm_wr32(device, 0x405870 + (i * 4), tmp[i]);
-	}
-}
-
-void
 gf100_grctx_generate_r4060a8(struct gf100_gr *gr)
 {
 	struct nvkm_device *device = gr->base.engine.subdev.device;
@@ -1229,8 +1216,10 @@ gf100_grctx_generate_sm_id(struct gf100_
 void
 gf100_grctx_generate_floorsweep(struct gf100_gr *gr)
 {
+	struct nvkm_device *device = gr->base.engine.subdev.device;
 	const struct gf100_grctx_func *func = gr->func->grctx;
-	int tpc, gpc, sm;
+	int tpc, gpc, sm, i, j;
+	u32 data;
 
 	for (tpc = 0, sm = 0; tpc < gr->tpc_max; tpc++) {
 		for (gpc = 0; gpc < gr->gpc_nr; gpc++) {
@@ -1240,6 +1229,13 @@ gf100_grctx_generate_floorsweep(struct g
 				func->tpc_nr(gr, gpc);
 		}
 	}
+
+	for (gpc = 0, i = 0; i < 4; i++) {
+		for (data = 0, j = 0; j < 8 && gpc < gr->gpc_nr; j++, gpc++)
+			data |= gr->tpc_nr[gpc] << (j * 4);
+		nvkm_wr32(device, 0x406028 + (i * 4), data);
+		nvkm_wr32(device, 0x405870 + (i * 4), data);
+	}
 }
 
 void
@@ -1271,7 +1267,6 @@ gf100_grctx_generate_main(struct gf100_g
 	grctx->unkn(gr);
 
 	gf100_grctx_generate_floorsweep(gr);
-	gf100_grctx_generate_r406028(gr);
 	gf100_grctx_generate_r4060a8(gr);
 	gf100_grctx_generate_r418bb8(gr);
 	gf100_grctx_generate_r406800(gr);
--- a/drivers/gpu/drm/nouveau/nvkm/engine/gr/ctxgf100.h
+++ b/drivers/gpu/drm/nouveau/nvkm/engine/gr/ctxgf100.h
@@ -62,7 +62,6 @@ void gf100_grctx_generate_pagepool(struc
 void gf100_grctx_generate_attrib(struct gf100_grctx *);
 void gf100_grctx_generate_unkn(struct gf100_gr *);
 void gf100_grctx_generate_floorsweep(struct gf100_gr *);
-void gf100_grctx_generate_r406028(struct gf100_gr *);
 void gf100_grctx_generate_r4060a8(struct gf100_gr *);
 void gf100_grctx_generate_r418bb8(struct gf100_gr *);
 void gf100_grctx_generate_r406800(struct gf100_gr *);
--- a/drivers/gpu/drm/nouveau/nvkm/engine/gr/ctxgf117.c
+++ b/drivers/gpu/drm/nouveau/nvkm/engine/gr/ctxgf117.c
@@ -241,7 +241,6 @@ gf117_grctx_generate_main(struct gf100_g
 	grctx->unkn(gr);
 
 	gf100_grctx_generate_floorsweep(gr);
-	gf100_grctx_generate_r406028(gr);
 	gf100_grctx_generate_r4060a8(gr);
 	gk104_grctx_generate_r418bb8(gr);
 	gf100_grctx_generate_r406800(gr);
--- a/drivers/gpu/drm/nouveau/nvkm/engine/gr/ctxgk104.c
+++ b/drivers/gpu/drm/nouveau/nvkm/engine/gr/ctxgk104.c
@@ -975,7 +975,6 @@ gk104_grctx_generate_main(struct gf100_g
 	grctx->unkn(gr);
 
 	gf100_grctx_generate_floorsweep(gr);
-	gf100_grctx_generate_r406028(gr);
 	gk104_grctx_generate_r418bb8(gr);
 	gf100_grctx_generate_r406800(gr);
 
--- a/drivers/gpu/drm/nouveau/nvkm/engine/gr/ctxgk20a.c
+++ b/drivers/gpu/drm/nouveau/nvkm/engine/gr/ctxgk20a.c
@@ -43,7 +43,6 @@ gk20a_grctx_generate_main(struct gf100_g
 	grctx->unkn(gr);
 
 	gf100_grctx_generate_floorsweep(gr);
-	gf100_grctx_generate_r406028(gr);
 	gk104_grctx_generate_r418bb8(gr);
 	gf100_grctx_generate_r406800(gr);
 
--- a/drivers/gpu/drm/nouveau/nvkm/engine/gr/ctxgm107.c
+++ b/drivers/gpu/drm/nouveau/nvkm/engine/gr/ctxgm107.c
@@ -961,7 +961,6 @@ gm107_grctx_generate_main(struct gf100_g
 	grctx->unkn(gr);
 
 	gf100_grctx_generate_floorsweep(gr);
-	gf100_grctx_generate_r406028(gr);
 	gk104_grctx_generate_r418bb8(gr);
 	gf100_grctx_generate_r406800(gr);
 
--- a/drivers/gpu/drm/nouveau/nvkm/engine/gr/ctxgm200.c
+++ b/drivers/gpu/drm/nouveau/nvkm/engine/gr/ctxgm200.c
@@ -77,7 +77,6 @@ gm200_grctx_generate_main(struct gf100_g
 	grctx->unkn(gr);
 
 	gf100_grctx_generate_floorsweep(gr);
-	gf100_grctx_generate_r406028(gr);
 	gk104_grctx_generate_r418bb8(gr);
 
 	for (i = 0; i < 8; i++)
--- a/drivers/gpu/drm/nouveau/nvkm/engine/gr/ctxgm20b.c
+++ b/drivers/gpu/drm/nouveau/nvkm/engine/gr/ctxgm20b.c
@@ -22,20 +22,6 @@
 #include "ctxgf100.h"
 
 static void
-gm20b_grctx_generate_r406028(struct gf100_gr *gr)
-{
-	struct nvkm_device *device = gr->base.engine.subdev.device;
-	u32 tpc_per_gpc = 0;
-	int i;
-
-	for (i = 0; i < gr->gpc_nr; i++)
-		tpc_per_gpc |= gr->tpc_nr[i] << (4 * i);
-
-	nvkm_wr32(device, 0x406028, tpc_per_gpc);
-	nvkm_wr32(device, 0x405870, tpc_per_gpc);
-}
-
-static void
 gm20b_grctx_generate_main(struct gf100_gr *gr, struct gf100_grctx *info)
 {
 	struct nvkm_device *device = gr->base.engine.subdev.device;
@@ -54,7 +40,6 @@ gm20b_grctx_generate_main(struct gf100_g
 	grctx->unkn(gr);
 
 	gf100_grctx_generate_floorsweep(gr);
-	gm20b_grctx_generate_r406028(gr);
 	gk104_grctx_generate_r418bb8(gr);
 
 	for (i = 0; i < 8; i++)
--- a/drivers/gpu/drm/nouveau/nvkm/engine/gr/ctxgp100.c
+++ b/drivers/gpu/drm/nouveau/nvkm/engine/gr/ctxgp100.c
@@ -139,7 +139,6 @@ gp100_grctx_generate_main(struct gf100_g
 	grctx->unkn(gr);
 
 	gf100_grctx_generate_floorsweep(gr);
-	gf100_grctx_generate_r406028(gr);
 	gk104_grctx_generate_r418bb8(gr);
 
 	for (i = 0; i < 8; i++)