Blob Blame History Raw
From 5145d57ec5f5cf7dadaa6ccd9c9f1e4dae82570b Mon Sep 17 00:00:00 2001
From: Jay Cornwall <Jay.Cornwall@amd.com>
Date: Thu, 18 Jul 2019 16:57:22 -0500
Subject: drm/amdkfd: Extend CU mask to 8 SEs (v3)
Git-commit: 5145d57ec5f5cf7dadaa6ccd9c9f1e4dae82570b
Patch-mainline: v5.4-rc1
References: bsc#1152489

Extend the KFD CU mask handling from 4 to 8 shader engines (SEs).
This follows the CU bitmap layout logic introduced by
"drm/amdgpu: support get_cu_info for Arcturus".

v2: squash in fixup for gfx_v9_0.c (Alex)
v3: squash in debug print output fix

Signed-off-by: Jay Cornwall <Jay.Cornwall@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
Acked-by: Thomas Zimmermann <tzimmermann@suse.de>
---
 drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c          |  4 ++++
 drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager.c   | 10 +++++-----
 drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager.h   |  2 ++
 .../gpu/drm/amd/amdkfd/kfd_mqd_manager_v9.c    | 18 +++++++++++++++---
 drivers/gpu/drm/amd/include/v9_structs.h       |  8 ++++----
 5 files changed, 30 insertions(+), 12 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c
index c4df0c525270..c7910be64bf1 100644
--- a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c
@@ -3341,6 +3341,10 @@ static int gfx_v9_0_mqd_init(struct amdgpu_ring *ring)
 	mqd->compute_static_thread_mgmt_se1 = 0xffffffff;
 	mqd->compute_static_thread_mgmt_se2 = 0xffffffff;
 	mqd->compute_static_thread_mgmt_se3 = 0xffffffff;
+	mqd->compute_static_thread_mgmt_se4 = 0xffffffff;
+	mqd->compute_static_thread_mgmt_se5 = 0xffffffff;
+	mqd->compute_static_thread_mgmt_se6 = 0xffffffff;
+	mqd->compute_static_thread_mgmt_se7 = 0xffffffff;
 	mqd->compute_misc_reserved = 0x00000003;
 
 	mqd->dynamic_cu_mask_addr_lo =
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager.c b/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager.c
index d6cf391da591..88813dad731f 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager.c
@@ -98,8 +98,8 @@ void mqd_symmetrically_map_cu_mask(struct mqd_manager *mm,
 		uint32_t *se_mask)
 {
 	struct kfd_cu_info cu_info;
-	uint32_t cu_per_sh[4] = {0};
-	int i, se, cu = 0;
+	uint32_t cu_per_se[KFD_MAX_NUM_SE] = {0};
+	int i, se, sh, cu = 0;
 
 	amdgpu_amdkfd_get_cu_info(mm->dev->kgd, &cu_info);
 
@@ -107,8 +107,8 @@ void mqd_symmetrically_map_cu_mask(struct mqd_manager *mm,
 		cu_mask_count = cu_info.cu_active_number;
 
 	for (se = 0; se < cu_info.num_shader_engines; se++)
-		for (i = 0; i < 4; i++)
-			cu_per_sh[se] += hweight32(cu_info.cu_bitmap[se][i]);
+		for (sh = 0; sh < cu_info.num_shader_arrays_per_engine; sh++)
+			cu_per_se[se] += hweight32(cu_info.cu_bitmap[se % 4][sh + (se / 4)]);
 
 	/* Symmetrically map cu_mask to all SEs:
 	 * cu_mask[0] bit0 -> se_mask[0] bit0;
@@ -128,6 +128,6 @@ void mqd_symmetrically_map_cu_mask(struct mqd_manager *mm,
 				se = 0;
 				cu++;
 			}
-		} while (cu >= cu_per_sh[se] && cu < 32);
+		} while (cu >= cu_per_se[se] && cu < 32);
 	}
 }
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager.h b/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager.h
index 550b61e81015..fbdb16418847 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager.h
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager.h
@@ -26,6 +26,8 @@
 
 #include "kfd_priv.h"
 
+#define KFD_MAX_NUM_SE 8
+
 /**
  * struct mqd_manager
  *
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_v9.c b/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_v9.c
index 0c58f91b3ff3..d3380c5bdbde 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_v9.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_v9.c
@@ -46,7 +46,7 @@ static void update_cu_mask(struct mqd_manager *mm, void *mqd,
 			struct queue_properties *q)
 {
 	struct v9_mqd *m;
-	uint32_t se_mask[4] = {0}; /* 4 is the max # of SEs */
+	uint32_t se_mask[KFD_MAX_NUM_SE] = {0};
 
 	if (q->cu_mask_count == 0)
 		return;
@@ -59,12 +59,20 @@ static void update_cu_mask(struct mqd_manager *mm, void *mqd,
 	m->compute_static_thread_mgmt_se1 = se_mask[1];
 	m->compute_static_thread_mgmt_se2 = se_mask[2];
 	m->compute_static_thread_mgmt_se3 = se_mask[3];
+	m->compute_static_thread_mgmt_se4 = se_mask[4];
+	m->compute_static_thread_mgmt_se5 = se_mask[5];
+	m->compute_static_thread_mgmt_se6 = se_mask[6];
+	m->compute_static_thread_mgmt_se7 = se_mask[7];
 
-	pr_debug("update cu mask to %#x %#x %#x %#x\n",
+	pr_debug("update cu mask to %#x %#x %#x %#x %#x %#x %#x %#x\n",
 		m->compute_static_thread_mgmt_se0,
 		m->compute_static_thread_mgmt_se1,
 		m->compute_static_thread_mgmt_se2,
-		m->compute_static_thread_mgmt_se3);
+		m->compute_static_thread_mgmt_se3,
+		m->compute_static_thread_mgmt_se4,
+		m->compute_static_thread_mgmt_se5,
+		m->compute_static_thread_mgmt_se6,
+		m->compute_static_thread_mgmt_se7);
 }
 
 static void set_priority(struct v9_mqd *m, struct queue_properties *q)
@@ -125,6 +133,10 @@ static void init_mqd(struct mqd_manager *mm, void **mqd,
 	m->compute_static_thread_mgmt_se1 = 0xFFFFFFFF;
 	m->compute_static_thread_mgmt_se2 = 0xFFFFFFFF;
 	m->compute_static_thread_mgmt_se3 = 0xFFFFFFFF;
+	m->compute_static_thread_mgmt_se4 = 0xFFFFFFFF;
+	m->compute_static_thread_mgmt_se5 = 0xFFFFFFFF;
+	m->compute_static_thread_mgmt_se6 = 0xFFFFFFFF;
+	m->compute_static_thread_mgmt_se7 = 0xFFFFFFFF;
 
 	m->cp_hqd_persistent_state = CP_HQD_PERSISTENT_STATE__PRELOAD_REQ_MASK |
 			0x53 << CP_HQD_PERSISTENT_STATE__PRELOAD_SIZE__SHIFT;
diff --git a/drivers/gpu/drm/amd/include/v9_structs.h b/drivers/gpu/drm/amd/include/v9_structs.h
index 8b383dbe1cda..a0c672889fe4 100644
--- a/drivers/gpu/drm/amd/include/v9_structs.h
+++ b/drivers/gpu/drm/amd/include/v9_structs.h
@@ -196,10 +196,10 @@ struct v9_mqd {
 	uint32_t compute_wave_restore_addr_lo;
 	uint32_t compute_wave_restore_addr_hi;
 	uint32_t compute_wave_restore_control;
-	uint32_t reserved_39;
-	uint32_t reserved_40;
-	uint32_t reserved_41;
-	uint32_t reserved_42;
+	uint32_t compute_static_thread_mgmt_se4;
+	uint32_t compute_static_thread_mgmt_se5;
+	uint32_t compute_static_thread_mgmt_se6;
+	uint32_t compute_static_thread_mgmt_se7;
 	uint32_t reserved_43;
 	uint32_t reserved_44;
 	uint32_t reserved_45;
-- 
2.28.0