From 5f11b354025a0cfaaf88a17dc2789b2489de77f5 Mon Sep 17 00:00:00 2001
From: Jay Cornwall <jay.cornwall@amd.com>
Date: Wed, 12 Jul 2023 15:51:47 -0500
Subject: drm/amdkfd: Sign-extend TMA address in trap handler
Git-commit: 05c899eacc0412bae0581add19c4062db8bdffda
Patch-mainline: v6.6-rc1
References: jsc#PED-3527 jsc#PED-5475 jsc#PED-6068 jsc#PED-6070 jsc#PED-6116 jsc#PED-6120 jsc#PED-5065 jsc#PED-5477 jsc#PED-5511 jsc#PED-6041 jsc#PED-6069 jsc#PED-6071

SMEM instructions can reach addresses above 47 bits but require
bit 47 to be sign-extended through bits [63:48].

This allows the TMA to be relocated in a following patch.

Signed-off-by: Jay Cornwall <jay.cornwall@amd.com>
Reviewed-by: Felix Kuehling <Felix.Kuehling@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
Acked-by: Patrik Jakobsson <pjakobsson@suse.de>
---
 .../gpu/drm/amd/amdkfd/cwsr_trap_handler.h    | 58 ++++++++++++-------
 .../amd/amdkfd/cwsr_trap_handler_gfx10.asm    |  5 ++
 .../drm/amd/amdkfd/cwsr_trap_handler_gfx9.asm |  5 ++
 3 files changed, 46 insertions(+), 22 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdkfd/cwsr_trap_handler.h b/drivers/gpu/drm/amd/amdkfd/cwsr_trap_handler.h
index 717ad0633dbe..d7cd5fa313ff 100644
--- a/drivers/gpu/drm/amd/amdkfd/cwsr_trap_handler.h
+++ b/drivers/gpu/drm/amd/amdkfd/cwsr_trap_handler.h
@@ -274,14 +274,14 @@ static const uint32_t cwsr_trap_gfx8_hex[] = {
 
 
 static const uint32_t cwsr_trap_gfx9_hex[] = {
-	0xbf820001, 0xbf820254,
+	0xbf820001, 0xbf820258,
 	0xb8f8f802, 0x8978ff78,
 	0x00020006, 0xb8fbf803,
 	0x866eff78, 0x00002000,
 	0xbf840009, 0x866eff6d,
 	0x00ff0000, 0xbf85001e,
 	0x866eff7b, 0x00000400,
-	0xbf850051, 0xbf8e0010,
+	0xbf850055, 0xbf8e0010,
 	0xb8fbf803, 0xbf82fffa,
 	0x866eff7b, 0x03c00900,
 	0xbf850015, 0x866eff7b,
@@ -294,13 +294,15 @@ static const uint32_t cwsr_trap_gfx9_hex[] = {
 	0xbf850007, 0xb8eef801,
 	0x866eff6e, 0x00000800,
 	0xbf850003, 0x866eff7b,
-	0x00000400, 0xbf850036,
+	0x00000400, 0xbf85003a,
 	0xb8faf807, 0x867aff7a,
 	0x001f8000, 0x8e7a8b7a,
 	0x8977ff77, 0xfc000000,
 	0x87777a77, 0xba7ff807,
 	0x00000000, 0xb8faf812,
 	0xb8fbf813, 0x8efa887a,
+	0xbf0d8f7b, 0xbf840002,
+	0x877bff7b, 0xffff0000,
 	0xc0031bbd, 0x00000010,
 	0xbf8cc07f, 0x8e6e976e,
 	0x8977ff77, 0x00800000,
@@ -676,14 +678,14 @@ static const uint32_t cwsr_trap_gfx9_hex[] = {
 };
 
 static const uint32_t cwsr_trap_nv1x_hex[] = {
-	0xbf820001, 0xbf8201f1,
+	0xbf820001, 0xbf8201f5,
 	0xb0804004, 0xb978f802,
 	0x8a78ff78, 0x00020006,
 	0xb97bf803, 0x876eff78,
 	0x00002000, 0xbf840009,
 	0x876eff6d, 0x00ff0000,
 	0xbf85001e, 0x876eff7b,
-	0x00000400, 0xbf850057,
+	0x00000400, 0xbf85005b,
 	0xbf8e0010, 0xb97bf803,
 	0xbf82fffa, 0x876eff7b,
 	0x00000900, 0xbf850015,
@@ -697,7 +699,7 @@ static const uint32_t cwsr_trap_nv1x_hex[] = {
 	0xb96ef801, 0x876eff6e,
 	0x00000800, 0xbf850003,
 	0x876eff7b, 0x00000400,
-	0xbf85003c, 0x8a77ff77,
+	0xbf850040, 0x8a77ff77,
 	0xff000000, 0xb97af807,
 	0x877bff7a, 0x02000000,
 	0x8f7b867b, 0x88777b77,
@@ -706,6 +708,8 @@ static const uint32_t cwsr_trap_nv1x_hex[] = {
 	0x8a7aff7a, 0x023f8000,
 	0xb9faf807, 0xb97af812,
 	0xb97bf813, 0x8ffa887a,
+	0xbf0d8f7b, 0xbf840002,
+	0x887bff7b, 0xffff0000,
 	0xf4011bbd, 0xfa000010,
 	0xbf8cc07f, 0x8f6e976e,
 	0x8a77ff77, 0x00800000,
@@ -1094,14 +1098,14 @@ static const uint32_t cwsr_trap_nv1x_hex[] = {
 };
 
 static const uint32_t cwsr_trap_arcturus_hex[] = {
-	0xbf820001, 0xbf8202d0,
+	0xbf820001, 0xbf8202d4,
 	0xb8f8f802, 0x8978ff78,
 	0x00020006, 0xb8fbf803,
 	0x866eff78, 0x00002000,
 	0xbf840009, 0x866eff6d,
 	0x00ff0000, 0xbf85001e,
 	0x866eff7b, 0x00000400,
-	0xbf850051, 0xbf8e0010,
+	0xbf850055, 0xbf8e0010,
 	0xb8fbf803, 0xbf82fffa,
 	0x866eff7b, 0x03c00900,
 	0xbf850015, 0x866eff7b,
@@ -1114,13 +1118,15 @@ static const uint32_t cwsr_trap_arcturus_hex[] = {
 	0xbf850007, 0xb8eef801,
 	0x866eff6e, 0x00000800,
 	0xbf850003, 0x866eff7b,
-	0x00000400, 0xbf850036,
+	0x00000400, 0xbf85003a,
 	0xb8faf807, 0x867aff7a,
 	0x001f8000, 0x8e7a8b7a,
 	0x8977ff77, 0xfc000000,
 	0x87777a77, 0xba7ff807,
 	0x00000000, 0xb8faf812,
 	0xb8fbf813, 0x8efa887a,
+	0xbf0d8f7b, 0xbf840002,
+	0x877bff7b, 0xffff0000,
 	0xc0031bbd, 0x00000010,
 	0xbf8cc07f, 0x8e6e976e,
 	0x8977ff77, 0x00800000,
@@ -1572,14 +1578,14 @@ static const uint32_t cwsr_trap_arcturus_hex[] = {
 };
 
 static const uint32_t cwsr_trap_aldebaran_hex[] = {
-	0xbf820001, 0xbf8202db,
+	0xbf820001, 0xbf8202df,
 	0xb8f8f802, 0x8978ff78,
 	0x00020006, 0xb8fbf803,
 	0x866eff78, 0x00002000,
 	0xbf840009, 0x866eff6d,
 	0x00ff0000, 0xbf85001e,
 	0x866eff7b, 0x00000400,
-	0xbf850051, 0xbf8e0010,
+	0xbf850055, 0xbf8e0010,
 	0xb8fbf803, 0xbf82fffa,
 	0x866eff7b, 0x03c00900,
 	0xbf850015, 0x866eff7b,
@@ -1592,13 +1598,15 @@ static const uint32_t cwsr_trap_aldebaran_hex[] = {
 	0xbf850007, 0xb8eef801,
 	0x866eff6e, 0x00000800,
 	0xbf850003, 0x866eff7b,
-	0x00000400, 0xbf850036,
+	0x00000400, 0xbf85003a,
 	0xb8faf807, 0x867aff7a,
 	0x001f8000, 0x8e7a8b7a,
 	0x8977ff77, 0xfc000000,
 	0x87777a77, 0xba7ff807,
 	0x00000000, 0xb8faf812,
 	0xb8fbf813, 0x8efa887a,
+	0xbf0d8f7b, 0xbf840002,
+	0x877bff7b, 0xffff0000,
 	0xc0031bbd, 0x00000010,
 	0xbf8cc07f, 0x8e6e976e,
 	0x8977ff77, 0x00800000,
@@ -2061,14 +2069,14 @@ static const uint32_t cwsr_trap_aldebaran_hex[] = {
 };
 
 static const uint32_t cwsr_trap_gfx10_hex[] = {
-	0xbf820001, 0xbf82021c,
+	0xbf820001, 0xbf820220,
 	0xb0804004, 0xb978f802,
 	0x8a78ff78, 0x00020006,
 	0xb97bf803, 0x876eff78,
 	0x00002000, 0xbf840009,
 	0x876eff6d, 0x00ff0000,
 	0xbf85001e, 0x876eff7b,
-	0x00000400, 0xbf850041,
+	0x00000400, 0xbf850045,
 	0xbf8e0010, 0xb97bf803,
 	0xbf82fffa, 0x876eff7b,
 	0x00000900, 0xbf850015,
@@ -2082,8 +2090,10 @@ static const uint32_t cwsr_trap_gfx10_hex[] = {
 	0xb96ef801, 0x876eff6e,
 	0x00000800, 0xbf850003,
 	0x876eff7b, 0x00000400,
-	0xbf850026, 0xb97af812,
+	0xbf85002a, 0xb97af812,
 	0xb97bf813, 0x8ffa887a,
+	0xbf0d8f7b, 0xbf840002,
+	0x887bff7b, 0xffff0000,
 	0xf4011bbd, 0xfa000010,
 	0xbf8cc07f, 0x8f6e976e,
 	0x8a77ff77, 0x00800000,
@@ -2496,7 +2506,7 @@ static const uint32_t cwsr_trap_gfx10_hex[] = {
 };
 
 static const uint32_t cwsr_trap_gfx11_hex[] = {
-	0xbfa00001, 0xbfa00221,
+	0xbfa00001, 0xbfa00225,
 	0xb0804006, 0xb8f8f802,
 	0x9178ff78, 0x00020006,
 	0xb8fbf803, 0xbf0d9e6d,
@@ -2506,7 +2516,7 @@ static const uint32_t cwsr_trap_gfx11_hex[] = {
 	0xbfa10009, 0x8b6eff6d,
 	0x00ff0000, 0xbfa2001e,
 	0x8b6eff7b, 0x00000400,
-	0xbfa20041, 0xbf830010,
+	0xbfa20045, 0xbf830010,
 	0xb8fbf803, 0xbfa0fffa,
 	0x8b6eff7b, 0x00000900,
 	0xbfa20015, 0x8b6eff7b,
@@ -2519,9 +2529,11 @@ static const uint32_t cwsr_trap_gfx11_hex[] = {
 	0xbfa20007, 0xb8eef801,
 	0x8b6eff6e, 0x00000800,
 	0xbfa20003, 0x8b6eff7b,
-	0x00000400, 0xbfa20026,
+	0x00000400, 0xbfa2002a,
 	0xbefa4d82, 0xbf89fc07,
-	0x84fa887a, 0xf4005bbd,
+	0x84fa887a, 0xbf0d8f7b,
+	0xbfa10002, 0x8c7bff7b,
+	0xffff0000, 0xf4005bbd,
 	0xf8000010, 0xbf89fc07,
 	0x846e976e, 0x9177ff77,
 	0x00800000, 0x8c776e77,
@@ -2939,14 +2951,14 @@ static const uint32_t cwsr_trap_gfx11_hex[] = {
 };
 
 static const uint32_t cwsr_trap_gfx9_4_3_hex[] = {
-	0xbf820001, 0xbf8202d7,
+	0xbf820001, 0xbf8202db,
 	0xb8f8f802, 0x8978ff78,
 	0x00020006, 0xb8fbf803,
 	0x866eff78, 0x00002000,
 	0xbf840009, 0x866eff6d,
 	0x00ff0000, 0xbf85001a,
 	0x866eff7b, 0x00000400,
-	0xbf85004d, 0xbf8e0010,
+	0xbf850051, 0xbf8e0010,
 	0xb8fbf803, 0xbf82fffa,
 	0x866eff7b, 0x03c00900,
 	0xbf850011, 0x866eff7b,
@@ -2957,13 +2969,15 @@ static const uint32_t cwsr_trap_gfx9_4_3_hex[] = {
 	0x866e6f6e, 0xbf850006,
 	0x866eff6d, 0x00ff0000,
 	0xbf850003, 0x866eff7b,
-	0x00000400, 0xbf850036,
+	0x00000400, 0xbf85003a,
 	0xb8faf807, 0x867aff7a,
 	0x001f8000, 0x8e7a8b7a,
 	0x8979ff79, 0xfc000000,
 	0x87797a79, 0xba7ff807,
 	0x00000000, 0xb8faf812,
 	0xb8fbf813, 0x8efa887a,
+	0xbf0d8f7b, 0xbf840002,
+	0x877bff7b, 0xffff0000,
 	0xc0031bbd, 0x00000010,
 	0xbf8cc07f, 0x8e6e976e,
 	0x8979ff79, 0x00800000,
diff --git a/drivers/gpu/drm/amd/amdkfd/cwsr_trap_handler_gfx10.asm b/drivers/gpu/drm/amd/amdkfd/cwsr_trap_handler_gfx10.asm
index 8b92c33c2a7c..fdab64624422 100644
--- a/drivers/gpu/drm/amd/amdkfd/cwsr_trap_handler_gfx10.asm
+++ b/drivers/gpu/drm/amd/amdkfd/cwsr_trap_handler_gfx10.asm
@@ -276,6 +276,11 @@ L_FETCH_2ND_TRAP:
 #endif
 	s_lshl_b64	[ttmp14, ttmp15], [ttmp14, ttmp15], 0x8
 
+	s_bitcmp1_b32	ttmp15, 0xF
+	s_cbranch_scc0	L_NO_SIGN_EXTEND_TMA
+	s_or_b32	ttmp15, ttmp15, 0xFFFF0000
+L_NO_SIGN_EXTEND_TMA:
+
 	s_load_dword    ttmp2, [ttmp14, ttmp15], 0x10 glc:1			// debug trap enabled flag
 	s_waitcnt       lgkmcnt(0)
 	s_lshl_b32      ttmp2, ttmp2, TTMP11_DEBUG_TRAP_ENABLED_SHIFT
diff --git a/drivers/gpu/drm/amd/amdkfd/cwsr_trap_handler_gfx9.asm b/drivers/gpu/drm/amd/amdkfd/cwsr_trap_handler_gfx9.asm
index f2087cc2e89d..e506411ad28a 100644
--- a/drivers/gpu/drm/amd/amdkfd/cwsr_trap_handler_gfx9.asm
+++ b/drivers/gpu/drm/amd/amdkfd/cwsr_trap_handler_gfx9.asm
@@ -283,6 +283,11 @@ L_FETCH_2ND_TRAP:
     s_getreg_b32    ttmp15, hwreg(HW_REG_SQ_SHADER_TMA_HI)
     s_lshl_b64      [ttmp14, ttmp15], [ttmp14, ttmp15], 0x8
 
+    s_bitcmp1_b32   ttmp15, 0xF
+    s_cbranch_scc0  L_NO_SIGN_EXTEND_TMA
+    s_or_b32        ttmp15, ttmp15, 0xFFFF0000
+L_NO_SIGN_EXTEND_TMA:
+
     s_load_dword    ttmp2, [ttmp14, ttmp15], 0x10 glc:1 // debug trap enabled flag
     s_waitcnt       lgkmcnt(0)
     s_lshl_b32      ttmp2, ttmp2, TTMP_DEBUG_TRAP_ENABLED_SHIFT
-- 
2.43.0