Blob Blame History Raw
From 9ad99a19046a6576b791145452e9f95250fe5db8 Mon Sep 17 00:00:00 2001
From: Philip Yang <Philip.Yang@amd.com>
Date: Mon, 3 Oct 2022 17:53:25 -0400
Subject: drm/amdgpu: Correct amdgpu_amdkfd_total_mem_size calculation
Git-commit: 2302d507149f0ae7cc697089ab5675a2d4cf9d2a
Patch-mainline: v6.1-rc1
References: jsc#PED-1166 jsc#PED-1168 jsc#PED-1170 jsc#PED-1218 jsc#PED-1220 jsc#PED-1222 jsc#PED-1223 jsc#PED-1225 jsc#PED-2849

amdkfd_total_mem_size is the size of total GPUs vram plus system memory
to estimate page tables memory usage and leave enough VRAM room for page
tables allocation.

Calculate amdkfd_total_mem_size in amdgpu_amdkfd_device_probe is
incorrect because adev->gmc.real_vram_size is still 0 called from
amdgpu_device_ip_early_init. Move the calculation
to amdgpu_amdkfd_device_init to get the correct VRAM size.

Do reverse calculation in amdgpu_amdkfd_device_fini_sw to support
hot-unplugging GPUs.

Signed-off-by: Philip Yang <Philip.Yang@amd.com>
Reviewed-by: Felix Kuehling <Felix.Kuehling@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
Acked-by: Patrik Jakobsson <pjakobsson@suse.de>
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c
index 9e98f3866edc..03bbfaa51cbc 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c
@@ -75,9 +75,6 @@ void amdgpu_amdkfd_device_probe(struct amdgpu_device *adev)
 		return;
 
 	adev->kfd.dev = kgd2kfd_probe(adev, vf);
-
-	if (adev->kfd.dev)
-		amdgpu_amdkfd_total_mem_size += adev->gmc.real_vram_size;
 }
 
 /**
@@ -201,6 +198,8 @@ void amdgpu_amdkfd_device_init(struct amdgpu_device *adev)
 		adev->kfd.init_complete = kgd2kfd_device_init(adev->kfd.dev,
 						adev_to_drm(adev), &gpu_resources);
 
+		amdgpu_amdkfd_total_mem_size += adev->gmc.real_vram_size;
+
 		INIT_WORK(&adev->kfd.reset_work, amdgpu_amdkfd_reset_work);
 	}
 }
@@ -210,6 +209,7 @@ void amdgpu_amdkfd_device_fini_sw(struct amdgpu_device *adev)
 	if (adev->kfd.dev) {
 		kgd2kfd_device_exit(adev->kfd.dev);
 		adev->kfd.dev = NULL;
+		amdgpu_amdkfd_total_mem_size -= adev->gmc.real_vram_size;
 	}
 }
 
-- 
2.38.1