Blob Blame History Raw
From e352625796b343cc85b7648a74a57e739b12dc27 Mon Sep 17 00:00:00 2001
From: Monk Liu <Monk.Liu@amd.com>
Date: Tue, 27 Aug 2019 16:32:55 +0800
Subject: drm/amdgpu: introduce vram lost for reset (v2)
Git-commit: e352625796b343cc85b7648a74a57e739b12dc27
Patch-mainline: v5.4-rc1
References: bsc#1152489

for SOC15/vega10 the BACO reset & mode1 would introduce vram lost
in high end address range, current kmd's vram lost checking cannot
catch it since it only check very ahead visible frame buffer

v2:
cover NV as well

Signed-off-by: Monk Liu <Monk.Liu@amd.com>
Reviewed-by: Alex Deucher <alexander.deucher@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
Acked-by: Thomas Zimmermann <tzimmermann@suse.de>
---
 drivers/gpu/drm/amd/amdgpu/amdgpu.h        | 1 +
 drivers/gpu/drm/amd/amdgpu/amdgpu_device.c | 4 ++--
 drivers/gpu/drm/amd/amdgpu/nv.c            | 7 +++++--
 drivers/gpu/drm/amd/amdgpu/soc15.c         | 2 ++
 4 files changed, 10 insertions(+), 4 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
index b2c801fb4ab5..bd37df5dd6d0 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
@@ -1151,6 +1151,7 @@ int emu_soc_asic_init(struct amdgpu_device *adev);
 #define amdgpu_asic_get_pcie_usage(adev, cnt0, cnt1) ((adev)->asic_funcs->get_pcie_usage((adev), (cnt0), (cnt1)))
 #define amdgpu_asic_need_reset_on_init(adev) (adev)->asic_funcs->need_reset_on_init((adev))
 #define amdgpu_asic_get_pcie_replay_count(adev) ((adev)->asic_funcs->get_pcie_replay_count((adev)))
+#define amdgpu_inc_vram_lost(adev) atomic_inc(&((adev)->vram_lost_counter));
 
 /* Common functions */
 bool amdgpu_device_should_recover_gpu(struct amdgpu_device *adev);
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
index 2f884699eaef..b8c33df3f4c0 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
@@ -3483,7 +3483,7 @@ static int amdgpu_device_reset_sriov(struct amdgpu_device *adev,
 	amdgpu_virt_init_data_exchange(adev);
 	amdgpu_virt_release_full_gpu(adev, true);
 	if (!r && adev->virt.gim_feature & AMDGIM_FEATURE_GIM_FLR_VRAMLOST) {
-		atomic_inc(&adev->vram_lost_counter);
+		amdgpu_inc_vram_lost(adev);
 		r = amdgpu_device_recover_vram(adev);
 	}
 
@@ -3649,7 +3649,7 @@ static int amdgpu_do_asic_reset(struct amdgpu_hive_info *hive,
 				vram_lost = amdgpu_device_check_vram_lost(tmp_adev);
 				if (vram_lost) {
 					DRM_INFO("VRAM is lost due to GPU reset!\n");
-					atomic_inc(&tmp_adev->vram_lost_counter);
+					amdgpu_inc_vram_lost(tmp_adev);
 				}
 
 				r = amdgpu_gtt_mgr_recover(
diff --git a/drivers/gpu/drm/amd/amdgpu/nv.c b/drivers/gpu/drm/amd/amdgpu/nv.c
index e150c281feb6..1b476fcd2197 100644
--- a/drivers/gpu/drm/amd/amdgpu/nv.c
+++ b/drivers/gpu/drm/amd/amdgpu/nv.c
@@ -316,10 +316,13 @@ static int nv_asic_reset(struct amdgpu_device *adev)
 	int ret = 0;
 	struct smu_context *smu = &adev->smu;
 
-	if (nv_asic_reset_method(adev) == AMD_RESET_METHOD_BACO)
+	if (nv_asic_reset_method(adev) == AMD_RESET_METHOD_BACO) {
+		amdgpu_inc_vram_lost(adev);
 		ret = smu_baco_reset(smu);
-	else
+	} else {
+		amdgpu_inc_vram_lost(adev);
 		ret = nv_asic_mode1_reset(adev);
+	}
 
 	return ret;
 }
diff --git a/drivers/gpu/drm/amd/amdgpu/soc15.c b/drivers/gpu/drm/amd/amdgpu/soc15.c
index 53f21c567e97..c357bec3558f 100644
--- a/drivers/gpu/drm/amd/amdgpu/soc15.c
+++ b/drivers/gpu/drm/amd/amdgpu/soc15.c
@@ -558,10 +558,12 @@ static int soc15_asic_reset(struct amdgpu_device *adev)
 {
 	switch (soc15_asic_reset_method(adev)) {
 		case AMD_RESET_METHOD_BACO:
+			amdgpu_inc_vram_lost(adev);
 			return soc15_asic_baco_reset(adev);
 		case AMD_RESET_METHOD_MODE2:
 			return soc15_mode2_reset(adev);
 		default:
+			amdgpu_inc_vram_lost(adev);
 			return soc15_asic_mode1_reset(adev);
 	}
 }
-- 
2.28.0