Blob Blame History Raw
From: Chunming Zhou <David1.Zhou@amd.com>
Date: Mon, 15 May 2017 14:20:00 +0800
Subject: drm/amdgpu: check if vram is lost v2
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
Git-commit: 0c49e0b8a43c8addb0498cd32390f4ef08b5dd27
Patch-mainline: v4.13-rc1
References: FATE#326289 FATE#326079 FATE#326049 FATE#322398 FATE#326166

backup first 64 byte of gart table as reset magic, check if magic is same
after gpu hw reset.
v2: use memcmp instead of manual innovation.

Signed-off-by: Chunming Zhou <David1.Zhou@amd.com>
Reviewed-by: Christian König <christian.koenig@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
Acked-by: Petr Tesarik <ptesarik@suse.com>
---
 drivers/gpu/drm/amd/amdgpu/amdgpu.h        |    2 ++
 drivers/gpu/drm/amd/amdgpu/amdgpu_device.c |   20 +++++++++++++++++++-
 2 files changed, 21 insertions(+), 1 deletion(-)

--- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
@@ -1427,6 +1427,7 @@ typedef void (*amdgpu_wreg_t)(struct amd
 typedef uint32_t (*amdgpu_block_rreg_t)(struct amdgpu_device*, uint32_t, uint32_t);
 typedef void (*amdgpu_block_wreg_t)(struct amdgpu_device*, uint32_t, uint32_t, uint32_t);
 
+#define AMDGPU_RESET_MAGIC_NUM 64
 struct amdgpu_device {
 	struct device			*dev;
 	struct drm_device		*ddev;
@@ -1619,6 +1620,7 @@ struct amdgpu_device {
 
 	/* record hw reset is performed */
 	bool has_hw_reset;
+	u8				reset_magic[AMDGPU_RESET_MAGIC_NUM];
 
 };
 
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
@@ -1654,6 +1654,17 @@ static int amdgpu_init(struct amdgpu_dev
 	return 0;
 }
 
+static void amdgpu_fill_reset_magic(struct amdgpu_device *adev)
+{
+	memcpy(adev->reset_magic, adev->gart.ptr, AMDGPU_RESET_MAGIC_NUM);
+}
+
+static bool amdgpu_check_vram_lost(struct amdgpu_device *adev)
+{
+	return !!memcmp(adev->gart.ptr, adev->reset_magic,
+			AMDGPU_RESET_MAGIC_NUM);
+}
+
 static int amdgpu_late_init(struct amdgpu_device *adev)
 {
 	int i = 0, r;
@@ -1684,6 +1695,8 @@ static int amdgpu_late_init(struct amdgp
 		}
 	}
 
+	amdgpu_fill_reset_magic(adev);
+
 	return 0;
 }
 
@@ -2758,7 +2771,7 @@ int amdgpu_gpu_reset(struct amdgpu_devic
 {
 	int i, r;
 	int resched;
-	bool need_full_reset;
+	bool need_full_reset, vram_lost = false;
 
 	if (!amdgpu_check_soft_reset(adev)) {
 		DRM_INFO("No hardware hang detected. Did some blocks stall?\n");
@@ -2821,12 +2834,17 @@ retry:
 			r = amdgpu_resume_phase1(adev);
 			if (r)
 				goto out;
+			vram_lost = amdgpu_check_vram_lost(adev);
+			if (vram_lost)
+				DRM_ERROR("VRAM is lost!\n");
 			r = amdgpu_ttm_recover_gart(adev);
 			if (r)
 				goto out;
 			r = amdgpu_resume_phase2(adev);
 			if (r)
 				goto out;
+			if (vram_lost)
+				amdgpu_fill_reset_magic(adev);
 		}
 	}
 out: