Blob Blame History Raw
From f767e5d9f930ed6fce4756d6f16fb996a078b6f7 Mon Sep 17 00:00:00 2001
From: Hua Zhang <hua.zhang@amd.com>
Date: Thu, 14 May 2020 15:47:30 +0800
Subject: drm/amd/powerplay: skip smu_i2c_eeprom_init/fini under sriov mode
Git-commit: a16be2fe145517897e4bc558d249d9fb54053085
Patch-mainline: v5.9-rc1
References: jsc#SLE-12680, jsc#SLE-12880, jsc#SLE-12882, jsc#SLE-12883, jsc#SLE-13496, jsc#SLE-15322

When smu_i2c_eeprom_init is called on the smu resuming process
under sroiv mode, there will be a call trace:
[  436.377690]  dump_stack+0x63/0x85
[  436.377695]  kobject_init+0x77/0x90
[  436.377704]  device_initialize+0x28/0x110
[  436.377708]  device_register+0x12/0x20
[  436.377756]  i2c_register_adapter+0xeb/0x400
[  436.377763]  i2c_add_adapter+0x5a/0x80
[  436.377951]  arcturus_i2c_eeprom_control_init+0x60/0x80 [amdgpu]
[  436.378123]  smu_resume+0xcc/0x110 [amdgpu]
[  436.378247]  amdgpu_device_gpu_recover+0xfb1/0xfc0 [amdgpu]
[  436.378401]  amdgpu_job_timedout+0xf2/0x150 [amdgpu]
[  436.378414]  drm_sched_job_timedout+0x70/0xc0 [amd_sched]
[  436.378420]  ? drm_sched_job_timedout+0x70/0xc0 [amd_sched]
[  436.378430]  process_one_work+0x1fd/0x3f0
[  436.378438]  worker_thread+0x34/0x410
[  436.378444]  kthread+0x121/0x140
[  436.378451]  ? process_one_work+0x3f0/0x3f0
[  436.378456]  ? kthread_create_worker_on_cpu+0x70/0x70
[  436.378464]  ret_from_fork+0x35/0x40

This is because smu_i2c_eeprom is not released on gpu recovering.
Actually, smu_i2c_eeprom_init/fini are only needed under bare
mental mode.

Signed-off-by: Hua Zhang <hua.zhang@amd.com>
Reviewed-by: Hawking Zhang <Hawking.Zhang@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
Signed-off-by: Patrik Jakobsson <pjakobsson@suse.de>
---
 drivers/gpu/drm/amd/powerplay/amdgpu_smu.c | 24 +++++++++++++---------
 1 file changed, 14 insertions(+), 10 deletions(-)

diff --git a/drivers/gpu/drm/amd/powerplay/amdgpu_smu.c b/drivers/gpu/drm/amd/powerplay/amdgpu_smu.c
index 8017386d3c33..a78a1f542ea9 100644
--- a/drivers/gpu/drm/amd/powerplay/amdgpu_smu.c
+++ b/drivers/gpu/drm/amd/powerplay/amdgpu_smu.c
@@ -1363,9 +1363,11 @@ static int smu_hw_init(void *handle)
 	if (ret)
 		goto failed;
 
-	ret = smu_i2c_eeprom_init(smu, &adev->pm.smu_i2c);
-	if (ret)
-		goto failed;
+	if (!amdgpu_sriov_vf(adev)) {
+		ret = smu_i2c_eeprom_init(smu, &adev->pm.smu_i2c);
+		if (ret)
+			goto failed;
+	}
 
 	adev->pm.dpm_enabled = true;
 
@@ -1406,9 +1408,9 @@ static int smu_hw_fini(void *handle)
 
 	adev->pm.dpm_enabled = false;
 
-	smu_i2c_eeprom_fini(smu, &adev->pm.smu_i2c);
-
 	if (!amdgpu_sriov_vf(adev)){
+		smu_i2c_eeprom_fini(smu, &adev->pm.smu_i2c);
+
 		ret = smu_stop_thermal_control(smu);
 		if (ret) {
 			pr_warn("Fail to stop thermal control!\n");
@@ -1549,9 +1551,9 @@ static int smu_suspend(void *handle)
 
 	adev->pm.dpm_enabled = false;
 
-	smu_i2c_eeprom_fini(smu, &adev->pm.smu_i2c);
+	if (!amdgpu_sriov_vf(adev)) {
+		smu_i2c_eeprom_fini(smu, &adev->pm.smu_i2c);
 
-	if(!amdgpu_sriov_vf(adev)) {
 		ret = smu_disable_dpm(smu);
 		if (ret)
 			return ret;
@@ -1596,9 +1598,11 @@ static int smu_resume(void *handle)
 	if (ret)
 		goto failed;
 
-	ret = smu_i2c_eeprom_init(smu, &adev->pm.smu_i2c);
-	if (ret)
-		goto failed;
+	if (!amdgpu_sriov_vf(adev)) {
+		ret = smu_i2c_eeprom_init(smu, &adev->pm.smu_i2c);
+		if (ret)
+			goto failed;
+	}
 
 	if (smu->is_apu)
 		smu_set_gfx_cgpg(&adev->smu, true);
-- 
2.29.2