From: =?UTF-8?q?Christian=20K=C3=B6nig?= Date: Thu, 12 Jul 2018 14:31:25 +0200 Subject: drm/amdgpu: fix TTM move entity init order MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Git-commit: b7d85e1db32ea85b09f58f416da48f44285ff41f Patch-mainline: v4.19-rc1 References: FATE#326289 FATE#326079 FATE#326049 FATE#322398 FATE#326166 We are initializing the entity before the scheduler is actually initialized. This can lead to all kinds of problems, but especially to a NULL pointer dereference because of Nayan's scheduler work. Signed-off-by: Christian König Acked-by: Alex Deucher Signed-off-by: Alex Deucher Acked-by: Petr Tesarik --- drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c | 37 ++++++++++++++++++-------------- 1 file changed, 21 insertions(+), 16 deletions(-) --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c @@ -104,8 +104,6 @@ static void amdgpu_ttm_mem_global_releas static int amdgpu_ttm_global_init(struct amdgpu_device *adev) { struct drm_global_reference *global_ref; - struct amdgpu_ring *ring; - struct drm_sched_rq *rq; int r; /* ensure reference is false in case init fails */ @@ -138,21 +136,10 @@ static int amdgpu_ttm_global_init(struct mutex_init(&adev->mman.gtt_window_lock); - ring = adev->mman.buffer_funcs_ring; - rq = &ring->sched.sched_rq[DRM_SCHED_PRIORITY_KERNEL]; - r = drm_sched_entity_init(&ring->sched, &adev->mman.entity, - rq, NULL); - if (r) { - DRM_ERROR("Failed setting up TTM BO move run queue.\n"); - goto error_entity; - } - adev->mman.mem_global_referenced = true; return 0; -error_entity: - drm_global_item_unref(&adev->mman.bo_global_ref.ref); error_bo: drm_global_item_unref(&adev->mman.mem_global_ref); error_mem: @@ -162,8 +149,6 @@ error_mem: static void amdgpu_ttm_global_fini(struct amdgpu_device *adev) { if (adev->mman.mem_global_referenced) { - drm_sched_entity_destroy(adev->mman.entity.sched, - &adev->mman.entity); mutex_destroy(&adev->mman.gtt_window_lock); 
drm_global_item_unref(&adev->mman.bo_global_ref.ref); drm_global_item_unref(&adev->mman.mem_global_ref); @@ -1922,10 +1907,30 @@ void amdgpu_ttm_set_buffer_funcs_status( { struct ttm_mem_type_manager *man = &adev->mman.bdev.man[TTM_PL_VRAM]; uint64_t size; + int r; - if (!adev->mman.initialized || adev->in_gpu_reset) + if (!adev->mman.initialized || adev->in_gpu_reset || + adev->mman.buffer_funcs_enabled == enable) return; + if (enable) { + struct amdgpu_ring *ring; + struct drm_sched_rq *rq; + + ring = adev->mman.buffer_funcs_ring; + rq = &ring->sched.sched_rq[DRM_SCHED_PRIORITY_KERNEL]; + r = drm_sched_entity_init(&ring->sched, &adev->mman.entity, + rq, NULL); + if (r) { + DRM_ERROR("Failed setting up TTM BO move entity (%d)\n", + r); + return; + } + } else { + drm_sched_entity_destroy(adev->mman.entity.sched, + &adev->mman.entity); + } + /* this just adjusts TTM size idea, which sets lpfn to the correct value */ if (enable) size = adev->gmc.real_vram_size;