Blob Blame History Raw
From: =?UTF-8?q?Christian=20K=C3=B6nig?= <christian.koenig@amd.com>
Date: Mon, 9 Oct 2017 15:18:43 +0200
Subject: drm/amdgpu: move the VRAM lost counter per context
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
Git-commit: e55f2b646df3318e24f12b8388ab6e5cccb3e92d
Patch-mainline: v4.15-rc1
References: FATE#326289 FATE#326079 FATE#326049 FATE#322398 FATE#326166

Track the VRAM lost counter per context instead of per device, and return
-ECANCELED instead of -ENODEV when a context's counter is out of date.

Signed-off-by: Christian König <christian.koenig@amd.com>
Reviewed-by: Nicolai Hähnle <nicolai.haehnle@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
Acked-by: Petr Tesarik <ptesarik@suse.com>
---
 drivers/gpu/drm/amd/amdgpu/amdgpu.h     |    6 ++----
 drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c  |    9 +++++----
 drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c |    1 +
 drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c |   16 ----------------
 4 files changed, 8 insertions(+), 24 deletions(-)

--- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
@@ -732,10 +732,11 @@ struct amdgpu_ctx {
 	struct amdgpu_device    *adev;
 	struct amdgpu_queue_mgr queue_mgr;
 	unsigned		reset_counter;
+	uint32_t		vram_lost_counter;
 	spinlock_t		ring_lock;
 	struct dma_fence	**fences;
 	struct amdgpu_ctx_ring	rings[AMDGPU_MAX_RINGS];
-	bool 			preamble_presented;
+	bool			preamble_presented;
 	enum amd_sched_priority init_priority;
 	enum amd_sched_priority override_priority;
 	struct mutex            lock;
@@ -778,7 +779,6 @@ struct amdgpu_fpriv {
 	struct mutex		bo_list_lock;
 	struct idr		bo_list_handles;
 	struct amdgpu_ctx_mgr	ctx_mgr;
-	u32			vram_lost_counter;
 };
 
 /*
@@ -1860,8 +1860,6 @@ static inline bool amdgpu_has_atpx(void)
 extern const struct drm_ioctl_desc amdgpu_ioctls_kms[];
 extern const int amdgpu_max_kms_ioctl;
 
-bool amdgpu_kms_vram_lost(struct amdgpu_device *adev,
-			  struct amdgpu_fpriv *fpriv);
 int amdgpu_driver_load_kms(struct drm_device *dev, unsigned long flags);
 void amdgpu_driver_unload_kms(struct drm_device *dev);
 void amdgpu_driver_lastclose_kms(struct drm_device *dev);
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
@@ -172,7 +172,11 @@ static int amdgpu_cs_parser_init(struct
 	if (ret)
 		goto free_all_kdata;
 
-	p->job->vram_lost_counter = fpriv->vram_lost_counter;
+	p->job->vram_lost_counter = atomic_read(&p->adev->vram_lost_counter);
+	if (p->ctx->vram_lost_counter != p->job->vram_lost_counter) {
+		ret = -ECANCELED;
+		goto free_all_kdata;
+	}
 
 	if (p->uf_entry.robj)
 		p->job->uf_addr = uf_offset;
@@ -1203,7 +1207,6 @@ static int amdgpu_cs_submit(struct amdgp
 int amdgpu_cs_ioctl(struct drm_device *dev, void *data, struct drm_file *filp)
 {
 	struct amdgpu_device *adev = dev->dev_private;
-	struct amdgpu_fpriv *fpriv = filp->driver_priv;
 	union drm_amdgpu_cs *cs = data;
 	struct amdgpu_cs_parser parser = {};
 	bool reserved_buffers = false;
@@ -1211,8 +1214,6 @@ int amdgpu_cs_ioctl(struct drm_device *d
 
 	if (!adev->accel_working)
 		return -EBUSY;
-	if (amdgpu_kms_vram_lost(adev, fpriv))
-		return -ENODEV;
 
 	parser.adev = adev;
 	parser.filp = filp;
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c
@@ -75,6 +75,7 @@ static int amdgpu_ctx_init(struct amdgpu
 	}
 
 	ctx->reset_counter = atomic_read(&adev->gpu_reset_counter);
+	ctx->vram_lost_counter = atomic_read(&adev->vram_lost_counter);
 	ctx->init_priority = priority;
 	ctx->override_priority = AMD_SCHED_PRIORITY_UNSET;
 
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c
@@ -790,21 +790,6 @@ void amdgpu_driver_lastclose_kms(struct
 }
 
 /**
- * amdgpu_kms_vram_lost - check if VRAM was lost for this client
- *
- * @adev: amdgpu device
- * @fpriv: client private
- *
- * Check if all CS is blocked for the client because of lost VRAM
- */
-bool amdgpu_kms_vram_lost(struct amdgpu_device *adev,
-			  struct amdgpu_fpriv *fpriv)
-{
-	return fpriv->vram_lost_counter !=
-		atomic_read(&adev->vram_lost_counter);
-}
-
-/**
  * amdgpu_driver_open_kms - drm callback for open
  *
  * @dev: drm dev pointer
@@ -860,7 +845,6 @@ int amdgpu_driver_open_kms(struct drm_de
 
 	amdgpu_ctx_mgr_init(&fpriv->ctx_mgr);
 
-	fpriv->vram_lost_counter = atomic_read(&adev->vram_lost_counter);
 	file_priv->driver_priv = fpriv;
 
 out_suspend: