Blob Blame History Raw
From: Felix Kuehling <Felix.Kuehling@amd.com>
Date: Wed, 27 Sep 2017 00:09:52 -0400
Subject: drm/amdkfd: Improve process termination handling
Git-commit: 9fd3f1bfae6c6c75f0c8aedb5d499d74cdb52eb9
Patch-mainline: v4.15-rc1
References: FATE#326289 FATE#326079 FATE#326049 FATE#322398 FATE#326166

Separate device queue termination from process queue manager
termination. Unmap all queues at once instead of one at a time.
Unmap device queues before the PASID is unbound, in the
kfd_process_iommu_unbind_callback.

When resetting wavefronts in non-HWS mode, do it before the VMID is
released.

Signed-off-by: Ben Goz <ben.goz@amd.com>
Signed-off-by: shaoyun liu <shaoyun.liu@amd.com>
Signed-off-by: Amber Lin <Amber.Lin@amd.com>
Signed-off-by: Yong Zhao <Yong.Zhao@amd.com>
Signed-off-by: Felix Kuehling <Felix.Kuehling@amd.com>
Signed-off-by: Oded Gabbay <oded.gabbay@gmail.com>
Acked-by: Petr Tesarik <ptesarik@suse.com>
---
 drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c  |  185 +++++++++++++----
 drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.h  |    5 
 drivers/gpu/drm/amd/amdkfd/kfd_priv.h                  |   18 +
 drivers/gpu/drm/amd/amdkfd/kfd_process.c               |   36 ---
 drivers/gpu/drm/amd/amdkfd/kfd_process_queue_manager.c |   37 ++-
 5 files changed, 200 insertions(+), 81 deletions(-)

--- a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c
@@ -296,65 +296,73 @@ out_deallocate_hqd:
 	return retval;
 }
 
-static int destroy_queue_nocpsch(struct device_queue_manager *dqm,
+/* Access to DQM has to be locked before calling destroy_queue_nocpsch_locked
+ * to avoid asynchronized access
+ */
+static int destroy_queue_nocpsch_locked(struct device_queue_manager *dqm,
 				struct qcm_process_device *qpd,
 				struct queue *q)
 {
 	int retval;
 	struct mqd_manager *mqd;
 
-	retval = 0;
-
-	mutex_lock(&dqm->lock);
+	mqd = dqm->ops.get_mqd_manager(dqm,
+		get_mqd_type_from_queue_type(q->properties.type));
+	if (!mqd)
+		return -ENOMEM;
 
 	if (q->properties.type == KFD_QUEUE_TYPE_COMPUTE) {
-		mqd = dqm->ops.get_mqd_manager(dqm, KFD_MQD_TYPE_COMPUTE);
-		if (mqd == NULL) {
-			retval = -ENOMEM;
-			goto out;
-		}
 		deallocate_hqd(dqm, q);
 	} else if (q->properties.type == KFD_QUEUE_TYPE_SDMA) {
-		mqd = dqm->ops.get_mqd_manager(dqm, KFD_MQD_TYPE_SDMA);
-		if (mqd == NULL) {
-			retval = -ENOMEM;
-			goto out;
-		}
 		dqm->sdma_queue_count--;
 		deallocate_sdma_queue(dqm, q->sdma_id);
 	} else {
 		pr_debug("q->properties.type %d is invalid\n",
 				q->properties.type);
-		retval = -EINVAL;
-		goto out;
+		return -EINVAL;
 	}
+	dqm->total_queue_count--;
 
 	retval = mqd->destroy_mqd(mqd, q->mqd,
 				KFD_PREEMPT_TYPE_WAVEFRONT_RESET,
 				KFD_UNMAP_LATENCY_MS,
 				q->pipe, q->queue);
-
-	if (retval)
-		goto out;
+	if (retval == -ETIME)
+		qpd->reset_wavefronts = true;
 
 	mqd->uninit_mqd(mqd, q->mqd, q->mqd_mem_obj);
 
 	list_del(&q->list);
-	if (list_empty(&qpd->queues_list))
+	if (list_empty(&qpd->queues_list)) {
+		if (qpd->reset_wavefronts) {
+			pr_warn("Resetting wave fronts (nocpsch) on dev %p\n",
+					dqm->dev);
+			/* dbgdev_wave_reset_wavefronts has to be called before
+			 * deallocate_vmid(), i.e. when vmid is still in use.
+			 */
+			dbgdev_wave_reset_wavefronts(dqm->dev,
+					qpd->pqm->process);
+			qpd->reset_wavefronts = false;
+		}
+
 		deallocate_vmid(dqm, qpd, q);
+	}
 	if (q->properties.is_active)
 		dqm->queue_count--;
 
-	/*
-	 * Unconditionally decrement this counter, regardless of the queue's
-	 * type
-	 */
-	dqm->total_queue_count--;
-	pr_debug("Total of %d queues are accountable so far\n",
-			dqm->total_queue_count);
+	return retval;
+}
 
-out:
+static int destroy_queue_nocpsch(struct device_queue_manager *dqm,
+				struct qcm_process_device *qpd,
+				struct queue *q)
+{
+	int retval;
+
+	mutex_lock(&dqm->lock);
+	retval = destroy_queue_nocpsch_locked(dqm, qpd, q);
 	mutex_unlock(&dqm->lock);
+
 	return retval;
 }
 
@@ -921,10 +929,7 @@ static int unmap_queues_cpsch(struct dev
 				enum kfd_unmap_queues_filter filter,
 				uint32_t filter_param)
 {
-	int retval;
-	struct kfd_process_device *pdd;
-
-	retval = 0;
+	int retval = 0;
 
 	if (!dqm->active_runlist)
 		return retval;
@@ -948,12 +953,9 @@ static int unmap_queues_cpsch(struct dev
 	/* should be timed out */
 	retval = amdkfd_fence_wait_timeout(dqm->fence_addr, KFD_FENCE_COMPLETED,
 				QUEUE_PREEMPT_DEFAULT_TIMEOUT_MS);
-	if (retval) {
-		pdd = kfd_get_process_device_data(dqm->dev,
-				kfd_get_process(current));
-		pdd->reset_wavefronts = true;
+	if (retval)
 		return retval;
-	}
+
 	pm_release_ib(&dqm->packets);
 	dqm->active_runlist = false;
 
@@ -1015,7 +1017,10 @@ static int destroy_queue_cpsch(struct de
 	if (q->properties.is_active)
 		dqm->queue_count--;
 
-	execute_queues_cpsch(dqm, KFD_UNMAP_QUEUES_FILTER_DYNAMIC_QUEUES, 0);
+	retval = execute_queues_cpsch(dqm,
+				KFD_UNMAP_QUEUES_FILTER_DYNAMIC_QUEUES, 0);
+	if (retval == -ETIME)
+		qpd->reset_wavefronts = true;
 
 	mqd->uninit_mqd(mqd, q->mqd, q->mqd_mem_obj);
 
@@ -1105,6 +1110,108 @@ out:
 	return retval;
 }
 
+static int process_termination_nocpsch(struct device_queue_manager *dqm,
+		struct qcm_process_device *qpd)
+{
+	struct queue *q, *next;
+	struct device_process_node *cur, *next_dpn;
+	int retval = 0;
+
+	mutex_lock(&dqm->lock);
+
+	/* Clear all user mode queues */
+	list_for_each_entry_safe(q, next, &qpd->queues_list, list) {
+		int ret;
+
+		ret = destroy_queue_nocpsch_locked(dqm, qpd, q);
+		if (ret)
+			retval = ret;
+	}
+
+	/* Unregister process */
+	list_for_each_entry_safe(cur, next_dpn, &dqm->queues, list) {
+		if (qpd == cur->qpd) {
+			list_del(&cur->list);
+			kfree(cur);
+			dqm->processes_count--;
+			break;
+		}
+	}
+
+	mutex_unlock(&dqm->lock);
+	return retval;
+}
+
+
+static int process_termination_cpsch(struct device_queue_manager *dqm,
+		struct qcm_process_device *qpd)
+{
+	int retval;
+	struct queue *q, *next;
+	struct kernel_queue *kq, *kq_next;
+	struct mqd_manager *mqd;
+	struct device_process_node *cur, *next_dpn;
+	enum kfd_unmap_queues_filter filter =
+		KFD_UNMAP_QUEUES_FILTER_DYNAMIC_QUEUES;
+
+	retval = 0;
+
+	mutex_lock(&dqm->lock);
+
+	/* Clean all kernel queues */
+	list_for_each_entry_safe(kq, kq_next, &qpd->priv_queue_list, list) {
+		list_del(&kq->list);
+		dqm->queue_count--;
+		qpd->is_debug = false;
+		dqm->total_queue_count--;
+		filter = KFD_UNMAP_QUEUES_FILTER_ALL_QUEUES;
+	}
+
+	/* Clear all user mode queues */
+	list_for_each_entry(q, &qpd->queues_list, list) {
+		if (q->properties.type == KFD_QUEUE_TYPE_SDMA)
+			dqm->sdma_queue_count--;
+
+		if (q->properties.is_active)
+			dqm->queue_count--;
+
+		dqm->total_queue_count--;
+	}
+
+	/* Unregister process */
+	list_for_each_entry_safe(cur, next_dpn, &dqm->queues, list) {
+		if (qpd == cur->qpd) {
+			list_del(&cur->list);
+			kfree(cur);
+			dqm->processes_count--;
+			break;
+		}
+	}
+
+	retval = execute_queues_cpsch(dqm, filter, 0);
+	if (retval || qpd->reset_wavefronts) {
+		pr_warn("Resetting wave fronts (cpsch) on dev %p\n", dqm->dev);
+		dbgdev_wave_reset_wavefronts(dqm->dev, qpd->pqm->process);
+		qpd->reset_wavefronts = false;
+	}
+
+	/* lastly, free mqd resources */
+	list_for_each_entry_safe(q, next, &qpd->queues_list, list) {
+		mqd = dqm->ops.get_mqd_manager(dqm,
+			get_mqd_type_from_queue_type(q->properties.type));
+		if (!mqd) {
+			retval = -ENOMEM;
+			goto out;
+		}
+		list_del(&q->list);
+		mqd->uninit_mqd(mqd, q->mqd, q->mqd_mem_obj);
+	}
+
+out:
+	mutex_unlock(&dqm->lock);
+	return retval;
+}
+
 struct device_queue_manager *device_queue_manager_init(struct kfd_dev *dev)
 {
 	struct device_queue_manager *dqm;
@@ -1133,6 +1240,7 @@ struct device_queue_manager *device_queu
 		dqm->ops.create_kernel_queue = create_kernel_queue_cpsch;
 		dqm->ops.destroy_kernel_queue = destroy_kernel_queue_cpsch;
 		dqm->ops.set_cache_memory_policy = set_cache_memory_policy;
+		dqm->ops.process_termination = process_termination_cpsch;
 		break;
 	case KFD_SCHED_POLICY_NO_HWS:
 		/* initialize dqm for no cp scheduling */
@@ -1147,6 +1255,7 @@ struct device_queue_manager *device_queu
 		dqm->ops.initialize = initialize_nocpsch;
 		dqm->ops.uninitialize = uninitialize;
 		dqm->ops.set_cache_memory_policy = set_cache_memory_policy;
+		dqm->ops.process_termination = process_termination_nocpsch;
 		break;
 	default:
 		pr_err("Invalid scheduling policy %d\n", sched_policy);
--- a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.h
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.h
@@ -77,6 +77,8 @@ struct device_process_node {
  * @set_cache_memory_policy: Sets memory policy (cached/ non cached) for the
  * memory apertures.
  *
+ * @process_termination: Clears all process queues belongs to that device.
+ *
  */
 
 struct device_queue_manager_ops {
@@ -120,6 +122,9 @@ struct device_queue_manager_ops {
 					   enum cache_policy alternate_policy,
 					   void __user *alternate_aperture_base,
 					   uint64_t alternate_aperture_size);
+
+	int (*process_termination)(struct device_queue_manager *dqm,
+			struct qcm_process_device *qpd);
 };
 
 struct device_queue_manager_asic_ops {
--- a/drivers/gpu/drm/amd/amdkfd/kfd_priv.h
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_priv.h
@@ -421,6 +421,12 @@ struct qcm_process_device {
 	unsigned int queue_count;
 	unsigned int vmid;
 	bool is_debug;
+
+	/* This flag tells if we should reset all wavefronts on
+	 * process termination
+	 */
+	bool reset_wavefronts;
+
 	/*
 	 * All the memory management data should be here too
 	 */
@@ -454,6 +460,8 @@ struct kfd_process_device {
 	/* The device that owns this data. */
 	struct kfd_dev *dev;
 
+	/* The process that owns this kfd_process_device. */
+	struct kfd_process *process;
 
 	/* per-process-per device QCM data structure */
 	struct qcm_process_device qpd;
@@ -469,10 +477,12 @@ struct kfd_process_device {
 	/* Is this process/pasid bound to this device? (amd_iommu_bind_pasid) */
 	enum kfd_pdd_bound bound;
 
-	/* This flag tells if we should reset all
-	 * wavefronts on process termination
+	/* Flag used to tell the pdd has dequeued from the dqm.
+	 * This is used to prevent dev->dqm->ops.process_termination() from
+	 * being called twice when it is already called in IOMMU callback
+	 * function.
 	 */
-	bool reset_wavefronts;
+	bool already_dequeued;
 };
 
 #define qpd_to_pdd(x) container_of(x, struct kfd_process_device, qpd)
@@ -659,6 +669,8 @@ struct process_queue_node {
 	struct list_head process_queue_list;
 };
 
+void kfd_process_dequeue_from_device(struct kfd_process_device *pdd);
+void kfd_process_dequeue_from_all_devices(struct kfd_process *p);
 int pqm_init(struct process_queue_manager *pqm, struct kfd_process *p);
 void pqm_uninit(struct process_queue_manager *pqm);
 int pqm_create_queue(struct process_queue_manager *pqm,
--- a/drivers/gpu/drm/amd/amdkfd/kfd_process.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_process.c
@@ -171,9 +171,6 @@ static void kfd_process_wq_release(struc
 		pr_debug("Releasing pdd (topology id %d) for process (pasid %d) in workqueue\n",
 				pdd->dev->id, p->pasid);
 
-		if (pdd->reset_wavefronts)
-			dbgdev_wave_reset_wavefronts(pdd->dev, p);
-
 		if (pdd->bound == PDD_BOUND)
 			amd_iommu_unbind_pasid(pdd->dev->pdev, p->pasid);
 
@@ -237,24 +234,17 @@ static void kfd_process_notifier_release
 
 	mutex_lock(&p->mutex);
 
-	/* In case our notifier is called before IOMMU notifier */
+	kfd_process_dequeue_from_all_devices(p);
 	pqm_uninit(&p->pqm);
 
 	/* Iterate over all process device data structure and check
-	 * if we should delete debug managers and reset all wavefronts
+	 * if we should delete debug managers
 	 */
-	list_for_each_entry(pdd, &p->per_device_data, per_device_list) {
+	list_for_each_entry(pdd, &p->per_device_data, per_device_list)
 		if ((pdd->dev->dbgmgr) &&
 				(pdd->dev->dbgmgr->pasid == p->pasid))
 			kfd_dbgmgr_destroy(pdd->dev->dbgmgr);
 
-		if (pdd->reset_wavefronts) {
-			pr_warn("Resetting all wave fronts\n");
-			dbgdev_wave_reset_wavefronts(pdd->dev, p);
-			pdd->reset_wavefronts = false;
-		}
-	}
-
 	mutex_unlock(&p->mutex);
 
 	/*
@@ -368,8 +358,9 @@ struct kfd_process_device *kfd_create_pr
 		INIT_LIST_HEAD(&pdd->qpd.queues_list);
 		INIT_LIST_HEAD(&pdd->qpd.priv_queue_list);
 		pdd->qpd.dqm = dev->dqm;
-		pdd->reset_wavefronts = false;
+		pdd->process = p;
 		pdd->bound = PDD_UNBOUND;
+		pdd->already_dequeued = false;
 		list_add(&pdd->per_device_list, &p->per_device_data);
 	}
 
@@ -498,19 +489,12 @@ void kfd_process_iommu_unbind_callback(s
 	if ((dev->dbgmgr) && (dev->dbgmgr->pasid == p->pasid))
 		kfd_dbgmgr_destroy(dev->dbgmgr);
 
-	pqm_uninit(&p->pqm);
-
 	pdd = kfd_get_process_device_data(dev, p);
-
-	if (!pdd) {
-		mutex_unlock(&p->mutex);
-		return;
-	}
-
-	if (pdd->reset_wavefronts) {
-		dbgdev_wave_reset_wavefronts(pdd->dev, p);
-		pdd->reset_wavefronts = false;
-	}
+	if (pdd)
+		/* For GPU relying on IOMMU, we need to dequeue here
+		 * when PASID is still bound.
+		 */
+		kfd_process_dequeue_from_device(pdd);
 
 	mutex_unlock(&p->mutex);
 }
--- a/drivers/gpu/drm/amd/amdkfd/kfd_process_queue_manager.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_process_queue_manager.c
@@ -63,6 +63,25 @@ static int find_available_queue_slot(str
 	return 0;
 }
 
+void kfd_process_dequeue_from_device(struct kfd_process_device *pdd)
+{
+	struct kfd_dev *dev = pdd->dev;
+
+	if (pdd->already_dequeued)
+		return;
+
+	dev->dqm->ops.process_termination(dev->dqm, &pdd->qpd);
+	pdd->already_dequeued = true;
+}
+
+void kfd_process_dequeue_from_all_devices(struct kfd_process *p)
+{
+	struct kfd_process_device *pdd;
+
+	list_for_each_entry(pdd, &p->per_device_data, per_device_list)
+		kfd_process_dequeue_from_device(pdd);
+}
+
 int pqm_init(struct process_queue_manager *pqm, struct kfd_process *p)
 {
 	INIT_LIST_HEAD(&pqm->queues);
@@ -78,21 +97,14 @@ int pqm_init(struct process_queue_manage
 
 void pqm_uninit(struct process_queue_manager *pqm)
 {
-	int retval;
 	struct process_queue_node *pqn, *next;
 
 	list_for_each_entry_safe(pqn, next, &pqm->queues, process_queue_list) {
-		retval = pqm_destroy_queue(
-				pqm,
-				(pqn->q != NULL) ?
-					pqn->q->properties.queue_id :
-					pqn->kq->queue->properties.queue_id);
-
-		if (retval != 0) {
-			pr_err("failed to destroy queue\n");
-			return;
-		}
+		uninit_queue(pqn->q);
+		list_del(&pqn->process_queue_list);
+		kfree(pqn);
 	}
+
 	kfree(pqm->queue_slot_bitmap);
 	pqm->queue_slot_bitmap = NULL;
 }
@@ -290,9 +302,6 @@ int pqm_destroy_queue(struct process_que
 	if (pqn->q) {
 		dqm = pqn->q->device->dqm;
 		retval = dqm->ops.destroy_queue(dqm, &pdd->qpd, pqn->q);
-		if (retval != 0)
-			return retval;
-
 		uninit_queue(pqn->q);
 	}