Blob Blame History Raw
From dd88c66f36eb8cd3fc3bc31f6317cbbb19480334 Mon Sep 17 00:00:00 2001
From: Arunpravin Paneer Selvam <Arunpravin.PaneerSelvam@amd.com>
Date: Mon, 6 Jun 2022 13:06:30 +0530
Subject: drm/amd/amdgpu: Enable high priority gfx queue
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
Git-commit: b07d1d73b09ef40e91ace51a2e167391676a8175
Patch-mainline: v6.0-rc1
References: jsc#PED-1166 jsc#PED-1168 jsc#PED-1170 jsc#PED-1218 jsc#PED-1220 jsc#PED-1222 jsc#PED-1223 jsc#PED-1225 jsc#PED-2849

Starting from SIENNA CICHLID asic supports two gfx pipes, enabling
two graphics queues, 1 on each pipe, pipe0 queue0 would be the normal
piority queue and pipe1 queue0 would be the high priority queue

Only one queue per pipe is visble to SPI, SPI looks at the priority
value assigned to CP_GFX_HQD_QUEUE_PRIORITY from each of the queue's
HQD/MQD.

Create contexts applying AMDGPU_CTX_PRIORITY_HIGH which submits job
to the high priority queue on GFX pipe1. There would be starvation
of LP workload if HP workload is always available.

v2:
  - remove unnecessary check(Nirmoy)
  - make pipe1 hardware support a separate patch(Nirmoy)
  - remove duplicate code(Shashank)
  - add CSA support for second gfx pipe(Alex)

v3(Christian):
  - fix incorrect indentation
  - merge COMPUTE and GFX switch cases as both calls the same function.

v4:
  - rebase w/ latest code base

Signed-off-by: Arunpravin Paneer Selvam <Arunpravin.PaneerSelvam@amd.com>
Acked-by: Christian König <christian.koenig@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
Acked-by: Patrik Jakobsson <pjakobsson@suse.de>
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c  |  7 +--
 drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c  | 55 +++++++++++++++++++-----
 drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h  |  2 +
 drivers/gpu/drm/amd/amdgpu/amdgpu_ring.c | 12 +++---
 drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c   | 32 +++++++++++---
 5 files changed, 81 insertions(+), 27 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c
index 7dc92ef36b2b..2ef5296216d6 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c
@@ -110,7 +110,7 @@ static int amdgpu_ctx_priority_permit(struct drm_file *filp,
 	return -EACCES;
 }
 
-static enum amdgpu_gfx_pipe_priority amdgpu_ctx_prio_to_compute_prio(int32_t prio)
+static enum amdgpu_gfx_pipe_priority amdgpu_ctx_prio_to_gfx_pipe_prio(int32_t prio)
 {
 	switch (prio) {
 	case AMDGPU_CTX_PRIORITY_HIGH:
@@ -143,8 +143,9 @@ static unsigned int amdgpu_ctx_get_hw_prio(struct amdgpu_ctx *ctx, u32 hw_ip)
 			ctx->init_priority : ctx->override_priority;
 
 	switch (hw_ip) {
+	case AMDGPU_HW_IP_GFX:
 	case AMDGPU_HW_IP_COMPUTE:
-		hw_prio = amdgpu_ctx_prio_to_compute_prio(ctx_prio);
+		hw_prio = amdgpu_ctx_prio_to_gfx_pipe_prio(ctx_prio);
 		break;
 	case AMDGPU_HW_IP_VCE:
 	case AMDGPU_HW_IP_VCN_ENC:
@@ -779,7 +780,7 @@ static void amdgpu_ctx_set_entity_priority(struct amdgpu_ctx *ctx,
 				      amdgpu_ctx_to_drm_sched_prio(priority));
 
 	/* set hw priority */
-	if (hw_ip == AMDGPU_HW_IP_COMPUTE) {
+	if (hw_ip == AMDGPU_HW_IP_COMPUTE || hw_ip == AMDGPU_HW_IP_GFX) {
 		hw_prio = amdgpu_ctx_get_hw_prio(ctx, hw_ip);
 		hw_prio = array_index_nospec(hw_prio, AMDGPU_RING_PRIO_MAX);
 		scheds = adev->gpu_sched[hw_ip][hw_prio].sched;
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c
index 16699158e00d..00c69f0a9f52 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c
@@ -142,7 +142,12 @@ void amdgpu_gfx_parse_disable_cu(unsigned *mask, unsigned max_se, unsigned max_s
 	}
 }
 
-static bool amdgpu_gfx_is_multipipe_capable(struct amdgpu_device *adev)
+static bool amdgpu_gfx_is_graphics_multipipe_capable(struct amdgpu_device *adev)
+{
+	return amdgpu_async_gfx_ring && adev->gfx.me.num_pipe_per_me > 1;
+}
+
+static bool amdgpu_gfx_is_compute_multipipe_capable(struct amdgpu_device *adev)
 {
 	if (amdgpu_compute_multipipe != -1) {
 		DRM_INFO("amdgpu: forcing compute pipe policy %d\n",
@@ -158,6 +163,28 @@ static bool amdgpu_gfx_is_multipipe_capable(struct amdgpu_device *adev)
 	return adev->gfx.mec.num_mec > 1;
 }
 
+bool amdgpu_gfx_is_high_priority_graphics_queue(struct amdgpu_device *adev,
+						struct amdgpu_ring *ring)
+{
+	int queue = ring->queue;
+	int pipe = ring->pipe;
+
+	/* Policy: use pipe1 queue0 as high priority graphics queue if we
+	 * have more than one gfx pipe.
+	 */
+	if (amdgpu_gfx_is_graphics_multipipe_capable(adev) &&
+	    adev->gfx.num_gfx_rings > 1 && pipe == 1 && queue == 0) {
+		int me = ring->me;
+		int bit;
+
+		bit = amdgpu_gfx_me_queue_to_bit(adev, me, pipe, queue);
+		if (ring == &adev->gfx.gfx_ring[bit])
+			return true;
+	}
+
+	return false;
+}
+
 bool amdgpu_gfx_is_high_priority_compute_queue(struct amdgpu_device *adev,
 					       struct amdgpu_ring *ring)
 {
@@ -174,7 +201,7 @@ bool amdgpu_gfx_is_high_priority_compute_queue(struct amdgpu_device *adev,
 void amdgpu_gfx_compute_queue_acquire(struct amdgpu_device *adev)
 {
 	int i, queue, pipe;
-	bool multipipe_policy = amdgpu_gfx_is_multipipe_capable(adev);
+	bool multipipe_policy = amdgpu_gfx_is_compute_multipipe_capable(adev);
 	int max_queues_per_mec = min(adev->gfx.mec.num_pipe_per_mec *
 				     adev->gfx.mec.num_queue_per_pipe,
 				     adev->gfx.num_compute_rings);
@@ -200,18 +227,24 @@ void amdgpu_gfx_compute_queue_acquire(struct amdgpu_device *adev)
 
 void amdgpu_gfx_graphics_queue_acquire(struct amdgpu_device *adev)
 {
-	int i, queue, me;
-
-	for (i = 0; i < AMDGPU_MAX_GFX_QUEUES; ++i) {
-		queue = i % adev->gfx.me.num_queue_per_pipe;
-		me = (i / adev->gfx.me.num_queue_per_pipe)
-		      / adev->gfx.me.num_pipe_per_me;
+	int i, queue, pipe;
+	bool multipipe_policy = amdgpu_gfx_is_graphics_multipipe_capable(adev);
+	int max_queues_per_me = adev->gfx.me.num_pipe_per_me *
+					adev->gfx.me.num_queue_per_pipe;
 
-		if (me >= adev->gfx.me.num_me)
-			break;
+	if (multipipe_policy) {
 		/* policy: amdgpu owns the first queue per pipe at this stage
 		 * will extend to mulitple queues per pipe later */
-		if (me == 0 && queue < 1)
+		for (i = 0; i < max_queues_per_me; i++) {
+			pipe = i % adev->gfx.me.num_pipe_per_me;
+			queue = (i / adev->gfx.me.num_pipe_per_me) %
+				adev->gfx.me.num_queue_per_pipe;
+
+			set_bit(pipe * adev->gfx.me.num_queue_per_pipe + queue,
+					adev->gfx.me.queue_bitmap);
+		}
+	} else {
+		for (i = 0; i < max_queues_per_me; ++i)
 			set_bit(i, adev->gfx.me.queue_bitmap);
 	}
 
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h
index 53526ffb2ce1..23a696d38390 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h
@@ -396,6 +396,8 @@ bool amdgpu_gfx_is_mec_queue_enabled(struct amdgpu_device *adev, int mec,
 				     int pipe, int queue);
 bool amdgpu_gfx_is_high_priority_compute_queue(struct amdgpu_device *adev,
 					       struct amdgpu_ring *ring);
+bool amdgpu_gfx_is_high_priority_graphics_queue(struct amdgpu_device *adev,
+						struct amdgpu_ring *ring);
 int amdgpu_gfx_me_queue_to_bit(struct amdgpu_device *adev, int me,
 			       int pipe, int queue);
 void amdgpu_gfx_bit_to_me_queue(struct amdgpu_device *adev, int bit,
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.c
index 13db99d653bd..1fa9edf04022 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.c
@@ -543,12 +543,12 @@ static void amdgpu_ring_to_mqd_prop(struct amdgpu_ring *ring,
 	 */
 	prop->hqd_active = ring->funcs->type == AMDGPU_RING_TYPE_KIQ;
 
-	if (ring->funcs->type == AMDGPU_RING_TYPE_COMPUTE) {
-		if (amdgpu_gfx_is_high_priority_compute_queue(adev, ring)) {
-			prop->hqd_pipe_priority = AMDGPU_GFX_PIPE_PRIO_HIGH;
-			prop->hqd_queue_priority =
-				AMDGPU_GFX_QUEUE_PRIORITY_MAXIMUM;
-		}
+	if ((ring->funcs->type == AMDGPU_RING_TYPE_COMPUTE &&
+	    amdgpu_gfx_is_high_priority_compute_queue(adev, ring)) ||
+	    (ring->funcs->type == AMDGPU_RING_TYPE_GFX &&
+	    amdgpu_gfx_is_high_priority_graphics_queue(adev, ring))) {
+		prop->hqd_pipe_priority = AMDGPU_GFX_PIPE_PRIO_HIGH;
+		prop->hqd_queue_priority = AMDGPU_GFX_QUEUE_PRIORITY_MAXIMUM;
 	}
 }
 
diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c
index c5f46d264b23..6831d1eb18bf 100644
--- a/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c
@@ -53,7 +53,7 @@
  * 2. Async ring
  */
 #define GFX10_NUM_GFX_RINGS_NV1X	1
-#define GFX10_NUM_GFX_RINGS_Sienna_Cichlid	1
+#define GFX10_NUM_GFX_RINGS_Sienna_Cichlid	2
 #define GFX10_MEC_HPD_SIZE	2048
 
 #define F32_CE_PROGRAM_RAM_SIZE		65536
@@ -4711,6 +4711,7 @@ static int gfx_v10_0_gfx_ring_init(struct amdgpu_device *adev, int ring_id,
 {
 	struct amdgpu_ring *ring;
 	unsigned int irq_type;
+	unsigned int hw_prio;
 
 	ring = &adev->gfx.gfx_ring[ring_id];
 
@@ -4728,8 +4729,10 @@ static int gfx_v10_0_gfx_ring_init(struct amdgpu_device *adev, int ring_id,
 	sprintf(ring->name, "gfx_%d.%d.%d", ring->me, ring->pipe, ring->queue);
 
 	irq_type = AMDGPU_CP_IRQ_GFX_ME0_PIPE0_EOP + ring->pipe;
+	hw_prio = amdgpu_gfx_is_high_priority_graphics_queue(adev, ring) ?
+			AMDGPU_GFX_PIPE_PRIO_HIGH : AMDGPU_GFX_PIPE_PRIO_NORMAL;
 	return amdgpu_ring_init(adev, ring, 1024, &adev->gfx.eop_irq, irq_type,
-			     AMDGPU_RING_PRIO_DEFAULT, NULL);
+				hw_prio, NULL);
 }
 
 static int gfx_v10_0_compute_ring_init(struct amdgpu_device *adev, int ring_id,
@@ -6581,6 +6584,24 @@ static void gfx_v10_0_kiq_setting(struct amdgpu_ring *ring)
 	}
 }
 
+static void gfx_v10_0_gfx_mqd_set_priority(struct amdgpu_device *adev,
+					   struct v10_gfx_mqd *mqd,
+					   struct amdgpu_mqd_prop *prop)
+{
+	bool priority = 0;
+	u32 tmp;
+
+	/* set up default queue priority level
+	 * 0x0 = low priority, 0x1 = high priority
+	 */
+	if (prop->hqd_pipe_priority == AMDGPU_GFX_PIPE_PRIO_HIGH)
+		priority = 1;
+
+	tmp = RREG32_SOC15(GC, 0, mmCP_GFX_HQD_QUEUE_PRIORITY);
+	tmp = REG_SET_FIELD(tmp, CP_GFX_HQD_QUEUE_PRIORITY, PRIORITY_LEVEL, priority);
+	mqd->cp_gfx_hqd_queue_priority = tmp;
+}
+
 static int gfx_v10_0_gfx_mqd_init(struct amdgpu_device *adev, void *m,
 				  struct amdgpu_mqd_prop *prop)
 {
@@ -6609,11 +6630,8 @@ static int gfx_v10_0_gfx_mqd_init(struct amdgpu_device *adev, void *m,
 	tmp = REG_SET_FIELD(tmp, CP_GFX_HQD_VMID, VMID, 0);
 	mqd->cp_gfx_hqd_vmid = 0;
 
-	/* set up default queue priority level
-	 * 0x0 = low priority, 0x1 = high priority */
-	tmp = RREG32_SOC15(GC, 0, mmCP_GFX_HQD_QUEUE_PRIORITY);
-	tmp = REG_SET_FIELD(tmp, CP_GFX_HQD_QUEUE_PRIORITY, PRIORITY_LEVEL, 0);
-	mqd->cp_gfx_hqd_queue_priority = tmp;
+	/* set up gfx queue priority */
+	gfx_v10_0_gfx_mqd_set_priority(adev, mqd, prop);
 
 	/* set up time quantum */
 	tmp = RREG32_SOC15(GC, 0, mmCP_GFX_HQD_QUANTUM);
-- 
2.38.1