From: Felix Kuehling
Date: Thu, 15 Mar 2018 17:27:50 -0400
Subject: drm/amdkfd: Add TC flush on VMID deallocation for Hawaii
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
Git-commit: 552764b680a65d6069ad651b356d5465082939d0
Patch-mainline: v4.17-rc1
References: FATE#326289 FATE#326079 FATE#326049 FATE#322398 FATE#326166

On GFX7 the CP does not perform a TC flush when queues are unmapped.
To prevent a TC eviction from accessing an invalid VMID, flush it
explicitly before releasing a VMID.

v2: Fix unnecessary list_for_each_entry_safe
v3: Moved allocation to kfd_process_device_init_vm

Signed-off-by: Amber Lin
Signed-off-by: Felix Kuehling
Acked-by: Christian König
Signed-off-by: Oded Gabbay
Acked-by: Petr Tesarik
---
 drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c | 22 ++++++++++++++++++++-
 drivers/gpu/drm/amd/amdkfd/kfd_packet_manager.c       | 37 +++++++++++++++++++++++++++++++++++++
 drivers/gpu/drm/amd/amdkfd/kfd_priv.h                 |  3 +++
 drivers/gpu/drm/amd/amdkfd/kfd_process.c              | 34 ++++++++++++++++++++++++++++++++++
 4 files changed, 95 insertions(+), 1 deletion(-)

--- a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c
@@ -142,12 +142,31 @@ static int allocate_vmid(struct device_q
 	return 0;
 }
 
+static int flush_texture_cache_nocpsch(struct kfd_dev *kdev,
+				struct qcm_process_device *qpd)
+{
+	uint32_t len;
+
+	if (!qpd->ib_kaddr)
+		return -ENOMEM;
+
+	len = pm_create_release_mem(qpd->ib_base, (uint32_t *)qpd->ib_kaddr);
+
+	return kdev->kfd2kgd->submit_ib(kdev->kgd, KGD_ENGINE_MEC1, qpd->vmid,
+				qpd->ib_base, (uint32_t *)qpd->ib_kaddr, len);
+}
+
 static void deallocate_vmid(struct device_queue_manager *dqm,
 				struct qcm_process_device *qpd,
 				struct queue *q)
 {
 	int bit = qpd->vmid - dqm->dev->vm_info.first_vmid_kfd;
 
+	/* On GFX v7, CP doesn't flush TC at dequeue */
+	if (q->device->device_info->asic_family == CHIP_HAWAII)
+		if (flush_texture_cache_nocpsch(q->device, qpd))
+			pr_err("Failed to flush TC\n");
+
 	kfd_flush_tlb(qpd_to_pdd(qpd));
 
 	/* Release the vmid mapping */
@@ -792,11 +811,12 @@ static void uninitialize(struct device_q
 static int start_nocpsch(struct device_queue_manager *dqm)
 {
 	init_interrupts(dqm);
-	return 0;
+	return pm_init(&dqm->packets, dqm);
 }
 
 static int stop_nocpsch(struct device_queue_manager *dqm)
 {
+	pm_uninit(&dqm->packets);
 	return 0;
 }
 
--- a/drivers/gpu/drm/amd/amdkfd/kfd_packet_manager.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_packet_manager.c
@@ -355,6 +355,43 @@ static int pm_create_runlist_ib(struct p
 	return retval;
 }
 
+/* pm_create_release_mem - Create a RELEASE_MEM packet and return the size
+ *	of this packet
+ *	@gpu_addr - GPU address of the packet. It's a virtual address.
+ *	@buffer - buffer to fill up with the packet. It's a CPU kernel pointer.
+ *	Return - length of the packet
+ */
+uint32_t pm_create_release_mem(uint64_t gpu_addr, uint32_t *buffer)
+{
+	struct pm4_mec_release_mem *packet;
+
+	WARN_ON(!buffer);
+
+	packet = (struct pm4_mec_release_mem *)buffer;
+	memset(buffer, 0, sizeof(*packet));
+
+	packet->header.u32All = build_pm4_header(IT_RELEASE_MEM,
+						 sizeof(*packet));
+
+	packet->bitfields2.event_type = CACHE_FLUSH_AND_INV_TS_EVENT;
+	packet->bitfields2.event_index = event_index___release_mem__end_of_pipe;
+	packet->bitfields2.tcl1_action_ena = 1;
+	packet->bitfields2.tc_action_ena = 1;
+	packet->bitfields2.cache_policy = cache_policy___release_mem__lru;
+	packet->bitfields2.atc = 0;
+
+	packet->bitfields3.data_sel = data_sel___release_mem__send_32_bit_low;
+	packet->bitfields3.int_sel =
+		int_sel___release_mem__send_interrupt_after_write_confirm;
+
+	packet->bitfields4.address_lo_32b = (gpu_addr & 0xffffffff) >> 2;
+	packet->address_hi = upper_32_bits(gpu_addr);
+
+	packet->data_lo = 0;
+
+	return sizeof(*packet) / sizeof(unsigned int);
+}
+
 int pm_init(struct packet_manager *pm, struct device_queue_manager *dqm)
 {
 	pm->dqm = dqm;
--- a/drivers/gpu/drm/amd/amdkfd/kfd_priv.h
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_priv.h
@@ -494,6 +494,7 @@ struct qcm_process_device {
 
 	/* IB memory */
 	uint64_t ib_base;
+	void *ib_kaddr;
 };
 
 /* KFD Memory Eviction */
@@ -834,6 +835,8 @@ int pm_send_unmap_queue(struct packet_ma
 
 void pm_release_ib(struct packet_manager *pm);
 
+uint32_t pm_create_release_mem(uint64_t gpu_addr, uint32_t *buffer);
+
 uint64_t kfd_get_number_elems(struct kfd_dev *kfd);
 
 /* Events */
--- a/drivers/gpu/drm/amd/amdkfd/kfd_process.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_process.c
@@ -149,6 +149,36 @@ err_alloc_mem:
 	return err;
 }
 
+/* kfd_process_device_reserve_ib_mem - Reserve memory inside the
+ *	process for IB usage. The memory reserved is for KFD to submit
+ *	IBs to AMDGPU from the kernel. If the memory is reserved
+ *	successfully, ib_kaddr will have the CPU/kernel
+ *	address. Check ib_kaddr before accessing the memory.
+ */
+static int kfd_process_device_reserve_ib_mem(struct kfd_process_device *pdd)
+{
+	struct qcm_process_device *qpd = &pdd->qpd;
+	uint32_t flags = ALLOC_MEM_FLAGS_GTT |
+			ALLOC_MEM_FLAGS_NO_SUBSTITUTE |
+			ALLOC_MEM_FLAGS_WRITABLE |
+			ALLOC_MEM_FLAGS_EXECUTABLE;
+	void *kaddr;
+	int ret;
+
+	if (qpd->ib_kaddr || !qpd->ib_base)
+		return 0;
+
+	/* ib_base is only set for dGPU */
+	ret = kfd_process_alloc_gpuvm(pdd, qpd->ib_base, PAGE_SIZE, flags,
+				      &kaddr);
+	if (ret)
+		return ret;
+
+	qpd->ib_kaddr = kaddr;
+
+	return 0;
+}
+
 struct kfd_process *kfd_create_process(struct file *filep)
 {
 	struct kfd_process *process;
@@ -610,6 +640,9 @@ int kfd_process_device_init_vm(struct kf
 		return ret;
 	}
 
+	ret = kfd_process_device_reserve_ib_mem(pdd);
+	if (ret)
+		goto err_reserve_ib_mem;
 	ret = kfd_process_device_init_cwsr_dgpu(pdd);
 	if (ret)
 		goto err_init_cwsr;
@@ -619,6 +652,7 @@ int kfd_process_device_init_vm(struct kf
 	return 0;
 
 err_init_cwsr:
+err_reserve_ib_mem:
 	kfd_process_device_free_bos(pdd);
 	if (!drm_file)
 		dev->kfd2kgd->destroy_process_vm(dev->kgd, pdd->vm);
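
A note for readers following the RELEASE_MEM encoding above: bitfields4.address_lo_32b
holds bits [31:2] of the end-of-pipe write address, since the destination must be
dword-aligned, and the function's return value is the packet length in dwords, which
is what flush_texture_cache_nocpsch() hands to submit_ib(). The stand-alone sketch
below reproduces just that arithmetic. It is an illustration, not kernel code:
struct fake_release_mem is a hypothetical stand-in for the real
struct pm4_mec_release_mem from the kfd PM4 headers, and gpu_addr is an arbitrary
example value.

#include <stdint.h>
#include <stdio.h>

/* Hypothetical stand-in for struct pm4_mec_release_mem; the real bitfield
 * layout lives in the kfd PM4 header files. Seven dwords in this sketch.
 */
struct fake_release_mem {
	uint32_t header;	 /* PM4 header carrying the IT_RELEASE_MEM opcode */
	uint32_t bitfields2;	 /* event type/index, TC/TCL1 action enables */
	uint32_t bitfields3;	 /* data_sel, int_sel */
	uint32_t address_lo_32b; /* bits [31:2] of the write address */
	uint32_t address_hi;	 /* bits [63:32] of the write address */
	uint32_t data_lo;
	uint32_t data_hi;
};

int main(void)
{
	uint64_t gpu_addr = 0x100001000ULL; /* example dword-aligned GPU VA */
	struct fake_release_mem pkt = {0};

	/* Same encoding as pm_create_release_mem(): keep the low 32 bits,
	 * then drop the two LSBs, which are implied by dword alignment. */
	pkt.address_lo_32b = (uint32_t)((gpu_addr & 0xffffffff) >> 2);
	pkt.address_hi = (uint32_t)(gpu_addr >> 32);

	/* Length in dwords, the quantity submit_ib() receives as len. */
	printf("packet length = %zu dwords\n", sizeof(pkt) / sizeof(uint32_t));
	printf("address_lo_32b = 0x%08x, address_hi = 0x%08x\n",
	       pkt.address_lo_32b, pkt.address_hi);
	return 0;
}

With this stand-in the length comes out to 7 dwords; the kernel computes the same
quantity as sizeof(*packet) / sizeof(unsigned int) over the real structure, so the
two values need not be identical, only derived the same way.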