From: Leo Liu Date: Wed, 21 Dec 2016 13:21:52 -0500 Subject: drm/amdgpu: add initial vcn support and decode tests Git-commit: 95d0906f8506550a7c4a59c770732e7de912cffc Patch-mainline: v4.13-rc1 References: FATE#326289 FATE#326079 FATE#326049 FATE#322398 FATE#326166 VCN is the new media block on Raven. Add core support and the ring and ib tests for decode. Signed-off-by: Leo Liu Acked-by: Alex Deucher Reviewed-by: Hawking Zhang Signed-off-by: Alex Deucher Acked-by: Petr Tesarik --- drivers/gpu/drm/amd/amdgpu/Makefile | 4 drivers/gpu/drm/amd/amdgpu/amdgpu.h | 42 ++- drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.c | 425 ++++++++++++++++++++++++++++++++ drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.h | 35 ++ 4 files changed, 501 insertions(+), 5 deletions(-) --- a/drivers/gpu/drm/amd/amdgpu/Makefile +++ b/drivers/gpu/drm/amd/amdgpu/Makefile @@ -93,6 +93,10 @@ amdgpu-y += \ vce_v3_0.o \ vce_v4_0.o +# add VCN block +amdgpu-y += \ + amdgpu_vcn.o + # add amdkfd interfaces amdgpu-y += \ amdgpu_amdkfd.o \ --- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h @@ -1181,6 +1181,31 @@ void amdgpu_wb_free_64bit(struct amdgpu_ void amdgpu_get_pcie_info(struct amdgpu_device *adev); /* + * VCN + */ +#define AMDGPU_VCN_STACK_SIZE (200*1024) +#define AMDGPU_VCN_HEAP_SIZE (256*1024) +#define AMDGPU_VCN_SESSION_SIZE (50*1024) +#define AMDGPU_VCN_FIRMWARE_OFFSET 256 +#define AMDGPU_VCN_MAX_ENC_RINGS 3 + +struct amdgpu_vcn { + struct amdgpu_bo *vcpu_bo; + void *cpu_addr; + uint64_t gpu_addr; + unsigned fw_version; + void *saved_bo; + struct delayed_work idle_work; + const struct firmware *fw; /* VCN firmware */ + struct amdgpu_ring ring_dec; + struct amdgpu_ring ring_enc[AMDGPU_VCN_MAX_ENC_RINGS]; + struct amdgpu_irq_src irq; + struct amd_sched_entity entity_dec; + struct amd_sched_entity entity_enc; + uint32_t srbm_soft_reset; +}; + +/* * SDMA */ struct amdgpu_sdma_instance { @@ -1572,11 +1597,18 @@ struct amdgpu_device { /* sdma */ struct amdgpu_sdma sdma; - /* uvd */ - struct amdgpu_uvd uvd; - - /* vce */ - struct amdgpu_vce vce; + union { + struct { + /* uvd */ + struct amdgpu_uvd uvd; + + /* vce */ + struct amdgpu_vce vce; + }; + + /* vcn */ + struct amdgpu_vcn vcn; + }; /* firmwares */ struct amdgpu_firmware firmware; --- /dev/null +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.c @@ -0,0 +1,425 @@ +/* + * Copyright 2016 Advanced Micro Devices, Inc. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDERS, AUTHORS AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM, + * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR + * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE + * USE OR OTHER DEALINGS IN THE SOFTWARE. + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + */ + +#include +#include +#include +#include + +#include "amdgpu.h" +#include "amdgpu_pm.h" +#include "amdgpu_vcn.h" +#include "soc15d.h" +#include "soc15_common.h" + +#include "vega10/soc15ip.h" +#include "raven1/VCN/vcn_1_0_offset.h" + +/* 1 second timeout */ +#define VCN_IDLE_TIMEOUT msecs_to_jiffies(1000) + +/* Firmware Names */ +#define FIRMWARE_RAVEN "amdgpu/raven_vcn.bin" + +MODULE_FIRMWARE(FIRMWARE_RAVEN); + +static void amdgpu_vcn_idle_work_handler(struct work_struct *work); + +int amdgpu_vcn_sw_init(struct amdgpu_device *adev) +{ + struct amdgpu_ring *ring; + struct amd_sched_rq *rq; + unsigned long bo_size; + const char *fw_name; + const struct common_firmware_header *hdr; + unsigned version_major, version_minor, family_id; + int r; + + INIT_DELAYED_WORK(&adev->vcn.idle_work, amdgpu_vcn_idle_work_handler); + + switch (adev->asic_type) { + case CHIP_RAVEN: + fw_name = FIRMWARE_RAVEN; + break; + default: + return -EINVAL; + } + + r = request_firmware(&adev->vcn.fw, fw_name, adev->dev); + if (r) { + dev_err(adev->dev, "amdgpu_vcn: Can't load firmware \"%s\"\n", + fw_name); + return r; + } + + r = amdgpu_ucode_validate(adev->vcn.fw); + if (r) { + dev_err(adev->dev, "amdgpu_vcn: Can't validate firmware \"%s\"\n", + fw_name); + release_firmware(adev->vcn.fw); + adev->vcn.fw = NULL; + return r; + } + + hdr = (const struct common_firmware_header *)adev->vcn.fw->data; + family_id = le32_to_cpu(hdr->ucode_version) & 0xff; + version_major = (le32_to_cpu(hdr->ucode_version) >> 24) & 0xff; + version_minor = (le32_to_cpu(hdr->ucode_version) >> 8) & 0xff; + DRM_INFO("Found VCN firmware Version: %hu.%hu Family ID: %hu\n", + version_major, version_minor, family_id); + + + bo_size = AMDGPU_GPU_PAGE_ALIGN(le32_to_cpu(hdr->ucode_size_bytes) + 8) + + AMDGPU_VCN_STACK_SIZE + AMDGPU_VCN_HEAP_SIZE + + AMDGPU_VCN_SESSION_SIZE * 40; + r = amdgpu_bo_create_kernel(adev, bo_size, PAGE_SIZE, + AMDGPU_GEM_DOMAIN_VRAM, &adev->vcn.vcpu_bo, + &adev->vcn.gpu_addr, &adev->vcn.cpu_addr); + if (r) { + dev_err(adev->dev, "(%d) failed to allocate vcn bo\n", r); + return r; + } + + ring = &adev->vcn.ring_dec; + rq = &ring->sched.sched_rq[AMD_SCHED_PRIORITY_NORMAL]; + r = amd_sched_entity_init(&ring->sched, &adev->vcn.entity_dec, + rq, amdgpu_sched_jobs); + if (r != 0) { + DRM_ERROR("Failed setting up VCN dec run queue.\n"); + return r; + } + + return 0; +} + +int amdgpu_vcn_sw_fini(struct amdgpu_device *adev) +{ + kfree(adev->vcn.saved_bo); + + amd_sched_entity_fini(&adev->vcn.ring_dec.sched, &adev->vcn.entity_dec); + + amdgpu_bo_free_kernel(&adev->vcn.vcpu_bo, + &adev->vcn.gpu_addr, + (void **)&adev->vcn.cpu_addr); + + amdgpu_ring_fini(&adev->vcn.ring_dec); + + release_firmware(adev->vcn.fw); + + return 0; +} + +int amdgpu_vcn_suspend(struct amdgpu_device *adev) +{ + unsigned size; + void *ptr; + + if (adev->vcn.vcpu_bo == NULL) + return 0; + + cancel_delayed_work_sync(&adev->vcn.idle_work); + + size = amdgpu_bo_size(adev->vcn.vcpu_bo); + ptr = adev->vcn.cpu_addr; + + adev->vcn.saved_bo = kmalloc(size, GFP_KERNEL); + if (!adev->vcn.saved_bo) + return -ENOMEM; + + memcpy_fromio(adev->vcn.saved_bo, ptr, size); + + return 0; +} + +int amdgpu_vcn_resume(struct amdgpu_device *adev) +{ + unsigned size; + void *ptr; + + if (adev->vcn.vcpu_bo == NULL) + return -EINVAL; + + size = amdgpu_bo_size(adev->vcn.vcpu_bo); + ptr = adev->vcn.cpu_addr; + + if (adev->vcn.saved_bo != NULL) { + memcpy_toio(ptr, adev->vcn.saved_bo, size); + kfree(adev->vcn.saved_bo); + adev->vcn.saved_bo = NULL; + } else { + const struct common_firmware_header *hdr; + unsigned offset; + + hdr = (const struct common_firmware_header *)adev->vcn.fw->data; + offset = le32_to_cpu(hdr->ucode_array_offset_bytes); + memcpy_toio(adev->vcn.cpu_addr, adev->vcn.fw->data + offset, + le32_to_cpu(hdr->ucode_size_bytes)); + size -= le32_to_cpu(hdr->ucode_size_bytes); + ptr += le32_to_cpu(hdr->ucode_size_bytes); + memset_io(ptr, 0, size); + } + + return 0; +} + +static int amdgpu_vcn_dec_send_msg(struct amdgpu_ring *ring, struct amdgpu_bo *bo, + bool direct, struct dma_fence **fence) +{ + struct ttm_validate_buffer tv; + struct ww_acquire_ctx ticket; + struct list_head head; + struct amdgpu_job *job; + struct amdgpu_ib *ib; + struct dma_fence *f = NULL; + struct amdgpu_device *adev = ring->adev; + uint64_t addr; + int i, r; + + memset(&tv, 0, sizeof(tv)); + tv.bo = &bo->tbo; + + INIT_LIST_HEAD(&head); + list_add(&tv.head, &head); + + r = ttm_eu_reserve_buffers(&ticket, &head, true, NULL); + if (r) + return r; + + r = ttm_bo_validate(&bo->tbo, &bo->placement, true, false); + if (r) + goto err; + + r = amdgpu_job_alloc_with_ib(adev, 64, &job); + if (r) + goto err; + + ib = &job->ibs[0]; + addr = amdgpu_bo_gpu_offset(bo); + ib->ptr[0] = PACKET0(SOC15_REG_OFFSET(UVD, 0, mmUVD_GPCOM_VCPU_DATA0), 0); + ib->ptr[1] = addr; + ib->ptr[2] = PACKET0(SOC15_REG_OFFSET(UVD, 0, mmUVD_GPCOM_VCPU_DATA1), 0); + ib->ptr[3] = addr >> 32; + ib->ptr[4] = PACKET0(SOC15_REG_OFFSET(UVD, 0, mmUVD_GPCOM_VCPU_CMD), 0); + ib->ptr[5] = 0; + for (i = 6; i < 16; i += 2) { + ib->ptr[i] = PACKET0(SOC15_REG_OFFSET(UVD, 0, mmUVD_NO_OP), 0); + ib->ptr[i+1] = 0; + } + ib->length_dw = 16; + + if (direct) { + r = amdgpu_ib_schedule(ring, 1, ib, NULL, &f); + job->fence = dma_fence_get(f); + if (r) + goto err_free; + + amdgpu_job_free(job); + } else { + r = amdgpu_job_submit(job, ring, &adev->vcn.entity_dec, + AMDGPU_FENCE_OWNER_UNDEFINED, &f); + if (r) + goto err_free; + } + + ttm_eu_fence_buffer_objects(&ticket, &head, f); + + if (fence) + *fence = dma_fence_get(f); + amdgpu_bo_unref(&bo); + dma_fence_put(f); + + return 0; + +err_free: + amdgpu_job_free(job); + +err: + ttm_eu_backoff_reservation(&ticket, &head); + return r; +} + +static int amdgpu_vcn_dec_get_create_msg(struct amdgpu_ring *ring, uint32_t handle, + struct dma_fence **fence) +{ + struct amdgpu_device *adev = ring->adev; + struct amdgpu_bo *bo; + uint32_t *msg; + int r, i; + + r = amdgpu_bo_create(adev, 1024, PAGE_SIZE, true, + AMDGPU_GEM_DOMAIN_VRAM, + AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED | + AMDGPU_GEM_CREATE_VRAM_CONTIGUOUS, + NULL, NULL, &bo); + if (r) + return r; + + r = amdgpu_bo_reserve(bo, false); + if (r) { + amdgpu_bo_unref(&bo); + return r; + } + + r = amdgpu_bo_kmap(bo, (void **)&msg); + if (r) { + amdgpu_bo_unreserve(bo); + amdgpu_bo_unref(&bo); + return r; + } + + /* stitch together an vcn create msg */ + msg[0] = cpu_to_le32(0x00000de4); + msg[1] = cpu_to_le32(0x00000000); + msg[2] = cpu_to_le32(handle); + msg[3] = cpu_to_le32(0x00000000); + msg[4] = cpu_to_le32(0x00000000); + msg[5] = cpu_to_le32(0x00000000); + msg[6] = cpu_to_le32(0x00000000); + msg[7] = cpu_to_le32(0x00000780); + msg[8] = cpu_to_le32(0x00000440); + msg[9] = cpu_to_le32(0x00000000); + msg[10] = cpu_to_le32(0x01b37000); + for (i = 11; i < 1024; ++i) + msg[i] = cpu_to_le32(0x0); + + amdgpu_bo_kunmap(bo); + amdgpu_bo_unreserve(bo); + + return amdgpu_vcn_dec_send_msg(ring, bo, true, fence); +} + +static int amdgpu_vcn_dec_get_destroy_msg(struct amdgpu_ring *ring, uint32_t handle, + bool direct, struct dma_fence **fence) +{ + struct amdgpu_device *adev = ring->adev; + struct amdgpu_bo *bo; + uint32_t *msg; + int r, i; + + r = amdgpu_bo_create(adev, 1024, PAGE_SIZE, true, + AMDGPU_GEM_DOMAIN_VRAM, + AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED | + AMDGPU_GEM_CREATE_VRAM_CONTIGUOUS, + NULL, NULL, &bo); + if (r) + return r; + + r = amdgpu_bo_reserve(bo, false); + if (r) { + amdgpu_bo_unref(&bo); + return r; + } + + r = amdgpu_bo_kmap(bo, (void **)&msg); + if (r) { + amdgpu_bo_unreserve(bo); + amdgpu_bo_unref(&bo); + return r; + } + + /* stitch together an vcn destroy msg */ + msg[0] = cpu_to_le32(0x00000de4); + msg[1] = cpu_to_le32(0x00000002); + msg[2] = cpu_to_le32(handle); + msg[3] = cpu_to_le32(0x00000000); + for (i = 4; i < 1024; ++i) + msg[i] = cpu_to_le32(0x0); + + amdgpu_bo_kunmap(bo); + amdgpu_bo_unreserve(bo); + + return amdgpu_vcn_dec_send_msg(ring, bo, direct, fence); +} + +static void amdgpu_vcn_idle_work_handler(struct work_struct *work) +{ + struct amdgpu_device *adev = + container_of(work, struct amdgpu_device, vcn.idle_work.work); + unsigned fences = amdgpu_fence_count_emitted(&adev->vcn.ring_dec); + + if (fences == 0) { + if (adev->pm.dpm_enabled) { + amdgpu_dpm_enable_uvd(adev, false); + } else { + amdgpu_asic_set_uvd_clocks(adev, 0, 0); + } + } else { + schedule_delayed_work(&adev->vcn.idle_work, VCN_IDLE_TIMEOUT); + } +} + +void amdgpu_vcn_ring_begin_use(struct amdgpu_ring *ring) +{ + struct amdgpu_device *adev = ring->adev; + bool set_clocks = !cancel_delayed_work_sync(&adev->vcn.idle_work); + + if (set_clocks) { + if (adev->pm.dpm_enabled) { + amdgpu_dpm_enable_uvd(adev, true); + } else { + amdgpu_asic_set_uvd_clocks(adev, 53300, 40000); + } + } +} + +void amdgpu_vcn_ring_end_use(struct amdgpu_ring *ring) +{ + schedule_delayed_work(&ring->adev->vcn.idle_work, VCN_IDLE_TIMEOUT); +} + +int amdgpu_vcn_dec_ring_test_ib(struct amdgpu_ring *ring, long timeout) +{ + struct dma_fence *fence; + long r; + + r = amdgpu_vcn_dec_get_create_msg(ring, 1, NULL); + if (r) { + DRM_ERROR("amdgpu: failed to get create msg (%ld).\n", r); + goto error; + } + + r = amdgpu_vcn_dec_get_destroy_msg(ring, 1, true, &fence); + if (r) { + DRM_ERROR("amdgpu: failed to get destroy ib (%ld).\n", r); + goto error; + } + + r = dma_fence_wait_timeout(fence, false, timeout); + if (r == 0) { + DRM_ERROR("amdgpu: IB test timed out.\n"); + r = -ETIMEDOUT; + } else if (r < 0) { + DRM_ERROR("amdgpu: fence wait failed (%ld).\n", r); + } else { + DRM_INFO("ib test on ring %d succeeded\n", ring->idx); + r = 0; + } + + dma_fence_put(fence); + +error: + return r; +} --- /dev/null +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.h @@ -0,0 +1,35 @@ +/* + * Copyright 2016 Advanced Micro Devices, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + * + */ + +#ifndef __AMDGPU_VCN_H__ +#define __AMDGPU_VCN_H__ + +int amdgpu_vcn_sw_init(struct amdgpu_device *adev); +int amdgpu_vcn_sw_fini(struct amdgpu_device *adev); +int amdgpu_vcn_suspend(struct amdgpu_device *adev); +int amdgpu_vcn_resume(struct amdgpu_device *adev); +void amdgpu_vcn_ring_begin_use(struct amdgpu_ring *ring); +void amdgpu_vcn_ring_end_use(struct amdgpu_ring *ring); +int amdgpu_vcn_dec_ring_test_ib(struct amdgpu_ring *ring, long timeout); + +#endif