From aeb5399470e34cbf6942cd1f28d601b99d9ba46e Mon Sep 17 00:00:00 2001 From: Taylor R Campbell Date: Fri, 29 Jul 2022 14:17:06 +0000 Subject: [PATCH 1/3] WIP: amdgpu: more diagnostics about ring seq on timeout --- sys/external/bsd/drm2/dist/drm/amd/amdgpu/amdgpu_fence.c | 4 ++++ sys/external/bsd/drm2/dist/drm/amd/amdgpu/amdgpu_job.c | 7 +++++-- 2 files changed, 9 insertions(+), 2 deletions(-) diff --git a/sys/external/bsd/drm2/dist/drm/amd/amdgpu/amdgpu_fence.c b/sys/external/bsd/drm2/dist/drm/amd/amdgpu/amdgpu_fence.c index f66737abee9b..dc951b76a577 100644 --- a/sys/external/bsd/drm2/dist/drm/amd/amdgpu/amdgpu_fence.c +++ b/sys/external/bsd/drm2/dist/drm/amd/amdgpu/amdgpu_fence.c @@ -259,6 +259,10 @@ bool amdgpu_fence_process(struct amdgpu_ring *ring) if (unlikely(seq == last_seq)) return false; + device_printf(adev->dev, "[%p] ring %u (%s) seq %"PRIu32 + " -> %"PRIu32"\n", + __builtin_return_address(0), ring->idx, ring->name, last_seq, seq); + last_seq &= drv->num_fences_mask; seq &= drv->num_fences_mask; diff --git a/sys/external/bsd/drm2/dist/drm/amd/amdgpu/amdgpu_job.c b/sys/external/bsd/drm2/dist/drm/amd/amdgpu/amdgpu_job.c index 863270a4bc4c..36cce376ae92 100644 --- a/sys/external/bsd/drm2/dist/drm/amd/amdgpu/amdgpu_job.c +++ b/sys/external/bsd/drm2/dist/drm/amd/amdgpu/amdgpu_job.c @@ -48,9 +48,12 @@ static void amdgpu_job_timedout(struct drm_sched_job *s_job) } amdgpu_vm_get_task_info(ring->adev, job->pasid, &ti); - DRM_ERROR("ring %s timeout, signaled seq=%u, emitted seq=%u\n", + DRM_ERROR("ring %u (%s) %s timeout, signaled seq=%u, emitted seq=%u cpu_addr=%p last_seq=%"PRIu32"\n", + ring->idx, ring->name, job->base.sched->name, atomic_read(&ring->fence_drv.last_seq), - ring->fence_drv.sync_seq); + ring->fence_drv.sync_seq, + ring->fence_drv.cpu_addr, + (ring->fence_drv.cpu_addr ? le32_to_cpu(*ring->fence_drv.cpu_addr) : 0)); DRM_ERROR("Process information: process %s pid %d thread %s pid %d\n", ti.process_name, ti.tgid, ti.task_name, ti.pid); From 899f801afca0efdf054c443cdf78bd204274f00b Mon Sep 17 00:00:00 2001 From: Taylor R Campbell Date: Fri, 29 Jul 2022 18:57:44 +0000 Subject: [PATCH 2/3] WIP: amdgpu: trace gfx ring wptr --- .../bsd/drm2/dist/drm/amd/amdgpu/amdgpu_device.c | 10 ++++++++++ .../bsd/drm2/dist/drm/amd/amdgpu/amdgpu_gfx_v8_0.c | 10 ++++++++++ 2 files changed, 20 insertions(+) diff --git a/sys/external/bsd/drm2/dist/drm/amd/amdgpu/amdgpu_device.c b/sys/external/bsd/drm2/dist/drm/amd/amdgpu/amdgpu_device.c index 6de72244f165..45e248071fdb 100644 --- a/sys/external/bsd/drm2/dist/drm/amd/amdgpu/amdgpu_device.c +++ b/sys/external/bsd/drm2/dist/drm/amd/amdgpu/amdgpu_device.c @@ -877,6 +877,16 @@ static int amdgpu_device_wb_init(struct amdgpu_device *adev) return r; } + device_printf(adev->dev, + "wb_obj=%p mem_type=0x%x placement=0x%x kmap_type=0x%x pa=0x%llx gpu_addr=0x%llx wb=%p\n", + adev->wb.wb_obj, + adev->wb.wb_obj->tbo.mem.mem_type, + adev->wb.wb_obj->tbo.mem.placement, + (int)adev->wb.wb_obj->kmap.bo_kmap_type, + (unsigned long long)page_to_phys(adev->wb.wb_obj->tbo.ttm->pages[0]), + (unsigned long long)adev->wb.gpu_addr, + adev->wb.wb); + adev->wb.num_wb = AMDGPU_MAX_WB; memset(&adev->wb.used, 0, sizeof(adev->wb.used)); diff --git a/sys/external/bsd/drm2/dist/drm/amd/amdgpu/amdgpu_gfx_v8_0.c b/sys/external/bsd/drm2/dist/drm/amd/amdgpu/amdgpu_gfx_v8_0.c index 56407a92cd47..d45983443b0a 100644 --- a/sys/external/bsd/drm2/dist/drm/amd/amdgpu/amdgpu_gfx_v8_0.c +++ b/sys/external/bsd/drm2/dist/drm/amd/amdgpu/amdgpu_gfx_v8_0.c @@ -2015,6 +2015,12 @@ static int gfx_v8_0_sw_init(void *handle) ring->doorbell_index = adev->doorbell_index.gfx_ring0; } + device_printf(adev->dev, "%s ring @ %p:" + " %s doorbell 0x%"PRIx32"\n", + ring->name, ring, + (ring->use_doorbell ? "use" : "don't use"), + ring->doorbell_index); + r = amdgpu_ring_init(adev, ring, 1024, &adev->gfx.eop_irq, AMDGPU_CP_IRQ_GFX_ME0_PIPE0_EOP); if (r) @@ -6030,6 +6036,10 @@ static void gfx_v8_0_ring_set_wptr_gfx(struct amdgpu_ring *ring) { struct amdgpu_device *adev = ring->adev; + device_printf(adev->dev, + "%s ring @ %p: write wptr @ %p: 0x%"PRIx64"\n", + ring->name, ring, &adev->wb.wb[ring->wptr_offs], ring->wptr); + if (ring->use_doorbell) { /* XXX check if swapping is necessary on BE */ adev->wb.wb[ring->wptr_offs] = lower_32_bits(ring->wptr); From 243d934f6317f455573270a6266616d18c05316a Mon Sep 17 00:00:00 2001 From: Taylor R Campbell Date: Sat, 30 Jul 2022 17:26:40 +0000 Subject: [PATCH 3/3] amdgpu: Zero-initialize ih ring. --- sys/external/bsd/drm2/dist/drm/amd/amdgpu/amdgpu_ih.c | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/sys/external/bsd/drm2/dist/drm/amd/amdgpu/amdgpu_ih.c b/sys/external/bsd/drm2/dist/drm/amd/amdgpu/amdgpu_ih.c index d2caee8524e8..92963c95b41e 100644 --- a/sys/external/bsd/drm2/dist/drm/amd/amdgpu/amdgpu_ih.c +++ b/sys/external/bsd/drm2/dist/drm/amd/amdgpu/amdgpu_ih.c @@ -96,6 +96,13 @@ fail2: bus_dmamap_destroy(adev->ddev->dmat, ih->ring_map); fail3: __unused bus_dmamem_unmap(adev->ddev->dmat, kva, size); goto fail2; } + memset(kva, 0, size); + /* + * bus_dmamap_sync is probably not necessary here -- or + * if it is necessary, we need a bunch more elsewhere. + */ + bus_dmamap_sync(adev->ddev->dmat, ih->ring_map, 0, size, + BUS_DMASYNC_PREREAD|BUS_DMASYNC_PREWRITE); ih->ring = kva; dma_addr = ih->ring_map->dm_segs[0].ds_addr; #else