From 472d9c0f763539e036b6a54cee4f710166c74eba Mon Sep 17 00:00:00 2001 From: Taylor R Campbell Date: Fri, 29 Jul 2022 21:24:57 +0000 Subject: [PATCH 01/13] WIP: radeon: flail around with r600 dma ring test --- sys/external/bsd/drm2/dist/drm/radeon/radeon_r600_dma.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/sys/external/bsd/drm2/dist/drm/radeon/radeon_r600_dma.c b/sys/external/bsd/drm2/dist/drm/radeon/radeon_r600_dma.c index 08b60f996d5a..8d18dd0704da 100644 --- a/sys/external/bsd/drm2/dist/drm/radeon/radeon_r600_dma.c +++ b/sys/external/bsd/drm2/dist/drm/radeon/radeon_r600_dma.c @@ -92,6 +92,7 @@ void r600_dma_set_wptr(struct radeon_device *rdev, struct radeon_ring *ring) { WREG32(DMA_RB_WPTR, (ring->wptr << 2) & 0x3fffc); + (void)RREG32(DMA_RB_WPTR); } /** @@ -249,6 +250,7 @@ int r600_dma_ring_test(struct radeon_device *rdev, tmp = 0xCAFEDEAD; rdev->wb.wb[index/4] = cpu_to_le32(tmp); + mb(); r = radeon_ring_lock(rdev, ring, 4); if (r) { @@ -262,6 +264,7 @@ int r600_dma_ring_test(struct radeon_device *rdev, radeon_ring_unlock_commit(rdev, ring, false); for (i = 0; i < rdev->usec_timeout; i++) { + mb(); tmp = le32_to_cpu(rdev->wb.wb[index/4]); if (tmp == 0xDEADBEEF) break; From adce3453b5b3d3769dc0f43779b61942e5cf2e7d Mon Sep 17 00:00:00 2001 From: Taylor R Campbell Date: Fri, 29 Jul 2022 21:48:23 +0000 Subject: [PATCH 02/13] WIP: radeon: show wb mapping --- sys/external/bsd/drm2/dist/drm/radeon/radeon_device.c | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/sys/external/bsd/drm2/dist/drm/radeon/radeon_device.c b/sys/external/bsd/drm2/dist/drm/radeon/radeon_device.c index 083745eb1606..656f179cec45 100644 --- a/sys/external/bsd/drm2/dist/drm/radeon/radeon_device.c +++ b/sys/external/bsd/drm2/dist/drm/radeon/radeon_device.c @@ -513,6 +513,14 @@ int radeon_wb_init(struct radeon_device *rdev) radeon_wb_fini(rdev); return r; } + device_printf(rdev->dev, "wb mem_type=0x%x placement=0x%x" + " pa=0x%llx gpu_addr=0x%llx kmap_type=0x%x kva=%p\n", + 
(unsigned)rdev->wb.wb_obj->tbo.mem.mem_type, + (unsigned)rdev->wb.wb_obj->tbo.mem.placement, + (unsigned long long)page_to_phys(rdev->wb.wb_obj->tbo.ttm->pages[0]), + (unsigned long long)rdev->wb.gpu_addr, + (unsigned)rdev->wb.wb_obj->kmap.bo_kmap_type, + rdev->wb.wb); } /* clear wb memory */ From aeb5399470e34cbf6942cd1f28d601b99d9ba46e Mon Sep 17 00:00:00 2001 From: Taylor R Campbell Date: Fri, 29 Jul 2022 14:17:06 +0000 Subject: [PATCH 03/13] WIP: amdgpu: more diagnostics about ring seq on timeout --- sys/external/bsd/drm2/dist/drm/amd/amdgpu/amdgpu_fence.c | 4 ++++ sys/external/bsd/drm2/dist/drm/amd/amdgpu/amdgpu_job.c | 7 +++++-- 2 files changed, 9 insertions(+), 2 deletions(-) diff --git a/sys/external/bsd/drm2/dist/drm/amd/amdgpu/amdgpu_fence.c b/sys/external/bsd/drm2/dist/drm/amd/amdgpu/amdgpu_fence.c index f66737abee9b..dc951b76a577 100644 --- a/sys/external/bsd/drm2/dist/drm/amd/amdgpu/amdgpu_fence.c +++ b/sys/external/bsd/drm2/dist/drm/amd/amdgpu/amdgpu_fence.c @@ -259,6 +259,10 @@ bool amdgpu_fence_process(struct amdgpu_ring *ring) if (unlikely(seq == last_seq)) return false; + device_printf(adev->dev, "[%p] ring %u (%s) seq %"PRIu32 + " -> %"PRIu32"\n", + __builtin_return_address(0), ring->idx, ring->name, last_seq, seq); + last_seq &= drv->num_fences_mask; seq &= drv->num_fences_mask; diff --git a/sys/external/bsd/drm2/dist/drm/amd/amdgpu/amdgpu_job.c b/sys/external/bsd/drm2/dist/drm/amd/amdgpu/amdgpu_job.c index 863270a4bc4c..36cce376ae92 100644 --- a/sys/external/bsd/drm2/dist/drm/amd/amdgpu/amdgpu_job.c +++ b/sys/external/bsd/drm2/dist/drm/amd/amdgpu/amdgpu_job.c @@ -48,9 +48,12 @@ static void amdgpu_job_timedout(struct drm_sched_job *s_job) } amdgpu_vm_get_task_info(ring->adev, job->pasid, &ti); - DRM_ERROR("ring %s timeout, signaled seq=%u, emitted seq=%u\n", + DRM_ERROR("ring %u (%s) %s timeout, signaled seq=%u, emitted seq=%u cpu_addr=%p last_seq=%"PRIu32"\n", + ring->idx, ring->name, job->base.sched->name, 
atomic_read(&ring->fence_drv.last_seq), - ring->fence_drv.sync_seq); + ring->fence_drv.sync_seq, + ring->fence_drv.cpu_addr, + (ring->fence_drv.cpu_addr ? le32_to_cpu(*ring->fence_drv.cpu_addr) : 0)); DRM_ERROR("Process information: process %s pid %d thread %s pid %d\n", ti.process_name, ti.tgid, ti.task_name, ti.pid); From 899f801afca0efdf054c443cdf78bd204274f00b Mon Sep 17 00:00:00 2001 From: Taylor R Campbell Date: Fri, 29 Jul 2022 18:57:44 +0000 Subject: [PATCH 04/13] WIP: amdgpu: trace gfx ring wptr --- .../bsd/drm2/dist/drm/amd/amdgpu/amdgpu_device.c | 10 ++++++++++ .../bsd/drm2/dist/drm/amd/amdgpu/amdgpu_gfx_v8_0.c | 10 ++++++++++ 2 files changed, 20 insertions(+) diff --git a/sys/external/bsd/drm2/dist/drm/amd/amdgpu/amdgpu_device.c b/sys/external/bsd/drm2/dist/drm/amd/amdgpu/amdgpu_device.c index 6de72244f165..45e248071fdb 100644 --- a/sys/external/bsd/drm2/dist/drm/amd/amdgpu/amdgpu_device.c +++ b/sys/external/bsd/drm2/dist/drm/amd/amdgpu/amdgpu_device.c @@ -877,6 +877,16 @@ static int amdgpu_device_wb_init(struct amdgpu_device *adev) return r; } + device_printf(adev->dev, + "wb_obj=%p mem_type=0x%x placement=0x%x kmap_type=0x%x pa=0x%llx gpu_addr=0x%llx wb=%p\n", + adev->wb.wb_obj, + adev->wb.wb_obj->tbo.mem.mem_type, + adev->wb.wb_obj->tbo.mem.placement, + (int)adev->wb.wb_obj->kmap.bo_kmap_type, + (unsigned long long)page_to_phys(adev->wb.wb_obj->tbo.ttm->pages[0]), + (unsigned long long)adev->wb.gpu_addr, + adev->wb.wb); + adev->wb.num_wb = AMDGPU_MAX_WB; memset(&adev->wb.used, 0, sizeof(adev->wb.used)); diff --git a/sys/external/bsd/drm2/dist/drm/amd/amdgpu/amdgpu_gfx_v8_0.c b/sys/external/bsd/drm2/dist/drm/amd/amdgpu/amdgpu_gfx_v8_0.c index 56407a92cd47..d45983443b0a 100644 --- a/sys/external/bsd/drm2/dist/drm/amd/amdgpu/amdgpu_gfx_v8_0.c +++ b/sys/external/bsd/drm2/dist/drm/amd/amdgpu/amdgpu_gfx_v8_0.c @@ -2015,6 +2015,12 @@ static int gfx_v8_0_sw_init(void *handle) ring->doorbell_index = adev->doorbell_index.gfx_ring0; } + device_printf(adev->dev, 
"%s ring @ %p:" + " %s doorbell 0x%"PRIx32"\n", + ring->name, ring, + (ring->use_doorbell ? "use" : "don't use"), + ring->doorbell_index); + r = amdgpu_ring_init(adev, ring, 1024, &adev->gfx.eop_irq, AMDGPU_CP_IRQ_GFX_ME0_PIPE0_EOP); if (r) @@ -6030,6 +6036,10 @@ static void gfx_v8_0_ring_set_wptr_gfx(struct amdgpu_ring *ring) { struct amdgpu_device *adev = ring->adev; + device_printf(adev->dev, + "%s ring @ %p: write wptr @ %p: 0x%"PRIx64"\n", + ring->name, ring, &adev->wb.wb[ring->wptr_offs], ring->wptr); + if (ring->use_doorbell) { /* XXX check if swapping is necessary on BE */ adev->wb.wb[ring->wptr_offs] = lower_32_bits(ring->wptr); From 243d934f6317f455573270a6266616d18c05316a Mon Sep 17 00:00:00 2001 From: Taylor R Campbell Date: Sat, 30 Jul 2022 17:26:40 +0000 Subject: [PATCH 05/13] amdgpu: Zero-initialize ih ring. --- sys/external/bsd/drm2/dist/drm/amd/amdgpu/amdgpu_ih.c | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/sys/external/bsd/drm2/dist/drm/amd/amdgpu/amdgpu_ih.c b/sys/external/bsd/drm2/dist/drm/amd/amdgpu/amdgpu_ih.c index d2caee8524e8..92963c95b41e 100644 --- a/sys/external/bsd/drm2/dist/drm/amd/amdgpu/amdgpu_ih.c +++ b/sys/external/bsd/drm2/dist/drm/amd/amdgpu/amdgpu_ih.c @@ -96,6 +96,13 @@ fail2: bus_dmamap_destroy(adev->ddev->dmat, ih->ring_map); fail3: __unused bus_dmamem_unmap(adev->ddev->dmat, kva, size); goto fail2; } + memset(kva, 0, size); + /* + * bus_dmamap_sync is probably not necessary here -- or + * if it is necessary, we need a bunch more elsewhere. 
+ */ + bus_dmamap_sync(adev->ddev->dmat, ih->ring_map, 0, size, + BUS_DMASYNC_PREREAD|BUS_DMASYNC_PREWRITE); ih->ring = kva; dma_addr = ih->ring_map->dm_segs[0].ds_addr; #else From 0b3528f26bbb659ecb31371ce27b0cd13286a889 Mon Sep 17 00:00:00 2001 From: Taylor R Campbell Date: Mon, 1 Aug 2022 22:48:27 +0000 Subject: [PATCH 06/13] WIP: radeon acpi crap --- sys/external/bsd/drm2/dist/drm/radeon/radeon_acpi.c | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/sys/external/bsd/drm2/dist/drm/radeon/radeon_acpi.c b/sys/external/bsd/drm2/dist/drm/radeon/radeon_acpi.c index 94b5ebcdee4f..231025fa1307 100644 --- a/sys/external/bsd/drm2/dist/drm/radeon/radeon_acpi.c +++ b/sys/external/bsd/drm2/dist/drm/radeon/radeon_acpi.c @@ -367,6 +367,7 @@ out: * handles it. * Returns NOTIFY code */ +#ifndef __NetBSD__ static int radeon_atif_handler(struct radeon_device *rdev, struct acpi_bus_event *event) { @@ -436,6 +437,7 @@ static int radeon_atif_handler(struct radeon_device *rdev, */ return NOTIFY_BAD; } +#endif /* Call the ATCS method */ @@ -689,6 +691,15 @@ int radeon_acpi_pcie_performance_request(struct radeon_device *rdev, * acpi events. 
* Returns NOTIFY code */ +#ifdef __NetBSD__ +static int +radeon_acpi_event(ACPI_HANDLE handle, uint32_t notify, void *context) +{ + + radeon_pm_acpi_event_handler(rdev); + /* XXX ATIF handler */ +} +#else static int radeon_acpi_event(struct notifier_block *nb, unsigned long val, void *data) @@ -708,6 +719,7 @@ static int radeon_acpi_event(struct notifier_block *nb, /* Check for pending SBIOS requests */ return radeon_atif_handler(rdev, entry); } +#endif /* Call all ACPI methods here */ /** From 3f0495292c5e9ee30dcb1795cdf8386f0f05b376 Mon Sep 17 00:00:00 2001 From: Taylor R Campbell Date: Tue, 23 Aug 2022 13:51:41 +0000 Subject: [PATCH 07/13] WIP: radeon: track down framebuffer mapping flags --- sys/external/bsd/drm2/dist/drm/radeon/radeon_fb.c | 1 + .../bsd/drm2/dist/drm/radeon/radeon_object.c | 14 +++++++++++++- sys/external/bsd/drm2/dist/drm/ttm/ttm_bo.c | 3 +++ sys/external/bsd/drm2/dist/drm/ttm/ttm_bo_util.c | 10 ++++++++++ 4 files changed, 27 insertions(+), 1 deletion(-) diff --git a/sys/external/bsd/drm2/dist/drm/radeon/radeon_fb.c b/sys/external/bsd/drm2/dist/drm/radeon/radeon_fb.c index 6a6e19dbaf96..d199ea12608c 100644 --- a/sys/external/bsd/drm2/dist/drm/radeon/radeon_fb.c +++ b/sys/external/bsd/drm2/dist/drm/radeon/radeon_fb.c @@ -256,6 +256,7 @@ static int radeonfb_create(struct drm_fb_helper *helper, } rbo = gem_to_radeon_bo(gobj); + printf("%s:%d: bo=%p\n", __func__, __LINE__, &rbo->tbo); #ifdef __NetBSD__ ret = radeon_framebuffer_init(rdev->ddev, &rfbdev->fb, &mode_cmd, gobj); diff --git a/sys/external/bsd/drm2/dist/drm/radeon/radeon_object.c b/sys/external/bsd/drm2/dist/drm/radeon/radeon_object.c index 6d160155ec57..7f683e73c571 100644 --- a/sys/external/bsd/drm2/dist/drm/radeon/radeon_object.c +++ b/sys/external/bsd/drm2/dist/drm/radeon/radeon_object.c @@ -184,6 +184,11 @@ void radeon_ttm_placement_from_domain(struct radeon_bo *rbo, u32 domain) rbo->rdev->mc.visible_vram_size >> PAGE_SHIFT; else rbo->placements[i].lpfn = 0; + printf("%s:%d: bo=%p 
placement[%u] fpfn=0x%x lpfn=0x%x flags=0x%x\n", + __func__, __LINE__, &rbo->tbo, i, + rbo->placements[i].fpfn, + rbo->placements[i].lpfn, + rbo->placements[i].flags); } } @@ -240,6 +245,8 @@ int radeon_bo_create(struct radeon_device *rdev, /* XXX: Write-combined CPU mappings of GTT seem broken on 32-bit * See https://bugs.freedesktop.org/show_bug.cgi?id=84627 */ + printf("%s:%d: clear WC|UC in 0x%x -> 0x%x\n", __func__, __LINE__, + bo->flags, bo->flags & ~(RADEON_GEM_GTT_WC | RADEON_GEM_GTT_UC)); bo->flags &= ~(RADEON_GEM_GTT_WC | RADEON_GEM_GTT_UC); #elif defined(CONFIG_X86) && !defined(CONFIG_X86_PAT) /* Don't try to enable write-combining when it can't work, or things @@ -254,13 +261,18 @@ int radeon_bo_create(struct radeon_device *rdev, if (bo->flags & RADEON_GEM_GTT_WC) DRM_INFO_ONCE("Please enable CONFIG_MTRR and CONFIG_X86_PAT for " "better performance thanks to write-combining\n"); + printf("%s:%d: clear WC|UC in 0x%x -> 0x%x\n", __func__, __LINE__, + bo->flags, bo->flags & ~(RADEON_GEM_GTT_WC | RADEON_GEM_GTT_UC)); bo->flags &= ~(RADEON_GEM_GTT_WC | RADEON_GEM_GTT_UC); #else /* For architectures that don't support WC memory, * mask out the WC flag from the BO */ - if (!drm_arch_can_wc_memory()) + if (!drm_arch_can_wc_memory()) { + printf("%s:%d: clear WC in 0x%x -> 0x%x\n", __func__, __LINE__, + bo->flags, bo->flags & ~RADEON_GEM_GTT_WC); bo->flags &= ~RADEON_GEM_GTT_WC; + } #endif radeon_ttm_placement_from_domain(bo, domain); diff --git a/sys/external/bsd/drm2/dist/drm/ttm/ttm_bo.c b/sys/external/bsd/drm2/dist/drm/ttm/ttm_bo.c index 063ce959e30d..a88b5a89b13f 100644 --- a/sys/external/bsd/drm2/dist/drm/ttm/ttm_bo.c +++ b/sys/external/bsd/drm2/dist/drm/ttm/ttm_bo.c @@ -1284,6 +1284,9 @@ int ttm_bo_validate(struct ttm_buffer_object *bo, if (ret) return ret; } + printf("%s:%d: bo=%p mem_type=0x%x placement=0x%x is_iomem=%d\n", + __func__, __LINE__, + bo, bo->mem.mem_type, bo->mem.placement, bo->mem.bus.is_iomem); return 0; } EXPORT_SYMBOL(ttm_bo_validate); 
diff --git a/sys/external/bsd/drm2/dist/drm/ttm/ttm_bo_util.c b/sys/external/bsd/drm2/dist/drm/ttm/ttm_bo_util.c index 035c49a0fefd..0312d3cd11a1 100644 --- a/sys/external/bsd/drm2/dist/drm/ttm/ttm_bo_util.c +++ b/sys/external/bsd/drm2/dist/drm/ttm/ttm_bo_util.c @@ -644,6 +644,8 @@ static int ttm_bo_ioremap(struct ttm_buffer_object *bo, struct ttm_mem_reg *mem = &bo->mem; if (bo->mem.bus.addr) { + printf("%s:%d: bo=%p placement=0x%x premapped %p\n", + __func__, __LINE__, bo, mem->placement, bo->mem.bus.addr); map->bo_kmap_type = ttm_bo_map_premapped; map->virtual = (void *)(((u8 *)bo->mem.bus.addr) + offset); } else { @@ -657,6 +659,8 @@ static int ttm_bo_ioremap(struct ttm_buffer_object *bo, addr = (bo->mem.bus.base + bo->mem.bus.offset + offset); if (ISSET(mem->placement, TTM_PL_FLAG_WC)) flags |= BUS_SPACE_MAP_PREFETCHABLE; + printf("%s:%d: bo=%p placement=0x%x bus_space_map flags=0x%x\n", + __func__, __LINE__, bo, mem->placement, flags); /* XXX errno NetBSD->Linux */ ret = -bus_space_map(bo->bdev->memt, addr, size, flags, &map->u.io.memh); @@ -664,6 +668,8 @@ static int ttm_bo_ioremap(struct ttm_buffer_object *bo, return ret; map->u.io.size = size; map->virtual = bus_space_vaddr(bo->bdev->memt, map->u.io.memh); + printf("%s:%d: mapped bo=0x%"PRIxBUSADDR" at %p\n", + __func__, __LINE__, addr, map->virtual); } #else if (mem->placement & TTM_PL_FLAG_WC) @@ -698,6 +704,8 @@ static int ttm_bo_kmap_ttm(struct ttm_buffer_object *bo, return ret; if (num_pages == 1 && (mem->placement & TTM_PL_FLAG_CACHED)) { + printf("%s:%d: bo=%p placement=0x%x kmap\n", + __func__, __LINE__, bo, mem->placement); /* * We're mapping a single page, and the desired * page protection is consistent with the bo. @@ -717,6 +725,8 @@ static int ttm_bo_kmap_ttm(struct ttm_buffer_object *bo, * or to make the buffer object look contiguous. 
*/ prot = ttm_io_prot(mem->placement, PAGE_KERNEL); + printf("%s:%d: bo=%p placement=0x%x vmap prot=0x%x\n", + __func__, __LINE__, bo, mem->placement, prot); map->bo_kmap_type = ttm_bo_map_vmap; map->virtual = vmap(ttm->pages + start_page, num_pages, 0, prot); From 7d99d8757c255fe3e02bf7fcdbe09dce3fd0cc2b Mon Sep 17 00:00:00 2001 From: Taylor R Campbell Date: Mon, 29 Aug 2022 14:32:58 +0000 Subject: [PATCH 08/13] Revert "WIP: radeon: track down framebuffer mapping flags" This reverts commit 3f0495292c5e9ee30dcb1795cdf8386f0f05b376. --- sys/external/bsd/drm2/dist/drm/radeon/radeon_fb.c | 1 - .../bsd/drm2/dist/drm/radeon/radeon_object.c | 14 +------------- sys/external/bsd/drm2/dist/drm/ttm/ttm_bo.c | 3 --- sys/external/bsd/drm2/dist/drm/ttm/ttm_bo_util.c | 10 ---------- 4 files changed, 1 insertion(+), 27 deletions(-) diff --git a/sys/external/bsd/drm2/dist/drm/radeon/radeon_fb.c b/sys/external/bsd/drm2/dist/drm/radeon/radeon_fb.c index d199ea12608c..6a6e19dbaf96 100644 --- a/sys/external/bsd/drm2/dist/drm/radeon/radeon_fb.c +++ b/sys/external/bsd/drm2/dist/drm/radeon/radeon_fb.c @@ -256,7 +256,6 @@ static int radeonfb_create(struct drm_fb_helper *helper, } rbo = gem_to_radeon_bo(gobj); - printf("%s:%d: bo=%p\n", __func__, __LINE__, &rbo->tbo); #ifdef __NetBSD__ ret = radeon_framebuffer_init(rdev->ddev, &rfbdev->fb, &mode_cmd, gobj); diff --git a/sys/external/bsd/drm2/dist/drm/radeon/radeon_object.c b/sys/external/bsd/drm2/dist/drm/radeon/radeon_object.c index 7f683e73c571..6d160155ec57 100644 --- a/sys/external/bsd/drm2/dist/drm/radeon/radeon_object.c +++ b/sys/external/bsd/drm2/dist/drm/radeon/radeon_object.c @@ -184,11 +184,6 @@ void radeon_ttm_placement_from_domain(struct radeon_bo *rbo, u32 domain) rbo->rdev->mc.visible_vram_size >> PAGE_SHIFT; else rbo->placements[i].lpfn = 0; - printf("%s:%d: bo=%p placement[%u] fpfn=0x%x lpfn=0x%x flags=0x%x\n", - __func__, __LINE__, &rbo->tbo, i, - rbo->placements[i].fpfn, - rbo->placements[i].lpfn, - 
rbo->placements[i].flags); } } @@ -245,8 +240,6 @@ int radeon_bo_create(struct radeon_device *rdev, /* XXX: Write-combined CPU mappings of GTT seem broken on 32-bit * See https://bugs.freedesktop.org/show_bug.cgi?id=84627 */ - printf("%s:%d: clear WC|UC in 0x%x -> 0x%x\n", __func__, __LINE__, - bo->flags, bo->flags & ~(RADEON_GEM_GTT_WC | RADEON_GEM_GTT_UC)); bo->flags &= ~(RADEON_GEM_GTT_WC | RADEON_GEM_GTT_UC); #elif defined(CONFIG_X86) && !defined(CONFIG_X86_PAT) /* Don't try to enable write-combining when it can't work, or things @@ -261,18 +254,13 @@ int radeon_bo_create(struct radeon_device *rdev, if (bo->flags & RADEON_GEM_GTT_WC) DRM_INFO_ONCE("Please enable CONFIG_MTRR and CONFIG_X86_PAT for " "better performance thanks to write-combining\n"); - printf("%s:%d: clear WC|UC in 0x%x -> 0x%x\n", __func__, __LINE__, - bo->flags, bo->flags & ~(RADEON_GEM_GTT_WC | RADEON_GEM_GTT_UC)); bo->flags &= ~(RADEON_GEM_GTT_WC | RADEON_GEM_GTT_UC); #else /* For architectures that don't support WC memory, * mask out the WC flag from the BO */ - if (!drm_arch_can_wc_memory()) { - printf("%s:%d: clear WC in 0x%x -> 0x%x\n", __func__, __LINE__, - bo->flags, bo->flags & ~RADEON_GEM_GTT_WC); + if (!drm_arch_can_wc_memory()) bo->flags &= ~RADEON_GEM_GTT_WC; - } #endif radeon_ttm_placement_from_domain(bo, domain); diff --git a/sys/external/bsd/drm2/dist/drm/ttm/ttm_bo.c b/sys/external/bsd/drm2/dist/drm/ttm/ttm_bo.c index a88b5a89b13f..063ce959e30d 100644 --- a/sys/external/bsd/drm2/dist/drm/ttm/ttm_bo.c +++ b/sys/external/bsd/drm2/dist/drm/ttm/ttm_bo.c @@ -1284,9 +1284,6 @@ int ttm_bo_validate(struct ttm_buffer_object *bo, if (ret) return ret; } - printf("%s:%d: bo=%p mem_type=0x%x placement=0x%x is_iomem=%d\n", - __func__, __LINE__, - bo, bo->mem.mem_type, bo->mem.placement, bo->mem.bus.is_iomem); return 0; } EXPORT_SYMBOL(ttm_bo_validate); diff --git a/sys/external/bsd/drm2/dist/drm/ttm/ttm_bo_util.c b/sys/external/bsd/drm2/dist/drm/ttm/ttm_bo_util.c index 
0312d3cd11a1..035c49a0fefd 100644 --- a/sys/external/bsd/drm2/dist/drm/ttm/ttm_bo_util.c +++ b/sys/external/bsd/drm2/dist/drm/ttm/ttm_bo_util.c @@ -644,8 +644,6 @@ static int ttm_bo_ioremap(struct ttm_buffer_object *bo, struct ttm_mem_reg *mem = &bo->mem; if (bo->mem.bus.addr) { - printf("%s:%d: bo=%p placement=0x%x premapped %p\n", - __func__, __LINE__, bo, mem->placement, bo->mem.bus.addr); map->bo_kmap_type = ttm_bo_map_premapped; map->virtual = (void *)(((u8 *)bo->mem.bus.addr) + offset); } else { @@ -659,8 +657,6 @@ static int ttm_bo_ioremap(struct ttm_buffer_object *bo, addr = (bo->mem.bus.base + bo->mem.bus.offset + offset); if (ISSET(mem->placement, TTM_PL_FLAG_WC)) flags |= BUS_SPACE_MAP_PREFETCHABLE; - printf("%s:%d: bo=%p placement=0x%x bus_space_map flags=0x%x\n", - __func__, __LINE__, bo, mem->placement, flags); /* XXX errno NetBSD->Linux */ ret = -bus_space_map(bo->bdev->memt, addr, size, flags, &map->u.io.memh); @@ -668,8 +664,6 @@ static int ttm_bo_ioremap(struct ttm_buffer_object *bo, return ret; map->u.io.size = size; map->virtual = bus_space_vaddr(bo->bdev->memt, map->u.io.memh); - printf("%s:%d: mapped bo=0x%"PRIxBUSADDR" at %p\n", - __func__, __LINE__, addr, map->virtual); } #else if (mem->placement & TTM_PL_FLAG_WC) @@ -704,8 +698,6 @@ static int ttm_bo_kmap_ttm(struct ttm_buffer_object *bo, return ret; if (num_pages == 1 && (mem->placement & TTM_PL_FLAG_CACHED)) { - printf("%s:%d: bo=%p placement=0x%x kmap\n", - __func__, __LINE__, bo, mem->placement); /* * We're mapping a single page, and the desired * page protection is consistent with the bo. @@ -725,8 +717,6 @@ static int ttm_bo_kmap_ttm(struct ttm_buffer_object *bo, * or to make the buffer object look contiguous. 
*/ prot = ttm_io_prot(mem->placement, PAGE_KERNEL); - printf("%s:%d: bo=%p placement=0x%x vmap prot=0x%x\n", - __func__, __LINE__, bo, mem->placement, prot); map->bo_kmap_type = ttm_bo_map_vmap; map->virtual = vmap(ttm->pages + start_page, num_pages, 0, prot); From ec5c6924dbecc7681c6b2ef68c2f68c57fb479b8 Mon Sep 17 00:00:00 2001 From: Taylor R Campbell Date: Mon, 5 Sep 2022 22:04:55 +0000 Subject: [PATCH 09/13] drm: Allow DRM_IOCTL_GET_UNIQUE on render nodes. On NetBSD, libdrm uses this to discover what kind of bus the device is on, without which it refuses to expose the render node at all, rendering it useless. With this change, libdrm is able to use render nodes on NetBSD. Since this is just reading out information about the bus type and bus/dev/func numbers, I don't think it's problematic to expose to render nodes. While here, fix the locking around and access path to the master. --- sys/external/bsd/drm2/dist/drm/drm_ioctl.c | 34 +++++++++++++++------- 1 file changed, 24 insertions(+), 10 deletions(-) diff --git a/sys/external/bsd/drm2/dist/drm/drm_ioctl.c b/sys/external/bsd/drm2/dist/drm/drm_ioctl.c index b05c0303f734..4e9e61c15507 100644 --- a/sys/external/bsd/drm2/dist/drm/drm_ioctl.c +++ b/sys/external/bsd/drm2/dist/drm/drm_ioctl.c @@ -129,19 +129,33 @@ int drm_getunique(struct drm_device *dev, void *data, struct drm_file *file_priv) { struct drm_unique *u = data; - struct drm_master *master = file_priv->master; + struct drm_master *master; + int ret; - mutex_lock(&master->dev->master_mutex); - if (u->unique_len >= master->unique_len) { - if (copy_to_user(u->unique, master->unique, master->unique_len)) { - mutex_unlock(&master->dev->master_mutex); - return -EFAULT; - } + mutex_lock(&dev->master_mutex); + master = dev->master; + if (master == NULL) { + ret = -ENXIO; + goto out; + } + /* + * Copy out only if the user allocated enough space. 
Either + * way, on success, report the actual size -- so the user can + * allocate enough space if they didn't before, or so they know + * how much we copied out. + */ + if (u->unique_len < master->unique_len) { + ret = 0; + } else { + ret = copy_to_user(u->unique, master->unique, + master->unique_len); + if (ret) + goto out; } u->unique_len = master->unique_len; - mutex_unlock(&master->dev->master_mutex); +out: mutex_unlock(&dev->master_mutex); - return 0; + return ret; } static void @@ -597,7 +611,7 @@ EXPORT_SYMBOL(drm_ioctl_permit); /* Ioctl table */ static const struct drm_ioctl_desc drm_ioctls[] = { DRM_IOCTL_DEF(DRM_IOCTL_VERSION, drm_version, DRM_RENDER_ALLOW), - DRM_IOCTL_DEF(DRM_IOCTL_GET_UNIQUE, drm_getunique, 0), + DRM_IOCTL_DEF(DRM_IOCTL_GET_UNIQUE, drm_getunique, DRM_RENDER_ALLOW), DRM_IOCTL_DEF(DRM_IOCTL_GET_MAGIC, drm_getmagic, 0), DRM_IOCTL_DEF(DRM_IOCTL_IRQ_BUSID, drm_irq_by_busid, DRM_MASTER|DRM_ROOT_ONLY), From 648016b1b81753d8130b5e91057578dbdbc909b7 Mon Sep 17 00:00:00 2001 From: Taylor R Campbell Date: Wed, 7 Sep 2022 15:55:51 +0000 Subject: [PATCH 10/13] WIP: i915: Print when candidate kern/56591 branches are hit. 
--- .../bsd/drm2/dist/drm/i915/gem/i915_gem_execbuffer.c | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/sys/external/bsd/drm2/dist/drm/i915/gem/i915_gem_execbuffer.c b/sys/external/bsd/drm2/dist/drm/i915/gem/i915_gem_execbuffer.c index 6ce02a875af5..8db3b1db8890 100644 --- a/sys/external/bsd/drm2/dist/drm/i915/gem/i915_gem_execbuffer.c +++ b/sys/external/bsd/drm2/dist/drm/i915/gem/i915_gem_execbuffer.c @@ -547,9 +547,12 @@ eb_add_vma(struct i915_execbuffer *eb, if (drm_mm_node_allocated(&vma->node)) err = i915_vma_unbind(vma); if (unlikely(err)) { + printf("%s: lucky day, err=%d\n", __func__, err); vma->exec_flags = NULL; - if (i == batch_idx) + if (i == batch_idx) { + printf("%s: double-lucky day\n", __func__); eb->batch = NULL; + } eb->vma[i] = NULL; } } @@ -1910,8 +1913,10 @@ static int eb_move_to_gpu(struct i915_execbuffer *eb) __eb_unreserve_vma(vma, flags); vma->exec_flags = NULL; - if (err) + if (err) { + printf("%s: lucky day, err=%d\n", __func__, err); eb->vma[i] = NULL; + } if (unlikely(flags & __EXEC_OBJECT_HAS_REF)) i915_vma_put(vma); From 53520bab5ca342ac3b2160d91b573e24ba623a4d Mon Sep 17 00:00:00 2001 From: Taylor R Campbell Date: Thu, 8 Sep 2022 00:53:37 +0000 Subject: [PATCH 11/13] drm: New mechanism to suspend ioctls during system suspend. drm drivers must opt into this by calling drm_suspend_ioctl in their driver suspend routine, and drm_resume_ioctl in their driver resume routine. 
--- sys/external/bsd/drm2/dist/drm/drm_drv.c | 19 ++++++ sys/external/bsd/drm2/dist/drm/drm_ioctl.c | 59 +++++++++++++++++++ .../bsd/drm2/dist/include/drm/drm_device.h | 5 ++ .../bsd/drm2/dist/include/drm/drm_ioctl.h | 2 + 4 files changed, 85 insertions(+) diff --git a/sys/external/bsd/drm2/dist/drm/drm_drv.c b/sys/external/bsd/drm2/dist/drm/drm_drv.c index b2796a205639..8a0625b3b583 100644 --- a/sys/external/bsd/drm2/dist/drm/drm_drv.c +++ b/sys/external/bsd/drm2/dist/drm/drm_drv.c @@ -695,6 +695,12 @@ int drm_dev_init(struct drm_device *dev, mutex_init(&dev->filelist_mutex); mutex_init(&dev->clientlist_mutex); mutex_init(&dev->master_mutex); +#ifdef __NetBSD__ + mutex_init(&dev->suspend_lock); + DRM_INIT_WAITQUEUE(&dev->suspend_cv, "drmsusp"); + dev->active_ioctls = 0; + dev->suspender = NULL; +#endif dev->sc_monitor_hotplug.smpsw_name = PSWITCH_HK_DISPLAY_CYCLE; dev->sc_monitor_hotplug.smpsw_type = PSWITCH_TYPE_HOTKEY; @@ -756,6 +762,12 @@ err_pswitch: #endif #ifndef __NetBSD__ /* XXX drm sysfs */ put_device(dev->dev); +#endif +#ifdef __NetBSD__ + KASSERT(dev->suspender == NULL); + KASSERT(dev->active_ioctls == 0); + DRM_DESTROY_WAITQUEUE(&dev->suspend_cv); + mutex_destroy(&dev->suspend_lock); #endif mutex_destroy(&dev->master_mutex); mutex_destroy(&dev->clientlist_mutex); @@ -844,6 +856,13 @@ void drm_dev_fini(struct drm_device *dev) put_device(dev->dev); #endif +#ifdef __NetBSD__ + KASSERT(dev->suspender == NULL); + KASSERT(dev->active_ioctls == 0); + DRM_DESTROY_WAITQUEUE(&dev->suspend_cv); + mutex_destroy(&dev->suspend_lock); +#endif + mutex_destroy(&dev->master_mutex); mutex_destroy(&dev->clientlist_mutex); mutex_destroy(&dev->filelist_mutex); diff --git a/sys/external/bsd/drm2/dist/drm/drm_ioctl.c b/sys/external/bsd/drm2/dist/drm/drm_ioctl.c index 4e9e61c15507..8db34e6aad29 100644 --- a/sys/external/bsd/drm2/dist/drm/drm_ioctl.c +++ b/sys/external/bsd/drm2/dist/drm/drm_ioctl.c @@ -753,6 +753,58 @@ static const struct drm_ioctl_desc drm_ioctls[] = { #define 
DRM_CORE_IOCTL_COUNT ARRAY_SIZE( drm_ioctls ) +#ifdef __NetBSD__ +/* ioctl suspend/resume */ + +static void +drm_ioctl_enter(struct drm_device *dev) +{ + int ret __diagused; + + mutex_lock(&dev->suspend_lock); + DRM_WAIT_NOINTR_UNTIL(ret, &dev->suspend_cv, &dev->suspend_lock, + dev->suspender == NULL); + KASSERTMSG(ret == 0, "error=%d", -ret); + dev->active_ioctls++; + mutex_unlock(&dev->suspend_lock); +} + +static void +drm_ioctl_exit(struct drm_device *dev) +{ + + mutex_lock(&dev->suspend_lock); + KASSERT(dev->suspender == NULL); + if (--dev->active_ioctls == 0) + DRM_WAKEUP_ALL(&dev->suspend_cv, &dev->suspend_lock); + mutex_unlock(&dev->suspend_lock); +} + +void +drm_suspend_ioctl(struct drm_device *dev) +{ + int ret; + + mutex_lock(&dev->suspend_lock); + DRM_WAIT_NOINTR_UNTIL(ret, &dev->suspend_cv, &dev->suspend_lock, + dev->suspender == NULL && dev->active_ioctls == 0); + dev->suspender = curlwp; + mutex_unlock(&dev->suspend_lock); +} + +void +drm_resume_ioctl(struct drm_device *dev) +{ + + mutex_lock(&dev->suspend_lock); + KASSERT(dev->suspender); + KASSERT(dev->active_ioctls == 0); + dev->suspender = NULL; + DRM_WAKEUP_ALL(&dev->suspend_cv, &dev->suspend_lock); + mutex_unlock(&dev->suspend_lock); +} +#endif + /** * DOC: driver specific ioctls * @@ -819,6 +871,8 @@ long drm_ioctl_kernel(struct file *file, drm_ioctl_t *func, void *kdata, if (unlikely(retcode)) return retcode; + drm_ioctl_enter(dev); + /* Enforce sane locking for modern driver ioctls. 
*/ if (likely(!drm_core_check_feature(dev, DRIVER_LEGACY)) || (flags & DRM_UNLOCKED)) @@ -828,6 +882,9 @@ long drm_ioctl_kernel(struct file *file, drm_ioctl_t *func, void *kdata, retcode = func(dev, kdata, file_priv); mutex_unlock(&drm_global_mutex); } + + drm_ioctl_exit(dev); + return retcode; } EXPORT_SYMBOL(drm_ioctl_kernel); @@ -914,6 +971,7 @@ drm_ioctl(struct file *fp, unsigned long cmd, void *data) data0 = buf; } + drm_ioctl_enter(dev); if ((drm_core_check_feature(dev, DRIVER_MODESET) && is_driver_ioctl) || ISSET(ioctl->flags, DRM_UNLOCKED)) { /* XXX errno Linux->NetBSD */ @@ -924,6 +982,7 @@ drm_ioctl(struct file *fp, unsigned long cmd, void *data) error = -(*ioctl->func)(dev, data0, file); mutex_unlock(&drm_global_mutex); } + drm_ioctl_exit(dev); /* If we used a temporary buffer, copy it back out. */ if (data != data0) diff --git a/sys/external/bsd/drm2/dist/include/drm/drm_device.h b/sys/external/bsd/drm2/dist/include/drm/drm_device.h index f8f923c8eb14..0fea1efbd740 100644 --- a/sys/external/bsd/drm2/dist/include/drm/drm_device.h +++ b/sys/external/bsd/drm2/dist/include/drm/drm_device.h @@ -12,6 +12,7 @@ #include #ifdef __NetBSD__ +#include #include #endif @@ -332,6 +333,10 @@ struct drm_device { #ifdef __NetBSD__ struct sysmon_pswitch sc_monitor_hotplug; + struct mutex suspend_lock; + drm_waitqueue_t suspend_cv; + uint64_t active_ioctls; + struct lwp *suspender; #endif /* Everything below here is for legacy driver, never use! 
*/ diff --git a/sys/external/bsd/drm2/dist/include/drm/drm_ioctl.h b/sys/external/bsd/drm2/dist/include/drm/drm_ioctl.h index 9cf3ac8f558a..cfce450877d8 100644 --- a/sys/external/bsd/drm2/dist/include/drm/drm_ioctl.h +++ b/sys/external/bsd/drm2/dist/include/drm/drm_ioctl.h @@ -178,6 +178,8 @@ struct drm_ioctl_desc { int drm_ioctl_permit(u32 flags, struct drm_file *file_priv); #ifdef __NetBSD__ int drm_ioctl(struct file *, unsigned long, void *); +void drm_suspend_ioctl(struct drm_device *); +void drm_resume_ioctl(struct drm_device *); #else long drm_ioctl(struct file *filp, unsigned int cmd, unsigned long arg); #endif From 8ad9afafa177c56a6bb1d0133b9ca56fe6b184d4 Mon Sep 17 00:00:00 2001 From: Taylor R Campbell Date: Sat, 8 Oct 2022 13:16:21 +0000 Subject: [PATCH 12/13] WIP: amdgpu: print ring start --- .../bsd/drm2/dist/drm/amd/amdgpu/amdgpu_fence.c | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) diff --git a/sys/external/bsd/drm2/dist/drm/amd/amdgpu/amdgpu_fence.c b/sys/external/bsd/drm2/dist/drm/amd/amdgpu/amdgpu_fence.c index dc951b76a577..55d68cae8a77 100644 --- a/sys/external/bsd/drm2/dist/drm/amd/amdgpu/amdgpu_fence.c +++ b/sys/external/bsd/drm2/dist/drm/amd/amdgpu/amdgpu_fence.c @@ -418,7 +418,15 @@ int amdgpu_fence_driver_start_ring(struct amdgpu_ring *ring, ring->fence_drv.cpu_addr = adev->uvd.inst[ring->me].cpu_addr + index; ring->fence_drv.gpu_addr = adev->uvd.inst[ring->me].gpu_addr + index; } - amdgpu_fence_write(ring, atomic_read(&ring->fence_drv.last_seq)); + u32 seq; + amdgpu_fence_write(ring, seq = atomic_read(&ring->fence_drv.last_seq)); + device_printf(adev->dev, + "[%p] %s: ring %u (%s, type %d) cpu_addr %p offs %u wb %p" + " seq %"PRIu32"\n", + __builtin_return_address(0), __func__, + ring->idx, ring->name, ring->funcs->type, + ring->fence_drv.cpu_addr, ring->fence_offs, adev->wb.wb, + seq); amdgpu_irq_get(adev, irq_src, irq_type); ring->fence_drv.irq_src = irq_src; From 1410bde566acb6e6b7861355c38c487f1fe361bb Mon Sep 17 
00:00:00 2001 From: Taylor R Campbell Date: Sat, 8 Oct 2022 13:37:01 +0000 Subject: [PATCH 13/13] WIP: amdgpu: heavy hammer -- map things write-combining, not cached --- sys/external/bsd/drm2/dist/drm/amd/amdgpu/amdgpu_object.c | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/sys/external/bsd/drm2/dist/drm/amd/amdgpu/amdgpu_object.c b/sys/external/bsd/drm2/dist/drm/amd/amdgpu/amdgpu_object.c index 86d6505a8328..c14c8e666805 100644 --- a/sys/external/bsd/drm2/dist/drm/amd/amdgpu/amdgpu_object.c +++ b/sys/external/bsd/drm2/dist/drm/amd/amdgpu/amdgpu_object.c @@ -159,7 +159,7 @@ void amdgpu_bo_placement_from_domain(struct amdgpu_bo *abo, u32 domain) places[c].fpfn = 0; places[c].lpfn = 0; places[c].flags = TTM_PL_FLAG_TT; - if (flags & AMDGPU_GEM_CREATE_CPU_GTT_USWC) + if (1 || flags & AMDGPU_GEM_CREATE_CPU_GTT_USWC) places[c].flags |= TTM_PL_FLAG_WC | TTM_PL_FLAG_UNCACHED; else @@ -171,7 +171,7 @@ void amdgpu_bo_placement_from_domain(struct amdgpu_bo *abo, u32 domain) places[c].fpfn = 0; places[c].lpfn = 0; places[c].flags = TTM_PL_FLAG_SYSTEM; - if (flags & AMDGPU_GEM_CREATE_CPU_GTT_USWC) + if (1 || flags & AMDGPU_GEM_CREATE_CPU_GTT_USWC) places[c].flags |= TTM_PL_FLAG_WC | TTM_PL_FLAG_UNCACHED; else @@ -570,8 +570,10 @@ static int amdgpu_bo_do_create(struct amdgpu_device *adev, bo->flags = bp->flags; - if (!amdgpu_bo_support_uswc(bo->flags)) + if (!amdgpu_bo_support_uswc(bo->flags)) { + printf("%s: no uswc for you\n", __func__); bo->flags &= ~AMDGPU_GEM_CREATE_CPU_GTT_USWC; + } bo->tbo.bdev = &adev->mman.bdev; if (bp->domain & (AMDGPU_GEM_DOMAIN_GWS | AMDGPU_GEM_DOMAIN_OA |