Message ID | 20240213155112.156537-1-pierre-eric.pelloux-prayer@amd.com |
---|---|
Headers | show |
Series | dma-fence, drm, amdgpu new trace events | expand |
Am 13.02.24 um 16:50 schrieb Pierre-Eric Pelloux-Prayer: > This makes it possible to understand the dependencies between jobs. > Possible usage of this trace: > * stuttering issues like Mesa !9189 > * incorrect synchronization: I don't have a link for this one, but having > these events was very useful to debug a virtio-gpu / native-context / > radeonsi sync issue > > I have prototype code using this in UMR, as can be see here: > https://gitlab.freedesktop.org/tomstdenis/umr/-/merge_requests/37 > > The 'reason' param currently uses __func__ but I didn't add a macro for > this because it'd be interesting to use more descriptive names for each > use of amdgpu_fence_sync. > > Signed-off-by: Pierre-Eric Pelloux-Prayer <pierre-eric.pelloux-prayer@amd.com> > --- > drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c | 8 ++++---- > drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c | 14 +++++++------- > drivers/gpu/drm/amd/amdgpu/amdgpu_ids.c | 8 +++----- > drivers/gpu/drm/amd/amdgpu/amdgpu_mes.c | 4 ++-- > drivers/gpu/drm/amd/amdgpu/amdgpu_sync.c | 11 ++++++++--- > drivers/gpu/drm/amd/amdgpu/amdgpu_sync.h | 3 ++- > drivers/gpu/drm/amd/amdgpu/amdgpu_umsch_mm.c | 4 ++-- > 7 files changed, 28 insertions(+), 24 deletions(-) > > diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c > index d17b2452cb1f..fde98e48c84b 100644 > --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c > +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c > @@ -491,7 +491,7 @@ static int vm_update_pds(struct amdgpu_vm *vm, struct amdgpu_sync *sync) > if (ret) > return ret; > > - return amdgpu_sync_fence(sync, vm->last_update); > + return amdgpu_sync_fence(sync, vm->last_update, __func__); __func__ is used so often that it probably deserves a macro. Regards, Christian. > } > > static uint64_t get_pte_flags(struct amdgpu_device *adev, struct kgd_mem *mem) > @@ -1251,7 +1251,7 @@ static void unmap_bo_from_gpuvm(struct kgd_mem *mem, > > amdgpu_vm_clear_freed(adev, vm, &bo_va->last_pt_update); > > - amdgpu_sync_fence(sync, bo_va->last_pt_update); > + amdgpu_sync_fence(sync, bo_va->last_pt_update, __func__); > } > > static int update_gpuvm_pte(struct kgd_mem *mem, > @@ -1273,7 +1273,7 @@ static int update_gpuvm_pte(struct kgd_mem *mem, > return ret; > } > > - return amdgpu_sync_fence(sync, bo_va->last_pt_update); > + return amdgpu_sync_fence(sync, bo_va->last_pt_update, __func__); > } > > static int map_bo_to_gpuvm(struct kgd_mem *mem, > @@ -2910,7 +2910,7 @@ int amdgpu_amdkfd_gpuvm_restore_process_bos(void *info, struct dma_fence **ef) > } > dma_resv_for_each_fence(&cursor, bo->tbo.base.resv, > DMA_RESV_USAGE_KERNEL, fence) { > - ret = amdgpu_sync_fence(&sync_obj, fence); > + ret = amdgpu_sync_fence(&sync_obj, fence, __func__); > if (ret) { > pr_debug("Memory eviction: Sync BO fence failed. Try again\n"); > goto validate_map_fail; > diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c > index 6adeddfb3d56..6830892383c3 100644 > --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c > +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c > @@ -423,7 +423,7 @@ static int amdgpu_cs_p2_dependencies(struct amdgpu_cs_parser *p, > dma_fence_put(old); > } > > - r = amdgpu_sync_fence(&p->sync, fence); > + r = amdgpu_sync_fence(&p->sync, fence, __func__); > dma_fence_put(fence); > if (r) > return r; > @@ -445,7 +445,7 @@ static int amdgpu_syncobj_lookup_and_add(struct amdgpu_cs_parser *p, > return r; > } > > - r = amdgpu_sync_fence(&p->sync, fence); > + r = amdgpu_sync_fence(&p->sync, fence, __func__); > dma_fence_put(fence); > return r; > } > @@ -1101,7 +1101,7 @@ static int amdgpu_cs_vm_handling(struct amdgpu_cs_parser *p) > if (r) > return r; > > - r = amdgpu_sync_fence(&p->sync, fpriv->prt_va->last_pt_update); > + r = amdgpu_sync_fence(&p->sync, fpriv->prt_va->last_pt_update, __func__); > if (r) > return r; > > @@ -1112,7 +1112,7 @@ static int amdgpu_cs_vm_handling(struct amdgpu_cs_parser *p) > if (r) > return r; > > - r = amdgpu_sync_fence(&p->sync, bo_va->last_pt_update); > + r = amdgpu_sync_fence(&p->sync, bo_va->last_pt_update, __func__); > if (r) > return r; > } > @@ -1131,7 +1131,7 @@ static int amdgpu_cs_vm_handling(struct amdgpu_cs_parser *p) > if (r) > return r; > > - r = amdgpu_sync_fence(&p->sync, bo_va->last_pt_update); > + r = amdgpu_sync_fence(&p->sync, bo_va->last_pt_update, __func__); > if (r) > return r; > } > @@ -1144,7 +1144,7 @@ static int amdgpu_cs_vm_handling(struct amdgpu_cs_parser *p) > if (r) > return r; > > - r = amdgpu_sync_fence(&p->sync, vm->last_update); > + r = amdgpu_sync_fence(&p->sync, vm->last_update, __func__); > if (r) > return r; > > @@ -1225,7 +1225,7 @@ static int amdgpu_cs_sync_rings(struct amdgpu_cs_parser *p) > continue; > } > > - r = amdgpu_sync_fence(&p->gang_leader->explicit_sync, fence); > + r = amdgpu_sync_fence(&p->gang_leader->explicit_sync, fence, __func__); > dma_fence_put(fence); > if (r) > return r; > diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ids.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ids.c > index ddd0891da116..96f68e025d8e 100644 > --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ids.c > +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ids.c > @@ -309,7 +309,7 @@ static int amdgpu_vmid_grab_reserved(struct amdgpu_vm *vm, > /* Good we can use this VMID. Remember this submission as > * user of the VMID. > */ > - r = amdgpu_sync_fence(&(*id)->active, &job->base.s_fence->finished); > + r = amdgpu_sync_fence(&(*id)->active, &job->base.s_fence->finished, __func__); > if (r) > return r; > > @@ -369,8 +369,7 @@ static int amdgpu_vmid_grab_used(struct amdgpu_vm *vm, > /* Good, we can use this VMID. Remember this submission as > * user of the VMID. > */ > - r = amdgpu_sync_fence(&(*id)->active, > - &job->base.s_fence->finished); > + r = amdgpu_sync_fence(&(*id)->active, &job->base.s_fence->finished, __func__); > if (r) > return r; > > @@ -421,8 +420,7 @@ int amdgpu_vmid_grab(struct amdgpu_vm *vm, struct amdgpu_ring *ring, > id = idle; > > /* Remember this submission as user of the VMID */ > - r = amdgpu_sync_fence(&id->active, > - &job->base.s_fence->finished); > + r = amdgpu_sync_fence(&id->active, &job->base.s_fence->finished, __func__); > if (r) > goto error; > > diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.c > index da48b6da0107..0f85370f69fa 100644 > --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.c > +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.c > @@ -1219,14 +1219,14 @@ int amdgpu_mes_ctx_map_meta_data(struct amdgpu_device *adev, > DRM_ERROR("failed to do vm_bo_update on meta data\n"); > goto error_del_bo_va; > } > - amdgpu_sync_fence(&sync, bo_va->last_pt_update); > + amdgpu_sync_fence(&sync, bo_va->last_pt_update, __func__); > > r = amdgpu_vm_update_pdes(adev, vm, false); > if (r) { > DRM_ERROR("failed to update pdes on meta data\n"); > goto error_del_bo_va; > } > - amdgpu_sync_fence(&sync, vm->last_update); > + amdgpu_sync_fence(&sync, vm->last_update, __func__); > > amdgpu_sync_wait(&sync, false); > drm_exec_fini(&exec); > diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_sync.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_sync.c > index 1b013a44ca99..b6538f73eee9 100644 > --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_sync.c > +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_sync.c > @@ -30,6 +30,7 @@ > */ > > #include <linux/dma-fence-chain.h> > +#include <trace/events/dma_fence.h> > > #include "amdgpu.h" > #include "amdgpu_trace.h" > @@ -149,10 +150,12 @@ static bool amdgpu_sync_add_later(struct amdgpu_sync *sync, struct dma_fence *f) > * > * @sync: sync object to add fence to > * @f: fence to sync to > + * @reason: why do we sync to this fence > * > * Add the fence to the sync object. > */ > -int amdgpu_sync_fence(struct amdgpu_sync *sync, struct dma_fence *f) > +int amdgpu_sync_fence(struct amdgpu_sync *sync, struct dma_fence *f, > + const char *reason) > { > struct amdgpu_sync_entry *e; > > @@ -166,6 +169,8 @@ int amdgpu_sync_fence(struct amdgpu_sync *sync, struct dma_fence *f) > if (!e) > return -ENOMEM; > > + trace_dma_fence_sync_to(f, reason); > + > hash_add(sync->fences, &e->node, f->context); > e->fence = dma_fence_get(f); > return 0; > @@ -249,7 +254,7 @@ int amdgpu_sync_resv(struct amdgpu_device *adev, struct amdgpu_sync *sync, > struct dma_fence *tmp = dma_fence_chain_contained(f); > > if (amdgpu_sync_test_fence(adev, mode, owner, tmp)) { > - r = amdgpu_sync_fence(sync, f); > + r = amdgpu_sync_fence(sync, f, __func__); > dma_fence_put(f); > if (r) > return r; > @@ -358,7 +363,7 @@ int amdgpu_sync_clone(struct amdgpu_sync *source, struct amdgpu_sync *clone) > hash_for_each_safe(source->fences, i, tmp, e, node) { > f = e->fence; > if (!dma_fence_is_signaled(f)) { > - r = amdgpu_sync_fence(clone, f); > + r = amdgpu_sync_fence(clone, f, __func__); > if (r) > return r; > } else { > diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_sync.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_sync.h > index cf1e9e858efd..0c58d6120053 100644 > --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_sync.h > +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_sync.h > @@ -47,7 +47,8 @@ struct amdgpu_sync { > }; > > void amdgpu_sync_create(struct amdgpu_sync *sync); > -int amdgpu_sync_fence(struct amdgpu_sync *sync, struct dma_fence *f); > +int amdgpu_sync_fence(struct amdgpu_sync *sync, struct dma_fence *f, > + const char *reason); > int amdgpu_sync_resv(struct amdgpu_device *adev, struct amdgpu_sync *sync, > struct dma_resv *resv, enum amdgpu_sync_mode mode, > void *owner); > diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_umsch_mm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_umsch_mm.c > index bfbf59326ee1..5e30b371b956 100644 > --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_umsch_mm.c > +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_umsch_mm.c > @@ -117,13 +117,13 @@ static int map_ring_data(struct amdgpu_device *adev, struct amdgpu_vm *vm, > if (r) > goto error_del_bo_va; > > - amdgpu_sync_fence(&sync, (*bo_va)->last_pt_update); > + amdgpu_sync_fence(&sync, (*bo_va)->last_pt_update, __func__); > > r = amdgpu_vm_update_pdes(adev, vm, false); > if (r) > goto error_del_bo_va; > > - amdgpu_sync_fence(&sync, vm->last_update); > + amdgpu_sync_fence(&sync, vm->last_update, __func__); > > amdgpu_sync_wait(&sync, false); > drm_exec_fini(&exec);
On Wed, 14 Feb 2024 13:00:16 +0100 Christian König <christian.koenig@amd.com> wrote: > > +DEFINE_EVENT(dma_fence_from, dma_fence_sync_to, > > For a single event you should probably use TRACE_EVENT() instead of > declaring a class. A class is only used if you have multiple events with > the same parameters. FYI, TRACE_EVENT() is actually defined as: #define TRACE_EVENT(name, proto, args, tstruct, assign, print) \ DECLARE_EVENT_CLASS(name, \ PARAMS(proto), \ PARAMS(args), \ PARAMS(tstruct), \ PARAMS(assign), \ PARAMS(print)); \ DEFINE_EVENT(name, name, PARAMS(proto), PARAMS(args)); So basically, you could really just declare one TRACE_EVENT() and add DEFINE_EVENT()s on top of it ;) I never recommended that because I thought it would be confusing. -- Steve
Le 14/02/2024 à 13:09, Christian König a écrit : > Am 13.02.24 um 16:50 schrieb Pierre-Eric Pelloux-Prayer: >> amdgpu_cs_ioctl already exists but serves a different >> purpose. >> >> amdgpu_cs_ioctl2 marks the beginning of the kernel processing of >> the ioctl which is useful for tools to map which events belong to >> the same submission (without this, the first event would be the >> amdgpu_bo_set_list ones). > > That's not necessary a good justification for the naming. What exactly was the original trace_amdgpu_cs_ioctl() doing? > trace_amdgpu_cs_ioctl is used right before drm_sched_entity_push_job is called so in a sense it's a duplicate of trace_drm_sched_job. That being said, it's used by gpuvis so I chose to not modify it. As for the new event: initially I named it "amdgpu_cs_parser_init", but since the intent is to mark the beginning of the amdgpu_cs_submit I've renamed it. Any suggestion for a better name? Thanks, Pierre-Eric > Regards, > Christian. > >> >> Signed-off-by: Pierre-Eric Pelloux-Prayer <pierre-eric.pelloux-prayer@amd.com> >> --- >> drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c | 2 ++ >> drivers/gpu/drm/amd/amdgpu/amdgpu_trace.h | 12 ++++++++++++ >> 2 files changed, 14 insertions(+) >> >> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c >> index 6830892383c3..29e43a66d0d6 100644 >> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c >> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c >> @@ -1402,6 +1402,8 @@ int amdgpu_cs_ioctl(struct drm_device *dev, void *data, struct drm_file *filp) >> return r; >> } >> + trace_amdgpu_cs_ioctl2(data); >> + >> r = amdgpu_cs_pass1(&parser, data); >> if (r) >> goto error_fini; >> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_trace.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_trace.h >> index e8ea1cfe7027..24e95560ede5 100644 >> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_trace.h >> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_trace.h >> @@ -189,6 +189,18 @@ TRACE_EVENT(amdgpu_cs_ioctl, >> __entry->seqno, __get_str(ring), __entry->num_ibs) >> ); >> +TRACE_EVENT(amdgpu_cs_ioctl2, >> + TP_PROTO(union drm_amdgpu_cs *cs), >> + TP_ARGS(cs), >> + TP_STRUCT__entry( >> + __field(uint32_t, ctx_id) >> + ), >> + TP_fast_assign( >> + __entry->ctx_id = cs->in.ctx_id; >> + ), >> + TP_printk("context=%u", __entry->ctx_id) >> +); >> + >> TRACE_EVENT(amdgpu_sched_run_job, >> TP_PROTO(struct amdgpu_job *job), >> TP_ARGS(job), >
Am 16.02.24 um 17:32 schrieb Daniel Vetter: > On Tue, Feb 13, 2024 at 04:50:26PM +0100, Pierre-Eric Pelloux-Prayer wrote: >> This new event can be used to trace where a given dma_fence is added >> as a dependency of some other work. > How? > > What I'd expected here is that you add a dependency chain from one fence > to another, but this only has one fence. That's what I though initially as well, but at the point we add the dependency fences to the scheduler job we don't have the scheduler fence initialized yet. We could change this so that we only trace all the fences after we have initialized the scheduler fence, but then we loose the information where the dependency comes from. > How do you figure out what's the > next dma_fence that will stall on this dependency? I'm not fully sure on that either. Pierre? Christian. > Like in the gpu > scheduler we do know what will be the fence that userspace gets back, so > we can make that connection. And same for the atomic code (although you > don't wire that up at all). > > I'm very confused on how this works and rather worried it's a brittle > amdgpu-only solution ... > -Sima > >> I plan to use it in amdgpu. >> >> Signed-off-by: Pierre-Eric Pelloux-Prayer <pierre-eric.pelloux-prayer@amd.com> >> --- >> drivers/dma-buf/dma-fence.c | 1 + >> include/trace/events/dma_fence.h | 34 ++++++++++++++++++++++++++++++++ >> 2 files changed, 35 insertions(+) >> >> diff --git a/drivers/dma-buf/dma-fence.c b/drivers/dma-buf/dma-fence.c >> index e0fd99e61a2d..671a499a5ccd 100644 >> --- a/drivers/dma-buf/dma-fence.c >> +++ b/drivers/dma-buf/dma-fence.c >> @@ -23,6 +23,7 @@ >> EXPORT_TRACEPOINT_SYMBOL(dma_fence_emit); >> EXPORT_TRACEPOINT_SYMBOL(dma_fence_enable_signal); >> EXPORT_TRACEPOINT_SYMBOL(dma_fence_signaled); >> +EXPORT_TRACEPOINT_SYMBOL(dma_fence_sync_to); >> >> static DEFINE_SPINLOCK(dma_fence_stub_lock); >> static struct dma_fence dma_fence_stub; >> diff --git a/include/trace/events/dma_fence.h b/include/trace/events/dma_fence.h >> index 3963e79ca7b4..9b3875f7aa79 100644 >> --- a/include/trace/events/dma_fence.h >> +++ b/include/trace/events/dma_fence.h >> @@ -83,6 +83,40 @@ DEFINE_EVENT(dma_fence, dma_fence_wait_end, >> TP_ARGS(fence) >> ); >> >> +DECLARE_EVENT_CLASS(dma_fence_from, >> + >> + TP_PROTO(struct dma_fence *fence, const char *reason), >> + >> + TP_ARGS(fence, reason), >> + >> + TP_STRUCT__entry( >> + __string(driver, fence->ops->get_driver_name(fence)) >> + __string(timeline, fence->ops->get_timeline_name(fence)) >> + __field(unsigned int, context) >> + __field(unsigned int, seqno) >> + __string(reason, reason) >> + ), >> + >> + TP_fast_assign( >> + __assign_str(driver, fence->ops->get_driver_name(fence)); >> + __assign_str(timeline, fence->ops->get_timeline_name(fence)); >> + __entry->context = fence->context; >> + __entry->seqno = fence->seqno; >> + __assign_str(reason, reason); >> + ), >> + >> + TP_printk("driver=%s timeline=%s context=%u seqno=%u reason=%s", >> + __get_str(driver), __get_str(timeline), __entry->context, >> + __entry->seqno, __get_str(reason)) >> +); >> + >> +DEFINE_EVENT(dma_fence_from, dma_fence_sync_to, >> + >> + TP_PROTO(struct dma_fence *fence, const char *reason), >> + >> + TP_ARGS(fence, reason) >> +); >> + >> #endif /* _TRACE_DMA_FENCE_H */ >> >> /* This part must be outside protection */ >> -- >> 2.40.1 >>