Message ID | 20250424083834.15518-6-pierre-eric.pelloux-prayer@amd.com |
---|---|
State | New |
Series | Improve gpu_scheduler trace events + UAPI |
nit: title: s/gpu/GPU

We also mostly start with an upper case letter after the ':', but JFYI,
it's not a big deal.

P.

On Thu, 2025-04-24 at 10:38 +0200, Pierre-Eric Pelloux-Prayer wrote:
> We can't trace dependencies from drm_sched_job_add_dependency
> because when it's called the job's fence is not available yet.
>
> So instead each dependency is traced individually when
> drm_sched_entity_push_job is used.
>
> Tracing the dependencies allows tools to analyze the dependencies
> between the jobs (previously it was only possible for fences
> traced by drm_sched_job_wait_dep).
>
> Signed-off-by: Pierre-Eric Pelloux-Prayer <pierre-eric.pelloux-prayer@amd.com>
> Reviewed-by: Tvrtko Ursulin <tvrtko.ursulin@igalia.com>
> ---
>  .../gpu/drm/scheduler/gpu_scheduler_trace.h | 23 +++++++++++++++++++
>  drivers/gpu/drm/scheduler/sched_entity.c    |  8 ++++++++
>  2 files changed, 31 insertions(+)
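For context, here is a minimal sketch (not part of this series) of the driver-side ordering the commit message refers to, assuming the usual drm_sched_job_add_dependency() / drm_sched_job_arm() / drm_sched_entity_push_job() sequence. my_driver_submit(), my_job and dep_fence are made-up names, and error handling is omitted.

/*
 * Illustrative only, not part of the patch: a typical submission path.
 * my_driver_submit(), my_job and dep_fence are hypothetical names.
 */
#include <drm/gpu_scheduler.h>
#include <linux/dma-fence.h>

static void my_driver_submit(struct drm_sched_job *my_job,
			     struct dma_fence *dep_fence)
{
	/*
	 * The dependency is only recorded in my_job->dependencies here;
	 * the job's own finished fence is not available for tracing yet,
	 * which is why no per-dependency event can be emitted from this
	 * call. (drm_sched_job_add_dependency() takes over the fence
	 * reference, hence the dma_fence_get().)
	 */
	drm_sched_job_add_dependency(my_job, dma_fence_get(dep_fence));

	/* Arming the job sets up its scheduler fences. */
	drm_sched_job_arm(my_job);

	/*
	 * By the time the job is pushed, both its finished fence and the
	 * recorded dependencies are known, so push time is where the new
	 * drm_sched_job_add_dep event walks the dependencies xarray.
	 */
	drm_sched_entity_push_job(my_job);
}

Since the job's finished fence only becomes usable once the job is armed, emitting the per-dependency events from drm_sched_entity_push_job() is the earliest point where both ends of each dependency edge can be reported.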
diff --git a/drivers/gpu/drm/scheduler/gpu_scheduler_trace.h b/drivers/gpu/drm/scheduler/gpu_scheduler_trace.h
index 6f5bd05131aa..5d9992ad47d3 100644
--- a/drivers/gpu/drm/scheduler/gpu_scheduler_trace.h
+++ b/drivers/gpu/drm/scheduler/gpu_scheduler_trace.h
@@ -87,6 +87,29 @@ TRACE_EVENT(drm_sched_process_job,
 		      __entry->fence_context, __entry->fence_seqno)
 );
 
+TRACE_EVENT(drm_sched_job_add_dep,
+	    TP_PROTO(struct drm_sched_job *sched_job, struct dma_fence *fence),
+	    TP_ARGS(sched_job, fence),
+	    TP_STRUCT__entry(
+			     __field(u64, fence_context)
+			     __field(u64, fence_seqno)
+			     __field(u64, id)
+			     __field(u64, ctx)
+			     __field(u64, seqno)
+			     ),
+
+	    TP_fast_assign(
+			   __entry->fence_context = sched_job->s_fence->finished.context;
+			   __entry->fence_seqno = sched_job->s_fence->finished.seqno;
+			   __entry->id = sched_job->id;
+			   __entry->ctx = fence->context;
+			   __entry->seqno = fence->seqno;
+			   ),
+	    TP_printk("fence=%llu:%llu, id=%llu depends on fence=%llu:%llu",
+		      __entry->fence_context, __entry->fence_seqno, __entry->id,
+		      __entry->ctx, __entry->seqno)
+);
+
 TRACE_EVENT(drm_sched_job_wait_dep,
 	    TP_PROTO(struct drm_sched_job *sched_job, struct dma_fence *fence),
 	    TP_ARGS(sched_job, fence),
diff --git a/drivers/gpu/drm/scheduler/sched_entity.c b/drivers/gpu/drm/scheduler/sched_entity.c
index bd39db7bb240..be579e132711 100644
--- a/drivers/gpu/drm/scheduler/sched_entity.c
+++ b/drivers/gpu/drm/scheduler/sched_entity.c
@@ -587,6 +587,14 @@ void drm_sched_entity_push_job(struct drm_sched_job *sched_job)
 	ktime_t submit_ts;
 
 	trace_drm_sched_job(sched_job, entity);
+
+	if (trace_drm_sched_job_add_dep_enabled()) {
+		struct dma_fence *entry;
+		unsigned long index;
+
+		xa_for_each(&sched_job->dependencies, index, entry)
+			trace_drm_sched_job_add_dep(sched_job, entry);
+	}
 	atomic_inc(entity->rq->sched->score);
 	WRITE_ONCE(entity->last_user, current->group_leader);
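A tool consuming this event over ftrace's text interface would key off the TP_printk format above. The following rough userspace sketch (again, not part of the series) turns one such line into a dependency edge; the layout of the trace line outside the printed fields, and the parse_dep_line() name, are assumptions.

/* Illustrative only: parse one drm_sched_job_add_dep line of trace output. */
#include <inttypes.h>
#include <stdio.h>
#include <string.h>

struct dep_edge {
	uint64_t job_ctx, job_seqno;	/* the job's finished fence */
	uint64_t job_id;
	uint64_t dep_ctx, dep_seqno;	/* the fence the job depends on */
};

/* Returns 0 on success, -1 if the line is not a drm_sched_job_add_dep event. */
static int parse_dep_line(const char *line, struct dep_edge *e)
{
	const char *p = strstr(line, "drm_sched_job_add_dep:");

	if (!p)
		return -1;

	/* Mirrors: "fence=%llu:%llu, id=%llu depends on fence=%llu:%llu" */
	if (sscanf(p, "drm_sched_job_add_dep: fence=%" SCNu64 ":%" SCNu64
		      ", id=%" SCNu64 " depends on fence=%" SCNu64 ":%" SCNu64,
		   &e->job_ctx, &e->job_seqno, &e->job_id,
		   &e->dep_ctx, &e->dep_seqno) == 5)
		return 0;

	return -1;
}

Combined with the existing scheduler events, which print the same (context, seqno) pairs, these edges would let a tool reconstruct the dependency graph between jobs rather than only the waits reported by drm_sched_job_wait_dep.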