Message ID | 20250509153352.7187-14-tvrtko.ursulin@igalia.com |
---|---|
State | New |
Headers | show |
Series | Some (drm_sched_|dma_)fence lifetime issues | expand |
On Fri, May 09, 2025 at 04:33:52PM +0100, Tvrtko Ursulin wrote: > Xe can free some of the data pointed to by the dma-fences it exports. Most > notably the timeline name can get freed if userspace closes the associated > submit queue. At the same time the fence could have been exported to a > third party (for example a sync_fence fd) which will then cause an use- > after-free on subsequent access. > > To make this safe we need to make the driver compliant with the newly > documented dma-fence rules. Driver has to ensure a RCU grace period > between signalling a fence and freeing any data pointed to by said fence. > > For the timeline name we simply make the queue be freed via kfree_rcu and > for the shared lock associated with multiple queues we add a RCU grace > period before freeing the per GT structure holding the lock. > > Signed-off-by: Tvrtko Ursulin <tvrtko.ursulin@igalia.com> This makes sense in the context of the series (e.g. assuming patch #9 lands). With that: Reviewed-by: Matthew Brost <matthew.brost@intel.com> > --- > drivers/gpu/drm/xe/xe_guc_exec_queue_types.h | 2 ++ > drivers/gpu/drm/xe/xe_guc_submit.c | 7 ++++++- > drivers/gpu/drm/xe/xe_hw_fence.c | 3 +++ > 3 files changed, 11 insertions(+), 1 deletion(-) > > diff --git a/drivers/gpu/drm/xe/xe_guc_exec_queue_types.h b/drivers/gpu/drm/xe/xe_guc_exec_queue_types.h > index 4c39f01e4f52..a3f421e2adc0 100644 > --- a/drivers/gpu/drm/xe/xe_guc_exec_queue_types.h > +++ b/drivers/gpu/drm/xe/xe_guc_exec_queue_types.h > @@ -20,6 +20,8 @@ struct xe_exec_queue; > struct xe_guc_exec_queue { > /** @q: Backpointer to parent xe_exec_queue */ > struct xe_exec_queue *q; > + /** @rcu: For safe freeing of exported dma fences */ > + struct rcu_head rcu; > /** @sched: GPU scheduler for this xe_exec_queue */ > struct xe_gpu_scheduler sched; > /** @entity: Scheduler entity for this xe_exec_queue */ > diff --git a/drivers/gpu/drm/xe/xe_guc_submit.c b/drivers/gpu/drm/xe/xe_guc_submit.c > index 369be36f7dc5..cda837ff0118 
100644 > --- a/drivers/gpu/drm/xe/xe_guc_submit.c > +++ b/drivers/gpu/drm/xe/xe_guc_submit.c > @@ -1274,7 +1274,11 @@ static void __guc_exec_queue_fini_async(struct work_struct *w) > xe_sched_entity_fini(&ge->entity); > xe_sched_fini(&ge->sched); > > - kfree(ge); > + /* > + * RCU free due sched being exported via DRM scheduler fences > + * (timeline name). > + */ > + kfree_rcu(ge, rcu); > xe_exec_queue_fini(q); > xe_pm_runtime_put(guc_to_xe(guc)); > } > @@ -1457,6 +1461,7 @@ static int guc_exec_queue_init(struct xe_exec_queue *q) > > q->guc = ge; > ge->q = q; > + init_rcu_head(&ge->rcu); > init_waitqueue_head(&ge->suspend_wait); > > for (i = 0; i < MAX_STATIC_MSG_TYPE; ++i) > diff --git a/drivers/gpu/drm/xe/xe_hw_fence.c b/drivers/gpu/drm/xe/xe_hw_fence.c > index 03eb8c6d1616..b2a0c46dfcd4 100644 > --- a/drivers/gpu/drm/xe/xe_hw_fence.c > +++ b/drivers/gpu/drm/xe/xe_hw_fence.c > @@ -100,6 +100,9 @@ void xe_hw_fence_irq_finish(struct xe_hw_fence_irq *irq) > spin_unlock_irqrestore(&irq->lock, flags); > dma_fence_end_signalling(tmp); > } > + > + /* Safe release of the irq->lock used in dma_fence_init. */ > + synchronize_rcu(); > } > > void xe_hw_fence_irq_run(struct xe_hw_fence_irq *irq) > -- > 2.48.0 >
diff --git a/drivers/gpu/drm/xe/xe_guc_exec_queue_types.h b/drivers/gpu/drm/xe/xe_guc_exec_queue_types.h index 4c39f01e4f52..a3f421e2adc0 100644 --- a/drivers/gpu/drm/xe/xe_guc_exec_queue_types.h +++ b/drivers/gpu/drm/xe/xe_guc_exec_queue_types.h @@ -20,6 +20,8 @@ struct xe_exec_queue; struct xe_guc_exec_queue { /** @q: Backpointer to parent xe_exec_queue */ struct xe_exec_queue *q; + /** @rcu: For safe freeing of exported dma fences */ + struct rcu_head rcu; /** @sched: GPU scheduler for this xe_exec_queue */ struct xe_gpu_scheduler sched; /** @entity: Scheduler entity for this xe_exec_queue */ diff --git a/drivers/gpu/drm/xe/xe_guc_submit.c b/drivers/gpu/drm/xe/xe_guc_submit.c index 369be36f7dc5..cda837ff0118 100644 --- a/drivers/gpu/drm/xe/xe_guc_submit.c +++ b/drivers/gpu/drm/xe/xe_guc_submit.c @@ -1274,7 +1274,11 @@ static void __guc_exec_queue_fini_async(struct work_struct *w) xe_sched_entity_fini(&ge->entity); xe_sched_fini(&ge->sched); - kfree(ge); + /* + * RCU free due sched being exported via DRM scheduler fences + * (timeline name). + */ + kfree_rcu(ge, rcu); xe_exec_queue_fini(q); xe_pm_runtime_put(guc_to_xe(guc)); } @@ -1457,6 +1461,7 @@ static int guc_exec_queue_init(struct xe_exec_queue *q) q->guc = ge; ge->q = q; + init_rcu_head(&ge->rcu); init_waitqueue_head(&ge->suspend_wait); for (i = 0; i < MAX_STATIC_MSG_TYPE; ++i) diff --git a/drivers/gpu/drm/xe/xe_hw_fence.c b/drivers/gpu/drm/xe/xe_hw_fence.c index 03eb8c6d1616..b2a0c46dfcd4 100644 --- a/drivers/gpu/drm/xe/xe_hw_fence.c +++ b/drivers/gpu/drm/xe/xe_hw_fence.c @@ -100,6 +100,9 @@ void xe_hw_fence_irq_finish(struct xe_hw_fence_irq *irq) spin_unlock_irqrestore(&irq->lock, flags); dma_fence_end_signalling(tmp); } + + /* Safe release of the irq->lock used in dma_fence_init. */ + synchronize_rcu(); } void xe_hw_fence_irq_run(struct xe_hw_fence_irq *irq)
Xe can free some of the data pointed to by the dma-fences it exports. Most notably the timeline name can get freed if userspace closes the associated submit queue. At the same time the fence could have been exported to a third party (for example a sync_fence fd) which will then cause a use-after-free on subsequent access. To make this safe we need to make the driver compliant with the newly documented dma-fence rules. The driver has to ensure an RCU grace period between signalling a fence and freeing any data pointed to by said fence. For the timeline name we simply make the queue be freed via kfree_rcu and for the shared lock associated with multiple queues we add an RCU grace period before freeing the per GT structure holding the lock. Signed-off-by: Tvrtko Ursulin <tvrtko.ursulin@igalia.com> --- drivers/gpu/drm/xe/xe_guc_exec_queue_types.h | 2 ++ drivers/gpu/drm/xe/xe_guc_submit.c | 7 ++++++- drivers/gpu/drm/xe/xe_hw_fence.c | 3 +++ 3 files changed, 11 insertions(+), 1 deletion(-)