diff mbox series

[v3,06/14] iommufd: Add IOMMUFD_OBJ_VIRQ and IOMMUFD_CMD_VIRQ_ALLOC

Message ID dc1252de7ff06c944cb7d91c91b4f61c25182f91.1734477608.git.nicolinc@nvidia.com
State New
Headers show
Series iommufd: Add vIOMMU infrastructure (Part-3: vIRQ) | expand

Commit Message

Nicolin Chen Dec. 18, 2024, 5 a.m. UTC
Allow a vIOMMU object to allocate vIRQ Event Queues, with a condition that
each vIOMMU can only have one single vIRQ event queue per type.

Add iommufd_eventq_virq_alloc with an iommufd_eventq_virq_ops for this new
ioctl.

Signed-off-by: Nicolin Chen <nicolinc@nvidia.com>
---
 drivers/iommu/iommufd/iommufd_private.h |  57 +++++++++++
 include/linux/iommufd.h                 |   3 +
 include/uapi/linux/iommufd.h            |  31 ++++++
 drivers/iommu/iommufd/eventq.c          | 129 ++++++++++++++++++++++++
 drivers/iommu/iommufd/main.c            |   6 ++
 drivers/iommu/iommufd/viommu.c          |   2 +
 6 files changed, 228 insertions(+)

Comments

Nicolin Chen Dec. 19, 2024, 10:31 p.m. UTC | #1
On Tue, Dec 17, 2024 at 09:00:19PM -0800, Nicolin Chen wrote:
> diff --git a/drivers/iommu/iommufd/main.c b/drivers/iommu/iommufd/main.c
> index cfbdf7b0e3c1..9d15978ef882 100644
> --- a/drivers/iommu/iommufd/main.c
> +++ b/drivers/iommu/iommufd/main.c
> @@ -367,6 +367,8 @@ static const struct iommufd_ioctl_op iommufd_ioctl_ops[] = {
>  		 __reserved),
>  	IOCTL_OP(IOMMU_VIOMMU_ALLOC, iommufd_viommu_alloc_ioctl,
>  		 struct iommu_viommu_alloc, out_viommu_id),
> +	IOCTL_OP(IOMMU_VIRQ_ALLOC, iommufd_virq_alloc, struct iommu_virq_alloc,
> +		 out_virq_fd),

This is missing the "struct iommu_virq_alloc" in union ucmd_buffer.
Will include in v4.

Nicolin
Jason Gunthorpe Jan. 2, 2025, 8:45 p.m. UTC | #2
On Tue, Dec 17, 2024 at 09:00:19PM -0800, Nicolin Chen wrote:
> Allow a vIOMMU object to allocate vIRQ Event Queues, with a condition that
> each vIOMMU can only have one single vIRQ event queue per type.

I suggest you should tend to use the eventq as the primary naming not
vIRQ, I think that will be a bit clearer.

The virq in the VM is edge triggered by an event queue FD becoming
readable, but the event queue is the file descriptor that reports a
batch of events on read().

The virq name evokes similarities to the virq in vfio which is purely
about conveying if an IRQ edge has happened through an eventfd and has
no event queue associated with it.

Jason
Jason Gunthorpe Jan. 2, 2025, 8:52 p.m. UTC | #3
On Tue, Dec 17, 2024 at 09:00:19PM -0800, Nicolin Chen wrote:
> +/* An iommufd_virq_header packs a vIOMMU interrupt in an iommufd_virq queue */
> +struct iommufd_virq_header {
> +	struct list_head node;
> +	ssize_t irq_len;
> +	void *irq_data;
> +};

Based on how it is used in iommufd_viommu_report_irq()

+       header = kzalloc(sizeof(*header) + irq_len, GFP_KERNEL);
+       header->irq_data = (void *)header + sizeof(*header);
+       memcpy(header->irq_data, irq_ptr, irq_len);

It should be a flex array and use the various flexarray tools

struct iommufd_virq_header {
       ssize_t irq_len;
       u64 irq_data[] __counted_by(irq_len);
}

Jason
Nicolin Chen Jan. 2, 2025, 10:30 p.m. UTC | #4
On Thu, Jan 02, 2025 at 04:45:07PM -0400, Jason Gunthorpe wrote:
> On Tue, Dec 17, 2024 at 09:00:19PM -0800, Nicolin Chen wrote:
> > Allow a vIOMMU object to allocate vIRQ Event Queues, with a condition that
> > each vIOMMU can only have one single vIRQ event queue per type.
> 
> I suggest you should tend to use the eventq as the primary naming not
> vIRQ, I think that will be a bit clearer.
> 
> The virq in the VM is edge triggered by an event queue FD becoming
> readable, but the event queue is the file descriptor that reports a
> batch of events on read().
> 
> The virq name evokes similarities to the virq in vfio which is purely
> about conveying if an IRQ edge has happened through an eventfd and has
> no event queue associated with it.

Ack. By doing the "Part-3: vEVENTQ" specifying one type of queue,
I think the Part-4 then should be "vCMDQ" likewise vs. "vQUEUE".

Thanks
Nicolin
Nicolin Chen Jan. 3, 2025, 3:30 a.m. UTC | #5
On Thu, Jan 02, 2025 at 04:52:46PM -0400, Jason Gunthorpe wrote:
> On Tue, Dec 17, 2024 at 09:00:19PM -0800, Nicolin Chen wrote:
> > +/* An iommufd_virq_header packs a vIOMMU interrupt in an iommufd_virq queue */
> > +struct iommufd_virq_header {
> > +	struct list_head node;
> > +	ssize_t irq_len;
> > +	void *irq_data;
> > +};
> 
> Based on how it is used in iommufd_viommu_report_irq()
> 
> +       header = kzalloc(sizeof(*header) + irq_len, GFP_KERNEL);
> +       header->irq_data = (void *)header + sizeof(*header);
> +       memcpy(header->irq_data, irq_ptr, irq_len);
> 
> It should be a flex array and use the various flexarray tools
> 
> struct iommufd_virq_header {
>        ssize_t irq_len;
>        u64 irq_data[] __counted_by(irq_len);
> }

Changed to
-------------------------------------------------------------------------
/* An iommufd_vevent represents a vIOMMU event in an iommufd_veventq */
struct iommufd_vevent {
	struct list_head node;
	ssize_t data_len;
	u64 event_data[] __counted_by(data_len);
};
[...]
	vevent = kmalloc(struct_size(vevent, event_data, data_len), GFP_KERNEL);
-	header->irq_data = (void *)header + sizeof(*header);
-------------------------------------------------------------------------

Thanks
Nicolin
Jason Gunthorpe Jan. 6, 2025, 4:59 p.m. UTC | #6
On Thu, Jan 02, 2025 at 07:30:21PM -0800, Nicolin Chen wrote:
> On Thu, Jan 02, 2025 at 04:52:46PM -0400, Jason Gunthorpe wrote:
> > On Tue, Dec 17, 2024 at 09:00:19PM -0800, Nicolin Chen wrote:
> > > +/* An iommufd_virq_header packs a vIOMMU interrupt in an iommufd_virq queue */
> > > +struct iommufd_virq_header {
> > > +	struct list_head node;
> > > +	ssize_t irq_len;
> > > +	void *irq_data;
> > > +};
> > 
> > Based on how it is used in iommufd_viommu_report_irq()
> > 
> > +       header = kzalloc(sizeof(*header) + irq_len, GFP_KERNEL);
> > +       header->irq_data = (void *)header + sizeof(*header);
> > +       memcpy(header->irq_data, irq_ptr, irq_len);
> > 
> > It should be a flex array and use the various flexarray tools
> > 
> > struct iommufd_virq_header {
> >        ssize_t irq_len;
> >        u64 irq_data[] __counted_by(irq_len);
> > }
> 
> Changed to
> -------------------------------------------------------------------------
> /* An iommufd_vevent represents a vIOMMU event in an iommufd_veventq */
> struct iommufd_vevent {
> 	struct list_head node;
> 	ssize_t data_len;
> 	u64 event_data[] __counted_by(data_len);
> };
> [...]
> 	vevent = kmalloc(struct_size(vevent, event_data, data_len), GFP_KERNEL);
> -	header->irq_data = (void *)header + sizeof(*header);
> -------------------------------------------------------------------------

Yeah, that's right

Jason
diff mbox series

Patch

diff --git a/drivers/iommu/iommufd/iommufd_private.h b/drivers/iommu/iommufd/iommufd_private.h
index dfbc5cfbd164..fab3b21ac687 100644
--- a/drivers/iommu/iommufd/iommufd_private.h
+++ b/drivers/iommu/iommufd/iommufd_private.h
@@ -547,6 +547,49 @@  static inline int iommufd_hwpt_replace_device(struct iommufd_device *idev,
 	return iommu_group_replace_domain(idev->igroup->group, hwpt->domain);
 }
 
+/*
+ * An iommufd_virq object represents an interface to deliver vIOMMU interrupts
+ * to the user space. These objects are created/destroyed by the user space and
+ * associated with vIOMMU object(s) during the allocations.
+ */
+struct iommufd_virq {
+	struct iommufd_eventq common;
+	struct iommufd_viommu *viommu;
+	struct list_head node;
+
+	unsigned int type;
+};
+
+static inline struct iommufd_virq *eventq_to_virq(struct iommufd_eventq *eventq)
+{
+	return container_of(eventq, struct iommufd_virq, common);
+}
+
+static inline struct iommufd_virq *iommufd_get_virq(struct iommufd_ucmd *ucmd,
+						    u32 id)
+{
+	return container_of(iommufd_get_object(ucmd->ictx, id,
+					       IOMMUFD_OBJ_VIRQ),
+			    struct iommufd_virq, common.obj);
+}
+
+int iommufd_virq_alloc(struct iommufd_ucmd *ucmd);
+void iommufd_virq_destroy(struct iommufd_object *obj);
+void iommufd_virq_abort(struct iommufd_object *obj);
+
+/* An iommufd_virq_header packs a vIOMMU interrupt in an iommufd_virq queue */
+struct iommufd_virq_header {
+	struct list_head node;
+	ssize_t irq_len;
+	void *irq_data;
+};
+
+static inline int iommufd_virq_handler(struct iommufd_virq *virq,
+				       struct iommufd_virq_header *virq_header)
+{
+	return iommufd_eventq_notify(&virq->common, &virq_header->node);
+}
+
 static inline struct iommufd_viommu *
 iommufd_get_viommu(struct iommufd_ucmd *ucmd, u32 id)
 {
@@ -555,6 +598,20 @@  iommufd_get_viommu(struct iommufd_ucmd *ucmd, u32 id)
 			    struct iommufd_viommu, obj);
 }
 
+static inline struct iommufd_virq *
+iommufd_viommu_find_virq(struct iommufd_viommu *viommu, u32 type)
+{
+	struct iommufd_virq *virq, *next;
+
+	lockdep_assert_held(&viommu->virqs_rwsem);
+
+	list_for_each_entry_safe(virq, next, &viommu->virqs, node) {
+		if (virq->type == type)
+			return virq;
+	}
+	return NULL;
+}
+
 int iommufd_viommu_alloc_ioctl(struct iommufd_ucmd *ucmd);
 void iommufd_viommu_destroy(struct iommufd_object *obj);
 int iommufd_vdevice_alloc_ioctl(struct iommufd_ucmd *ucmd);
diff --git a/include/linux/iommufd.h b/include/linux/iommufd.h
index 11110c749200..b082676c9e43 100644
--- a/include/linux/iommufd.h
+++ b/include/linux/iommufd.h
@@ -34,6 +34,7 @@  enum iommufd_object_type {
 	IOMMUFD_OBJ_FAULT,
 	IOMMUFD_OBJ_VIOMMU,
 	IOMMUFD_OBJ_VDEVICE,
+	IOMMUFD_OBJ_VIRQ,
 #ifdef CONFIG_IOMMUFD_TEST
 	IOMMUFD_OBJ_SELFTEST,
 #endif
@@ -93,6 +94,8 @@  struct iommufd_viommu {
 	const struct iommufd_viommu_ops *ops;
 
 	struct xarray vdevs;
+	struct list_head virqs;
+	struct rw_semaphore virqs_rwsem;
 
 	unsigned int type;
 };
diff --git a/include/uapi/linux/iommufd.h b/include/uapi/linux/iommufd.h
index 34810f6ae2b5..cdf2dba28d4a 100644
--- a/include/uapi/linux/iommufd.h
+++ b/include/uapi/linux/iommufd.h
@@ -55,6 +55,7 @@  enum {
 	IOMMUFD_CMD_VIOMMU_ALLOC = 0x90,
 	IOMMUFD_CMD_VDEVICE_ALLOC = 0x91,
 	IOMMUFD_CMD_IOAS_CHANGE_PROCESS = 0x92,
+	IOMMUFD_CMD_VIRQ_ALLOC = 0x93,
 };
 
 /**
@@ -1012,4 +1013,34 @@  struct iommu_ioas_change_process {
 #define IOMMU_IOAS_CHANGE_PROCESS \
 	_IO(IOMMUFD_TYPE, IOMMUFD_CMD_IOAS_CHANGE_PROCESS)
 
+/**
+ * enum iommu_virq_type - Virtual IRQ Type
+ * @IOMMU_VIRQ_TYPE_NONE: INVALID type
+ */
+enum iommu_virq_type {
+	IOMMU_VIRQ_TYPE_NONE = 0,
+};
+
+/**
+ * struct iommu_virq_alloc - ioctl(IOMMU_VIRQ_ALLOC)
+ * @size: sizeof(struct iommu_virq_alloc)
+ * @flags: Must be 0
+ * @viommu_id: virtual IOMMU ID to associate the virtual IRQ with
+ * @type: Type of the virtual IRQ. Must be defined in enum iommu_virq_type
+ * @out_virq_id: The ID of the new virtual IRQ
+ * @out_virq_fd: The fd of the new virtual IRQ. User space must close the
+ *               successfully returned fd after using it
+ *
+ * Explicitly allocate a virtual IRQ interface for a vIOMMU. A vIOMMU can have
+ * multiple FDs for different @type, but is confined to one FD per @type.
+ */
+struct iommu_virq_alloc {
+	__u32 size;
+	__u32 flags;
+	__u32 viommu_id;
+	__u32 type;
+	__u32 out_virq_id;
+	__u32 out_virq_fd;
+};
+#define IOMMU_VIRQ_ALLOC _IO(IOMMUFD_TYPE, IOMMUFD_CMD_VIRQ_ALLOC)
 #endif
diff --git a/drivers/iommu/iommufd/eventq.c b/drivers/iommu/iommufd/eventq.c
index e386b6c3e6ab..a8921c745d36 100644
--- a/drivers/iommu/iommufd/eventq.c
+++ b/drivers/iommu/iommufd/eventq.c
@@ -346,6 +346,73 @@  static const struct iommufd_eventq_ops iommufd_fault_ops = {
 	.write = &iommufd_fault_fops_write,
 };
 
+/* IOMMUFD_OBJ_VIRQ Functions */
+
+void iommufd_virq_abort(struct iommufd_object *obj)
+{
+	struct iommufd_eventq *eventq =
+		container_of(obj, struct iommufd_eventq, obj);
+	struct iommufd_virq *virq = eventq_to_virq(eventq);
+	struct iommufd_viommu *viommu = virq->viommu;
+	struct iommufd_virq_header *cur, *next;
+
+	lockdep_assert_held_write(&viommu->virqs_rwsem);
+
+	list_for_each_entry_safe(cur, next, &eventq->deliver, node) {
+		list_del(&cur->node);
+		kfree(cur);
+	}
+
+	refcount_dec(&viommu->obj.users);
+	mutex_destroy(&eventq->mutex);
+	list_del(&virq->node);
+}
+
+void iommufd_virq_destroy(struct iommufd_object *obj)
+{
+	struct iommufd_virq *virq =
+		eventq_to_virq(container_of(obj, struct iommufd_eventq, obj));
+
+	down_write(&virq->viommu->virqs_rwsem);
+	iommufd_virq_abort(obj);
+	up_write(&virq->viommu->virqs_rwsem);
+}
+
+static ssize_t iommufd_virq_fops_read(struct iommufd_eventq *eventq,
+				      char __user *buf, size_t count,
+				      loff_t *ppos)
+{
+	size_t done = 0;
+	int rc = 0;
+
+	if (*ppos)
+		return -ESPIPE;
+
+	mutex_lock(&eventq->mutex);
+	while (!list_empty(&eventq->deliver) && count > done) {
+		struct iommufd_virq_header *cur = list_first_entry(
+			&eventq->deliver, struct iommufd_virq_header, node);
+
+		if (cur->irq_len > count - done)
+			break;
+
+		if (copy_to_user(buf + done, cur->irq_data, cur->irq_len)) {
+			rc = -EFAULT;
+			break;
+		}
+		done += cur->irq_len;
+		list_del(&cur->node);
+		kfree(cur);
+	}
+	mutex_unlock(&eventq->mutex);
+
+	return done == 0 ? rc : done;
+}
+
+static const struct iommufd_eventq_ops iommufd_virq_ops = {
+	.read = &iommufd_virq_fops_read,
+};
+
 /* Common Event Queue Functions */
 
 static ssize_t iommufd_eventq_fops_read(struct file *filep, char __user *buf,
@@ -473,3 +540,65 @@  int iommufd_fault_alloc(struct iommufd_ucmd *ucmd)
 
 	return rc;
 }
+
+int iommufd_virq_alloc(struct iommufd_ucmd *ucmd)
+{
+	struct iommu_virq_alloc *cmd = ucmd->cmd;
+	struct iommufd_viommu *viommu;
+	struct iommufd_virq *virq;
+	int fdno;
+	int rc;
+
+	if (cmd->flags || cmd->type == IOMMU_VIRQ_TYPE_NONE)
+		return -EOPNOTSUPP;
+
+	viommu = iommufd_get_viommu(ucmd, cmd->viommu_id);
+	if (IS_ERR(viommu))
+		return PTR_ERR(viommu);
+	down_write(&viommu->virqs_rwsem);
+
+	if (iommufd_viommu_find_virq(viommu, cmd->type)) {
+		rc = -EEXIST;
+		goto out_unlock_virqs;
+	}
+
+	virq = __iommufd_object_alloc(ucmd->ictx, virq, IOMMUFD_OBJ_VIRQ,
+				      common.obj);
+	if (IS_ERR(virq)) {
+		rc = PTR_ERR(virq);
+		goto out_unlock_virqs;
+	}
+
+	virq->type = cmd->type;
+	virq->viommu = viommu;
+	refcount_inc(&viommu->obj.users);
+	list_add_tail(&virq->node, &viommu->virqs);
+
+	fdno = iommufd_eventq_init(&virq->common, "[iommufd-viommu-irq]",
+				   ucmd->ictx, &iommufd_virq_ops);
+	if (fdno < 0) {
+		rc = fdno;
+		goto out_abort;
+	}
+
+	cmd->out_virq_id = virq->common.obj.id;
+	cmd->out_virq_fd = fdno;
+
+	rc = iommufd_ucmd_respond(ucmd, sizeof(*cmd));
+	if (rc)
+		goto out_put_fdno;
+
+	iommufd_object_finalize(ucmd->ictx, &virq->common.obj);
+	fd_install(fdno, virq->common.filep);
+	goto out_unlock_virqs;
+
+out_put_fdno:
+	put_unused_fd(fdno);
+	fput(virq->common.filep);
+out_abort:
+	iommufd_object_abort_and_destroy(ucmd->ictx, &virq->common.obj);
+out_unlock_virqs:
+	up_write(&viommu->virqs_rwsem);
+	iommufd_put_object(ucmd->ictx, &viommu->obj);
+	return rc;
+}
diff --git a/drivers/iommu/iommufd/main.c b/drivers/iommu/iommufd/main.c
index cfbdf7b0e3c1..9d15978ef882 100644
--- a/drivers/iommu/iommufd/main.c
+++ b/drivers/iommu/iommufd/main.c
@@ -367,6 +367,8 @@  static const struct iommufd_ioctl_op iommufd_ioctl_ops[] = {
 		 __reserved),
 	IOCTL_OP(IOMMU_VIOMMU_ALLOC, iommufd_viommu_alloc_ioctl,
 		 struct iommu_viommu_alloc, out_viommu_id),
+	IOCTL_OP(IOMMU_VIRQ_ALLOC, iommufd_virq_alloc, struct iommu_virq_alloc,
+		 out_virq_fd),
 #ifdef CONFIG_IOMMUFD_TEST
 	IOCTL_OP(IOMMU_TEST_CMD, iommufd_test, struct iommu_test_cmd, last),
 #endif
@@ -502,6 +504,10 @@  static const struct iommufd_object_ops iommufd_object_ops[] = {
 	[IOMMUFD_OBJ_FAULT] = {
 		.destroy = iommufd_fault_destroy,
 	},
+	[IOMMUFD_OBJ_VIRQ] = {
+		.destroy = iommufd_virq_destroy,
+		.abort = iommufd_virq_abort,
+	},
 	[IOMMUFD_OBJ_VIOMMU] = {
 		.destroy = iommufd_viommu_destroy,
 	},
diff --git a/drivers/iommu/iommufd/viommu.c b/drivers/iommu/iommufd/viommu.c
index 69b88e8c7c26..075b6aed79bc 100644
--- a/drivers/iommu/iommufd/viommu.c
+++ b/drivers/iommu/iommufd/viommu.c
@@ -59,6 +59,8 @@  int iommufd_viommu_alloc_ioctl(struct iommufd_ucmd *ucmd)
 	viommu->ictx = ucmd->ictx;
 	viommu->hwpt = hwpt_paging;
 	refcount_inc(&viommu->hwpt->common.obj.users);
+	INIT_LIST_HEAD(&viommu->virqs);
+	init_rwsem(&viommu->virqs_rwsem);
 	/*
 	 * It is the most likely case that a physical IOMMU is unpluggable. A
 	 * pluggable IOMMU instance (if exists) is responsible for refcounting