diff mbox series

[04/14] iommufd: Use the iommufd_group to avoid duplicate reserved groups and msi setup

Message ID 4-v1-7612f88c19f5+2f21-iommufd_alloc_jgg@nvidia.com
State New
Headers show
Series Add iommufd physical device operations for replace and alloc hwpt | expand

Commit Message

Jason Gunthorpe Feb. 25, 2023, 12:27 a.m. UTC
These items only needs to be done once per group, not once per device. The
once per device was a way to make the device list work. Since we are
abandoning this we can optimize things a bit.

Signed-off-by: Jason Gunthorpe <jgg@nvidia.com>
---
 drivers/iommu/iommufd/device.c          | 38 ++++++++++++++-----------
 drivers/iommu/iommufd/io_pagetable.c    |  5 ++--
 drivers/iommu/iommufd/iommufd_private.h |  1 -
 3 files changed, 23 insertions(+), 21 deletions(-)

Comments

Tian, Kevin March 2, 2023, 8:06 a.m. UTC | #1
> From: Jason Gunthorpe <jgg@nvidia.com>
> Sent: Saturday, February 25, 2023 8:28 AM
> 
> These items only needs to be done once per group, not once per device. The
> once per device was a way to make the device list work. Since we are
> abandoning this we can optimize things a bit.
> 

Are we sure that a device hot-plugged into the group won't increase the
list of reserved IOVA ranges?

Currently iommu_get_group_resv_regions() queries reserved regions per
device and then merge them into the specified list.

Of course VFIO cannot cope with it since its group_attach() is done only
once. But if such scenario does exist then the current per-device
reservation in iommufd looks an improvement.
Jason Gunthorpe March 2, 2023, 12:55 p.m. UTC | #2
On Thu, Mar 02, 2023 at 08:06:27AM +0000, Tian, Kevin wrote:
> > From: Jason Gunthorpe <jgg@nvidia.com>
> > Sent: Saturday, February 25, 2023 8:28 AM
> > 
> > These items only needs to be done once per group, not once per device. The
> > once per device was a way to make the device list work. Since we are
> > abandoning this we can optimize things a bit.
> > 
> 
> Are we sure that a device hot-plugged into the group won't increase the
> list of reserved IOVA ranges?
> 
> Currently iommu_get_group_resv_regions() queries reserved regions per
> device and then merge them into the specified list.

So, maybe we should export the device API from the core code and use
it here instead of this group stuff. We don't need the core code to
merge each device together for us anyhow.

Jason
Tian, Kevin March 3, 2023, 2:16 a.m. UTC | #3
> From: Jason Gunthorpe <jgg@nvidia.com>
> Sent: Thursday, March 2, 2023 8:55 PM
> 
> On Thu, Mar 02, 2023 at 08:06:27AM +0000, Tian, Kevin wrote:
> > > From: Jason Gunthorpe <jgg@nvidia.com>
> > > Sent: Saturday, February 25, 2023 8:28 AM
> > >
> > > These items only needs to be done once per group, not once per device.
> The
> > > once per device was a way to make the device list work. Since we are
> > > abandoning this we can optimize things a bit.
> > >
> >
> > Are we sure that a device hot-plugged into the group won't increase the
> > list of reserved IOVA ranges?
> >
> > Currently iommu_get_group_resv_regions() queries reserved regions per
> > device and then merge them into the specified list.
> 
> So, maybe we should export the device API from the core code and use
> it here instead of this group stuff. We don't need the core code to
> merge each device together for us anyhow.
> 

Yes, that'd be clearer in this context.
diff mbox series

Patch

diff --git a/drivers/iommu/iommufd/device.c b/drivers/iommu/iommufd/device.c
index 264bfa2212481f..75e8d79678736f 100644
--- a/drivers/iommu/iommufd/device.c
+++ b/drivers/iommu/iommufd/device.c
@@ -298,9 +298,20 @@  int iommufd_hw_pagetable_attach(struct iommufd_hw_pagetable *hwpt,
 		}
 	}
 
-	rc = iopt_table_enforce_group_resv_regions(&hwpt->ioas->iopt, idev->dev,
-						   idev->igroup->group,
-						   &sw_msi_start);
+	/*
+	 * The first device in the group to be attached will do all the work
+	 * to setup the hwpt and ioas. Every other device re-uses it through
+	 * the shared group attachment. Users are allowed/expected to attach
+	 * every device in the group to the same hwpt, that just turns into
+	 * a NOP.
+	 */
+	if (idev->igroup->devices) {
+		idev->igroup->devices++;
+		return 0;
+	}
+
+	rc = iopt_table_enforce_group_resv_regions(
+		&hwpt->ioas->iopt, idev->igroup->group, &sw_msi_start);
 	if (rc)
 		return rc;
 
@@ -308,22 +319,15 @@  int iommufd_hw_pagetable_attach(struct iommufd_hw_pagetable *hwpt,
 	if (rc)
 		goto err_unresv;
 
-	/*
-	 * Only attach to the group once for the first device that is in the
-	 * group. All the other devices will follow this attachment.
-	 * The user can attach every device individually as well.
-	 */
-	if (!idev->igroup->devices) {
-		rc = iommu_attach_group(hwpt->domain, idev->igroup->group);
-		if (rc)
-			goto err_unresv;
-		idev->igroup->hwpt = hwpt;
-		refcount_inc(&hwpt->obj.users);
-	}
+	rc = iommu_attach_group(hwpt->domain, idev->igroup->group);
+	if (rc)
+		goto err_unresv;
+	idev->igroup->hwpt = hwpt;
+	refcount_inc(&hwpt->obj.users);
 	idev->igroup->devices++;
 	return 0;
 err_unresv:
-	iopt_remove_reserved_iova(&hwpt->ioas->iopt, idev->dev);
+	iopt_remove_reserved_iova(&hwpt->ioas->iopt, idev->igroup->group);
 	return rc;
 }
 
@@ -339,7 +343,7 @@  iommufd_hw_pagetable_detach(struct iommufd_device *idev)
 		iommu_detach_group(hwpt->domain, idev->igroup->group);
 		idev->igroup->hwpt = NULL;
 	}
-	iopt_remove_reserved_iova(&hwpt->ioas->iopt, idev->dev);
+	iopt_remove_reserved_iova(&hwpt->ioas->iopt, idev->igroup->group);
 	return hwpt;
 }
 
diff --git a/drivers/iommu/iommufd/io_pagetable.c b/drivers/iommu/iommufd/io_pagetable.c
index e0ae72b9e67f86..096491bbb5acf5 100644
--- a/drivers/iommu/iommufd/io_pagetable.c
+++ b/drivers/iommu/iommufd/io_pagetable.c
@@ -1163,7 +1163,6 @@  void iopt_remove_access(struct io_pagetable *iopt,
 
 /* Narrow the valid_iova_itree to include reserved ranges from a group. */
 int iopt_table_enforce_group_resv_regions(struct io_pagetable *iopt,
-					  struct device *device,
 					  struct iommu_group *group,
 					  phys_addr_t *sw_msi_start)
 {
@@ -1191,7 +1190,7 @@  int iopt_table_enforce_group_resv_regions(struct io_pagetable *iopt,
 		}
 
 		rc = iopt_reserve_iova(iopt, resv->start,
-				       resv->length - 1 + resv->start, device);
+				       resv->length - 1 + resv->start, group);
 		if (rc)
 			goto out_reserved;
 	}
@@ -1206,7 +1205,7 @@  int iopt_table_enforce_group_resv_regions(struct io_pagetable *iopt,
 	goto out_free_resv;
 
 out_reserved:
-	__iopt_remove_reserved_iova(iopt, device);
+	__iopt_remove_reserved_iova(iopt, group);
 out_free_resv:
 	list_for_each_entry_safe(resv, tmp, &group_resv_regions, list)
 		kfree(resv);
diff --git a/drivers/iommu/iommufd/iommufd_private.h b/drivers/iommu/iommufd/iommufd_private.h
index 5f3ad16da819e7..dbecdff013d082 100644
--- a/drivers/iommu/iommufd/iommufd_private.h
+++ b/drivers/iommu/iommufd/iommufd_private.h
@@ -77,7 +77,6 @@  int iopt_table_add_domain(struct io_pagetable *iopt,
 void iopt_table_remove_domain(struct io_pagetable *iopt,
 			      struct iommu_domain *domain);
 int iopt_table_enforce_group_resv_regions(struct io_pagetable *iopt,
-					  struct device *device,
 					  struct iommu_group *group,
 					  phys_addr_t *sw_msi_start);
 int iopt_set_allow_iova(struct io_pagetable *iopt,