diff mbox series

drm/msm/iommu: optimize map/unmap

Message ID 20220822184742.32076-1-robdclark@gmail.com
State Superseded
Headers show
Series drm/msm/iommu: optimize map/unmap | expand

Commit Message

Rob Clark Aug. 22, 2022, 6:47 p.m. UTC
From: Rob Clark <robdclark@chromium.org>

Using map_pages/unmap_pages cuts down on the # of pgtable walks needed
in the process of finding where to insert/remove an entry.  The end
result is ~5-10x faster than mapping a single page at a time.

Signed-off-by: Rob Clark <robdclark@chromium.org>
---
 drivers/gpu/drm/msm/msm_iommu.c | 91 ++++++++++++++++++++++++++++-----
 1 file changed, 79 insertions(+), 12 deletions(-)

Comments

Sai Prakash Ranjan Aug. 23, 2022, 7:25 a.m. UTC | #1
Hi Rob,

On 8/23/2022 12:17 AM, Rob Clark wrote:
> From: Rob Clark <robdclark@chromium.org>
>
> Using map_pages/unmap_pages cuts down on the # of pgtable walks needed
> in the process of finding where to insert/remove an entry.  The end
> result is ~5-10x faster than mapping a single page at a time.
>
> Signed-off-by: Rob Clark <robdclark@chromium.org>
> ---
>   drivers/gpu/drm/msm/msm_iommu.c | 91 ++++++++++++++++++++++++++++-----
>   1 file changed, 79 insertions(+), 12 deletions(-)
>
> diff --git a/drivers/gpu/drm/msm/msm_iommu.c b/drivers/gpu/drm/msm/msm_iommu.c
> index a54ed354578b..0f3f60da3314 100644
> --- a/drivers/gpu/drm/msm/msm_iommu.c
> +++ b/drivers/gpu/drm/msm/msm_iommu.c
> @@ -21,6 +21,7 @@ struct msm_iommu_pagetable {
>   	struct msm_mmu base;
>   	struct msm_mmu *parent;
>   	struct io_pgtable_ops *pgtbl_ops;
> +	unsigned long pgsize_bitmap;	/* Bitmap of page sizes in use */
>   	phys_addr_t ttbr;
>   	u32 asid;
>   };
> @@ -29,23 +30,85 @@ static struct msm_iommu_pagetable *to_pagetable(struct msm_mmu *mmu)
>   	return container_of(mmu, struct msm_iommu_pagetable, base);
>   }
>   
> +/* based on iommu_pgsize() in iommu.c: */
> +static size_t iommu_pgsize(struct msm_iommu_pagetable *pagetable,

Maybe call this msm_iommu_pgsize? There won't be an actual namespace conflict, since the
function is static in both places, but a distinct name would still be clearer.

> +			   unsigned long iova, phys_addr_t paddr,
> +			   size_t size, size_t *count)
> +{
> +	unsigned int pgsize_idx, pgsize_idx_next;
> +	unsigned long pgsizes;
> +	size_t offset, pgsize, pgsize_next;
> +	unsigned long addr_merge = paddr | iova;
> +
> +	/* Page sizes supported by the hardware and small enough for @size */
> +	pgsizes = pagetable->pgsize_bitmap & GENMASK(__fls(size), 0);
> +
> +	/* Constrain the page sizes further based on the maximum alignment */
> +	if (likely(addr_merge))
> +		pgsizes &= GENMASK(__ffs(addr_merge), 0);
> +
> +	/* Make sure we have at least one suitable page size */
> +	BUG_ON(!pgsizes);
> +
> +	/* Pick the biggest page size remaining */
> +	pgsize_idx = __fls(pgsizes);
> +	pgsize = BIT(pgsize_idx);
> +	if (!count)
> +		return pgsize;
> +
> +	/* Find the next biggest support page size, if it exists */
> +	pgsizes = pagetable->pgsize_bitmap & ~GENMASK(pgsize_idx, 0);
> +	if (!pgsizes)
> +		goto out_set_count;
> +
> +	pgsize_idx_next = __ffs(pgsizes);
> +	pgsize_next = BIT(pgsize_idx_next);
> +
> +	/*
> +	 * There's no point trying a bigger page size unless the virtual
> +	 * and physical addresses are similarly offset within the larger page.
> +	 */
> +	if ((iova ^ paddr) & (pgsize_next - 1))
> +		goto out_set_count;
> +
> +	/* Calculate the offset to the next page size alignment boundary */
> +	offset = pgsize_next - (addr_merge & (pgsize_next - 1));
> +
> +	/*
> +	 * If size is big enough to accommodate the larger page, reduce
> +	 * the number of smaller pages.
> +	 */
> +	if (offset + pgsize_next <= size)
> +		size = offset;
> +
> +out_set_count:
> +	*count = size >> pgsize_idx;
> +	return pgsize;
> +}
> +
>   static int msm_iommu_pagetable_unmap(struct msm_mmu *mmu, u64 iova,
>   		size_t size)
>   {
>   	struct msm_iommu_pagetable *pagetable = to_pagetable(mmu);
>   	struct io_pgtable_ops *ops = pagetable->pgtbl_ops;
> -	size_t unmapped = 0;
>   
>   	/* Unmap the block one page at a time */

This comment will need an update, since the block is no longer unmapped one page at a time.

>   	while (size) {
> -		unmapped += ops->unmap(ops, iova, 4096, NULL);
> -		iova += 4096;
> -		size -= 4096;
> +		size_t unmapped, pgsize, count;
> +
> +		pgsize = iommu_pgsize(pagetable, iova, iova, size, &count);
> +
> +		unmapped = ops->unmap_pages(ops, iova, pgsize, count, NULL);
> +		if (!unmapped)
> +			break;
> +
> +		iova += unmapped;
> +		size -= unmapped;
>   	}
>   
>   	iommu_flush_iotlb_all(to_msm_iommu(pagetable->parent)->domain);
>   
> -	return (unmapped == size) ? 0 : -EINVAL;
> +	return (size == 0) ? 0 : -EINVAL;
>   }
>   
>   static int msm_iommu_pagetable_map(struct msm_mmu *mmu, u64 iova,
> @@ -54,7 +117,6 @@ static int msm_iommu_pagetable_map(struct msm_mmu *mmu, u64 iova,
>   	struct msm_iommu_pagetable *pagetable = to_pagetable(mmu);
>   	struct io_pgtable_ops *ops = pagetable->pgtbl_ops;
>   	struct scatterlist *sg;
> -	size_t mapped = 0;
>   	u64 addr = iova;
>   	unsigned int i;
>   
> @@ -64,15 +126,19 @@ static int msm_iommu_pagetable_map(struct msm_mmu *mmu, u64 iova,
>   
>   		/* Map the block one page at a time */

This comment will need an update, since the block is no longer mapped one page at a time.

>   		while (size) {
> -			if (ops->map(ops, addr, phys, 4096, prot, GFP_KERNEL)) {
> -				msm_iommu_pagetable_unmap(mmu, iova, mapped);
> +			size_t pgsize, count, mapped;
> +
> +			pgsize = iommu_pgsize(pagetable, addr, phys, size, &count);
> +
> +			if (ops->map_pages(ops, addr, phys, pgsize, count,
> +					   prot, GFP_KERNEL, &mapped)) {
> +				msm_iommu_pagetable_unmap(mmu, iova, addr - iova);

If ->map_pages fails, some of the pages may already have been mapped, and these need to be accounted
for when unmapping. So maybe follow the logic in __iommu_map() and track the mapped size rather than
using addr - iova: addr is not advanced when the call fails after mapping only a few pages.

Thanks,
Sai

>   				return -EINVAL;
>   			}
>   
> -			phys += 4096;
> -			addr += 4096;
> -			size -= 4096;
> -			mapped += 4096;
> +			phys += mapped;
> +			addr += mapped;
> +			size -= mapped;
>   		}
>   	}
>   
> @@ -207,6 +273,7 @@ struct msm_mmu *msm_iommu_pagetable_create(struct msm_mmu *parent)
>   
>   	/* Needed later for TLB flush */
>   	pagetable->parent = parent;
> +	pagetable->pgsize_bitmap = ttbr0_cfg.pgsize_bitmap;
>   	pagetable->ttbr = ttbr0_cfg.arm_lpae_s1_cfg.ttbr;
>   
>   	/*
diff mbox series

Patch

diff --git a/drivers/gpu/drm/msm/msm_iommu.c b/drivers/gpu/drm/msm/msm_iommu.c
index a54ed354578b..0f3f60da3314 100644
--- a/drivers/gpu/drm/msm/msm_iommu.c
+++ b/drivers/gpu/drm/msm/msm_iommu.c
@@ -21,6 +21,7 @@  struct msm_iommu_pagetable {
 	struct msm_mmu base;
 	struct msm_mmu *parent;
 	struct io_pgtable_ops *pgtbl_ops;
+	unsigned long pgsize_bitmap;	/* Bitmap of page sizes in use */
 	phys_addr_t ttbr;
 	u32 asid;
 };
@@ -29,23 +30,85 @@  static struct msm_iommu_pagetable *to_pagetable(struct msm_mmu *mmu)
 	return container_of(mmu, struct msm_iommu_pagetable, base);
 }
 
+/* based on iommu_pgsize() in iommu.c: */
+static size_t iommu_pgsize(struct msm_iommu_pagetable *pagetable,
+			   unsigned long iova, phys_addr_t paddr,
+			   size_t size, size_t *count)
+{
+	unsigned int pgsize_idx, pgsize_idx_next;
+	unsigned long pgsizes;
+	size_t offset, pgsize, pgsize_next;
+	unsigned long addr_merge = paddr | iova;
+
+	/* Page sizes supported by the hardware and small enough for @size */
+	pgsizes = pagetable->pgsize_bitmap & GENMASK(__fls(size), 0);
+
+	/* Constrain the page sizes further based on the maximum alignment */
+	if (likely(addr_merge))
+		pgsizes &= GENMASK(__ffs(addr_merge), 0);
+
+	/* Make sure we have at least one suitable page size */
+	BUG_ON(!pgsizes);
+
+	/* Pick the biggest page size remaining */
+	pgsize_idx = __fls(pgsizes);
+	pgsize = BIT(pgsize_idx);
+	if (!count)
+		return pgsize;
+
+	/* Find the next biggest support page size, if it exists */
+	pgsizes = pagetable->pgsize_bitmap & ~GENMASK(pgsize_idx, 0);
+	if (!pgsizes)
+		goto out_set_count;
+
+	pgsize_idx_next = __ffs(pgsizes);
+	pgsize_next = BIT(pgsize_idx_next);
+
+	/*
+	 * There's no point trying a bigger page size unless the virtual
+	 * and physical addresses are similarly offset within the larger page.
+	 */
+	if ((iova ^ paddr) & (pgsize_next - 1))
+		goto out_set_count;
+
+	/* Calculate the offset to the next page size alignment boundary */
+	offset = pgsize_next - (addr_merge & (pgsize_next - 1));
+
+	/*
+	 * If size is big enough to accommodate the larger page, reduce
+	 * the number of smaller pages.
+	 */
+	if (offset + pgsize_next <= size)
+		size = offset;
+
+out_set_count:
+	*count = size >> pgsize_idx;
+	return pgsize;
+}
+
 static int msm_iommu_pagetable_unmap(struct msm_mmu *mmu, u64 iova,
 		size_t size)
 {
 	struct msm_iommu_pagetable *pagetable = to_pagetable(mmu);
 	struct io_pgtable_ops *ops = pagetable->pgtbl_ops;
-	size_t unmapped = 0;
 
 	/* Unmap the block one page at a time */
 	while (size) {
-		unmapped += ops->unmap(ops, iova, 4096, NULL);
-		iova += 4096;
-		size -= 4096;
+		size_t unmapped, pgsize, count;
+
+		pgsize = iommu_pgsize(pagetable, iova, iova, size, &count);
+
+		unmapped = ops->unmap_pages(ops, iova, pgsize, count, NULL);
+		if (!unmapped)
+			break;
+
+		iova += unmapped;
+		size -= unmapped;
 	}
 
 	iommu_flush_iotlb_all(to_msm_iommu(pagetable->parent)->domain);
 
-	return (unmapped == size) ? 0 : -EINVAL;
+	return (size == 0) ? 0 : -EINVAL;
 }
 
 static int msm_iommu_pagetable_map(struct msm_mmu *mmu, u64 iova,
@@ -54,7 +117,6 @@  static int msm_iommu_pagetable_map(struct msm_mmu *mmu, u64 iova,
 	struct msm_iommu_pagetable *pagetable = to_pagetable(mmu);
 	struct io_pgtable_ops *ops = pagetable->pgtbl_ops;
 	struct scatterlist *sg;
-	size_t mapped = 0;
 	u64 addr = iova;
 	unsigned int i;
 
@@ -64,15 +126,19 @@  static int msm_iommu_pagetable_map(struct msm_mmu *mmu, u64 iova,
 
 		/* Map the block one page at a time */
 		while (size) {
-			if (ops->map(ops, addr, phys, 4096, prot, GFP_KERNEL)) {
-				msm_iommu_pagetable_unmap(mmu, iova, mapped);
+			size_t pgsize, count, mapped;
+
+			pgsize = iommu_pgsize(pagetable, addr, phys, size, &count);
+
+			if (ops->map_pages(ops, addr, phys, pgsize, count,
+					   prot, GFP_KERNEL, &mapped)) {
+				msm_iommu_pagetable_unmap(mmu, iova, addr - iova);
 				return -EINVAL;
 			}
 
-			phys += 4096;
-			addr += 4096;
-			size -= 4096;
-			mapped += 4096;
+			phys += mapped;
+			addr += mapped;
+			size -= mapped;
 		}
 	}
 
@@ -207,6 +273,7 @@  struct msm_mmu *msm_iommu_pagetable_create(struct msm_mmu *parent)
 
 	/* Needed later for TLB flush */
 	pagetable->parent = parent;
+	pagetable->pgsize_bitmap = ttbr0_cfg.pgsize_bitmap;
 	pagetable->ttbr = ttbr0_cfg.arm_lpae_s1_cfg.ttbr;
 
 	/*