diff mbox

[v4,00/13] iommu: Add PASID support to Arm SMMUv3

Message ID 20191219163033.2608177-1-jean-philippe@linaro.org
State New
Headers show

Commit Message

Jean-Philippe Brucker Dec. 19, 2019, 4:30 p.m. UTC
Add support for Substream ID and PASIDs to the SMMUv3 driver. Since v3
[1], I added review and tested tags where appropriate and applied the
suggested changes, shown in the diff below. Thanks all!

I'm testing using the zip accelerator on the Hisilicon KunPeng920 and
Zhangfei's uacce module [2]. The full SVA support, which I'll send out
early next year, is available on my branch sva/zip-devel at
https://jpbrucker.net/git/linux/

[1] https://lore.kernel.org/linux-iommu/20191209180514.272727-1-jean-philippe@linaro.org/
[2] https://lore.kernel.org/linux-iommu/1576465697-27946-1-git-send-email-zhangfei.gao@linaro.org/

Jean-Philippe Brucker (13):
  iommu/arm-smmu-v3: Drop __GFP_ZERO flag from DMA allocation
  dt-bindings: document PASID property for IOMMU masters
  iommu/arm-smmu-v3: Parse PASID devicetree property of platform devices
  ACPI/IORT: Parse SSID property of named component node
  iommu/arm-smmu-v3: Prepare arm_smmu_s1_cfg for SSID support
  iommu/arm-smmu-v3: Add context descriptor tables allocators
  iommu/arm-smmu-v3: Add support for Substream IDs
  iommu/arm-smmu-v3: Propagate ssid_bits
  iommu/arm-smmu-v3: Prepare for handling arm_smmu_write_ctx_desc()
    failure
  iommu/arm-smmu-v3: Add second level of context descriptor table
  iommu/arm-smmu-v3: Improve add_device() error handling
  PCI/ATS: Add PASID stubs
  iommu/arm-smmu-v3: Add support for PCI PASID

 .../devicetree/bindings/iommu/iommu.txt       |   6 +
 drivers/acpi/arm64/iort.c                     |  18 +
 drivers/iommu/arm-smmu-v3.c                   | 467 +++++++++++++++---
 drivers/iommu/of_iommu.c                      |   6 +-
 include/linux/iommu.h                         |   2 +
 include/linux/pci-ats.h                       |   3 +
 6 files changed, 442 insertions(+), 60 deletions(-)

-- 
Diff since v3:
#diff --git a/drivers/iommu/arm-smmu-v3.c b/drivers/iommu/arm-smmu-v3.c
index cde7af39681c..8e95ecad4c9a 100644

Comments

Eric Auger Dec. 20, 2019, 7:37 a.m. UTC | #1
Hi Jean,

On 12/19/19 5:30 PM, Jean-Philippe Brucker wrote:
> The SMMU can support up to 20 bits of SSID. Add a second level of page

> tables to accommodate this. Devices that support more than 1024 SSIDs now

> have a table of 1024 L1 entries (8kB), pointing to tables of 1024 context

> descriptors (64kB), allocated on demand.

> 

> Tested-by: Zhangfei Gao <zhangfei.gao@linaro.org>

> Reviewed-by: Jonathan Cameron <Jonathan.Cameron@huawei.com>

> Signed-off-by: Jean-Philippe Brucker <jean-philippe@linaro.org>

Reviewed-by: Eric Auger <eric.auger@redhat.com>


Thanks

Eric

> ---

>  drivers/iommu/arm-smmu-v3.c | 154 +++++++++++++++++++++++++++++++++---

>  1 file changed, 144 insertions(+), 10 deletions(-)

> 

> diff --git a/drivers/iommu/arm-smmu-v3.c b/drivers/iommu/arm-smmu-v3.c

> index b825a5639afc..bf106a7b53eb 100644

> --- a/drivers/iommu/arm-smmu-v3.c

> +++ b/drivers/iommu/arm-smmu-v3.c

> @@ -224,6 +224,7 @@

>  

>  #define STRTAB_STE_0_S1FMT		GENMASK_ULL(5, 4)

>  #define STRTAB_STE_0_S1FMT_LINEAR	0

> +#define STRTAB_STE_0_S1FMT_64K_L2	2

>  #define STRTAB_STE_0_S1CTXPTR_MASK	GENMASK_ULL(51, 6)

>  #define STRTAB_STE_0_S1CDMAX		GENMASK_ULL(63, 59)

>  

> @@ -263,7 +264,20 @@

>  

>  #define STRTAB_STE_3_S2TTB_MASK		GENMASK_ULL(51, 4)

>  

> -/* Context descriptor (stage-1 only) */

> +/*

> + * Context descriptors.

> + *

> + * Linear: when less than 1024 SSIDs are supported

> + * 2lvl: at most 1024 L1 entries,

> + *       1024 lazy entries per table.

> + */

> +#define CTXDESC_SPLIT			10

> +#define CTXDESC_L2_ENTRIES		(1 << CTXDESC_SPLIT)

> +

> +#define CTXDESC_L1_DESC_DWORDS		1

> +#define CTXDESC_L1_DESC_VALID		1

> +#define CTXDESC_L1_DESC_L2PTR_MASK	GENMASK_ULL(51, 12)

> +

>  #define CTXDESC_CD_DWORDS		8

>  #define CTXDESC_CD_0_TCR_T0SZ		GENMASK_ULL(5, 0)

>  #define ARM64_TCR_T0SZ			GENMASK_ULL(5, 0)

> @@ -575,7 +589,12 @@ struct arm_smmu_cd_table {

>  };

>  

>  struct arm_smmu_s1_cfg {

> -	struct arm_smmu_cd_table	table;

> +	/* Leaf tables or linear table */

> +	struct arm_smmu_cd_table	*tables;

> +	size_t				num_tables;

> +	/* First level tables, when two levels are used */

> +	__le64				*l1ptr;

> +	dma_addr_t			l1ptr_dma;

>  	struct arm_smmu_ctx_desc	cd;

>  	u8				s1fmt;

>  	u8				s1cdmax;

> @@ -1521,9 +1540,48 @@ static void arm_smmu_free_cd_leaf_table(struct arm_smmu_device *smmu,

>  {

>  	size_t size = num_entries * (CTXDESC_CD_DWORDS << 3);

>  

> +	if (!table->ptr)

> +		return;

>  	dmam_free_coherent(smmu->dev, size, table->ptr, table->ptr_dma);

>  }

>  

> +static void arm_smmu_write_cd_l1_desc(__le64 *dst,

> +				      struct arm_smmu_cd_table *table)

> +{

> +	u64 val = (table->ptr_dma & CTXDESC_L1_DESC_L2PTR_MASK) |

> +		  CTXDESC_L1_DESC_VALID;

> +

> +	WRITE_ONCE(*dst, cpu_to_le64(val));

> +}

> +

> +static __le64 *arm_smmu_get_cd_ptr(struct arm_smmu_domain *smmu_domain,

> +				   u32 ssid)

> +{

> +	__le64 *l1ptr;

> +	unsigned int idx;

> +	struct arm_smmu_cd_table *table;

> +	struct arm_smmu_device *smmu = smmu_domain->smmu;

> +	struct arm_smmu_s1_cfg *cfg = &smmu_domain->s1_cfg;

> +

> +	if (cfg->s1fmt == STRTAB_STE_0_S1FMT_LINEAR)

> +		return cfg->tables[0].ptr + ssid * CTXDESC_CD_DWORDS;

> +

> +	idx = ssid >> CTXDESC_SPLIT;

> +	table = &cfg->tables[idx];

> +	if (!table->ptr) {

> +		if (arm_smmu_alloc_cd_leaf_table(smmu, table,

> +						 CTXDESC_L2_ENTRIES))

> +			return NULL;

> +

> +		l1ptr = cfg->l1ptr + idx * CTXDESC_L1_DESC_DWORDS;

> +		arm_smmu_write_cd_l1_desc(l1ptr, table);

> +		/* An invalid L1CD can be cached */

> +		arm_smmu_sync_cd(smmu_domain, ssid, false);

> +	}

> +	idx = ssid & (CTXDESC_L2_ENTRIES - 1);

> +	return table->ptr + idx * CTXDESC_CD_DWORDS;

> +}

> +

>  static u64 arm_smmu_cpu_tcr_to_cd(u64 tcr)

>  {

>  	u64 val = 0;

> @@ -1556,8 +1614,14 @@ static int arm_smmu_write_ctx_desc(struct arm_smmu_domain *smmu_domain,

>  	u64 val;

>  	bool cd_live;

>  	struct arm_smmu_device *smmu = smmu_domain->smmu;

> -	__le64 *cdptr = smmu_domain->s1_cfg.table.ptr + ssid *

> -			CTXDESC_CD_DWORDS;

> +	__le64 *cdptr;

> +

> +	if (WARN_ON(ssid >= (1 << smmu_domain->s1_cfg.s1cdmax)))

> +		return -E2BIG;

> +

> +	cdptr = arm_smmu_get_cd_ptr(smmu_domain, ssid);

> +	if (!cdptr)

> +		return -ENOMEM;

>  

>  	val = le64_to_cpu(cdptr[0]);

>  	cd_live = !!(val & CTXDESC_CD_0_V);

> @@ -1604,20 +1668,87 @@ static int arm_smmu_write_ctx_desc(struct arm_smmu_domain *smmu_domain,

>  

>  static int arm_smmu_alloc_cd_tables(struct arm_smmu_domain *smmu_domain)

>  {

> +	int ret;

> +	size_t size = 0;

> +	size_t max_contexts;

>  	struct arm_smmu_device *smmu = smmu_domain->smmu;

>  	struct arm_smmu_s1_cfg *cfg = &smmu_domain->s1_cfg;

>  

> -	cfg->s1fmt = STRTAB_STE_0_S1FMT_LINEAR;

> -	return arm_smmu_alloc_cd_leaf_table(smmu, &cfg->table,

> -					    1 << cfg->s1cdmax);

> +	max_contexts = 1 << cfg->s1cdmax;

> +

> +	if (!(smmu->features & ARM_SMMU_FEAT_2_LVL_CDTAB) ||

> +	    max_contexts <= CTXDESC_L2_ENTRIES) {

> +		cfg->s1fmt = STRTAB_STE_0_S1FMT_LINEAR;

> +		cfg->num_tables = 1;

> +	} else {

> +		cfg->s1fmt = STRTAB_STE_0_S1FMT_64K_L2;

> +		cfg->num_tables = DIV_ROUND_UP(max_contexts,

> +					       CTXDESC_L2_ENTRIES);

> +

> +		size = cfg->num_tables * (CTXDESC_L1_DESC_DWORDS << 3);

> +		cfg->l1ptr = dmam_alloc_coherent(smmu->dev, size,

> +						 &cfg->l1ptr_dma,

> +						 GFP_KERNEL);

> +		if (!cfg->l1ptr) {

> +			dev_warn(smmu->dev,

> +				 "failed to allocate L1 context table\n");

> +			return -ENOMEM;

> +		}

> +	}

> +

> +	cfg->tables = devm_kzalloc(smmu->dev, sizeof(struct arm_smmu_cd_table) *

> +				   cfg->num_tables, GFP_KERNEL);

> +	if (!cfg->tables) {

> +		ret = -ENOMEM;

> +		goto err_free_l1;

> +	}

> +

> +	/*

> +	 * Only allocate a leaf table for linear case. With two levels, leaf

> +	 * tables are allocated lazily.

> +	 */

> +	if (cfg->s1fmt == STRTAB_STE_0_S1FMT_LINEAR) {

> +		ret = arm_smmu_alloc_cd_leaf_table(smmu, &cfg->tables[0],

> +						   max_contexts);

> +		if (ret)

> +			goto err_free_tables;

> +	}

> +

> +	return 0;

> +

> +err_free_tables:

> +	devm_kfree(smmu->dev, cfg->tables);

> +	cfg->tables = NULL;

> +err_free_l1:

> +	if (cfg->l1ptr) {

> +		dmam_free_coherent(smmu->dev, size, cfg->l1ptr, cfg->l1ptr_dma);

> +		cfg->l1ptr = NULL;

> +		cfg->l1ptr_dma = 0;

> +	}

> +	return ret;

>  }

>  

>  static void arm_smmu_free_cd_tables(struct arm_smmu_domain *smmu_domain)

>  {

> +	int i;

>  	struct arm_smmu_device *smmu = smmu_domain->smmu;

>  	struct arm_smmu_s1_cfg *cfg = &smmu_domain->s1_cfg;

> +	size_t num_leaf_entries = 1 << cfg->s1cdmax;

> +	struct arm_smmu_cd_table *table = cfg->tables;

> +

> +	if (cfg->l1ptr) {

> +		size_t size = cfg->num_tables * (CTXDESC_L1_DESC_DWORDS << 3);

>  

> -	arm_smmu_free_cd_leaf_table(smmu, &cfg->table, 1 << cfg->s1cdmax);

> +		dmam_free_coherent(smmu->dev, size, cfg->l1ptr, cfg->l1ptr_dma);

> +		cfg->l1ptr = NULL;

> +		cfg->l1ptr_dma = 0;

> +		num_leaf_entries = CTXDESC_L2_ENTRIES;

> +	}

> +

> +	for (i = 0; i < cfg->num_tables; i++, table++)

> +		arm_smmu_free_cd_leaf_table(smmu, table, num_leaf_entries);

> +	devm_kfree(smmu->dev, cfg->tables);

> +	cfg->tables = NULL;

>  }

>  

>  /* Stream table manipulation functions */

> @@ -1737,6 +1868,9 @@ static void arm_smmu_write_strtab_ent(struct arm_smmu_master *master, u32 sid,

>  	}

>  

>  	if (s1_cfg) {

> +		dma_addr_t ptr_dma = s1_cfg->l1ptr ? s1_cfg->l1ptr_dma :

> +				     s1_cfg->tables[0].ptr_dma;

> +

>  		BUG_ON(ste_live);

>  		dst[1] = cpu_to_le64(

>  			 FIELD_PREP(STRTAB_STE_1_S1DSS, STRTAB_STE_1_S1DSS_SSID0) |

> @@ -1749,7 +1883,7 @@ static void arm_smmu_write_strtab_ent(struct arm_smmu_master *master, u32 sid,

>  		   !(smmu->features & ARM_SMMU_FEAT_STALL_FORCE))

>  			dst[1] |= cpu_to_le64(STRTAB_STE_1_S1STALLD);

>  

> -		val |= (s1_cfg->table.ptr_dma & STRTAB_STE_0_S1CTXPTR_MASK) |

> +		val |= (ptr_dma & STRTAB_STE_0_S1CTXPTR_MASK) |

>  			FIELD_PREP(STRTAB_STE_0_CFG, STRTAB_STE_0_CFG_S1_TRANS) |

>  			FIELD_PREP(STRTAB_STE_0_S1CDMAX, s1_cfg->s1cdmax) |

>  			FIELD_PREP(STRTAB_STE_0_S1FMT, s1_cfg->s1fmt);

> @@ -2265,7 +2399,7 @@ static void arm_smmu_domain_free(struct iommu_domain *domain)

>  	if (smmu_domain->stage == ARM_SMMU_DOMAIN_S1) {

>  		struct arm_smmu_s1_cfg *cfg = &smmu_domain->s1_cfg;

>  

> -		if (cfg->table.ptr) {

> +		if (cfg->tables) {

>  			arm_smmu_free_cd_tables(smmu_domain);

>  			arm_smmu_bitmap_free(smmu->asid_map, cfg->cd.asid);

>  		}

>
Jean-Philippe Brucker Jan. 9, 2020, 2:36 p.m. UTC | #2
Hi Will,

On Thu, Dec 19, 2019 at 05:30:20PM +0100, Jean-Philippe Brucker wrote:
> Add support for Substream ID and PASIDs to the SMMUv3 driver. Since v3
> [1], I added review and tested tags where appropriate and applied the
> suggested changes, shown in the diff below. Thanks all!
> 
> I'm testing using the zip accelerator on the Hisilicon KunPeng920 and
> Zhangfei's uacce module [2]. The full SVA support, which I'll send out
> early next year, is available on my branch sva/zip-devel at
> https://jpbrucker.net/git/linux/

Is there anything more I should do for the PASID support? Ideally I'd like
to get this in v5.6 so I can focus on the rest of the SVA work and on
performance improvements.

Thanks,
Jean

> 
> [1] https://lore.kernel.org/linux-iommu/20191209180514.272727-1-jean-philippe@linaro.org/
> [2] https://lore.kernel.org/linux-iommu/1576465697-27946-1-git-send-email-zhangfei.gao@linaro.org/
> 
> Jean-Philippe Brucker (13):
>   iommu/arm-smmu-v3: Drop __GFP_ZERO flag from DMA allocation
>   dt-bindings: document PASID property for IOMMU masters
>   iommu/arm-smmu-v3: Parse PASID devicetree property of platform devices
>   ACPI/IORT: Parse SSID property of named component node
>   iommu/arm-smmu-v3: Prepare arm_smmu_s1_cfg for SSID support
>   iommu/arm-smmu-v3: Add context descriptor tables allocators
>   iommu/arm-smmu-v3: Add support for Substream IDs
>   iommu/arm-smmu-v3: Propagate ssid_bits
>   iommu/arm-smmu-v3: Prepare for handling arm_smmu_write_ctx_desc()
>     failure
>   iommu/arm-smmu-v3: Add second level of context descriptor table
>   iommu/arm-smmu-v3: Improve add_device() error handling
>   PCI/ATS: Add PASID stubs
>   iommu/arm-smmu-v3: Add support for PCI PASID
> 
>  .../devicetree/bindings/iommu/iommu.txt       |   6 +
>  drivers/acpi/arm64/iort.c                     |  18 +
>  drivers/iommu/arm-smmu-v3.c                   | 467 +++++++++++++++---
>  drivers/iommu/of_iommu.c                      |   6 +-
>  include/linux/iommu.h                         |   2 +
>  include/linux/pci-ats.h                       |   3 +
>  6 files changed, 442 insertions(+), 60 deletions(-)
> 
> -- 
> Diff since v3:
> #diff --git a/drivers/iommu/arm-smmu-v3.c b/drivers/iommu/arm-smmu-v3.c
> index cde7af39681c..8e95ecad4c9a 100644
> --- a/drivers/iommu/arm-smmu-v3.c
> +++ b/drivers/iommu/arm-smmu-v3.c
> @@ -589,8 +589,10 @@ struct arm_smmu_cd_table {
>  };
> 
>  struct arm_smmu_s1_cfg {
> +	/* Leaf tables or linear table */
>  	struct arm_smmu_cd_table	*tables;
>  	size_t				num_tables;
> +	/* First level tables, when two levels are used */
>  	__le64				*l1ptr;
>  	dma_addr_t			l1ptr_dma;
>  	struct arm_smmu_ctx_desc	cd;
> @@ -1561,27 +1563,22 @@ static __le64 *arm_smmu_get_cd_ptr(struct arm_smmu_domain *smmu_domain,
>  	struct arm_smmu_device *smmu = smmu_domain->smmu;
>  	struct arm_smmu_s1_cfg *cfg = &smmu_domain->s1_cfg;
> 
> -	if (cfg->s1fmt == STRTAB_STE_0_S1FMT_LINEAR) {
> -		table = &cfg->tables[0];
> -		idx = ssid;
> -	} else {
> -		idx = ssid >> CTXDESC_SPLIT;
> -		if (idx >= cfg->num_tables)
> -			return NULL;
> +	if (cfg->s1fmt == STRTAB_STE_0_S1FMT_LINEAR)
> +		return cfg->tables[0].ptr + ssid * CTXDESC_CD_DWORDS;
> 
> -		table = &cfg->tables[idx];
> -		if (!table->ptr) {
> -			if (arm_smmu_alloc_cd_leaf_table(smmu, table,
> -							 CTXDESC_L2_ENTRIES))
> -				return NULL;
> +	idx = ssid >> CTXDESC_SPLIT;
> +	table = &cfg->tables[idx];
> +	if (!table->ptr) {
> +		if (arm_smmu_alloc_cd_leaf_table(smmu, table,
> +						 CTXDESC_L2_ENTRIES))
> +			return NULL;
> 
> -			l1ptr = cfg->l1ptr + idx * CTXDESC_L1_DESC_DWORDS;
> -			arm_smmu_write_cd_l1_desc(l1ptr, table);
> -			/* An invalid L1CD can be cached */
> -			arm_smmu_sync_cd(smmu_domain, ssid, false);
> -		}
> -		idx = ssid & (CTXDESC_L2_ENTRIES - 1);
> +		l1ptr = cfg->l1ptr + idx * CTXDESC_L1_DESC_DWORDS;
> +		arm_smmu_write_cd_l1_desc(l1ptr, table);
> +		/* An invalid L1CD can be cached */
> +		arm_smmu_sync_cd(smmu_domain, ssid, false);
>  	}
> +	idx = ssid & (CTXDESC_L2_ENTRIES - 1);
>  	return table->ptr + idx * CTXDESC_CD_DWORDS;
>  }
> 
> @@ -1617,8 +1614,12 @@ static int arm_smmu_write_ctx_desc(struct arm_smmu_domain *smmu_domain,
>  	u64 val;
>  	bool cd_live;
>  	struct arm_smmu_device *smmu = smmu_domain->smmu;
> -	__le64 *cdptr = arm_smmu_get_cd_ptr(smmu_domain, ssid);
> +	__le64 *cdptr;
> 
> +	if (WARN_ON(ssid >= (1 << smmu_domain->s1_cfg.s1cdmax)))
> +		return -E2BIG;
> +
> +	cdptr = arm_smmu_get_cd_ptr(smmu_domain, ssid);
>  	if (!cdptr)
>  		return -ENOMEM;
> 
> @@ -1640,9 +1641,9 @@ static int arm_smmu_write_ctx_desc(struct arm_smmu_domain *smmu_domain,
>  		cdptr[3] = cpu_to_le64(cd->mair);
> 
>  		/*
> -		 * STE is live, and the SMMU might fetch this CD at any
> -		 * time. Ensure that it observes the rest of the CD before we
> -		 * enable it.
> +		 * STE is live, and the SMMU might read dwords of this CD in any
> +		 * order. Ensure that it observes valid values before reading
> +		 * V=1.
>  		 */
>  		arm_smmu_sync_cd(smmu_domain, ssid, true);
> 
> @@ -1706,7 +1707,7 @@ static int arm_smmu_alloc_cd_tables(struct arm_smmu_domain *smmu_domain)
>  	 * Only allocate a leaf table for linear case. With two levels, leaf
>  	 * tables are allocated lazily.
>  	 */
> -	if (!cfg->l1ptr) {
> +	if (cfg->s1fmt == STRTAB_STE_0_S1FMT_LINEAR) {
>  		ret = arm_smmu_alloc_cd_leaf_table(smmu, &cfg->tables[0],
>  						   max_contexts);
>  		if (ret)
> @@ -2657,17 +2658,21 @@ static int arm_smmu_enable_pasid(struct arm_smmu_master *master)
> 
>  	features = pci_pasid_features(pdev);
>  	if (features < 0)
> -		return -ENODEV;
> +		return features;
> 
>  	num_pasids = pci_max_pasids(pdev);
>  	if (num_pasids <= 0)
> -		return -ENODEV;
> +		return num_pasids;
> 
>  	ret = pci_enable_pasid(pdev, features);
> -	if (!ret)
> -		master->ssid_bits = min_t(u8, ilog2(num_pasids),
> -					  master->smmu->ssid_bits);
> -	return ret;
> +	if (ret) {
> +		dev_err(&pdev->dev, "Failed to enable PASID\n");
> +		return ret;
> +	}
> +
> +	master->ssid_bits = min_t(u8, ilog2(num_pasids),
> +				  master->smmu->ssid_bits);
> +	return 0;
>  }
> 
>  static void arm_smmu_disable_pasid(struct arm_smmu_master *master)
> 
> 
> _______________________________________________
> iommu mailing list
> iommu@lists.linux-foundation.org
> https://lists.linuxfoundation.org/mailman/listinfo/iommu
Jean-Philippe Brucker Jan. 10, 2020, 7:15 a.m. UTC | #3
On Thu, Jan 09, 2020 at 02:41:01PM +0000, Will Deacon wrote:
> On Thu, Jan 09, 2020 at 03:36:18PM +0100, Jean-Philippe Brucker wrote:
> > On Thu, Dec 19, 2019 at 05:30:20PM +0100, Jean-Philippe Brucker wrote:
> > > Add support for Substream ID and PASIDs to the SMMUv3 driver. Since v3
> > > [1], I added review and tested tags where appropriate and applied the
> > > suggested changes, shown in the diff below. Thanks all!
> > > 
> > > I'm testing using the zip accelerator on the Hisilicon KunPeng920 and
> > > Zhangfei's uacce module [2]. The full SVA support, which I'll send out
> > > early next year, is available on my branch sva/zip-devel at
> > > https://jpbrucker.net/git/linux/
> > 
> > Is there anything more I should do for the PASID support? Ideally I'd like
> > to get this in v5.6 so I can focus on the rest of the SVA work and on
> > performance improvements.
> 
> Apologies, I'm just behind with review what with the timing of the new
> year. You're on the list, but I was hoping to get Robin's TCR stuff dusted
> off so that Jordan doesn't have to depend on patches languishing on the
> mailing list and there's also the nvidia stuff to review as well.
> 
> Going as fast as I can!

No worries, I just wanted to check that it didn't slip through the cracks.

Thanks,
Jean
diff mbox

Patch

--- a/drivers/iommu/arm-smmu-v3.c
+++ b/drivers/iommu/arm-smmu-v3.c
@@ -589,8 +589,10 @@  struct arm_smmu_cd_table {
 };

 struct arm_smmu_s1_cfg {
+	/* Leaf tables or linear table */
 	struct arm_smmu_cd_table	*tables;
 	size_t				num_tables;
+	/* First level tables, when two levels are used */
 	__le64				*l1ptr;
 	dma_addr_t			l1ptr_dma;
 	struct arm_smmu_ctx_desc	cd;
@@ -1561,27 +1563,22 @@  static __le64 *arm_smmu_get_cd_ptr(struct arm_smmu_domain *smmu_domain,
 	struct arm_smmu_device *smmu = smmu_domain->smmu;
 	struct arm_smmu_s1_cfg *cfg = &smmu_domain->s1_cfg;

-	if (cfg->s1fmt == STRTAB_STE_0_S1FMT_LINEAR) {
-		table = &cfg->tables[0];
-		idx = ssid;
-	} else {
-		idx = ssid >> CTXDESC_SPLIT;
-		if (idx >= cfg->num_tables)
-			return NULL;
+	if (cfg->s1fmt == STRTAB_STE_0_S1FMT_LINEAR)
+		return cfg->tables[0].ptr + ssid * CTXDESC_CD_DWORDS;

-		table = &cfg->tables[idx];
-		if (!table->ptr) {
-			if (arm_smmu_alloc_cd_leaf_table(smmu, table,
-							 CTXDESC_L2_ENTRIES))
-				return NULL;
+	idx = ssid >> CTXDESC_SPLIT;
+	table = &cfg->tables[idx];
+	if (!table->ptr) {
+		if (arm_smmu_alloc_cd_leaf_table(smmu, table,
+						 CTXDESC_L2_ENTRIES))
+			return NULL;

-			l1ptr = cfg->l1ptr + idx * CTXDESC_L1_DESC_DWORDS;
-			arm_smmu_write_cd_l1_desc(l1ptr, table);
-			/* An invalid L1CD can be cached */
-			arm_smmu_sync_cd(smmu_domain, ssid, false);
-		}
-		idx = ssid & (CTXDESC_L2_ENTRIES - 1);
+		l1ptr = cfg->l1ptr + idx * CTXDESC_L1_DESC_DWORDS;
+		arm_smmu_write_cd_l1_desc(l1ptr, table);
+		/* An invalid L1CD can be cached */
+		arm_smmu_sync_cd(smmu_domain, ssid, false);
 	}
+	idx = ssid & (CTXDESC_L2_ENTRIES - 1);
 	return table->ptr + idx * CTXDESC_CD_DWORDS;
 }

@@ -1617,8 +1614,12 @@  static int arm_smmu_write_ctx_desc(struct arm_smmu_domain *smmu_domain,
 	u64 val;
 	bool cd_live;
 	struct arm_smmu_device *smmu = smmu_domain->smmu;
-	__le64 *cdptr = arm_smmu_get_cd_ptr(smmu_domain, ssid);
+	__le64 *cdptr;

+	if (WARN_ON(ssid >= (1 << smmu_domain->s1_cfg.s1cdmax)))
+		return -E2BIG;
+
+	cdptr = arm_smmu_get_cd_ptr(smmu_domain, ssid);
 	if (!cdptr)
 		return -ENOMEM;

@@ -1640,9 +1641,9 @@  static int arm_smmu_write_ctx_desc(struct arm_smmu_domain *smmu_domain,
 		cdptr[3] = cpu_to_le64(cd->mair);

 		/*
-		 * STE is live, and the SMMU might fetch this CD at any
-		 * time. Ensure that it observes the rest of the CD before we
-		 * enable it.
+		 * STE is live, and the SMMU might read dwords of this CD in any
+		 * order. Ensure that it observes valid values before reading
+		 * V=1.
 		 */
 		arm_smmu_sync_cd(smmu_domain, ssid, true);

@@ -1706,7 +1707,7 @@  static int arm_smmu_alloc_cd_tables(struct arm_smmu_domain *smmu_domain)
 	 * Only allocate a leaf table for linear case. With two levels, leaf
 	 * tables are allocated lazily.
 	 */
-	if (!cfg->l1ptr) {
+	if (cfg->s1fmt == STRTAB_STE_0_S1FMT_LINEAR) {
 		ret = arm_smmu_alloc_cd_leaf_table(smmu, &cfg->tables[0],
 						   max_contexts);
 		if (ret)
@@ -2657,17 +2658,21 @@  static int arm_smmu_enable_pasid(struct arm_smmu_master *master)

 	features = pci_pasid_features(pdev);
 	if (features < 0)
-		return -ENODEV;
+		return features;

 	num_pasids = pci_max_pasids(pdev);
 	if (num_pasids <= 0)
-		return -ENODEV;
+		return num_pasids;

 	ret = pci_enable_pasid(pdev, features);
-	if (!ret)
-		master->ssid_bits = min_t(u8, ilog2(num_pasids),
-					  master->smmu->ssid_bits);
-	return ret;
+	if (ret) {
+		dev_err(&pdev->dev, "Failed to enable PASID\n");
+		return ret;
+	}
+
+	master->ssid_bits = min_t(u8, ilog2(num_pasids),
+				  master->smmu->ssid_bits);
+	return 0;
 }

 static void arm_smmu_disable_pasid(struct arm_smmu_master *master)