diff mbox series

[v3,01/10] iommufd: Add data structure for Intel VT-d stage-1 domain allocation

Message ID 20230511145110.27707-2-yi.l.liu@intel.com
State New
Headers show
Series Add Intel VT-d nested translation | expand

Commit Message

Yi Liu May 11, 2023, 2:51 p.m. UTC
This adds IOMMU_HWPT_TYPE_VTD_S1 for stage-1 hw_pagetable of Intel VT-d
and the corressponding data structure for userspace specified parameter
for the domain allocation.

Signed-off-by: Yi Liu <yi.l.liu@intel.com>
---
 include/uapi/linux/iommufd.h | 57 ++++++++++++++++++++++++++++++++++++
 1 file changed, 57 insertions(+)

Comments

Zhang, Tina May 25, 2023, 2:28 a.m. UTC | #1
Hi,

> -----Original Message-----
> From: Yi Liu <yi.l.liu@intel.com>
> Sent: Thursday, May 11, 2023 10:51 PM
> To: joro@8bytes.org; alex.williamson@redhat.com; jgg@nvidia.com; Tian,
> Kevin <kevin.tian@intel.com>; robin.murphy@arm.com;
> baolu.lu@linux.intel.com
> Cc: cohuck@redhat.com; eric.auger@redhat.com; nicolinc@nvidia.com;
> kvm@vger.kernel.org; mjrosato@linux.ibm.com;
> chao.p.peng@linux.intel.com; Liu, Yi L <yi.l.liu@intel.com>;
> yi.y.sun@linux.intel.com; peterx@redhat.com; jasowang@redhat.com;
> shameerali.kolothum.thodi@huawei.com; lulu@redhat.com;
> suravee.suthikulpanit@amd.com; iommu@lists.linux.dev; linux-
> kernel@vger.kernel.org; linux-kselftest@vger.kernel.org; Duan, Zhenzhong
> <zhenzhong.duan@intel.com>
> Subject: [PATCH v3 01/10] iommufd: Add data structure for Intel VT-d stage-1
> domain allocation
> 
> This adds IOMMU_HWPT_TYPE_VTD_S1 for stage-1 hw_pagetable of Intel VT-
> d and the corressponding data structure for userspace specified parameter
> for the domain allocation.
> 
> Signed-off-by: Yi Liu <yi.l.liu@intel.com>
> ---
>  include/uapi/linux/iommufd.h | 57
> ++++++++++++++++++++++++++++++++++++
>  1 file changed, 57 insertions(+)
> 
> diff --git a/include/uapi/linux/iommufd.h b/include/uapi/linux/iommufd.h
> index 06dcad6ab082..c2658394827a 100644
> --- a/include/uapi/linux/iommufd.h
> +++ b/include/uapi/linux/iommufd.h
> @@ -353,9 +353,64 @@ struct iommu_vfio_ioas {
>  /**
>   * enum iommu_hwpt_type - IOMMU HWPT Type
>   * @IOMMU_HWPT_TYPE_DEFAULT: default
> + * @IOMMU_HWPT_TYPE_VTD_S1: Intel VT-d stage-1 page table
>   */
>  enum iommu_hwpt_type {
>  	IOMMU_HWPT_TYPE_DEFAULT,
> +	IOMMU_HWPT_TYPE_VTD_S1,
> +};
> +
> +/**
> + * enum iommu_hwpt_intel_vtd_flags - Intel VT-d stage-1 page
> + *				     table entry attributes
> + * @IOMMU_VTD_PGTBL_SRE: Supervisor request
> + * @IOMMU_VTD_PGTBL_EAFE: Extended access enable
> + * @IOMMU_VTD_PGTBL_PCD: Page-level cache disable
> + * @IOMMU_VTD_PGTBL_PWT: Page-level write through
> + * @IOMMU_VTD_PGTBL_EMTE: Extended mem type enable
> + * @IOMMU_VTD_PGTBL_CD: PASID-level cache disable
> + * @IOMMU_VTD_PGTBL_WPE: Write protect enable  */ enum
> +iommu_hwpt_intel_vtd_flags {
> +	IOMMU_VTD_PGTBL_SRE = 1 << 0,
> +	IOMMU_VTD_PGTBL_EAFE = 1 << 1,
> +	IOMMU_VTD_PGTBL_PCD = 1 << 2,
> +	IOMMU_VTD_PGTBL_PWT = 1 << 3,
> +	IOMMU_VTD_PGTBL_EMTE = 1 << 4,
> +	IOMMU_VTD_PGTBL_CD = 1 << 5,
> +	IOMMU_VTD_PGTBL_WPE = 1 << 6,
> +	IOMMU_VTD_PGTBL_LAST = 1 << 7,
> +};
> +
> +/**
> + * struct iommu_hwpt_intel_vtd - Intel VT-d specific user-managed
> + *                               stage-1 page table info
> + * @flags: Combination of enum iommu_hwpt_intel_vtd_flags
> + * @pgtbl_addr: The base address of the user-managed stage-1 page table.
> + * @pat: Page attribute table data to compute effective memory type
> + * @emt: Extended memory type
> + * @addr_width: The address width of the untranslated addresses that are
> + *              subjected to the user-managed stage-1 page table.
> + * @__reserved: Must be 0
> + *
> + * The Intel VT-d specific data for creating hw_pagetable to represent
> + * the user-managed stage-1 page table that is used in nested translation.
> + *
> + * In nested translation, the stage-1 page table locates in the address
> + * space that defined by the corresponding stage-2 page table. Hence
> +the
> + * stage-1 page table base address value should not be higher than the
> + * maximum untranslated address of stage-2 page table.
> + *
> + * The paging level of the stage-1 page table should be compatible with
> + * the hardware iommu. Otherwise, the allocation would be failed.
> + */
> +struct iommu_hwpt_intel_vtd {
> +	__u64 flags;
> +	__u64 pgtbl_addr;
> +	__u32 pat;
> +	__u32 emt;
Do we need the emt field as part of the stage-1 page table info? IIUC, according to vt-d spec, the emt field is in stage-2 page table entries. Since in nested mode we only expose stage-1 page table to guest vt-d driver, I'm curious how does guest vt-d driver populate this EMT?

Thanks
-Tina
> +	__u32 addr_width;
> +	__u32 __reserved;
>  };
> 
>  /**
> @@ -391,6 +446,8 @@ enum iommu_hwpt_type {
>   * +------------------------------+-------------------------------------+-----------+
>   * | IOMMU_HWPT_TYPE_DEFAULT      |               N/A                   |    IOAS   |
>   * +------------------------------+-------------------------------------+-----------+
> + * | IOMMU_HWPT_TYPE_VTD_S1       |      struct iommu_hwpt_intel_vtd    |
> HWPT   |
> + *
> + +------------------------------+-------------------------------------+
> + -----------+
>   */
>  struct iommu_hwpt_alloc {
>  	__u32 size;
> --
> 2.34.1
>
Jason Gunthorpe May 29, 2023, 7:53 p.m. UTC | #2
On Thu, May 11, 2023 at 07:51:01AM -0700, Yi Liu wrote:
> This adds IOMMU_HWPT_TYPE_VTD_S1 for stage-1 hw_pagetable of Intel VT-d
> +/**
> + * struct iommu_hwpt_intel_vtd - Intel VT-d specific user-managed
> + *                               stage-1 page table info
> + * @flags: Combination of enum iommu_hwpt_intel_vtd_flags
> + * @pgtbl_addr: The base address of the user-managed stage-1 page table.
> + * @pat: Page attribute table data to compute effective memory type
> + * @emt: Extended memory type
> + * @addr_width: The address width of the untranslated addresses that are
> + *              subjected to the user-managed stage-1 page table.
> + * @__reserved: Must be 0
> + *
> + * The Intel VT-d specific data for creating hw_pagetable to represent
> + * the user-managed stage-1 page table that is used in nested translation.
> + *
> + * In nested translation, the stage-1 page table locates in the address
> + * space that defined by the corresponding stage-2 page table. Hence the
> + * stage-1 page table base address value should not be higher than the
> + * maximum untranslated address of stage-2 page table.
> + *
> + * The paging level of the stage-1 page table should be compatible with
> + * the hardware iommu. Otherwise, the allocation would be failed.
> + */
> +struct iommu_hwpt_intel_vtd {
> +	__u64 flags;
> +	__u64 pgtbl_addr;

__aligned_u64

> +	__u32 pat;
> +	__u32 emt;
> +	__u32 addr_width;
> +	__u32 __reserved;
>  };
>  
>  /**
> @@ -391,6 +446,8 @@ enum iommu_hwpt_type {
>   * +------------------------------+-------------------------------------+-----------+
>   * | IOMMU_HWPT_TYPE_DEFAULT      |               N/A                   |    IOAS   |
>   * +------------------------------+-------------------------------------+-----------+
> + * | IOMMU_HWPT_TYPE_VTD_S1       |      struct iommu_hwpt_intel_vtd    |    HWPT   |
> + * +------------------------------+-------------------------------------+-----------+

Please don't make ascii art tables.

Note beside the struct what enum it is for

Jason
Jason Gunthorpe May 29, 2023, 8 p.m. UTC | #3
On Thu, May 25, 2023 at 02:28:18AM +0000, Zhang, Tina wrote:

> > +struct iommu_hwpt_intel_vtd {
> > +	__u64 flags;
> > +	__u64 pgtbl_addr;
> > +	__u32 pat;
> > +	__u32 emt;

> Do we need the emt field as part of the stage-1 page table info?
> IIUC, according to vt-d spec, the emt field is in stage-2 page table
> entries. Since in nested mode we only expose stage-1 page table to
> guest vt-d driver, I'm curious how does guest vt-d driver populate
> this EMT?

Indeed. The EMT is controlling how the iommu HW parses memory that is
controlled by the kernel - this simply should not be something that
userspace controls.

The S2 itself has to decide if it is populating the EMT bits in the
IOPTE and if so it could enable EMT. Does userspace need to be
involved here?

The seemingly more tricky thing is that it feels like EMT and PAT
would like to be per-iova and we don't have a means for that right
now. (and even if we did, how would qemu decide what to do ?)

So probably drop it.

Jason
diff mbox series

Patch

diff --git a/include/uapi/linux/iommufd.h b/include/uapi/linux/iommufd.h
index 06dcad6ab082..c2658394827a 100644
--- a/include/uapi/linux/iommufd.h
+++ b/include/uapi/linux/iommufd.h
@@ -353,9 +353,64 @@  struct iommu_vfio_ioas {
 /**
  * enum iommu_hwpt_type - IOMMU HWPT Type
  * @IOMMU_HWPT_TYPE_DEFAULT: default
+ * @IOMMU_HWPT_TYPE_VTD_S1: Intel VT-d stage-1 page table
  */
 enum iommu_hwpt_type {
 	IOMMU_HWPT_TYPE_DEFAULT,
+	IOMMU_HWPT_TYPE_VTD_S1,
+};
+
+/**
+ * enum iommu_hwpt_intel_vtd_flags - Intel VT-d stage-1 page
+ *				     table entry attributes
+ * @IOMMU_VTD_PGTBL_SRE: Supervisor request
+ * @IOMMU_VTD_PGTBL_EAFE: Extended access enable
+ * @IOMMU_VTD_PGTBL_PCD: Page-level cache disable
+ * @IOMMU_VTD_PGTBL_PWT: Page-level write through
+ * @IOMMU_VTD_PGTBL_EMTE: Extended mem type enable
+ * @IOMMU_VTD_PGTBL_CD: PASID-level cache disable
+ * @IOMMU_VTD_PGTBL_WPE: Write protect enable
+ */
+enum iommu_hwpt_intel_vtd_flags {
+	IOMMU_VTD_PGTBL_SRE = 1 << 0,
+	IOMMU_VTD_PGTBL_EAFE = 1 << 1,
+	IOMMU_VTD_PGTBL_PCD = 1 << 2,
+	IOMMU_VTD_PGTBL_PWT = 1 << 3,
+	IOMMU_VTD_PGTBL_EMTE = 1 << 4,
+	IOMMU_VTD_PGTBL_CD = 1 << 5,
+	IOMMU_VTD_PGTBL_WPE = 1 << 6,
+	IOMMU_VTD_PGTBL_LAST = 1 << 7,
+};
+
+/**
+ * struct iommu_hwpt_intel_vtd - Intel VT-d specific user-managed
+ *                               stage-1 page table info
+ * @flags: Combination of enum iommu_hwpt_intel_vtd_flags
+ * @pgtbl_addr: The base address of the user-managed stage-1 page table.
+ * @pat: Page attribute table data to compute effective memory type
+ * @emt: Extended memory type
+ * @addr_width: The address width of the untranslated addresses that are
+ *              subjected to the user-managed stage-1 page table.
+ * @__reserved: Must be 0
+ *
+ * The Intel VT-d specific data for creating hw_pagetable to represent
+ * the user-managed stage-1 page table that is used in nested translation.
+ *
+ * In nested translation, the stage-1 page table locates in the address
+ * space that defined by the corresponding stage-2 page table. Hence the
+ * stage-1 page table base address value should not be higher than the
+ * maximum untranslated address of stage-2 page table.
+ *
+ * The paging level of the stage-1 page table should be compatible with
+ * the hardware iommu. Otherwise, the allocation would be failed.
+ */
+struct iommu_hwpt_intel_vtd {
+	__u64 flags;
+	__u64 pgtbl_addr;
+	__u32 pat;
+	__u32 emt;
+	__u32 addr_width;
+	__u32 __reserved;
 };
 
 /**
@@ -391,6 +446,8 @@  enum iommu_hwpt_type {
  * +------------------------------+-------------------------------------+-----------+
  * | IOMMU_HWPT_TYPE_DEFAULT      |               N/A                   |    IOAS   |
  * +------------------------------+-------------------------------------+-----------+
+ * | IOMMU_HWPT_TYPE_VTD_S1       |      struct iommu_hwpt_intel_vtd    |    HWPT   |
+ * +------------------------------+-------------------------------------+-----------+
  */
 struct iommu_hwpt_alloc {
 	__u32 size;