Message ID | 7-v1-6e8b3997c46d+89e-iommu_map_gfp_jgg@nvidia.com |
---|---|
State | New |
Headers | show |
Series | Let iommufd charge IOPTE allocations to the memory cgroup | expand |
> From: Jason Gunthorpe <jgg@nvidia.com> > Sent: Saturday, January 7, 2023 12:43 AM > > @@ -2368,7 +2372,7 @@ static int iommu_domain_identity_map(struct > dmar_domain *domain, > > return __domain_mapping(domain, first_vpfn, > first_vpfn, last_vpfn - first_vpfn + 1, > - DMA_PTE_READ|DMA_PTE_WRITE); > + DMA_PTE_READ|DMA_PTE_WRITE, > GFP_KERNEL); > } Baolu, can you help confirm whether switching from GFP_ATOMIC to GFP_KERNEL is OK in this path? it looks fine to me in a quick glance but want to be conservative here. > @@ -4333,7 +4337,8 @@ static size_t intel_iommu_unmap(struct > iommu_domain *domain, > > /* Cope with horrid API which requires us to unmap more than the > size argument if it happens to be a large-page mapping. */ > - BUG_ON(!pfn_to_dma_pte(dmar_domain, iova >> VTD_PAGE_SHIFT, > &level)); > + BUG_ON(!pfn_to_dma_pte(dmar_domain, iova >> VTD_PAGE_SHIFT, > &level, > + GFP_ATOMIC)); with level==0 it implies it's only lookup w/o pgtable allocation. From this angle it reads better to use a more relaxed gfp e.g. GFP_KERNEL here. > @@ -4392,7 +4397,8 @@ static phys_addr_t > intel_iommu_iova_to_phys(struct iommu_domain *domain, > int level = 0; > u64 phys = 0; > > - pte = pfn_to_dma_pte(dmar_domain, iova >> VTD_PAGE_SHIFT, > &level); > + pte = pfn_to_dma_pte(dmar_domain, iova >> VTD_PAGE_SHIFT, > &level, > + GFP_ATOMIC); ditto
On 2023/1/17 11:38, Tian, Kevin wrote: >> From: Jason Gunthorpe<jgg@nvidia.com> >> Sent: Saturday, January 7, 2023 12:43 AM >> >> @@ -2368,7 +2372,7 @@ static int iommu_domain_identity_map(struct >> dmar_domain *domain, >> >> return __domain_mapping(domain, first_vpfn, >> first_vpfn, last_vpfn - first_vpfn + 1, >> - DMA_PTE_READ|DMA_PTE_WRITE); >> + DMA_PTE_READ|DMA_PTE_WRITE, >> GFP_KERNEL); >> } > Baolu, can you help confirm whether switching from GFP_ATOMIC to > GFP_KERNEL is OK in this path? it looks fine to me in a quick glance > but want to be conservative here. This is also good for me. The memory notifier callback runs in a process context and is allowed to block. Best regards, baolu
On Tue, Jan 17, 2023 at 03:38:51AM +0000, Tian, Kevin wrote: > > From: Jason Gunthorpe <jgg@nvidia.com> > > Sent: Saturday, January 7, 2023 12:43 AM > > > > @@ -2368,7 +2372,7 @@ static int iommu_domain_identity_map(struct > > dmar_domain *domain, > > > > return __domain_mapping(domain, first_vpfn, > > first_vpfn, last_vpfn - first_vpfn + 1, > > - DMA_PTE_READ|DMA_PTE_WRITE); > > + DMA_PTE_READ|DMA_PTE_WRITE, > > GFP_KERNEL); > > } > > Baolu, can you help confirm whether switching from GFP_ATOMIC to > GFP_KERNEL is OK in this path? it looks fine to me in a quick glance > but want to be conservative here. I checked it carefully myself as well, good to check again. > > @@ -4333,7 +4337,8 @@ static size_t intel_iommu_unmap(struct > > iommu_domain *domain, > > > > /* Cope with horrid API which requires us to unmap more than the > > size argument if it happens to be a large-page mapping. */ > > - BUG_ON(!pfn_to_dma_pte(dmar_domain, iova >> VTD_PAGE_SHIFT, > > &level)); > > + BUG_ON(!pfn_to_dma_pte(dmar_domain, iova >> VTD_PAGE_SHIFT, > > &level, > > + GFP_ATOMIC)); > > with level==0 it implies it's only lookup w/o pgtable allocation. From this > angle it reads better to use a more relaxed gfp e.g. GFP_KERNEL here. We should only write GFP_KERNEL if it is actually a sleepable context because it will be mighty confusing if it isn't. I couldn't tell what the context is so I left it as ATOMIC. You are correct this is only just a lookup and so the value is never used / doesn't matter. Jason
diff --git a/drivers/iommu/intel/iommu.c b/drivers/iommu/intel/iommu.c index e3807776971563..a1a66798e1f06c 100644 --- a/drivers/iommu/intel/iommu.c +++ b/drivers/iommu/intel/iommu.c @@ -908,7 +908,8 @@ void dmar_fault_dump_ptes(struct intel_iommu *iommu, u16 source_id, #endif static struct dma_pte *pfn_to_dma_pte(struct dmar_domain *domain, - unsigned long pfn, int *target_level) + unsigned long pfn, int *target_level, + gfp_t gfp) { struct dma_pte *parent, *pte; int level = agaw_to_level(domain->agaw); @@ -935,7 +936,7 @@ static struct dma_pte *pfn_to_dma_pte(struct dmar_domain *domain, if (!dma_pte_present(pte)) { uint64_t pteval; - tmp_page = alloc_pgtable_page(domain->nid, GFP_ATOMIC); + tmp_page = alloc_pgtable_page(domain->nid, gfp); if (!tmp_page) return NULL; @@ -2150,7 +2151,8 @@ static void switch_to_super_page(struct dmar_domain *domain, while (start_pfn <= end_pfn) { if (!pte) - pte = pfn_to_dma_pte(domain, start_pfn, &level); + pte = pfn_to_dma_pte(domain, start_pfn, &level, + GFP_ATOMIC); if (dma_pte_present(pte)) { dma_pte_free_pagetable(domain, start_pfn, @@ -2172,7 +2174,8 @@ static void switch_to_super_page(struct dmar_domain *domain, static int __domain_mapping(struct dmar_domain *domain, unsigned long iov_pfn, - unsigned long phys_pfn, unsigned long nr_pages, int prot) + unsigned long phys_pfn, unsigned long nr_pages, int prot, + gfp_t gfp) { struct dma_pte *first_pte = NULL, *pte = NULL; unsigned int largepage_lvl = 0; @@ -2202,7 +2205,8 @@ __domain_mapping(struct dmar_domain *domain, unsigned long iov_pfn, largepage_lvl = hardware_largepage_caps(domain, iov_pfn, phys_pfn, nr_pages); - pte = pfn_to_dma_pte(domain, iov_pfn, &largepage_lvl); + pte = pfn_to_dma_pte(domain, iov_pfn, &largepage_lvl, + gfp); if (!pte) return -ENOMEM; first_pte = pte; @@ -2368,7 +2372,7 @@ static int iommu_domain_identity_map(struct dmar_domain *domain, return __domain_mapping(domain, first_vpfn, first_vpfn, last_vpfn - first_vpfn + 1, - DMA_PTE_READ|DMA_PTE_WRITE); 
+ DMA_PTE_READ|DMA_PTE_WRITE, GFP_KERNEL); } static int md_domain_init(struct dmar_domain *domain, int guest_width); @@ -4298,7 +4302,7 @@ static int intel_iommu_map(struct iommu_domain *domain, the low bits of hpa would take us onto the next page */ size = aligned_nrpages(hpa, size); return __domain_mapping(dmar_domain, iova >> VTD_PAGE_SHIFT, - hpa >> VTD_PAGE_SHIFT, size, prot); + hpa >> VTD_PAGE_SHIFT, size, prot, gfp); } static int intel_iommu_map_pages(struct iommu_domain *domain, @@ -4333,7 +4337,8 @@ static size_t intel_iommu_unmap(struct iommu_domain *domain, /* Cope with horrid API which requires us to unmap more than the size argument if it happens to be a large-page mapping. */ - BUG_ON(!pfn_to_dma_pte(dmar_domain, iova >> VTD_PAGE_SHIFT, &level)); + BUG_ON(!pfn_to_dma_pte(dmar_domain, iova >> VTD_PAGE_SHIFT, &level, + GFP_ATOMIC)); if (size < VTD_PAGE_SIZE << level_to_offset_bits(level)) size = VTD_PAGE_SIZE << level_to_offset_bits(level); @@ -4392,7 +4397,8 @@ static phys_addr_t intel_iommu_iova_to_phys(struct iommu_domain *domain, int level = 0; u64 phys = 0; - pte = pfn_to_dma_pte(dmar_domain, iova >> VTD_PAGE_SHIFT, &level); + pte = pfn_to_dma_pte(dmar_domain, iova >> VTD_PAGE_SHIFT, &level, + GFP_ATOMIC); if (pte && dma_pte_present(pte)) phys = dma_pte_addr(pte) + (iova & (BIT_MASK(level_to_offset_bits(level) +
Flow the gfp argument down to alloc_pgtable_page() via pfn_to_dma_pte() and __domain_mapping(). Signed-off-by: Jason Gunthorpe <jgg@nvidia.com> --- drivers/iommu/intel/iommu.c | 24 +++++++++++++++--------- 1 file changed, 15 insertions(+), 9 deletions(-)