Message ID | 20230310-dma_iommu-v9-5-65bb8edd2beb@linux.ibm.com |
---|---|
State | New |
Series | iommu/dma: s390 DMA API conversion and optimized IOTLB flushing |
On Mon, 2023-05-22 at 17:26 +0100, Robin Murphy wrote:
> On 2023-05-15 10:15, Niklas Schnelle wrote:
> > In some virtualized environments, including s390 paged memory guests,
> > IOTLB flushes are used to update IOMMU shadow tables. Due to this, they
> > are much more expensive than in typical bare metal environments or
> > non-paged s390 guests. In addition they may parallelize more poorly in
> > virtualized environments. This changes the trade-off for flushing IOVAs
> > such that minimizing the number of IOTLB flushes trumps any benefit of
> > cheaper queuing operations or increased parallelism.
> > 
> > In this scenario per-CPU flush queues pose several problems. Firstly,
> > per-CPU memory is often quite limited, prohibiting larger queues.
> > Secondly, collecting IOVAs per CPU but flushing via a global timeout
> > reduces the number of IOVAs flushed per timeout, especially on s390
> > where PCI interrupts may not be bound to a specific CPU.
> > 
> > Let's introduce a single flush queue mode that reuses the same queue
> > logic but only allocates a single global queue. This mode can be
> > selected as a flag bit in a new dma_iommu_options struct, which can be
> > modified from its defaults by IOMMU drivers implementing a new
> > ops.tune_dma_iommu() callback. As a first user, the s390 IOMMU driver
> > selects the single queue mode if IOTLB flushes are needed on map, which
> > indicates shadow table use. With the unchanged small FQ size and
> > timeouts this setting is worse than per-CPU queues, but a follow-up
> > patch will make the FQ size and timeout variable. Together this allows
> > the common IOVA flushing code to more closely resemble the global flush
> > behavior used by s390's previous internal DMA API implementation.
> > 
> > Link: https://lore.kernel.org/linux-iommu/3e402947-61f9-b7e8-1414-fde006257b6f@arm.com/
> > Reviewed-by: Matthew Rosato <mjrosato@linux.ibm.com> #s390
> > Signed-off-by: Niklas Schnelle <schnelle@linux.ibm.com>
> > ---
> >  drivers/iommu/dma-iommu.c  | 163 ++++++++++++++++++++++++++++++++++-----------
> >  drivers/iommu/dma-iommu.h  |   4 +-
> >  drivers/iommu/iommu.c      |  18 +++--
> >  drivers/iommu/s390-iommu.c |  10 +++
> >  include/linux/iommu.h      |  21 ++++++
> >  5 files changed, 169 insertions(+), 47 deletions(-)
> > 
[...]
> > +static void fq_flush_single(struct iommu_dma_cookie *cookie)
> > +{
> > +	struct iova_fq *fq = cookie->single_fq;
> > +	unsigned long flags;
> > +
> > +	spin_lock_irqsave(&fq->lock, flags);
> > +	fq_ring_free(cookie, fq);
> > +	spin_unlock_irqrestore(&fq->lock, flags);
> 
> Nit: this should clearly just be a self-locked version of fq_ring_free()
> that takes fq as an argument, then both the new case and the existing
> loop body become trivial one-line calls.

Sure, will do. Just one question about names. As an example,
pci_reset_function_locked() means that the relevant lock is already
taken, with pci_reset_function() adding the lock/unlock. In your wording
the implied function names sound the other way around. I can't find
anything similar in drivers/iommu, so would you mind going the PCI way
and having:

fq_ring_free_locked(): called in queue_iova() with the lock held
fq_ring_free():        called in fq_flush_timeout(), takes the lock itself

Or maybe I'm just biased because I've used the PCI ..._locked() functions
before and there is a better convention.

> 
> > +}
> > +
[...]
> > -	queue = alloc_percpu(struct iova_fq);
> > -	if (!queue) {
> > +	if (cookie->options.flags & IOMMU_DMA_OPTS_SINGLE_QUEUE)
> > +		rc = iommu_dma_init_fq_single(cookie);
> > +	else
> > +		rc = iommu_dma_init_fq_percpu(cookie);
> > +
> > +	if (rc) {
> >  		pr_warn("iova flush queue initialization failed\n");
> > -		return -ENOMEM;
> > +		/* fall back to strict mode */
> > +		domain->type = IOMMU_DOMAIN_DMA;
> 
> Why move this? It doesn't logically belong to FQ initialisation itself.

Ah yes, this is not needed anymore. Previously, when I had a new domain
type, I think I needed to set domain->type in here and moved the
fallback for consistency. Will remove that change.

> 
> > +		return rc;
> >  	}
> > 
> > ---8<---
> >  static struct iommu_device *s390_iommu_probe_device(struct device *dev)
> >  {
> >  	struct zpci_dev *zdev;
> > @@ -793,6 +802,7 @@ static const struct iommu_ops s390_iommu_ops = {
> >  	.device_group = generic_device_group,
> >  	.pgsize_bitmap = SZ_4K,
> >  	.get_resv_regions = s390_iommu_get_resv_regions,
> > +	.tune_dma_iommu = s390_iommu_tune_dma_iommu,
> >  	.default_domain_ops = &(const struct iommu_domain_ops) {
> >  		.attach_dev = s390_iommu_attach_device,
> >  		.map_pages = s390_iommu_map_pages,
> > diff --git a/include/linux/iommu.h b/include/linux/iommu.h
> > index 58891eddc2c4..3649a17256a5 100644
> > --- a/include/linux/iommu.h
> > +++ b/include/linux/iommu.h
> > @@ -219,6 +219,21 @@ struct iommu_iotlb_gather {
> >  	bool		queued;
> >  };
> >  
> > +/**
> > + * struct dma_iommu_options - Options for dma-iommu
> > + *
> > + * @flags: Flag bits for enabling/disabling dma-iommu settings
> > + *
> > + * This structure is intended to provide IOMMU drivers a way to influence the
> > + * behavior of the dma-iommu DMA API implementation. This allows optimizing for
> > + * example for a virtualized environment with slow IOTLB flushes.
> > + */
> > +struct dma_iommu_options {
> > +#define IOMMU_DMA_OPTS_PER_CPU_QUEUE	(0L << 0)
> > +#define IOMMU_DMA_OPTS_SINGLE_QUEUE	(1L << 0)
> > +	u64	flags;
> > +};
> 
> I think for now this can just use a bit in dev_iommu to indicate that
> the device will prefer a global flush queue; s390 can set that in
> .probe_device, then iommu_dma_init_domain() can propagate it to an
> equivalent flag in the cookie (possibly even a new cookie type?) that
> iommu_dma_init_fq() can then consume. Then just make the s390 parameters
> from patch #6 the standard parameters for a global queue.
> 
> Thanks,
> Robin.

Sounds good.

[...]
On 2023-05-23 13:02, Niklas Schnelle wrote:
[...]
>>> +static void fq_flush_single(struct iommu_dma_cookie *cookie)
>>> +{
>>> +	struct iova_fq *fq = cookie->single_fq;
>>> +	unsigned long flags;
>>> +
>>> +	spin_lock_irqsave(&fq->lock, flags);
>>> +	fq_ring_free(cookie, fq);
>>> +	spin_unlock_irqrestore(&fq->lock, flags);
>>
>> Nit: this should clearly just be a self-locked version of fq_ring_free()
>> that takes fq as an argument, then both the new case and the existing
>> loop body become trivial one-line calls.
> 
> Sure, will do. Just one question about names. As an example,
> pci_reset_function_locked() means that the relevant lock is already
> taken, with pci_reset_function() adding the lock/unlock. In your wording
> the implied function names sound the other way around. I can't find
> anything similar in drivers/iommu, so would you mind going the PCI way
> and having:
> 
> fq_ring_free_locked(): called in queue_iova() with the lock held
> fq_ring_free():        called in fq_flush_timeout(), takes the lock itself
> 
> Or maybe I'm just biased because I've used the PCI ..._locked() functions
> before and there is a better convention.

Yes, that's the form that's most familiar to me too - sorry I failed to
express it clearly :)

Thanks,
Robin.
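For reference, a minimal sketch of the split agreed above, using the PCI-style
naming. This only illustrates the convention; the ring-walk body of the
existing fq_ring_free() is elided and assumed unchanged:

static void fq_ring_free_locked(struct iommu_dma_cookie *cookie, struct iova_fq *fq)
{
	/* Caller (e.g. queue_iova()) already holds fq->lock */
	assert_spin_locked(&fq->lock);
	/* ... existing ring walk freeing flushed entries goes here ... */
}

static void fq_ring_free(struct iommu_dma_cookie *cookie, struct iova_fq *fq)
{
	unsigned long flags;

	/* Self-locking wrapper for the timer and teardown paths */
	spin_lock_irqsave(&fq->lock, flags);
	fq_ring_free_locked(cookie, fq);
	spin_unlock_irqrestore(&fq->lock, flags);
}

With this split, both fq_flush_single() and the per-CPU loop body reduce to a
single call to fq_ring_free().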
On Mon, 2023-05-22 at 17:26 +0100, Robin Murphy wrote:
> On 2023-05-15 10:15, Niklas Schnelle wrote:
[...]
> > ---8<---
> > 
> > +/**
> > + * struct dma_iommu_options - Options for dma-iommu
> > + *
> > + * @flags: Flag bits for enabling/disabling dma-iommu settings
> > + *
> > + * This structure is intended to provide IOMMU drivers a way to influence the
> > + * behavior of the dma-iommu DMA API implementation. This allows optimizing for
> > + * example for a virtualized environment with slow IOTLB flushes.
> > + */
> > +struct dma_iommu_options {
> > +#define IOMMU_DMA_OPTS_PER_CPU_QUEUE	(0L << 0)
> > +#define IOMMU_DMA_OPTS_SINGLE_QUEUE	(1L << 0)
> > +	u64	flags;
> > +};
> 
> I think for now this can just use a bit in dev_iommu to indicate that
> the device will prefer a global flush queue; s390 can set that in
> .probe_device, then iommu_dma_init_domain() can propagate it to an
> equivalent flag in the cookie (possibly even a new cookie type?) that
> iommu_dma_init_fq() can then consume. Then just make the s390 parameters
> from patch #6 the standard parameters for a global queue.
> 
> Thanks,
> Robin.

Working on this now. How about I move the struct dma_iommu_options
definition into dma-iommu.c, keeping it as part of struct
iommu_dma_cookie? That way we can still have the flags, timeout and
queue size organized the same way, but internal to dma-iommu.c.

We would then set them in iommu_dma_init_domain(), triggered by a
"shadow_on_flush" flag in struct dev_iommu. That way we keep most of the
same code but only add a single flag as the external interface. The flag
would also state an explicit fact about the IOMMU device itself, namely
that its IOTLB flushes do extra shadowing work. This leaves the decision
to then use a longer timeout and larger queue size within the
responsibility of dma-iommu.c, which I think is overall a better match
of responsibilities.

Thanks,
Niklas
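A rough sketch of that direction, for illustration only: the "shadow_on_flush"
name comes from the proposal above, while the option fields and constants used
here (fq_size, fq_timeout, IOVA_SINGLE_FQ_*) are assumptions, not a final
interface:

/* include/linux/iommu.h: a single per-device hint instead of an ops callback */
struct dev_iommu {
	/* ... existing fields ... */
	u32 shadow_on_flush:1;	/* IOTLB flushes also update shadow tables */
};

/* drivers/iommu/s390-iommu.c, in s390_iommu_probe_device(): set the hint */
	if (zdev->tlb_refresh)
		dev->iommu->shadow_on_flush = 1;

/* drivers/iommu/dma-iommu.c: dma_iommu_options stays internal to this file;
 * iommu_dma_init_domain() switches to single-queue defaults when the hint
 * is set (constant names below are placeholders) */
	if (dev->iommu->shadow_on_flush) {
		cookie->options.flags |= IOMMU_DMA_OPTS_SINGLE_QUEUE;
		cookie->options.fq_size = IOVA_SINGLE_FQ_SIZE;
		cookie->options.fq_timeout = IOVA_SINGLE_FQ_TIMEOUT;
	}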
diff --git a/drivers/iommu/dma-iommu.c b/drivers/iommu/dma-iommu.c
index 7a9f0b0bddbd..be4cab6b4fe4 100644
--- a/drivers/iommu/dma-iommu.c
+++ b/drivers/iommu/dma-iommu.c
@@ -49,8 +49,11 @@ struct iommu_dma_cookie {
 	/* Full allocator for IOMMU_DMA_IOVA_COOKIE */
 	struct {
 		struct iova_domain	iovad;
-
-		struct iova_fq __percpu *fq;	/* Flush queue */
+		/* Flush queue */
+		union {
+			struct iova_fq	*single_fq;
+			struct iova_fq	__percpu *percpu_fq;
+		};
 		/* Number of TLB flushes that have been started */
 		atomic64_t		fq_flush_start_cnt;
 		/* Number of TLB flushes that have been finished */
@@ -67,6 +70,8 @@ struct iommu_dma_cookie {
 
 	/* Domain for flush queue callback; NULL if flush queue not in use */
 	struct iommu_domain		*fq_domain;
+	/* Options for dma-iommu use */
+	struct dma_iommu_options	options;
 	struct mutex			mutex;
 };
 
@@ -152,25 +157,44 @@ static void fq_flush_iotlb(struct iommu_dma_cookie *cookie)
 	atomic64_inc(&cookie->fq_flush_finish_cnt);
 }
 
-static void fq_flush_timeout(struct timer_list *t)
+static void fq_flush_percpu(struct iommu_dma_cookie *cookie)
 {
-	struct iommu_dma_cookie *cookie = from_timer(cookie, t, fq_timer);
 	int cpu;
 
-	atomic_set(&cookie->fq_timer_on, 0);
-	fq_flush_iotlb(cookie);
-
 	for_each_possible_cpu(cpu) {
 		unsigned long flags;
 		struct iova_fq *fq;
 
-		fq = per_cpu_ptr(cookie->fq, cpu);
+		fq = per_cpu_ptr(cookie->percpu_fq, cpu);
 		spin_lock_irqsave(&fq->lock, flags);
 		fq_ring_free(cookie, fq);
 		spin_unlock_irqrestore(&fq->lock, flags);
 	}
 }
 
+static void fq_flush_single(struct iommu_dma_cookie *cookie)
+{
+	struct iova_fq *fq = cookie->single_fq;
+	unsigned long flags;
+
+	spin_lock_irqsave(&fq->lock, flags);
+	fq_ring_free(cookie, fq);
+	spin_unlock_irqrestore(&fq->lock, flags);
+}
+
+static void fq_flush_timeout(struct timer_list *t)
+{
+	struct iommu_dma_cookie *cookie = from_timer(cookie, t, fq_timer);
+
+	atomic_set(&cookie->fq_timer_on, 0);
+	fq_flush_iotlb(cookie);
+
+	if (cookie->options.flags & IOMMU_DMA_OPTS_SINGLE_QUEUE)
+		fq_flush_single(cookie);
+	else
+		fq_flush_percpu(cookie);
+}
+
 static void queue_iova(struct iommu_dma_cookie *cookie,
 		unsigned long pfn, unsigned long pages,
 		struct list_head *freelist)
@@ -188,7 +212,11 @@ static void queue_iova(struct iommu_dma_cookie *cookie,
 	 */
 	smp_mb();
 
-	fq = raw_cpu_ptr(cookie->fq);
+	if (cookie->options.flags & IOMMU_DMA_OPTS_SINGLE_QUEUE)
+		fq = cookie->single_fq;
+	else
+		fq = raw_cpu_ptr(cookie->percpu_fq);
+
 	spin_lock_irqsave(&fq->lock, flags);
 
 	/*
@@ -219,58 +247,114 @@ static void queue_iova(struct iommu_dma_cookie *cookie,
 			  jiffies + msecs_to_jiffies(IOVA_FQ_TIMEOUT));
 }
 
-static void iommu_dma_free_fq(struct iommu_dma_cookie *cookie)
+static void iommu_dma_free_fq_single(struct iova_fq *fq)
+{
+	int idx;
+
+	if (!fq)
+		return;
+	fq_ring_for_each(idx, fq)
+		put_pages_list(&fq->entries[idx].freelist);
+	vfree(fq);
+}
+
+static void iommu_dma_free_fq_percpu(struct iova_fq __percpu *percpu_fq)
 {
 	int cpu, idx;
 
-	if (!cookie->fq)
-		return;
-
-	del_timer_sync(&cookie->fq_timer);
 	/* The IOVAs will be torn down separately, so just free our queued pages */
 	for_each_possible_cpu(cpu) {
-		struct iova_fq *fq = per_cpu_ptr(cookie->fq, cpu);
+		struct iova_fq *fq = per_cpu_ptr(percpu_fq, cpu);
 
 		fq_ring_for_each(idx, fq)
 			put_pages_list(&fq->entries[idx].freelist);
 	}
 
-	free_percpu(cookie->fq);
+	free_percpu(percpu_fq);
+}
+
+static void iommu_dma_free_fq(struct iommu_dma_cookie *cookie)
+{
+	if (!cookie->fq_domain)
+		return;
+
+	del_timer_sync(&cookie->fq_timer);
+	if (cookie->options.flags & IOMMU_DMA_OPTS_SINGLE_QUEUE)
+		iommu_dma_free_fq_single(cookie->single_fq);
+	else
+		iommu_dma_free_fq_percpu(cookie->percpu_fq);
+}
+
+
+static void iommu_dma_init_one_fq(struct iova_fq *fq)
+{
+	int i;
+
+	fq->head = 0;
+	fq->tail = 0;
+
+	spin_lock_init(&fq->lock);
+
+	for (i = 0; i < IOVA_FQ_SIZE; i++)
+		INIT_LIST_HEAD(&fq->entries[i].freelist);
+}
+
+static int iommu_dma_init_fq_single(struct iommu_dma_cookie *cookie)
+{
+	struct iova_fq *queue;
+
+	queue = vzalloc(sizeof(*queue));
+	if (!queue)
+		return -ENOMEM;
+	iommu_dma_init_one_fq(queue);
+	cookie->single_fq = queue;
+
+	return 0;
+}
+
+static int iommu_dma_init_fq_percpu(struct iommu_dma_cookie *cookie)
+{
+	struct iova_fq __percpu *queue;
+	int cpu;
+
+	queue = alloc_percpu(struct iova_fq);
+	if (!queue)
+		return -ENOMEM;
+
+	for_each_possible_cpu(cpu)
+		iommu_dma_init_one_fq(per_cpu_ptr(queue, cpu));
+	cookie->percpu_fq = queue;
+	return 0;
 }
 
 /* sysfs updates are serialised by the mutex of the group owning @domain */
-int iommu_dma_init_fq(struct iommu_domain *domain)
+int iommu_dma_init_fq(struct device *dev, struct iommu_domain *domain)
 {
 	struct iommu_dma_cookie *cookie = domain->iova_cookie;
-	struct iova_fq __percpu *queue;
-	int i, cpu;
+	const struct iommu_ops *ops = dev_iommu_ops(dev);
+	int rc;
 
 	if (cookie->fq_domain)
 		return 0;
 
+	if (ops->tune_dma_iommu)
+		ops->tune_dma_iommu(dev, &cookie->options);
+
 	atomic64_set(&cookie->fq_flush_start_cnt,  0);
 	atomic64_set(&cookie->fq_flush_finish_cnt, 0);
 
-	queue = alloc_percpu(struct iova_fq);
-	if (!queue) {
+	if (cookie->options.flags & IOMMU_DMA_OPTS_SINGLE_QUEUE)
+		rc = iommu_dma_init_fq_single(cookie);
+	else
+		rc = iommu_dma_init_fq_percpu(cookie);
+
+	if (rc) {
 		pr_warn("iova flush queue initialization failed\n");
-		return -ENOMEM;
+		/* fall back to strict mode */
+		domain->type = IOMMU_DOMAIN_DMA;
+		return rc;
 	}
 
-	for_each_possible_cpu(cpu) {
-		struct iova_fq *fq = per_cpu_ptr(queue, cpu);
-
-		fq->head = 0;
-		fq->tail = 0;
-
-		spin_lock_init(&fq->lock);
-
-		for (i = 0; i < IOVA_FQ_SIZE; i++)
-			INIT_LIST_HEAD(&fq->entries[i].freelist);
-	}
-
-	cookie->fq = queue;
-
 	timer_setup(&cookie->fq_timer, fq_flush_timeout, 0);
 	atomic_set(&cookie->fq_timer_on, 0);
 	/*
@@ -297,6 +381,7 @@ static struct iommu_dma_cookie *cookie_alloc(enum iommu_dma_cookie_type type)
 	if (cookie) {
 		INIT_LIST_HEAD(&cookie->msi_page_list);
 		cookie->type = type;
+		cookie->options.flags = IOMMU_DMA_OPTS_PER_CPU_QUEUE;
 	}
 	return cookie;
 }
@@ -585,9 +670,9 @@ static int iommu_dma_init_domain(struct iommu_domain *domain, dma_addr_t base,
 	if (ret)
 		goto done_unlock;
 
-	/* If the FQ fails we can simply fall back to strict mode */
-	if (domain->type == IOMMU_DOMAIN_DMA_FQ && iommu_dma_init_fq(domain))
-		domain->type = IOMMU_DOMAIN_DMA;
+	/* If the FQ fails we fall back to strict mode */
+	if (domain->type == IOMMU_DOMAIN_DMA_FQ)
+		iommu_dma_init_fq(dev, domain);
 
 	ret = iova_reserve_iommu_regions(dev, domain);
 
diff --git a/drivers/iommu/dma-iommu.h b/drivers/iommu/dma-iommu.h
index 942790009292..4f727ab56d3c 100644
--- a/drivers/iommu/dma-iommu.h
+++ b/drivers/iommu/dma-iommu.h
@@ -12,7 +12,7 @@ int iommu_get_dma_cookie(struct iommu_domain *domain);
 void iommu_put_dma_cookie(struct iommu_domain *domain);
 
-int iommu_dma_init_fq(struct iommu_domain *domain);
+int iommu_dma_init_fq(struct device *dev, struct iommu_domain *domain);
 
 void iommu_dma_get_resv_regions(struct device *dev, struct list_head *list);
 
@@ -20,7 +20,7 @@ extern bool iommu_dma_forcedac;
 
 #else /* CONFIG_IOMMU_DMA */
 
-static inline int iommu_dma_init_fq(struct iommu_domain *domain)
+static inline int iommu_dma_init_fq(struct device *dev, struct iommu_domain *domain)
 {
 	return -EINVAL;
 }
diff --git a/drivers/iommu/iommu.c b/drivers/iommu/iommu.c
index 51f816367205..e2334ca480dd 100644
--- a/drivers/iommu/iommu.c
+++ b/drivers/iommu/iommu.c
@@ -2967,10 +2967,19 @@ static ssize_t iommu_group_store_type(struct iommu_group *group,
 		return -EINVAL;
 
 	mutex_lock(&group->mutex);
+	/* Ensure that device exists. */
+	if (list_empty(&group->devices)) {
+		mutex_unlock(&group->mutex);
+		return -EPERM;
+	}
+
+	grp_dev = list_first_entry(&group->devices, struct group_device, list);
+	dev = grp_dev->dev;
+
 	/* We can bring up a flush queue without tearing down the domain. */
 	if (req_type == IOMMU_DOMAIN_DMA_FQ &&
 	    group->default_domain->type == IOMMU_DOMAIN_DMA) {
-		ret = iommu_dma_init_fq(group->default_domain);
+		ret = iommu_dma_init_fq(dev, group->default_domain);
 		if (!ret)
 			group->default_domain->type = IOMMU_DOMAIN_DMA_FQ;
 		mutex_unlock(&group->mutex);
@@ -2978,15 +2987,12 @@ static ssize_t iommu_group_store_type(struct iommu_group *group,
 		return ret ?: count;
 	}
 
-	/* Otherwise, ensure that device exists and no driver is bound. */
-	if (list_empty(&group->devices) || group->owner_cnt) {
+	/* Otherwise, ensure that no driver is bound. */
+	if (group->owner_cnt) {
 		mutex_unlock(&group->mutex);
 		return -EPERM;
 	}
 
-	grp_dev = list_first_entry(&group->devices, struct group_device, list);
-	dev = grp_dev->dev;
-
 	ret = iommu_change_dev_def_domain(group, dev, req_type);
 
 	/*
diff --git a/drivers/iommu/s390-iommu.c b/drivers/iommu/s390-iommu.c
index 161b0be5aba6..65dd469ad524 100644
--- a/drivers/iommu/s390-iommu.c
+++ b/drivers/iommu/s390-iommu.c
@@ -451,6 +451,15 @@ static void s390_iommu_get_resv_regions(struct device *dev,
 	}
 }
 
+static void s390_iommu_tune_dma_iommu(struct device *dev,
+				      struct dma_iommu_options *options)
+{
+	struct zpci_dev *zdev = to_zpci_dev(dev);
+
+	if (zdev->tlb_refresh)
+		options->flags |= IOMMU_DMA_OPTS_SINGLE_QUEUE;
+}
+
 static struct iommu_device *s390_iommu_probe_device(struct device *dev)
 {
 	struct zpci_dev *zdev;
@@ -793,6 +802,7 @@ static const struct iommu_ops s390_iommu_ops = {
 	.device_group = generic_device_group,
 	.pgsize_bitmap = SZ_4K,
 	.get_resv_regions = s390_iommu_get_resv_regions,
+	.tune_dma_iommu = s390_iommu_tune_dma_iommu,
 	.default_domain_ops = &(const struct iommu_domain_ops) {
 		.attach_dev = s390_iommu_attach_device,
 		.map_pages = s390_iommu_map_pages,
diff --git a/include/linux/iommu.h b/include/linux/iommu.h
index 58891eddc2c4..3649a17256a5 100644
--- a/include/linux/iommu.h
+++ b/include/linux/iommu.h
@@ -219,6 +219,21 @@ struct iommu_iotlb_gather {
 	bool		queued;
 };
 
+/**
+ * struct dma_iommu_options - Options for dma-iommu
+ *
+ * @flags: Flag bits for enabling/disabling dma-iommu settings
+ *
+ * This structure is intended to provide IOMMU drivers a way to influence the
+ * behavior of the dma-iommu DMA API implementation. This allows optimizing for
+ * example for a virtualized environment with slow IOTLB flushes.
+ */
+struct dma_iommu_options {
+#define IOMMU_DMA_OPTS_PER_CPU_QUEUE	(0L << 0)
+#define IOMMU_DMA_OPTS_SINGLE_QUEUE	(1L << 0)
+	u64	flags;
+};
+
 /**
  * struct iommu_ops - iommu ops and capabilities
  * @capable: check capability
@@ -242,6 +257,9 @@ struct iommu_iotlb_gather {
  *	       - IOMMU_DOMAIN_IDENTITY: must use an identity domain
  *	       - IOMMU_DOMAIN_DMA: must use a dma domain
  *	       - 0: use the default setting
+ * @tune_dma_iommu: Allows the IOMMU driver to modify the default
+ *                  options of the dma-iommu layer for a specific
+ *                  device.
  * @default_domain_ops: the default ops for domains
  * @remove_dev_pasid: Remove any translation configurations of a specific
  *                    pasid, so that any DMA transactions with this pasid
@@ -278,6 +296,9 @@ struct iommu_ops {
 	int (*def_domain_type)(struct device *dev);
 	void (*remove_dev_pasid)(struct device *dev, ioasid_t pasid);
 
+	void (*tune_dma_iommu)(struct device *dev,
+			       struct dma_iommu_options *options);
+
 	const struct iommu_domain_ops *default_domain_ops;
 	unsigned long pgsize_bitmap;
 	struct module *owner;