Message ID | 1455264797-2334-16-git-send-email-eric.auger@linaro.org |
---|---|
State | New |
Headers | show |
Hi Marc, On 02/18/2016 12:33 PM, Marc Zyngier wrote: > On Fri, 12 Feb 2016 08:13:17 +0000 > Eric Auger <eric.auger@linaro.org> wrote: > >> In case the msi_desc references a device attached to an iommu >> domain, the msi address needs to be mapped in the IOMMU. Else any >> MSI write transaction will cause a fault. >> >> gic_set_msi_addr detects that case and allocates an iova bound >> to the physical address page comprising the MSI frame. This iova >> then is used as the msi_msg address. Unset operation decrements the >> reference on the binding. >> >> The functions are called in the irq_write_msi_msg ops implementation. >> At that time we can recognize whether the msi is setup or teared down >> looking at the msi_msg content. Indeed msi_domain_deactivate zeroes all >> the fields. >> >> Signed-off-by: Eric Auger <eric.auger@linaro.org> >> >> --- >> >> v2 -> v3: >> - protect iova/addr manipulation with CONFIG_ARCH_DMA_ADDR_T_64BIT and >> CONFIG_PHYS_ADDR_T_64BIT >> - only expose gic_pci_msi_domain_write_msg in case CONFIG_IOMMU_API & >> CONFIG_PCI_MSI_IRQ_DOMAIN are set. >> - gic_set/unset_msi_addr duly become static >> --- >> drivers/irqchip/irq-gic-common.c | 69 ++++++++++++++++++++++++++++++++ >> drivers/irqchip/irq-gic-common.h | 5 +++ >> drivers/irqchip/irq-gic-v2m.c | 7 +++- >> drivers/irqchip/irq-gic-v3-its-pci-msi.c | 5 +++ >> 4 files changed, 85 insertions(+), 1 deletion(-) >> >> diff --git a/drivers/irqchip/irq-gic-common.c b/drivers/irqchip/irq-gic-common.c >> index f174ce0..46cd06c 100644 >> --- a/drivers/irqchip/irq-gic-common.c >> +++ b/drivers/irqchip/irq-gic-common.c >> @@ -18,6 +18,8 @@ >> #include <linux/io.h> >> #include <linux/irq.h> >> #include <linux/irqchip/arm-gic.h> >> +#include <linux/iommu.h> >> +#include <linux/msi.h> >> >> #include "irq-gic-common.h" >> >> @@ -121,3 +123,70 @@ void gic_cpu_config(void __iomem *base, void (*sync_access)(void)) >> if (sync_access) >> sync_access(); >> } >> + >> +#if defined(CONFIG_IOMMU_API) && defined(CONFIG_PCI_MSI_IRQ_DOMAIN) >> +static int gic_set_msi_addr(struct irq_data *data, struct msi_msg *msg) >> +{ >> + struct msi_desc *desc = irq_data_get_msi_desc(data); >> + struct device *dev = msi_desc_to_dev(desc); >> + struct iommu_domain *d; >> + phys_addr_t addr; >> + dma_addr_t iova; >> + int ret; >> + >> + d = iommu_get_domain_for_dev(dev); >> + if (!d) >> + return 0; >> +#ifdef CONFIG_PHYS_ADDR_T_64BIT >> + addr = ((phys_addr_t)(msg->address_hi) << 32) | msg->address_lo; >> +#else >> + addr = msg->address_lo; >> +#endif >> + >> + ret = iommu_get_single_reserved(d, addr, IOMMU_WRITE, &iova); >> + >> + if (!ret) { >> + msg->address_lo = lower_32_bits(iova); >> + msg->address_hi = upper_32_bits(iova); >> + } >> + return ret; >> +} >> + >> + >> +static void gic_unset_msi_addr(struct irq_data *data) >> +{ >> + struct msi_desc *desc = irq_data_get_msi_desc(data); >> + struct device *dev; >> + struct iommu_domain *d; >> + dma_addr_t iova; >> + >> +#ifdef CONFIG_ARCH_DMA_ADDR_T_64BIT >> + iova = ((dma_addr_t)(desc->msg.address_hi) << 32) | >> + desc->msg.address_lo; >> +#else >> + iova = desc->msg.address_lo; >> +#endif >> + >> + dev = msi_desc_to_dev(desc); >> + if (!dev) >> + return; >> + >> + d = iommu_get_domain_for_dev(dev); >> + if (!d) >> + return; >> + >> + iommu_put_single_reserved(d, iova); >> +} >> + >> +void gic_pci_msi_domain_write_msg(struct irq_data *irq_data, >> + struct msi_msg *msg) >> +{ >> + if (!msg->address_hi && !msg->address_lo && !msg->data) >> + gic_unset_msi_addr(irq_data); /* deactivate */ >> + else >> + gic_set_msi_addr(irq_data, msg); /* activate, set_affinity */ >> + >> + pci_msi_domain_write_msg(irq_data, msg); >> +} > > So by doing that, you are specializing this infrastructure to PCI. > If you hijacked irq_compose_msi_msg() instead, you'd have both > platform and PCI MSI for the same price. > > I can see a potential problem with the teardown of an MSI (I don't > think the compose method is called on teardown), but I think this could > be easily addressed. Yes effectively this is the reason why I moved it from irq_compose_msi_msg (my original implementation) to irq_write_msi_msg. I noticed I had no way to detect the teardown whereas the msi_domain_deactivate also calls irq_write_msi_msg which is quite practical ;-) To be honest I need to further look at the non PCI implementation. > >> +#endif >> + >> diff --git a/drivers/irqchip/irq-gic-common.h b/drivers/irqchip/irq-gic-common.h >> index fff697d..98681fd 100644 >> --- a/drivers/irqchip/irq-gic-common.h >> +++ b/drivers/irqchip/irq-gic-common.h >> @@ -35,4 +35,9 @@ void gic_cpu_config(void __iomem *base, void (*sync_access)(void)); >> void gic_enable_quirks(u32 iidr, const struct gic_quirk *quirks, >> void *data); >> >> +#if defined(CONFIG_PCI_MSI_IRQ_DOMAIN) && defined(CONFIG_IOMMU_API) >> +void gic_pci_msi_domain_write_msg(struct irq_data *irq_data, >> + struct msi_msg *msg); >> +#endif >> + >> #endif /* _IRQ_GIC_COMMON_H */ >> diff --git a/drivers/irqchip/irq-gic-v2m.c b/drivers/irqchip/irq-gic-v2m.c >> index c779f83..692d809 100644 >> --- a/drivers/irqchip/irq-gic-v2m.c >> +++ b/drivers/irqchip/irq-gic-v2m.c >> @@ -24,6 +24,7 @@ >> #include <linux/of_pci.h> >> #include <linux/slab.h> >> #include <linux/spinlock.h> >> +#include "irq-gic-common.h" >> >> /* >> * MSI_TYPER: >> @@ -83,7 +84,11 @@ static struct irq_chip gicv2m_msi_irq_chip = { >> .irq_mask = gicv2m_mask_msi_irq, >> .irq_unmask = gicv2m_unmask_msi_irq, >> .irq_eoi = irq_chip_eoi_parent, >> - .irq_write_msi_msg = pci_msi_domain_write_msg, >> +#ifdef CONFIG_IOMMU_API >> + .irq_write_msi_msg = gic_pci_msi_domain_write_msg, >> +#else >> + .irq_write_msi_msg = pci_msi_domain_write_msg, >> +#endif > > Irrespective of the way you implement the translation procedure, you > should make this unconditional, and have the #ifdefery in the code that > implements it. OK Thanks Eric > >> }; >> >> static struct msi_domain_info gicv2m_msi_domain_info = { >> diff --git a/drivers/irqchip/irq-gic-v3-its-pci-msi.c b/drivers/irqchip/irq-gic-v3-its-pci-msi.c >> index 8223765..690504e 100644 >> --- a/drivers/irqchip/irq-gic-v3-its-pci-msi.c >> +++ b/drivers/irqchip/irq-gic-v3-its-pci-msi.c >> @@ -19,6 +19,7 @@ >> #include <linux/of.h> >> #include <linux/of_irq.h> >> #include <linux/of_pci.h> >> +#include "irq-gic-common.h" >> >> static void its_mask_msi_irq(struct irq_data *d) >> { >> @@ -37,7 +38,11 @@ static struct irq_chip its_msi_irq_chip = { >> .irq_unmask = its_unmask_msi_irq, >> .irq_mask = its_mask_msi_irq, >> .irq_eoi = irq_chip_eoi_parent, >> +#ifdef CONFIG_IOMMU_API >> + .irq_write_msi_msg = gic_pci_msi_domain_write_msg, >> +#else >> .irq_write_msi_msg = pci_msi_domain_write_msg, >> +#endif >> }; >> >> struct its_pci_alias { > > Thanks, > > M. >
Hi Marc, On 02/18/2016 04:47 PM, Marc Zyngier wrote: > On 18/02/16 15:33, Eric Auger wrote: >> Hi Marc, >> On 02/18/2016 12:33 PM, Marc Zyngier wrote: >>> On Fri, 12 Feb 2016 08:13:17 +0000 >>> Eric Auger <eric.auger@linaro.org> wrote: >>> >>>> In case the msi_desc references a device attached to an iommu >>>> domain, the msi address needs to be mapped in the IOMMU. Else any >>>> MSI write transaction will cause a fault. >>>> >>>> gic_set_msi_addr detects that case and allocates an iova bound >>>> to the physical address page comprising the MSI frame. This iova >>>> then is used as the msi_msg address. Unset operation decrements the >>>> reference on the binding. >>>> >>>> The functions are called in the irq_write_msi_msg ops implementation. >>>> At that time we can recognize whether the msi is setup or teared down >>>> looking at the msi_msg content. Indeed msi_domain_deactivate zeroes all >>>> the fields. >>>> >>>> Signed-off-by: Eric Auger <eric.auger@linaro.org> >>>> >>>> --- >>>> >>>> v2 -> v3: >>>> - protect iova/addr manipulation with CONFIG_ARCH_DMA_ADDR_T_64BIT and >>>> CONFIG_PHYS_ADDR_T_64BIT >>>> - only expose gic_pci_msi_domain_write_msg in case CONFIG_IOMMU_API & >>>> CONFIG_PCI_MSI_IRQ_DOMAIN are set. >>>> - gic_set/unset_msi_addr duly become static >>>> --- >>>> drivers/irqchip/irq-gic-common.c | 69 ++++++++++++++++++++++++++++++++ >>>> drivers/irqchip/irq-gic-common.h | 5 +++ >>>> drivers/irqchip/irq-gic-v2m.c | 7 +++- >>>> drivers/irqchip/irq-gic-v3-its-pci-msi.c | 5 +++ >>>> 4 files changed, 85 insertions(+), 1 deletion(-) >>>> >>>> diff --git a/drivers/irqchip/irq-gic-common.c b/drivers/irqchip/irq-gic-common.c >>>> index f174ce0..46cd06c 100644 >>>> --- a/drivers/irqchip/irq-gic-common.c >>>> +++ b/drivers/irqchip/irq-gic-common.c >>>> @@ -18,6 +18,8 @@ >>>> #include <linux/io.h> >>>> #include <linux/irq.h> >>>> #include <linux/irqchip/arm-gic.h> >>>> +#include <linux/iommu.h> >>>> +#include <linux/msi.h> >>>> >>>> #include "irq-gic-common.h" >>>> >>>> @@ -121,3 +123,70 @@ void gic_cpu_config(void __iomem *base, void (*sync_access)(void)) >>>> if (sync_access) >>>> sync_access(); >>>> } >>>> + >>>> +#if defined(CONFIG_IOMMU_API) && defined(CONFIG_PCI_MSI_IRQ_DOMAIN) >>>> +static int gic_set_msi_addr(struct irq_data *data, struct msi_msg *msg) >>>> +{ >>>> + struct msi_desc *desc = irq_data_get_msi_desc(data); >>>> + struct device *dev = msi_desc_to_dev(desc); >>>> + struct iommu_domain *d; >>>> + phys_addr_t addr; >>>> + dma_addr_t iova; >>>> + int ret; >>>> + >>>> + d = iommu_get_domain_for_dev(dev); >>>> + if (!d) >>>> + return 0; >>>> +#ifdef CONFIG_PHYS_ADDR_T_64BIT >>>> + addr = ((phys_addr_t)(msg->address_hi) << 32) | msg->address_lo; >>>> +#else >>>> + addr = msg->address_lo; >>>> +#endif >>>> + >>>> + ret = iommu_get_single_reserved(d, addr, IOMMU_WRITE, &iova); >>>> + >>>> + if (!ret) { >>>> + msg->address_lo = lower_32_bits(iova); >>>> + msg->address_hi = upper_32_bits(iova); >>>> + } >>>> + return ret; >>>> +} >>>> + >>>> + >>>> +static void gic_unset_msi_addr(struct irq_data *data) >>>> +{ >>>> + struct msi_desc *desc = irq_data_get_msi_desc(data); >>>> + struct device *dev; >>>> + struct iommu_domain *d; >>>> + dma_addr_t iova; >>>> + >>>> +#ifdef CONFIG_ARCH_DMA_ADDR_T_64BIT >>>> + iova = ((dma_addr_t)(desc->msg.address_hi) << 32) | >>>> + desc->msg.address_lo; >>>> +#else >>>> + iova = desc->msg.address_lo; >>>> +#endif >>>> + >>>> + dev = msi_desc_to_dev(desc); >>>> + if (!dev) >>>> + return; >>>> + >>>> + d = iommu_get_domain_for_dev(dev); >>>> + if (!d) >>>> + return; >>>> + >>>> + iommu_put_single_reserved(d, iova); >>>> +} >>>> + >>>> +void gic_pci_msi_domain_write_msg(struct irq_data *irq_data, >>>> + struct msi_msg *msg) >>>> +{ >>>> + if (!msg->address_hi && !msg->address_lo && !msg->data) >>>> + gic_unset_msi_addr(irq_data); /* deactivate */ >>>> + else >>>> + gic_set_msi_addr(irq_data, msg); /* activate, set_affinity */ >>>> + >>>> + pci_msi_domain_write_msg(irq_data, msg); >>>> +} >>> >>> So by doing that, you are specializing this infrastructure to PCI. >>> If you hijacked irq_compose_msi_msg() instead, you'd have both >>> platform and PCI MSI for the same price. >>> >>> I can see a potential problem with the teardown of an MSI (I don't >>> think the compose method is called on teardown), but I think this could >>> be easily addressed. >> Yes effectively this is the reason why I moved it from >> irq_compose_msi_msg (my original implementation) to irq_write_msi_msg. I >> noticed I had no way to detect the teardown whereas the >> msi_domain_deactivate also calls irq_write_msi_msg which is quite >> practical ;-) To be honest I need to further look at the non PCI >> implementation. > > Another thing to consider is that MSI controllers may use different > doorbells for different CPU affinities. OK thanks for pointing this out. This is also a good confirmation that a single IOVA address is not always sufficient (at some point we wondered if we could directly use the MSI controller guest PA instead of having the user-space providing an IOVA pool) With your implementation, > repeatedly changing the affinity from one CPU to another would increase > the refcounting, and never actually lower it (you don't necessarily go > via an "unmap"). Of course, none of that applies to GICv2m/GICv3-ITS, > but that's worth considering. > > So I think we may need some better tracking of the IOVA we program in > the device, and offer a generic infrastructure for this instead of > hiding it in the MSI controller drivers. OK I will study that. Thanks for your time! Eric > > Thanks, > > M. >
diff --git a/drivers/irqchip/irq-gic-common.c b/drivers/irqchip/irq-gic-common.c index f174ce0..46cd06c 100644 --- a/drivers/irqchip/irq-gic-common.c +++ b/drivers/irqchip/irq-gic-common.c @@ -18,6 +18,8 @@ #include <linux/io.h> #include <linux/irq.h> #include <linux/irqchip/arm-gic.h> +#include <linux/iommu.h> +#include <linux/msi.h> #include "irq-gic-common.h" @@ -121,3 +123,70 @@ void gic_cpu_config(void __iomem *base, void (*sync_access)(void)) if (sync_access) sync_access(); } + +#if defined(CONFIG_IOMMU_API) && defined(CONFIG_PCI_MSI_IRQ_DOMAIN) +static int gic_set_msi_addr(struct irq_data *data, struct msi_msg *msg) +{ + struct msi_desc *desc = irq_data_get_msi_desc(data); + struct device *dev = msi_desc_to_dev(desc); + struct iommu_domain *d; + phys_addr_t addr; + dma_addr_t iova; + int ret; + + d = iommu_get_domain_for_dev(dev); + if (!d) + return 0; +#ifdef CONFIG_PHYS_ADDR_T_64BIT + addr = ((phys_addr_t)(msg->address_hi) << 32) | msg->address_lo; +#else + addr = msg->address_lo; +#endif + + ret = iommu_get_single_reserved(d, addr, IOMMU_WRITE, &iova); + + if (!ret) { + msg->address_lo = lower_32_bits(iova); + msg->address_hi = upper_32_bits(iova); + } + return ret; +} + + +static void gic_unset_msi_addr(struct irq_data *data) +{ + struct msi_desc *desc = irq_data_get_msi_desc(data); + struct device *dev; + struct iommu_domain *d; + dma_addr_t iova; + +#ifdef CONFIG_ARCH_DMA_ADDR_T_64BIT + iova = ((dma_addr_t)(desc->msg.address_hi) << 32) | + desc->msg.address_lo; +#else + iova = desc->msg.address_lo; +#endif + + dev = msi_desc_to_dev(desc); + if (!dev) + return; + + d = iommu_get_domain_for_dev(dev); + if (!d) + return; + + iommu_put_single_reserved(d, iova); +} + +void gic_pci_msi_domain_write_msg(struct irq_data *irq_data, + struct msi_msg *msg) +{ + if (!msg->address_hi && !msg->address_lo && !msg->data) + gic_unset_msi_addr(irq_data); /* deactivate */ + else + gic_set_msi_addr(irq_data, msg); /* activate, set_affinity */ + + pci_msi_domain_write_msg(irq_data, msg); +} +#endif + diff --git a/drivers/irqchip/irq-gic-common.h b/drivers/irqchip/irq-gic-common.h index fff697d..98681fd 100644 --- a/drivers/irqchip/irq-gic-common.h +++ b/drivers/irqchip/irq-gic-common.h @@ -35,4 +35,9 @@ void gic_cpu_config(void __iomem *base, void (*sync_access)(void)); void gic_enable_quirks(u32 iidr, const struct gic_quirk *quirks, void *data); +#if defined(CONFIG_PCI_MSI_IRQ_DOMAIN) && defined(CONFIG_IOMMU_API) +void gic_pci_msi_domain_write_msg(struct irq_data *irq_data, + struct msi_msg *msg); +#endif + #endif /* _IRQ_GIC_COMMON_H */ diff --git a/drivers/irqchip/irq-gic-v2m.c b/drivers/irqchip/irq-gic-v2m.c index c779f83..692d809 100644 --- a/drivers/irqchip/irq-gic-v2m.c +++ b/drivers/irqchip/irq-gic-v2m.c @@ -24,6 +24,7 @@ #include <linux/of_pci.h> #include <linux/slab.h> #include <linux/spinlock.h> +#include "irq-gic-common.h" /* * MSI_TYPER: @@ -83,7 +84,11 @@ static struct irq_chip gicv2m_msi_irq_chip = { .irq_mask = gicv2m_mask_msi_irq, .irq_unmask = gicv2m_unmask_msi_irq, .irq_eoi = irq_chip_eoi_parent, - .irq_write_msi_msg = pci_msi_domain_write_msg, +#ifdef CONFIG_IOMMU_API + .irq_write_msi_msg = gic_pci_msi_domain_write_msg, +#else + .irq_write_msi_msg = pci_msi_domain_write_msg, +#endif }; static struct msi_domain_info gicv2m_msi_domain_info = { diff --git a/drivers/irqchip/irq-gic-v3-its-pci-msi.c b/drivers/irqchip/irq-gic-v3-its-pci-msi.c index 8223765..690504e 100644 --- a/drivers/irqchip/irq-gic-v3-its-pci-msi.c +++ b/drivers/irqchip/irq-gic-v3-its-pci-msi.c @@ -19,6 +19,7 @@ #include <linux/of.h> #include <linux/of_irq.h> #include <linux/of_pci.h> +#include "irq-gic-common.h" static void its_mask_msi_irq(struct irq_data *d) { @@ -37,7 +38,11 @@ static struct irq_chip its_msi_irq_chip = { .irq_unmask = its_unmask_msi_irq, .irq_mask = its_mask_msi_irq, .irq_eoi = irq_chip_eoi_parent, +#ifdef CONFIG_IOMMU_API + .irq_write_msi_msg = gic_pci_msi_domain_write_msg, +#else .irq_write_msi_msg = pci_msi_domain_write_msg, +#endif }; struct its_pci_alias {
In case the msi_desc references a device attached to an iommu domain, the msi address needs to be mapped in the IOMMU. Else any MSI write transaction will cause a fault. gic_set_msi_addr detects that case and allocates an iova bound to the physical address page comprising the MSI frame. This iova then is used as the msi_msg address. Unset operation decrements the reference on the binding. The functions are called in the irq_write_msi_msg ops implementation. At that time we can recognize whether the msi is setup or teared down looking at the msi_msg content. Indeed msi_domain_deactivate zeroes all the fields. Signed-off-by: Eric Auger <eric.auger@linaro.org> --- v2 -> v3: - protect iova/addr manipulation with CONFIG_ARCH_DMA_ADDR_T_64BIT and CONFIG_PHYS_ADDR_T_64BIT - only expose gic_pci_msi_domain_write_msg in case CONFIG_IOMMU_API & CONFIG_PCI_MSI_IRQ_DOMAIN are set. - gic_set/unset_msi_addr duly become static --- drivers/irqchip/irq-gic-common.c | 69 ++++++++++++++++++++++++++++++++ drivers/irqchip/irq-gic-common.h | 5 +++ drivers/irqchip/irq-gic-v2m.c | 7 +++- drivers/irqchip/irq-gic-v3-its-pci-msi.c | 5 +++ 4 files changed, 85 insertions(+), 1 deletion(-) -- 1.9.1