diff mbox

[RFC,v3,15/15] irqchip/gicv2m/v3-its-pci-msi: IOMMU map the MSI frame when needed

Message ID 1455264797-2334-16-git-send-email-eric.auger@linaro.org
State New
Headers show

Commit Message

Auger Eric Feb. 12, 2016, 8:13 a.m. UTC
In case the msi_desc references a device attached to an iommu
domain, the msi address needs to be mapped in the IOMMU. Else any
MSI write transaction will cause a fault.

gic_set_msi_addr detects that case and allocates an iova bound
to the physical address page comprising the MSI frame. This iova
then is used as the msi_msg address. Unset operation decrements the
reference on the binding.

The functions are called in the irq_write_msi_msg ops implementation.
At that time we can recognize whether the msi is setup or teared down
looking at the msi_msg content. Indeed msi_domain_deactivate zeroes all
the fields.

Signed-off-by: Eric Auger <eric.auger@linaro.org>


---

v2 -> v3:
- protect iova/addr manipulation with CONFIG_ARCH_DMA_ADDR_T_64BIT and
  CONFIG_PHYS_ADDR_T_64BIT
- only expose gic_pci_msi_domain_write_msg in case CONFIG_IOMMU_API &
  CONFIG_PCI_MSI_IRQ_DOMAIN are set.
- gic_set/unset_msi_addr duly become static
---
 drivers/irqchip/irq-gic-common.c         | 69 ++++++++++++++++++++++++++++++++
 drivers/irqchip/irq-gic-common.h         |  5 +++
 drivers/irqchip/irq-gic-v2m.c            |  7 +++-
 drivers/irqchip/irq-gic-v3-its-pci-msi.c |  5 +++
 4 files changed, 85 insertions(+), 1 deletion(-)

-- 
1.9.1

Comments

Auger Eric Feb. 18, 2016, 3:33 p.m. UTC | #1
Hi Marc,
On 02/18/2016 12:33 PM, Marc Zyngier wrote:
> On Fri, 12 Feb 2016 08:13:17 +0000

> Eric Auger <eric.auger@linaro.org> wrote:

> 

>> In case the msi_desc references a device attached to an iommu

>> domain, the msi address needs to be mapped in the IOMMU. Else any

>> MSI write transaction will cause a fault.

>>

>> gic_set_msi_addr detects that case and allocates an iova bound

>> to the physical address page comprising the MSI frame. This iova

>> then is used as the msi_msg address. Unset operation decrements the

>> reference on the binding.

>>

>> The functions are called in the irq_write_msi_msg ops implementation.

>> At that time we can recognize whether the msi is setup or teared down

>> looking at the msi_msg content. Indeed msi_domain_deactivate zeroes all

>> the fields.

>>

>> Signed-off-by: Eric Auger <eric.auger@linaro.org>

>>

>> ---

>>

>> v2 -> v3:

>> - protect iova/addr manipulation with CONFIG_ARCH_DMA_ADDR_T_64BIT and

>>   CONFIG_PHYS_ADDR_T_64BIT

>> - only expose gic_pci_msi_domain_write_msg in case CONFIG_IOMMU_API &

>>   CONFIG_PCI_MSI_IRQ_DOMAIN are set.

>> - gic_set/unset_msi_addr duly become static

>> ---

>>  drivers/irqchip/irq-gic-common.c         | 69 ++++++++++++++++++++++++++++++++

>>  drivers/irqchip/irq-gic-common.h         |  5 +++

>>  drivers/irqchip/irq-gic-v2m.c            |  7 +++-

>>  drivers/irqchip/irq-gic-v3-its-pci-msi.c |  5 +++

>>  4 files changed, 85 insertions(+), 1 deletion(-)

>>

>> diff --git a/drivers/irqchip/irq-gic-common.c b/drivers/irqchip/irq-gic-common.c

>> index f174ce0..46cd06c 100644

>> --- a/drivers/irqchip/irq-gic-common.c

>> +++ b/drivers/irqchip/irq-gic-common.c

>> @@ -18,6 +18,8 @@

>>  #include <linux/io.h>

>>  #include <linux/irq.h>

>>  #include <linux/irqchip/arm-gic.h>

>> +#include <linux/iommu.h>

>> +#include <linux/msi.h>

>>  

>>  #include "irq-gic-common.h"

>>  

>> @@ -121,3 +123,70 @@ void gic_cpu_config(void __iomem *base, void (*sync_access)(void))

>>  	if (sync_access)

>>  		sync_access();

>>  }

>> +

>> +#if defined(CONFIG_IOMMU_API) && defined(CONFIG_PCI_MSI_IRQ_DOMAIN)

>> +static int gic_set_msi_addr(struct irq_data *data, struct msi_msg *msg)

>> +{

>> +	struct msi_desc *desc = irq_data_get_msi_desc(data);

>> +	struct device *dev = msi_desc_to_dev(desc);

>> +	struct iommu_domain *d;

>> +	phys_addr_t addr;

>> +	dma_addr_t iova;

>> +	int ret;

>> +

>> +	d = iommu_get_domain_for_dev(dev);

>> +	if (!d)

>> +		return 0;

>> +#ifdef CONFIG_PHYS_ADDR_T_64BIT

>> +	addr = ((phys_addr_t)(msg->address_hi) << 32) | msg->address_lo;

>> +#else

>> +	addr = msg->address_lo;

>> +#endif

>> +

>> +	ret = iommu_get_single_reserved(d, addr, IOMMU_WRITE, &iova);

>> +

>> +	if (!ret) {

>> +		msg->address_lo = lower_32_bits(iova);

>> +		msg->address_hi = upper_32_bits(iova);

>> +	}

>> +	return ret;

>> +}

>> +

>> +

>> +static void gic_unset_msi_addr(struct irq_data *data)

>> +{

>> +	struct msi_desc *desc = irq_data_get_msi_desc(data);

>> +	struct device *dev;

>> +	struct iommu_domain *d;

>> +	dma_addr_t iova;

>> +

>> +#ifdef CONFIG_ARCH_DMA_ADDR_T_64BIT

>> +	iova = ((dma_addr_t)(desc->msg.address_hi) << 32) |

>> +		desc->msg.address_lo;

>> +#else

>> +	iova = desc->msg.address_lo;

>> +#endif

>> +

>> +	dev = msi_desc_to_dev(desc);

>> +	if (!dev)

>> +		return;

>> +

>> +	d = iommu_get_domain_for_dev(dev);

>> +	if (!d)

>> +		return;

>> +

>> +	iommu_put_single_reserved(d, iova);

>> +}

>> +

>> +void gic_pci_msi_domain_write_msg(struct irq_data *irq_data,

>> +				  struct msi_msg *msg)

>> +{

>> +	if (!msg->address_hi && !msg->address_lo && !msg->data)

>> +		gic_unset_msi_addr(irq_data); /* deactivate */

>> +	else

>> +		gic_set_msi_addr(irq_data, msg); /* activate, set_affinity */

>> +

>> +	pci_msi_domain_write_msg(irq_data, msg);

>> +}

> 

> So by doing that, you are specializing this infrastructure to PCI.

> If you hijacked irq_compose_msi_msg() instead, you'd have both

> platform and PCI MSI for the same price.

> 

> I can see a potential problem with the teardown of an MSI (I don't

> think the compose method is called on teardown), but I think this could

> be easily addressed.

Yes effectively this is the reason why I moved it from
irq_compose_msi_msg (my original implementation) to irq_write_msi_msg. I
noticed I had no way to detect the teardown whereas the
msi_domain_deactivate also calls irq_write_msi_msg which is quite
practical ;-) To be honest I need to further look at the non PCI
implementation.


> 

>> +#endif

>> +

>> diff --git a/drivers/irqchip/irq-gic-common.h b/drivers/irqchip/irq-gic-common.h

>> index fff697d..98681fd 100644

>> --- a/drivers/irqchip/irq-gic-common.h

>> +++ b/drivers/irqchip/irq-gic-common.h

>> @@ -35,4 +35,9 @@ void gic_cpu_config(void __iomem *base, void (*sync_access)(void));

>>  void gic_enable_quirks(u32 iidr, const struct gic_quirk *quirks,

>>  		void *data);

>>  

>> +#if defined(CONFIG_PCI_MSI_IRQ_DOMAIN) && defined(CONFIG_IOMMU_API)

>> +void gic_pci_msi_domain_write_msg(struct irq_data *irq_data,

>> +				  struct msi_msg *msg);

>> +#endif

>> +

>>  #endif /* _IRQ_GIC_COMMON_H */

>> diff --git a/drivers/irqchip/irq-gic-v2m.c b/drivers/irqchip/irq-gic-v2m.c

>> index c779f83..692d809 100644

>> --- a/drivers/irqchip/irq-gic-v2m.c

>> +++ b/drivers/irqchip/irq-gic-v2m.c

>> @@ -24,6 +24,7 @@

>>  #include <linux/of_pci.h>

>>  #include <linux/slab.h>

>>  #include <linux/spinlock.h>

>> +#include "irq-gic-common.h"

>>  

>>  /*

>>  * MSI_TYPER:

>> @@ -83,7 +84,11 @@ static struct irq_chip gicv2m_msi_irq_chip = {

>>  	.irq_mask		= gicv2m_mask_msi_irq,

>>  	.irq_unmask		= gicv2m_unmask_msi_irq,

>>  	.irq_eoi		= irq_chip_eoi_parent,

>> -	.irq_write_msi_msg	= pci_msi_domain_write_msg,

>> +#ifdef CONFIG_IOMMU_API

>> +	.irq_write_msi_msg	= gic_pci_msi_domain_write_msg,

>> +#else

>> +	.irq_write_msi_msg      = pci_msi_domain_write_msg,

>> +#endif

> 

> Irrespective of the way you implement the translation procedure, you

> should make this unconditional, and have the #ifdefery in the code that

> implements it.


OK

Thanks

Eric
> 

>>  };

>>  

>>  static struct msi_domain_info gicv2m_msi_domain_info = {

>> diff --git a/drivers/irqchip/irq-gic-v3-its-pci-msi.c b/drivers/irqchip/irq-gic-v3-its-pci-msi.c

>> index 8223765..690504e 100644

>> --- a/drivers/irqchip/irq-gic-v3-its-pci-msi.c

>> +++ b/drivers/irqchip/irq-gic-v3-its-pci-msi.c

>> @@ -19,6 +19,7 @@

>>  #include <linux/of.h>

>>  #include <linux/of_irq.h>

>>  #include <linux/of_pci.h>

>> +#include "irq-gic-common.h"

>>  

>>  static void its_mask_msi_irq(struct irq_data *d)

>>  {

>> @@ -37,7 +38,11 @@ static struct irq_chip its_msi_irq_chip = {

>>  	.irq_unmask		= its_unmask_msi_irq,

>>  	.irq_mask		= its_mask_msi_irq,

>>  	.irq_eoi		= irq_chip_eoi_parent,

>> +#ifdef CONFIG_IOMMU_API

>> +	.irq_write_msi_msg	= gic_pci_msi_domain_write_msg,

>> +#else

>>  	.irq_write_msi_msg	= pci_msi_domain_write_msg,

>> +#endif

>>  };

>>  

>>  struct its_pci_alias {

> 

> Thanks,

> 

> 	M.

>
Auger Eric Feb. 18, 2016, 4:58 p.m. UTC | #2
Hi Marc,
On 02/18/2016 04:47 PM, Marc Zyngier wrote:
> On 18/02/16 15:33, Eric Auger wrote:

>> Hi Marc,

>> On 02/18/2016 12:33 PM, Marc Zyngier wrote:

>>> On Fri, 12 Feb 2016 08:13:17 +0000

>>> Eric Auger <eric.auger@linaro.org> wrote:

>>>

>>>> In case the msi_desc references a device attached to an iommu

>>>> domain, the msi address needs to be mapped in the IOMMU. Else any

>>>> MSI write transaction will cause a fault.

>>>>

>>>> gic_set_msi_addr detects that case and allocates an iova bound

>>>> to the physical address page comprising the MSI frame. This iova

>>>> then is used as the msi_msg address. Unset operation decrements the

>>>> reference on the binding.

>>>>

>>>> The functions are called in the irq_write_msi_msg ops implementation.

>>>> At that time we can recognize whether the msi is setup or teared down

>>>> looking at the msi_msg content. Indeed msi_domain_deactivate zeroes all

>>>> the fields.

>>>>

>>>> Signed-off-by: Eric Auger <eric.auger@linaro.org>

>>>>

>>>> ---

>>>>

>>>> v2 -> v3:

>>>> - protect iova/addr manipulation with CONFIG_ARCH_DMA_ADDR_T_64BIT and

>>>>   CONFIG_PHYS_ADDR_T_64BIT

>>>> - only expose gic_pci_msi_domain_write_msg in case CONFIG_IOMMU_API &

>>>>   CONFIG_PCI_MSI_IRQ_DOMAIN are set.

>>>> - gic_set/unset_msi_addr duly become static

>>>> ---

>>>>  drivers/irqchip/irq-gic-common.c         | 69 ++++++++++++++++++++++++++++++++

>>>>  drivers/irqchip/irq-gic-common.h         |  5 +++

>>>>  drivers/irqchip/irq-gic-v2m.c            |  7 +++-

>>>>  drivers/irqchip/irq-gic-v3-its-pci-msi.c |  5 +++

>>>>  4 files changed, 85 insertions(+), 1 deletion(-)

>>>>

>>>> diff --git a/drivers/irqchip/irq-gic-common.c b/drivers/irqchip/irq-gic-common.c

>>>> index f174ce0..46cd06c 100644

>>>> --- a/drivers/irqchip/irq-gic-common.c

>>>> +++ b/drivers/irqchip/irq-gic-common.c

>>>> @@ -18,6 +18,8 @@

>>>>  #include <linux/io.h>

>>>>  #include <linux/irq.h>

>>>>  #include <linux/irqchip/arm-gic.h>

>>>> +#include <linux/iommu.h>

>>>> +#include <linux/msi.h>

>>>>  

>>>>  #include "irq-gic-common.h"

>>>>  

>>>> @@ -121,3 +123,70 @@ void gic_cpu_config(void __iomem *base, void (*sync_access)(void))

>>>>  	if (sync_access)

>>>>  		sync_access();

>>>>  }

>>>> +

>>>> +#if defined(CONFIG_IOMMU_API) && defined(CONFIG_PCI_MSI_IRQ_DOMAIN)

>>>> +static int gic_set_msi_addr(struct irq_data *data, struct msi_msg *msg)

>>>> +{

>>>> +	struct msi_desc *desc = irq_data_get_msi_desc(data);

>>>> +	struct device *dev = msi_desc_to_dev(desc);

>>>> +	struct iommu_domain *d;

>>>> +	phys_addr_t addr;

>>>> +	dma_addr_t iova;

>>>> +	int ret;

>>>> +

>>>> +	d = iommu_get_domain_for_dev(dev);

>>>> +	if (!d)

>>>> +		return 0;

>>>> +#ifdef CONFIG_PHYS_ADDR_T_64BIT

>>>> +	addr = ((phys_addr_t)(msg->address_hi) << 32) | msg->address_lo;

>>>> +#else

>>>> +	addr = msg->address_lo;

>>>> +#endif

>>>> +

>>>> +	ret = iommu_get_single_reserved(d, addr, IOMMU_WRITE, &iova);

>>>> +

>>>> +	if (!ret) {

>>>> +		msg->address_lo = lower_32_bits(iova);

>>>> +		msg->address_hi = upper_32_bits(iova);

>>>> +	}

>>>> +	return ret;

>>>> +}

>>>> +

>>>> +

>>>> +static void gic_unset_msi_addr(struct irq_data *data)

>>>> +{

>>>> +	struct msi_desc *desc = irq_data_get_msi_desc(data);

>>>> +	struct device *dev;

>>>> +	struct iommu_domain *d;

>>>> +	dma_addr_t iova;

>>>> +

>>>> +#ifdef CONFIG_ARCH_DMA_ADDR_T_64BIT

>>>> +	iova = ((dma_addr_t)(desc->msg.address_hi) << 32) |

>>>> +		desc->msg.address_lo;

>>>> +#else

>>>> +	iova = desc->msg.address_lo;

>>>> +#endif

>>>> +

>>>> +	dev = msi_desc_to_dev(desc);

>>>> +	if (!dev)

>>>> +		return;

>>>> +

>>>> +	d = iommu_get_domain_for_dev(dev);

>>>> +	if (!d)

>>>> +		return;

>>>> +

>>>> +	iommu_put_single_reserved(d, iova);

>>>> +}

>>>> +

>>>> +void gic_pci_msi_domain_write_msg(struct irq_data *irq_data,

>>>> +				  struct msi_msg *msg)

>>>> +{

>>>> +	if (!msg->address_hi && !msg->address_lo && !msg->data)

>>>> +		gic_unset_msi_addr(irq_data); /* deactivate */

>>>> +	else

>>>> +		gic_set_msi_addr(irq_data, msg); /* activate, set_affinity */

>>>> +

>>>> +	pci_msi_domain_write_msg(irq_data, msg);

>>>> +}

>>>

>>> So by doing that, you are specializing this infrastructure to PCI.

>>> If you hijacked irq_compose_msi_msg() instead, you'd have both

>>> platform and PCI MSI for the same price.

>>>

>>> I can see a potential problem with the teardown of an MSI (I don't

>>> think the compose method is called on teardown), but I think this could

>>> be easily addressed.

>> Yes effectively this is the reason why I moved it from

>> irq_compose_msi_msg (my original implementation) to irq_write_msi_msg. I

>> noticed I had no way to detect the teardown whereas the

>> msi_domain_deactivate also calls irq_write_msi_msg which is quite

>> practical ;-) To be honest I need to further look at the non PCI

>> implementation.

> 

> Another thing to consider is that MSI controllers may use different

> doorbells for different CPU affinities.


OK thanks for pointing this out.

This is also a good confirmation that a single IOVA address is not
always sufficient (at some point we wondered if we could directly use
the MSI controller guest PA instead of having the user-space providing
an IOVA pool)

 With your implementation,
> repeatedly changing the affinity from one CPU to another would increase

> the refcounting, and never actually lower it (you don't necessarily go

> via an "unmap").


 Of course, none of that applies to GICv2m/GICv3-ITS,
> but that's worth considering.

> 

> So I think we may need some better tracking of the IOVA we program in

> the device, and offer a generic infrastructure for this instead of

> hiding it in the MSI controller drivers.

OK I will study that.

Thanks for your time!

Eric
> 

> Thanks,

> 

> 	M.

>
diff mbox

Patch

diff --git a/drivers/irqchip/irq-gic-common.c b/drivers/irqchip/irq-gic-common.c
index f174ce0..46cd06c 100644
--- a/drivers/irqchip/irq-gic-common.c
+++ b/drivers/irqchip/irq-gic-common.c
@@ -18,6 +18,8 @@ 
 #include <linux/io.h>
 #include <linux/irq.h>
 #include <linux/irqchip/arm-gic.h>
+#include <linux/iommu.h>
+#include <linux/msi.h>
 
 #include "irq-gic-common.h"
 
@@ -121,3 +123,70 @@  void gic_cpu_config(void __iomem *base, void (*sync_access)(void))
 	if (sync_access)
 		sync_access();
 }
+
+#if defined(CONFIG_IOMMU_API) && defined(CONFIG_PCI_MSI_IRQ_DOMAIN)
+static int gic_set_msi_addr(struct irq_data *data, struct msi_msg *msg)
+{
+	struct msi_desc *desc = irq_data_get_msi_desc(data);
+	struct device *dev = msi_desc_to_dev(desc);
+	struct iommu_domain *d;
+	phys_addr_t addr;
+	dma_addr_t iova;
+	int ret;
+
+	d = iommu_get_domain_for_dev(dev);
+	if (!d)
+		return 0;
+#ifdef CONFIG_PHYS_ADDR_T_64BIT
+	addr = ((phys_addr_t)(msg->address_hi) << 32) | msg->address_lo;
+#else
+	addr = msg->address_lo;
+#endif
+
+	ret = iommu_get_single_reserved(d, addr, IOMMU_WRITE, &iova);
+
+	if (!ret) {
+		msg->address_lo = lower_32_bits(iova);
+		msg->address_hi = upper_32_bits(iova);
+	}
+	return ret;
+}
+
+
+static void gic_unset_msi_addr(struct irq_data *data)
+{
+	struct msi_desc *desc = irq_data_get_msi_desc(data);
+	struct device *dev;
+	struct iommu_domain *d;
+	dma_addr_t iova;
+
+#ifdef CONFIG_ARCH_DMA_ADDR_T_64BIT
+	iova = ((dma_addr_t)(desc->msg.address_hi) << 32) |
+		desc->msg.address_lo;
+#else
+	iova = desc->msg.address_lo;
+#endif
+
+	dev = msi_desc_to_dev(desc);
+	if (!dev)
+		return;
+
+	d = iommu_get_domain_for_dev(dev);
+	if (!d)
+		return;
+
+	iommu_put_single_reserved(d, iova);
+}
+
+void gic_pci_msi_domain_write_msg(struct irq_data *irq_data,
+				  struct msi_msg *msg)
+{
+	if (!msg->address_hi && !msg->address_lo && !msg->data)
+		gic_unset_msi_addr(irq_data); /* deactivate */
+	else
+		gic_set_msi_addr(irq_data, msg); /* activate, set_affinity */
+
+	pci_msi_domain_write_msg(irq_data, msg);
+}
+#endif
+
diff --git a/drivers/irqchip/irq-gic-common.h b/drivers/irqchip/irq-gic-common.h
index fff697d..98681fd 100644
--- a/drivers/irqchip/irq-gic-common.h
+++ b/drivers/irqchip/irq-gic-common.h
@@ -35,4 +35,9 @@  void gic_cpu_config(void __iomem *base, void (*sync_access)(void));
 void gic_enable_quirks(u32 iidr, const struct gic_quirk *quirks,
 		void *data);
 
+#if defined(CONFIG_PCI_MSI_IRQ_DOMAIN) && defined(CONFIG_IOMMU_API)
+void gic_pci_msi_domain_write_msg(struct irq_data *irq_data,
+				  struct msi_msg *msg);
+#endif
+
 #endif /* _IRQ_GIC_COMMON_H */
diff --git a/drivers/irqchip/irq-gic-v2m.c b/drivers/irqchip/irq-gic-v2m.c
index c779f83..692d809 100644
--- a/drivers/irqchip/irq-gic-v2m.c
+++ b/drivers/irqchip/irq-gic-v2m.c
@@ -24,6 +24,7 @@ 
 #include <linux/of_pci.h>
 #include <linux/slab.h>
 #include <linux/spinlock.h>
+#include "irq-gic-common.h"
 
 /*
 * MSI_TYPER:
@@ -83,7 +84,11 @@  static struct irq_chip gicv2m_msi_irq_chip = {
 	.irq_mask		= gicv2m_mask_msi_irq,
 	.irq_unmask		= gicv2m_unmask_msi_irq,
 	.irq_eoi		= irq_chip_eoi_parent,
-	.irq_write_msi_msg	= pci_msi_domain_write_msg,
+#ifdef CONFIG_IOMMU_API
+	.irq_write_msi_msg	= gic_pci_msi_domain_write_msg,
+#else
+	.irq_write_msi_msg      = pci_msi_domain_write_msg,
+#endif
 };
 
 static struct msi_domain_info gicv2m_msi_domain_info = {
diff --git a/drivers/irqchip/irq-gic-v3-its-pci-msi.c b/drivers/irqchip/irq-gic-v3-its-pci-msi.c
index 8223765..690504e 100644
--- a/drivers/irqchip/irq-gic-v3-its-pci-msi.c
+++ b/drivers/irqchip/irq-gic-v3-its-pci-msi.c
@@ -19,6 +19,7 @@ 
 #include <linux/of.h>
 #include <linux/of_irq.h>
 #include <linux/of_pci.h>
+#include "irq-gic-common.h"
 
 static void its_mask_msi_irq(struct irq_data *d)
 {
@@ -37,7 +38,11 @@  static struct irq_chip its_msi_irq_chip = {
 	.irq_unmask		= its_unmask_msi_irq,
 	.irq_mask		= its_mask_msi_irq,
 	.irq_eoi		= irq_chip_eoi_parent,
+#ifdef CONFIG_IOMMU_API
+	.irq_write_msi_msg	= gic_pci_msi_domain_write_msg,
+#else
 	.irq_write_msi_msg	= pci_msi_domain_write_msg,
+#endif
 };
 
 struct its_pci_alias {