diff mbox

[RFC,v2,7/8] hw: arm: virt: register reserved IOVA region

Message ID 1454086429-4373-8-git-send-email-eric.auger@linaro.org
State New
Headers show

Commit Message

Auger Eric Jan. 29, 2016, 4:53 p.m. UTC
Registers a 16x64kB reserved iova region. Currently this iova
region is used by the host kernel to map host MSI controller frames
(GICv2m, GITS_TRANSLATER). The host kernel needs this iova window
because, unlike on x86, it cannot program the PCIe device with the MSI
frame physical address: the MSI write transactions go through the
IOMMU.

The reserved region is mapped on the platform bus.

Signed-off-by: Eric Auger <eric.auger@linaro.org>


---

RFC v1 -> RFC v2:
- use the platform bus to map the reserved iova region
---
 hw/arm/virt.c | 19 ++++++++++++++-----
 1 file changed, 14 insertions(+), 5 deletions(-)

-- 
1.9.1

Comments

Peter Maydell Feb. 16, 2016, 6:21 p.m. UTC | #1
On 29 January 2016 at 16:53, Eric Auger <eric.auger@linaro.org> wrote:
> Registers a 16x64kB reserved iova region. Currently this iova

> region is used by the host kernel to map host MSI controller frames

> (GICv2m, GITS_TRANSLATER). The host kernel needs this iova window

> since it cannot program the PCIe device with MSI frame physical

> address (as opposed to x86) since the MSI write transactions go

> through the IOMMU.

>

> The reserved region is mapped on the platform bus.


I guess that keeps it neatly out of the way of everybody else :-)

> Signed-off-by: Eric Auger <eric.auger@linaro.org>

>

> ---

>

> RFC v1 -> RFC v2:

> - use the platform bus to map the reserved iova region

> ---

>  hw/arm/virt.c | 19 ++++++++++++++-----

>  1 file changed, 14 insertions(+), 5 deletions(-)

>

> diff --git a/hw/arm/virt.c b/hw/arm/virt.c

> index 3839c68..4b2a891 100644

> --- a/hw/arm/virt.c

> +++ b/hw/arm/virt.c

> @@ -805,7 +805,7 @@ static void create_pcie_irq_map(const VirtBoardInfo *vbi, uint32_t gic_phandle,

>  }

>

>  static void create_pcie(const VirtBoardInfo *vbi, qemu_irq *pic,

> -                        bool use_highmem)

> +                        bool use_highmem, MemoryRegion **reserved_reg)

>  {

>      hwaddr base_mmio = vbi->memmap[VIRT_PCIE_MMIO].base;

>      hwaddr size_mmio = vbi->memmap[VIRT_PCIE_MMIO].size;

> @@ -920,10 +920,16 @@ static void create_pcie(const VirtBoardInfo *vbi, qemu_irq *pic,

>      qemu_fdt_setprop_cell(vbi->fdt, nodename, "#interrupt-cells", 1);

>      create_pcie_irq_map(vbi, vbi->gic_phandle, irq, nodename);

>

> +    /* initialize the reserved iova region for MSI binding (16 x 64kb) */

> +    *reserved_reg = g_new0(MemoryRegion, 1);

> +    memory_region_init_reserved_iova(*reserved_reg, OBJECT(dev),

> +                                     "reserved-iova",

> +                                     0x100000, &error_fatal);


So the only reason this is here is because we need to have a pointer to
the PCIe controller DeviceState, right? I think it would be better to
make create_pcie() return the DeviceState* instead of void. Then you
can either (a) pass the pcie controller pointer into create_platform_bus()
and have that create and map the reserved iova region, or (b) have a
separate function to create the reserved iova region. In any case I
think it fits more naturally with the rest of the platform bus code
rather than in the PCIe controller creation function.

thanks
-- PMM
Auger Eric Feb. 18, 2016, 6:48 p.m. UTC | #2
Hi Peter,
On 02/16/2016 07:21 PM, Peter Maydell wrote:
> On 29 January 2016 at 16:53, Eric Auger <eric.auger@linaro.org> wrote:

>> Registers a 16x64kB reserved iova region. Currently this iova

>> region is used by the host kernel to map host MSI controller frames

>> (GICv2m, GITS_TRANSLATER). The host kernel needs this iova window

>> since it cannot program the PCIe device with MSI frame physical

>> address (as opposed to x86) since the MSI write transactions go

>> through the IOMMU.

>>

>> The reserved region is mapped on the platform bus.

> 

> I guess that keeps it neatly out of the way of everybody else :-)

Yes hopefully. The platform bus has its own MMIO allocation scheme.
> 

>> Signed-off-by: Eric Auger <eric.auger@linaro.org>

>>

>> ---

>>

>> RFC v1 -> RFC v2:

>> - use the platform bus to map the reserved iova region

>> ---

>>  hw/arm/virt.c | 19 ++++++++++++++-----

>>  1 file changed, 14 insertions(+), 5 deletions(-)

>>

>> diff --git a/hw/arm/virt.c b/hw/arm/virt.c

>> index 3839c68..4b2a891 100644

>> --- a/hw/arm/virt.c

>> +++ b/hw/arm/virt.c

>> @@ -805,7 +805,7 @@ static void create_pcie_irq_map(const VirtBoardInfo *vbi, uint32_t gic_phandle,

>>  }

>>

>>  static void create_pcie(const VirtBoardInfo *vbi, qemu_irq *pic,

>> -                        bool use_highmem)

>> +                        bool use_highmem, MemoryRegion **reserved_reg)

>>  {

>>      hwaddr base_mmio = vbi->memmap[VIRT_PCIE_MMIO].base;

>>      hwaddr size_mmio = vbi->memmap[VIRT_PCIE_MMIO].size;

>> @@ -920,10 +920,16 @@ static void create_pcie(const VirtBoardInfo *vbi, qemu_irq *pic,

>>      qemu_fdt_setprop_cell(vbi->fdt, nodename, "#interrupt-cells", 1);

>>      create_pcie_irq_map(vbi, vbi->gic_phandle, irq, nodename);

>>

>> +    /* initialize the reserved iova region for MSI binding (16 x 64kb) */

>> +    *reserved_reg = g_new0(MemoryRegion, 1);

>> +    memory_region_init_reserved_iova(*reserved_reg, OBJECT(dev),

>> +                                     "reserved-iova",

>> +                                     0x100000, &error_fatal);

> 

> So the only reason this is here is because we need to have a pointer to

> the PCIe controller DeviceState, right?

Yes, that's correct. Currently the PCIe controller is the object that
tracks the reserved region's ref count. I proceeded that way because the
reserved IOVA provision is currently related to PCIe/MSI functionality.

> I think it would be better to
> make create_pcie() return the DeviceState* instead of void. Then you

> can either (a) pass the pcie controller pointer into create_platform_bus()

> and have that create and map the reserved iova region, or (b) have a

> separate function to create the reserved iova region. In any case I

> think it fits more naturally with the rest of the platform bus code

> rather than in the PCIe controller creation function.

OK

Another issue is that we currently book an arbitrary 1MB IOVA window
regardless of the actual needs. We are also thinking about extending the
VFIO user API to return the number of reserved iova pages that are
requested and possibly some alignment constraints. It then becomes more
complex: since VFIO devices are instantiated after machine creation, all
the needs must be collected and consolidated before the reserved iova
region can eventually be created. So I think that code will end up
somewhere in a machine init done notifier ...

Thanks for the review!

Best Regards

Eric
> 

> thanks

> -- PMM

>
diff mbox

Patch

diff --git a/hw/arm/virt.c b/hw/arm/virt.c
index 3839c68..4b2a891 100644
--- a/hw/arm/virt.c
+++ b/hw/arm/virt.c
@@ -805,7 +805,7 @@  static void create_pcie_irq_map(const VirtBoardInfo *vbi, uint32_t gic_phandle,
 }
 
 static void create_pcie(const VirtBoardInfo *vbi, qemu_irq *pic,
-                        bool use_highmem)
+                        bool use_highmem, MemoryRegion **reserved_reg)
 {
     hwaddr base_mmio = vbi->memmap[VIRT_PCIE_MMIO].base;
     hwaddr size_mmio = vbi->memmap[VIRT_PCIE_MMIO].size;
@@ -920,10 +920,16 @@  static void create_pcie(const VirtBoardInfo *vbi, qemu_irq *pic,
     qemu_fdt_setprop_cell(vbi->fdt, nodename, "#interrupt-cells", 1);
     create_pcie_irq_map(vbi, vbi->gic_phandle, irq, nodename);
 
+    /* initialize the reserved iova region for MSI binding (16 x 64kb) */
+    *reserved_reg = g_new0(MemoryRegion, 1);
+    memory_region_init_reserved_iova(*reserved_reg, OBJECT(dev),
+                                     "reserved-iova",
+                                     0x100000, &error_fatal);
+
     g_free(nodename);
 }
 
-static void create_platform_bus(VirtBoardInfo *vbi, qemu_irq *pic)
+static PlatformBusDevice *create_platform_bus(VirtBoardInfo *vbi, qemu_irq *pic)
 {
     DeviceState *dev;
     SysBusDevice *s;
@@ -962,6 +968,7 @@  static void create_platform_bus(VirtBoardInfo *vbi, qemu_irq *pic)
     memory_region_add_subregion(sysmem,
                                 platform_bus_params.platform_bus_base,
                                 sysbus_mmio_get_region(s, 0));
+    return PLATFORM_BUS_DEVICE(dev);
 }
 
 static void *machvirt_dtb(const struct arm_boot_info *binfo, int *fdt_size)
@@ -1015,7 +1022,7 @@  static void machvirt_init(MachineState *machine)
     VirtMachineState *vms = VIRT_MACHINE(machine);
     qemu_irq pic[NUM_IRQS];
     MemoryRegion *sysmem = get_system_memory();
-    MemoryRegion *secure_sysmem = NULL;
+    MemoryRegion *secure_sysmem = NULL, *reserved_reg;
     int gic_version = vms->gic_version;
     int n, max_cpus;
     MemoryRegion *ram = g_new(MemoryRegion, 1);
@@ -1024,6 +1031,7 @@  static void machvirt_init(MachineState *machine)
     VirtGuestInfoState *guest_info_state = g_malloc0(sizeof *guest_info_state);
     VirtGuestInfo *guest_info = &guest_info_state->info;
     char **cpustr;
+    PlatformBusDevice *pbus;
 
     if (!cpu_model) {
         cpu_model = "cortex-a15";
@@ -1161,7 +1169,7 @@  static void machvirt_init(MachineState *machine)
 
     create_rtc(vbi, pic);
 
-    create_pcie(vbi, pic, vms->highmem);
+    create_pcie(vbi, pic, vms->highmem, &reserved_reg);
 
     create_gpio(vbi, pic);
 
@@ -1200,7 +1208,8 @@  static void machvirt_init(MachineState *machine)
      * another notifier is registered which adds platform bus nodes.
      * Notifiers are executed in registration reverse order.
      */
-    create_platform_bus(vbi, pic);
+    pbus = create_platform_bus(vbi, pic);
+    platform_bus_map_region(pbus, reserved_reg);
 }
 
 static bool virt_get_secure(Object *obj, Error **errp)