diff mbox

[RFC,v2,5/6] virt: Assign a VFIO platform device to a virt VM in QEMU command line

Message ID 1397057589-11779-6-git-send-email-eric.auger@linaro.org
State New
Headers show

Commit Message

Auger Eric April 9, 2014, 3:33 p.m. UTC
This patch allows the end user to specify, on the QEMU command line,
the device to be directly assigned to a virt VM.
The QEMU platform device becomes generic.

Current choice is to reuse the "-device" option.

For example when assigning Calxeda Midway xgmac device this option is
used:
-device vfio-platform,vfio_device="fff51000.ethernet",\
compat="calxeda/hb-xgmac",mmap-timeout-ms=1000

where
- fff51000.ethernet is the name of the device in
  /sys/bus/platform/devices/
- calxeda/hb-xgmac is the compatible string, where the standard comma
  separator is replaced by "/" since the comma is specifically used by the
  QEMU command line parser
- mmap-timeout-ms is the minimal amount of time (ms) during which the IP
  register space stays MMIO mapped after an IRQ triggers in order to
  trap the end of interrupt (EOI). This is an optional parameter
  (default value set to 1100 ms).

virt machine was modified to interpret this line and automatically
- map the device at a chosen guest physical address in
  [0xa004000, 0x10000000],
- map the device IRQs after 48,
- create the associated guest device tree with the provided
  compatibility.

The underlying implementation of the "-device" option is not standard,
which is debatable. Normally the option causes the QEMU device realize
function to be called once, after the virtual machine init has executed.
In this case QDEV mappings and device tree creation must happen as well.
Since virt is the place where the whole memory and IRQ mapping is
known and where the device tree is created, it was chosen to interpret the
option line there. This means the realize function is called twice, once in
virt.c and once after machine init returns. The second call returns
immediately since the QEMU device is recognized as already existing.
Another way to implement this would be to create a new option in QEMU.

Known limitations:
- only a single compatible value is currently supported
- IRQ properties set in the device tree should be refined
- more generally, devices with more complex device tree nodes must be
  studied and are not currently handled
- cases where multiple VFIO devices are assigned could not be tested

Signed-off-by: Eric Auger <eric.auger@linaro.org>
---
 hw/arm/virt.c      | 178 +++++++++++++++++++++++++++++++++++++++++++----------
 hw/vfio/platform.c |  43 ++++++++++---
 2 files changed, 181 insertions(+), 40 deletions(-)
diff mbox

Patch

diff --git a/hw/arm/virt.c b/hw/arm/virt.c
index 31ae7d2..1fb66ef 100644
--- a/hw/arm/virt.c
+++ b/hw/arm/virt.c
@@ -40,6 +40,17 @@ 
 #include "exec/address-spaces.h"
 #include "qemu/bitops.h"
 #include "qemu/error-report.h"
+#include "monitor/qdev.h"
+#include "qemu/config-file.h"
+
+/*
+ * this function is implemented in vfio/platform.c
+ * it returns the name, compatibility, IRQ number and register set size.
+ * the function only is implemented for VFIO platform devices
+ */
+void vfio_get_props(SysBusDevice *s, char **pname, char **pcompat,
+                    int *pnum_irqs, size_t *psize);
+
 
 #define NUM_VIRTIO_TRANSPORTS 32
 
@@ -65,7 +76,7 @@  enum {
     VIRT_GIC_CPU,
     VIRT_UART,
     VIRT_MMIO,
-    VIRT_ETHERNET,
+    VIRT_VFIO,
 };
 
 typedef struct MemMapEntry {
@@ -79,7 +90,10 @@  typedef struct VirtBoardInfo {
     const char *qdevname;
     const char *gic_compatible;
     const MemMapEntry *memmap;
+    qemu_irq pic[NUM_IRQS];
     const int *irqmap;
+    hwaddr avail_vfio_base;
+    int avail_vfio_irq;
     int smp_cpus;
     void *fdt;
     int fdt_size;
@@ -105,16 +119,16 @@  static const MemMapEntry a15memmap[] = {
     [VIRT_GIC_CPU] = { 0x8002000, 0x1000 },
     [VIRT_UART] = { 0x9000000, 0x1000 },
     [VIRT_MMIO] = { 0xa000000, 0x200 },
+    [VIRT_VFIO] = { 0xa004000, 0x0 }, /* size is dynamically populated */
     /* ...repeating for a total of NUM_VIRTIO_TRANSPORTS, each of that size */
     /* 0x10000000 .. 0x40000000 reserved for PCI */
-    [VIRT_MEM] = { 0x40000000, 1ULL * 1024 * 1024 * 1024 },
-    [VIRT_ETHERNET] = { 0xfff41000, 0x1000 },
+    [VIRT_MEM] = { 0x40000000, 30ULL * 1024 * 1024 * 1024 },
 };
 
 static const int a15irqmap[] = {
     [VIRT_UART] = 1,
     [VIRT_MMIO] = 16, /* ...to 16 + NUM_VIRTIO_TRANSPORTS - 1 */
-    [VIRT_ETHERNET] = 77,
+    [VIRT_VFIO] = 48,
 };
 
 static VirtBoardInfo machines[] = {
@@ -266,7 +280,7 @@  static void fdt_add_gic_node(const VirtBoardInfo *vbi)
     qemu_fdt_setprop_cell(vbi->fdt, "/intc", "phandle", gic_phandle);
 }
 
-static void create_uart(const VirtBoardInfo *vbi, qemu_irq *pic)
+static void create_uart(const VirtBoardInfo *vbi)
 {
     char *nodename;
     hwaddr base = vbi->memmap[VIRT_UART].base;
@@ -275,7 +289,7 @@  static void create_uart(const VirtBoardInfo *vbi, qemu_irq *pic)
     const char compat[] = "arm,pl011\0arm,primecell";
     const char clocknames[] = "uartclk\0apb_pclk";
 
-    sysbus_create_simple("pl011", base, pic[irq]);
+    sysbus_create_simple("pl011", base, vbi->pic[irq]);
 
     nodename = g_strdup_printf("/pl011@%" PRIx64, base);
     qemu_fdt_add_subnode(vbi->fdt, nodename);
@@ -294,34 +308,133 @@  static void create_uart(const VirtBoardInfo *vbi, qemu_irq *pic)
     g_free(nodename);
 }
 
-static void create_ethernet(const VirtBoardInfo *vbi, qemu_irq *pic)
+/*
+ * Function called for each -device option found in the qemu user command
+ * line; only the vfio-platform ones are processed here:
+ * -device vfio-platform,vfio_device="<device>",compat="<compat>"
+ * for instance <device> can be fff51000.ethernet (device unbound from
+ * original driver and bound to vfio driver)
+ * for instance <compat> can be calxeda/hb-xgmac
+ * note "/" replaces normal ",". Indeed "," would be interpreted by QEMU as
+ * a separator. Returns 0 on success (or option ignored), -1 on failure.
+ */
+
+static int vfio_init_func(QemuOpts *opts, void *opaque)
 {
+    VirtBoardInfo *vbi = opaque;
+    const char *driver = qemu_opt_get(opts, "driver");
+    DeviceState *dev;
+    SysBusDevice *s;
+
+    /* index of the first board IRQ line given to this device */
+    int irq_start = vbi->avail_vfio_irq;
+
+    char *slash;
     char *nodename;
-    hwaddr base = vbi->memmap[VIRT_ETHERNET].base;
-    hwaddr size = vbi->memmap[VIRT_ETHERNET].size;
-    const char compat[] = "calxeda,hb-xgmac";
-    int irqm = vbi->irqmap[VIRT_ETHERNET];
-    int irqp = irqm+1;
-    int irqlp = irqm+2;
 
-    sysbus_create_varargs("vfio-platform", base,
-                          pic[irqm], pic[irqp], pic[irqlp], NULL);
+    /* this will contain the compatibility string with the first "/"
+     * replaced by ","
+     */
+    char *corrected_compat;
 
-    nodename = g_strdup_printf("/ethernet@%" PRIx64, base);
-    qemu_fdt_add_subnode(vbi->fdt, nodename);
+    char *name = NULL;   /* belongs to the device, not freed here */
+    char *compat = NULL; /* belongs to the device, not freed here */
 
-    /* Note that we can't use setprop_string because of the embedded NUL */
-    qemu_fdt_setprop(vbi->fdt, nodename, "compatible", compat, sizeof(compat));
-    qemu_fdt_setprop_sized_cells(vbi->fdt, nodename, "reg", 2, base, 2, size);
-    qemu_fdt_setprop_cells(vbi->fdt, nodename, "interrupts",
-                                0x0, irqm, 0x4,
-                                0x0, irqp, 0x4,
-                                0x0, irqlp, 0x4);
+    int num_irqs;
+    size_t size;
+    int i;
+    uint32_t *irq_attr;
 
-    g_free(nodename);
+    if (!driver) {
+        qerror_report(QERR_MISSING_PARAMETER, "driver");
+        return -1;
+    }
+
+    /* every other -device option is left to the standard handling */
+    if (strcasecmp(driver, "vfio-platform") != 0) {
+        return 0;
+    }
+
+    dev = qdev_device_add(opts);
+    if (!dev) {
+        return -1;
+    }
+    s = SYS_BUS_DEVICE(dev);
+    vfio_get_props(s, &name, &compat, &num_irqs, &size);
+
+    /* name and compat are required, the register space must end within
+     * the VFIO window and the interrupts must fit in pic[]
+     */
+    if (!name || !compat || vbi->avail_vfio_base + size > 0x10000000 ||
+        irq_start + num_irqs > NUM_IRQS) {
+        qerror_report(QERR_DEVICE_INIT_FAILED, name ? name : driver);
+        object_unref(OBJECT(dev));
+        return -1;
+    }
+
+    /* currently a single compatibility value is supported: only its
+     * first "/", if any, is turned back into ","
+     */
+    corrected_compat = g_strdup(compat);
+    slash = strchr(corrected_compat, '/');
+    if (slash) {
+        *slash = ',';
+    }
+
+    sysbus_mmio_map(s, 0, vbi->avail_vfio_base);
+
+    nodename = g_strdup_printf("/%s@%" PRIx64, name, vbi->avail_vfio_base);
+    qemu_fdt_add_subnode(vbi->fdt, nodename);
+
+    /* the property value has to include the terminating NUL */
+    qemu_fdt_setprop(vbi->fdt, nodename, "compatible",
+                     corrected_compat, strlen(corrected_compat) + 1);
+
+    qemu_fdt_setprop_sized_cells(vbi->fdt, nodename,
+                                 "reg", 2, vbi->avail_vfio_base, 2, size);
+
+    /* three cells per interrupt */
+    irq_attr = g_malloc0(num_irqs * 3 * sizeof(uint32_t));
+    for (i = 0; i < num_irqs; i++) {
+        sysbus_connect_irq(s, i, vbi->pic[irq_start + i]);
+        irq_attr[3 * i] = cpu_to_be32(0);
+        irq_attr[3 * i + 1] = cpu_to_be32(irq_start + i);
+        irq_attr[3 * i + 2] = cpu_to_be32(0x4);
+    }
+
+    qemu_fdt_setprop(vbi->fdt, nodename, "interrupts",
+                     irq_attr, num_irqs * 3 * sizeof(uint32_t));
+
+    /* increment base address and IRQ index for next VFIO device */
+    vbi->avail_vfio_base += size;
+    vbi->avail_vfio_irq += num_irqs;
+
+    g_free(nodename);
+    g_free(corrected_compat);
+    g_free(irq_attr);
+    object_unref(OBJECT(dev));
+
+    return 0;
+}
+
+/*
+ * walks through the -device options of the command line and calls
+ * vfio_init_func on each of them (only vfio-platform ones are acted upon);
+ * exits QEMU if the walk reports a failure.
+ */
+
+static void create_vfio_devices(VirtBoardInfo *vbi)
+{
+    vbi->avail_vfio_irq = vbi->irqmap[VIRT_VFIO];
+    vbi->avail_vfio_base = vbi->memmap[VIRT_VFIO].base;
+    if (qemu_opts_foreach(qemu_find_opts("device"),
+                          vfio_init_func, vbi, 1) != 0) {
+        exit(1);
+    }
 }
 
-static void create_virtio_devices(const VirtBoardInfo *vbi, qemu_irq *pic)
+
+static void create_virtio_devices(const VirtBoardInfo *vbi)
 {
     int i;
     hwaddr size = vbi->memmap[VIRT_MMIO].size;
@@ -335,7 +448,7 @@  static void create_virtio_devices(const VirtBoardInfo *vbi, qemu_irq *pic)
         int irq = vbi->irqmap[VIRT_MMIO] + i;
         hwaddr base = vbi->memmap[VIRT_MMIO].base + i * size;
 
-        sysbus_create_simple("virtio-mmio", base, pic[irq]);
+        sysbus_create_simple("virtio-mmio", base, vbi->pic[irq]);
     }
 
     for (i = NUM_VIRTIO_TRANSPORTS - 1; i >= 0; i--) {
@@ -366,7 +479,6 @@  static void *machvirt_dtb(const struct arm_boot_info *binfo, int *fdt_size)
 
 static void machvirt_init(QEMUMachineInitArgs *args)
 {
-    qemu_irq pic[NUM_IRQS];
     MemoryRegion *sysmem = get_system_memory();
     int n;
     MemoryRegion *ram = g_new(MemoryRegion, 1);
@@ -451,17 +563,19 @@  static void machvirt_init(QEMUMachineInitArgs *args)
     }
 
     for (n = 0; n < NUM_IRQS; n++) {
-        pic[n] = qdev_get_gpio_in(dev, n);
+        vbi->pic[n] = qdev_get_gpio_in(dev, n);
     }
 
-    create_uart(vbi, pic);
-    create_ethernet(vbi, pic);
+    create_uart(vbi);
+
+    /* create vfio platform devices if any are passed in command line*/
+    create_vfio_devices(vbi);
 
     /* Create mmio transports, so the user can create virtio backends
      * (which will be automatically plugged in to the transports). If
      * no backend is created the transport will just sit harmlessly idle.
      */
-    create_virtio_devices(vbi, pic);
+    create_virtio_devices(vbi);
 
     vbi->bootinfo.ram_size = args->ram_size;
     vbi->bootinfo.kernel_filename = args->kernel_filename;
diff --git a/hw/vfio/platform.c b/hw/vfio/platform.c
index f148edd..8f30d41 100644
--- a/hw/vfio/platform.c
+++ b/hw/vfio/platform.c
@@ -93,6 +93,7 @@  typedef struct VFIODevice {
     int num_irqs;
     int interrupt; /* type of the interrupt, might disappear */
     char *name;
+    char *compat;
     uint32_t mmap_timeout; /* mmap timeout value in ms */
     VFIORegion regions[PLATFORM_NUM_REGIONS];
     QLIST_ENTRY(VFIODevice) next;
@@ -100,6 +101,22 @@  typedef struct VFIODevice {
     QLIST_HEAD(, VFIOINTp) intp_list;
 } VFIODevice;
 
+/*
+ * hands back the name, compat string, number of IRQs and regions[0] size of
+ * a QEMU VFIO device; the strings are not copied and stay owned by it
+ */
+void vfio_get_props(SysBusDevice *s, char **pname,
+                    char **pcompat, int *pnum_irqs, size_t *psize);
+void vfio_get_props(SysBusDevice *s, char **pname,
+                    char **pcompat, int *pnum_irqs, size_t *psize)
+{
+    VFIODevice *vdev = DO_UPCAST(VFIODevice, sbdev, s);
+
+    *psize = vdev->regions[0].size;
+    *pnum_irqs = vdev->num_irqs;
+    *pcompat = vdev->compat;
+    *pname = vdev->name;
+}
 
 
 static void vfio_unmask_intp(VFIODevice *vdev, int index)
@@ -556,11 +573,6 @@  static void vfio_platform_realize(DeviceState *dev, Error **errp)
     struct stat st;
     int groupid, i, ret;
 
-
-    /* TODO: pass device name on command line */
-    vdev->name = malloc(PATH_MAX);
-    strcpy(vdev->name, "fff51000.ethernet");
-
     /* Check that the host device exists */
     snprintf(path, sizeof(path), "/sys/bus/platform/devices/%s/", vdev->name);
     if (stat(path, &st) < 0) {
@@ -568,6 +580,8 @@  static void vfio_platform_realize(DeviceState *dev, Error **errp)
         return;
     }
 
+    DPRINTF("vfio device %s, compat = %s\n", path, vdev->compat);
+
     strncat(path, "iommu_group", sizeof(path) - strlen(path) - 1);
 
     len = readlink(path, iommu_group_path, PATH_MAX);
@@ -596,10 +610,15 @@  static void vfio_platform_realize(DeviceState *dev, Error **errp)
     QLIST_FOREACH(pvdev, &group->device_list, next) {
         DPRINTF("compare %s versus %s\n", pvdev->name, vdev->name);
         if (strcmp(pvdev->name, vdev->name) == 0) {
-
+            /*
+             * in current implementation realize is called twice:
+             * 1) once in the virt. machine (where qdev stuff are done +
+             *    device tree generation,
+             * 2) once in vl.c (-device standard handling)
+             * on 2) realize completes here.
+             */
             DPRINTF("vfio device %s already is attached to group %d\n",
                     vdev->name, groupid);
-
             vfio_put_group(group, NULL);
             return;
         }
@@ -625,14 +644,22 @@  static const VMStateDescription vfio_platform_vmstate = {
     .unmigratable = 1,
 };
 
+static Property vfio_platform_dev_properties[] = {
+    DEFINE_PROP_STRING("vfio_device", VFIODevice, name),
+    DEFINE_PROP_STRING("compat", VFIODevice, compat),
+    DEFINE_PROP_UINT32("mmap-timeout-ms", VFIODevice, mmap_timeout, 1100),
+    DEFINE_PROP_END_OF_LIST(),
+};
+
 static void vfio_platform_dev_class_init(ObjectClass *klass, void *data)
 {
     DeviceClass *dc = DEVICE_CLASS(klass);
 
     dc->realize = vfio_platform_realize;
     dc->vmsd = &vfio_platform_vmstate;
+    dc->props = vfio_platform_dev_properties;
     dc->desc = "VFIO-based platform device assignment";
-    dc->cannot_instantiate_with_device_add_yet = true;
+    dc->cannot_instantiate_with_device_add_yet = false;
     set_bit(DEVICE_CATEGORY_MISC, dc->categories);
 }