@@ -1,4 +1,5 @@
ifeq ($(CONFIG_LINUX), y)
obj-$(CONFIG_SOFTMMU) += common.o
obj-$(CONFIG_PCI) += pci.o
+obj-$(CONFIG_SOFTMMU) += platform.o
endif
new file mode 100644
@@ -0,0 +1,528 @@
+/*
+ * vfio based device assignment support - platform devices
+ *
+ * Copyright Linaro Limited, 2014
+ *
+ * Authors:
+ * Kim Phillips <kim.phillips@linaro.org>
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2. See
+ * the COPYING file in the top-level directory.
+ *
+ * Based on vfio based PCI device assignment support:
+ * Copyright Red Hat, Inc. 2012
+ */
+
+#include <linux/vfio.h>
+#include <sys/ioctl.h>
+#include <sys/mman.h>
+#include "hw/pci/msi.h"
+#include "hw/pci/msix.h"
+#include "qemu/error-report.h"
+#include "qemu/range.h"
+#include "sysemu/sysemu.h"
+#include "hw/vfio/vfio-platform.h"
+
+extern const MemoryRegionOps vfio_region_ops;
+extern const MemoryListener vfio_memory_listener;
+extern QLIST_HEAD(, VFIOGroup) group_list;
+extern QLIST_HEAD(, VFIOAddressSpace) vfio_address_spaces;
+
+static void vfio_put_device(VFIOPlatformDevice *vdev)
+{
+ unsigned int i;
+ VFIODevice *vbasedev = &vdev->vbasedev;
+
+ for (i = 0; i < vbasedev->num_regions; i++) {
+ g_free(vdev->regions[i]);
+ }
+ g_free(vdev->regions);
+ vfio_put_base_device(&vdev->vbasedev);
+}
+
+/*
+ * It is mandatory to pass a VFIOPlatformDevice since VFIODevice
+ * is not a QOM Object and cannot be passed to memory region functions
+ */
+static void vfio_map_region(VFIOPlatformDevice *vdev, int nr)
+{
+ VFIORegion *region = vdev->regions[nr];
+ unsigned size = region->size;
+ char name[64];
+
+ snprintf(name, sizeof(name), "VFIO %s region %d",
+ vdev->vbasedev.name, nr);
+
+ /* A "slow" read/write mapping underlies all regions */
+ memory_region_init_io(&region->mem, OBJECT(vdev), &vfio_region_ops,
+ region, name, size);
+
+ strncat(name, " mmap", sizeof(name) - strlen(name) - 1);
+
+ if (vfio_mmap_region(OBJECT(vdev), region, &region->mem,
+ &region->mmap_mem, &region->mmap, size, 0, name)) {
+ error_report("%s unsupported. Performance may be slow", name);
+ }
+}
+
+static void print_regions(VFIOPlatformDevice *vdev)
+{
+ int i;
+
+ DPRINTF("Device \"%s\" counts %d region(s):\n",
+ vdev->vbasedev.name, vdev->vbasedev.num_regions);
+
+ for (i = 0; i < vdev->vbasedev.num_regions; i++) {
+ DPRINTF("- region %d flags = 0x%lx, size = 0x%lx, "
+ "fd= %d, offset = 0x%lx\n",
+ vdev->regions[i]->nr,
+ (unsigned long)vdev->regions[i]->flags,
+ (unsigned long)vdev->regions[i]->size,
+ vdev->regions[i]->fd,
+ (unsigned long)vdev->regions[i]->fd_offset);
+ }
+}
+
+static int vfio_populate_regions(VFIODevice *vbasedev)
+{
+ struct vfio_region_info reg_info = { .argsz = sizeof(reg_info) };
+ int i, ret = 0;
+ VFIOPlatformDevice *vdev =
+ container_of(vbasedev, VFIOPlatformDevice, vbasedev);
+
+ vdev->regions = g_malloc0(sizeof(VFIORegion *) * vbasedev->num_regions);
+
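+ /* query each region's geometry from the kernel and cache it */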
+ for (i = 0; i < vbasedev->num_regions; i++) {
+ vdev->regions[i] = g_malloc0(sizeof(VFIORegion));
+ reg_info.index = i;
+ ret = ioctl(vbasedev->fd, VFIO_DEVICE_GET_REGION_INFO, &reg_info);
+ if (ret) {
+ error_report("vfio: Error getting region %d info: %m", i);
+ goto error;
+ }
+
+ vdev->regions[i]->flags = reg_info.flags;
+ vdev->regions[i]->size = reg_info.size;
+ vdev->regions[i]->fd_offset = reg_info.offset;
+ vdev->regions[i]->fd = vbasedev->fd;
+ vdev->regions[i]->nr = i;
+ vdev->regions[i]->vbasedev = vbasedev;
+ }
+ print_regions(vdev);
+ return ret;
+error:
+ vfio_put_device(vdev);
+ return ret;
+}
+
+/* not implemented yet */
+static int vfio_platform_check_device(VFIODevice *vdev)
+{
+ return 0;
+}
+
+/* not implemented yet */
+static bool vfio_platform_compute_needs_reset(VFIODevice *vdev)
+{
+ return false;
+}
+
+/* not implemented yet */
+static int vfio_platform_hot_reset_multi(VFIODevice *vdev)
+{
+ return 0;
+}
+
+/*
+ * The eoi function is called on the first access to any MMIO region
+ * after an IRQ was triggered, on the assumption that this access
+ * corresponds to the guest resetting the IRQ status register.
+ * With such a mechanism, only a single IRQ can be handled at a time,
+ * since there is no way to know which IRQ the guest actually completed
+ * (that would require additional knowledge of the IRQ status register
+ * layout).
+ */
+static void vfio_platform_eoi(VFIODevice *vbasedev)
+{
+ VFIOINTp *intp;
+ VFIOPlatformDevice *vdev =
+ container_of(vbasedev, VFIOPlatformDevice, vbasedev);
+ bool eoi_done = false;
+
+ QLIST_FOREACH(intp, &vdev->intp_list, next) {
+ if (intp->state == VFIO_IRQ_ACTIVE) {
+ if (eoi_done) {
+ error_report("several IRQ pending: "
+ "this case should not happen!\n");
+ }
+ DPRINTF("EOI IRQ #%d fd=%d\n",
+ intp->pin, event_notifier_get_fd(&intp->interrupt));
+ intp->state = VFIO_IRQ_INACTIVE;
+
+ /* deassert the virtual IRQ and unmask physical one */
+ qemu_set_irq(intp->qemuirq, 0);
+ vfio_unmask_irqindex(vbasedev, intp->pin);
+ eoi_done = true;
+ }
+ }
+
+ /* in case there are pending IRQs, handle them one at a time */
+ if (!QSIMPLEQ_EMPTY(&vdev->pending_intp_queue)) {
+ intp = QSIMPLEQ_FIRST(&vdev->pending_intp_queue);
+ vfio_intp_interrupt(intp);
+ QSIMPLEQ_REMOVE_HEAD(&vdev->pending_intp_queue, pqnext);
+ }
+ return;
+}
+
+/*
+ * enable/disable the fast path mode
+ * fast path = MMIO region is mmapped (no KVM trap)
+ * slow path = MMIO region is trapped and region callbacks are called
+ * the slow path makes it possible to trap the guest's IRQ status
+ * register reset
+ */
+static void vfio_mmap_set_enabled(VFIOPlatformDevice *vdev, bool enabled)
+{
+ VFIORegion *region;
+ int i;
+
+ DPRINTF("fast path = %d\n", enabled);
+
+ for (i = 0; i < vdev->vbasedev.num_regions; i++) {
+ region = vdev->regions[i];
+
+ /* register space is unmapped to trap EOI */
+ memory_region_set_enabled(&region->mmap_mem, enabled);
+ }
+}
+
+/*
+ * Checks whether the IRQ is still pending. If not, the fast path mode
+ * (where the register space is mmapped) can be restored.
+ * If the IRQ is still pending, we must keep trapping the IRQ status
+ * register reset with mmap disabled (slow path).
+ * The function is called on each mmap_timer event.
+ * By construction a single fd is handled at a time; see the EOI
+ * comment for additional details.
+ */
+static void vfio_intp_mmap_enable(void *opaque)
+{
+ VFIOINTp *tmp;
+ VFIOPlatformDevice *vdev = (VFIOPlatformDevice *)opaque;
+ bool one_active_irq = false;
+
+ QLIST_FOREACH(tmp, &vdev->intp_list, next) {
+ if (tmp->state == VFIO_IRQ_ACTIVE) {
+ if (one_active_irq) {
+ error_report("several active IRQ: "
+ "this case should not happen!\n");
+ }
+ DPRINTF("IRQ #%d still pending, stay in slow path\n",
+ tmp->pin);
+ timer_mod(vdev->mmap_timer,
+ qemu_clock_get_ms(QEMU_CLOCK_VIRTUAL) +
+ vdev->mmap_timeout);
+ one_active_irq = true;
+ }
+ }
+ if (one_active_irq) {
+ return;
+ }
+ DPRINTF("no pending IRQ, restore fast path\n");
+ vfio_mmap_set_enabled(vdev, true);
+}
+
+/*
+ * The fd handler
+ */
+void vfio_intp_interrupt(void *opaque)
+{
+ int ret;
+ VFIOINTp *tmp, *intp = (VFIOINTp *)opaque;
+ VFIOPlatformDevice *vdev = intp->vdev;
+ bool one_active_irq = false;
+
+ /*
+ * first check whether an IRQ is already active;
+ * if so, the new IRQ cannot be handled until the active one
+ * has completed.
+ * by construction the same IRQ as the active one cannot hit again,
+ * since its physical line was disabled by the VFIO driver
+ */
+ QLIST_FOREACH(tmp, &vdev->intp_list, next) {
+ if (tmp->state == VFIO_IRQ_ACTIVE) {
+ one_active_irq = true;
+ }
+ }
+ if (one_active_irq) {
+ /*
+ * the new IRQ gets a pending status and is pushed in
+ * the pending queue
+ */
+ intp->state = VFIO_IRQ_PENDING;
+ QSIMPLEQ_INSERT_TAIL(&vdev->pending_intp_queue,
+ intp, pqnext);
+ return;
+ }
+
+ /* no active IRQ, the new IRQ can be forwarded to guest */
+ DPRINTF("Handle IRQ #%d (fd = %d)\n",
+ intp->pin, event_notifier_get_fd(&intp->interrupt));
+
+ ret = event_notifier_test_and_clear(&intp->interrupt);
+ if (!ret) {
+ DPRINTF("Error when clearing fd=%d\n",
+ event_notifier_get_fd(&intp->interrupt));
+ }
+
+ intp->state = VFIO_IRQ_ACTIVE;
+
+ /* sets slow path */
+ vfio_mmap_set_enabled(vdev, false);
+
+ /* trigger the virtual IRQ */
+ qemu_set_irq(intp->qemuirq, 1);
+
+ /* schedule the mmap timer which will restore the mmap path after EOI */
+ if (vdev->mmap_timeout) {
+ timer_mod(vdev->mmap_timer,
+ qemu_clock_get_ms(QEMU_CLOCK_VIRTUAL) + vdev->mmap_timeout);
+ }
+}
+
+static int vfio_enable_intp(VFIODevice *vbasedev, unsigned int index)
+{
+ struct vfio_irq_set *irq_set;
+ int32_t *pfd;
+ int ret, argsz;
+ int device = vbasedev->fd;
+ VFIOPlatformDevice *vdev =
+ container_of(vbasedev, VFIOPlatformDevice, vbasedev);
+ SysBusDevice *sbdev = SYS_BUS_DEVICE(vdev);
+ VFIOINTp *intp;
+
+ /* allocate and populate a new VFIOINTp structure and add it to the IRQ list */
+ intp = g_malloc0(sizeof(*intp));
+ intp->vdev = vdev;
+ intp->pin = index;
+ intp->state = VFIO_IRQ_INACTIVE;
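+ /* expose a qemu_irq output line for this index on the sysbus device */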
+ sysbus_init_irq(sbdev, &intp->qemuirq);
+
+ ret = event_notifier_init(&intp->interrupt, 0);
+
+ if (ret) {
+ error_report("vfio: Error: event_notifier_init failed ");
+ return ret;
+ }
+ /* build the irq_set to be passed to the vfio kernel driver */
+
+ argsz = sizeof(*irq_set) + sizeof(*pfd);
+
+ irq_set = g_malloc0(argsz);
+ irq_set->argsz = argsz;
+ irq_set->flags = VFIO_IRQ_SET_DATA_EVENTFD | VFIO_IRQ_SET_ACTION_TRIGGER;
+ irq_set->index = index;
+ irq_set->start = 0;
+ irq_set->count = 1;
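+ /* the eventfd is passed in the variable-sized data field of the irq_set */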
+ pfd = (int32_t *)&irq_set->data;
+
+ *pfd = event_notifier_get_fd(&intp->interrupt);
+
+ DPRINTF("register fd=%d/irq index=%d to kernel\n", *pfd, index);
+
+ qemu_set_fd_handler(*pfd, vfio_intp_interrupt, NULL, intp);
+
+ /*
+ * pass the index/fd binding to the kernel driver so that it
+ * triggers this fd on HW IRQ
+ */
+ ret = ioctl(device, VFIO_DEVICE_SET_IRQS, irq_set);
+ g_free(irq_set);
+ if (ret) {
+ error_report("vfio: Error: Failed to pass IRQ fd to the driver: %m");
+ qemu_set_fd_handler(*pfd, NULL, NULL, NULL);
+ close(*pfd); /* TODO: replace with event_notifier_cleanup */
+ return -errno;
+ }
+
+ /* store the new intp in qlist */
+ QLIST_INSERT_HEAD(&vdev->intp_list, intp, next);
+ return 0;
+}
+
+static int vfio_populate_interrupts(VFIODevice *vbasedev)
+{
+ struct vfio_irq_info irq = { .argsz = sizeof(irq) };
+ int i, ret;
+ VFIOPlatformDevice *vdev =
+ container_of(vbasedev, VFIOPlatformDevice, vbasedev);
+
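+ /* this timer restores the fast (mmapped) path once no IRQ is active */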
+ vdev->mmap_timer = timer_new_ms(QEMU_CLOCK_VIRTUAL,
+ vfio_intp_mmap_enable, vdev);
+
+ QSIMPLEQ_INIT(&vdev->pending_intp_queue);
+
+ for (i = 0; i < vbasedev->num_irqs; i++) {
+ irq.index = i;
+
+ DPRINTF("Retrieve IRQ info from vfio platform driver ...\n");
+
+ ret = ioctl(vbasedev->fd, VFIO_DEVICE_GET_IRQ_INFO, &irq);
+ if (ret) {
+ error_printf("vfio: error getting device %s irq info",
+ vbasedev->name);
+ }
+ DPRINTF("- IRQ index %d: count %d, flags=0x%x\n",
+ irq.index, irq.count, irq.flags);
+
+ vfio_enable_intp(vbasedev, irq.index);
+ }
+ return 0;
+}
+
+static VFIODeviceOps vfio_platform_ops = {
+ .vfio_compute_needs_reset = vfio_platform_compute_needs_reset,
+ .vfio_hot_reset_multi = vfio_platform_hot_reset_multi,
+ .vfio_eoi = vfio_platform_eoi,
+ .vfio_check_device = vfio_platform_check_device,
+ .vfio_populate_regions = vfio_populate_regions,
+ .vfio_populate_interrupts = vfio_populate_interrupts,
+};
+
+static int vfio_base_device_init(VFIODevice *vbasedev)
+{
+ VFIOGroup *group;
+ VFIODevice *vbasedev_iter;
+ char path[PATH_MAX], iommu_group_path[PATH_MAX], *group_name;
+ ssize_t len;
+ struct stat st;
+ int groupid;
+ int ret;
+
+ /* name must be set prior to the call */
+ if (vbasedev->name == NULL) {
+ return -EINVAL;
+ }
+
+ /* Check that the host device exists */
+ snprintf(path, sizeof(path), "/sys/bus/platform/devices/%s/",
+ vbasedev->name);
+
+ if (stat(path, &st) < 0) {
+ error_report("vfio: error: no such host device: %s", path);
+ return -errno;
+ }
+
+ strncat(path, "iommu_group", sizeof(path) - strlen(path) - 1);
+ len = readlink(path, iommu_group_path, sizeof(path));
+ if (len <= 0 || len >= sizeof(path)) {
+ error_report("vfio: error no iommu_group for device");
+ return len < 0 ? -errno : -ENAMETOOLONG;
+ }
+
+ iommu_group_path[len] = 0;
+ group_name = basename(iommu_group_path);
+
+ if (sscanf(group_name, "%d", &groupid) != 1) {
+ error_report("vfio: error reading %s: %m", path);
+ return -errno;
+ }
+
+ DPRINTF("%s(%s) group %d\n", __func__, vbasedev->name, groupid);
+
+ group = vfio_get_group(groupid, &address_space_memory);
+ if (!group) {
+ error_report("vfio: failed to get group %d", groupid);
+ return -ENOENT;
+ }
+
+ snprintf(path, sizeof(path), "%s", vbasedev->name);
+
+ QLIST_FOREACH(vbasedev_iter, &group->device_list, next) {
+ if (strcmp(vbasedev_iter->name, vbasedev->name) == 0) {
+ error_report("vfio: error: device %s is already attached", path);
+ vfio_put_group(group);
+ return -EBUSY;
+ }
+ }
+ ret = vfio_get_device(group, path, vbasedev);
+ if (ret < 0) {
+ error_report("vfio: failed to get device %s", path);
+ vfio_put_group(group);
+ return ret;
+ }
+ return ret;
+}
+
+static void vfio_platform_realize(DeviceState *dev, Error **errp)
+{
+ VFIOPlatformDevice *vdev = VFIO_PLATFORM_DEVICE(dev);
+ SysBusDevice *sbdev = SYS_BUS_DEVICE(dev);
+ VFIODevice *vbasedev = &vdev->vbasedev;
+ int i, ret;
+
+ vbasedev->type = VFIO_DEVICE_TYPE_PLATFORM;
+ vbasedev->ops = &vfio_platform_ops;
+
+ DPRINTF("vfio device %s, compat = %s\n", vbasedev->name, vdev->compat);
+
+ ret = vfio_base_device_init(vbasedev);
+ if (ret < 0) {
+ return;
+ }
+
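+ /* map each region and expose it as a sysbus MMIO region */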
+ for (i = 0; i < vbasedev->num_regions; i++) {
+ vfio_map_region(vdev, i);
+ sysbus_init_mmio(sbdev, &vdev->regions[i]->mem);
+ }
+}
+
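+/* migration is not yet supported: the device is marked unmigratable */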
+static const VMStateDescription vfio_platform_vmstate = {
+ .name = TYPE_VFIO_PLATFORM,
+ .version_id = 3,
+ .minimum_version_id = 2,
+ .fields = (VMStateField[]) {
+ VMSTATE_END_OF_LIST()
+ },
+ .unmigratable = 1,
+};
+
+static Property vfio_platform_dev_properties[] = {
+ DEFINE_PROP_STRING("vfio_device", VFIOPlatformDevice, vbasedev.name),
+ DEFINE_PROP_STRING("compat", VFIOPlatformDevice, compat),
+ DEFINE_PROP_UINT32("mmap-timeout-ms", VFIOPlatformDevice,
+ mmap_timeout, 1100),
+ DEFINE_PROP_UINT32("num_irqs", VFIOPlatformDevice,
+ vbasedev.num_irqs, 0),
+ DEFINE_PROP_UINT32("num_regions", VFIOPlatformDevice,
+ vbasedev.num_regions, 0),
+ DEFINE_PROP_BOOL("irqfd", VFIOPlatformDevice, irqfd_allowed, true),
+ DEFINE_PROP_END_OF_LIST(),
+};
+
+static void vfio_platform_class_init(ObjectClass *klass, void *data)
+{
+ DeviceClass *dc = DEVICE_CLASS(klass);
+
+ dc->realize = vfio_platform_realize;
+ dc->props = vfio_platform_dev_properties;
+ dc->vmsd = &vfio_platform_vmstate;
+ dc->desc = "VFIO-based platform device assignment";
+ set_bit(DEVICE_CATEGORY_MISC, dc->categories);
+}
+
+static const TypeInfo vfio_platform_dev_info = {
+ .name = TYPE_VFIO_PLATFORM,
+ .parent = TYPE_SYS_BUS_DEVICE,
+ .instance_size = sizeof(VFIOPlatformDevice),
+ .class_init = vfio_platform_class_init,
+ .class_size = sizeof(VFIOPlatformDeviceClass),
+};
+
+static void register_vfio_platform_dev_type(void)
+{
+ type_register_static(&vfio_platform_dev_info);
+}
+
+type_init(register_vfio_platform_dev_type)
new file mode 100644
@@ -0,0 +1,74 @@
+/*
+ * vfio based device assignment support - platform devices
+ *
+ * Copyright Linaro Limited, 2014
+ *
+ * Authors:
+ * Kim Phillips <kim.phillips@linaro.org>
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2. See
+ * the COPYING file in the top-level directory.
+ *
+ * Based on vfio based PCI device assignment support:
+ * Copyright Red Hat, Inc. 2012
+ */
+
+#ifndef HW_VFIO_VFIO_PLATFORM_H
+#define HW_VFIO_VFIO_PLATFORM_H
+
+#include "hw/sysbus.h"
+#include "hw/vfio/vfio-common.h"
+
+#define TYPE_VFIO_PLATFORM "vfio-platform"
+
+enum {
+ VFIO_IRQ_INACTIVE = 0,
+ VFIO_IRQ_PENDING = 1,
+ VFIO_IRQ_ACTIVE = 2,
+ /* VFIO_IRQ_ACTIVE_AND_PENDING cannot happen with VFIO */
+};
+
+typedef struct VFIOINTp {
+ QLIST_ENTRY(VFIOINTp) next; /* entry for IRQ list */
+ QSIMPLEQ_ENTRY(VFIOINTp) pqnext; /* entry for pending IRQ queue */
+ EventNotifier interrupt; /* eventfd triggered on interrupt */
+ EventNotifier unmask; /* eventfd for unmask on QEMU bypass */
+ qemu_irq qemuirq;
+ struct VFIOPlatformDevice *vdev; /* back pointer to device */
+ int state; /* inactive, pending, active */
+ bool kvm_accel; /* set when QEMU bypass through KVM enabled */
+ uint8_t pin; /* index */
+ uint8_t virtualID; /* virtual IRQ */
+} VFIOINTp;
+
+typedef struct VFIOPlatformDevice {
+ SysBusDevice sbdev;
+ VFIODevice vbasedev; /* not a QOM object */
+ VFIORegion **regions;
+ QLIST_HEAD(, VFIOINTp) intp_list; /* list of IRQ */
+ /* queue of pending IRQ */
+ QSIMPLEQ_HEAD(pending_intp_queue, VFIOINTp) pending_intp_queue;
+ char *compat; /* compatibility string */
+ bool irqfd_allowed;
+ uint32_t mmap_timeout; /* delay to re-enable mmaps after interrupt */
+ QEMUTimer *mmap_timer; /* enable mmaps after periods w/o interrupts */
+} VFIOPlatformDevice;
+
+typedef struct VFIOPlatformDeviceClass {
+ /*< private >*/
+ SysBusDeviceClass parent_class;
+ /*< public >*/
+} VFIOPlatformDeviceClass;
+
+#define VFIO_PLATFORM_DEVICE(obj) \
+ OBJECT_CHECK(VFIOPlatformDevice, (obj), TYPE_VFIO_PLATFORM)
+#define VFIO_PLATFORM_DEVICE_CLASS(klass) \
+ OBJECT_CLASS_CHECK(VFIOPlatformDeviceClass, (klass), TYPE_VFIO_PLATFORM)
+#define VFIO_PLATFORM_DEVICE_GET_CLASS(obj) \
+ OBJECT_GET_CLASS(VFIOPlatformDeviceClass, (obj), TYPE_VFIO_PLATFORM)
+
+void vfio_intp_interrupt(void *opaque);
+void vfio_setup_irqfd(SysBusDevice *dev, int index, int virq);
+
+#endif