Message ID | 20201026105504.4023620-16-philmd@redhat.com |
---|---|
State | New |
Series | util/vfio-helpers: Allow using multiple MSIX IRQs |
Hi Philippe,

On 10/26/20 11:55 AM, Philippe Mathieu-Daudé wrote:
> This driver uses the host page size to align its memory regions,
> but this size is not always compatible with the IOMMU. Add a
> check if the size matches, and bails out providing a hint what
> is the minimum page size the driver should use.
>
> Suggested-by: Alex Williamson <alex.williamson@redhat.com>
> Reviewed-by: Stefan Hajnoczi <stefanha@redhat.com>
> Signed-off-by: Philippe Mathieu-Daudé <philmd@redhat.com>
> ---
>  util/vfio-helpers.c | 28 ++++++++++++++++++++++++++--
>  util/trace-events   |  1 +
>  2 files changed, 27 insertions(+), 2 deletions(-)
>
> diff --git a/util/vfio-helpers.c b/util/vfio-helpers.c
> index 5e288dfa113..874d76c2a2a 100644
> --- a/util/vfio-helpers.c
> +++ b/util/vfio-helpers.c
> @@ -11,6 +11,7 @@
>   */
>
>  #include "qemu/osdep.h"
> +#include "qemu/cutils.h"
>  #include <sys/ioctl.h>
>  #include <linux/vfio.h>
>  #include "qapi/error.h"
> @@ -288,7 +289,7 @@ static void collect_usable_iova_ranges(QEMUVFIOState *s, void *buf)
>  }
>
>  static int qemu_vfio_init_pci(QEMUVFIOState *s, const char *device,
> -                              Error **errp)
> +                              size_t *requested_page_size, Error **errp)
>  {
>      int ret;
>      int i;
> @@ -299,6 +300,8 @@ static int qemu_vfio_init_pci(QEMUVFIOState *s, const char *device,
>      struct vfio_device_info device_info = { .argsz = sizeof(device_info) };
>      char *group_file = NULL;
>
> +    assert(requested_page_size && is_power_of_2(*requested_page_size));
> +
>      s->usable_iova_ranges = NULL;
>
>      /* Create a new container */
> @@ -373,6 +376,27 @@ static int qemu_vfio_init_pci(QEMUVFIOState *s, const char *device,
>          ret = -errno;
>          goto fail;
>      }
> +    if (!(iommu_info->flags & VFIO_IOMMU_INFO_PGSIZES)) {
> +        error_setg(errp, "Failed to get IOMMU page size info");
> +        ret = -EINVAL;
> +        goto fail;
> +    }
> +    trace_qemu_vfio_iommu_iova_pgsizes(iommu_info->iova_pgsizes);
> +    if (!(iommu_info->iova_pgsizes & *requested_page_size)) {
> +        g_autofree char *req_page_size_str = size_to_str(*requested_page_size);
> +        g_autofree char *min_page_size_str = NULL;
> +        uint64_t pgsizes_masked;
> +
> +        pgsizes_masked = MAKE_64BIT_MASK(0, ctz64(*requested_page_size));
> +        *requested_page_size = 1U << ctz64(iommu_info->iova_pgsizes
> +                                           & ~pgsizes_masked);
> +        min_page_size_str = size_to_str(*requested_page_size);
> +        error_setg(errp, "Unsupported IOMMU page size: %s", req_page_size_str);
> +        error_append_hint(errp, "Minimum IOMMU page size: %s\n",
> +                          min_page_size_str);

This blocks the tentative 64kB support. Previously I was able to run the use
case with a 64kB page host while the MPS used by the device is 4kB. Of course
I have no evidence yet that my work is correct - besides the fact that it works
in my case for a specific device - but at least we should make sure we do not
introduce a new blocker here.

Also, as discussed together, f68453237b ("block/nvme: Map doorbells pages
write-only") causes trouble with 64kB pages, as there you attempt to mmap 2
consecutive 4kB pages with different attributes. The 2nd mmap fails.
Thanks

Eric

> +        ret = -EINVAL;
> +        goto fail;
> +    }
>
>      /*
>       * if the kernel does not report usable IOVA regions, choose
> @@ -520,7 +544,7 @@ QEMUVFIOState *qemu_vfio_open_pci(const char *device, size_t *min_page_size,
>      int r;
>      QEMUVFIOState *s = g_new0(QEMUVFIOState, 1);
>
> -    r = qemu_vfio_init_pci(s, device, errp);
> +    r = qemu_vfio_init_pci(s, device, min_page_size, errp);
>      if (r) {
>          g_free(s);
>          return NULL;
> diff --git a/util/trace-events b/util/trace-events
> index 7faad2a718c..3c36def9f30 100644
> --- a/util/trace-events
> +++ b/util/trace-events
> @@ -87,6 +87,7 @@ qemu_vfio_do_mapping(void *s, void *host, uint64_t iova, size_t size) "s %p host
>  qemu_vfio_dma_map(void *s, void *host, size_t size, bool temporary, uint64_t *iova) "s %p host %p size 0x%zx temporary %d &iova %p"
>  qemu_vfio_dma_mapped(void *s, void *host, uint64_t iova, size_t size) "s %p host %p <-> iova 0x%"PRIx64" size 0x%zx"
>  qemu_vfio_dma_unmap(void *s, void *host) "s %p host %p"
> +qemu_vfio_iommu_iova_pgsizes(uint64_t iova_pgsizes) "iommu page size bitmask: 0x%08"PRIx64
>  qemu_vfio_pci_read_config(void *buf, int ofs, int size, uint64_t region_ofs, uint64_t region_size) "read cfg ptr %p ofs 0x%x size %d (region ofs 0x%"PRIx64" size %"PRId64")"
>  qemu_vfio_pci_write_config(void *buf, int ofs, int size, uint64_t region_ofs, uint64_t region_size) "write cfg ptr %p ofs 0x%x size %d (region ofs 0x%"PRIx64" size %"PRId64")"
>  qemu_vfio_region_info(const char *desc, uint64_t offset, uint64_t size, uint32_t cap_offset) "region '%s' ofs 0x%"PRIx64" size %"PRId64" cap_ofs %"PRId32
>
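For context, the mmap() limitation Eric describes above comes down to page
granularity: the file offset passed to mmap() must be a multiple of the host
page size, and protection is applied per host page, so on a 64kB-page host two
adjacent 4kB device regions cannot be mapped separately with different
attributes. The following is a minimal standalone sketch of that rule, not
QEMU code; it assumes a 64kB host page size and uses memfd_create() as a
stand-in for the device region file descriptor.

#define _GNU_SOURCE
#include <errno.h>
#include <stdio.h>
#include <string.h>
#include <sys/mman.h>
#include <unistd.h>

int main(void)
{
    long pagesz = sysconf(_SC_PAGESIZE);   /* 65536 on a 64kB-page kernel */
    int fd = memfd_create("demo", 0);      /* stand-in for the device region fd */

    if (fd < 0 || ftruncate(fd, 2 * 65536) != 0) {
        return 1;
    }

    /* Offset 0 is always page-aligned: a 4kB read-write mapping succeeds. */
    void *rw = mmap(NULL, 4096, PROT_READ | PROT_WRITE, MAP_SHARED, fd, 0);

    /*
     * Offset 4096 is page-aligned only when the host page size is 4kB.
     * With 64kB pages this second, write-only mapping fails with EINVAL:
     * both 4kB regions live inside the same host page, so they cannot be
     * mapped separately with different attributes.
     */
    void *wo = mmap(NULL, 4096, PROT_WRITE, MAP_SHARED, fd, 4096);
    if (wo == MAP_FAILED) {
        fprintf(stderr, "second mmap failed: %s (host page size %ld)\n",
                strerror(errno), pagesz);
    } else {
        munmap(wo, 4096);
    }

    if (rw != MAP_FAILED) {
        munmap(rw, 4096);
    }
    close(fd);
    return 0;
}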
diff --git a/util/vfio-helpers.c b/util/vfio-helpers.c
index 5e288dfa113..874d76c2a2a 100644
--- a/util/vfio-helpers.c
+++ b/util/vfio-helpers.c
@@ -11,6 +11,7 @@
  */
 
 #include "qemu/osdep.h"
+#include "qemu/cutils.h"
 #include <sys/ioctl.h>
 #include <linux/vfio.h>
 #include "qapi/error.h"
@@ -288,7 +289,7 @@ static void collect_usable_iova_ranges(QEMUVFIOState *s, void *buf)
 }
 
 static int qemu_vfio_init_pci(QEMUVFIOState *s, const char *device,
-                              Error **errp)
+                              size_t *requested_page_size, Error **errp)
 {
     int ret;
     int i;
@@ -299,6 +300,8 @@ static int qemu_vfio_init_pci(QEMUVFIOState *s, const char *device,
     struct vfio_device_info device_info = { .argsz = sizeof(device_info) };
     char *group_file = NULL;
 
+    assert(requested_page_size && is_power_of_2(*requested_page_size));
+
     s->usable_iova_ranges = NULL;
 
     /* Create a new container */
@@ -373,6 +376,27 @@ static int qemu_vfio_init_pci(QEMUVFIOState *s, const char *device,
         ret = -errno;
         goto fail;
     }
+    if (!(iommu_info->flags & VFIO_IOMMU_INFO_PGSIZES)) {
+        error_setg(errp, "Failed to get IOMMU page size info");
+        ret = -EINVAL;
+        goto fail;
+    }
+    trace_qemu_vfio_iommu_iova_pgsizes(iommu_info->iova_pgsizes);
+    if (!(iommu_info->iova_pgsizes & *requested_page_size)) {
+        g_autofree char *req_page_size_str = size_to_str(*requested_page_size);
+        g_autofree char *min_page_size_str = NULL;
+        uint64_t pgsizes_masked;
+
+        pgsizes_masked = MAKE_64BIT_MASK(0, ctz64(*requested_page_size));
+        *requested_page_size = 1U << ctz64(iommu_info->iova_pgsizes
+                                           & ~pgsizes_masked);
+        min_page_size_str = size_to_str(*requested_page_size);
+        error_setg(errp, "Unsupported IOMMU page size: %s", req_page_size_str);
+        error_append_hint(errp, "Minimum IOMMU page size: %s\n",
+                          min_page_size_str);
+        ret = -EINVAL;
+        goto fail;
+    }
 
     /*
      * if the kernel does not report usable IOVA regions, choose
@@ -520,7 +544,7 @@ QEMUVFIOState *qemu_vfio_open_pci(const char *device, size_t *min_page_size,
     int r;
     QEMUVFIOState *s = g_new0(QEMUVFIOState, 1);
 
-    r = qemu_vfio_init_pci(s, device, errp);
+    r = qemu_vfio_init_pci(s, device, min_page_size, errp);
     if (r) {
         g_free(s);
         return NULL;
diff --git a/util/trace-events b/util/trace-events
index 7faad2a718c..3c36def9f30 100644
--- a/util/trace-events
+++ b/util/trace-events
@@ -87,6 +87,7 @@ qemu_vfio_do_mapping(void *s, void *host, uint64_t iova, size_t size) "s %p host
 qemu_vfio_dma_map(void *s, void *host, size_t size, bool temporary, uint64_t *iova) "s %p host %p size 0x%zx temporary %d &iova %p"
 qemu_vfio_dma_mapped(void *s, void *host, uint64_t iova, size_t size) "s %p host %p <-> iova 0x%"PRIx64" size 0x%zx"
 qemu_vfio_dma_unmap(void *s, void *host) "s %p host %p"
+qemu_vfio_iommu_iova_pgsizes(uint64_t iova_pgsizes) "iommu page size bitmask: 0x%08"PRIx64
 qemu_vfio_pci_read_config(void *buf, int ofs, int size, uint64_t region_ofs, uint64_t region_size) "read cfg ptr %p ofs 0x%x size %d (region ofs 0x%"PRIx64" size %"PRId64")"
 qemu_vfio_pci_write_config(void *buf, int ofs, int size, uint64_t region_ofs, uint64_t region_size) "write cfg ptr %p ofs 0x%x size %d (region ofs 0x%"PRIx64" size %"PRId64")"
 qemu_vfio_region_info(const char *desc, uint64_t offset, uint64_t size, uint32_t cap_offset) "region '%s' ofs 0x%"PRIx64" size %"PRId64" cap_ofs %"PRId32
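To illustrate the arithmetic the new check performs, here is a minimal
standalone sketch of how the minimum usable page size is derived from the
IOMMU's iova_pgsizes bitmask. The bitmask value (4kB, 2MB and 1GB pages), the
64kB request and the ctz64() stand-in are assumptions for the example, not
values from a real IOMMU.

#include <inttypes.h>
#include <stdint.h>
#include <stdio.h>

/* Stand-in for QEMU's ctz64(): count trailing zero bits of a non-zero value. */
static unsigned ctz64(uint64_t value)
{
    return __builtin_ctzll(value);
}

int main(void)
{
    /* Assumed example bitmask: IOMMU supports 4kB, 2MB and 1GB pages. */
    uint64_t iova_pgsizes = (UINT64_C(1) << 12) | (UINT64_C(1) << 21) |
                            (UINT64_C(1) << 30);
    uint64_t requested = UINT64_C(1) << 16;    /* 64kB host page size */

    if (iova_pgsizes & requested) {
        printf("requested page size is supported\n");
        return 0;
    }

    /*
     * Clear every bit below the request (MAKE_64BIT_MASK(0, ctz64(requested)));
     * the lowest remaining bit is then the smallest usable page size.
     */
    uint64_t below = requested - 1;
    uint64_t minimum = UINT64_C(1) << ctz64(iova_pgsizes & ~below);

    printf("0x%" PRIx64 " unsupported, minimum usable page size is 0x%" PRIx64 "\n",
           requested, minimum);
    return 0;
}

With these assumed values the hint would report 2MB as the minimum, which is
exactly the situation Eric raises: a 64kB request is rejected outright even
though the device itself may be usable with smaller mappings.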