Message ID | 1600122570-12941-1-git-send-email-mjrosato@linux.ibm.com |
---|---|
Headers | show |
Series | s390x/pci: Accomodate vfio DMA limiting | expand |
On 9/15/20 12:29 AM, Matthew Rosato wrote: > Rather than duplicating the same loop in multiple locations, > create a static function to do the work. Why not do that first in your series? > > Signed-off-by: Matthew Rosato <mjrosato@linux.ibm.com> > --- > hw/vfio/common.c | 33 +++++++++++++++------------------ > 1 file changed, 15 insertions(+), 18 deletions(-) > > diff --git a/hw/vfio/common.c b/hw/vfio/common.c > index 7f4a338..bfbbfe4 100644 > --- a/hw/vfio/common.c > +++ b/hw/vfio/common.c > @@ -825,17 +825,12 @@ static void vfio_listener_release(VFIOContainer *container) > } > } > > -struct vfio_info_cap_header * > -vfio_get_region_info_cap(struct vfio_region_info *info, uint16_t id) > +static struct vfio_info_cap_header * > +vfio_get_cap(void *ptr, uint32_t cap_offset, uint16_t id) > { > struct vfio_info_cap_header *hdr; > - void *ptr = info; > - > - if (!(info->flags & VFIO_REGION_INFO_FLAG_CAPS)) { > - return NULL; > - } > > - for (hdr = ptr + info->cap_offset; hdr != ptr; hdr = ptr + hdr->next) { > + for (hdr = ptr + cap_offset; hdr != ptr; hdr = ptr + hdr->next) { > if (hdr->id == id) { > return hdr; > } > @@ -844,23 +839,25 @@ vfio_get_region_info_cap(struct vfio_region_info *info, uint16_t id) > return NULL; > } > > + > +struct vfio_info_cap_header * > +vfio_get_region_info_cap(struct vfio_region_info *info, uint16_t id) > +{ > + if (!(info->flags & VFIO_REGION_INFO_FLAG_CAPS)) { > + return NULL; > + } > + > + return vfio_get_cap((void *)info, info->cap_offset, id); > +} > + > static struct vfio_info_cap_header * > vfio_get_iommu_type1_info_cap(struct vfio_iommu_type1_info *info, uint16_t id) > { > - struct vfio_info_cap_header *hdr; > - void *ptr = info; > - > if (!(info->flags & VFIO_IOMMU_INFO_CAPS)) { > return NULL; > } > > - for (hdr = ptr + info->cap_offset; hdr != ptr; hdr = ptr + hdr->next) { > - if (hdr->id == id) { > - return hdr; > - } > - } > - > - return NULL; > + return vfio_get_cap((void *)info, info->cap_offset, id); > } > > bool vfio_get_info_dma_avail(struct vfio_iommu_type1_info *info, >
On Mon, 14 Sep 2020 18:29:29 -0400 Matthew Rosato <mjrosato@linux.ibm.com> wrote: > When an s390 guest is using lazy unmapping, it can result in a very > large number of oustanding DMA requests, far beyond the default > limit configured for vfio. Let's track DMA usage similar to vfio > in the host, and trigger the guest to flush their DMA mappings > before vfio runs out. > > Signed-off-by: Matthew Rosato <mjrosato@linux.ibm.com> > --- > hw/s390x/s390-pci-bus.c | 99 +++++++++++++++++++++++++++++++++++++++++++++--- > hw/s390x/s390-pci-bus.h | 9 +++++ > hw/s390x/s390-pci-inst.c | 29 +++++++++++--- > hw/s390x/s390-pci-inst.h | 3 ++ > 4 files changed, 129 insertions(+), 11 deletions(-) > (...) > @@ -737,6 +742,82 @@ static void s390_pci_iommu_free(S390pciState *s, PCIBus *bus, int32_t devfn) > object_unref(OBJECT(iommu)); > } > > +static bool s390_sync_dma_avail(int fd, unsigned int *avail) Not sure I like the name. It sounds like the function checks whether "sync dma" is available. Maybe s390_update_dma_avail()? > +{ > + struct vfio_iommu_type1_info *info; > + uint32_t argsz; > + bool rval = false; > + int ret; > + > + if (avail == NULL) { > + return false; > + } > + > + argsz = sizeof(struct vfio_iommu_type1_info); > + info = g_malloc0(argsz); > + info->argsz = argsz; > + /* > + * If the specified argsz is not large enough to contain all > + * capabilities it will be updated upon return. In this case > + * use the updated value to get the entire capability chain. > + */ > + ret = ioctl(fd, VFIO_IOMMU_GET_INFO, info); > + if (argsz != info->argsz) { > + argsz = info->argsz; > + info = g_realloc(info, argsz); > + info->argsz = argsz; > + ret = ioctl(fd, VFIO_IOMMU_GET_INFO, info); > + } > + > + if (ret) { > + goto out; > + } > + > + /* If the capability exists, update with the current value */ > + rval = vfio_get_info_dma_avail(info, avail); Adding vfio specific things into the generic s390 pci emulation code looks a bit ugly... I'd prefer to factor that out into a separate file, especially if you plan to add more vfio-specific things in the future. > + > +out: > + g_free(info); > + return rval; > +} > + (...) > diff --git a/hw/s390x/s390-pci-inst.c b/hw/s390x/s390-pci-inst.c > index 2f7a7d7..6af9af4 100644 > --- a/hw/s390x/s390-pci-inst.c > +++ b/hw/s390x/s390-pci-inst.c > @@ -32,6 +32,9 @@ > } \ > } while (0) > > +#define INC_DMA_AVAIL(iommu) if (iommu->dma_limit) iommu->dma_limit->avail++; > +#define DEC_DMA_AVAIL(iommu) if (iommu->dma_limit) iommu->dma_limit->avail--; Hm... maybe lowercase inline functions might be better here? > + > static void s390_set_status_code(CPUS390XState *env, > uint8_t r, uint64_t status_code) > { (...) > @@ -620,6 +629,7 @@ int rpcit_service_call(S390CPU *cpu, uint8_t r1, uint8_t r2, uintptr_t ra) > S390PCIIOMMU *iommu; > S390IOTLBEntry entry; > hwaddr start, end; > + uint32_t dma_avail; > > if (env->psw.mask & PSW_MASK_PSTATE) { > s390_program_interrupt(env, PGM_PRIVILEGED, ra); > @@ -675,8 +685,9 @@ int rpcit_service_call(S390CPU *cpu, uint8_t r1, uint8_t r2, uintptr_t ra) > } > > start += entry.len; > - while (entry.iova < start && entry.iova < end) { > - s390_pci_update_iotlb(iommu, &entry); > + dma_avail = 1; /* Assume non-zero dma_avail to start */ > + while (entry.iova < start && entry.iova < end && dma_avail > 0) { > + dma_avail = s390_pci_update_iotlb(iommu, &entry); > entry.iova += PAGE_SIZE; > entry.translated_addr += PAGE_SIZE; > } > @@ -689,7 +700,13 @@ err: > s390_pci_generate_error_event(error, pbdev->fh, pbdev->fid, start, 0); > } else { > pbdev->fmb.counter[ZPCI_FMB_CNT_RPCIT]++; > - setcc(cpu, ZPCI_PCI_LS_OK); > + if (dma_avail > 0) { When I compile this (with a headers update), the compiler moans here about an uninitialized variable. > + setcc(cpu, ZPCI_PCI_LS_OK); > + } else { > + /* vfio DMA mappings are exhausted, trigger a RPCIT */ > + setcc(cpu, ZPCI_PCI_LS_ERR); > + s390_set_status_code(env, r1, ZPCI_RPCIT_ST_INSUFF_RES); > + } > } > return 0; > } (...)
On 15/09/2020 00.29, Matthew Rosato wrote: > When an s390 guest is using lazy unmapping, it can result in a very > large number of oustanding DMA requests, far beyond the default > limit configured for vfio. Let's track DMA usage similar to vfio > in the host, and trigger the guest to flush their DMA mappings > before vfio runs out. > > Signed-off-by: Matthew Rosato <mjrosato@linux.ibm.com> > --- > hw/s390x/s390-pci-bus.c | 99 +++++++++++++++++++++++++++++++++++++++++++++--- > hw/s390x/s390-pci-bus.h | 9 +++++ > hw/s390x/s390-pci-inst.c | 29 +++++++++++--- > hw/s390x/s390-pci-inst.h | 3 ++ > 4 files changed, 129 insertions(+), 11 deletions(-) > > diff --git a/hw/s390x/s390-pci-bus.c b/hw/s390x/s390-pci-bus.c > index 92146a2..23474cd 100644 > --- a/hw/s390x/s390-pci-bus.c > +++ b/hw/s390x/s390-pci-bus.c > @@ -11,6 +11,8 @@ > * directory. > */ > > +#include <sys/ioctl.h> > + > #include "qemu/osdep.h" > #include "qapi/error.h" > #include "qapi/visitor.h" > @@ -24,6 +26,9 @@ > #include "qemu/error-report.h" > #include "qemu/module.h" > > +#include "hw/vfio/pci.h" > +#include "hw/vfio/vfio-common.h" > + > #ifndef DEBUG_S390PCI_BUS > #define DEBUG_S390PCI_BUS 0 > #endif > @@ -737,6 +742,82 @@ static void s390_pci_iommu_free(S390pciState *s, PCIBus *bus, int32_t devfn) > object_unref(OBJECT(iommu)); > } > > +static bool s390_sync_dma_avail(int fd, unsigned int *avail) > +{ > + struct vfio_iommu_type1_info *info; You could use g_autofree to get rid of the g_free() at the end. > + uint32_t argsz; > + bool rval = false; > + int ret; > + > + if (avail == NULL) { > + return false; > + } Since this is a "static" local function, and calling it with avail == NULL does not make too much sense, I think I'd rather turn this into an assert() instead. > + argsz = sizeof(struct vfio_iommu_type1_info); > + info = g_malloc0(argsz); > + info->argsz = argsz; > + /* > + * If the specified argsz is not large enough to contain all > + * capabilities it will be updated upon return. In this case > + * use the updated value to get the entire capability chain. > + */ > + ret = ioctl(fd, VFIO_IOMMU_GET_INFO, info); > + if (argsz != info->argsz) { > + argsz = info->argsz; > + info = g_realloc(info, argsz); > + info->argsz = argsz; > + ret = ioctl(fd, VFIO_IOMMU_GET_INFO, info); > + } > + > + if (ret) { > + goto out; > + } > + > + /* If the capability exists, update with the current value */ > + rval = vfio_get_info_dma_avail(info, avail); > + > +out: > + g_free(info); > + return rval; > +} > + > +static S390PCIDMACount *s390_start_dma_count(S390pciState *s, VFIODevice *vdev) > +{ > + int id = vdev->group->container->fd; > + S390PCIDMACount *cnt; > + uint32_t avail; > + > + if (!s390_sync_dma_avail(id, &avail)) { > + return NULL; > + } > + > + QTAILQ_FOREACH(cnt, &s->zpci_dma_limit, link) { > + if (cnt->id == id) { > + cnt->users++; > + return cnt; > + } > + } > + > + cnt = g_new0(S390PCIDMACount, 1); > + cnt->id = id; > + cnt->users = 1; > + cnt->avail = avail; > + QTAILQ_INSERT_TAIL(&s->zpci_dma_limit, cnt, link); > + return cnt; > +} > + > +static void s390_end_dma_count(S390pciState *s, S390PCIDMACount *cnt) > +{ > + if (cnt == NULL) { > + return; > + } Either use assert() or drop this completely (since you're checking it at the caller site already). > + cnt->users--; > + if (cnt->users == 0) { > + QTAILQ_REMOVE(&s->zpci_dma_limit, cnt, link); > + } > +} > + > static void s390_pcihost_realize(DeviceState *dev, Error **errp) > { > PCIBus *b; > @@ -764,6 +845,7 @@ static void s390_pcihost_realize(DeviceState *dev, Error **errp) > s->bus_no = 0; > QTAILQ_INIT(&s->pending_sei); > QTAILQ_INIT(&s->zpci_devs); > + QTAILQ_INIT(&s->zpci_dma_limit); > > css_register_io_adapters(CSS_IO_ADAPTER_PCI, true, false, > S390_ADAPTER_SUPPRESSIBLE, errp); > @@ -902,6 +984,7 @@ static void s390_pcihost_plug(HotplugHandler *hotplug_dev, DeviceState *dev, > { > S390pciState *s = S390_PCI_HOST_BRIDGE(hotplug_dev); > PCIDevice *pdev = NULL; > + VFIOPCIDevice *vpdev = NULL; > S390PCIBusDevice *pbdev = NULL; > > if (object_dynamic_cast(OBJECT(dev), TYPE_PCI_BRIDGE)) { > @@ -941,17 +1024,20 @@ static void s390_pcihost_plug(HotplugHandler *hotplug_dev, DeviceState *dev, > } > } > > + pbdev->pdev = pdev; > + pbdev->iommu = s390_pci_get_iommu(s, pci_get_bus(pdev), pdev->devfn); > + pbdev->iommu->pbdev = pbdev; > + pbdev->state = ZPCI_FS_DISABLED; > + > if (object_dynamic_cast(OBJECT(dev), "vfio-pci")) { > pbdev->fh |= FH_SHM_VFIO; > + vpdev = container_of(pbdev->pdev, VFIOPCIDevice, pdev); > + pbdev->iommu->dma_limit = s390_start_dma_count(s, > + &vpdev->vbasedev); > } else { > pbdev->fh |= FH_SHM_EMUL; > } > > - pbdev->pdev = pdev; > - pbdev->iommu = s390_pci_get_iommu(s, pci_get_bus(pdev), pdev->devfn); > - pbdev->iommu->pbdev = pbdev; > - pbdev->state = ZPCI_FS_DISABLED; > - > if (s390_pci_msix_init(pbdev)) { > error_setg(errp, "MSI-X support is mandatory " > "in the S390 architecture"); > @@ -1004,6 +1090,9 @@ static void s390_pcihost_unplug(HotplugHandler *hotplug_dev, DeviceState *dev, > pbdev->fid = 0; > QTAILQ_REMOVE(&s->zpci_devs, pbdev, link); > g_hash_table_remove(s->zpci_table, &pbdev->idx); > + if (pbdev->iommu->dma_limit) { > + s390_end_dma_count(s, pbdev->iommu->dma_limit); > + } > qdev_unrealize(dev); > } > } Thomas
On 9/15/20 2:16 AM, Philippe Mathieu-Daudé wrote: > On 9/15/20 12:29 AM, Matthew Rosato wrote: >> Rather than duplicating the same loop in multiple locations, >> create a static function to do the work. > > Why not do that first in your series? > Fair question. I did originally do this collapsing first, but I wasn't sure if Alex would want it so I split it out and tacked it on the end so it could be dropped if desired. I'd be just fine re-arranging and putting this patch first. >> >> Signed-off-by: Matthew Rosato <mjrosato@linux.ibm.com> >> --- >> hw/vfio/common.c | 33 +++++++++++++++------------------ >> 1 file changed, 15 insertions(+), 18 deletions(-) >> >> diff --git a/hw/vfio/common.c b/hw/vfio/common.c >> index 7f4a338..bfbbfe4 100644 >> --- a/hw/vfio/common.c >> +++ b/hw/vfio/common.c >> @@ -825,17 +825,12 @@ static void vfio_listener_release(VFIOContainer *container) >> } >> } >> >> -struct vfio_info_cap_header * >> -vfio_get_region_info_cap(struct vfio_region_info *info, uint16_t id) >> +static struct vfio_info_cap_header * >> +vfio_get_cap(void *ptr, uint32_t cap_offset, uint16_t id) >> { >> struct vfio_info_cap_header *hdr; >> - void *ptr = info; >> - >> - if (!(info->flags & VFIO_REGION_INFO_FLAG_CAPS)) { >> - return NULL; >> - } >> >> - for (hdr = ptr + info->cap_offset; hdr != ptr; hdr = ptr + hdr->next) { >> + for (hdr = ptr + cap_offset; hdr != ptr; hdr = ptr + hdr->next) { >> if (hdr->id == id) { >> return hdr; >> } >> @@ -844,23 +839,25 @@ vfio_get_region_info_cap(struct vfio_region_info *info, uint16_t id) >> return NULL; >> } >> >> + >> +struct vfio_info_cap_header * >> +vfio_get_region_info_cap(struct vfio_region_info *info, uint16_t id) >> +{ >> + if (!(info->flags & VFIO_REGION_INFO_FLAG_CAPS)) { >> + return NULL; >> + } >> + >> + return vfio_get_cap((void *)info, info->cap_offset, id); >> +} >> + >> static struct vfio_info_cap_header * >> vfio_get_iommu_type1_info_cap(struct vfio_iommu_type1_info *info, uint16_t id) >> { >> - struct vfio_info_cap_header *hdr; >> - void *ptr = info; >> - >> if (!(info->flags & VFIO_IOMMU_INFO_CAPS)) { >> return NULL; >> } >> >> - for (hdr = ptr + info->cap_offset; hdr != ptr; hdr = ptr + hdr->next) { >> - if (hdr->id == id) { >> - return hdr; >> - } >> - } >> - >> - return NULL; >> + return vfio_get_cap((void *)info, info->cap_offset, id); >> } >> >> bool vfio_get_info_dma_avail(struct vfio_iommu_type1_info *info, >> >
On 9/15/20 7:28 AM, Cornelia Huck wrote: > On Mon, 14 Sep 2020 18:29:29 -0400 > Matthew Rosato <mjrosato@linux.ibm.com> wrote: > >> When an s390 guest is using lazy unmapping, it can result in a very >> large number of oustanding DMA requests, far beyond the default >> limit configured for vfio. Let's track DMA usage similar to vfio >> in the host, and trigger the guest to flush their DMA mappings >> before vfio runs out. >> >> Signed-off-by: Matthew Rosato <mjrosato@linux.ibm.com> >> --- >> hw/s390x/s390-pci-bus.c | 99 +++++++++++++++++++++++++++++++++++++++++++++--- >> hw/s390x/s390-pci-bus.h | 9 +++++ >> hw/s390x/s390-pci-inst.c | 29 +++++++++++--- >> hw/s390x/s390-pci-inst.h | 3 ++ >> 4 files changed, 129 insertions(+), 11 deletions(-) >> > > (...) > >> @@ -737,6 +742,82 @@ static void s390_pci_iommu_free(S390pciState *s, PCIBus *bus, int32_t devfn) >> object_unref(OBJECT(iommu)); >> } >> >> +static bool s390_sync_dma_avail(int fd, unsigned int *avail) > > Not sure I like the name. It sounds like the function checks whether > "sync dma" is available. Maybe s390_update_dma_avail()? > Sounds fine to me. >> +{ >> + struct vfio_iommu_type1_info *info; >> + uint32_t argsz; >> + bool rval = false; >> + int ret; >> + >> + if (avail == NULL) { >> + return false; >> + } >> + >> + argsz = sizeof(struct vfio_iommu_type1_info); >> + info = g_malloc0(argsz); >> + info->argsz = argsz; >> + /* >> + * If the specified argsz is not large enough to contain all >> + * capabilities it will be updated upon return. In this case >> + * use the updated value to get the entire capability chain. >> + */ >> + ret = ioctl(fd, VFIO_IOMMU_GET_INFO, info); >> + if (argsz != info->argsz) { >> + argsz = info->argsz; >> + info = g_realloc(info, argsz); >> + info->argsz = argsz; >> + ret = ioctl(fd, VFIO_IOMMU_GET_INFO, info); >> + } >> + >> + if (ret) { >> + goto out; >> + } >> + >> + /* If the capability exists, update with the current value */ >> + rval = vfio_get_info_dma_avail(info, avail); > > Adding vfio specific things into the generic s390 pci emulation code > looks a bit ugly... I'd prefer to factor that out into a separate file, > especially if you plan to add more vfio-specific things in the future. > Fair. hw/s390x/s390-pci-vfio.* ? >> + >> +out: >> + g_free(info); >> + return rval; >> +} >> + > > (...) > >> diff --git a/hw/s390x/s390-pci-inst.c b/hw/s390x/s390-pci-inst.c >> index 2f7a7d7..6af9af4 100644 >> --- a/hw/s390x/s390-pci-inst.c >> +++ b/hw/s390x/s390-pci-inst.c >> @@ -32,6 +32,9 @@ >> } \ >> } while (0) >> >> +#define INC_DMA_AVAIL(iommu) if (iommu->dma_limit) iommu->dma_limit->avail++; >> +#define DEC_DMA_AVAIL(iommu) if (iommu->dma_limit) iommu->dma_limit->avail--; > > Hm... maybe lowercase inline functions might be better here? > OK >> + >> static void s390_set_status_code(CPUS390XState *env, >> uint8_t r, uint64_t status_code) >> { > > (...) > >> @@ -620,6 +629,7 @@ int rpcit_service_call(S390CPU *cpu, uint8_t r1, uint8_t r2, uintptr_t ra) >> S390PCIIOMMU *iommu; >> S390IOTLBEntry entry; >> hwaddr start, end; >> + uint32_t dma_avail; >> >> if (env->psw.mask & PSW_MASK_PSTATE) { >> s390_program_interrupt(env, PGM_PRIVILEGED, ra); >> @@ -675,8 +685,9 @@ int rpcit_service_call(S390CPU *cpu, uint8_t r1, uint8_t r2, uintptr_t ra) >> } >> >> start += entry.len; >> - while (entry.iova < start && entry.iova < end) { >> - s390_pci_update_iotlb(iommu, &entry); >> + dma_avail = 1; /* Assume non-zero dma_avail to start */ >> + while (entry.iova < start && entry.iova < end && dma_avail > 0) { >> + dma_avail = s390_pci_update_iotlb(iommu, &entry); >> entry.iova += PAGE_SIZE; >> entry.translated_addr += PAGE_SIZE; >> } >> @@ -689,7 +700,13 @@ err: >> s390_pci_generate_error_event(error, pbdev->fh, pbdev->fid, start, 0); >> } else { >> pbdev->fmb.counter[ZPCI_FMB_CNT_RPCIT]++; >> - setcc(cpu, ZPCI_PCI_LS_OK); >> + if (dma_avail > 0) { > > When I compile this (with a headers update), the compiler moans here > about an uninitialized variable. > D'oh. Obviously dma_avail needs to be initialized outside of the if/else -- I'll double-check the logic here and fix. >> + setcc(cpu, ZPCI_PCI_LS_OK); >> + } else { >> + /* vfio DMA mappings are exhausted, trigger a RPCIT */ >> + setcc(cpu, ZPCI_PCI_LS_ERR); >> + s390_set_status_code(env, r1, ZPCI_RPCIT_ST_INSUFF_RES); >> + } >> } >> return 0; >> } > > (...) >
On 9/15/20 8:54 AM, Thomas Huth wrote: > On 15/09/2020 00.29, Matthew Rosato wrote: >> When an s390 guest is using lazy unmapping, it can result in a very >> large number of oustanding DMA requests, far beyond the default >> limit configured for vfio. Let's track DMA usage similar to vfio >> in the host, and trigger the guest to flush their DMA mappings >> before vfio runs out. >> >> Signed-off-by: Matthew Rosato <mjrosato@linux.ibm.com> >> --- >> hw/s390x/s390-pci-bus.c | 99 +++++++++++++++++++++++++++++++++++++++++++++--- >> hw/s390x/s390-pci-bus.h | 9 +++++ >> hw/s390x/s390-pci-inst.c | 29 +++++++++++--- >> hw/s390x/s390-pci-inst.h | 3 ++ >> 4 files changed, 129 insertions(+), 11 deletions(-) >> >> diff --git a/hw/s390x/s390-pci-bus.c b/hw/s390x/s390-pci-bus.c >> index 92146a2..23474cd 100644 >> --- a/hw/s390x/s390-pci-bus.c >> +++ b/hw/s390x/s390-pci-bus.c >> @@ -11,6 +11,8 @@ >> * directory. >> */ >> >> +#include <sys/ioctl.h> >> + >> #include "qemu/osdep.h" >> #include "qapi/error.h" >> #include "qapi/visitor.h" >> @@ -24,6 +26,9 @@ >> #include "qemu/error-report.h" >> #include "qemu/module.h" >> >> +#include "hw/vfio/pci.h" >> +#include "hw/vfio/vfio-common.h" >> + >> #ifndef DEBUG_S390PCI_BUS >> #define DEBUG_S390PCI_BUS 0 >> #endif >> @@ -737,6 +742,82 @@ static void s390_pci_iommu_free(S390pciState *s, PCIBus *bus, int32_t devfn) >> object_unref(OBJECT(iommu)); >> } >> >> +static bool s390_sync_dma_avail(int fd, unsigned int *avail) >> +{ >> + struct vfio_iommu_type1_info *info; > > You could use g_autofree to get rid of the g_free() at the end. > OK >> + uint32_t argsz; >> + bool rval = false; >> + int ret; >> + >> + if (avail == NULL) { >> + return false; >> + } > > Since this is a "static" local function, and calling it with avail == > NULL does not make too much sense, I think I'd rather turn this into an > assert() instead. > Sure, sounds good. >> + argsz = sizeof(struct vfio_iommu_type1_info); >> + info = g_malloc0(argsz); >> + info->argsz = argsz; >> + /* >> + * If the specified argsz is not large enough to contain all >> + * capabilities it will be updated upon return. In this case >> + * use the updated value to get the entire capability chain. >> + */ >> + ret = ioctl(fd, VFIO_IOMMU_GET_INFO, info); >> + if (argsz != info->argsz) { >> + argsz = info->argsz; >> + info = g_realloc(info, argsz); >> + info->argsz = argsz; >> + ret = ioctl(fd, VFIO_IOMMU_GET_INFO, info); >> + } >> + >> + if (ret) { >> + goto out; >> + } >> + >> + /* If the capability exists, update with the current value */ >> + rval = vfio_get_info_dma_avail(info, avail); >> + >> +out: >> + g_free(info); >> + return rval; >> +} >> + >> +static S390PCIDMACount *s390_start_dma_count(S390pciState *s, VFIODevice *vdev) >> +{ >> + int id = vdev->group->container->fd; >> + S390PCIDMACount *cnt; >> + uint32_t avail; >> + >> + if (!s390_sync_dma_avail(id, &avail)) { >> + return NULL; >> + } >> + >> + QTAILQ_FOREACH(cnt, &s->zpci_dma_limit, link) { >> + if (cnt->id == id) { >> + cnt->users++; >> + return cnt; >> + } >> + } >> + >> + cnt = g_new0(S390PCIDMACount, 1); >> + cnt->id = id; >> + cnt->users = 1; >> + cnt->avail = avail; >> + QTAILQ_INSERT_TAIL(&s->zpci_dma_limit, cnt, link); >> + return cnt; >> +} >> + >> +static void s390_end_dma_count(S390pciState *s, S390PCIDMACount *cnt) >> +{ >> + if (cnt == NULL) { >> + return; >> + } > > Either use assert() or drop this completely (since you're checking it at > the caller site already). > Fair - I'll assert() here. Thanks!
On Tue, 15 Sep 2020 10:16:23 -0400 Matthew Rosato <mjrosato@linux.ibm.com> wrote: > On 9/15/20 7:28 AM, Cornelia Huck wrote: > > On Mon, 14 Sep 2020 18:29:29 -0400 > > Matthew Rosato <mjrosato@linux.ibm.com> wrote: > > > >> When an s390 guest is using lazy unmapping, it can result in a very > >> large number of oustanding DMA requests, far beyond the default > >> limit configured for vfio. Let's track DMA usage similar to vfio > >> in the host, and trigger the guest to flush their DMA mappings > >> before vfio runs out. > >> > >> Signed-off-by: Matthew Rosato <mjrosato@linux.ibm.com> > >> --- > >> hw/s390x/s390-pci-bus.c | 99 +++++++++++++++++++++++++++++++++++++++++++++--- > >> hw/s390x/s390-pci-bus.h | 9 +++++ > >> hw/s390x/s390-pci-inst.c | 29 +++++++++++--- > >> hw/s390x/s390-pci-inst.h | 3 ++ > >> 4 files changed, 129 insertions(+), 11 deletions(-) > >> > > > > (...) > > > >> @@ -737,6 +742,82 @@ static void s390_pci_iommu_free(S390pciState *s, PCIBus *bus, int32_t devfn) > >> object_unref(OBJECT(iommu)); > >> } > >> > >> +static bool s390_sync_dma_avail(int fd, unsigned int *avail) > > > > Not sure I like the name. It sounds like the function checks whether > > "sync dma" is available. Maybe s390_update_dma_avail()? > > > > Sounds fine to me. > > >> +{ > >> + struct vfio_iommu_type1_info *info; > >> + uint32_t argsz; > >> + bool rval = false; > >> + int ret; > >> + > >> + if (avail == NULL) { > >> + return false; > >> + } > >> + > >> + argsz = sizeof(struct vfio_iommu_type1_info); > >> + info = g_malloc0(argsz); > >> + info->argsz = argsz; > >> + /* > >> + * If the specified argsz is not large enough to contain all > >> + * capabilities it will be updated upon return. In this case > >> + * use the updated value to get the entire capability chain. > >> + */ > >> + ret = ioctl(fd, VFIO_IOMMU_GET_INFO, info); > >> + if (argsz != info->argsz) { > >> + argsz = info->argsz; > >> + info = g_realloc(info, argsz); > >> + info->argsz = argsz; > >> + ret = ioctl(fd, VFIO_IOMMU_GET_INFO, info); > >> + } > >> + > >> + if (ret) { > >> + goto out; > >> + } > >> + > >> + /* If the capability exists, update with the current value */ > >> + rval = vfio_get_info_dma_avail(info, avail); > > > > Adding vfio specific things into the generic s390 pci emulation code > > looks a bit ugly... I'd prefer to factor that out into a separate file, > > especially if you plan to add more vfio-specific things in the future. > > > > Fair. hw/s390x/s390-pci-vfio.* ? Sounds good to me.