Message ID | 1600197283-25274-6-git-send-email-mjrosato@linux.ibm.com |
---|---|
State | Superseded |
Headers | show |
Series | s390x/pci: Accomodate vfio DMA limiting | expand |
On Tue, 15 Sep 2020 15:14:43 -0400 Matthew Rosato <mjrosato@linux.ibm.com> wrote: > When an s390 guest is using lazy unmapping, it can result in a very > large number of oustanding DMA requests, far beyond the default > limit configured for vfio. Let's track DMA usage similar to vfio > in the host, and trigger the guest to flush their DMA mappings > before vfio runs out. > > Signed-off-by: Matthew Rosato <mjrosato@linux.ibm.com> > --- > hw/s390x/s390-pci-bus.c | 56 +++++++++++++++++++++++++++++++++++++++++++----- > hw/s390x/s390-pci-bus.h | 9 ++++++++ > hw/s390x/s390-pci-inst.c | 34 +++++++++++++++++++++++------ > hw/s390x/s390-pci-inst.h | 3 +++ > 4 files changed, 91 insertions(+), 11 deletions(-) (...) > @@ -737,6 +740,41 @@ static void s390_pci_iommu_free(S390pciState *s, PCIBus *bus, int32_t devfn) > object_unref(OBJECT(iommu)); > } > > +static S390PCIDMACount *s390_start_dma_count(S390pciState *s, VFIODevice *vdev) Should these go into the new vfio-related file? > +{ > + int id = vdev->group->container->fd; > + S390PCIDMACount *cnt; > + uint32_t avail; > + > + if (!s390_pci_update_dma_avail(id, &avail)) { > + return NULL; > + } > + > + QTAILQ_FOREACH(cnt, &s->zpci_dma_limit, link) { > + if (cnt->id == id) { > + cnt->users++; > + return cnt; > + } > + } > + > + cnt = g_new0(S390PCIDMACount, 1); > + cnt->id = id; > + cnt->users = 1; > + cnt->avail = avail; > + QTAILQ_INSERT_TAIL(&s->zpci_dma_limit, cnt, link); > + return cnt; > +} > + > +static void s390_end_dma_count(S390pciState *s, S390PCIDMACount *cnt) > +{ > + assert(cnt); > + > + cnt->users--; > + if (cnt->users == 0) { > + QTAILQ_REMOVE(&s->zpci_dma_limit, cnt, link); > + } > +} > + > static void s390_pcihost_realize(DeviceState *dev, Error **errp) > { > PCIBus *b; > @@ -764,6 +802,7 @@ static void s390_pcihost_realize(DeviceState *dev, Error **errp) > s->bus_no = 0; > QTAILQ_INIT(&s->pending_sei); > QTAILQ_INIT(&s->zpci_devs); > + QTAILQ_INIT(&s->zpci_dma_limit); > > 
css_register_io_adapters(CSS_IO_ADAPTER_PCI, true, false, > S390_ADAPTER_SUPPRESSIBLE, errp); > @@ -902,6 +941,7 @@ static void s390_pcihost_plug(HotplugHandler *hotplug_dev, DeviceState *dev, > { > S390pciState *s = S390_PCI_HOST_BRIDGE(hotplug_dev); > PCIDevice *pdev = NULL; > + VFIOPCIDevice *vpdev = NULL; > S390PCIBusDevice *pbdev = NULL; > > if (object_dynamic_cast(OBJECT(dev), TYPE_PCI_BRIDGE)) { > @@ -941,17 +981,20 @@ static void s390_pcihost_plug(HotplugHandler *hotplug_dev, DeviceState *dev, > } > } > > + pbdev->pdev = pdev; > + pbdev->iommu = s390_pci_get_iommu(s, pci_get_bus(pdev), pdev->devfn); > + pbdev->iommu->pbdev = pbdev; > + pbdev->state = ZPCI_FS_DISABLED; > + > if (object_dynamic_cast(OBJECT(dev), "vfio-pci")) { > pbdev->fh |= FH_SHM_VFIO; > + vpdev = container_of(pbdev->pdev, VFIOPCIDevice, pdev); > + pbdev->iommu->dma_limit = s390_start_dma_count(s, > + &vpdev->vbasedev); I think you can just pass s and pbdev to that function... that would move dealing with vfio specifics from this file. > } else { > pbdev->fh |= FH_SHM_EMUL; > } > > - pbdev->pdev = pdev; > - pbdev->iommu = s390_pci_get_iommu(s, pci_get_bus(pdev), pdev->devfn); > - pbdev->iommu->pbdev = pbdev; > - pbdev->state = ZPCI_FS_DISABLED; > - > if (s390_pci_msix_init(pbdev)) { > error_setg(errp, "MSI-X support is mandatory " > "in the S390 architecture"); (...) > diff --git a/hw/s390x/s390-pci-inst.c b/hw/s390x/s390-pci-inst.c > index 2f7a7d7..cc34b17 100644 > --- a/hw/s390x/s390-pci-inst.c > +++ b/hw/s390x/s390-pci-inst.c > @@ -32,6 +32,9 @@ > } \ > } while (0) > > +#define inc_dma_avail(iommu) if (iommu->dma_limit) iommu->dma_limit->avail++; I was thinking more of something like static inline void inc_dma_avail(S390PCIIOMMU *iommu) { if (iommu->dma_limit) { iommu->dma_limit->avail++; } } > +#define dec_dma_avail(iommu) if (iommu->dma_limit) iommu->dma_limit->avail--; > + > static void s390_set_status_code(CPUS390XState *env, > uint8_t r, uint64_t status_code) > { (...)
On 9/16/20 7:05 AM, Cornelia Huck wrote: > On Tue, 15 Sep 2020 15:14:43 -0400 > Matthew Rosato <mjrosato@linux.ibm.com> wrote: > >> When an s390 guest is using lazy unmapping, it can result in a very >> large number of oustanding DMA requests, far beyond the default >> limit configured for vfio. Let's track DMA usage similar to vfio >> in the host, and trigger the guest to flush their DMA mappings >> before vfio runs out. >> >> Signed-off-by: Matthew Rosato <mjrosato@linux.ibm.com> >> --- >> hw/s390x/s390-pci-bus.c | 56 +++++++++++++++++++++++++++++++++++++++++++----- >> hw/s390x/s390-pci-bus.h | 9 ++++++++ >> hw/s390x/s390-pci-inst.c | 34 +++++++++++++++++++++++------ >> hw/s390x/s390-pci-inst.h | 3 +++ >> 4 files changed, 91 insertions(+), 11 deletions(-) > > (...) > >> @@ -737,6 +740,41 @@ static void s390_pci_iommu_free(S390pciState *s, PCIBus *bus, int32_t devfn) >> object_unref(OBJECT(iommu)); >> } >> >> +static S390PCIDMACount *s390_start_dma_count(S390pciState *s, VFIODevice *vdev) > > Should these go into the new vfio-related file? 
> >> +{ >> + int id = vdev->group->container->fd; >> + S390PCIDMACount *cnt; >> + uint32_t avail; >> + >> + if (!s390_pci_update_dma_avail(id, &avail)) { >> + return NULL; >> + } >> + >> + QTAILQ_FOREACH(cnt, &s->zpci_dma_limit, link) { >> + if (cnt->id == id) { >> + cnt->users++; >> + return cnt; >> + } >> + } >> + >> + cnt = g_new0(S390PCIDMACount, 1); >> + cnt->id = id; >> + cnt->users = 1; >> + cnt->avail = avail; >> + QTAILQ_INSERT_TAIL(&s->zpci_dma_limit, cnt, link); >> + return cnt; >> +} >> + >> +static void s390_end_dma_count(S390pciState *s, S390PCIDMACount *cnt) >> +{ >> + assert(cnt); >> + >> + cnt->users--; >> + if (cnt->users == 0) { >> + QTAILQ_REMOVE(&s->zpci_dma_limit, cnt, link); >> + } >> +} >> + >> static void s390_pcihost_realize(DeviceState *dev, Error **errp) >> { >> PCIBus *b; >> @@ -764,6 +802,7 @@ static void s390_pcihost_realize(DeviceState *dev, Error **errp) >> s->bus_no = 0; >> QTAILQ_INIT(&s->pending_sei); >> QTAILQ_INIT(&s->zpci_devs); >> + QTAILQ_INIT(&s->zpci_dma_limit); >> >> css_register_io_adapters(CSS_IO_ADAPTER_PCI, true, false, >> S390_ADAPTER_SUPPRESSIBLE, errp); >> @@ -902,6 +941,7 @@ static void s390_pcihost_plug(HotplugHandler *hotplug_dev, DeviceState *dev, >> { >> S390pciState *s = S390_PCI_HOST_BRIDGE(hotplug_dev); >> PCIDevice *pdev = NULL; >> + VFIOPCIDevice *vpdev = NULL; >> S390PCIBusDevice *pbdev = NULL; >> >> if (object_dynamic_cast(OBJECT(dev), TYPE_PCI_BRIDGE)) { >> @@ -941,17 +981,20 @@ static void s390_pcihost_plug(HotplugHandler *hotplug_dev, DeviceState *dev, >> } >> } >> >> + pbdev->pdev = pdev; >> + pbdev->iommu = s390_pci_get_iommu(s, pci_get_bus(pdev), pdev->devfn); >> + pbdev->iommu->pbdev = pbdev; >> + pbdev->state = ZPCI_FS_DISABLED; >> + >> if (object_dynamic_cast(OBJECT(dev), "vfio-pci")) { >> pbdev->fh |= FH_SHM_VFIO; >> + vpdev = container_of(pbdev->pdev, VFIOPCIDevice, pdev); >> + pbdev->iommu->dma_limit = s390_start_dma_count(s, >> + &vpdev->vbasedev); > > I think you can just pass s and pbdev 
to that function... that would > move dealing with vfio specifics from this file. I had considered this as well, should have went with my gut -- I'll move them. > >> } else { >> pbdev->fh |= FH_SHM_EMUL; >> } >> >> - pbdev->pdev = pdev; >> - pbdev->iommu = s390_pci_get_iommu(s, pci_get_bus(pdev), pdev->devfn); >> - pbdev->iommu->pbdev = pbdev; >> - pbdev->state = ZPCI_FS_DISABLED; >> - >> if (s390_pci_msix_init(pbdev)) { >> error_setg(errp, "MSI-X support is mandatory " >> "in the S390 architecture"); > > (...) > >> diff --git a/hw/s390x/s390-pci-inst.c b/hw/s390x/s390-pci-inst.c >> index 2f7a7d7..cc34b17 100644 >> --- a/hw/s390x/s390-pci-inst.c >> +++ b/hw/s390x/s390-pci-inst.c >> @@ -32,6 +32,9 @@ >> } \ >> } while (0) >> >> +#define inc_dma_avail(iommu) if (iommu->dma_limit) iommu->dma_limit->avail++; > > I was thinking more of something like > > static inline void inc_dma_avail(S390PCIIOMMU *iommu) > { > if (iommu->dma_limit) { > iommu->dma_limit->avail++; > } > } > Ah, I read the 'lowercase' and missed the 'inline function' part of your previous comment, sorry. Will change. >> +#define dec_dma_avail(iommu) if (iommu->dma_limit) iommu->dma_limit->avail--; >> + >> static void s390_set_status_code(CPUS390XState *env, >> uint8_t r, uint64_t status_code) >> { > > (...) >
diff --git a/hw/s390x/s390-pci-bus.c b/hw/s390x/s390-pci-bus.c index 92146a2..8e8398d 100644 --- a/hw/s390x/s390-pci-bus.c +++ b/hw/s390x/s390-pci-bus.c @@ -17,6 +17,7 @@ #include "cpu.h" #include "s390-pci-bus.h" #include "s390-pci-inst.h" +#include "s390-pci-vfio.h" #include "hw/pci/pci_bus.h" #include "hw/qdev-properties.h" #include "hw/pci/pci_bridge.h" @@ -24,6 +25,8 @@ #include "qemu/error-report.h" #include "qemu/module.h" +#include "hw/vfio/pci.h" + #ifndef DEBUG_S390PCI_BUS #define DEBUG_S390PCI_BUS 0 #endif @@ -737,6 +740,41 @@ static void s390_pci_iommu_free(S390pciState *s, PCIBus *bus, int32_t devfn) object_unref(OBJECT(iommu)); } +static S390PCIDMACount *s390_start_dma_count(S390pciState *s, VFIODevice *vdev) +{ + int id = vdev->group->container->fd; + S390PCIDMACount *cnt; + uint32_t avail; + + if (!s390_pci_update_dma_avail(id, &avail)) { + return NULL; + } + + QTAILQ_FOREACH(cnt, &s->zpci_dma_limit, link) { + if (cnt->id == id) { + cnt->users++; + return cnt; + } + } + + cnt = g_new0(S390PCIDMACount, 1); + cnt->id = id; + cnt->users = 1; + cnt->avail = avail; + QTAILQ_INSERT_TAIL(&s->zpci_dma_limit, cnt, link); + return cnt; +} + +static void s390_end_dma_count(S390pciState *s, S390PCIDMACount *cnt) +{ + assert(cnt); + + cnt->users--; + if (cnt->users == 0) { + QTAILQ_REMOVE(&s->zpci_dma_limit, cnt, link); + } +} + static void s390_pcihost_realize(DeviceState *dev, Error **errp) { PCIBus *b; @@ -764,6 +802,7 @@ static void s390_pcihost_realize(DeviceState *dev, Error **errp) s->bus_no = 0; QTAILQ_INIT(&s->pending_sei); QTAILQ_INIT(&s->zpci_devs); + QTAILQ_INIT(&s->zpci_dma_limit); css_register_io_adapters(CSS_IO_ADAPTER_PCI, true, false, S390_ADAPTER_SUPPRESSIBLE, errp); @@ -902,6 +941,7 @@ static void s390_pcihost_plug(HotplugHandler *hotplug_dev, DeviceState *dev, { S390pciState *s = S390_PCI_HOST_BRIDGE(hotplug_dev); PCIDevice *pdev = NULL; + VFIOPCIDevice *vpdev = NULL; S390PCIBusDevice *pbdev = NULL; if (object_dynamic_cast(OBJECT(dev), 
TYPE_PCI_BRIDGE)) { @@ -941,17 +981,20 @@ static void s390_pcihost_plug(HotplugHandler *hotplug_dev, DeviceState *dev, } } + pbdev->pdev = pdev; + pbdev->iommu = s390_pci_get_iommu(s, pci_get_bus(pdev), pdev->devfn); + pbdev->iommu->pbdev = pbdev; + pbdev->state = ZPCI_FS_DISABLED; + if (object_dynamic_cast(OBJECT(dev), "vfio-pci")) { pbdev->fh |= FH_SHM_VFIO; + vpdev = container_of(pbdev->pdev, VFIOPCIDevice, pdev); + pbdev->iommu->dma_limit = s390_start_dma_count(s, + &vpdev->vbasedev); } else { pbdev->fh |= FH_SHM_EMUL; } - pbdev->pdev = pdev; - pbdev->iommu = s390_pci_get_iommu(s, pci_get_bus(pdev), pdev->devfn); - pbdev->iommu->pbdev = pbdev; - pbdev->state = ZPCI_FS_DISABLED; - if (s390_pci_msix_init(pbdev)) { error_setg(errp, "MSI-X support is mandatory " "in the S390 architecture"); @@ -1004,6 +1047,9 @@ static void s390_pcihost_unplug(HotplugHandler *hotplug_dev, DeviceState *dev, pbdev->fid = 0; QTAILQ_REMOVE(&s->zpci_devs, pbdev, link); g_hash_table_remove(s->zpci_table, &pbdev->idx); + if (pbdev->iommu->dma_limit) { + s390_end_dma_count(s, pbdev->iommu->dma_limit); + } qdev_unrealize(dev); } } diff --git a/hw/s390x/s390-pci-bus.h b/hw/s390x/s390-pci-bus.h index 0458059..f166fd9 100644 --- a/hw/s390x/s390-pci-bus.h +++ b/hw/s390x/s390-pci-bus.h @@ -270,6 +270,13 @@ typedef struct S390IOTLBEntry { uint64_t perm; } S390IOTLBEntry; +typedef struct S390PCIDMACount { + int id; + int users; + uint32_t avail; + QTAILQ_ENTRY(S390PCIDMACount) link; +} S390PCIDMACount; + struct S390PCIIOMMU { Object parent_obj; S390PCIBusDevice *pbdev; @@ -281,6 +288,7 @@ struct S390PCIIOMMU { uint64_t pba; uint64_t pal; GHashTable *iotlb; + S390PCIDMACount *dma_limit; }; typedef struct S390PCIIOMMUTable { @@ -356,6 +364,7 @@ struct S390pciState { GHashTable *zpci_table; QTAILQ_HEAD(, SeiContainer) pending_sei; QTAILQ_HEAD(, S390PCIBusDevice) zpci_devs; + QTAILQ_HEAD(, S390PCIDMACount) zpci_dma_limit; }; S390pciState *s390_get_phb(void); diff --git a/hw/s390x/s390-pci-inst.c 
b/hw/s390x/s390-pci-inst.c index 2f7a7d7..cc34b17 100644 --- a/hw/s390x/s390-pci-inst.c +++ b/hw/s390x/s390-pci-inst.c @@ -32,6 +32,9 @@ } \ } while (0) +#define inc_dma_avail(iommu) if (iommu->dma_limit) iommu->dma_limit->avail++; +#define dec_dma_avail(iommu) if (iommu->dma_limit) iommu->dma_limit->avail--; + static void s390_set_status_code(CPUS390XState *env, uint8_t r, uint64_t status_code) { @@ -572,7 +575,8 @@ int pcistg_service_call(S390CPU *cpu, uint8_t r1, uint8_t r2, uintptr_t ra) return 0; } -static void s390_pci_update_iotlb(S390PCIIOMMU *iommu, S390IOTLBEntry *entry) +static uint32_t s390_pci_update_iotlb(S390PCIIOMMU *iommu, + S390IOTLBEntry *entry) { S390IOTLBEntry *cache = g_hash_table_lookup(iommu->iotlb, &entry->iova); IOMMUTLBEntry notify = { @@ -585,14 +589,15 @@ static void s390_pci_update_iotlb(S390PCIIOMMU *iommu, S390IOTLBEntry *entry) if (entry->perm == IOMMU_NONE) { if (!cache) { - return; + goto out; } g_hash_table_remove(iommu->iotlb, &entry->iova); + inc_dma_avail(iommu); } else { if (cache) { if (cache->perm == entry->perm && cache->translated_addr == entry->translated_addr) { - return; + goto out; } notify.perm = IOMMU_NONE; @@ -606,9 +611,13 @@ static void s390_pci_update_iotlb(S390PCIIOMMU *iommu, S390IOTLBEntry *entry) cache->len = PAGE_SIZE; cache->perm = entry->perm; g_hash_table_replace(iommu->iotlb, &cache->iova, cache); + dec_dma_avail(iommu); } memory_region_notify_iommu(&iommu->iommu_mr, 0, notify); + +out: + return iommu->dma_limit ? 
iommu->dma_limit->avail : 1; } int rpcit_service_call(S390CPU *cpu, uint8_t r1, uint8_t r2, uintptr_t ra) @@ -620,6 +629,7 @@ int rpcit_service_call(S390CPU *cpu, uint8_t r1, uint8_t r2, uintptr_t ra) S390PCIIOMMU *iommu; S390IOTLBEntry entry; hwaddr start, end; + uint32_t dma_avail; if (env->psw.mask & PSW_MASK_PSTATE) { s390_program_interrupt(env, PGM_PRIVILEGED, ra); @@ -658,6 +668,11 @@ int rpcit_service_call(S390CPU *cpu, uint8_t r1, uint8_t r2, uintptr_t ra) } iommu = pbdev->iommu; + if (iommu->dma_limit) { + dma_avail = iommu->dma_limit->avail; + } else { + dma_avail = 1; + } if (!iommu->g_iota) { error = ERR_EVENT_INVALAS; goto err; @@ -675,8 +690,9 @@ int rpcit_service_call(S390CPU *cpu, uint8_t r1, uint8_t r2, uintptr_t ra) } start += entry.len; - while (entry.iova < start && entry.iova < end) { - s390_pci_update_iotlb(iommu, &entry); + while (entry.iova < start && entry.iova < end && + (dma_avail > 0 || entry.perm == IOMMU_NONE)) { + dma_avail = s390_pci_update_iotlb(iommu, &entry); entry.iova += PAGE_SIZE; entry.translated_addr += PAGE_SIZE; } @@ -689,7 +705,13 @@ err: s390_pci_generate_error_event(error, pbdev->fh, pbdev->fid, start, 0); } else { pbdev->fmb.counter[ZPCI_FMB_CNT_RPCIT]++; - setcc(cpu, ZPCI_PCI_LS_OK); + if (dma_avail > 0) { + setcc(cpu, ZPCI_PCI_LS_OK); + } else { + /* vfio DMA mappings are exhausted, trigger a RPCIT */ + setcc(cpu, ZPCI_PCI_LS_ERR); + s390_set_status_code(env, r1, ZPCI_RPCIT_ST_INSUFF_RES); + } } return 0; } diff --git a/hw/s390x/s390-pci-inst.h b/hw/s390x/s390-pci-inst.h index fa3bf8b..8ee3a3c 100644 --- a/hw/s390x/s390-pci-inst.h +++ b/hw/s390x/s390-pci-inst.h @@ -254,6 +254,9 @@ typedef struct ClpReqRspQueryPciGrp { #define ZPCI_STPCIFC_ST_INVAL_DMAAS 28 #define ZPCI_STPCIFC_ST_ERROR_RECOVER 40 +/* Refresh PCI Translations status codes */ +#define ZPCI_RPCIT_ST_INSUFF_RES 16 + /* FIB function controls */ #define ZPCI_FIB_FC_ENABLED 0x80 #define ZPCI_FIB_FC_ERROR 0x40
When an s390 guest is using lazy unmapping, it can result in a very large number of outstanding DMA requests, far beyond the default limit configured for vfio. Let's track DMA usage similar to vfio in the host, and trigger the guest to flush their DMA mappings before vfio runs out. Signed-off-by: Matthew Rosato <mjrosato@linux.ibm.com> --- hw/s390x/s390-pci-bus.c | 56 +++++++++++++++++++++++++++++++++++++++++++----- hw/s390x/s390-pci-bus.h | 9 ++++++++ hw/s390x/s390-pci-inst.c | 34 +++++++++++++++++++++++------ hw/s390x/s390-pci-inst.h | 3 +++ 4 files changed, 91 insertions(+), 11 deletions(-)