@@ -1120,6 +1120,70 @@ static ssize_t vfio_pci_write(void *device_data, const char __user *buf,
return vfio_pci_rw(device_data, (char __user *)buf, count, ppos, true);
}
+static int vfio_pci_add_vma(struct vfio_pci_device *vdev,
+ struct vm_area_struct *vma)
+{
+ struct vfio_pci_mmap_vma *mmap_vma;
+
+ mmap_vma = kmalloc(sizeof(*mmap_vma), GFP_KERNEL);
+ if (!mmap_vma)
+ return -ENOMEM;
+
+ mmap_vma->vma = vma;
+
+ mutex_lock(&vdev->vma_lock);
+ list_add(&mmap_vma->vma_next, &vdev->vma_list);
+ mutex_unlock(&vdev->vma_lock);
+
+ return 0;
+}
+
+/*
+ * Zap mmaps on open so that we can fault them in on access and therefore
+ * our vma_list only tracks mappings accessed since last zap.
+ */
+static void vfio_pci_mmap_open(struct vm_area_struct *vma)
+{
+ zap_vma_ptes(vma, vma->vm_start, vma->vm_end - vma->vm_start);
+}
+
+static void vfio_pci_mmap_close(struct vm_area_struct *vma)
+{
+ struct vfio_pci_device *vdev = vma->vm_private_data;
+ struct vfio_pci_mmap_vma *mmap_vma;
+
+ mutex_lock(&vdev->vma_lock);
+ list_for_each_entry(mmap_vma, &vdev->vma_list, vma_next) {
+ if (mmap_vma->vma == vma) {
+ list_del(&mmap_vma->vma_next);
+ kfree(mmap_vma);
+ break;
+ }
+ }
+ mutex_unlock(&vdev->vma_lock);
+}
+
+static int vfio_pci_mmap_fault(struct vm_fault *vmf)
+{
+ struct vm_area_struct *vma = vmf->vma;
+ struct vfio_pci_device *vdev = vma->vm_private_data;
+
+ if (vfio_pci_add_vma(vdev, vma))
+ return VM_FAULT_OOM;
+
+ if (remap_pfn_range(vma, vma->vm_start, vma->vm_pgoff,
+ vma->vm_end - vma->vm_start, vma->vm_page_prot))
+ return VM_FAULT_SIGBUS;
+
+ return VM_FAULT_NOPAGE;
+}
+
+static const struct vm_operations_struct vfio_pci_mmap_ops = {
+ .open = vfio_pci_mmap_open,
+ .close = vfio_pci_mmap_close,
+ .fault = vfio_pci_mmap_fault,
+};
+
static int vfio_pci_mmap(void *device_data, struct vm_area_struct *vma)
{
struct vfio_pci_device *vdev = device_data;
@@ -1185,8 +1249,14 @@ static int vfio_pci_mmap(void *device_data, struct vm_area_struct *vma)
vma->vm_page_prot = pgprot_noncached(vma->vm_page_prot);
vma->vm_pgoff = (pci_resource_start(pdev, index) >> PAGE_SHIFT) + pgoff;
- return remap_pfn_range(vma, vma->vm_start, vma->vm_pgoff,
- req_len, vma->vm_page_prot);
+ /*
+ * See remap_pfn_range(), called from vfio_pci_fault() but we can't
+ * change vm_flags within the fault handler. Set them now.
+ */
+ vma->vm_flags |= VM_IO | VM_PFNMAP | VM_DONTEXPAND | VM_DONTDUMP;
+ vma->vm_ops = &vfio_pci_mmap_ops;
+
+ return 0;
}
static void vfio_pci_request(void *device_data, unsigned int count)
@@ -1243,6 +1313,8 @@ static int vfio_pci_probe(struct pci_dev *pdev, const struct pci_device_id *id)
vdev->irq_type = VFIO_PCI_NUM_IRQS;
mutex_init(&vdev->igate);
spin_lock_init(&vdev->irqlock);
+ mutex_init(&vdev->vma_lock);
+ INIT_LIST_HEAD(&vdev->vma_list);
ret = vfio_add_group_dev(&pdev->dev, &vfio_pci_ops, vdev);
if (ret) {
@@ -63,6 +63,11 @@ struct vfio_pci_dummy_resource {
struct list_head res_next;
};
+struct vfio_pci_mmap_vma {
+ struct vm_area_struct *vma;
+ struct list_head vma_next;
+};
+
struct vfio_pci_device {
struct pci_dev *pdev;
void __iomem *barmap[PCI_STD_RESOURCE_END + 1];
@@ -95,6 +100,8 @@ struct vfio_pci_device {
struct eventfd_ctx *err_trigger;
struct eventfd_ctx *req_trigger;
struct list_head dummy_resources_list;
+ struct mutex vma_lock;
+ struct list_head vma_list;
};
#define is_intx(vdev) (vdev->irq_type == VFIO_PCI_INTX_IRQ_INDEX)