@@ -5,8 +5,7 @@ Virtual Machine Manager
=======================
The Gunyah Virtual Machine Manager is a Linux driver to support launching
-virtual machines using Gunyah. It presently supports launching non-proxy
-scheduled Linux-like virtual machines.
+virtual machines using Gunyah.
Except for some basic information about the location of initial binaries,
most of the configuration about a Gunyah virtual machine is described in the
@@ -116,3 +115,30 @@ GH_VM_START
~~~~~~~~~~~
This ioctl starts the VM.
+
+GH_VM_ADD_FUNCTION
+~~~~~~~~~~~~~~~~~~
+
+This ioctl registers a Gunyah VM function with the VM manager. The VM function
+is described with a `type` string and some arguments for that type. Typically,
+the function is added before the VM starts, but the function doesn't "operate"
+until the VM starts with GH_VM_START: e.g. vCPU ioclts will all return an error
+until the VM starts because the vCPUs don't exist until the VM is started. This
+allows the VMM to set up all the kernel functionality needed for the VM *before*
+the VM starts.
+
+The possible types are documented below:
+
+Type: "vcpu"
+^^^^^^^^^^^^
+
+::
+
+ struct gh_fn_vcpu_arg {
+ __u32 vcpu_id;
+ };
+
+The vcpu type will register with the VM Manager to expect to control
+vCPU number `vcpu_id`. It returns a file descriptor allowing interaction with
+the vCPU. See the Gunyah vCPU API description sections for interacting with
+the Gunyah vCPU file descriptors.
@@ -15,6 +15,7 @@ static const uint32_t gunyah_known_uuids[][4] = {
#define GH_HYPERCALL_HYP_IDENTIFY GH_HYPERCALL(0x0000)
#define GH_HYPERCALL_MSGQ_SEND GH_HYPERCALL(0x001B)
#define GH_HYPERCALL_MSGQ_RECV GH_HYPERCALL(0x001C)
+#define GH_HYPERCALL_VCPU_RUN GH_HYPERCALL(0x0065)
/**
* gh_hypercall_get_uid() - Returns a UID when running under a Gunyah hypervisor
@@ -104,5 +105,32 @@ int gh_hypercall_msgq_recv(u64 capid, uintptr_t buff, size_t size, size_t *recv_
}
EXPORT_SYMBOL_GPL(gh_hypercall_msgq_recv);
+int gh_hypercall_vcpu_run(u64 capid, u64 *resume_data, struct gh_hypercall_vcpu_run_resp *resp)
+{
+ struct arm_smccc_1_2_regs args = {
+ .a0 = GH_HYPERCALL_VCPU_RUN,
+ .a1 = capid,
+ .a2 = resume_data[0],
+ .a3 = resume_data[1],
+ .a4 = resume_data[2],
+ /* C language says this will be implictly zero. Gunyah requires 0, so be explicit */
+ .a5 = 0,
+ };
+ struct arm_smccc_1_2_regs res;
+
+ arm_smccc_1_2_hvc(&args, &res);
+
+ if (res.a0)
+ return res.a0;
+
+ resp->state = res.a1;
+ resp->state_data[0] = res.a2;
+ resp->state_data[1] = res.a3;
+ resp->state_data[2] = res.a4;
+
+ return res.a0;
+}
+EXPORT_SYMBOL_GPL(gh_hypercall_vcpu_run);
+
MODULE_LICENSE("GPL");
MODULE_DESCRIPTION("Gunyah Hypervisor Hypercalls");
@@ -15,3 +15,14 @@ config GUNYAH
config GUNYAH_PLATFORM_HOOKS
tristate
+
+config GUNYAH_VCPU
+ tristate "Runnable Gunyah vCPUs"
+ depends on GUNYAH
+ help
+ Enable kernel support for host-scheduled vCPUs running under Gunyah.
+ When selecting this option, userspace virtual machine managers (VMM)
+ can schedule the guest VM's vCPUs instead of using Gunyah's scheduler.
+ VMMs can also handle stage 2 faults of the vCPUs.
+
+ Say Y/M here if unsure and you want to support Gunyah VMMs.
@@ -5,3 +5,5 @@ obj-$(CONFIG_GUNYAH_PLATFORM_HOOKS) += gunyah_platform_hooks.o
gunyah_rsc_mgr-y += rsc_mgr.o rsc_mgr_rpc.o vm_mgr.o vm_mgr_mm.o
obj-$(CONFIG_GUNYAH) += gunyah_rsc_mgr.o
+
+obj-$(CONFIG_GUNYAH_VCPU) += gunyah_vcpu.o
new file mode 100644
@@ -0,0 +1,358 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Copyright (c) 2022-2023 Qualcomm Innovation Center, Inc. All rights reserved.
+ */
+
+#include <linux/anon_inodes.h>
+#include <linux/file.h>
+#include <linux/gunyah.h>
+#include <linux/gunyah_vm_mgr.h>
+#include <linux/interrupt.h>
+#include <linux/kref.h>
+#include <linux/mm.h>
+#include <linux/module.h>
+#include <linux/types.h>
+#include <linux/wait.h>
+
+#include "vm_mgr.h"
+
+#include <uapi/linux/gunyah.h>
+
+#define MAX_VCPU_NAME 20 /* gh-vcpu:u32_max+NUL */
+
+struct gunyah_vcpu {
+ struct gunyah_resource *rsc;
+ struct gunyah_vm_resource_ticket ticket;
+ struct gunyah_vm_function *f;
+ struct gunyah_vm *ghvm;
+
+ bool handle_mmio;
+ struct gh_vcpu_run *vcpu_run;
+
+ struct kref kref;
+ struct completion ready;
+ struct mutex run_lock;
+};
+
+/* VCPU is ready to run */
+#define GH_VCPU_STATE_READY 0
+/* VCPU is sleeping until an interrupt arrives */
+#define GH_VCPU_STATE_EXPECTS_WAKEUP 1
+/* VCPU is powered off */
+#define GH_VCPU_STATE_POWERED_OFF 2
+/* VCPU is blocked in EL2 for unspecified reason */
+#define GH_VCPU_STATE_BLOCKED 3
+/* VCPU has returned for MMIO READ */
+#define GH_VCPU_ADDRSPACE_VMMIO_READ 4
+/* VCPU has returned for MMIO WRITE */
+#define GH_VCPU_ADDRSPACE_VMMIO_WRITE 5
+
+static void vcpu_release(struct kref *kref)
+{
+ struct gunyah_vcpu *vcpu = container_of(kref, struct gunyah_vcpu, kref);
+
+ kfree(vcpu);
+}
+
+/*
+ * When hypervisor allows us to schedule vCPU again, it gives us an interrupt
+ */
+static irqreturn_t gh_vcpu_irq_handler(int irq, void *data)
+{
+ struct gunyah_vcpu *vcpu = data;
+
+ complete(&vcpu->ready);
+ return IRQ_HANDLED;
+}
+
+static void gh_handle_mmio_return(struct gunyah_vcpu *vcpu, u64 *state)
+{
+ if (!vcpu->vcpu_run->mmio.is_write)
+ memcpy(&state[0], vcpu->vcpu_run->mmio.data, vcpu->vcpu_run->mmio.len);
+
+ vcpu->handle_mmio = false;
+ vcpu->vcpu_run->exit_reason = GH_VM_EXIT_UNKNOWN;
+}
+
+static bool gh_handle_mmio(struct gunyah_vcpu *vcpu,
+ struct gh_hypercall_vcpu_run_resp *vcpu_run_resp)
+{
+ int ret = 0;
+
+ if (vcpu_run_resp->state == GH_VCPU_ADDRSPACE_VMMIO_READ) {
+ vcpu->vcpu_run->mmio.is_write = 0;
+ vcpu->vcpu_run->exit_reason = GH_VM_EXIT_MMIO;
+ } else { /* GH_VCPU_ADDRSPACE_VMMIO_WRITE case */
+ ret = gh_vm_mgr_mmio_write(vcpu->f->ghvm, vcpu_run_resp->state_data[0],
+ vcpu_run_resp->state_data[1], vcpu_run_resp->state_data[2]);
+ if (!ret)
+ return true;
+
+ vcpu->vcpu_run->mmio.is_write = 1;
+ memcpy(vcpu->vcpu_run->mmio.data, &vcpu_run_resp->state_data[2],
+ vcpu_run_resp->state_data[1]);
+ }
+
+ vcpu->vcpu_run->mmio.phys_addr = vcpu_run_resp->state_data[0];
+ vcpu->vcpu_run->mmio.len = vcpu_run_resp->state_data[1];
+ vcpu->vcpu_run->exit_reason = GH_VM_EXIT_MMIO;
+
+ return false;
+}
+
+/**
+ * gh_vcpu_run() - Request Gunyah to begin scheduling this vCPU.
+ * @vcpu: The client descriptor that was obtained via gunyah_vcpu_alloc()
+ */
+static int gh_vcpu_run(struct gunyah_vcpu *vcpu)
+{
+ struct gh_hypercall_vcpu_run_resp vcpu_run_resp;
+ u64 state_data[3] = { 0 };
+ int ret = 0;
+
+ ret = gh_vm_ensure_started(vcpu->ghvm);
+ if (ret)
+ return ret;
+
+ if (mutex_lock_interruptible(&vcpu->run_lock))
+ return -ERESTARTSYS;
+
+ if (!vcpu->rsc || !vcpu->f) {
+ ret = -ENODEV;
+ goto out;
+ }
+ /* Last exit reason was EXIT_MMIO. Userspace has filled in the data, now we need to tell
+ * Gunyah about the response.
+ */
+ if (vcpu->handle_mmio)
+ gh_handle_mmio_return(vcpu, state_data);
+
+ while (!ret && !signal_pending(current)) {
+ if (vcpu->vcpu_run->immediate_exit) {
+ ret = -EINTR;
+ goto out;
+ }
+ ret = gh_hypercall_vcpu_run(vcpu->rsc->capid, state_data, &vcpu_run_resp);
+ if (ret == GH_ERROR_OK) {
+ switch (vcpu_run_resp.state) {
+ case GH_VCPU_STATE_READY:
+ if (need_resched())
+ schedule();
+ break;
+ case GH_VCPU_STATE_EXPECTS_WAKEUP:
+ case GH_VCPU_STATE_POWERED_OFF:
+ ret = wait_for_completion_interruptible(&vcpu->ready);
+ /* reinitialize completion before next VCPU_RUN. If we reinitialize
+ * after the VCPU_RUN, interrupt may have already come before
+ * we can re-initialize and then waiting for an interrupt that
+ * was already handled.
+ */
+ reinit_completion(&vcpu->ready);
+ break;
+ case GH_VCPU_STATE_BLOCKED:
+ schedule();
+ break;
+ case GH_VCPU_ADDRSPACE_VMMIO_READ:
+ case GH_VCPU_ADDRSPACE_VMMIO_WRITE:
+ if (!gh_handle_mmio(vcpu, &vcpu_run_resp)) {
+ vcpu->handle_mmio = true;
+ goto out;
+ }
+ break;
+ default:
+ pr_warn_ratelimited("Unknown vCPU state: %llx\n",
+ vcpu_run_resp.state);
+ schedule();
+ break;
+ }
+ } else if (ret == GH_ERROR_RETRY) {
+ schedule();
+ ret = 0;
+ } else
+ ret = gh_remap_error(ret);
+ }
+
+out:
+ mutex_unlock(&vcpu->run_lock);
+
+ if (signal_pending(current))
+ return -ERESTARTSYS;
+
+ return ret;
+}
+
+static long gh_vcpu_ioctl(struct file *filp, unsigned int cmd, unsigned long arg)
+{
+ struct gunyah_vcpu *vcpu = filp->private_data;
+ long ret = -EINVAL;
+
+ switch (cmd) {
+ case GH_VCPU_RUN:
+ ret = gh_vcpu_run(vcpu);
+ break;
+ case GH_VCPU_MMAP_SIZE:
+ ret = PAGE_SIZE;
+ break;
+ default:
+ break;
+ }
+ return ret;
+}
+
+static int gh_vcpu_release(struct inode *inode, struct file *filp)
+{
+ struct gunyah_vcpu *vcpu = filp->private_data;
+
+ kref_put(&vcpu->kref, vcpu_release);
+ return 0;
+}
+
+static vm_fault_t gh_vcpu_fault(struct vm_fault *vmf)
+{
+ struct gunyah_vcpu *vcpu = vmf->vma->vm_file->private_data;
+ struct page *page = NULL;
+
+ if (vmf->pgoff == 0)
+ page = virt_to_page(vcpu->vcpu_run);
+
+ get_page(page);
+ vmf->page = page;
+ return 0;
+}
+
+static const struct vm_operations_struct gh_vcpu_ops = {
+ .fault = gh_vcpu_fault,
+};
+
+static int gh_vcpu_mmap(struct file *file, struct vm_area_struct *vma)
+{
+ vma->vm_ops = &gh_vcpu_ops;
+ return 0;
+}
+
+static const struct file_operations gh_vcpu_fops = {
+ .unlocked_ioctl = gh_vcpu_ioctl,
+ .release = gh_vcpu_release,
+ .llseek = noop_llseek,
+ .mmap = gh_vcpu_mmap,
+};
+
+static int gunyah_vcpu_populate(struct gunyah_vm_resource_ticket *ticket,
+ struct gunyah_resource *ghrsc)
+{
+ struct gunyah_vcpu *vcpu = container_of(ticket, struct gunyah_vcpu, ticket);
+ int ret;
+
+ mutex_lock(&vcpu->run_lock);
+ vcpu->rsc = ghrsc;
+
+ init_completion(&vcpu->ready);
+ mutex_unlock(&vcpu->run_lock);
+
+ ret = request_irq(vcpu->rsc->irq, gh_vcpu_irq_handler, IRQF_TRIGGER_RISING, "gh_vcpu",
+ vcpu);
+ if (ret)
+ pr_warn("Failed to request vcpu irq %d: %d", vcpu->rsc->irq, ret);
+
+ return ret;
+}
+
+static void gunyah_vcpu_unpopulate(struct gunyah_vm_resource_ticket *ticket,
+ struct gunyah_resource *ghrsc)
+{
+ struct gunyah_vcpu *vcpu = container_of(ticket, struct gunyah_vcpu, ticket);
+
+ vcpu->vcpu_run->immediate_exit = true;
+ mutex_lock(&vcpu->run_lock);
+ free_irq(vcpu->rsc->irq, vcpu);
+ vcpu->rsc = NULL;
+ mutex_unlock(&vcpu->run_lock);
+}
+
+static long gunyah_vcpu_bind(struct gunyah_vm_function *f)
+{
+ struct gunyah_vcpu *vcpu;
+ char name[MAX_VCPU_NAME];
+ struct file *file;
+ struct page *page;
+ int fd;
+ long r;
+
+ if (!gh_api_has_feature(GH_API_FEATURE_VCPU))
+ return -EOPNOTSUPP;
+
+ vcpu = kzalloc(sizeof(*vcpu), GFP_KERNEL);
+ if (!vcpu)
+ return -ENOMEM;
+
+ vcpu->f = f;
+ f->data = vcpu;
+ mutex_init(&vcpu->run_lock);
+ kref_init(&vcpu->kref);
+
+ page = alloc_page(GFP_KERNEL | __GFP_ZERO);
+ if (!page) {
+ r = -ENOMEM;
+ goto err_destroy_vcpu;
+ }
+ vcpu->vcpu_run = page_address(page);
+
+ vcpu->ticket.resource_type = GUNYAH_RESOURCE_TYPE_VCPU;
+ vcpu->ticket.label = f->fn.vcpu.vcpu_id;
+ vcpu->ticket.owner = THIS_MODULE;
+ vcpu->ticket.populate = gunyah_vcpu_populate;
+ vcpu->ticket.unpopulate = gunyah_vcpu_unpopulate;
+
+ r = ghvm_add_resource_ticket(f->ghvm, &vcpu->ticket);
+ if (r)
+ goto err_destroy_page;
+
+ fd = get_unused_fd_flags(O_CLOEXEC);
+ if (fd < 0) {
+ r = fd;
+ goto err_remove_vcpu;
+ }
+
+ if (!get_gunyah_vm(f->ghvm)) {
+ r = -ENODEV;
+ goto err_put_fd;
+ }
+ vcpu->ghvm = f->ghvm;
+
+ kref_get(&vcpu->kref);
+ snprintf(name, sizeof(name), "gh-vcpu:%d", vcpu->ticket.label);
+ file = anon_inode_getfile(name, &gh_vcpu_fops, vcpu, O_RDWR);
+ if (IS_ERR(file)) {
+ r = PTR_ERR(file);
+ goto err_put_ghvm;
+ }
+
+ fd_install(fd, file);
+
+ return fd;
+err_put_ghvm:
+ put_gunyah_vm(vcpu->ghvm);
+err_put_fd:
+ put_unused_fd(fd);
+err_remove_vcpu:
+ ghvm_remove_resource_ticket(f->ghvm, &vcpu->ticket);
+err_destroy_page:
+ free_page((unsigned long)vcpu->vcpu_run);
+err_destroy_vcpu:
+ kfree(vcpu);
+ return r;
+}
+
+static void gunyah_vcpu_release(struct gunyah_vm_function *f)
+{
+ struct gunyah_vcpu *vcpu = f->data;
+
+ ghvm_remove_resource_ticket(vcpu->f->ghvm, &vcpu->ticket);
+ vcpu->f = NULL;
+
+ kref_put(&vcpu->kref, vcpu_release);
+}
+
+DECLARE_GUNYAH_VM_FUNCTION_INIT(vcpu, gunyah_vcpu_bind, gunyah_vcpu_release);
+MODULE_DESCRIPTION("Gunyah vCPU Driver");
+MODULE_LICENSE("GPL");
@@ -431,6 +431,31 @@ static int gh_vm_start(struct gunyah_vm *ghvm)
return ret;
}
+int gh_vm_ensure_started(struct gunyah_vm *ghvm)
+{
+ int ret;
+
+retry:
+ ret = down_read_interruptible(&ghvm->status_lock);
+ if (ret)
+ return ret;
+
+ if (unlikely(ghvm->vm_status == GH_RM_VM_STATUS_NO_STATE)) {
+ up_read(&ghvm->status_lock);
+ ret = gh_vm_start(ghvm);
+ if (ret)
+ return ret;
+ goto retry;
+ }
+
+ if (unlikely(ghvm->vm_status != GH_RM_VM_STATUS_READY))
+ ret = -ENODEV;
+
+ up_read(&ghvm->status_lock);
+ return ret;
+}
+EXPORT_SYMBOL_GPL(gh_vm_ensure_started);
+
static void gh_vm_stop(struct gunyah_vm *ghvm)
{
int ret;
@@ -64,6 +64,7 @@ struct gunyah_vm_memory_mapping *gh_vm_mem_mapping_find(struct gunyah_vm *ghvm,
struct gunyah_vm_memory_mapping *gh_vm_mem_mapping_find_mapping(struct gunyah_vm *ghvm,
u64 gpa, u32 size);
+int gh_vm_ensure_started(struct gunyah_vm *ghvm);
int gh_vm_mgr_mmio_write(struct gunyah_vm *ghvm, u64 addr, u32 len, u64 data);
#endif
@@ -169,4 +169,11 @@ void gh_hypercall_hyp_identify(struct gh_hypercall_hyp_identify_resp *hyp_identi
int gh_hypercall_msgq_send(u64 capid, size_t size, uintptr_t buff, int tx_flags, bool *ready);
int gh_hypercall_msgq_recv(u64 capid, uintptr_t buff, size_t size, size_t *recv_size, bool *ready);
+struct gh_hypercall_vcpu_run_resp {
+ u64 state;
+ u64 state_data[3];
+};
+
+int gh_hypercall_vcpu_run(u64 capid, u64 *resume_data, struct gh_hypercall_vcpu_run_resp *resp);
+
#endif
@@ -53,9 +53,14 @@ struct gh_vm_dtb_config {
#define GUNYAH_FUNCTION_NAME_SIZE 32
#define GUNYAH_FUNCTION_MAX_ARG_SIZE 1024
+struct gh_fn_vcpu_arg {
+ __u32 vcpu_id;
+};
+
struct gh_vm_function {
char name[GUNYAH_FUNCTION_NAME_SIZE];
union {
+ struct gh_device_vcpu_arg vcpu;
char data[GUNYAH_FUNCTION_MAX_ARG_SIZE];
};
};
@@ -63,4 +68,29 @@ struct gh_vm_function {
#define GH_VM_ADD_FUNCTION _IOW(GH_IOCTL_TYPE, 0x4, struct gh_vm_function)
#define GH_VM_REMOVE_FUNCTION _IOW(GH_IOCTL_TYPE, 0x7, struct gh_vm_function)
+/* for GH_VCPU_RUN, returned by mmap(vcpu_fd, offset=0) */
+struct gh_vcpu_run {
+ /* in */
+ __u8 immediate_exit;
+ __u8 padding1[7];
+
+ /* out */
+#define GH_VM_EXIT_UNKNOWN 0
+#define GH_VM_EXIT_MMIO 1
+ __u32 exit_reason;
+
+ union {
+ /* GH_VM_EXIT_MMIO */
+ struct {
+ __u64 phys_addr;
+ __u8 data[8];
+ __u32 len;
+ __u8 is_write;
+ } mmio;
+ };
+};
+
+#define GH_VCPU_RUN _IO(GH_IOCTL_TYPE, 0x5)
+#define GH_VCPU_MMAP_SIZE _IO(GH_IOCTL_TYPE, 0x6)
+
#endif