@@ -8,6 +8,7 @@
*
*/
#include <linux/cpumask.h>
+#include <linux/pagemap.h>
#include <linux/kvm_host.h>
#include <linux/kvm_irqfd.h>
#include <linux/perf_event.h>
@@ -476,7 +477,7 @@ static inline u32 reclaim_flags(bool share)
GUNYAH_MEMEXTENT_DONATE_FROM_PROTECTED);
}
-static int gunyah_memory_provide_folio(struct gunyah_vm *ghvm,
+static int __gunyah_memory_provide_folio(struct gunyah_vm *ghvm,
struct folio *folio, gfn_t gfn, bool share, bool write)
{
struct gunyah_resource *guest_extent, *host_extent, *addrspace;
@@ -573,7 +574,7 @@ static int gunyah_memory_provide_folio(struct gunyah_vm *ghvm,
return ret;
}
-static int gunyah_memory_reclaim_folio(struct gunyah_vm *ghvm,
+static int gunyah_memory_reclaim_folio_locked(struct gunyah_vm *ghvm,
struct folio *folio, gfn_t gfn, bool share)
{
u32 map_flags = BIT(GUNYAH_ADDRSPACE_MAP_FLAG_PARTIAL);
@@ -713,6 +714,144 @@ static int gunyah_reclaim_memory_parcel(struct gunyah_vm *ghvm,
return 0;
}
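+/*
+ * Donate the folio backing @gfn to the guest in response to a vCPU demand
+ * fault. The backing page is looked up in the memslot's arch.pages array;
+ * the folio lock serializes vCPUs faulting on the same folio. On success
+ * the folio is marked PG_private so reclaim can identify it later.
+ */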
+static int gunyah_memory_provide_folio(struct gunyah_vm *ghvm, gfn_t gfn, bool write)
+{
+ struct kvm *kvm = &ghvm->kvm;
+ struct kvm_memory_slot *memslot;
+ struct page *page;
+ struct folio *folio;
+ int ret;
+
+ /* Gunyah always starts guest address space at 1G */
+ if (gfn < gpa_to_gfn(SZ_1G))
+ return -EINVAL;
+
+ memslot = gfn_to_memslot(kvm, gfn);
+ if (!memslot)
+ return -ENOENT;
+
+ page = memslot->arch.pages[gfn - memslot->base_gfn];
+ folio = page_folio(page);
+
+ folio_lock(folio);
+ /* Did we race with another vCPU? */
+ if (folio_test_private(folio)) {
+ folio_unlock(folio);
+ return 0;
+ }
+
+ ret = __gunyah_memory_provide_folio(ghvm, folio, gfn, false, write);
+ if (ret) {
+ folio_unlock(folio);
+ return ret;
+ }
+ folio_set_private(folio);
+ folio_unlock(folio);
+
+ return 0;
+}
+
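+/*
+ * Reclaim every previously donated folio (identified by PG_private) in the
+ * gfn range [start, start + nr), clearing PG_private once the folio has
+ * been taken back from the guest.
+ */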
+static int gunyah_reclaim_memory_range(struct gunyah_vm *ghvm, gfn_t start, gfn_t nr)
+{
+ struct kvm *kvm = &ghvm->kvm;
+ struct kvm_memory_slot *slot;
+ struct kvm_memslots *slots;
+ struct kvm_memslot_iter iter;
+ gfn_t end = start + nr;
+ int i, ret;
+
+ for (i = 0; i < kvm_arch_nr_memslot_as_ids(kvm); i++) {
+ slots = __kvm_memslots(kvm, i);
+
+ kvm_for_each_memslot_in_gfn_range(&iter, slots, start, end) {
+ struct page **pages, *page;
+ struct folio *folio;
+ unsigned long offset, last;
+
+ slot = iter.slot;
+ pages = slot->arch.pages;
+ /* Clip the walk to the part of this slot inside [start, end) */
+ offset = max(start, slot->base_gfn) - slot->base_gfn;
+ last = min(end, slot->base_gfn + slot->npages) - slot->base_gfn;
+ while (offset < last) {
+ page = pages[offset];
+ folio = page_folio(page);
+ folio_lock(folio);
+ if (folio_test_private(folio)) {
+ ret = gunyah_memory_reclaim_folio_locked(ghvm, folio,
+ slot->base_gfn + offset, false);
+ if (ret) {
+ WARN_ON_ONCE(1);
+ folio_unlock(folio);
+ return ret;
+ }
+ folio_clear_private(folio);
+ }
+ folio_unlock(folio);
+ offset += folio_nr_pages(folio);
+ }
+ }
+ }
+ return 0;
+}
+
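+/*
+ * Mark the folios of an already-donated memory parcel (e.g. the DTB parcel)
+ * with PG_private so that gunyah_reclaim_memory_range() treats them like
+ * demand-paged folios at teardown.
+ */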
+static int gunyah_memory_parcel_to_paged(struct gunyah_vm *ghvm, gfn_t start, gfn_t nr)
+{
+ struct kvm *kvm = &ghvm->kvm;
+ struct kvm_memory_slot *memslot;
+ struct page **pages, *page;
+ struct folio *folio;
+ int i;
+
+ memslot = gfn_to_memslot(kvm, start);
+ if (!memslot)
+ return -ENOENT;
+
+ if (start - memslot->base_gfn + nr > memslot->npages)
+ return -EINVAL;
+
+ pages = &memslot->arch.pages[start - memslot->base_gfn];
+
+ for (i = 0; i < nr;) {
+ page = pages[i];
+ folio = page_folio(page);
+ VM_BUG_ON(folio_test_private(folio));
+ folio_set_private(folio);
+ i += folio_nr_pages(folio);
+ }
+
+ return 0;
+}
+
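+/*
+ * Report the demand-paged memory ranges to the resource manager. Guest
+ * addresses below 1G are reserved by Gunyah, so only memslots at or above
+ * 1G are included.
+ */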
+static int gunyah_start_paging(struct gunyah_vm *ghvm)
+{
+ struct kvm_memslots *slots;
+ struct kvm_memory_slot *slot;
+ struct gunyah_rm_mem_entry *entries, *entry;
+ int count = 0;
+ int bkt, ret;
+
+ slots = kvm_memslots(&ghvm->kvm);
+ kvm_for_each_memslot(slot, bkt, slots) {
+ if (slot->base_gfn >= gpa_to_gfn(SZ_1G))
+ count++;
+ }
+
+ entries = entry = kcalloc(count, sizeof(*entries), GFP_KERNEL);
+ if (!entries)
+ return -ENOMEM;
+
+ kvm_for_each_memslot(slot, bkt, slots) {
+ if (slot->base_gfn < gpa_to_gfn(SZ_1G))
+ continue;
+ entry->phys_addr = cpu_to_le64(gfn_to_gpa(slot->base_gfn));
+ entry->size = cpu_to_le64(gfn_to_gpa(slot->npages));
+ entry++;
+ }
+
+ ret = gunyah_rm_vm_set_demand_paging(ghvm->rm, ghvm->vmid, count, entries);
+ kfree(entries);
+ return ret;
+}
+
int kvm_arch_vcpu_should_kick(struct kvm_vcpu *vcpu)
{
return kvm_vcpu_exiting_guest_mode(vcpu) == IN_GUEST_MODE;
@@ -772,7 +911,14 @@ static int gunyah_handle_page_fault(
struct gunyah_vcpu *vcpu,
const struct gunyah_hypercall_vcpu_run_resp *vcpu_run_resp)
{
- return -EINVAL;
+ u64 addr = vcpu_run_resp->state_data[0];
+ bool write = !!vcpu_run_resp->state_data[1];
+ int ret;
+
+ ret = gunyah_memory_provide_folio(vcpu->ghvm, gpa_to_gfn(addr), write);
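+ /* Treat -EAGAIN as success: the guest resumes and retries the access */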
+ if (!ret || ret == -EAGAIN)
+ return 0;
+ return ret;
}
static bool gunyah_kvm_handle_mmio(struct gunyah_vcpu *vcpu,
@@ -1614,6 +1760,28 @@ static int gunyah_vm_start(struct gunyah_vm *ghvm)
goto err;
}
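+
+ /* Register the demand-paged memory ranges with RM before VM init */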
+ ret = gunyah_start_paging(ghvm);
+ if (ret) {
+ pr_warn("Failed to set up demand paging: %d\n", ret);
+ goto err;
+ }
+
+ ret = gunyah_memory_parcel_to_paged(ghvm, ghvm->dtb.parcel_start,
+ ghvm->dtb.parcel_pages);
+ if (ret) {
+ pr_warn("Failed to set up paging for memparcel: %d\n", ret);
+ goto err;
+ }
+
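+ /* Tell RM where the DTB memory parcel sits in the guest address space */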
+ ret = gunyah_rm_vm_set_address_layout(
+ ghvm->rm, ghvm->vmid, GUNYAH_RM_RANGE_ID_IMAGE,
+ gfn_to_gpa(ghvm->dtb.parcel_start),
+ gfn_to_gpa(ghvm->dtb.parcel_pages));
+ if (ret) {
+ pr_warn("Failed to set location of DTB mem parcel: %d\n", ret);
+ goto err;
+ }
+
ret = gunyah_rm_vm_init(ghvm->rm, ghvm->vmid);
if (ret) {
ghvm->vm_status = GUNYAH_RM_VM_STATUS_INIT_FAILED;
@@ -1719,6 +1887,14 @@ static void gunyah_destroy_vm(struct gunyah_vm *ghvm)
pr_err("Failed to reclaim DTB parcel: %d\n", ret);
}
+ /*
+ * If this fails, we're going to lose the memory for good, which is
+ * WARN_ON-worthy but not fatal to the host: we just leak the memory.
+ * This call should always succeed, though, because the VM is not
+ * running and RM will let us reclaim all the memory.
+ */
+ WARN_ON(gunyah_reclaim_memory_range(ghvm, gpa_to_gfn(SZ_1G), gpa_to_gfn(U64_MAX)));
+
gunyah_vm_remove_resource_ticket(ghvm, &ghvm->addrspace_ticket);
gunyah_vm_remove_resource_ticket(ghvm, &ghvm->host_shared_extent_ticket);
gunyah_vm_remove_resource_ticket(ghvm, &ghvm->host_private_extent_ticket);
Tell resource manager to enable demand paging and wire vCPU faults to
provide the backing folio. When a folio is donated to the guest,
PG_private is set on the folio so that we can recognize it later when
taking it back (e.g. to reclaim the folio when the VM exits).

Signed-off-by: Karim Manaouil <karim.manaouil@linaro.org>
---
 arch/arm64/kvm/gunyah.c | 182 +++++++++++++++++++++++++++++++++++++++-
 1 file changed, 179 insertions(+), 3 deletions(-)