@@ -1319,7 +1319,7 @@ yet and must be cleared on entry.
:Capability: KVM_CAP_USER_MEMORY
:Architectures: all
:Type: vm ioctl
-:Parameters: struct kvm_userspace_memory_region (in)
+:Parameters: struct kvm_userspace_memory_region(_ext) (in)
:Returns: 0 on success, -1 on error
::
@@ -1332,9 +1332,18 @@ yet and must be cleared on entry.
__u64 userspace_addr; /* start of the userspace allocated memory */
};
+ struct kvm_userspace_memory_region_ext {
+ struct kvm_userspace_memory_region region;
+ __u64 private_offset;
+ __u32 private_fd;
+ __u32 pad1;
+ __u64 pad2[14];
+};
+
/* for kvm_memory_region::flags */
#define KVM_MEM_LOG_DIRTY_PAGES (1UL << 0)
#define KVM_MEM_READONLY (1UL << 1)
+ #define KVM_MEM_PRIVATE (1UL << 2)
This ioctl allows the user to create, modify or delete a guest physical
memory slot. Bits 0-15 of "slot" specify the slot id and this value
@@ -1365,12 +1374,27 @@ It is recommended that the lower 21 bits of guest_phys_addr and userspace_addr
be identical. This allows large pages in the guest to be backed by large
pages in the host.
-The flags field supports two flags: KVM_MEM_LOG_DIRTY_PAGES and
-KVM_MEM_READONLY. The former can be set to instruct KVM to keep track of
-writes to memory within the slot. See KVM_GET_DIRTY_LOG ioctl to know how to
-use it. The latter can be set, if KVM_CAP_READONLY_MEM capability allows it,
-to make a new slot read-only. In this case, writes to this memory will be
-posted to userspace as KVM_EXIT_MMIO exits.
+kvm_userspace_memory_region_ext includes all the kvm_userspace_memory_region
+fields. It also includes additional fields for some specific features. See
+below description of flags field for more information. It's recommended to use
+kvm_userspace_memory_region_ext in new userspace code.
+
+The flags field supports below flags:
+
+- KVM_MEM_LOG_DIRTY_PAGES can be set to instruct KVM to keep track of writes to
+ memory within the slot. See KVM_GET_DIRTY_LOG ioctl to know how to use it.
+
+- KVM_MEM_READONLY can be set, if KVM_CAP_READONLY_MEM capability allows it, to
+ make a new slot read-only. In this case, writes to this memory will be posted
+ to userspace as KVM_EXIT_MMIO exits.
+
+- KVM_MEM_PRIVATE can be set to indicate a new slot has private memory backed by
+ a file descirptor(fd) and the content of the private memory is invisible to
+ userspace. In this case, userspace should use private_fd/private_offset in
+ kvm_userspace_memory_region_ext to instruct KVM to provide private memory to
+ guest. Userspace should guarantee not to map the same pfn indicated by
+ private_fd/private_offset to different gfns with multiple memslots. Failed to
+ do this may result undefined behavior.
When the KVM_CAP_SYNC_MMU capability is available, changes in the backing of
the memory region are automatically reflected into the guest. For example, an
@@ -48,6 +48,8 @@ config KVM
select SRCU
select INTERVAL_TREE
select HAVE_KVM_PM_NOTIFIER if PM
+ select HAVE_KVM_PRIVATE_MEM if X86_64
+ select MEMFILE_NOTIFIER if HAVE_KVM_PRIVATE_MEM
help
Support hosting fully virtualized guest machines using hardware
virtualization extensions. You will need a fairly recent
@@ -12154,7 +12154,7 @@ void __user * __x86_set_memory_region(struct kvm *kvm, int id, gpa_t gpa,
}
for (i = 0; i < KVM_ADDRESS_SPACE_NUM; i++) {
- struct kvm_userspace_memory_region m;
+ struct kvm_user_mem_region m;
m.slot = id | (i << 16);
m.flags = 0;
@@ -44,6 +44,7 @@
#include <asm/kvm_host.h>
#include <linux/kvm_dirty_ring.h>
+#include <linux/memfile_notifier.h>
#ifndef KVM_MAX_VCPU_IDS
#define KVM_MAX_VCPU_IDS KVM_MAX_VCPUS
@@ -576,8 +577,16 @@ struct kvm_memory_slot {
u32 flags;
short id;
u16 as_id;
+ struct file *private_file;
+ loff_t private_offset;
+ struct memfile_notifier notifier;
};
+static inline bool kvm_slot_can_be_private(const struct kvm_memory_slot *slot)
+{
+ return slot && (slot->flags & KVM_MEM_PRIVATE);
+}
+
static inline bool kvm_slot_dirty_track_enabled(const struct kvm_memory_slot *slot)
{
return slot->flags & KVM_MEM_LOG_DIRTY_PAGES;
@@ -1109,9 +1118,9 @@ enum kvm_mr_change {
};
int kvm_set_memory_region(struct kvm *kvm,
- const struct kvm_userspace_memory_region *mem);
+ const struct kvm_user_mem_region *mem);
int __kvm_set_memory_region(struct kvm *kvm,
- const struct kvm_userspace_memory_region *mem);
+ const struct kvm_user_mem_region *mem);
void kvm_arch_free_memslot(struct kvm *kvm, struct kvm_memory_slot *slot);
void kvm_arch_memslots_updated(struct kvm *kvm, u64 gen);
int kvm_arch_prepare_memory_region(struct kvm *kvm,
@@ -103,6 +103,33 @@ struct kvm_userspace_memory_region {
__u64 userspace_addr; /* start of the userspace allocated memory */
};
+struct kvm_userspace_memory_region_ext {
+ struct kvm_userspace_memory_region region;
+ __u64 private_offset;
+ __u32 private_fd;
+ __u32 pad1;
+ __u64 pad2[14];
+};
+
+#ifdef __KERNEL__
+/*
+ * kvm_user_mem_region is a kernel-only alias of kvm_userspace_memory_region_ext
+ * that "unpacks" kvm_userspace_memory_region so that KVM can directly access
+ * all fields from the top-level "extended" region.
+ */
+struct kvm_user_mem_region {
+ __u32 slot;
+ __u32 flags;
+ __u64 guest_phys_addr;
+ __u64 memory_size;
+ __u64 userspace_addr;
+ __u64 private_offset;
+ __u32 private_fd;
+ __u32 pad1;
+ __u64 pad2[14];
+};
+#endif
+
/*
* The bit 0 ~ bit 15 of kvm_memory_region::flags are visible for userspace,
* other bits are reserved for kvm internal use which are defined in
@@ -110,6 +137,7 @@ struct kvm_userspace_memory_region {
*/
#define KVM_MEM_LOG_DIRTY_PAGES (1UL << 0)
#define KVM_MEM_READONLY (1UL << 1)
+#define KVM_MEM_PRIVATE (1UL << 2)
/* for KVM_IRQ_LINE */
struct kvm_irq_level {
@@ -72,3 +72,6 @@ config KVM_XFER_TO_GUEST_WORK
config HAVE_KVM_PM_NOTIFIER
bool
+
+config HAVE_KVM_PRIVATE_MEM
+ bool
@@ -1508,7 +1508,7 @@ static void kvm_replace_memslot(struct kvm *kvm,
}
}
-static int check_memory_region_flags(const struct kvm_userspace_memory_region *mem)
+static int check_memory_region_flags(const struct kvm_user_mem_region *mem)
{
u32 valid_flags = KVM_MEM_LOG_DIRTY_PAGES;
@@ -1902,7 +1902,7 @@ static bool kvm_check_memslot_overlap(struct kvm_memslots *slots, int id,
* Must be called holding kvm->slots_lock for write.
*/
int __kvm_set_memory_region(struct kvm *kvm,
- const struct kvm_userspace_memory_region *mem)
+ const struct kvm_user_mem_region *mem)
{
struct kvm_memory_slot *old, *new;
struct kvm_memslots *slots;
@@ -2006,7 +2006,7 @@ int __kvm_set_memory_region(struct kvm *kvm,
EXPORT_SYMBOL_GPL(__kvm_set_memory_region);
int kvm_set_memory_region(struct kvm *kvm,
- const struct kvm_userspace_memory_region *mem)
+ const struct kvm_user_mem_region *mem)
{
int r;
@@ -2018,7 +2018,7 @@ int kvm_set_memory_region(struct kvm *kvm,
EXPORT_SYMBOL_GPL(kvm_set_memory_region);
static int kvm_vm_ioctl_set_memory_region(struct kvm *kvm,
- struct kvm_userspace_memory_region *mem)
+ struct kvm_user_mem_region *mem)
{
if ((u16)mem->slot >= KVM_USER_MEM_SLOTS)
return -EINVAL;
@@ -4608,6 +4608,33 @@ static int kvm_vm_ioctl_get_stats_fd(struct kvm *kvm)
return fd;
}
+#define SANITY_CHECK_MEM_REGION_FIELD(field) \
+do { \
+ BUILD_BUG_ON(offsetof(struct kvm_user_mem_region, field) != \
+ offsetof(struct kvm_userspace_memory_region, field)); \
+ BUILD_BUG_ON(sizeof_field(struct kvm_user_mem_region, field) != \
+ sizeof_field(struct kvm_userspace_memory_region, field)); \
+} while (0)
+
+#define SANITY_CHECK_MEM_REGION_EXT_FIELD(field) \
+do { \
+ BUILD_BUG_ON(offsetof(struct kvm_user_mem_region, field) != \
+ offsetof(struct kvm_userspace_memory_region_ext, field)); \
+ BUILD_BUG_ON(sizeof_field(struct kvm_user_mem_region, field) != \
+ sizeof_field(struct kvm_userspace_memory_region_ext, field)); \
+} while (0)
+
+static void kvm_sanity_check_user_mem_region_alias(void)
+{
+ SANITY_CHECK_MEM_REGION_FIELD(slot);
+ SANITY_CHECK_MEM_REGION_FIELD(flags);
+ SANITY_CHECK_MEM_REGION_FIELD(guest_phys_addr);
+ SANITY_CHECK_MEM_REGION_FIELD(memory_size);
+ SANITY_CHECK_MEM_REGION_FIELD(userspace_addr);
+ SANITY_CHECK_MEM_REGION_EXT_FIELD(private_offset);
+ SANITY_CHECK_MEM_REGION_EXT_FIELD(private_fd);
+}
+
static long kvm_vm_ioctl(struct file *filp,
unsigned int ioctl, unsigned long arg)
{
@@ -4631,14 +4658,35 @@ static long kvm_vm_ioctl(struct file *filp,
break;
}
case KVM_SET_USER_MEMORY_REGION: {
- struct kvm_userspace_memory_region kvm_userspace_mem;
+ struct kvm_user_mem_region mem;
+ unsigned long size;
+ u32 flags;
+
+ kvm_sanity_check_user_mem_region_alias();
+
+ memset(&mem, 0, sizeof(mem));
r = -EFAULT;
- if (copy_from_user(&kvm_userspace_mem, argp,
- sizeof(kvm_userspace_mem)))
+
+ if (get_user(flags,
+ (u32 __user *)(argp + offsetof(typeof(mem), flags))))
+ goto out;
+
+ if (flags & KVM_MEM_PRIVATE) {
+ r = -EINVAL;
+ goto out;
+ }
+
+ size = sizeof(struct kvm_userspace_memory_region);
+
+ if (copy_from_user(&mem, argp, size))
+ goto out;
+
+ r = -EINVAL;
+ if ((flags ^ mem.flags) & KVM_MEM_PRIVATE)
goto out;
- r = kvm_vm_ioctl_set_memory_region(kvm, &kvm_userspace_mem);
+ r = kvm_vm_ioctl_set_memory_region(kvm, &mem);
break;
}
case KVM_GET_DIRTY_LOG: {