
[V1,1/6] KVM: x86: Add support for testing private memory

Message ID 20221111014244.1714148-2-vannapurve@google.com
State New
Series selftests: KVM: selftests for fd-based private memory

Commit Message

Vishal Annapurve Nov. 11, 2022, 1:42 a.m. UTC
Introduce the HAVE_KVM_PRIVATE_MEM_TESTING config to allow testing the
fd-based approach to private memory with non-confidential selftest VMs.
Two important aspects need to be handled to support this testing from
the selftests' perspective:
* KVM needs to know whether an access from the guest VM is private or
shared. Confidential VMs (SNP/TDX) carry a dedicated bit in the GPA
that KVM can use to deduce the nature of the access. Non-confidential
VMs have no mechanism to convey this information to KVM, so KVM simply
relies on the attributes set by the userspace VMM (see the sketch
below), keeping the userspace VMM in the TCB for testing purposes.
* arch_private_mem_supported is updated to allow the private memory
logic to work with non-confidential VM selftests.

Signed-off-by: Vishal Annapurve <vannapurve@google.com>
---
 arch/x86/kvm/mmu/mmu.c          | 4 ++++
 arch/x86/kvm/mmu/mmu_internal.h | 4 +++-
 virt/kvm/Kconfig                | 4 ++++
 virt/kvm/kvm_main.c             | 2 +-
 4 files changed, 12 insertions(+), 2 deletions(-)

Comments

Chao Peng Nov. 22, 2022, 10:07 a.m. UTC | #1
On Fri, Nov 11, 2022 at 01:42:39AM +0000, Vishal Annapurve wrote:
> Introduce HAVE_KVM_PRIVATE_MEM_TESTING config to be able to test fd based
> approach to support private memory with non-confidential selftest VMs.
> To support this testing few important aspects need to be considered from
> the perspective of selftests -
> * KVM needs to know whether the access from guest VM is private or shared.
> Confidential VMs (SNP/TDX) carry a dedicated bit in gpa that can be used by
> KVM to deduce the nature of the access.
> Non-confidential VMs don't have mechanism to carry/convey such an
> information to KVM. So KVM just relies on what attributes are set by
> userspace VMM keeping the userspace VMM in the TCB for the testing
> purposes.
> * arch_private_mem_supported is updated to allow private memory logic to
> work with non-confidential vm selftests.
> 
> Signed-off-by: Vishal Annapurve <vannapurve@google.com>
> ---
>  arch/x86/kvm/mmu/mmu.c          | 4 ++++
>  arch/x86/kvm/mmu/mmu_internal.h | 4 +++-
>  virt/kvm/Kconfig                | 4 ++++
>  virt/kvm/kvm_main.c             | 2 +-
>  4 files changed, 12 insertions(+), 2 deletions(-)
> 
> diff --git a/arch/x86/kvm/mmu/mmu.c b/arch/x86/kvm/mmu/mmu.c
> index 10017a9f26ee..b3118d00b284 100644
> --- a/arch/x86/kvm/mmu/mmu.c
> +++ b/arch/x86/kvm/mmu/mmu.c
> @@ -4280,6 +4280,10 @@ static int direct_page_fault(struct kvm_vcpu *vcpu, struct kvm_page_fault *fault
>  
>  	fault->gfn = fault->addr >> PAGE_SHIFT;
>  	fault->slot = kvm_vcpu_gfn_to_memslot(vcpu, fault->gfn);
> +#ifdef CONFIG_HAVE_KVM_PRIVATE_MEM_TESTING
> +	fault->is_private = kvm_slot_can_be_private(fault->slot) &&
> +			kvm_mem_is_private(vcpu->kvm, fault->gfn);
> +#endif
>  
>  	if (page_fault_handle_page_track(vcpu, fault))
>  		return RET_PF_EMULATE;
> diff --git a/arch/x86/kvm/mmu/mmu_internal.h b/arch/x86/kvm/mmu/mmu_internal.h
> index 5cdff5ca546c..2e759f39c2c5 100644
> --- a/arch/x86/kvm/mmu/mmu_internal.h
> +++ b/arch/x86/kvm/mmu/mmu_internal.h
> @@ -188,7 +188,6 @@ struct kvm_page_fault {
>  
>  	/* Derived from mmu and global state.  */
>  	const bool is_tdp;
> -	const bool is_private;
>  	const bool nx_huge_page_workaround_enabled;
>  
>  	/*
> @@ -221,6 +220,9 @@ struct kvm_page_fault {
>  	/* The memslot containing gfn. May be NULL. */
>  	struct kvm_memory_slot *slot;
>  
> +	/* Derived from encryption bits of the faulting GPA for CVMs. */
> +	bool is_private;

Either we can wrap it with the CONFIG_HAVE_KVM_PRIVATE_MEM_TESTING or if
it looks ugly I can remove the "const" in my code.

Chao
> +
>  	/* Outputs of kvm_faultin_pfn.  */
>  	kvm_pfn_t pfn;
>  	hva_t hva;
> diff --git a/virt/kvm/Kconfig b/virt/kvm/Kconfig
> index 69ca59e82149..300876afb0ca 100644
> --- a/virt/kvm/Kconfig
> +++ b/virt/kvm/Kconfig
> @@ -93,3 +93,7 @@ config HAVE_KVM_RESTRICTED_MEM
>  config KVM_GENERIC_PRIVATE_MEM
>         bool
>         depends on HAVE_KVM_RESTRICTED_MEM
> +
> +config HAVE_KVM_PRIVATE_MEM_TESTING
> +       bool
> +       depends on KVM_GENERIC_PRIVATE_MEM
> diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c
> index dae6a2c196ad..54e57b7f1c15 100644
> --- a/virt/kvm/kvm_main.c
> +++ b/virt/kvm/kvm_main.c
> @@ -1750,7 +1750,7 @@ static void kvm_replace_memslot(struct kvm *kvm,
>  
>  bool __weak kvm_arch_has_private_mem(struct kvm *kvm)
>  {
> -	return false;
> +	return IS_ENABLED(CONFIG_HAVE_KVM_PRIVATE_MEM_TESTING);
>  }
>  
>  static int check_memory_region_flags(struct kvm *kvm,
> -- 
> 2.38.1.431.g37b22c650d-goog
Sean Christopherson Nov. 22, 2022, 8:06 p.m. UTC | #2
On Tue, Nov 22, 2022, Chao Peng wrote:
> > diff --git a/arch/x86/kvm/mmu/mmu.c b/arch/x86/kvm/mmu/mmu.c
> > index 10017a9f26ee..b3118d00b284 100644
> > --- a/arch/x86/kvm/mmu/mmu.c
> > +++ b/arch/x86/kvm/mmu/mmu.c
> > @@ -4280,6 +4280,10 @@ static int direct_page_fault(struct kvm_vcpu *vcpu, struct kvm_page_fault *fault
> >  
> >  	fault->gfn = fault->addr >> PAGE_SHIFT;
> >  	fault->slot = kvm_vcpu_gfn_to_memslot(vcpu, fault->gfn);
> > +#ifdef CONFIG_HAVE_KVM_PRIVATE_MEM_TESTING
> > +	fault->is_private = kvm_slot_can_be_private(fault->slot) &&
> > +			kvm_mem_is_private(vcpu->kvm, fault->gfn);
> > +#endif
> >  
> >  	if (page_fault_handle_page_track(vcpu, fault))
> >  		return RET_PF_EMULATE;
> > diff --git a/arch/x86/kvm/mmu/mmu_internal.h b/arch/x86/kvm/mmu/mmu_internal.h
> > index 5cdff5ca546c..2e759f39c2c5 100644
> > --- a/arch/x86/kvm/mmu/mmu_internal.h
> > +++ b/arch/x86/kvm/mmu/mmu_internal.h
> > @@ -188,7 +188,6 @@ struct kvm_page_fault {
> >  
> >  	/* Derived from mmu and global state.  */
> >  	const bool is_tdp;
> > -	const bool is_private;
> >  	const bool nx_huge_page_workaround_enabled;
> >  
> >  	/*
> > @@ -221,6 +220,9 @@ struct kvm_page_fault {
> >  	/* The memslot containing gfn. May be NULL. */
> >  	struct kvm_memory_slot *slot;
> >  
> > +	/* Derived from encryption bits of the faulting GPA for CVMs. */
> > +	bool is_private;
> 
> Either we can wrap it with the CONFIG_HAVE_KVM_PRIVATE_MEM_TESTING or if
> it looks ugly I can remove the "const" in my code.

Hmm, I think we can keep the const.  Similar to the bug in kvm_faultin_pfn()[*],
the kvm_slot_can_be_private() check is bogus.  A fault should be considered
private if it's marked as private; whether or not userspace has configured the
slot to be private is irrelevant.  I.e. the xarray is the single source of
truth, memslots are just plumbing.

Then kvm_mmu_do_page_fault() can do something like:

diff --git a/arch/x86/kvm/mmu/mmu_internal.h b/arch/x86/kvm/mmu/mmu_internal.h
index dbaf6755c5a7..456a9daa36e5 100644
--- a/arch/x86/kvm/mmu/mmu_internal.h
+++ b/arch/x86/kvm/mmu/mmu_internal.h
@@ -260,6 +260,8 @@ enum {
 static inline int kvm_mmu_do_page_fault(struct kvm_vcpu *vcpu, gpa_t cr2_or_gpa,
                                        u32 err, bool prefetch)
 {
+       bool is_tdp = likely(vcpu->arch.mmu->page_fault == kvm_tdp_page_fault);
+
        struct kvm_page_fault fault = {
                .addr = cr2_or_gpa,
                .error_code = err,
@@ -269,13 +271,15 @@ static inline int kvm_mmu_do_page_fault(struct kvm_vcpu *vcpu, gpa_t cr2_or_gpa,
                .rsvd = err & PFERR_RSVD_MASK,
                .user = err & PFERR_USER_MASK,
                .prefetch = prefetch,
-               .is_tdp = likely(vcpu->arch.mmu->page_fault == kvm_tdp_page_fault),
+               .is_tdp = is_tdp,
                .nx_huge_page_workaround_enabled =
                        is_nx_huge_page_enabled(vcpu->kvm),
 
                .max_level = KVM_MAX_HUGEPAGE_LEVEL,
                .req_level = PG_LEVEL_4K,
                .goal_level = PG_LEVEL_4K,
+               .is_private = IS_ENABLED(CONFIG_HAVE_KVM_PRIVATE_MEM_TESTING) && is_tdp &&
+                          kvm_mem_is_private(vcpu->kvm, cr2_or_gpa >> PAGE_SHIFT),
        };
        int r;

[*] https://lore.kernel.org/all/Y3Vgc5KrNRA8r6vh@google.com
Marc Orr Nov. 24, 2022, 1:49 a.m. UTC | #3
On Tue, Nov 22, 2022 at 12:06 PM Sean Christopherson <seanjc@google.com> wrote:
>
> On Tue, Nov 22, 2022, Chao Peng wrote:
> > > diff --git a/arch/x86/kvm/mmu/mmu.c b/arch/x86/kvm/mmu/mmu.c
> > > index 10017a9f26ee..b3118d00b284 100644
> > > --- a/arch/x86/kvm/mmu/mmu.c
> > > +++ b/arch/x86/kvm/mmu/mmu.c
> > > @@ -4280,6 +4280,10 @@ static int direct_page_fault(struct kvm_vcpu *vcpu, struct kvm_page_fault *fault
> > >
> > >     fault->gfn = fault->addr >> PAGE_SHIFT;
> > >     fault->slot = kvm_vcpu_gfn_to_memslot(vcpu, fault->gfn);
> > > +#ifdef CONFIG_HAVE_KVM_PRIVATE_MEM_TESTING
> > > +   fault->is_private = kvm_slot_can_be_private(fault->slot) &&
> > > +                   kvm_mem_is_private(vcpu->kvm, fault->gfn);
> > > +#endif
> > >
> > >     if (page_fault_handle_page_track(vcpu, fault))
> > >             return RET_PF_EMULATE;
> > > diff --git a/arch/x86/kvm/mmu/mmu_internal.h b/arch/x86/kvm/mmu/mmu_internal.h
> > > index 5cdff5ca546c..2e759f39c2c5 100644
> > > --- a/arch/x86/kvm/mmu/mmu_internal.h
> > > +++ b/arch/x86/kvm/mmu/mmu_internal.h
> > > @@ -188,7 +188,6 @@ struct kvm_page_fault {
> > >
> > >     /* Derived from mmu and global state.  */
> > >     const bool is_tdp;
> > > -   const bool is_private;
> > >     const bool nx_huge_page_workaround_enabled;
> > >
> > >     /*
> > > @@ -221,6 +220,9 @@ struct kvm_page_fault {
> > >     /* The memslot containing gfn. May be NULL. */
> > >     struct kvm_memory_slot *slot;
> > >
> > > +   /* Derived from encryption bits of the faulting GPA for CVMs. */
> > > +   bool is_private;
> >
> > Either we can wrap it with the CONFIG_HAVE_KVM_PRIVATE_MEM_TESTING or if
> > it looks ugly I can remove the "const" in my code.
>
> Hmm, I think we can keep the const.  Similar to the bug in kvm_faultin_pfn()[*],
> the kvm_slot_can_be_private() is bogus.  A fault should be considered private if
> it's marked as private, whether or not userspace has configured the slot to be
> private is irrelevant.  I.e. the xarray is the single source of truth, memslots
> are just plumbing.

If we incorporate Sean's suggestion and use xarray as the single
source of truth, then can we get rid of the
HAVE_KVM_PRIVATE_MEM_TESTING config?

Specifically, the selftest can call the KVM_MEMORY_ENCRYPT_REG_REGION
ioctl, which will set the bits for the private FD within KVM's xarray.
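
For illustration, a minimal userspace sketch of that call (assuming the
series' reuse of KVM_MEMORY_ENCRYPT_REG_REGION on the VM fd; the helper
name is made up):

#include <stdint.h>
#include <sys/ioctl.h>
#include <linux/kvm.h>

/* Illustrative helper: ask KVM to treat [gpa, gpa + size) as private. */
static int set_region_private(int vm_fd, uint64_t gpa, uint64_t size)
{
	struct kvm_enc_region region = {
		.addr = gpa,
		.size = size,
	};

	return ioctl(vm_fd, KVM_MEMORY_ENCRYPT_REG_REGION, &region);
}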

(Maybe this was part of the point that Sean was making; but his
feedback seemed focused on the discussion about keeping `is_private`
const, whereas I've been staring at this trying to figure out if we
can run the UPM selftests on a non-TDX/SNP VM WITHOUT a special
test-only config. And Sean's idea seems to eliminate the need for the
awkward CONFIG.)
Chao Peng Nov. 24, 2022, 1:17 p.m. UTC | #4
On Tue, Nov 22, 2022 at 08:06:01PM +0000, Sean Christopherson wrote:
> On Tue, Nov 22, 2022, Chao Peng wrote:
> > > diff --git a/arch/x86/kvm/mmu/mmu.c b/arch/x86/kvm/mmu/mmu.c
> > > index 10017a9f26ee..b3118d00b284 100644
> > > --- a/arch/x86/kvm/mmu/mmu.c
> > > +++ b/arch/x86/kvm/mmu/mmu.c
> > > @@ -4280,6 +4280,10 @@ static int direct_page_fault(struct kvm_vcpu *vcpu, struct kvm_page_fault *fault
> > >  
> > >  	fault->gfn = fault->addr >> PAGE_SHIFT;
> > >  	fault->slot = kvm_vcpu_gfn_to_memslot(vcpu, fault->gfn);
> > > +#ifdef CONFIG_HAVE_KVM_PRIVATE_MEM_TESTING
> > > +	fault->is_private = kvm_slot_can_be_private(fault->slot) &&
> > > +			kvm_mem_is_private(vcpu->kvm, fault->gfn);
> > > +#endif
> > >  
> > >  	if (page_fault_handle_page_track(vcpu, fault))
> > >  		return RET_PF_EMULATE;
> > > diff --git a/arch/x86/kvm/mmu/mmu_internal.h b/arch/x86/kvm/mmu/mmu_internal.h
> > > index 5cdff5ca546c..2e759f39c2c5 100644
> > > --- a/arch/x86/kvm/mmu/mmu_internal.h
> > > +++ b/arch/x86/kvm/mmu/mmu_internal.h
> > > @@ -188,7 +188,6 @@ struct kvm_page_fault {
> > >  
> > >  	/* Derived from mmu and global state.  */
> > >  	const bool is_tdp;
> > > -	const bool is_private;
> > >  	const bool nx_huge_page_workaround_enabled;
> > >  
> > >  	/*
> > > @@ -221,6 +220,9 @@ struct kvm_page_fault {
> > >  	/* The memslot containing gfn. May be NULL. */
> > >  	struct kvm_memory_slot *slot;
> > >  
> > > +	/* Derived from encryption bits of the faulting GPA for CVMs. */
> > > +	bool is_private;
> > 
> > Either we can wrap it with the CONFIG_HAVE_KVM_PRIVATE_MEM_TESTING or if
> > it looks ugly I can remove the "const" in my code.
> 
> Hmm, I think we can keep the const.  Similar to the bug in kvm_faultin_pfn()[*],
> the kvm_slot_can_be_private() is bogus.  A fault should be considered private if
> it's marked as private, whether or not userspace has configured the slot to be
> private is irrelevant.  I.e. the xarray is the single source of truth, memslots
> are just plumbing.

That makes sense to me. Thanks.

> 
> Then kvm_mmu_do_page_fault() can do something like:
> 
> diff --git a/arch/x86/kvm/mmu/mmu_internal.h b/arch/x86/kvm/mmu/mmu_internal.h
> index dbaf6755c5a7..456a9daa36e5 100644
> --- a/arch/x86/kvm/mmu/mmu_internal.h
> +++ b/arch/x86/kvm/mmu/mmu_internal.h
> @@ -260,6 +260,8 @@ enum {
>  static inline int kvm_mmu_do_page_fault(struct kvm_vcpu *vcpu, gpa_t cr2_or_gpa,
>                                         u32 err, bool prefetch)
>  {
> +       bool is_tdp = likely(vcpu->arch.mmu->page_fault == kvm_tdp_page_fault);
> +
>         struct kvm_page_fault fault = {
>                 .addr = cr2_or_gpa,
>                 .error_code = err,
> @@ -269,13 +271,15 @@ static inline int kvm_mmu_do_page_fault(struct kvm_vcpu *vcpu, gpa_t cr2_or_gpa,
>                 .rsvd = err & PFERR_RSVD_MASK,
>                 .user = err & PFERR_USER_MASK,
>                 .prefetch = prefetch,
> -               .is_tdp = likely(vcpu->arch.mmu->page_fault == kvm_tdp_page_fault),
> +               .is_tdp = is_tdp,
>                 .nx_huge_page_workaround_enabled =
>                         is_nx_huge_page_enabled(vcpu->kvm),
>  
>                 .max_level = KVM_MAX_HUGEPAGE_LEVEL,
>                 .req_level = PG_LEVEL_4K,
>                 .goal_level = PG_LEVEL_4K,
> +               .private = IS_ENABLED(CONFIG_HAVE_KVM_PRIVATE_MEM_TESTING) && is_tdp &&
> +                          kvm_mem_is_private(vcpu->kvm, cr2_or_gpa >> PAGE_SHIFT),
>         };
>         int r;
> 
> [*] https://lore.kernel.org/all/Y3Vgc5KrNRA8r6vh@google.com
Sean Christopherson Nov. 28, 2022, 4:21 p.m. UTC | #5
On Wed, Nov 23, 2022, Marc Orr wrote:
> On Tue, Nov 22, 2022 at 12:06 PM Sean Christopherson <seanjc@google.com> wrote:
> > > > @@ -221,6 +220,9 @@ struct kvm_page_fault {
> > > >     /* The memslot containing gfn. May be NULL. */
> > > >     struct kvm_memory_slot *slot;
> > > >
> > > > +   /* Derived from encryption bits of the faulting GPA for CVMs. */
> > > > +   bool is_private;
> > >
> > > Either we can wrap it with the CONFIG_HAVE_KVM_PRIVATE_MEM_TESTING or if
> > > it looks ugly I can remove the "const" in my code.
> >
> > Hmm, I think we can keep the const.  Similar to the bug in kvm_faultin_pfn()[*],
> > the kvm_slot_can_be_private() is bogus.  A fault should be considered private if
> > it's marked as private, whether or not userspace has configured the slot to be
> > private is irrelevant.  I.e. the xarray is the single source of truth, memslots
> > are just plumbing.
> 
> If we incorporate Sean's suggestion and use xarray as the single
> source of truth, then can we get rid of the
> HAVE_KVM_PRIVATE_MEM_TESTING config?

No, we still want the opt-in config.  

> Specifically, the self test can call the KVM_MEMORY_ENCRYPT_REG_REGION
> ioctl which will set the bits for the private FD within KVM's xarray.

Yes, but that should be disallowed for regular VMs without HAVE_KVM_PRIVATE_MEM_TESTING=y.

> (Maybe this was part of the point that Sean was making; but his
> feedback seemed focused on the discussion about keeping `is_private`
> const, whereas I've been staring at this trying to figure out if we
> can run the UPM selftests on a non-TDX/SNP VM WITHOUT a special
> test-only config. And Sean's idea seems to eliminate the need for the
> awkward CONFIG.)

"need" was always relative.  It's obviously possible to enable any code without a
Kconfig, the question is whether or not it's a good idea to do so.  In this case,
the answer is "no", because allowing private memory opens up a number a of code
paths and thus potential bugs.  And we need something for kvm_arch_has_private_mem()
because returning "true" unconditionally is not correct for regular VMs.
Michael Roth Dec. 2, 2022, 12:26 a.m. UTC | #6
On Tue, Nov 22, 2022 at 08:06:01PM +0000, Sean Christopherson wrote:
> On Tue, Nov 22, 2022, Chao Peng wrote:
> > > diff --git a/arch/x86/kvm/mmu/mmu.c b/arch/x86/kvm/mmu/mmu.c
> > > index 10017a9f26ee..b3118d00b284 100644
> > > --- a/arch/x86/kvm/mmu/mmu.c
> > > +++ b/arch/x86/kvm/mmu/mmu.c
> > > @@ -4280,6 +4280,10 @@ static int direct_page_fault(struct kvm_vcpu *vcpu, struct kvm_page_fault *fault
> > >  
> > >  	fault->gfn = fault->addr >> PAGE_SHIFT;
> > >  	fault->slot = kvm_vcpu_gfn_to_memslot(vcpu, fault->gfn);
> > > +#ifdef CONFIG_HAVE_KVM_PRIVATE_MEM_TESTING
> > > +	fault->is_private = kvm_slot_can_be_private(fault->slot) &&
> > > +			kvm_mem_is_private(vcpu->kvm, fault->gfn);
> > > +#endif
> > >  
> > >  	if (page_fault_handle_page_track(vcpu, fault))
> > >  		return RET_PF_EMULATE;
> > > diff --git a/arch/x86/kvm/mmu/mmu_internal.h b/arch/x86/kvm/mmu/mmu_internal.h
> > > index 5cdff5ca546c..2e759f39c2c5 100644
> > > --- a/arch/x86/kvm/mmu/mmu_internal.h
> > > +++ b/arch/x86/kvm/mmu/mmu_internal.h
> > > @@ -188,7 +188,6 @@ struct kvm_page_fault {
> > >  
> > >  	/* Derived from mmu and global state.  */
> > >  	const bool is_tdp;
> > > -	const bool is_private;
> > >  	const bool nx_huge_page_workaround_enabled;
> > >  
> > >  	/*
> > > @@ -221,6 +220,9 @@ struct kvm_page_fault {
> > >  	/* The memslot containing gfn. May be NULL. */
> > >  	struct kvm_memory_slot *slot;
> > >  
> > > +	/* Derived from encryption bits of the faulting GPA for CVMs. */
> > > +	bool is_private;
> > 
> > Either we can wrap it with the CONFIG_HAVE_KVM_PRIVATE_MEM_TESTING or if
> > it looks ugly I can remove the "const" in my code.
> 
> Hmm, I think we can keep the const.  Similar to the bug in kvm_faultin_pfn()[*],
> the kvm_slot_can_be_private() is bogus.  A fault should be considered private if
> it's marked as private, whether or not userspace has configured the slot to be
> private is irrelevant.  I.e. the xarray is the single source of truth, memslots
> are just plumbing.

I've been looking at pulling this series into our SNP+UPM patchset (and
replacing the UPM selftests that were included with UPMv9). We ended up
with something similar to what you've suggested, but instead of calling
kvm_mem_is_private() directly we added a wrapper in mmu_internal.h that's
called via:

kvm_mmu_do_page_fault():
  struct kvm_page_fault fault = {
    ...
    .is_private = kvm_mmu_fault_is_private()

where kvm_mmu_fault_is_private() is defined something like:

static bool kvm_mmu_fault_is_private(struct kvm *kvm, gpa_t gpa, u64 err)
{
        struct kvm_memory_slot *slot;
        gfn_t gfn = gpa_to_gfn(gpa);
        bool private_fault = false;

        slot = gfn_to_memslot(kvm, gfn);
        if (!slot)
                goto out;

        if (!kvm_slot_can_be_private(slot))
                goto out;

        /* If the platform hook returns 1, use its determination of private_fault. */
        if (static_call(kvm_x86_fault_is_private)(kvm, gpa, err, &private_fault) == 1)
                goto out;

        /*
         * Handling below is for guests that rely on the VMM to control when a fault
         * should be treated as private or not via KVM_MEMORY_ENCRYPT_{REG,UNREG}_REGION.
         * This is mainly for the KVM self-tests for restricted memory.
         */
#ifdef CONFIG_HAVE_KVM_PRIVATE_MEM_TESTING
        private_fault = kvm_mem_is_private(kvm, gfn);
#endif

out:
        return private_fault;
}

I tried removing kvm_slot_can_be_private() based on your comments, but
we ended up hitting a crash in restrictedmem_get_page(). I think this is
because the xarray currently defaults to 'private', so when the KVM MMU
relies only on the xarray it can hit cases where it thinks a GPA should
be backed by a restricted page, but when it calls
kvm_restrictedmem_get_pfn() a NULL slot->restricted_file gets passed to
restrictedmem_get_page() and it blows up.

I know Chao mentioned they were considering switching to 'shared' as the
default xarray value, which might fix this issue, but until then we've
left these checks in place.

Just figured I'd mention this in case Vishal hits similar issues.

-Mike

> 
> Then kvm_mmu_do_page_fault() can do something like:
> 
> diff --git a/arch/x86/kvm/mmu/mmu_internal.h b/arch/x86/kvm/mmu/mmu_internal.h
> index dbaf6755c5a7..456a9daa36e5 100644
> --- a/arch/x86/kvm/mmu/mmu_internal.h
> +++ b/arch/x86/kvm/mmu/mmu_internal.h
> @@ -260,6 +260,8 @@ enum {
>  static inline int kvm_mmu_do_page_fault(struct kvm_vcpu *vcpu, gpa_t cr2_or_gpa,
>                                         u32 err, bool prefetch)
>  {
> +       bool is_tdp = likely(vcpu->arch.mmu->page_fault == kvm_tdp_page_fault);
> +
>         struct kvm_page_fault fault = {
>                 .addr = cr2_or_gpa,
>                 .error_code = err,
> @@ -269,13 +271,15 @@ static inline int kvm_mmu_do_page_fault(struct kvm_vcpu *vcpu, gpa_t cr2_or_gpa,
>                 .rsvd = err & PFERR_RSVD_MASK,
>                 .user = err & PFERR_USER_MASK,
>                 .prefetch = prefetch,
> -               .is_tdp = likely(vcpu->arch.mmu->page_fault == kvm_tdp_page_fault),
> +               .is_tdp = is_tdp,
>                 .nx_huge_page_workaround_enabled =
>                         is_nx_huge_page_enabled(vcpu->kvm),
>  
>                 .max_level = KVM_MAX_HUGEPAGE_LEVEL,
>                 .req_level = PG_LEVEL_4K,
>                 .goal_level = PG_LEVEL_4K,
> +               .private = IS_ENABLED(CONFIG_HAVE_KVM_PRIVATE_MEM_TESTING) && is_tdp &&
> +                          kvm_mem_is_private(vcpu->kvm, cr2_or_gpa >> PAGE_SHIFT),
>         };
>         int r;
> 
> [*] https://lore.kernel.org/all/Y3Vgc5KrNRA8r6vh@google.com

Patch

diff --git a/arch/x86/kvm/mmu/mmu.c b/arch/x86/kvm/mmu/mmu.c
index 10017a9f26ee..b3118d00b284 100644
--- a/arch/x86/kvm/mmu/mmu.c
+++ b/arch/x86/kvm/mmu/mmu.c
@@ -4280,6 +4280,10 @@  static int direct_page_fault(struct kvm_vcpu *vcpu, struct kvm_page_fault *fault
 
 	fault->gfn = fault->addr >> PAGE_SHIFT;
 	fault->slot = kvm_vcpu_gfn_to_memslot(vcpu, fault->gfn);
+#ifdef CONFIG_HAVE_KVM_PRIVATE_MEM_TESTING
+	fault->is_private = kvm_slot_can_be_private(fault->slot) &&
+			kvm_mem_is_private(vcpu->kvm, fault->gfn);
+#endif
 
 	if (page_fault_handle_page_track(vcpu, fault))
 		return RET_PF_EMULATE;
diff --git a/arch/x86/kvm/mmu/mmu_internal.h b/arch/x86/kvm/mmu/mmu_internal.h
index 5cdff5ca546c..2e759f39c2c5 100644
--- a/arch/x86/kvm/mmu/mmu_internal.h
+++ b/arch/x86/kvm/mmu/mmu_internal.h
@@ -188,7 +188,6 @@  struct kvm_page_fault {
 
 	/* Derived from mmu and global state.  */
 	const bool is_tdp;
-	const bool is_private;
 	const bool nx_huge_page_workaround_enabled;
 
 	/*
@@ -221,6 +220,9 @@  struct kvm_page_fault {
 	/* The memslot containing gfn. May be NULL. */
 	struct kvm_memory_slot *slot;
 
+	/* Derived from encryption bits of the faulting GPA for CVMs. */
+	bool is_private;
+
 	/* Outputs of kvm_faultin_pfn.  */
 	kvm_pfn_t pfn;
 	hva_t hva;
diff --git a/virt/kvm/Kconfig b/virt/kvm/Kconfig
index 69ca59e82149..300876afb0ca 100644
--- a/virt/kvm/Kconfig
+++ b/virt/kvm/Kconfig
@@ -93,3 +93,7 @@  config HAVE_KVM_RESTRICTED_MEM
 config KVM_GENERIC_PRIVATE_MEM
        bool
        depends on HAVE_KVM_RESTRICTED_MEM
+
+config HAVE_KVM_PRIVATE_MEM_TESTING
+       bool
+       depends on KVM_GENERIC_PRIVATE_MEM
diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c
index dae6a2c196ad..54e57b7f1c15 100644
--- a/virt/kvm/kvm_main.c
+++ b/virt/kvm/kvm_main.c
@@ -1750,7 +1750,7 @@  static void kvm_replace_memslot(struct kvm *kvm,
 
 bool __weak kvm_arch_has_private_mem(struct kvm *kvm)
 {
-	return false;
+	return IS_ENABLED(CONFIG_HAVE_KVM_PRIVATE_MEM_TESTING);
 }
 
 static int check_memory_region_flags(struct kvm *kvm,