Message ID | 20210809175620.720923-2-ltykernel@gmail.com |
---|---|
State | New |
Headers | show |
Series | x86/Hyper-V: Add Hyper-V Isolation VM support | expand |
On Mon, Aug 09, 2021 at 01:56:05PM -0400, Tianyu Lan wrote: [...] > static int hv_cpu_init(unsigned int cpu) > { > union hv_vp_assist_msr_contents msr = { 0 }; > @@ -85,6 +111,8 @@ static int hv_cpu_init(unsigned int cpu) > } > } > > + hyperv_init_ghcb(); > + Why is the return value not checked here? If that's not required, can you leave a comment? Wei.
From: Tianyu Lan <ltykernel@gmail.com> Sent: Monday, August 9, 2021 10:56 AM > Subject: [PATCH V3 01/13] x86/HV: Initialize GHCB page in Isolation VM The subject line tag on patches under arch/x86/hyperv is generally "x86/hyperv:". There's some variation in the spelling of "hyperv", but let's go with the all lowercase "hyperv". > > Hyper-V exposes GHCB page via SEV ES GHCB MSR for SNP guest > to communicate with hypervisor. Map GHCB page for all > cpus to read/write MSR register and submit hvcall request > via GHCB. > > Signed-off-by: Tianyu Lan <Tianyu.Lan@microsoft.com> > --- > arch/x86/hyperv/hv_init.c | 66 +++++++++++++++++++++++++++++++-- > arch/x86/include/asm/mshyperv.h | 2 + > include/asm-generic/mshyperv.h | 2 + > 3 files changed, 66 insertions(+), 4 deletions(-) > > diff --git a/arch/x86/hyperv/hv_init.c b/arch/x86/hyperv/hv_init.c > index 708a2712a516..0bb4d9ca7a55 100644 > --- a/arch/x86/hyperv/hv_init.c > +++ b/arch/x86/hyperv/hv_init.c > @@ -20,6 +20,7 @@ > #include <linux/kexec.h> > #include <linux/version.h> > #include <linux/vmalloc.h> > +#include <linux/io.h> > #include <linux/mm.h> > #include <linux/hyperv.h> > #include <linux/slab.h> > @@ -42,6 +43,31 @@ static void *hv_hypercall_pg_saved; > struct hv_vp_assist_page **hv_vp_assist_page; > EXPORT_SYMBOL_GPL(hv_vp_assist_page); > > +static int hyperv_init_ghcb(void) > +{ > + u64 ghcb_gpa; > + void *ghcb_va; > + void **ghcb_base; > + > + if (!ms_hyperv.ghcb_base) > + return -EINVAL; > + > + /* > + * GHCB page is allocated by paravisor. The address > + * returned by MSR_AMD64_SEV_ES_GHCB is above shared > + * ghcb boundary and map it here. 
> + */ > + rdmsrl(MSR_AMD64_SEV_ES_GHCB, ghcb_gpa); > + ghcb_va = memremap(ghcb_gpa, HV_HYP_PAGE_SIZE, MEMREMAP_WB); > + if (!ghcb_va) > + return -ENOMEM; > + > + ghcb_base = (void **)this_cpu_ptr(ms_hyperv.ghcb_base); > + *ghcb_base = ghcb_va; > + > + return 0; > +} > + > static int hv_cpu_init(unsigned int cpu) > { > union hv_vp_assist_msr_contents msr = { 0 }; > @@ -85,6 +111,8 @@ static int hv_cpu_init(unsigned int cpu) > } > } > > + hyperv_init_ghcb(); > + > return 0; > } > > @@ -177,6 +205,14 @@ static int hv_cpu_die(unsigned int cpu) > { > struct hv_reenlightenment_control re_ctrl; > unsigned int new_cpu; > + void **ghcb_va = NULL; I'm not seeing any reason why this needs to be initialized. > + > + if (ms_hyperv.ghcb_base) { > + ghcb_va = (void **)this_cpu_ptr(ms_hyperv.ghcb_base); > + if (*ghcb_va) > + memunmap(*ghcb_va); > + *ghcb_va = NULL; > + } > > hv_common_cpu_die(cpu); > > @@ -383,9 +419,19 @@ void __init hyperv_init(void) > VMALLOC_END, GFP_KERNEL, PAGE_KERNEL_ROX, > VM_FLUSH_RESET_PERMS, NUMA_NO_NODE, > __builtin_return_address(0)); > - if (hv_hypercall_pg == NULL) { > - wrmsrl(HV_X64_MSR_GUEST_OS_ID, 0); > - goto remove_cpuhp_state; > + if (hv_hypercall_pg == NULL) > + goto clean_guest_os_id; > + > + if (hv_isolation_type_snp()) { > + ms_hyperv.ghcb_base = alloc_percpu(void *); > + if (!ms_hyperv.ghcb_base) > + goto clean_guest_os_id; > + > + if (hyperv_init_ghcb()) { > + free_percpu(ms_hyperv.ghcb_base); > + ms_hyperv.ghcb_base = NULL; > + goto clean_guest_os_id; > + } Having the GHCB setup code here splits the hypercall page setup into two parts, which is unexpected. First the memory is allocated for the hypercall page, then the GHCB stuff is done, then the hypercall MSR is setup. Is there a need to do this split? Also, if the GHCB stuff fails and you goto clean_guest_os_id, the memory allocated for the hypercall page is never freed. It's also unexpected to have hyperv_init_ghcb() called here and called in hv_cpu_init(). 
Wouldn't it be possible to set up ghcb_base *before* cpuhp_setup_state() is called, so that hv_cpu_init() would take care of calling hyperv_init_ghcb() for the boot CPU? That's the pattern used by the VP assist page, the percpu input page, etc. > } > > rdmsrl(HV_X64_MSR_HYPERCALL, hypercall_msr.as_uint64); > @@ -456,7 +502,8 @@ void __init hyperv_init(void) > hv_query_ext_cap(0); > return; > > -remove_cpuhp_state: > +clean_guest_os_id: > + wrmsrl(HV_X64_MSR_GUEST_OS_ID, 0); > cpuhp_remove_state(cpuhp); > free_vp_assist_page: > kfree(hv_vp_assist_page); > @@ -484,6 +531,9 @@ void hyperv_cleanup(void) > */ > hv_hypercall_pg = NULL; > > + if (ms_hyperv.ghcb_base) > + free_percpu(ms_hyperv.ghcb_base); > + I don't think this cleanup is necessary. The primary purpose of hyperv_cleanup() is to ensure that things like overlay pages are properly reset in Hyper-V before doing a kexec(), or before panicking and running the kdump kernel. There's no need to do general memory freeing in Linux. Doing so just adds to the risk that the panic path could itself fail. 
> /* Reset the hypercall page */ > hypercall_msr.as_uint64 = 0; > wrmsrl(HV_X64_MSR_HYPERCALL, hypercall_msr.as_uint64); > @@ -559,3 +609,11 @@ bool hv_is_isolation_supported(void) > { > return hv_get_isolation_type() != HV_ISOLATION_TYPE_NONE; > } > + > +DEFINE_STATIC_KEY_FALSE(isolation_type_snp); > + > +bool hv_isolation_type_snp(void) > +{ > + return static_branch_unlikely(&isolation_type_snp); > +} > +EXPORT_SYMBOL_GPL(hv_isolation_type_snp); > diff --git a/arch/x86/include/asm/mshyperv.h b/arch/x86/include/asm/mshyperv.h > index adccbc209169..6627cfd2bfba 100644 > --- a/arch/x86/include/asm/mshyperv.h > +++ b/arch/x86/include/asm/mshyperv.h > @@ -11,6 +11,8 @@ > #include <asm/paravirt.h> > #include <asm/mshyperv.h> > > +DECLARE_STATIC_KEY_FALSE(isolation_type_snp); > + > typedef int (*hyperv_fill_flush_list_func)( > struct hv_guest_mapping_flush_list *flush, > void *data); > diff --git a/include/asm-generic/mshyperv.h b/include/asm-generic/mshyperv.h > index c1ab6a6e72b5..4269f3174e58 100644 > --- a/include/asm-generic/mshyperv.h > +++ b/include/asm-generic/mshyperv.h > @@ -36,6 +36,7 @@ struct ms_hyperv_info { > u32 max_lp_index; > u32 isolation_config_a; > u32 isolation_config_b; > + void __percpu **ghcb_base; This doesn't feel like the right place to put this pointer. The other fields in the ms_hyperv_info structure are just fixed values obtained from the CPUID instruction. The existing patterns similar to ghcb_base are the VP assist page and the percpu input and output args. They are all based on standalone global variables. It would be more consistent to do the same with the ghcb_base. 
> }; > extern struct ms_hyperv_info ms_hyperv; > > @@ -237,6 +238,7 @@ bool hv_is_hyperv_initialized(void); > bool hv_is_hibernation_supported(void); > enum hv_isolation_type hv_get_isolation_type(void); > bool hv_is_isolation_supported(void); > +bool hv_isolation_type_snp(void); > void hyperv_cleanup(void); > bool hv_query_ext_cap(u64 cap_query); > #else /* CONFIG_HYPERV */ > -- > 2.25.1
Hi Michael: Thanks for your review. On 8/13/2021 3:14 AM, Michael Kelley wrote: > From: Tianyu Lan <ltykernel@gmail.com> Sent: Monday, August 9, 2021 10:56 AM >> Subject: [PATCH V3 01/13] x86/HV: Initialize GHCB page in Isolation VM > > The subject line tag on patches under arch/x86/hyperv is generally "x86/hyperv:". > There's some variation in the spelling of "hyperv", but let's go with the all > lowercase "hyperv". OK. Will update. > >> >> Hyper-V exposes GHCB page via SEV ES GHCB MSR for SNP guest >> to communicate with hypervisor. Map GHCB page for all >> cpus to read/write MSR register and submit hvcall request >> via GHCB. >> >> Signed-off-by: Tianyu Lan <Tianyu.Lan@microsoft.com> >> --- >> arch/x86/hyperv/hv_init.c | 66 +++++++++++++++++++++++++++++++-- >> arch/x86/include/asm/mshyperv.h | 2 + >> include/asm-generic/mshyperv.h | 2 + >> 3 files changed, 66 insertions(+), 4 deletions(-) >> >> diff --git a/arch/x86/hyperv/hv_init.c b/arch/x86/hyperv/hv_init.c >> index 708a2712a516..0bb4d9ca7a55 100644 >> --- a/arch/x86/hyperv/hv_init.c >> +++ b/arch/x86/hyperv/hv_init.c >> @@ -20,6 +20,7 @@ >> #include <linux/kexec.h> >> #include <linux/version.h> >> #include <linux/vmalloc.h> >> +#include <linux/io.h> >> #include <linux/mm.h> >> #include <linux/hyperv.h> >> #include <linux/slab.h> >> @@ -42,6 +43,31 @@ static void *hv_hypercall_pg_saved; >> struct hv_vp_assist_page **hv_vp_assist_page; >> EXPORT_SYMBOL_GPL(hv_vp_assist_page); >> >> +static int hyperv_init_ghcb(void) >> +{ >> + u64 ghcb_gpa; >> + void *ghcb_va; >> + void **ghcb_base; >> + >> + if (!ms_hyperv.ghcb_base) >> + return -EINVAL; >> + >> + /* >> + * GHCB page is allocated by paravisor. The address >> + * returned by MSR_AMD64_SEV_ES_GHCB is above shared >> + * ghcb boundary and map it here. 
>> + */ >> + rdmsrl(MSR_AMD64_SEV_ES_GHCB, ghcb_gpa); >> + ghcb_va = memremap(ghcb_gpa, HV_HYP_PAGE_SIZE, MEMREMAP_WB); >> + if (!ghcb_va) >> + return -ENOMEM; >> + >> + ghcb_base = (void **)this_cpu_ptr(ms_hyperv.ghcb_base); >> + *ghcb_base = ghcb_va; >> + >> + return 0; >> +} >> + >> static int hv_cpu_init(unsigned int cpu) >> { >> union hv_vp_assist_msr_contents msr = { 0 }; >> @@ -85,6 +111,8 @@ static int hv_cpu_init(unsigned int cpu) >> } >> } >> >> + hyperv_init_ghcb(); >> + >> return 0; >> } >> >> @@ -177,6 +205,14 @@ static int hv_cpu_die(unsigned int cpu) >> { >> struct hv_reenlightenment_control re_ctrl; >> unsigned int new_cpu; >> + void **ghcb_va = NULL; > > I'm not seeing any reason why this needs to be initialized. > >> + >> + if (ms_hyperv.ghcb_base) { >> + ghcb_va = (void **)this_cpu_ptr(ms_hyperv.ghcb_base); >> + if (*ghcb_va) >> + memunmap(*ghcb_va); >> + *ghcb_va = NULL; >> + } >> >> hv_common_cpu_die(cpu); >> >> @@ -383,9 +419,19 @@ void __init hyperv_init(void) >> VMALLOC_END, GFP_KERNEL, PAGE_KERNEL_ROX, >> VM_FLUSH_RESET_PERMS, NUMA_NO_NODE, >> __builtin_return_address(0)); >> - if (hv_hypercall_pg == NULL) { >> - wrmsrl(HV_X64_MSR_GUEST_OS_ID, 0); >> - goto remove_cpuhp_state; >> + if (hv_hypercall_pg == NULL) >> + goto clean_guest_os_id; >> + >> + if (hv_isolation_type_snp()) { >> + ms_hyperv.ghcb_base = alloc_percpu(void *); >> + if (!ms_hyperv.ghcb_base) >> + goto clean_guest_os_id; >> + >> + if (hyperv_init_ghcb()) { >> + free_percpu(ms_hyperv.ghcb_base); >> + ms_hyperv.ghcb_base = NULL; >> + goto clean_guest_os_id; >> + } > > Having the GHCB setup code here splits the hypercall page setup into > two parts, which is unexpected. First the memory is allocated > for the hypercall page, then the GHCB stuff is done, then the hypercall > MSR is setup. Is there a need to do this split? Also, if the GHCB stuff > fails and you goto clean_guest_os_id, the memory allocated for the > hypercall page is never freed. 
The intent is simply not to enable the hypercall when setting up the GHCB fails. Otherwise, we would need to disable the hypercall in the failure code path. Yes, the hypercall page should be freed in the clean_guest_os_id path. > > It's also unexpected to have hyperv_init_ghcb() called here and called > in hv_cpu_init(). Wouldn't it be possible to setup ghcb_base *before* > cpu_setup_state() is called, so that hv_cpu_init() would take care of > calling hyperv_init_ghcb() for the boot CPU? That's the pattern used > by the VP assist page, the percpu input page, etc. I will give it a try and report back. Thanks for the suggestion. > >> } >> >> rdmsrl(HV_X64_MSR_HYPERCALL, hypercall_msr.as_uint64); >> @@ -456,7 +502,8 @@ void __init hyperv_init(void) >> hv_query_ext_cap(0); >> return; >> >> -remove_cpuhp_state: >> +clean_guest_os_id: >> + wrmsrl(HV_X64_MSR_GUEST_OS_ID, 0); >> cpuhp_remove_state(cpuhp); >> free_vp_assist_page: >> kfree(hv_vp_assist_page); >> @@ -484,6 +531,9 @@ void hyperv_cleanup(void) >> */ >> hv_hypercall_pg = NULL; >> >> + if (ms_hyperv.ghcb_base) >> + free_percpu(ms_hyperv.ghcb_base); >> + > > I don't think this cleanup is necessary. The primary purpose of > hyperv_cleanup() is to ensure that things like overlay pages are > properly reset in Hyper-V before doing a kexec(), or before > panic'ing and running the kdump kernel. There's no need to do > general memory free'ing in Linux. Doing so just adds to the risk > that the panic path could itself fail. Nice catch. I will remove this. 
> >> /* Reset the hypercall page */ >> hypercall_msr.as_uint64 = 0; >> wrmsrl(HV_X64_MSR_HYPERCALL, hypercall_msr.as_uint64); >> @@ -559,3 +609,11 @@ bool hv_is_isolation_supported(void) >> { >> return hv_get_isolation_type() != HV_ISOLATION_TYPE_NONE; >> } >> + >> +DEFINE_STATIC_KEY_FALSE(isolation_type_snp); >> + >> +bool hv_isolation_type_snp(void) >> +{ >> + return static_branch_unlikely(&isolation_type_snp); >> +} >> +EXPORT_SYMBOL_GPL(hv_isolation_type_snp); >> diff --git a/arch/x86/include/asm/mshyperv.h b/arch/x86/include/asm/mshyperv.h >> index adccbc209169..6627cfd2bfba 100644 >> --- a/arch/x86/include/asm/mshyperv.h >> +++ b/arch/x86/include/asm/mshyperv.h >> @@ -11,6 +11,8 @@ >> #include <asm/paravirt.h> >> #include <asm/mshyperv.h> >> >> +DECLARE_STATIC_KEY_FALSE(isolation_type_snp); >> + >> typedef int (*hyperv_fill_flush_list_func)( >> struct hv_guest_mapping_flush_list *flush, >> void *data); >> diff --git a/include/asm-generic/mshyperv.h b/include/asm-generic/mshyperv.h >> index c1ab6a6e72b5..4269f3174e58 100644 >> --- a/include/asm-generic/mshyperv.h >> +++ b/include/asm-generic/mshyperv.h >> @@ -36,6 +36,7 @@ struct ms_hyperv_info { >> u32 max_lp_index; >> u32 isolation_config_a; >> u32 isolation_config_b; >> + void __percpu **ghcb_base; > > This doesn't feel like the right place to put this pointer. The other > fields in the ms_hyperv_info structure are just fixed values obtained > from the CPUID instruction. The existing patterns similar to ghcb_base > are the VP assist page and the percpu input and output args. They are > all based on standalone global variables. It would be more consistent > to do the same with the ghcb_base. OK. I will update in the next version. 
> >> }; >> extern struct ms_hyperv_info ms_hyperv; >> >> @@ -237,6 +238,7 @@ bool hv_is_hyperv_initialized(void); >> bool hv_is_hibernation_supported(void); >> enum hv_isolation_type hv_get_isolation_type(void); >> bool hv_is_isolation_supported(void); >> +bool hv_isolation_type_snp(void); >> void hyperv_cleanup(void); >> bool hv_query_ext_cap(u64 cap_query); >> #else /* CONFIG_HYPERV */ >> -- >> 2.25.1 >
diff --git a/arch/x86/hyperv/hv_init.c b/arch/x86/hyperv/hv_init.c index 708a2712a516..0bb4d9ca7a55 100644 --- a/arch/x86/hyperv/hv_init.c +++ b/arch/x86/hyperv/hv_init.c @@ -20,6 +20,7 @@ #include <linux/kexec.h> #include <linux/version.h> #include <linux/vmalloc.h> +#include <linux/io.h> #include <linux/mm.h> #include <linux/hyperv.h> #include <linux/slab.h> @@ -42,6 +43,31 @@ static void *hv_hypercall_pg_saved; struct hv_vp_assist_page **hv_vp_assist_page; EXPORT_SYMBOL_GPL(hv_vp_assist_page); +static int hyperv_init_ghcb(void) +{ + u64 ghcb_gpa; + void *ghcb_va; + void **ghcb_base; + + if (!ms_hyperv.ghcb_base) + return -EINVAL; + + /* + * GHCB page is allocated by paravisor. The address + * returned by MSR_AMD64_SEV_ES_GHCB is above shared + * ghcb boundary and map it here. + */ + rdmsrl(MSR_AMD64_SEV_ES_GHCB, ghcb_gpa); + ghcb_va = memremap(ghcb_gpa, HV_HYP_PAGE_SIZE, MEMREMAP_WB); + if (!ghcb_va) + return -ENOMEM; + + ghcb_base = (void **)this_cpu_ptr(ms_hyperv.ghcb_base); + *ghcb_base = ghcb_va; + + return 0; +} + static int hv_cpu_init(unsigned int cpu) { union hv_vp_assist_msr_contents msr = { 0 }; @@ -85,6 +111,8 @@ static int hv_cpu_init(unsigned int cpu) } } + hyperv_init_ghcb(); + return 0; } @@ -177,6 +205,14 @@ static int hv_cpu_die(unsigned int cpu) { struct hv_reenlightenment_control re_ctrl; unsigned int new_cpu; + void **ghcb_va = NULL; + + if (ms_hyperv.ghcb_base) { + ghcb_va = (void **)this_cpu_ptr(ms_hyperv.ghcb_base); + if (*ghcb_va) + memunmap(*ghcb_va); + *ghcb_va = NULL; + } hv_common_cpu_die(cpu); @@ -383,9 +419,19 @@ void __init hyperv_init(void) VMALLOC_END, GFP_KERNEL, PAGE_KERNEL_ROX, VM_FLUSH_RESET_PERMS, NUMA_NO_NODE, __builtin_return_address(0)); - if (hv_hypercall_pg == NULL) { - wrmsrl(HV_X64_MSR_GUEST_OS_ID, 0); - goto remove_cpuhp_state; + if (hv_hypercall_pg == NULL) + goto clean_guest_os_id; + + if (hv_isolation_type_snp()) { + ms_hyperv.ghcb_base = alloc_percpu(void *); + if (!ms_hyperv.ghcb_base) + goto clean_guest_os_id; 
+ + if (hyperv_init_ghcb()) { + free_percpu(ms_hyperv.ghcb_base); + ms_hyperv.ghcb_base = NULL; + goto clean_guest_os_id; + } } rdmsrl(HV_X64_MSR_HYPERCALL, hypercall_msr.as_uint64); @@ -456,7 +502,8 @@ void __init hyperv_init(void) hv_query_ext_cap(0); return; -remove_cpuhp_state: +clean_guest_os_id: + wrmsrl(HV_X64_MSR_GUEST_OS_ID, 0); cpuhp_remove_state(cpuhp); free_vp_assist_page: kfree(hv_vp_assist_page); @@ -484,6 +531,9 @@ void hyperv_cleanup(void) */ hv_hypercall_pg = NULL; + if (ms_hyperv.ghcb_base) + free_percpu(ms_hyperv.ghcb_base); + /* Reset the hypercall page */ hypercall_msr.as_uint64 = 0; wrmsrl(HV_X64_MSR_HYPERCALL, hypercall_msr.as_uint64); @@ -559,3 +609,11 @@ bool hv_is_isolation_supported(void) { return hv_get_isolation_type() != HV_ISOLATION_TYPE_NONE; } + +DEFINE_STATIC_KEY_FALSE(isolation_type_snp); + +bool hv_isolation_type_snp(void) +{ + return static_branch_unlikely(&isolation_type_snp); +} +EXPORT_SYMBOL_GPL(hv_isolation_type_snp); diff --git a/arch/x86/include/asm/mshyperv.h b/arch/x86/include/asm/mshyperv.h index adccbc209169..6627cfd2bfba 100644 --- a/arch/x86/include/asm/mshyperv.h +++ b/arch/x86/include/asm/mshyperv.h @@ -11,6 +11,8 @@ #include <asm/paravirt.h> #include <asm/mshyperv.h> +DECLARE_STATIC_KEY_FALSE(isolation_type_snp); + typedef int (*hyperv_fill_flush_list_func)( struct hv_guest_mapping_flush_list *flush, void *data); diff --git a/include/asm-generic/mshyperv.h b/include/asm-generic/mshyperv.h index c1ab6a6e72b5..4269f3174e58 100644 --- a/include/asm-generic/mshyperv.h +++ b/include/asm-generic/mshyperv.h @@ -36,6 +36,7 @@ struct ms_hyperv_info { u32 max_lp_index; u32 isolation_config_a; u32 isolation_config_b; + void __percpu **ghcb_base; }; extern struct ms_hyperv_info ms_hyperv; @@ -237,6 +238,7 @@ bool hv_is_hyperv_initialized(void); bool hv_is_hibernation_supported(void); enum hv_isolation_type hv_get_isolation_type(void); bool hv_is_isolation_supported(void); +bool hv_isolation_type_snp(void); void 
hyperv_cleanup(void); bool hv_query_ext_cap(u64 cap_query); #else /* CONFIG_HYPERV */