Message ID | 20231230161954.569267-5-michael.roth@amd.com |
---|---|
State | Superseded |
Headers | show |
Series | Add AMD Secure Nested Paging (SEV-SNP) Initialization Support | expand |
On 30/12/2023 17:19, Michael Roth wrote: > From: Brijesh Singh <brijesh.singh@amd.com> > > The memory integrity guarantees of SEV-SNP are enforced through a new > structure called the Reverse Map Table (RMP). The RMP is a single data > structure shared across the system that contains one entry for every 4K > page of DRAM that may be used by SEV-SNP VMs. APM2 section 15.36 details > a number of steps needed to detect/enable SEV-SNP and RMP table support > on the host: > > - Detect SEV-SNP support based on CPUID bit > - Initialize the RMP table memory reported by the RMP base/end MSR > registers and configure IOMMU to be compatible with RMP access > restrictions > - Set the MtrrFixDramModEn bit in SYSCFG MSR > - Set the SecureNestedPagingEn and VMPLEn bits in the SYSCFG MSR > - Configure IOMMU > > RMP table entry format is non-architectural and it can vary by > processor. It is defined by the PPR. Restrict SNP support to CPU > models/families which are compatible with the current RMP table entry > format to guard against any undefined behavior when running on other > system types. Future models/support will handle this through an > architectural mechanism to allow for broader compatibility. > > SNP host code depends on CONFIG_KVM_AMD_SEV config flag, which may be > enabled even when CONFIG_AMD_MEM_ENCRYPT isn't set, so update the > SNP-specific IOMMU helpers used here to rely on CONFIG_KVM_AMD_SEV > instead of CONFIG_AMD_MEM_ENCRYPT. 
> > Signed-off-by: Brijesh Singh <brijesh.singh@amd.com> > Co-developed-by: Ashish Kalra <ashish.kalra@amd.com> > Signed-off-by: Ashish Kalra <ashish.kalra@amd.com> > Co-developed-by: Tom Lendacky <thomas.lendacky@amd.com> > Signed-off-by: Tom Lendacky <thomas.lendacky@amd.com> > Co-developed-by: Michael Roth <michael.roth@amd.com> > Signed-off-by: Michael Roth <michael.roth@amd.com> > --- > arch/x86/Kbuild | 2 + > arch/x86/include/asm/msr-index.h | 11 +- > arch/x86/include/asm/sev.h | 6 + > arch/x86/kernel/cpu/amd.c | 15 +++ > arch/x86/virt/svm/Makefile | 3 + > arch/x86/virt/svm/sev.c | 219 +++++++++++++++++++++++++++++++ > 6 files changed, 255 insertions(+), 1 deletion(-) > create mode 100644 arch/x86/virt/svm/Makefile > create mode 100644 arch/x86/virt/svm/sev.c > > diff --git a/arch/x86/Kbuild b/arch/x86/Kbuild > index 5a83da703e87..6a1f36df6a18 100644 > --- a/arch/x86/Kbuild > +++ b/arch/x86/Kbuild > @@ -28,5 +28,7 @@ obj-y += net/ > > obj-$(CONFIG_KEXEC_FILE) += purgatory/ > > +obj-y += virt/svm/ > + > # for cleaning > subdir- += boot tools > diff --git a/arch/x86/include/asm/msr-index.h b/arch/x86/include/asm/msr-index.h > index f1bd7b91b3c6..15ce1269f270 100644 > --- a/arch/x86/include/asm/msr-index.h > +++ b/arch/x86/include/asm/msr-index.h > @@ -599,6 +599,8 @@ > #define MSR_AMD64_SEV_ENABLED BIT_ULL(MSR_AMD64_SEV_ENABLED_BIT) > #define MSR_AMD64_SEV_ES_ENABLED BIT_ULL(MSR_AMD64_SEV_ES_ENABLED_BIT) > #define MSR_AMD64_SEV_SNP_ENABLED BIT_ULL(MSR_AMD64_SEV_SNP_ENABLED_BIT) > +#define MSR_AMD64_RMP_BASE 0xc0010132 > +#define MSR_AMD64_RMP_END 0xc0010133 > > /* SNP feature bits enabled by the hypervisor */ > #define MSR_AMD64_SNP_VTOM BIT_ULL(3) > @@ -709,7 +711,14 @@ > #define MSR_K8_TOP_MEM2 0xc001001d > #define MSR_AMD64_SYSCFG 0xc0010010 > #define MSR_AMD64_SYSCFG_MEM_ENCRYPT_BIT 23 > -#define MSR_AMD64_SYSCFG_MEM_ENCRYPT BIT_ULL(MSR_AMD64_SYSCFG_MEM_ENCRYPT_BIT) > +#define MSR_AMD64_SYSCFG_MEM_ENCRYPT BIT_ULL(MSR_AMD64_SYSCFG_MEM_ENCRYPT_BIT) > +#define 
MSR_AMD64_SYSCFG_SNP_EN_BIT 24 > +#define MSR_AMD64_SYSCFG_SNP_EN BIT_ULL(MSR_AMD64_SYSCFG_SNP_EN_BIT) > +#define MSR_AMD64_SYSCFG_SNP_VMPL_EN_BIT 25 > +#define MSR_AMD64_SYSCFG_SNP_VMPL_EN BIT_ULL(MSR_AMD64_SYSCFG_SNP_VMPL_EN_BIT) > +#define MSR_AMD64_SYSCFG_MFDM_BIT 19 > +#define MSR_AMD64_SYSCFG_MFDM BIT_ULL(MSR_AMD64_SYSCFG_MFDM_BIT) > + > #define MSR_K8_INT_PENDING_MSG 0xc0010055 > /* C1E active bits in int pending message */ > #define K8_INTP_C1E_ACTIVE_MASK 0x18000000 > diff --git a/arch/x86/include/asm/sev.h b/arch/x86/include/asm/sev.h > index 5b4a1ce3d368..1f59d8ba9776 100644 > --- a/arch/x86/include/asm/sev.h > +++ b/arch/x86/include/asm/sev.h > @@ -243,4 +243,10 @@ static inline u64 snp_get_unsupported_features(u64 status) { return 0; } > static inline u64 sev_get_status(void) { return 0; } > #endif > > +#ifdef CONFIG_KVM_AMD_SEV > +bool snp_probe_rmptable_info(void); > +#else > +static inline bool snp_probe_rmptable_info(void) { return false; } > +#endif > + > #endif > diff --git a/arch/x86/kernel/cpu/amd.c b/arch/x86/kernel/cpu/amd.c > index 9a17165dfe84..0f0d425f0440 100644 > --- a/arch/x86/kernel/cpu/amd.c > +++ b/arch/x86/kernel/cpu/amd.c > @@ -20,6 +20,7 @@ > #include <asm/delay.h> > #include <asm/debugreg.h> > #include <asm/resctrl.h> > +#include <asm/sev.h> > > #ifdef CONFIG_X86_64 > # include <asm/mmconfig.h> > @@ -574,6 +575,20 @@ static void bsp_init_amd(struct cpuinfo_x86 *c) > break; > } > > + if (cpu_has(c, X86_FEATURE_SEV_SNP)) { > + /* > + * RMP table entry format is not architectural and it can vary by processor > + * and is defined by the per-processor PPR. Restrict SNP support on the > + * known CPU model and family for which the RMP table entry format is > + * currently defined for. 
> + */ > + if (!(c->x86 == 0x19 && c->x86_model <= 0xaf) && > + !(c->x86 == 0x1a && c->x86_model <= 0xf)) > + setup_clear_cpu_cap(X86_FEATURE_SEV_SNP); > + else if (!snp_probe_rmptable_info()) > + setup_clear_cpu_cap(X86_FEATURE_SEV_SNP); Is there a really good reason to perform the snp_probe_smptable_info() check at this point (instead of in snp_rmptable_init). snp_rmptable_init will also clear the cap on failure, and bsp_init_amd() runs too early to allow for the kernel to allocate the rmptable itself. I pointed out in the previous review that kernel allocation of rmptable is necessary in SNP-host capable VMs in Azure. > + } > + > return; > > warn: > diff --git a/arch/x86/virt/svm/Makefile b/arch/x86/virt/svm/Makefile > new file mode 100644 > index 000000000000..ef2a31bdcc70 > --- /dev/null > +++ b/arch/x86/virt/svm/Makefile > @@ -0,0 +1,3 @@ > +# SPDX-License-Identifier: GPL-2.0 > + > +obj-$(CONFIG_KVM_AMD_SEV) += sev.o > diff --git a/arch/x86/virt/svm/sev.c b/arch/x86/virt/svm/sev.c > new file mode 100644 > index 000000000000..ce7ede9065ed > --- /dev/null > +++ b/arch/x86/virt/svm/sev.c > @@ -0,0 +1,219 @@ > +// SPDX-License-Identifier: GPL-2.0-only > +/* > + * AMD SVM-SEV Host Support. > + * > + * Copyright (C) 2023 Advanced Micro Devices, Inc. > + * > + * Author: Ashish Kalra <ashish.kalra@amd.com> > + * > + */ > + > +#include <linux/cc_platform.h> > +#include <linux/printk.h> > +#include <linux/mm_types.h> > +#include <linux/set_memory.h> > +#include <linux/memblock.h> > +#include <linux/kernel.h> > +#include <linux/mm.h> > +#include <linux/cpumask.h> > +#include <linux/iommu.h> > +#include <linux/amd-iommu.h> > + > +#include <asm/sev.h> > +#include <asm/processor.h> > +#include <asm/setup.h> > +#include <asm/svm.h> > +#include <asm/smp.h> > +#include <asm/cpu.h> > +#include <asm/apic.h> > +#include <asm/cpuid.h> > +#include <asm/cmdline.h> > +#include <asm/iommu.h> > + > +/* > + * The RMP entry format is not architectural. 
The format is defined in PPR > + * Family 19h Model 01h, Rev B1 processor. > + */ > +struct rmpentry { > + u64 assigned : 1, > + pagesize : 1, > + immutable : 1, > + rsvd1 : 9, > + gpa : 39, > + asid : 10, > + vmsa : 1, > + validated : 1, > + rsvd2 : 1; > + u64 rsvd3; > +} __packed; > + > +/* > + * The first 16KB from the RMP_BASE is used by the processor for the > + * bookkeeping, the range needs to be added during the RMP entry lookup. > + */ > +#define RMPTABLE_CPU_BOOKKEEPING_SZ 0x4000 > + > +static u64 probed_rmp_base, probed_rmp_size; > +static struct rmpentry *rmptable __ro_after_init; > +static u64 rmptable_max_pfn __ro_after_init; > + > +#undef pr_fmt > +#define pr_fmt(fmt) "SEV-SNP: " fmt > + > +static int __mfd_enable(unsigned int cpu) > +{ > + u64 val; > + > + if (!cpu_feature_enabled(X86_FEATURE_SEV_SNP)) > + return 0; > + > + rdmsrl(MSR_AMD64_SYSCFG, val); > + > + val |= MSR_AMD64_SYSCFG_MFDM; > + > + wrmsrl(MSR_AMD64_SYSCFG, val); > + > + return 0; > +} > + > +static __init void mfd_enable(void *arg) > +{ > + __mfd_enable(smp_processor_id()); > +} > + > +static int __snp_enable(unsigned int cpu) > +{ > + u64 val; > + > + if (!cpu_feature_enabled(X86_FEATURE_SEV_SNP)) > + return 0; > + > + rdmsrl(MSR_AMD64_SYSCFG, val); > + > + val |= MSR_AMD64_SYSCFG_SNP_EN; > + val |= MSR_AMD64_SYSCFG_SNP_VMPL_EN; > + > + wrmsrl(MSR_AMD64_SYSCFG, val); > + > + return 0; > +} > + > +static __init void snp_enable(void *arg) > +{ > + __snp_enable(smp_processor_id()); > +} > + > +#define RMP_ADDR_MASK GENMASK_ULL(51, 13) > + > +bool snp_probe_rmptable_info(void) > +{ > + u64 max_rmp_pfn, calc_rmp_sz, rmp_sz, rmp_base, rmp_end; > + > + rdmsrl(MSR_AMD64_RMP_BASE, rmp_base); > + rdmsrl(MSR_AMD64_RMP_END, rmp_end); > + > + if (!(rmp_base & RMP_ADDR_MASK) || !(rmp_end & RMP_ADDR_MASK)) { > + pr_err("Memory for the RMP table has not been reserved by BIOS\n"); > + return false; > + } > + > + if (rmp_base > rmp_end) { > + pr_err("RMP configuration not valid: base=%#llx, 
end=%#llx\n", rmp_base, rmp_end); > + return false; > + } > + > + rmp_sz = rmp_end - rmp_base + 1; > + > + /* > + * Calculate the amount the memory that must be reserved by the BIOS to > + * address the whole RAM, including the bookkeeping area. The RMP itself > + * must also be covered. > + */ > + max_rmp_pfn = max_pfn; > + if (PHYS_PFN(rmp_end) > max_pfn) > + max_rmp_pfn = PHYS_PFN(rmp_end); > + > + calc_rmp_sz = (max_rmp_pfn << 4) + RMPTABLE_CPU_BOOKKEEPING_SZ; > + > + if (calc_rmp_sz > rmp_sz) { > + pr_err("Memory reserved for the RMP table does not cover full system RAM (expected 0x%llx got 0x%llx)\n", > + calc_rmp_sz, rmp_sz); > + return false; > + } > + > + probed_rmp_base = rmp_base; > + probed_rmp_size = rmp_sz; > + > + pr_info("RMP table physical range [0x%016llx - 0x%016llx]\n", > + probed_rmp_base, probed_rmp_base + probed_rmp_size - 1); > + > + return true; > +} > + > +static int __init __snp_rmptable_init(void) > +{ > + u64 rmptable_size; > + void *rmptable_start; > + u64 val; > + > + if (!probed_rmp_size) > + return 1; > + > + rmptable_start = memremap(probed_rmp_base, probed_rmp_size, MEMREMAP_WB); > + if (!rmptable_start) { > + pr_err("Failed to map RMP table\n"); > + return 1; > + } > + > + /* > + * Check if SEV-SNP is already enabled, this can happen in case of > + * kexec boot. > + */ > + rdmsrl(MSR_AMD64_SYSCFG, val); > + if (val & MSR_AMD64_SYSCFG_SNP_EN) > + goto skip_enable; > + > + memset(rmptable_start, 0, probed_rmp_size); > + > + /* Flush the caches to ensure that data is written before SNP is enabled. */ > + wbinvd_on_all_cpus(); > + > + /* MtrrFixDramModEn must be enabled on all the CPUs prior to enabling SNP. 
*/ > + on_each_cpu(mfd_enable, NULL, 1); > + > + on_each_cpu(snp_enable, NULL, 1); > + > +skip_enable: > + rmptable_start += RMPTABLE_CPU_BOOKKEEPING_SZ; > + rmptable_size = probed_rmp_size - RMPTABLE_CPU_BOOKKEEPING_SZ; > + > + rmptable = (struct rmpentry *)rmptable_start; > + rmptable_max_pfn = rmptable_size / sizeof(struct rmpentry) - 1; > + > + return 0; > +} > + > +static int __init snp_rmptable_init(void) > +{ > + if (!cpu_feature_enabled(X86_FEATURE_SEV_SNP)) > + return 0; > + > + if (!amd_iommu_snp_en) > + return 0; Looks better - do you think it'll be OK to add a X86_FEATURE_HYPERVISOR check at this point later to account for SNP-host capable VMs with no access to an iommu? Jeremi > + > + if (__snp_rmptable_init()) > + goto nosnp; > + > + cpuhp_setup_state(CPUHP_AP_ONLINE_DYN, "x86/rmptable_init:online", __snp_enable, NULL); > + > + return 0; > + > +nosnp: > + setup_clear_cpu_cap(X86_FEATURE_SEV_SNP); > + return -ENOSYS; > +} > + > +/* > + * This must be called after the IOMMU has been initialized. > + */ > +device_initcall(snp_rmptable_init);
On Sat, Dec 30, 2023 at 10:19:32AM -0600, Michael Roth wrote: > From: Brijesh Singh <brijesh.singh@amd.com> > > The memory integrity guarantees of SEV-SNP are enforced through a new > structure called the Reverse Map Table (RMP). The RMP is a single data > structure shared across the system that contains one entry for every 4K > page of DRAM that may be used by SEV-SNP VMs. APM2 section 15.36 details > a number of steps needed to detect/enable SEV-SNP and RMP table support > on the host: > > - Detect SEV-SNP support based on CPUID bit > - Initialize the RMP table memory reported by the RMP base/end MSR > registers and configure IOMMU to be compatible with RMP access > restrictions > - Set the MtrrFixDramModEn bit in SYSCFG MSR > - Set the SecureNestedPagingEn and VMPLEn bits in the SYSCFG MSR > - Configure IOMMU > > RMP table entry format is non-architectural and it can vary by > processor. It is defined by the PPR. Restrict SNP support to CPU > models/families which are compatible with the current RMP table entry > format to guard against any undefined behavior when running on other > system types. Future models/support will handle this through an > architectural mechanism to allow for broader compatibility. > > SNP host code depends on CONFIG_KVM_AMD_SEV config flag, which may be > enabled even when CONFIG_AMD_MEM_ENCRYPT isn't set, so update the > SNP-specific IOMMU helpers used here to rely on CONFIG_KVM_AMD_SEV > instead of CONFIG_AMD_MEM_ENCRYPT. Small fixups to the commit message: The memory integrity guarantees of SEV-SNP are enforced through a new structure called the Reverse Map Table (RMP). The RMP is a single data structure shared across the system that contains one entry for every 4K page of DRAM that may be used by SEV-SNP VMs. 
The APM v2 section on Secure Nested Paging (SEV-SNP) details a number of steps needed to detect/enable SEV-SNP and RMP table support on the host: - Detect SEV-SNP support based on CPUID bit - Initialize the RMP table memory reported by the RMP base/end MSR registers and configure IOMMU to be compatible with RMP access restrictions - Set the MtrrFixDramModEn bit in SYSCFG MSR - Set the SecureNestedPagingEn and VMPLEn bits in the SYSCFG MSR - Configure IOMMU The RMP table entry format is non-architectural and it can vary by processor. It is defined by the PPR document for each respective CPU family. Restrict SNP support to CPU models/families which are compatible with the current RMP table entry format to guard against any undefined behavior when running on other system types. Future models/support will handle this through an architectural mechanism to allow for broader compatibility. The SNP host code depends on CONFIG_KVM_AMD_SEV config flag which may be enabled even when CONFIG_AMD_MEM_ENCRYPT isn't set, so update the SNP-specific IOMMU helpers used here to rely on CONFIG_KVM_AMD_SEV instead of CONFIG_AMD_MEM_ENCRYPT. 
> diff --git a/arch/x86/include/asm/msr-index.h b/arch/x86/include/asm/msr-index.h > index f1bd7b91b3c6..15ce1269f270 100644 > --- a/arch/x86/include/asm/msr-index.h > +++ b/arch/x86/include/asm/msr-index.h > @@ -599,6 +599,8 @@ > #define MSR_AMD64_SEV_ENABLED BIT_ULL(MSR_AMD64_SEV_ENABLED_BIT) > #define MSR_AMD64_SEV_ES_ENABLED BIT_ULL(MSR_AMD64_SEV_ES_ENABLED_BIT) > #define MSR_AMD64_SEV_SNP_ENABLED BIT_ULL(MSR_AMD64_SEV_SNP_ENABLED_BIT) > +#define MSR_AMD64_RMP_BASE 0xc0010132 > +#define MSR_AMD64_RMP_END 0xc0010133 > > /* SNP feature bits enabled by the hypervisor */ > #define MSR_AMD64_SNP_VTOM BIT_ULL(3) > @@ -709,7 +711,14 @@ > #define MSR_K8_TOP_MEM2 0xc001001d > #define MSR_AMD64_SYSCFG 0xc0010010 > #define MSR_AMD64_SYSCFG_MEM_ENCRYPT_BIT 23 > -#define MSR_AMD64_SYSCFG_MEM_ENCRYPT BIT_ULL(MSR_AMD64_SYSCFG_MEM_ENCRYPT_BIT) > +#define MSR_AMD64_SYSCFG_MEM_ENCRYPT BIT_ULL(MSR_AMD64_SYSCFG_MEM_ENCRYPT_BIT) > +#define MSR_AMD64_SYSCFG_SNP_EN_BIT 24 > +#define MSR_AMD64_SYSCFG_SNP_EN BIT_ULL(MSR_AMD64_SYSCFG_SNP_EN_BIT) > +#define MSR_AMD64_SYSCFG_SNP_VMPL_EN_BIT 25 > +#define MSR_AMD64_SYSCFG_SNP_VMPL_EN BIT_ULL(MSR_AMD64_SYSCFG_SNP_VMPL_EN_BIT) > +#define MSR_AMD64_SYSCFG_MFDM_BIT 19 > +#define MSR_AMD64_SYSCFG_MFDM BIT_ULL(MSR_AMD64_SYSCFG_MFDM_BIT) > + Fix the vertical alignment: diff --git a/arch/x86/include/asm/msr-index.h b/arch/x86/include/asm/msr-index.h index 15ce1269f270..f482bc6a5ae7 100644 --- a/arch/x86/include/asm/msr-index.h +++ b/arch/x86/include/asm/msr-index.h @@ -710,14 +710,14 @@ #define MSR_K8_TOP_MEM1 0xc001001a #define MSR_K8_TOP_MEM2 0xc001001d #define MSR_AMD64_SYSCFG 0xc0010010 -#define MSR_AMD64_SYSCFG_MEM_ENCRYPT_BIT 23 -#define MSR_AMD64_SYSCFG_MEM_ENCRYPT BIT_ULL(MSR_AMD64_SYSCFG_MEM_ENCRYPT_BIT) -#define MSR_AMD64_SYSCFG_SNP_EN_BIT 24 +#define MSR_AMD64_SYSCFG_MEM_ENCRYPT_BIT 23 +#define MSR_AMD64_SYSCFG_MEM_ENCRYPT BIT_ULL(MSR_AMD64_SYSCFG_MEM_ENCRYPT_BIT) +#define MSR_AMD64_SYSCFG_SNP_EN_BIT 24 #define MSR_AMD64_SYSCFG_SNP_EN 
BIT_ULL(MSR_AMD64_SYSCFG_SNP_EN_BIT) -#define MSR_AMD64_SYSCFG_SNP_VMPL_EN_BIT 25 -#define MSR_AMD64_SYSCFG_SNP_VMPL_EN BIT_ULL(MSR_AMD64_SYSCFG_SNP_VMPL_EN_BIT) -#define MSR_AMD64_SYSCFG_MFDM_BIT 19 -#define MSR_AMD64_SYSCFG_MFDM BIT_ULL(MSR_AMD64_SYSCFG_MFDM_BIT) +#define MSR_AMD64_SYSCFG_SNP_VMPL_EN_BIT 25 +#define MSR_AMD64_SYSCFG_SNP_VMPL_EN BIT_ULL(MSR_AMD64_SYSCFG_SNP_VMPL_EN_BIT) +#define MSR_AMD64_SYSCFG_MFDM_BIT 19 +#define MSR_AMD64_SYSCFG_MFDM BIT_ULL(MSR_AMD64_SYSCFG_MFDM_BIT) #define MSR_K8_INT_PENDING_MSG 0xc0010055 /* C1E active bits in int pending message */
On Sat, Dec 30, 2023 at 10:19:32AM -0600, Michael Roth wrote: > + if (cpu_has(c, X86_FEATURE_SEV_SNP)) { > + /* > + * RMP table entry format is not architectural and it can vary by processor > + * and is defined by the per-processor PPR. Restrict SNP support on the > + * known CPU model and family for which the RMP table entry format is > + * currently defined for. > + */ > + if (!(c->x86 == 0x19 && c->x86_model <= 0xaf) && > + !(c->x86 == 0x1a && c->x86_model <= 0xf)) > + setup_clear_cpu_cap(X86_FEATURE_SEV_SNP); > + else if (!snp_probe_rmptable_info()) > + setup_clear_cpu_cap(X86_FEATURE_SEV_SNP); > + } IOW, this below. Lemme send the ZEN5 thing as a separate patch. diff --git a/arch/x86/include/asm/cpufeatures.h b/arch/x86/include/asm/cpufeatures.h index 9492dcad560d..0fa702673e73 100644 --- a/arch/x86/include/asm/cpufeatures.h +++ b/arch/x86/include/asm/cpufeatures.h @@ -81,10 +81,8 @@ #define X86_FEATURE_K6_MTRR ( 3*32+ 1) /* AMD K6 nonstandard MTRRs */ #define X86_FEATURE_CYRIX_ARR ( 3*32+ 2) /* Cyrix ARRs (= MTRRs) */ #define X86_FEATURE_CENTAUR_MCR ( 3*32+ 3) /* Centaur MCRs (= MTRRs) */ - -/* CPU types for specific tunings: */ #define X86_FEATURE_K8 ( 3*32+ 4) /* "" Opteron, Athlon64 */ -/* FREE, was #define X86_FEATURE_K7 ( 3*32+ 5) "" Athlon */ +#define X86_FEATURE_ZEN5 ( 3*32+ 5) /* "" CPU based on Zen5 microarchitecture */ #define X86_FEATURE_P3 ( 3*32+ 6) /* "" P3 */ #define X86_FEATURE_P4 ( 3*32+ 7) /* "" P4 */ #define X86_FEATURE_CONSTANT_TSC ( 3*32+ 8) /* TSC ticks at a constant rate */ diff --git a/arch/x86/kernel/cpu/amd.c b/arch/x86/kernel/cpu/amd.c index 0f0d425f0440..46335c2df083 100644 --- a/arch/x86/kernel/cpu/amd.c +++ b/arch/x86/kernel/cpu/amd.c @@ -539,7 +539,7 @@ static void bsp_init_amd(struct cpuinfo_x86 *c) /* Figure out Zen generations: */ switch (c->x86) { - case 0x17: { + case 0x17: switch (c->x86_model) { case 0x00 ... 0x2f: case 0x50 ... 
0x5f: @@ -555,8 +555,8 @@ static void bsp_init_amd(struct cpuinfo_x86 *c) goto warn; } break; - } - case 0x19: { + + case 0x19: switch (c->x86_model) { case 0x00 ... 0x0f: case 0x20 ... 0x5f: @@ -570,20 +570,31 @@ static void bsp_init_amd(struct cpuinfo_x86 *c) goto warn; } break; - } + + case 0x1a: + switch (c->x86_model) { + case 0x00 ... 0x0f: + setup_force_cpu_cap(X86_FEATURE_ZEN5); + break; + default: + goto warn; + } + break; + default: break; } if (cpu_has(c, X86_FEATURE_SEV_SNP)) { /* - * RMP table entry format is not architectural and it can vary by processor + * RMP table entry format is not architectural, can vary by processor * and is defined by the per-processor PPR. Restrict SNP support on the * known CPU model and family for which the RMP table entry format is * currently defined for. */ - if (!(c->x86 == 0x19 && c->x86_model <= 0xaf) && - !(c->x86 == 0x1a && c->x86_model <= 0xf)) + if (!boot_cpu_has(X86_FEATURE_ZEN3) && + !boot_cpu_has(X86_FEATURE_ZEN4) && + !boot_cpu_has(X86_FEATURE_ZEN5)) setup_clear_cpu_cap(X86_FEATURE_SEV_SNP); else if (!snp_probe_rmptable_info()) setup_clear_cpu_cap(X86_FEATURE_SEV_SNP); @@ -1055,6 +1066,11 @@ static void init_amd_zen4(struct cpuinfo_x86 *c) msr_set_bit(MSR_ZEN4_BP_CFG, MSR_ZEN4_BP_CFG_SHARED_BTB_FIX_BIT); } +static void init_amd_zen5(struct cpuinfo_x86 *c) +{ + init_amd_zen_common(); +} + static void init_amd(struct cpuinfo_x86 *c) { u64 vm_cr; @@ -1100,6 +1116,8 @@ static void init_amd(struct cpuinfo_x86 *c) init_amd_zen3(c); else if (boot_cpu_has(X86_FEATURE_ZEN4)) init_amd_zen4(c); + else if (boot_cpu_has(X86_FEATURE_ZEN5)) + init_amd_zen5(c); /* * Enable workaround for FXSAVE leak on CPUs
On Sat, Dec 30, 2023 at 10:19:32AM -0600, Michael Roth wrote: > +static int __init __snp_rmptable_init(void) > +{ > + u64 rmptable_size; > + void *rmptable_start; > + u64 val; ... Ontop: diff --git a/arch/x86/virt/svm/sev.c b/arch/x86/virt/svm/sev.c index ce7ede9065ed..566bb6f39665 100644 --- a/arch/x86/virt/svm/sev.c +++ b/arch/x86/virt/svm/sev.c @@ -150,6 +150,11 @@ bool snp_probe_rmptable_info(void) return true; } +/* + * Do the necessary preparations which are verified by the firmware as + * described in the SNP_INIT_EX firmware command description in the SNP + * firmware ABI spec. + */ static int __init __snp_rmptable_init(void) { u64 rmptable_size;
On Sat, Dec 30, 2023 at 10:19:32AM -0600, Michael Roth wrote: > +static int __init __snp_rmptable_init(void) I already asked a year ago: https://lore.kernel.org/all/Y9ubi0i4Z750gdMm@zn.tnic/ why is the __ version - __snp_rmptable_init - carved out but crickets. It simply gets ignored. :-\ So let me do it myself, diff below. Please add to the next version: Co-developed-by: Borislav Petkov (AMD) <bp@alien8.de> Signed-off-by: Borislav Petkov (AMD) <bp@alien8.de> after incorporating all the changes. Thx. --- diff --git a/arch/x86/virt/svm/sev.c b/arch/x86/virt/svm/sev.c index 566bb6f39665..feed65f80776 100644 --- a/arch/x86/virt/svm/sev.c +++ b/arch/x86/virt/svm/sev.c @@ -155,19 +155,25 @@ bool snp_probe_rmptable_info(void) * described in the SNP_INIT_EX firmware command description in the SNP * firmware ABI spec. */ -static int __init __snp_rmptable_init(void) +static int __init snp_rmptable_init(void) { - u64 rmptable_size; void *rmptable_start; + u64 rmptable_size; u64 val; + if (!cpu_feature_enabled(X86_FEATURE_SEV_SNP)) + return 0; + + if (!amd_iommu_snp_en) + return 0; + if (!probed_rmp_size) - return 1; + goto nosnp; rmptable_start = memremap(probed_rmp_base, probed_rmp_size, MEMREMAP_WB); if (!rmptable_start) { pr_err("Failed to map RMP table\n"); - return 1; + goto nosnp; } /* @@ -195,20 +201,6 @@ static int __init __snp_rmptable_init(void) rmptable = (struct rmpentry *)rmptable_start; rmptable_max_pfn = rmptable_size / sizeof(struct rmpentry) - 1; - return 0; -} - -static int __init snp_rmptable_init(void) -{ - if (!cpu_feature_enabled(X86_FEATURE_SEV_SNP)) - return 0; - - if (!amd_iommu_snp_en) - return 0; - - if (__snp_rmptable_init()) - goto nosnp; - cpuhp_setup_state(CPUHP_AP_ONLINE_DYN, "x86/rmptable_init:online", __snp_enable, NULL); return 0;
On 05/01/2024 17:21, Borislav Petkov wrote: > On Fri, Jan 05, 2024 at 05:09:16PM +0100, Borislav Petkov wrote: >> On Thu, Jan 04, 2024 at 12:05:27PM +0100, Jeremi Piotrowski wrote: >>> Is there a really good reason to perform the snp_probe_rmptable_info() check at this >>> point (instead of in snp_rmptable_init)? snp_rmptable_init will also clear the cap >>> on failure, and bsp_init_amd() runs too early to allow for the kernel to allocate the >>> rmptable itself. I pointed out in the previous review that kernel allocation of rmptable >>> is necessary in SNP-host capable VMs in Azure. >> >> What does that even mean? >> >> That function is doing some calculations after reading two MSRs. What >> can possibly go wrong?! > What I wrote: "allow for the kernel to allocate the rmptable". Until the kernel allocates an rmptable, the two MSRs are not initialized in a VM. This is specific to SNP-host VMs because they don't have access to the system-wide rmptable (or a virtualized version of it), and the rmptable is only useful for kernel internal tracking in this case. So we don't strictly need one and could save the overhead, but not having one would complicate the KVM SNP code, so I'd rather allocate one for now. It makes the most sense to perform the rmptable allocation later in kernel init, after platform detection and e820 setup. It isn't really used until device_initcall. https://lore.kernel.org/lkml/20230213103402.1189285-2-jpiotrowski@linux.microsoft.com/ (I'll be posting updated patches soon). > That could be one reason perhaps: > > "It needs to be called early enough to allow for AutoIBRS to not be disabled > just because SNP is supported. By calling it where it is currently called, the > SNP feature can be cleared if, even though supported, SNP can't be used, > allowing AutoIBRS to be used as a more performant Spectre mitigation." > > https://lore.kernel.org/r/8ec38db1-5ccf-4684-bc0d-d48579ebf0d0@amd.com > This logic seems twisted. 
Why use firmware rmptable allocation as a proxy for SEV-SNP enablement if the BIOS provides an explicit flag to enable/disable SEV-SNP support? That would be a better signal to use to control AutoIBRS enablement.
On Tue, Jan 09, 2024 at 12:56:17PM +0100, Jeremi Piotrowski wrote: > Can we please not assume I am acting in bad faith. No, you're not acting in bad faith. What you're doing, in my experience so far, is this: you come with some weird HV + guest models which have been invented somewhere, behind some closed doors, then you come with some desire that the upstream kernel should support it and you're not even documenting it properly and I'm left with asking questions all the time, what is this, what's the use case, blabla. Don't take this personally - I guess this is all due to NDAs, development schedules, and whatever else and yes, I've heard it all. But just because you want this, we're not going to jump on it and support it unconditionally. It needs to integrate properly with the rest of the kernel and if it doesn't, it is not going upstream. That simple. > I am explicitly trying to integrate nicely with AMD's KVM SNP host > patches to cover an additional usecase and get something upstreamable. And yet I still have no clue what your use case is. I always have to go ask behind the scenes and get some half-answers about *maybe* this is what they support. Looking at the patch you pointed at, I see there a proper explanation of your nested SNP stuff. Finally!
On 09/01/2024 13:44, Borislav Petkov wrote: > On Tue, Jan 09, 2024 at 01:29:06PM +0100, Borislav Petkov wrote: >> At least three issues I see with that: >> >> - the allocation can fail so it is a lot more convenient when the >> firmware prepares it >> >> - the RMP_BASE and RMP_END writes need to be verified they actually did >> set up the RMP range because if they haven't, you might as well >> throw SNP security out of the window. In general, letting the kernel >> do the RMP allocation needs to be verified very very thoroughly. >> >> - a future feature might make this more complicated > > - What do you do if you boot on a system which has the RMP already > allocated in the BIOS? > > - How do you detect that it is the L1 kernel that must allocate the RMP? > > - Why can't you use the BIOS allocated RMP in your scenario too instead > of the L1 kernel allocating it? > > - ... > > I might think of more. > Sorry for not replying back sooner. I agree, let's get the base SNP stuff in and then talk about extensions. I want to sync up with Michael to make sure he's on board with what I'm proposing. I'll add more design/documentation/usecase descriptions with the next submission and will make sure to address all the issues you brought up. Jeremi
diff --git a/arch/x86/Kbuild b/arch/x86/Kbuild index 5a83da703e87..6a1f36df6a18 100644 --- a/arch/x86/Kbuild +++ b/arch/x86/Kbuild @@ -28,5 +28,7 @@ obj-y += net/ obj-$(CONFIG_KEXEC_FILE) += purgatory/ +obj-y += virt/svm/ + # for cleaning subdir- += boot tools diff --git a/arch/x86/include/asm/msr-index.h b/arch/x86/include/asm/msr-index.h index f1bd7b91b3c6..15ce1269f270 100644 --- a/arch/x86/include/asm/msr-index.h +++ b/arch/x86/include/asm/msr-index.h @@ -599,6 +599,8 @@ #define MSR_AMD64_SEV_ENABLED BIT_ULL(MSR_AMD64_SEV_ENABLED_BIT) #define MSR_AMD64_SEV_ES_ENABLED BIT_ULL(MSR_AMD64_SEV_ES_ENABLED_BIT) #define MSR_AMD64_SEV_SNP_ENABLED BIT_ULL(MSR_AMD64_SEV_SNP_ENABLED_BIT) +#define MSR_AMD64_RMP_BASE 0xc0010132 +#define MSR_AMD64_RMP_END 0xc0010133 /* SNP feature bits enabled by the hypervisor */ #define MSR_AMD64_SNP_VTOM BIT_ULL(3) @@ -709,7 +711,14 @@ #define MSR_K8_TOP_MEM2 0xc001001d #define MSR_AMD64_SYSCFG 0xc0010010 #define MSR_AMD64_SYSCFG_MEM_ENCRYPT_BIT 23 -#define MSR_AMD64_SYSCFG_MEM_ENCRYPT BIT_ULL(MSR_AMD64_SYSCFG_MEM_ENCRYPT_BIT) +#define MSR_AMD64_SYSCFG_MEM_ENCRYPT BIT_ULL(MSR_AMD64_SYSCFG_MEM_ENCRYPT_BIT) +#define MSR_AMD64_SYSCFG_SNP_EN_BIT 24 +#define MSR_AMD64_SYSCFG_SNP_EN BIT_ULL(MSR_AMD64_SYSCFG_SNP_EN_BIT) +#define MSR_AMD64_SYSCFG_SNP_VMPL_EN_BIT 25 +#define MSR_AMD64_SYSCFG_SNP_VMPL_EN BIT_ULL(MSR_AMD64_SYSCFG_SNP_VMPL_EN_BIT) +#define MSR_AMD64_SYSCFG_MFDM_BIT 19 +#define MSR_AMD64_SYSCFG_MFDM BIT_ULL(MSR_AMD64_SYSCFG_MFDM_BIT) + #define MSR_K8_INT_PENDING_MSG 0xc0010055 /* C1E active bits in int pending message */ #define K8_INTP_C1E_ACTIVE_MASK 0x18000000 diff --git a/arch/x86/include/asm/sev.h b/arch/x86/include/asm/sev.h index 5b4a1ce3d368..1f59d8ba9776 100644 --- a/arch/x86/include/asm/sev.h +++ b/arch/x86/include/asm/sev.h @@ -243,4 +243,10 @@ static inline u64 snp_get_unsupported_features(u64 status) { return 0; } static inline u64 sev_get_status(void) { return 0; } #endif +#ifdef CONFIG_KVM_AMD_SEV +bool 
snp_probe_rmptable_info(void); +#else +static inline bool snp_probe_rmptable_info(void) { return false; } +#endif + #endif diff --git a/arch/x86/kernel/cpu/amd.c b/arch/x86/kernel/cpu/amd.c index 9a17165dfe84..0f0d425f0440 100644 --- a/arch/x86/kernel/cpu/amd.c +++ b/arch/x86/kernel/cpu/amd.c @@ -20,6 +20,7 @@ #include <asm/delay.h> #include <asm/debugreg.h> #include <asm/resctrl.h> +#include <asm/sev.h> #ifdef CONFIG_X86_64 # include <asm/mmconfig.h> @@ -574,6 +575,20 @@ static void bsp_init_amd(struct cpuinfo_x86 *c) break; } + if (cpu_has(c, X86_FEATURE_SEV_SNP)) { + /* + * RMP table entry format is not architectural and it can vary by processor + * and is defined by the per-processor PPR. Restrict SNP support on the + * known CPU model and family for which the RMP table entry format is + * currently defined for. + */ + if (!(c->x86 == 0x19 && c->x86_model <= 0xaf) && + !(c->x86 == 0x1a && c->x86_model <= 0xf)) + setup_clear_cpu_cap(X86_FEATURE_SEV_SNP); + else if (!snp_probe_rmptable_info()) + setup_clear_cpu_cap(X86_FEATURE_SEV_SNP); + } + return; warn: diff --git a/arch/x86/virt/svm/Makefile b/arch/x86/virt/svm/Makefile new file mode 100644 index 000000000000..ef2a31bdcc70 --- /dev/null +++ b/arch/x86/virt/svm/Makefile @@ -0,0 +1,3 @@ +# SPDX-License-Identifier: GPL-2.0 + +obj-$(CONFIG_KVM_AMD_SEV) += sev.o diff --git a/arch/x86/virt/svm/sev.c b/arch/x86/virt/svm/sev.c new file mode 100644 index 000000000000..ce7ede9065ed --- /dev/null +++ b/arch/x86/virt/svm/sev.c @@ -0,0 +1,219 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * AMD SVM-SEV Host Support. + * + * Copyright (C) 2023 Advanced Micro Devices, Inc. 
+ * + * Author: Ashish Kalra <ashish.kalra@amd.com> + * + */ + +#include <linux/cc_platform.h> +#include <linux/printk.h> +#include <linux/mm_types.h> +#include <linux/set_memory.h> +#include <linux/memblock.h> +#include <linux/kernel.h> +#include <linux/mm.h> +#include <linux/cpumask.h> +#include <linux/iommu.h> +#include <linux/amd-iommu.h> + +#include <asm/sev.h> +#include <asm/processor.h> +#include <asm/setup.h> +#include <asm/svm.h> +#include <asm/smp.h> +#include <asm/cpu.h> +#include <asm/apic.h> +#include <asm/cpuid.h> +#include <asm/cmdline.h> +#include <asm/iommu.h> + +/* + * The RMP entry format is not architectural. The format is defined in PPR + * Family 19h Model 01h, Rev B1 processor. + */ +struct rmpentry { + u64 assigned : 1, + pagesize : 1, + immutable : 1, + rsvd1 : 9, + gpa : 39, + asid : 10, + vmsa : 1, + validated : 1, + rsvd2 : 1; + u64 rsvd3; +} __packed; + +/* + * The first 16KB from the RMP_BASE is used by the processor for the + * bookkeeping, the range needs to be added during the RMP entry lookup. 
+ */ +#define RMPTABLE_CPU_BOOKKEEPING_SZ 0x4000 +/* + * probed_rmp_base and probed_rmp_size hold the physical RMP range recorded by + * snp_probe_rmptable_info(). rmptable and rmptable_max_pfn are the mapped + * entry array (past the bookkeeping area) and its last valid index; both are + * set by __snp_rmptable_init(). + */ +static u64 probed_rmp_base, probed_rmp_size; +static struct rmpentry *rmptable __ro_after_init; +static u64 rmptable_max_pfn __ro_after_init; + +#undef pr_fmt +#define pr_fmt(fmt) "SEV-SNP: " fmt +/* + * Set the MFDM bit in the SYSCFG MSR via a read-modify-write on the CPU this + * runs on. Does nothing when X86_FEATURE_SEV_SNP is not enabled. @cpu is + * unused. Always returns 0. + */ +static int __mfd_enable(unsigned int cpu) +{ + u64 val; + + if (!cpu_feature_enabled(X86_FEATURE_SEV_SNP)) + return 0; + + rdmsrl(MSR_AMD64_SYSCFG, val); + + val |= MSR_AMD64_SYSCFG_MFDM; + + wrmsrl(MSR_AMD64_SYSCFG, val); + + return 0; +} +/* on_each_cpu() callback wrapping __mfd_enable(); @arg is unused. */ +static __init void mfd_enable(void *arg) +{ + __mfd_enable(smp_processor_id()); +} +/* + * Set the SNP_EN and SNP_VMPL_EN bits in the SYSCFG MSR via a + * read-modify-write on the CPU this runs on. Does nothing when + * X86_FEATURE_SEV_SNP is not enabled. @cpu is unused; the signature matches + * the CPU hotplug callback that snp_rmptable_init() registers. Always + * returns 0. + */ +static int __snp_enable(unsigned int cpu) +{ + u64 val; + + if (!cpu_feature_enabled(X86_FEATURE_SEV_SNP)) + return 0; + + rdmsrl(MSR_AMD64_SYSCFG, val); + + val |= MSR_AMD64_SYSCFG_SNP_EN; + val |= MSR_AMD64_SYSCFG_SNP_VMPL_EN; + + wrmsrl(MSR_AMD64_SYSCFG, val); + + return 0; +} +/* on_each_cpu() callback wrapping __snp_enable(); @arg is unused. */ +static __init void snp_enable(void *arg) +{ + __snp_enable(smp_processor_id()); +} +/* + * Bits 51:13 of the RMP_BASE and RMP_END MSR values. Both values must have + * at least one of these bits set for the RMP to be treated as reserved. + */ +#define RMP_ADDR_MASK GENMASK_ULL(51, 13) +/* + * Read the RMP range from the RMP_BASE and RMP_END MSRs and validate it. + * + * Returns false, after logging an error, when either value has no bits set + * within RMP_ADDR_MASK, when base is above end, or when the range is smaller + * than required to cover the highest page frame of interest plus the + * bookkeeping area. Otherwise records the range in probed_rmp_base and + * probed_rmp_size, logs it, and returns true. + */ +bool snp_probe_rmptable_info(void) +{ + u64 max_rmp_pfn, calc_rmp_sz, rmp_sz, rmp_base, rmp_end; + + rdmsrl(MSR_AMD64_RMP_BASE, rmp_base); + rdmsrl(MSR_AMD64_RMP_END, rmp_end); + + if (!(rmp_base & RMP_ADDR_MASK) || !(rmp_end & RMP_ADDR_MASK)) { + pr_err("Memory for the RMP table has not been reserved by BIOS\n"); + return false; + } + + if (rmp_base > rmp_end) { + pr_err("RMP configuration not valid: base=%#llx, end=%#llx\n", rmp_base, rmp_end); + return false; + } + + rmp_sz = rmp_end - rmp_base + 1; + + /* + * Calculate the amount of memory that must be reserved by the BIOS to + * address the whole RAM, including the bookkeeping area: 16 bytes per + * page frame (hence the shift by 4 below). The RMP itself must also be + * covered, so the larger of max_pfn and the PFN of rmp_end is used. 
+ */ + max_rmp_pfn = max_pfn; + if (PHYS_PFN(rmp_end) > max_pfn) + max_rmp_pfn = PHYS_PFN(rmp_end); + + calc_rmp_sz = (max_rmp_pfn << 4) + RMPTABLE_CPU_BOOKKEEPING_SZ; + + if (calc_rmp_sz > rmp_sz) { + pr_err("Memory reserved for the RMP table does not cover full system RAM (expected 0x%llx got 0x%llx)\n", + calc_rmp_sz, rmp_sz); + return false; + } + + probed_rmp_base = rmp_base; + probed_rmp_size = rmp_sz; + + pr_info("RMP table physical range [0x%016llx - 0x%016llx]\n", + probed_rmp_base, probed_rmp_base + probed_rmp_size - 1); + + return true; +} +/* + * Map the probed RMP range and, unless SNP is already enabled in SYSCFG, zero + * it, flush the caches and enable MFDM and then SNP on every CPU. Finally + * record the entry array (past the bookkeeping area) in rmptable and its last + * valid index in rmptable_max_pfn. + * + * Returns 0 on success, 1 when no RMP range was probed or memremap() failed. + * NOTE(review): failure is reported as 1 rather than a negative errno; the + * only caller treats any non-zero value as failure. + */ +static int __init __snp_rmptable_init(void) +{ + u64 rmptable_size; + void *rmptable_start; + u64 val; + + if (!probed_rmp_size) + return 1; + + rmptable_start = memremap(probed_rmp_base, probed_rmp_size, MEMREMAP_WB); + if (!rmptable_start) { + pr_err("Failed to map RMP table\n"); + return 1; + } + + /* + * Check if SEV-SNP is already enabled; this can happen in case of a + * kexec boot. In that case the table is not zeroed and the enable + * steps below are skipped. + */ + rdmsrl(MSR_AMD64_SYSCFG, val); + if (val & MSR_AMD64_SYSCFG_SNP_EN) + goto skip_enable; + + memset(rmptable_start, 0, probed_rmp_size); + + /* Flush the caches to ensure that data is written before SNP is enabled. */ + wbinvd_on_all_cpus(); + + /* MtrrFixDramModEn must be enabled on all the CPUs prior to enabling SNP. 
*/ + on_each_cpu(mfd_enable, NULL, 1); + + on_each_cpu(snp_enable, NULL, 1); + +skip_enable: + rmptable_start += RMPTABLE_CPU_BOOKKEEPING_SZ; + rmptable_size = probed_rmp_size - RMPTABLE_CPU_BOOKKEEPING_SZ; + /* Entries begin after the bookkeeping area; record the array and the index of its last entry. */ + rmptable = (struct rmpentry *)rmptable_start; + rmptable_max_pfn = rmptable_size / sizeof(struct rmpentry) - 1; + + return 0; +} +/* + * Initcall that sets up the RMP table and enables SNP. + * + * Returns 0 without doing anything when X86_FEATURE_SEV_SNP is not enabled or + * amd_iommu_snp_en is false. Otherwise runs __snp_rmptable_init(); if that + * fails, X86_FEATURE_SEV_SNP is cleared and -ENOSYS is returned. On success + * __snp_enable() is registered as a CPUHP_AP_ONLINE_DYN callback, presumably + * so that CPUs brought online later get SNP enabled as well. + * + * NOTE(review): the !amd_iommu_snp_en path leaves X86_FEATURE_SEV_SNP set + * although nothing was enabled here; confirm the IOMMU code clears it. + * NOTE(review): the return value of cpuhp_setup_state() is not checked. + */ +static int __init snp_rmptable_init(void) +{ + if (!cpu_feature_enabled(X86_FEATURE_SEV_SNP)) + return 0; + + if (!amd_iommu_snp_en) + return 0; + + if (__snp_rmptable_init()) + goto nosnp; + + cpuhp_setup_state(CPUHP_AP_ONLINE_DYN, "x86/rmptable_init:online", __snp_enable, NULL); + + return 0; + +nosnp: + setup_clear_cpu_cap(X86_FEATURE_SEV_SNP); + return -ENOSYS; +} + +/* + * This must be called after the IOMMU has been initialized, because + * snp_rmptable_init() reads amd_iommu_snp_en. + */ +device_initcall(snp_rmptable_init);