| Message ID | 20210707154629.3977369-10-ltykernel@gmail.com |
|---|---|
| State | Superseded |
| Series | x86/Hyper-V: Add Hyper-V Isolation VM support |
Hi Christoph & Konrad:

Could you review this patch and make sure this is the right way to resolve
the memory remap request in the AMD SEV-SNP vTOM case? Thanks.

On 7/7/2021 11:46 PM, Tianyu Lan wrote:
> From: Tianyu Lan <Tianyu.Lan@microsoft.com>
>
> In an Isolation VM with AMD SEV, the bounce buffer needs to be accessed
> via an extra address space above shared_gpa_boundary (e.g. a 39-bit
> address line) reported by the Hyper-V CPUID ISOLATION_CONFIG leaf. The
> accessed physical address is the original physical address plus
> shared_gpa_boundary. In the AMD SEV-SNP spec, shared_gpa_boundary is
> called the virtual top of memory (vTOM). Memory addresses below vTOM are
> automatically treated as private while memory above vTOM is treated as
> shared.
>
> Introduce a set_memory_decrypted_map() function to decrypt memory and
> remap it with a platform callback. Use set_memory_decrypted_map() in the
> swiotlb code, store the remap address returned by the new API and use
> that remap address to copy data from/to the swiotlb bounce buffer.
>
> Signed-off-by: Tianyu Lan <Tianyu.Lan@microsoft.com>
> ---
>  arch/x86/hyperv/ivm.c             | 30 ++++++++++++++++++++++++++++++
>  arch/x86/include/asm/mshyperv.h   |  2 ++
>  arch/x86/include/asm/set_memory.h |  2 ++
>  arch/x86/mm/pat/set_memory.c      | 28 ++++++++++++++++++++++++++++
>  include/linux/swiotlb.h           |  4 ++++
>  kernel/dma/swiotlb.c              | 11 ++++++++---
>  6 files changed, 74 insertions(+), 3 deletions(-)
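The address arithmetic in the commit message amounts to offsetting a guest
PFN by the vTOM boundary. A minimal sketch, assuming the 39-bit example
boundary from the text above (the real value is reported by the
ISOLATION_CONFIG CPUID leaf, not hard-coded):

    /*
     * Illustration only: the shared (above-vTOM) view of a guest PFN,
     * assuming shared_gpa_boundary = 1UL << 39 as in the example above.
     */
    #define HV_HYP_PAGE_SHIFT	12
    #define SHARED_GPA_BOUNDARY	(1UL << 39)	/* example vTOM: 512 GiB */

    static unsigned long shared_pfn(unsigned long pfn)
    {
    	/* e.g. pfn 0x100 becomes 0x8000100: same page, shared alias */
    	return pfn + (SHARED_GPA_BOUNDARY >> HV_HYP_PAGE_SHIFT);
    }

This is the same per-page computation hv_map_memory() below performs before
handing the PFN list to vmap_pfn().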
Please split the swiotlb changes into a separate patch from the consumer.

> }
> +
> +/*
> + * hv_map_memory - map memory to extra space in the AMD SEV-SNP Isolation VM.
> + */
> +unsigned long hv_map_memory(unsigned long addr, unsigned long size)
> +{
> +	unsigned long *pfns = kcalloc(size / HV_HYP_PAGE_SIZE,
> +				      sizeof(unsigned long),
> +				      GFP_KERNEL);
> +	unsigned long vaddr;
> +	int i;
> +
> +	if (!pfns)
> +		return (unsigned long)NULL;
> +
> +	for (i = 0; i < size / HV_HYP_PAGE_SIZE; i++)
> +		pfns[i] = virt_to_hvpfn((void *)addr + i * HV_HYP_PAGE_SIZE) +
> +			(ms_hyperv.shared_gpa_boundary >> HV_HYP_PAGE_SHIFT);
> +
> +	vaddr = (unsigned long)vmap_pfn(pfns, size / HV_HYP_PAGE_SIZE,
> +					PAGE_KERNEL_IO);
> +	kfree(pfns);
> +
> +	return vaddr;

This seems to miss a 'select VMAP_PFN'.

But more importantly I don't think this actually works. Various DMA APIs
do expect a struct page backing, so how is this going to work with, say,
dma_mmap_attrs or dma_get_sgtable_attrs?

> +static unsigned long __map_memory(unsigned long addr, unsigned long size)
> +{
> +	if (hv_is_isolation_supported())
> +		return hv_map_memory(addr, size);
> +
> +	return addr;
> +}
> +
> +static void __unmap_memory(unsigned long addr)
> +{
> +	if (hv_is_isolation_supported())
> +		hv_unmap_memory(addr);
> +}
> +
> +unsigned long set_memory_decrypted_map(unsigned long addr, unsigned long size)
> +{
> +	if (__set_memory_enc_dec(addr, size / PAGE_SIZE, false))
> +		return (unsigned long)NULL;
> +
> +	return __map_memory(addr, size);
> +}
> +
> +int set_memory_encrypted_unmap(unsigned long addr, unsigned long size)
> +{
> +	__unmap_memory(addr);
> +	return __set_memory_enc_dec(addr, size / PAGE_SIZE, true);
> +}

Why this obfuscation into all kinds of strange helpers? Also I think we
want an ops vector (or alternative calls) instead of the random if checks
here.

> + * @vstart:	The virtual start address of the swiotlb memory pool. The swiotlb
> + *		memory pool may be remapped in the memory encrypted case and store

Normally we'd call this vaddr or cpu_addr.

> -	set_memory_decrypted((unsigned long)vaddr, bytes >> PAGE_SHIFT);
> -	memset(vaddr, 0, bytes);
> +	mem->vstart = (void *)set_memory_decrypted_map((unsigned long)vaddr, bytes);

Please always pass kernel virtual addresses as pointers.

And I think these APIs might need better names, e.g.

  arch_dma_map_decrypted and arch_dma_unmap_decrypted

Also these will need fallback versions for non-x86 architectures that
currently use memory encryption.
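Two of the points above are worth spelling out. vmap_pfn() is only built
when CONFIG_VMAP_PFN is enabled, so the Kconfig option that builds ivm.c
would need a 'select VMAP_PFN'. And the suggested
arch_dma_map_decrypted()/arch_dma_unmap_decrypted() pair with non-x86
fallbacks could look roughly like the sketch below; only the function names
come from the review, while the placement and bodies are assumptions:

    /* Generic weak fallbacks: architectures without a remapping
     * requirement keep using the address they already have. */
    void * __weak arch_dma_map_decrypted(void *addr, unsigned long size)
    {
    	return addr;
    }

    void __weak arch_dma_unmap_decrypted(void *addr, unsigned long size)
    {
    }

    /* In an x86/Hyper-V file, the isolation case would override them: */
    void *arch_dma_map_decrypted(void *addr, unsigned long size)
    {
    	if (!hv_is_isolation_supported())
    		return addr;

    	return (void *)hv_map_memory((unsigned long)addr, size);
    }

Taking and returning void * here also follows the "pass kernel virtual
addresses as pointers" comment above.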
On Wed, Jul 21, 2021 at 06:28:48PM +0800, Tianyu Lan wrote:
> dma_mmap_attrs() and dma_get_sgtable_attrs() take an input virtual address
> that belongs to backing memory with a struct page, and they return the
> bounce buffer DMA physical address, which is below shared_gpa_boundary
> (vTOM) and is passed to Hyper-V via the vmbus protocol.
>
> The new remapped virtual address is only used to access the bounce buffer
> in the swiotlb code and is not used anywhere else. It's stored in
> mem->vstart, so the new API set_memory_decrypted_map() in this series is
> only called in the swiotlb code. Other platforms may replace
> set_memory_decrypted() with set_memory_decrypted_map() as needed.

It seems like you are indeed not using these new helpers in
dma_direct_alloc. How is dma_alloc_attrs/dma_alloc_coherent going to work
on these systems?
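The question points at the direct-mapping allocator: a coherent DMA buffer
would also need the decrypt-plus-remap treatment, not just the swiotlb
pool. A paraphrased sketch of the idea (this is not the actual
kernel/dma/direct.c code, and arch_dma_map_decrypted() is the hypothetical
helper name from the review):

    /* Paraphrased sketch: remap a freshly allocated coherent buffer when
     * the device must see it decrypted/shared. */
    static void *coherent_alloc_remap_sketch(struct device *dev,
    					     struct page *page, size_t size)
    {
    	void *ret = page_address(page);

    	if (force_dma_unencrypted(dev)) {
    		ret = arch_dma_map_decrypted(ret, size);
    		if (!ret)
    			return NULL;	/* caller would free the pages */
    	}

    	return ret;
    }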
Hi Christoph:

I followed this suggestion to rework the latest version
(https://lkml.org/lkml/2021/8/9/805). I just removed the arch prefix from
your suggested name arch_dma_map_decrypted because the platform may
populate its map/unmap callbacks in the ops. But from your latest comment
(https://lkml.org/lkml/2021/8/12/149), these names are confusing vs. the
actual dma_map_* calls. Could you help suggest the right function names?
The new map function is used to map the bounce buffer in the
trusted/Isolation VM, and these buffers are DMA memory.

On 7/20/2021 9:54 PM, Christoph Hellwig wrote:
>> -	set_memory_decrypted((unsigned long)vaddr, bytes >> PAGE_SHIFT);
>> -	memset(vaddr, 0, bytes);
>> +	mem->vstart = (void *)set_memory_decrypted_map((unsigned long)vaddr, bytes);
> Please always pass kernel virtual addresses as pointers.
>
> And I think these APIs might need better names, e.g.
>
>   arch_dma_map_decrypted and arch_dma_unmap_decrypted.
>
> Also these will need fallback versions for non-x86 architectures that
> currently use memory encryption.
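A rough sketch of the ops-vector shape being described here; every name in
it is invented for illustration and does not come from the posted series:

    /* Hypothetical ops vector a platform could populate. */
    struct mem_remap_ops {
    	void *(*map)(void *addr, unsigned long size);
    	void (*unmap)(void *addr, unsigned long size);
    };

    /* Thin void * wrappers around the helpers added by this patch. */
    static void *hv_map_memory_ptr(void *addr, unsigned long size)
    {
    	return (void *)hv_map_memory((unsigned long)addr, size);
    }

    static void hv_unmap_memory_ptr(void *addr, unsigned long size)
    {
    	hv_unmap_memory((unsigned long)addr);
    }

    /* The Hyper-V isolation platform registers its callbacks once. */
    static const struct mem_remap_ops hyperv_mem_remap_ops = {
    	.map	= hv_map_memory_ptr,
    	.unmap	= hv_unmap_memory_ptr,
    };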
diff --git a/arch/x86/hyperv/ivm.c b/arch/x86/hyperv/ivm.c
index 8a6f4e9e3d6c..ea33935e0c17 100644
--- a/arch/x86/hyperv/ivm.c
+++ b/arch/x86/hyperv/ivm.c
@@ -267,3 +267,33 @@ int hv_set_mem_enc(unsigned long addr, int numpages, bool enc)
 			   enc ? VMBUS_PAGE_NOT_VISIBLE
 			   : VMBUS_PAGE_VISIBLE_READ_WRITE);
 }
+
+/*
+ * hv_map_memory - map memory to extra space in the AMD SEV-SNP Isolation VM.
+ */
+unsigned long hv_map_memory(unsigned long addr, unsigned long size)
+{
+	unsigned long *pfns = kcalloc(size / HV_HYP_PAGE_SIZE,
+				      sizeof(unsigned long),
+				      GFP_KERNEL);
+	unsigned long vaddr;
+	int i;
+
+	if (!pfns)
+		return (unsigned long)NULL;
+
+	for (i = 0; i < size / HV_HYP_PAGE_SIZE; i++)
+		pfns[i] = virt_to_hvpfn((void *)addr + i * HV_HYP_PAGE_SIZE) +
+			(ms_hyperv.shared_gpa_boundary >> HV_HYP_PAGE_SHIFT);
+
+	vaddr = (unsigned long)vmap_pfn(pfns, size / HV_HYP_PAGE_SIZE,
+					PAGE_KERNEL_IO);
+	kfree(pfns);
+
+	return vaddr;
+}
+
+void hv_unmap_memory(unsigned long addr)
+{
+	vunmap((void *)addr);
+}
diff --git a/arch/x86/include/asm/mshyperv.h b/arch/x86/include/asm/mshyperv.h
index fe03e3e833ac..ba3cb9e32fdb 100644
--- a/arch/x86/include/asm/mshyperv.h
+++ b/arch/x86/include/asm/mshyperv.h
@@ -253,6 +253,8 @@ int hv_map_ioapic_interrupt(int ioapic_id, bool level, int vcpu, int vector,
 int hv_unmap_ioapic_interrupt(int ioapic_id, struct hv_interrupt_entry *entry);
 int hv_mark_gpa_visibility(u16 count, const u64 pfn[], u32 visibility);
 int hv_set_mem_enc(unsigned long addr, int numpages, bool enc);
+unsigned long hv_map_memory(unsigned long addr, unsigned long size);
+void hv_unmap_memory(unsigned long addr);
 void hv_sint_wrmsrl_ghcb(u64 msr, u64 value);
 void hv_sint_rdmsrl_ghcb(u64 msr, u64 *value);
 void hv_signal_eom_ghcb(void);
diff --git a/arch/x86/include/asm/set_memory.h b/arch/x86/include/asm/set_memory.h
index 43fa081a1adb..7a2117931830 100644
--- a/arch/x86/include/asm/set_memory.h
+++ b/arch/x86/include/asm/set_memory.h
@@ -49,6 +49,8 @@ int set_memory_decrypted(unsigned long addr, int numpages);
 int set_memory_np_noalias(unsigned long addr, int numpages);
 int set_memory_nonglobal(unsigned long addr, int numpages);
 int set_memory_global(unsigned long addr, int numpages);
+unsigned long set_memory_decrypted_map(unsigned long addr, unsigned long size);
+int set_memory_encrypted_unmap(unsigned long addr, unsigned long size);
 
 int set_pages_array_uc(struct page **pages, int addrinarray);
 int set_pages_array_wc(struct page **pages, int addrinarray);
diff --git a/arch/x86/mm/pat/set_memory.c b/arch/x86/mm/pat/set_memory.c
index 6cc83c57383d..5d4d3963f4a2 100644
--- a/arch/x86/mm/pat/set_memory.c
+++ b/arch/x86/mm/pat/set_memory.c
@@ -2039,6 +2039,34 @@ int set_memory_decrypted(unsigned long addr, int numpages)
 }
 EXPORT_SYMBOL_GPL(set_memory_decrypted);
 
+static unsigned long __map_memory(unsigned long addr, unsigned long size)
+{
+	if (hv_is_isolation_supported())
+		return hv_map_memory(addr, size);
+
+	return addr;
+}
+
+static void __unmap_memory(unsigned long addr)
+{
+	if (hv_is_isolation_supported())
+		hv_unmap_memory(addr);
+}
+
+unsigned long set_memory_decrypted_map(unsigned long addr, unsigned long size)
+{
+	if (__set_memory_enc_dec(addr, size / PAGE_SIZE, false))
+		return (unsigned long)NULL;
+
+	return __map_memory(addr, size);
+}
+
+int set_memory_encrypted_unmap(unsigned long addr, unsigned long size)
+{
+	__unmap_memory(addr);
+	return __set_memory_enc_dec(addr, size / PAGE_SIZE, true);
+}
+
 int set_pages_uc(struct page *page, int numpages)
 {
 	unsigned long addr = (unsigned long)page_address(page);
diff --git a/include/linux/swiotlb.h b/include/linux/swiotlb.h
index f507e3eacbea..5c6f6c7380ef 100644
--- a/include/linux/swiotlb.h
+++ b/include/linux/swiotlb.h
@@ -72,6 +72,9 @@ extern enum swiotlb_force swiotlb_force;
  * @end:	The end address of the swiotlb memory pool. Used to do a quick
  *		range check to see if the memory was in fact allocated by this
  *		API.
+ * @vstart:	The virtual start address of the swiotlb memory pool. The swiotlb
+ *		memory pool may be remapped in the memory encrypted case and store
+ *		virtual address for bounce buffer operation.
  * @nslabs:	The number of IO TLB blocks (in groups of 64) between @start and
  *		@end. For default swiotlb, this is command line adjustable via
  *		setup_io_tlb_npages.
@@ -89,6 +92,7 @@ extern enum swiotlb_force swiotlb_force;
 struct io_tlb_mem {
 	phys_addr_t start;
 	phys_addr_t end;
+	void *vstart;
 	unsigned long nslabs;
 	unsigned long used;
 	unsigned int index;
diff --git a/kernel/dma/swiotlb.c b/kernel/dma/swiotlb.c
index d3fa4669229b..9911817250a8 100644
--- a/kernel/dma/swiotlb.c
+++ b/kernel/dma/swiotlb.c
@@ -194,8 +194,13 @@ static void swiotlb_init_io_tlb_mem(struct io_tlb_mem *mem, phys_addr_t start,
 		mem->slots[i].alloc_size = 0;
 	}
 
-	set_memory_decrypted((unsigned long)vaddr, bytes >> PAGE_SHIFT);
-	memset(vaddr, 0, bytes);
+	mem->vstart = (void *)set_memory_decrypted_map((unsigned long)vaddr, bytes);
+	if (!mem->vstart) {
+		pr_err("Failed to decrypt memory.\n");
+		return;
+	}
+
+	memset(mem->vstart, 0, bytes);
 }
 
 int __init swiotlb_init_with_tbl(char *tlb, unsigned long nslabs, int verbose)
@@ -352,7 +357,7 @@ static void swiotlb_bounce(struct device *dev, phys_addr_t tlb_addr, size_t size
 	phys_addr_t orig_addr = mem->slots[index].orig_addr;
 	size_t alloc_size = mem->slots[index].alloc_size;
 	unsigned long pfn = PFN_DOWN(orig_addr);
-	unsigned char *vaddr = phys_to_virt(tlb_addr);
+	unsigned char *vaddr = mem->vstart + tlb_addr - mem->start;
 
 	if (orig_addr == INVALID_PHYS_ADDR)
 		return;
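As a reading aid for the swiotlb hunks: the pool now has three related
addresses. mem->start is the physical start of the bounce pool (below
vTOM), vaddr is the direct-map address passed to swiotlb_init_io_tlb_mem(),
and mem->vstart is the vmap_pfn() alias of the same pages above
shared_gpa_boundary. On platforms without isolation,
set_memory_decrypted_map() returns the original address, so mem->vstart
equals vaddr and behaviour is unchanged. The bounce copy then indexes off
the remapped alias, restated minimally below:

    /* Reading aid only: the bounce address computation from the last hunk. */
    static unsigned char *tlb_vaddr(struct io_tlb_mem *mem, phys_addr_t tlb_addr)
    {
    	/* offset into the pool, applied to the remapped (above-vTOM) alias */
    	return (unsigned char *)mem->vstart + (tlb_addr - mem->start);
    }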