diff mbox series

[for-8.1,v10,01/14] linux-user: Adjust task_unmapped_base for reserved_va

Message ID 20230807163705.9848-2-richard.henderson@linaro.org
State Superseded
Headers show
Series linux-user: image mapping fixes | expand

Commit Message

Richard Henderson Aug. 7, 2023, 4:36 p.m. UTC
Ensure that the chosen values for mmap_next_start and
task_unmapped_base are within the guest address space.

Tested-by: Helge Deller <deller@gmx.de>
Reviewed-by: Akihiko Odaki <akihiko.odaki@daynix.com>
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
---
 linux-user/user-mmap.h | 18 +++++++++++++++++-
 linux-user/main.c      | 28 ++++++++++++++++++++++++++++
 linux-user/mmap.c      | 18 +++---------------
 3 files changed, 48 insertions(+), 16 deletions(-)

Comments

Alex Bennée Aug. 8, 2023, 9:10 a.m. UTC | #1
Richard Henderson <richard.henderson@linaro.org> writes:

> Ensure that the chosen values for mmap_next_start and
> task_unmapped_base are within the guest address space.
>
> Tested-by: Helge Deller <deller@gmx.de>
> Reviewed-by: Akihiko Odaki <akihiko.odaki@daynix.com>
> Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
> ---
>  linux-user/user-mmap.h | 18 +++++++++++++++++-
>  linux-user/main.c      | 28 ++++++++++++++++++++++++++++
>  linux-user/mmap.c      | 18 +++---------------
>  3 files changed, 48 insertions(+), 16 deletions(-)
>
> diff --git a/linux-user/user-mmap.h b/linux-user/user-mmap.h
> index 7265c2c116..fd456e024e 100644
> --- a/linux-user/user-mmap.h
> +++ b/linux-user/user-mmap.h
> @@ -18,6 +18,23 @@
>  #ifndef LINUX_USER_USER_MMAP_H
>  #define LINUX_USER_USER_MMAP_H
>  
> +#if HOST_LONG_BITS == 64 && TARGET_ABI_BITS == 64
> +#ifdef TARGET_AARCH64
> +# define TASK_UNMAPPED_BASE  0x5500000000
> +#else
> +# define TASK_UNMAPPED_BASE  (1ul << 38)
> +#endif
> +#else
> +#ifdef TARGET_HPPA
> +# define TASK_UNMAPPED_BASE  0xfa000000
> +#else
> +# define TASK_UNMAPPED_BASE  0x40000000
> +#endif
> +#endif
> +
> +extern abi_ulong task_unmapped_base;
> +extern abi_ulong mmap_next_start;
> +
>  int target_mprotect(abi_ulong start, abi_ulong len, int prot);
>  abi_long target_mmap(abi_ulong start, abi_ulong len, int prot,
>                       int flags, int fd, off_t offset);
> @@ -26,7 +43,6 @@ abi_long target_mremap(abi_ulong old_addr, abi_ulong old_size,
>                         abi_ulong new_size, unsigned long flags,
>                         abi_ulong new_addr);
>  abi_long target_madvise(abi_ulong start, abi_ulong len_in, int advice);
> -extern abi_ulong mmap_next_start;
>  abi_ulong mmap_find_vma(abi_ulong, abi_ulong, abi_ulong);
>  void mmap_fork_start(void);
>  void mmap_fork_end(int child);
> diff --git a/linux-user/main.c b/linux-user/main.c
> index 556956c363..be621dc792 100644
> --- a/linux-user/main.c
> +++ b/linux-user/main.c
> @@ -821,6 +821,34 @@ int main(int argc, char **argv, char **envp)
>          reserved_va = max_reserved_va;
>      }
>  
> +    /*
> +     * Temporarily disable
> +     *   "comparison is always false due to limited range of data type"
> +     * due to comparison between (possible) uint64_t and uintptr_t.
> +     */
> +#pragma GCC diagnostic push
> +#pragma GCC diagnostic ignored "-Wtype-limits"
> +
> +    /*
> +     * Select an initial value for task_unmapped_base that is in range.
> +     */
> +    if (reserved_va) {
> +        if (TASK_UNMAPPED_BASE < reserved_va) {
> +            task_unmapped_base = TASK_UNMAPPED_BASE;
> +        } else {
> +            /* The most common default formula is TASK_SIZE / 3. */
> +            task_unmapped_base = TARGET_PAGE_ALIGN(reserved_va / 3);
> +        }
> +    } else if (TASK_UNMAPPED_BASE < UINTPTR_MAX) {
> +        task_unmapped_base = TASK_UNMAPPED_BASE;
> +    } else {
> +        /* 32-bit host: pick something medium size. */
> +        task_unmapped_base = 0x10000000;
> +    }
> +    mmap_next_start = task_unmapped_base;
> +
> +#pragma GCC diagnostic pop
> +
>      {
>          Error *err = NULL;
>          if (seed_optarg != NULL) {
> diff --git a/linux-user/mmap.c b/linux-user/mmap.c
> index eb04fab8ab..84436d45c8 100644
> --- a/linux-user/mmap.c
> +++ b/linux-user/mmap.c
> @@ -299,20 +299,8 @@ static bool mmap_frag(abi_ulong real_start, abi_ulong start, abi_ulong last,
>      return true;
>  }
>  
> -#if HOST_LONG_BITS == 64 && TARGET_ABI_BITS == 64
> -#ifdef TARGET_AARCH64
> -# define TASK_UNMAPPED_BASE  0x5500000000
> -#else
> -# define TASK_UNMAPPED_BASE  (1ul << 38)
> -#endif
> -#else
> -#ifdef TARGET_HPPA
> -# define TASK_UNMAPPED_BASE  0xfa000000
> -#else
> -# define TASK_UNMAPPED_BASE  0x40000000
> -#endif
> -#endif
> -abi_ulong mmap_next_start = TASK_UNMAPPED_BASE;
> +abi_ulong task_unmapped_base;
> +abi_ulong mmap_next_start;

I feel we could help ourselves a bit more by documenting these globals
and what they mean:

  task_unmapped_base represents the start of unmapped memory in the
  guests programs address space. It is generally a function of the size
  of the address space and it defined at the start of execution.
  mmap_next_start is the base address for the next anonymous mmap and is
  increased after each successful map, starting at task_unmapped_base.

One thing I'm slightly confused by is the ELF_ET_DYN_BASE can be above
this (or sometimes the same). Should the mapping of ELF segments be
handled with mmap_next_start? I assume once mmap_next_start meets the
mappings for the ELF segments we skip over until we get to more free
space after the program code?

>  
>  /*
>   * Subroutine of mmap_find_vma, used when we have pre-allocated
> @@ -391,7 +379,7 @@ abi_ulong mmap_find_vma(abi_ulong start, abi_ulong size, abi_ulong align)
>  
>              if ((addr & (align - 1)) == 0) {
>                  /* Success.  */
> -                if (start == mmap_next_start && addr >= TASK_UNMAPPED_BASE) {
> +                if (start == mmap_next_start && addr >= task_unmapped_base) {
>                      mmap_next_start = addr + size;
>                  }
>                  return addr;
Richard Henderson Aug. 8, 2023, 3:16 p.m. UTC | #2
On 8/8/23 02:10, Alex Bennée wrote:
> One thing I'm slightly confused by is the ELF_ET_DYN_BASE can be above
> this (or sometimes the same). Should the mapping of ELF segments be
> handled with mmap_next_start? I assume once mmap_next_start meets the
> mappings for the ELF segments we skip over until we get to more free
> space after the program code?

ELF_ET_DYN_BASE is a hack imported from the kernel to put separation between an ET_DYN 
main binary and TASK_UNMAPPED_BASE, so that the brk can follow the binary and have space 
to grow.

All of this is part of the "legacy" memory layout, for which there is a personality flag.

For 8.2, I think we should work on implementing the "new" memory layout, which places 
everything top-down.  But most importantly it completely separates brk from the binary.


r~
Helge Deller Aug. 8, 2023, 3:35 p.m. UTC | #3
Hi Richard,

On 8/7/23 18:36, Richard Henderson wrote:
> Ensure that the chosen values for mmap_next_start and
> task_unmapped_base are within the guest address space.
>
> Tested-by: Helge Deller <deller@gmx.de>
> Reviewed-by: Akihiko Odaki <akihiko.odaki@daynix.com>
> Signed-off-by: Richard Henderson <richard.henderson@linaro.org>

I've tested this whole series for quite some chroots.
Good thing is, that all targets do run and can execute a static hello-world
program.
So, overall that's good and I think the patch series should go in for 8.1.

Looking at the target's memmap I do see non-optimal heap-addresses for arm and
powerpc.
I know it's not directly related to your patches, but we should later
try to move the heap behind the executables where they can grow bigger.

Helge

armel-chroot  and armhf-chroot:
Linux p100 6.4.6-200.fc38.x86_64 #1 SMP PREEMPT_DYNAMIC Mon Jul 24 20:51:12 UTC 2023 armv7l GNU/Linux
00021000-00042000 rw-p 00000000 00:00 0                                  [heap]
00400000-00408000 r-xp 00000000 fd:00 801471                             /usr/bin/cat
00408000-0041f000 ---p 00000000 00:00 0
0041f000-00420000 r--p 0000f000 fd:00 801471                             /usr/bin/cat
00420000-00421000 rw-p 00010000 fd:00 801471                             /usr/bin/cat
40000000-40001000 ---p 00000000 00:00 0
40001000-40801000 rw-p 00000000 00:00 0                                  [stack]
40801000-40827000 r-xp 00000000 fd:00 839152                             /usr/lib/arm-linux-gnueabi/ld-linux.so.3
40827000-40828000 r--p 00026000 fd:00 839152                             /usr/lib/arm-linux-gnueabi/ld-linux.so.3
40828000-40829000 rw-p 00027000 fd:00 839152                             /usr/lib/arm-linux-gnueabi/ld-linux.so.3
...

powerpc-chroot
Linux p100 6.4.6-200.fc38.x86_64 #1 SMP PREEMPT_DYNAMIC Mon Jul 24 20:51:12 UTC 2023 ppc GNU/Linux
00021000-00043000 rw-p 00000000 00:00 0                                  [heap]
001c0000-003d5000 r-xp 00000000 fd:00 577250                             /usr/lib/powerpc-linux-gnu/libc.so.6
003d5000-003eb000 ---p 00215000 fd:00 577250                             /usr/lib/powerpc-linux-gnu/libc.so.6
003eb000-003f0000 r--p 0021b000 fd:00 577250                             /usr/lib/powerpc-linux-gnu/libc.so.6
003f0000-003f1000 rw-p 00220000 fd:00 577250                             /usr/lib/powerpc-linux-gnu/libc.so.6
003f1000-003fb000 rw-p 00000000 00:00 0
00400000-0040b000 r-xp 00000000 fd:00 535994                             /usr/bin/cat
0040b000-0041f000 ---p 00000000 00:00 0
0041f000-00420000 r--p 0000f000 fd:00 535994                             /usr/bin/cat
00420000-00421000 rw-p 00010000 fd:00 535994                             /usr/bin/cat
40000000-40001000 ---p 00000000 00:00 0
40001000-40801000 rw-p 00000000 00:00 0                                  [stack]
40801000-40834000 r-xp 00000000 fd:00 577246                             /usr/lib/powerpc-linux-gnu/ld.so.1
40834000-4084f000 ---p 00000000 00:00 0
4084f000-40851000 r--p 0003e000 fd:00 577246                             /usr/lib/powerpc-linux-gnu/ld.so.1
40851000-40852000 rw-p 00040000 fd:00 577246                             /usr/lib/powerpc-linux-gnu/ld.so.1
40852000-40853000 r-xp 00000000 00:00 0
40853000-40855000 rw-p 00000000 00:00 0
Alex Bennée Aug. 8, 2023, 4:59 p.m. UTC | #4
Richard Henderson <richard.henderson@linaro.org> writes:

> On 8/8/23 02:10, Alex Bennée wrote:
>> One thing I'm slightly confused by is the ELF_ET_DYN_BASE can be above
>> this (or sometimes the same). Should the mapping of ELF segments be
>> handled with mmap_next_start? I assume once mmap_next_start meets the
>> mappings for the ELF segments we skip over until we get to more free
>> space after the program code?
>
> ELF_ET_DYN_BASE is a hack imported from the kernel to put separation
> between an ET_DYN main binary and TASK_UNMAPPED_BASE, so that the brk
> can follow the binary and have space to grow.

yeach :-/

>
> All of this is part of the "legacy" memory layout, for which there is a personality flag.
>
> For 8.2, I think we should work on implementing the "new" memory
> layout, which places everything top-down.  But most importantly it
> completely separates brk from the binary.

The QEMU brk? The guest will have one emulated for it?

>
>
> r~
Richard Henderson Aug. 8, 2023, 5:40 p.m. UTC | #5
On 8/8/23 09:59, Alex Bennée wrote:
>> All of this is part of the "legacy" memory layout, for which there is a personality flag.
>>
>> For 8.2, I think we should work on implementing the "new" memory
>> layout, which places everything top-down.  But most importantly it
>> completely separates brk from the binary.
> 
> The QEMU brk? The guest will have one emulated for it?

In this context we're talking about guest memory layout.
So there is one brk: the guest one.


r~
diff mbox series

Patch

diff --git a/linux-user/user-mmap.h b/linux-user/user-mmap.h
index 7265c2c116..fd456e024e 100644
--- a/linux-user/user-mmap.h
+++ b/linux-user/user-mmap.h
@@ -18,6 +18,23 @@ 
 #ifndef LINUX_USER_USER_MMAP_H
 #define LINUX_USER_USER_MMAP_H
 
+#if HOST_LONG_BITS == 64 && TARGET_ABI_BITS == 64
+#ifdef TARGET_AARCH64
+# define TASK_UNMAPPED_BASE  0x5500000000
+#else
+# define TASK_UNMAPPED_BASE  (1ul << 38)
+#endif
+#else
+#ifdef TARGET_HPPA
+# define TASK_UNMAPPED_BASE  0xfa000000
+#else
+# define TASK_UNMAPPED_BASE  0x40000000
+#endif
+#endif
+
+extern abi_ulong task_unmapped_base;
+extern abi_ulong mmap_next_start;
+
 int target_mprotect(abi_ulong start, abi_ulong len, int prot);
 abi_long target_mmap(abi_ulong start, abi_ulong len, int prot,
                      int flags, int fd, off_t offset);
@@ -26,7 +43,6 @@  abi_long target_mremap(abi_ulong old_addr, abi_ulong old_size,
                        abi_ulong new_size, unsigned long flags,
                        abi_ulong new_addr);
 abi_long target_madvise(abi_ulong start, abi_ulong len_in, int advice);
-extern abi_ulong mmap_next_start;
 abi_ulong mmap_find_vma(abi_ulong, abi_ulong, abi_ulong);
 void mmap_fork_start(void);
 void mmap_fork_end(int child);
diff --git a/linux-user/main.c b/linux-user/main.c
index 556956c363..be621dc792 100644
--- a/linux-user/main.c
+++ b/linux-user/main.c
@@ -821,6 +821,34 @@  int main(int argc, char **argv, char **envp)
         reserved_va = max_reserved_va;
     }
 
+    /*
+     * Temporarily disable
+     *   "comparison is always false due to limited range of data type"
+     * due to comparison between (possible) uint64_t and uintptr_t.
+     */
+#pragma GCC diagnostic push
+#pragma GCC diagnostic ignored "-Wtype-limits"
+
+    /*
+     * Select an initial value for task_unmapped_base that is in range.
+     */
+    if (reserved_va) {
+        if (TASK_UNMAPPED_BASE < reserved_va) {
+            task_unmapped_base = TASK_UNMAPPED_BASE;
+        } else {
+            /* The most common default formula is TASK_SIZE / 3. */
+            task_unmapped_base = TARGET_PAGE_ALIGN(reserved_va / 3);
+        }
+    } else if (TASK_UNMAPPED_BASE < UINTPTR_MAX) {
+        task_unmapped_base = TASK_UNMAPPED_BASE;
+    } else {
+        /* 32-bit host: pick something medium size. */
+        task_unmapped_base = 0x10000000;
+    }
+    mmap_next_start = task_unmapped_base;
+
+#pragma GCC diagnostic pop
+
     {
         Error *err = NULL;
         if (seed_optarg != NULL) {
diff --git a/linux-user/mmap.c b/linux-user/mmap.c
index eb04fab8ab..84436d45c8 100644
--- a/linux-user/mmap.c
+++ b/linux-user/mmap.c
@@ -299,20 +299,8 @@  static bool mmap_frag(abi_ulong real_start, abi_ulong start, abi_ulong last,
     return true;
 }
 
-#if HOST_LONG_BITS == 64 && TARGET_ABI_BITS == 64
-#ifdef TARGET_AARCH64
-# define TASK_UNMAPPED_BASE  0x5500000000
-#else
-# define TASK_UNMAPPED_BASE  (1ul << 38)
-#endif
-#else
-#ifdef TARGET_HPPA
-# define TASK_UNMAPPED_BASE  0xfa000000
-#else
-# define TASK_UNMAPPED_BASE  0x40000000
-#endif
-#endif
-abi_ulong mmap_next_start = TASK_UNMAPPED_BASE;
+abi_ulong task_unmapped_base;
+abi_ulong mmap_next_start;
 
 /*
  * Subroutine of mmap_find_vma, used when we have pre-allocated
@@ -391,7 +379,7 @@  abi_ulong mmap_find_vma(abi_ulong start, abi_ulong size, abi_ulong align)
 
             if ((addr & (align - 1)) == 0) {
                 /* Success.  */
-                if (start == mmap_next_start && addr >= TASK_UNMAPPED_BASE) {
+                if (start == mmap_next_start && addr >= task_unmapped_base) {
                     mmap_next_start = addr + size;
                 }
                 return addr;