diff mbox

ARM - Remove fixed map code buffer restriction

Message ID 20111212153730.GA9583@davesworkthinkpad
State Accepted
Headers show

Commit Message

Dr. David Alan Gilbert Dec. 12, 2011, 3:37 p.m. UTC
On ARM, don't map the code buffer at a fixed location, and fix up the
call/goto tcg routines to let it do long jumps.

Mapping the code buffer at a fixed address could sometimes result in it being
mapped over the top of the heap with pretty random results.

This diff is against v1.0.

Signed-off-by: Dr. David Alan Gilbert <david.gilbert@linaro.org>
---
 exec.c               |    4 +---
 tcg/arm/tcg-target.c |   31 ++++++++++++-------------------
 2 files changed, 13 insertions(+), 22 deletions(-)

Comments

Peter Maydell Dec. 12, 2011, 3:55 p.m. UTC | #1
CC'ing Andrzej, who is the tcg/arm maintainer.

On 12 December 2011 15:37, Dr. David Alan Gilbert
<david.gilbert@linaro.org> wrote:
> On ARM, don't map the code buffer at a fixed location, and fix up the
> call/goto tcg routines to let it do long jumps.
>
> Mapping the code buffer at a fixed address could sometimes result in it being
> mapped over the top of the heap with pretty random results.
>
> This diff is against v1.0.

Patches should always be against master, although we're still
close enough to 1.0 that this will probably apply anyway.

Comments like "This diff is against v1.0." go below the '---' so they
don't appear in the final commit log.

Code generally looks OK to me.

> Signed-off-by: Dr. David Alan Gilbert <david.gilbert@linaro.org>
> ---
>  exec.c               |    4 +---
>  tcg/arm/tcg-target.c |   31 ++++++++++++-------------------
>  2 files changed, 13 insertions(+), 22 deletions(-)
>
> diff --git a/exec.c b/exec.c
> index 6b92198..ef83da1 100644
> --- a/exec.c
> +++ b/exec.c
> @@ -497,9 +497,7 @@ static void code_gen_alloc(unsigned long tb_size)
>         if (code_gen_buffer_size > (512 * 1024 * 1024))
>             code_gen_buffer_size = (512 * 1024 * 1024);
>  #elif defined(__arm__)
> -        /* Map the buffer below 32M, so we can use direct calls and branches */
> -        flags |= MAP_FIXED;
> -        start = (void *) 0x01000000UL;
> +        /* Keep the buffer no bigger than 16GB to branch between blocks */
>         if (code_gen_buffer_size > 16 * 1024 * 1024)
>             code_gen_buffer_size = 16 * 1024 * 1024;
>  #elif defined(__s390x__)
> diff --git a/tcg/arm/tcg-target.c b/tcg/arm/tcg-target.c
> index e05a64f..730d913 100644
> --- a/tcg/arm/tcg-target.c
> +++ b/tcg/arm/tcg-target.c
> @@ -842,6 +842,12 @@ static inline void tcg_out_st8(TCGContext *s, int cond,
>         tcg_out_st8_12(s, cond, rd, rn, offset);
>  }
>
> +/* The _goto case is normally between TBs within the same code buffer,
> +   and with the code buffer limited to 16GB we shouldn't need the long
> +   case.
> +
> +   .... except to the prologue that is in its own buffer.
> +     */
>  static inline void tcg_out_goto(TCGContext *s, int cond, uint32_t addr)
>  {
>     int32_t val;
> @@ -855,22 +861,20 @@ static inline void tcg_out_goto(TCGContext *s, int cond, uint32_t addr)
>     if (val - 8 < 0x01fffffd && val - 8 > -0x01fffffd)
>         tcg_out_b(s, cond, val);
>     else {
> -#if 1
> -        tcg_abort();
> -#else
>         if (cond == COND_AL) {
>             tcg_out_ld32_12(s, COND_AL, TCG_REG_PC, TCG_REG_PC, -4);
> -            tcg_out32(s, addr); /* XXX: This is l->u.value, can we use it? */
> +            tcg_out32(s, addr);

I see these XXXs have been there since the ARM tcg target was
introduced. Does anybody know what they were referring to? Andrzej?

>         } else {
>             tcg_out_movi32(s, cond, TCG_REG_R8, val - 8);
>             tcg_out_dat_reg(s, cond, ARITH_ADD,
>                             TCG_REG_PC, TCG_REG_PC,
>                             TCG_REG_R8, SHIFT_IMM_LSL(0));
>         }
> -#endif
>     }
>  }
>
> +/* The call case is mostly used for helpers - so it's not unreasonable
> +   for them to be beyond branch range */
>  static inline void tcg_out_call(TCGContext *s, uint32_t addr)
>  {
>     int32_t val;
> @@ -887,20 +891,9 @@ static inline void tcg_out_call(TCGContext *s, uint32_t addr)
>             tcg_out_bl(s, COND_AL, val);
>         }
>     } else {
> -#if 1
> -        tcg_abort();
> -#else
> -        if (cond == COND_AL) {
> -            tcg_out_dat_imm(s, cond, ARITH_ADD, TCG_REG_R14, TCG_REG_PC, 4);
> -            tcg_out_ld32_12(s, COND_AL, TCG_REG_PC, TCG_REG_PC, -4);
> -            tcg_out32(s, addr); /* XXX: This is l->u.value, can we use it? */
> -        } else {
> -            tcg_out_movi32(s, cond, TCG_REG_R9, addr);
> -            tcg_out_dat_reg(s, cond, ARITH_MOV, TCG_REG_R14, 0,
> -                            TCG_REG_PC, SHIFT_IMM_LSL(0));
> -            tcg_out_bx(s, cond, TCG_REG_R9);
> -        }
> -#endif
> +        tcg_out_dat_imm(s, COND_AL, ARITH_ADD, TCG_REG_R14, TCG_REG_PC, 4);
> +        tcg_out_ld32_12(s, COND_AL, TCG_REG_PC, TCG_REG_PC, -4);
> +        tcg_out32(s, addr);
>     }
>  }
>

-- PMM
andrzej zaborowski Dec. 12, 2011, 5:24 p.m. UTC | #2
Hi,

On 12 December 2011 16:55, Peter Maydell <peter.maydell@linaro.org> wrote:
> On 12 December 2011 15:37, Dr. David Alan Gilbert
> <david.gilbert@linaro.org> wrote:
>> On ARM, don't map the code buffer at a fixed location, and fix up the
>> call/goto tcg routines to let it do long jumps.
>>
>> Mapping the code buffer at a fixed address could sometimes result in it being
>> mapped over the top of the heap with pretty random results.
>>
>> This diff is against v1.0.
>
> Patches should always be against master, although we're still
> close enough to 1.0 that this will probably apply anyway.
>
> Comments like "This diff is against v1.0." go below the '---' so they
> don't appear in the final commit log.
>
> Code generally looks OK to me.

To me as well, I'll apply if there are no objections.  I remember
Peter asked about MAP_FIXED on other hosts, has there been a
resolution?

>
>> Signed-off-by: Dr. David Alan Gilbert <david.gilbert@linaro.org>
>> ---
>>  exec.c               |    4 +---
>>  tcg/arm/tcg-target.c |   31 ++++++++++++-------------------
>>  2 files changed, 13 insertions(+), 22 deletions(-)
>>
>> diff --git a/exec.c b/exec.c
>> index 6b92198..ef83da1 100644
>> --- a/exec.c
>> +++ b/exec.c
>> @@ -497,9 +497,7 @@ static void code_gen_alloc(unsigned long tb_size)
>>         if (code_gen_buffer_size > (512 * 1024 * 1024))
>>             code_gen_buffer_size = (512 * 1024 * 1024);
>>  #elif defined(__arm__)
>> -        /* Map the buffer below 32M, so we can use direct calls and branches */
>> -        flags |= MAP_FIXED;
>> -        start = (void *) 0x01000000UL;
>> +        /* Keep the buffer no bigger than 16GB to branch between blocks */
>>         if (code_gen_buffer_size > 16 * 1024 * 1024)
>>             code_gen_buffer_size = 16 * 1024 * 1024;
>>  #elif defined(__s390x__)
>> diff --git a/tcg/arm/tcg-target.c b/tcg/arm/tcg-target.c
>> index e05a64f..730d913 100644
>> --- a/tcg/arm/tcg-target.c
>> +++ b/tcg/arm/tcg-target.c
>> @@ -842,6 +842,12 @@ static inline void tcg_out_st8(TCGContext *s, int cond,
>>         tcg_out_st8_12(s, cond, rd, rn, offset);
>>  }
>>
>> +/* The _goto case is normally between TBs within the same code buffer,
>> +   and with the code buffer limited to 16GB we shouldn't need the long
>> +   case.
>> +
>> +   .... except to the prologue that is in its own buffer.
>> +     */
>>  static inline void tcg_out_goto(TCGContext *s, int cond, uint32_t addr)
>>  {
>>     int32_t val;
>> @@ -855,22 +861,20 @@ static inline void tcg_out_goto(TCGContext *s, int cond, uint32_t addr)
>>     if (val - 8 < 0x01fffffd && val - 8 > -0x01fffffd)
>>         tcg_out_b(s, cond, val);
>>     else {
>> -#if 1
>> -        tcg_abort();
>> -#else
>>         if (cond == COND_AL) {
>>             tcg_out_ld32_12(s, COND_AL, TCG_REG_PC, TCG_REG_PC, -4);
>> -            tcg_out32(s, addr); /* XXX: This is l->u.value, can we use it? */
>> +            tcg_out32(s, addr);
>
> I see these XXXs have been there since the ARM tcg target was
> introduced. Does anybody know what they were referring to? Andrzej?

David asked me about them, but I couldn't figure it out. (the part in
tcg_out_call apparently is just copy&pasted from tcg_ouy_goto)

BTW: I think we can also use the "ld" branch when we see the goto
target is in Thumb mode.

Cheers
Peter Maydell Dec. 12, 2011, 6:03 p.m. UTC | #3
On 12 December 2011 17:24, andrzej zaborowski <balrogg@gmail.com> wrote:
> BTW: I think we can also use the "ld" branch when we see the goto
> target is in Thumb mode.

The target of a goto is currently never Thumb (because gotos are
always to other TCG generated code and we only generate ARM insns).
If we did need to be able to do a goto-Thumb we'd want to support
the short-range version too. But I think we might as well leave it
until we actually need it.

-- PMM
andrzej zaborowski Dec. 12, 2011, 6:10 p.m. UTC | #4
On 12 December 2011 19:03, Peter Maydell <peter.maydell@linaro.org> wrote:
> On 12 December 2011 17:24, andrzej zaborowski <balrogg@gmail.com> wrote:
>> BTW: I think we can also use the "ld" branch when we see the goto
>> target is in Thumb mode.
>
> The target of a goto is currently never Thumb (because gotos are
> always to other TCG generated code and we only generate ARM insns).

I'm aware of that, I just like functions that can do what their name
says well. :)

Cheers
Dr. David Alan Gilbert Dec. 12, 2011, 6:17 p.m. UTC | #5
On 12 December 2011 18:10, andrzej zaborowski <balrogg@gmail.com> wrote:
> On 12 December 2011 19:03, Peter Maydell <peter.maydell@linaro.org> wrote:
>> On 12 December 2011 17:24, andrzej zaborowski <balrogg@gmail.com> wrote:
>>> BTW: I think we can also use the "ld" branch when we see the goto
>>> target is in Thumb mode.
>>
>> The target of a goto is currently never Thumb (because gotos are
>> always to other TCG generated code and we only generate ARM insns).
>
> I'm aware of that, I just like functions that can do what their name
> says well. :)

It does have an assert which will catch it if you try, so no one
should get caught out
by it, and on ARMv7 the add is apparently an interworking branch, so I
think it might
even work.

Dave
andrzej zaborowski Dec. 14, 2011, 8:40 p.m. UTC | #6
On 12 December 2011 16:37, Dr. David Alan Gilbert
<david.gilbert@linaro.org> wrote:
> On ARM, don't map the code buffer at a fixed location, and fix up the
> call/goto tcg routines to let it do long jumps.
>
> Mapping the code buffer at a fixed address could sometimes result in it being
> mapped over the top of the heap with pretty random results.
>
> This diff is against v1.0.
>
> Signed-off-by: Dr. David Alan Gilbert <david.gilbert@linaro.org>
> ---
>  exec.c               |    4 +---
>  tcg/arm/tcg-target.c |   31 ++++++++++++-------------------
>  2 files changed, 13 insertions(+), 22 deletions(-)

Thanks, I applied this patch.

Cheers
diff mbox

Patch

diff --git a/exec.c b/exec.c
index 6b92198..ef83da1 100644
--- a/exec.c
+++ b/exec.c
@@ -497,9 +497,7 @@  static void code_gen_alloc(unsigned long tb_size)
         if (code_gen_buffer_size > (512 * 1024 * 1024))
             code_gen_buffer_size = (512 * 1024 * 1024);
 #elif defined(__arm__)
-        /* Map the buffer below 32M, so we can use direct calls and branches */
-        flags |= MAP_FIXED;
-        start = (void *) 0x01000000UL;
+        /* Keep the buffer no bigger than 16GB to branch between blocks */
         if (code_gen_buffer_size > 16 * 1024 * 1024)
             code_gen_buffer_size = 16 * 1024 * 1024;
 #elif defined(__s390x__)
diff --git a/tcg/arm/tcg-target.c b/tcg/arm/tcg-target.c
index e05a64f..730d913 100644
--- a/tcg/arm/tcg-target.c
+++ b/tcg/arm/tcg-target.c
@@ -842,6 +842,12 @@  static inline void tcg_out_st8(TCGContext *s, int cond,
         tcg_out_st8_12(s, cond, rd, rn, offset);
 }
 
+/* The _goto case is normally between TBs within the same code buffer,
+   and with the code buffer limited to 16GB we shouldn't need the long
+   case.
+
+   .... except to the prologue that is in its own buffer.
+     */
 static inline void tcg_out_goto(TCGContext *s, int cond, uint32_t addr)
 {
     int32_t val;
@@ -855,22 +861,20 @@  static inline void tcg_out_goto(TCGContext *s, int cond, uint32_t addr)
     if (val - 8 < 0x01fffffd && val - 8 > -0x01fffffd)
         tcg_out_b(s, cond, val);
     else {
-#if 1
-        tcg_abort();
-#else
         if (cond == COND_AL) {
             tcg_out_ld32_12(s, COND_AL, TCG_REG_PC, TCG_REG_PC, -4);
-            tcg_out32(s, addr); /* XXX: This is l->u.value, can we use it? */
+            tcg_out32(s, addr);
         } else {
             tcg_out_movi32(s, cond, TCG_REG_R8, val - 8);
             tcg_out_dat_reg(s, cond, ARITH_ADD,
                             TCG_REG_PC, TCG_REG_PC,
                             TCG_REG_R8, SHIFT_IMM_LSL(0));
         }
-#endif
     }
 }
 
+/* The call case is mostly used for helpers - so it's not unreasonable
+   for them to be beyond branch range */
 static inline void tcg_out_call(TCGContext *s, uint32_t addr)
 {
     int32_t val;
@@ -887,20 +891,9 @@  static inline void tcg_out_call(TCGContext *s, uint32_t addr)
             tcg_out_bl(s, COND_AL, val);
         }
     } else {
-#if 1
-        tcg_abort();
-#else
-        if (cond == COND_AL) {
-            tcg_out_dat_imm(s, cond, ARITH_ADD, TCG_REG_R14, TCG_REG_PC, 4);
-            tcg_out_ld32_12(s, COND_AL, TCG_REG_PC, TCG_REG_PC, -4);
-            tcg_out32(s, addr); /* XXX: This is l->u.value, can we use it? */
-        } else {
-            tcg_out_movi32(s, cond, TCG_REG_R9, addr);
-            tcg_out_dat_reg(s, cond, ARITH_MOV, TCG_REG_R14, 0,
-                            TCG_REG_PC, SHIFT_IMM_LSL(0));
-            tcg_out_bx(s, cond, TCG_REG_R9);
-        }
-#endif
+        tcg_out_dat_imm(s, COND_AL, ARITH_ADD, TCG_REG_R14, TCG_REG_PC, 4);
+        tcg_out_ld32_12(s, COND_AL, TCG_REG_PC, TCG_REG_PC, -4);
+        tcg_out32(s, addr);
     }
 }