diff mbox series

[3/4] tcg/aarch64: Emit BTI insns at jump landing pads

Message ID 20230816142516.469743-4-richard.henderson@linaro.org
State Superseded
Headers show
Series tcg/aarch64: Enable BTI within the JIT | expand

Commit Message

Richard Henderson Aug. 16, 2023, 2:25 p.m. UTC
The prologue is entered via "call"; the epilogue, each tb,
and each goto_tb continuation point are all reached via "jump".

As tcg_out_goto_long is only used by tcg_out_exit_tb, merge
the two functions.  Change the indirect register used to
TCG_REG_TMP1, aka X17, so that the BTI condition created
is "jump" instead of "jump or call".

Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
---
 tcg/aarch64/tcg-target.c.inc | 49 +++++++++++++++++++++++++-----------
 1 file changed, 34 insertions(+), 15 deletions(-)

Comments

Peter Maydell Sept. 12, 2023, 4:23 p.m. UTC | #1
On Wed, 16 Aug 2023 at 15:27, Richard Henderson
<richard.henderson@linaro.org> wrote:
>
> The prologue is entered via "call"; the epilogue, each tb,
> and each goto_tb continuation point are all reached via "jump".
>
> As tcg_out_goto_long is only used by tcg_out_exit_tb, merge
> the two functions.  Change the indirect register used to
> TCG_REG_TMP1, aka X16, so that the BTI condition created
> is "jump" instead of "jump or call".

TCG_REG_TMP1 is X17, not X16...

> Signed-off-by: Richard Henderson <richard.henderson@linaro.org>

>  static void tcg_out_exit_tb(TCGContext *s, uintptr_t a0)
>  {
> +    const tcg_insn_unit *target;
> +    ptrdiff_t offset;
> +
>      /* Reuse the zeroing that exists for goto_ptr.  */
>      if (a0 == 0) {
> -        tcg_out_goto_long(s, tcg_code_gen_epilogue);
> +        target = tcg_code_gen_epilogue;
>      } else {
>          tcg_out_movi(s, TCG_TYPE_I64, TCG_REG_X0, a0);
> -        tcg_out_goto_long(s, tb_ret_addr);
> +        target = tb_ret_addr;
> +    }
> +
> +    offset = tcg_pcrel_diff(s, target) >> 2;
> +    if (offset == sextract64(offset, 0, 26)) {
> +        tcg_out_insn(s, 3206, B, offset);
> +    } else {
> +        tcg_out_movi(s, TCG_TYPE_I64, TCG_REG_TMP1, (intptr_t)target);
> +        tcg_out_insn(s, 3207, BR, TCG_REG_TMP1);

Since it's now important that the tempreg we have here is
one of X16/X17 in order to get the right BTI behaviour,
I think a build-time assert of this would be helpful.
That will catch the possibility that we forget about this
and decide to rearrange which registers we use as
tempregs later.

>      }
>  }
>
> @@ -1970,6 +1984,7 @@ static void tcg_out_goto_tb(TCGContext *s, int which)
>      tcg_out32(s, I3206_B);
>      tcg_out_insn(s, 3207, BR, TCG_REG_TMP0);
>      set_jmp_reset_offset(s, which);
> +    tcg_out_bti(s, BTI_J);
>  }

Otherwise
Reviewed-by: Peter Maydell <peter.maydell@linaro.org>

"Did we miss anywhere that should have had a BTI insn"
is a bit tricky to review, but I assume you've done enough
testing on a BTI-enabled host to catch that.

thanks
-- PMM
diff mbox series

Patch

diff --git a/tcg/aarch64/tcg-target.c.inc b/tcg/aarch64/tcg-target.c.inc
index 8d71ac68f6..fca5baea57 100644
--- a/tcg/aarch64/tcg-target.c.inc
+++ b/tcg/aarch64/tcg-target.c.inc
@@ -602,6 +602,10 @@  typedef enum {
     DMB_ISH         = 0xd50338bf,
     DMB_LD          = 0x00000100,
     DMB_ST          = 0x00000200,
+
+    BTI_C           = 0xd503245f,
+    BTI_J           = 0xd503249f,
+    BTI_JC          = 0xd50324df,
 } AArch64Insn;
 
 static inline uint32_t tcg_in32(TCGContext *s)
@@ -843,6 +847,17 @@  static void tcg_out_insn_3313(TCGContext *s, AArch64Insn insn,
               | rn << 5 | (rd & 0x1f));
 }
 
+static void tcg_out_bti(TCGContext *s, AArch64Insn insn)
+{
+    /*
+     * While BTI insns are nops on hosts without FEAT_BTI,
+     * there is no point in emitting them in that case either.
+     */
+    if (cpuinfo & CPUINFO_BTI) {
+        tcg_out32(s, insn);
+    }
+}
+
 /* Register to register move using ORR (shifted register with no shift). */
 static void tcg_out_movr(TCGContext *s, TCGType ext, TCGReg rd, TCGReg rm)
 {
@@ -1351,18 +1366,6 @@  static void tcg_out_goto(TCGContext *s, const tcg_insn_unit *target)
     tcg_out_insn(s, 3206, B, offset);
 }
 
-static void tcg_out_goto_long(TCGContext *s, const tcg_insn_unit *target)
-{
-    ptrdiff_t offset = tcg_pcrel_diff(s, target) >> 2;
-    if (offset == sextract64(offset, 0, 26)) {
-        tcg_out_insn(s, 3206, B, offset);
-    } else {
-        /* Choose X9 as a call-clobbered non-LR temporary. */
-        tcg_out_movi(s, TCG_TYPE_I64, TCG_REG_X9, (intptr_t)target);
-        tcg_out_insn(s, 3207, BR, TCG_REG_X9);
-    }
-}
-
 static void tcg_out_call_int(TCGContext *s, const tcg_insn_unit *target)
 {
     ptrdiff_t offset = tcg_pcrel_diff(s, target) >> 2;
@@ -1947,12 +1950,23 @@  static const tcg_insn_unit *tb_ret_addr;
 
 static void tcg_out_exit_tb(TCGContext *s, uintptr_t a0)
 {
+    const tcg_insn_unit *target;
+    ptrdiff_t offset;
+
     /* Reuse the zeroing that exists for goto_ptr.  */
     if (a0 == 0) {
-        tcg_out_goto_long(s, tcg_code_gen_epilogue);
+        target = tcg_code_gen_epilogue;
     } else {
         tcg_out_movi(s, TCG_TYPE_I64, TCG_REG_X0, a0);
-        tcg_out_goto_long(s, tb_ret_addr);
+        target = tb_ret_addr;
+    }
+
+    offset = tcg_pcrel_diff(s, target) >> 2;
+    if (offset == sextract64(offset, 0, 26)) {
+        tcg_out_insn(s, 3206, B, offset);
+    } else {
+        tcg_out_movi(s, TCG_TYPE_I64, TCG_REG_TMP1, (intptr_t)target);
+        tcg_out_insn(s, 3207, BR, TCG_REG_TMP1);
     }
 }
 
@@ -1970,6 +1984,7 @@  static void tcg_out_goto_tb(TCGContext *s, int which)
     tcg_out32(s, I3206_B);
     tcg_out_insn(s, 3207, BR, TCG_REG_TMP0);
     set_jmp_reset_offset(s, which);
+    tcg_out_bti(s, BTI_J);
 }
 
 void tb_target_set_jmp_target(const TranslationBlock *tb, int n,
@@ -3062,6 +3077,8 @@  static void tcg_target_qemu_prologue(TCGContext *s)
 {
     TCGReg r;
 
+    tcg_out_bti(s, BTI_C);
+
     /* Push (FP, LR) and allocate space for all saved registers.  */
     tcg_out_insn(s, 3314, STP, TCG_REG_FP, TCG_REG_LR,
                  TCG_REG_SP, -PUSH_SIZE, 1, 1);
@@ -3102,10 +3119,12 @@  static void tcg_target_qemu_prologue(TCGContext *s)
      * and fall through to the rest of the epilogue.
      */
     tcg_code_gen_epilogue = tcg_splitwx_to_rx(s->code_ptr);
+    tcg_out_bti(s, BTI_J);
     tcg_out_movi(s, TCG_TYPE_REG, TCG_REG_X0, 0);
 
     /* TB epilogue */
     tb_ret_addr = tcg_splitwx_to_rx(s->code_ptr);
+    tcg_out_bti(s, BTI_J);
 
     /* Remove TCG locals stack space.  */
     tcg_out_insn(s, 3401, ADDI, TCG_TYPE_I64, TCG_REG_SP, TCG_REG_SP,
@@ -3125,7 +3144,7 @@  static void tcg_target_qemu_prologue(TCGContext *s)
 
 static void tcg_out_tb_start(TCGContext *s)
 {
-    /* nothing to do */
+    tcg_out_bti(s, BTI_J);
 }
 
 static void tcg_out_nop_fill(tcg_insn_unit *p, int count)