diff mbox series

[v3,06/14] tcg/arm: Support unaligned access for softmmu

Message ID 20210818212912.396794-7-richard.henderson@linaro.org
State New
Headers show
Series tcg/arm: Unaligned access and other cleanup | expand

Commit Message

Richard Henderson Aug. 18, 2021, 9:29 p.m. UTC
From armv6, the architecture supports unaligned accesses.
All we need to do is perform the correct alignment check
in tcg_out_tlb_read and not use LDRD/STRD when the access
is not aligned.

Signed-off-by: Richard Henderson <richard.henderson@linaro.org>

---
 tcg/arm/tcg-target.c.inc | 69 ++++++++++++++++++++++------------------
 1 file changed, 38 insertions(+), 31 deletions(-)

-- 
2.25.1

Comments

Peter Maydell Aug. 20, 2021, 1:34 p.m. UTC | #1
On Wed, 18 Aug 2021 at 22:32, Richard Henderson
<richard.henderson@linaro.org> wrote:
>

> From armv6, the architecture supports unaligned accesses.

> All we need to do is perform the correct alignment check

> in tcg_out_tlb_read and not use LDRD/STRD when the access

> is not aligned.

>

> Signed-off-by: Richard Henderson <richard.henderson@linaro.org>

> @@ -1578,27 +1576,32 @@ static TCGReg tcg_out_tlb_read(TCGContext *s, TCGReg addrlo, TCGReg addrhi,

>

>      /*

>       * Check alignment, check comparators.

> -     * Do this in no more than 3 insns.  Use MOVW for v7, if possible,

> +     * Do this in 2-4 insns.  Use MOVW for v7, if possible,

>       * to reduce the number of sequential conditional instructions.

>       * Almost all guests have at least 4k pages, which means that we need

>       * to clear at least 9 bits even for an 8-byte memory, which means it

>       * isn't worth checking for an immediate operand for BIC.

>       */

> +    /* For unaligned accesses, test the page of the last byte. */

> +    t_addr = addrlo;

> +    if (a_mask < s_mask) {

> +        t_addr = TCG_REG_R0;

> +        tcg_out_dat_imm(s, COND_AL, ARITH_ADD, t_addr,

> +                        addrlo, s_mask - a_mask);

> +    }


I don't understand what this comment means or why we're doing the
addition. If we know we need to check eg whether the address is 2-aligned,
why aren't we just checking whether it's 2-aligned ? Could you
expand on the explanation a bit?


thanks
-- PMM
Richard Henderson Aug. 20, 2021, 5:19 p.m. UTC | #2
On 8/20/21 3:34 AM, Peter Maydell wrote:
> On Wed, 18 Aug 2021 at 22:32, Richard Henderson

> <richard.henderson@linaro.org> wrote:

>>

>>  From armv6, the architecture supports unaligned accesses.

>> All we need to do is perform the correct alignment check

>> in tcg_out_tlb_read and not use LDRD/STRD when the access

>> is not aligned.

>>

>> Signed-off-by: Richard Henderson <richard.henderson@linaro.org>

>> @@ -1578,27 +1576,32 @@ static TCGReg tcg_out_tlb_read(TCGContext *s, TCGReg addrlo, TCGReg addrhi,

>>

>>       /*

>>        * Check alignment, check comparators.

>> -     * Do this in no more than 3 insns.  Use MOVW for v7, if possible,

>> +     * Do this in 2-4 insns.  Use MOVW for v7, if possible,

>>        * to reduce the number of sequential conditional instructions.

>>        * Almost all guests have at least 4k pages, which means that we need

>>        * to clear at least 9 bits even for an 8-byte memory, which means it

>>        * isn't worth checking for an immediate operand for BIC.

>>        */

>> +    /* For unaligned accesses, test the page of the last byte. */

>> +    t_addr = addrlo;

>> +    if (a_mask < s_mask) {

>> +        t_addr = TCG_REG_R0;

>> +        tcg_out_dat_imm(s, COND_AL, ARITH_ADD, t_addr,

>> +                        addrlo, s_mask - a_mask);

>> +    }

> 

> I don't understand what this comment means or why we're doing the

> addition. If we know we need to check eg whether the address is 2-aligned,

> why aren't we just checking whether it's 2-aligned ? Could you

> expand on the explanation a bit?


We want to detect the page crossing case of a misaligned access.

We begin computing the softtlb data with the address of the start of the access (addrlo).  We
then compute the address of the last (aligned) portion of the access.  For a 4-byte access
that is 1-byte aligned, we add 3 - 0 = 3, finding the last byte; for a 2-byte aligned
access we add 3 - 1 = 2; for a 4-byte aligned access we (logically) add 3 - 3 = 0.

This second quantity retains the alignment bits we need to test and also rolls over into the next
page if and only if the access does. When we compare against the comparator in the TLB, a bit set
within the alignment will cause a failure, as will a differing page number.


r~
diff mbox series

Patch

diff --git a/tcg/arm/tcg-target.c.inc b/tcg/arm/tcg-target.c.inc
index 0c7e4f8411..c55167cc84 100644
--- a/tcg/arm/tcg-target.c.inc
+++ b/tcg/arm/tcg-target.c.inc
@@ -34,13 +34,6 @@  bool use_idiv_instructions;
 bool use_neon_instructions;
 #endif
 
-/* ??? Ought to think about changing CONFIG_SOFTMMU to always defined.  */
-#ifdef CONFIG_SOFTMMU
-# define USING_SOFTMMU 1
-#else
-# define USING_SOFTMMU 0
-#endif
-
 #ifdef CONFIG_DEBUG_TCG
 static const char * const tcg_target_reg_names[TCG_TARGET_NB_REGS] = {
     "%r0",  "%r1",  "%r2",  "%r3",  "%r4",  "%r5",  "%r6",  "%r7",
@@ -1526,15 +1519,20 @@  static TCGReg tcg_out_tlb_read(TCGContext *s, TCGReg addrlo, TCGReg addrhi,
     int fast_off = TLB_MASK_TABLE_OFS(mem_index);
     int mask_off = fast_off + offsetof(CPUTLBDescFast, mask);
     int table_off = fast_off + offsetof(CPUTLBDescFast, table);
-    unsigned s_bits = opc & MO_SIZE;
-    unsigned a_bits = get_alignment_bits(opc);
+    unsigned s_mask = (1 << (opc & MO_SIZE)) - 1;
+    unsigned a_mask = (1 << get_alignment_bits(opc)) - 1;
+    TCGReg t_addr;
 
     /*
-     * We don't support inline unaligned acceses, but we can easily
-     * support overalignment checks.
+     * For v7, support for unaligned accesses is mandatory.
+     * For v6, support for unaligned accesses is enabled by SCTLR.U,
+     *     which is enabled by (at least) Linux and FreeBSD.
+     * For v4 and v5, unaligned accesses are... complicated, and
+     *     unhelped by Linux having a global not per-process flag
+     *     for unaligned handling.
      */
-    if (a_bits < s_bits) {
-        a_bits = s_bits;
+    if (!use_armv6_instructions && a_mask < s_mask) {
+        a_mask = s_mask;
     }
 
     /* Load env_tlb(env)->f[mmu_idx].{mask,table} into {r0,r1}.  */
@@ -1578,27 +1576,32 @@  static TCGReg tcg_out_tlb_read(TCGContext *s, TCGReg addrlo, TCGReg addrhi,
 
     /*
      * Check alignment, check comparators.
-     * Do this in no more than 3 insns.  Use MOVW for v7, if possible,
+     * Do this in 2-4 insns.  Use MOVW for v7, if possible,
      * to reduce the number of sequential conditional instructions.
      * Almost all guests have at least 4k pages, which means that we need
      * to clear at least 9 bits even for an 8-byte memory, which means it
      * isn't worth checking for an immediate operand for BIC.
      */
+    /* For unaligned accesses, test the page of the last byte. */
+    t_addr = addrlo;
+    if (a_mask < s_mask) {
+        t_addr = TCG_REG_R0;
+        tcg_out_dat_imm(s, COND_AL, ARITH_ADD, t_addr,
+                        addrlo, s_mask - a_mask);
+    }
     if (use_armv7_instructions && TARGET_PAGE_BITS <= 16) {
-        tcg_target_ulong mask = ~(TARGET_PAGE_MASK | ((1 << a_bits) - 1));
-
-        tcg_out_movi32(s, COND_AL, TCG_REG_TMP, mask);
+        tcg_out_movi32(s, COND_AL, TCG_REG_TMP, ~(TARGET_PAGE_MASK | a_mask));
         tcg_out_dat_reg(s, COND_AL, ARITH_BIC, TCG_REG_TMP,
-                        addrlo, TCG_REG_TMP, 0);
+                        t_addr, TCG_REG_TMP, 0);
         tcg_out_dat_reg(s, COND_AL, ARITH_CMP, 0, TCG_REG_R2, TCG_REG_TMP, 0);
     } else {
-        if (a_bits) {
-            tcg_out_dat_imm(s, COND_AL, ARITH_TST, 0, addrlo,
-                            (1 << a_bits) - 1);
+        if (a_mask) {
+            tcg_debug_assert(a_mask <= 0xff);
+            tcg_out_dat_imm(s, COND_AL, ARITH_TST, 0, addrlo, a_mask);
         }
-        tcg_out_dat_reg(s, COND_AL, ARITH_MOV, TCG_REG_TMP, 0, addrlo,
+        tcg_out_dat_reg(s, COND_AL, ARITH_MOV, TCG_REG_TMP, 0, t_addr,
                         SHIFT_IMM_LSR(TARGET_PAGE_BITS));
-        tcg_out_dat_reg(s, (a_bits ? COND_EQ : COND_AL), ARITH_CMP,
+        tcg_out_dat_reg(s, (a_mask ? COND_EQ : COND_AL), ARITH_CMP,
                         0, TCG_REG_R2, TCG_REG_TMP,
                         SHIFT_IMM_LSL(TARGET_PAGE_BITS));
     }
@@ -1763,8 +1766,9 @@  static inline void tcg_out_qemu_ld_index(TCGContext *s, MemOp opc,
         tcg_out_ld32_r(s, COND_AL, datalo, addrlo, addend);
         break;
     case MO_Q:
-        /* Avoid ldrd for user-only emulation, to handle unaligned.  */
-        if (USING_SOFTMMU && use_armv6_instructions
+        /* LDRD requires alignment; double-check that. */
+        if (use_armv6_instructions
+            && get_alignment_bits(opc) >= MO_64
             && (datalo & 1) == 0 && datahi == datalo + 1) {
             tcg_out_ldrd_r(s, COND_AL, datalo, addrlo, addend);
         } else if (datalo != addend) {
@@ -1806,8 +1810,9 @@  static inline void tcg_out_qemu_ld_direct(TCGContext *s, MemOp opc,
         tcg_out_ld32_12(s, COND_AL, datalo, addrlo, 0);
         break;
     case MO_Q:
-        /* Avoid ldrd for user-only emulation, to handle unaligned.  */
-        if (USING_SOFTMMU && use_armv6_instructions
+        /* LDRD requires alignment; double-check that. */
+        if (use_armv6_instructions
+            && get_alignment_bits(opc) >= MO_64
             && (datalo & 1) == 0 && datahi == datalo + 1) {
             tcg_out_ldrd_8(s, COND_AL, datalo, addrlo, 0);
         } else if (datalo == addrlo) {
@@ -1882,8 +1887,9 @@  static inline void tcg_out_qemu_st_index(TCGContext *s, int cond, MemOp opc,
         tcg_out_st32_r(s, cond, datalo, addrlo, addend);
         break;
     case MO_64:
-        /* Avoid strd for user-only emulation, to handle unaligned.  */
-        if (USING_SOFTMMU && use_armv6_instructions
+        /* STRD requires alignment; double-check that. */
+        if (use_armv6_instructions
+            && get_alignment_bits(opc) >= MO_64
             && (datalo & 1) == 0 && datahi == datalo + 1) {
             tcg_out_strd_r(s, cond, datalo, addrlo, addend);
         } else {
@@ -1914,8 +1920,9 @@  static inline void tcg_out_qemu_st_direct(TCGContext *s, MemOp opc,
         tcg_out_st32_12(s, COND_AL, datalo, addrlo, 0);
         break;
     case MO_64:
-        /* Avoid strd for user-only emulation, to handle unaligned.  */
-        if (USING_SOFTMMU && use_armv6_instructions
+        /* STRD requires alignment; double-check that. */
+        if (use_armv6_instructions
+            && get_alignment_bits(opc) >= MO_64
             && (datalo & 1) == 0 && datahi == datalo + 1) {
             tcg_out_strd_8(s, COND_AL, datalo, addrlo, 0);
         } else {