
[02/13] target-arm: A64: add support for logical (shifted register)

Message ID 1386280289-27636-3-git-send-email-peter.maydell@linaro.org
State Superseded

Commit Message

Peter Maydell Dec. 5, 2013, 9:51 p.m. UTC
From: Alexander Graf <agraf@suse.de>

Add support for the instructions described in "C3.5.10 Logical
(shifted register)".

We store the flags in the same locations as the 32 bit decoder.
This is slightly awkward when calculating 64 bit results, but seems
a better tradeoff than having to rework the whole 32 bit decoder
and also make 32 bit result calculation in A64 awkward.

Signed-off-by: Alexander Graf <agraf@suse.de>
[claudio: some refactoring to avoid hidden allocation of temps,
	  rework flags, use enums for shift types,
	  renaming of functions]
Signed-off-by: Claudio Fontana <claudio.fontana@linaro.org>
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
---
 target-arm/translate-a64.c |  170 ++++++++++++++++++++++++++++++++++++++++++--
 1 file changed, 164 insertions(+), 6 deletions(-)

Comments

Richard Henderson Dec. 5, 2013, 10:39 p.m. UTC | #1
On 12/06/2013 10:51 AM, Peter Maydell wrote:
> +    if (invert) {
> +        tcg_gen_not_i64(tcg_rm, tcg_rm);
> +    }
> +
> +    tcg_rd = cpu_reg(s, rd);
> +    tcg_rn = cpu_reg(s, rn);
> +
> +    switch (opc) {
> +    case 0: /* AND, BIC */
> +    case 3: /* ANDS, BICS */
> +        tcg_gen_and_i64(tcg_rd, tcg_rn, tcg_rm);
> +        break;
> +    case 1: /* ORR, ORN */
> +        tcg_gen_or_i64(tcg_rd, tcg_rn, tcg_rm);
> +        break;
> +    case 2: /* EOR, EON */
> +        tcg_gen_xor_i64(tcg_rd, tcg_rn, tcg_rm);
> +        break;
> +    default:
> +        assert(FALSE);
> +        break;
> +    }

While correct, surely better to work with tcg and select on opc:invert to
generate andc/orc/eqv?
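
[A minimal sketch of the suggested opc:invert selection, assuming the decoded opc/invert fields and the tcg_rd/tcg_rn/tcg_rm values already set up by the patch; this is illustrative only, not the committed code:]

    /* Sketch: fold the invert bit into the opcode selection so the
     * backend can emit andc/orc/eqv directly, instead of a separate
     * NOT on tcg_rm followed by and/or/xor.  With this structure the
     * earlier "if (invert) tcg_gen_not_i64(...)" would be dropped.
     */
    switch (opc | (invert << 2)) {
    case 0: /* AND */
    case 3: /* ANDS */
        tcg_gen_and_i64(tcg_rd, tcg_rn, tcg_rm);
        break;
    case 1: /* ORR */
        tcg_gen_or_i64(tcg_rd, tcg_rn, tcg_rm);
        break;
    case 2: /* EOR */
        tcg_gen_xor_i64(tcg_rd, tcg_rn, tcg_rm);
        break;
    case 4: /* BIC */
    case 7: /* BICS */
        tcg_gen_andc_i64(tcg_rd, tcg_rn, tcg_rm);
        break;
    case 5: /* ORN */
        tcg_gen_orc_i64(tcg_rd, tcg_rn, tcg_rm);
        break;
    case 6: /* EON */
        tcg_gen_eqv_i64(tcg_rd, tcg_rn, tcg_rm);
        break;
    default:
        assert(FALSE);
        break;
    }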

Also, isn't MOV (register) canonical for ORR (rn=31 && shift_amount=0), and MVN
(register) canonical for ORN (rn=31 && shift_amount=0), and both therefore also
worth a special case?


r~
Alex Bennée Dec. 6, 2013, 9:36 a.m. UTC | #2
rth@twiddle.net writes:

> On 12/06/2013 10:51 AM, Peter Maydell wrote:
>> +    if (invert) {
>> +        tcg_gen_not_i64(tcg_rm, tcg_rm);
>> +    }
>> +
>> +    tcg_rd = cpu_reg(s, rd);
>> +    tcg_rn = cpu_reg(s, rn);
>> +
>> +    switch (opc) {
>> +    case 0: /* AND, BIC */
>> +    case 3: /* ANDS, BICS */
>> +        tcg_gen_and_i64(tcg_rd, tcg_rn, tcg_rm);
>> +        break;
>> +    case 1: /* ORR, ORN */
>> +        tcg_gen_or_i64(tcg_rd, tcg_rn, tcg_rm);
>> +        break;
>> +    case 2: /* EOR, EON */
>> +        tcg_gen_xor_i64(tcg_rd, tcg_rn, tcg_rm);
>> +        break;
>> +    default:
>> +        assert(FALSE);
>> +        break;
>> +    }
>
> While correct, surely better to work with tcg and select on opc:invert to
> generate andc/orc/eqv?

Shouldn't the TCG optimiser/back-end just be smart enough to figure it
out? It seems clearer to express the tcg ops in terms of the front-end's
meaning?

> Also, isn't MOV (register) canonical for ORR (rn=31 && shift_amount=0), and MVN
> (register) canonical for ORN (rn=31 && shift_amount=0), and both therefore also
> worth a special case?

I suspect I'm being overly cheeky to expect the optimiser to detect and
optimise for that case as the ZR is a const ;-)


Cheers,

--
Alex Bennée
QEMU/KVM Hacker for Linaro
Richard Henderson Dec. 6, 2013, 4:49 p.m. UTC | #3
On 12/06/2013 10:36 PM, Alex Bennée wrote:
>> While correct, surely better to work with tcg and select on opc:invert to
>> generate andc/orc/eqv?
> 
> Shouldn't the TCG optimiser/back-end just be smart enough to figure it
> out? It seems clearer to express the tcg ops in terms of the front-end's
> meaning?

No, the TCG optimizer is really quite stupid.  It only does constant folding
and dead code elimination.  No peepholing or combination sorts of opts.

>> Also, isn't MOV (register) canonical for ORR (rn=31 && shift_amount=0), and MVN
>> (register) canonical for ORN (rn=31 && shift_amount=0), and both therefore also
>> worth a special case?
> 
> I suspect I'm being overly cheeky to expect the optimiser to detect and
> optimise for that case as the ZR is a const ;-)

It would.  But since register-register move is a rather common operation, it
will pay off to not require the optimizer to clean up that special case.

Thus I only recommend special casing the official aliases, not any operation
that could mathematically be considered an identity.
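
[A sketch of that special case, again assuming the patch's cpu_reg() helper and decoded sf/opc/invert/rm/rn/shift_amount fields; an early-out near the top of disas_logic_reg(), illustrative only:]

    /* Sketch: ORR/ORN with Rn == XZR and no shift are the architectural
     * MOV/MVN (register) aliases, so emit a plain move (or move of NOT Rm)
     * rather than an OR against a constant zero.
     */
    if (opc == 1 && rn == 31 && shift_amount == 0) {
        TCGv_i64 tcg_rm = cpu_reg(s, rm);
        TCGv_i64 tcg_rd = cpu_reg(s, rd);

        if (invert) {
            tcg_gen_not_i64(tcg_rd, tcg_rm);
            if (!sf) {
                tcg_gen_ext32u_i64(tcg_rd, tcg_rd);
            }
        } else if (sf) {
            tcg_gen_mov_i64(tcg_rd, tcg_rm);
        } else {
            tcg_gen_ext32u_i64(tcg_rd, tcg_rm);
        }
        return;
    }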


r~

Patch

diff --git a/target-arm/translate-a64.c b/target-arm/translate-a64.c
index a9c1803..8a13806 100644
--- a/target-arm/translate-a64.c
+++ b/target-arm/translate-a64.c
@@ -36,7 +36,7 @@ 
 
 static TCGv_i64 cpu_X[32];
 static TCGv_i64 cpu_pc;
-static TCGv_i32 pstate;
+static TCGv_i32 cpu_NF, cpu_ZF, cpu_CF, cpu_VF;
 
 static const char *regnames[] = {
     "x0", "x1", "x2", "x3", "x4", "x5", "x6", "x7",
@@ -45,6 +45,13 @@  static const char *regnames[] = {
     "x24", "x25", "x26", "x27", "x28", "x29", "lr", "sp"
 };
 
+enum a64_shift_type {
+    A64_SHIFT_TYPE_LSL = 0,
+    A64_SHIFT_TYPE_LSR = 1,
+    A64_SHIFT_TYPE_ASR = 2,
+    A64_SHIFT_TYPE_ROR = 3
+};
+
 /* initialize TCG globals.  */
 void a64_translate_init(void)
 {
@@ -59,9 +66,10 @@  void a64_translate_init(void)
                                           regnames[i]);
     }
 
-    pstate = tcg_global_mem_new_i32(TCG_AREG0,
-                                    offsetof(CPUARMState, pstate),
-                                    "pstate");
+    cpu_NF = tcg_global_mem_new_i32(TCG_AREG0, offsetof(CPUARMState, NF), "NF");
+    cpu_ZF = tcg_global_mem_new_i32(TCG_AREG0, offsetof(CPUARMState, ZF), "ZF");
+    cpu_CF = tcg_global_mem_new_i32(TCG_AREG0, offsetof(CPUARMState, CF), "CF");
+    cpu_VF = tcg_global_mem_new_i32(TCG_AREG0, offsetof(CPUARMState, VF), "VF");
 }
 
 void aarch64_cpu_dump_state(CPUState *cs, FILE *f,
@@ -221,6 +229,33 @@  static TCGv_i64 read_cpu_reg(DisasContext *s, int reg, int sf)
     return v;
 }
 
+/* Set ZF and NF based on a 64 bit result. This is alas fiddlier
+ * than the 32 bit equivalent.
+ */
+static inline void gen_set_NZ64(TCGv_i64 result)
+{
+    TCGv_i64 flag = tcg_temp_new_i64();
+
+    tcg_gen_setcondi_i64(TCG_COND_NE, flag, result, 0);
+    tcg_gen_trunc_i64_i32(cpu_ZF, flag);
+    tcg_gen_shri_i64(flag, result, 32);
+    tcg_gen_trunc_i64_i32(cpu_NF, flag);
+    tcg_temp_free_i64(flag);
+}
+
+/* Set NZCV as for a logical operation: NZ as per result, CV cleared. */
+static inline void gen_logic_CC(int sf, TCGv_i64 result)
+{
+    if (sf) {
+        gen_set_NZ64(result);
+    } else {
+        tcg_gen_trunc_i64_i32(cpu_ZF, result);
+        tcg_gen_trunc_i64_i32(cpu_NF, result);
+    }
+    tcg_gen_movi_i32(cpu_CF, 0);
+    tcg_gen_movi_i32(cpu_VF, 0);
+}
+
 /*
  * the instruction disassembly implemented here matches
  * the instruction encoding classifications in chapter 3 (C3)
@@ -682,10 +717,133 @@  static void disas_data_proc_imm(DisasContext *s, uint32_t insn)
     }
 }
 
-/* Logical (shifted register) */
+/* Shift a TCGv src by TCGv shift_amount, put result in dst.
+ * Note that it is the caller's responsibility to ensure that the
+ * shift amount is in range (ie 0..31 or 0..63) and provide the ARM
+ * mandated semantics for out of range shifts.
+ */
+static void shift_reg(TCGv_i64 dst, TCGv_i64 src, int sf,
+                      enum a64_shift_type shift_type, TCGv_i64 shift_amount)
+{
+    switch (shift_type) {
+    case A64_SHIFT_TYPE_LSL:
+        tcg_gen_shl_i64(dst, src, shift_amount);
+        break;
+    case A64_SHIFT_TYPE_LSR:
+        tcg_gen_shr_i64(dst, src, shift_amount);
+        break;
+    case A64_SHIFT_TYPE_ASR:
+        if (!sf) {
+            tcg_gen_ext32s_i64(dst, src);
+        }
+        tcg_gen_sar_i64(dst, sf ? src : dst, shift_amount);
+        break;
+    case A64_SHIFT_TYPE_ROR:
+        if (sf) {
+            tcg_gen_rotr_i64(dst, src, shift_amount);
+        } else {
+            TCGv_i32 t0, t1;
+            t0 = tcg_temp_new_i32();
+            t1 = tcg_temp_new_i32();
+            tcg_gen_trunc_i64_i32(t0, src);
+            tcg_gen_trunc_i64_i32(t1, shift_amount);
+            tcg_gen_rotr_i32(t0, t0, t1);
+            tcg_gen_extu_i32_i64(dst, t0);
+            tcg_temp_free_i32(t0);
+            tcg_temp_free_i32(t1);
+        }
+        break;
+    default:
+        assert(FALSE); /* all shift types should be handled */
+        break;
+    }
+
+    if (!sf) { /* zero extend final result */
+        tcg_gen_ext32u_i64(dst, dst);
+    }
+}
+
+/* Shift a TCGv src by immediate, put result in dst.
+ * The shift amount must be in range (this should always be true as the
+ * relevant instructions will UNDEF on bad shift immediates).
+ */
+static void shift_reg_imm(TCGv_i64 dst, TCGv_i64 src, int sf,
+                          enum a64_shift_type shift_type, unsigned int shift_i)
+{
+    assert(shift_i < (sf ? 64 : 32));
+
+    if (shift_i == 0) {
+        tcg_gen_mov_i64(dst, src);
+    } else {
+        TCGv_i64 shift_const;
+
+        shift_const = tcg_const_i64(shift_i);
+        shift_reg(dst, src, sf, shift_type, shift_const);
+        tcg_temp_free_i64(shift_const);
+    }
+}
+
+/* C3.5.10 Logical (shifted register)
+ *   31  30 29 28       24 23   22 21  20  16 15    10 9    5 4    0
+ * +----+-----+-----------+-------+---+------+--------+------+------+
+ * | sf | opc | 0 1 0 1 0 | shift | N |  Rm  |  imm6  |  Rn  |  Rd  |
+ * +----+-----+-----------+-------+---+------+--------+------+------+
+ */
 static void disas_logic_reg(DisasContext *s, uint32_t insn)
 {
-    unsupported_encoding(s, insn);
+    TCGv_i64 tcg_rd, tcg_rn, tcg_rm;
+    unsigned int sf, opc, shift_type, invert, rm, shift_amount, rn, rd;
+
+    sf = extract32(insn, 31, 1);
+    opc = extract32(insn, 29, 2);
+    shift_type = extract32(insn, 22, 2);
+    invert = extract32(insn, 21, 1);
+    rm = extract32(insn, 16, 5);
+    shift_amount = extract32(insn, 10, 6);
+    rn = extract32(insn, 5, 5);
+    rd = extract32(insn, 0, 5);
+
+    if (!sf && (shift_amount & (1 << 5))) {
+        unallocated_encoding(s);
+        return;
+    }
+
+    tcg_rm = read_cpu_reg(s, rm, sf);
+
+    if (shift_amount) {
+        shift_reg_imm(tcg_rm, tcg_rm, sf, shift_type, shift_amount);
+    }
+
+    if (invert) {
+        tcg_gen_not_i64(tcg_rm, tcg_rm);
+    }
+
+    tcg_rd = cpu_reg(s, rd);
+    tcg_rn = cpu_reg(s, rn);
+
+    switch (opc) {
+    case 0: /* AND, BIC */
+    case 3: /* ANDS, BICS */
+        tcg_gen_and_i64(tcg_rd, tcg_rn, tcg_rm);
+        break;
+    case 1: /* ORR, ORN */
+        tcg_gen_or_i64(tcg_rd, tcg_rn, tcg_rm);
+        break;
+    case 2: /* EOR, EON */
+        tcg_gen_xor_i64(tcg_rd, tcg_rn, tcg_rm);
+        break;
+    default:
+        assert(FALSE);
+        break;
+    }
+
+    if (!sf) {
+        tcg_gen_ext32u_i64(tcg_rd, tcg_rd);
+    }
+
+    if (opc == 3) {
+        gen_logic_CC(sf, tcg_rd);
+    }
 }
 
 /* Add/subtract (extended register) */