@@ -93,9 +93,16 @@ static void arm_cpu_reset(CPUState *s)
env->pstate = PSTATE_MODE_EL0t;
/* Userspace expects access to CTL_EL0 and the cache ops */
env->cp15.c1_sys |= SCTLR_UCT | SCTLR_UCI;
+ /* and to the FP/Neon instructions */
+ env->cp15.c1_coproc = deposit64(env->cp15.c1_coproc, 20, 2, 3);
#else
env->pstate = PSTATE_MODE_EL1h;
#endif
+ } else {
+#if defined(CONFIG_USER_ONLY)
+ /* Userspace expects access to cp10 and cp11 for FP/Neon */
+ env->cp15.c1_coproc = deposit64(env->cp15.c1_coproc, 20, 4, 0xf);
+#endif
}
#if defined(CONFIG_USER_ONLY)
@@ -713,6 +713,13 @@ static inline uint32_t syn_cp15_rrt_trap(int cv, int cond, int opc1, int crm,
| (rt2 << 10) | (rt << 5) | (crm << 1) | isread;
}
+/* Build the syndrome value reported for a trapped FP/Neon access.
+ * The exception class field is EC_ADVSIMDFPACCESSTRAP; ARM_EL_IL is set
+ * unless the trapping instruction is Thumb; cv is placed at bit 24 and
+ * cond starting at bit 20.
+ */
+static inline uint32_t syn_fp_access_trap(int cv, int cond, bool is_thumb)
+{
+    uint32_t syndrome = EC_ADVSIMDFPACCESSTRAP << ARM_EL_EC_SHIFT;
+
+    if (!is_thumb) {
+        syndrome |= ARM_EL_IL;
+    }
+    syndrome |= (cv << 24) | (cond << 20);
+
+    return syndrome;
+}
+
enum arm_cpu_mode {
ARM_CPU_MODE_USR = 0x10,
ARM_CPU_MODE_FIQ = 0x11,
@@ -1266,6 +1273,8 @@ static inline int cpu_mmu_index (CPUARMState *env)
/* Bit usage when in AArch64 state */
#define ARM_TBFLAG_AA64_EL_SHIFT 0
#define ARM_TBFLAG_AA64_EL_MASK (0x3 << ARM_TBFLAG_AA64_EL_SHIFT)
+/* FPEN: set when CPACR.FPEN permits FP/Neon access at the current
+ * privilege level; clear when such accesses must trap.
+ */
+#define ARM_TBFLAG_AA64_FPEN_SHIFT 2
+#define ARM_TBFLAG_AA64_FPEN_MASK (1 << ARM_TBFLAG_AA64_FPEN_SHIFT)
/* some convenience accessor macros */
#define ARM_TBFLAG_AARCH64_STATE(F) \
@@ -1286,14 +1295,21 @@ static inline int cpu_mmu_index (CPUARMState *env)
(((F) & ARM_TBFLAG_BSWAP_CODE_MASK) >> ARM_TBFLAG_BSWAP_CODE_SHIFT)
#define ARM_TBFLAG_AA64_EL(F) \
(((F) & ARM_TBFLAG_AA64_EL_MASK) >> ARM_TBFLAG_AA64_EL_SHIFT)
+/* Extract the single-bit FPEN flag (0 or 1) from a TB flags word F */
+#define ARM_TBFLAG_AA64_FPEN(F) \
+ (((F) & ARM_TBFLAG_AA64_FPEN_MASK) >> ARM_TBFLAG_AA64_FPEN_SHIFT)
static inline void cpu_get_tb_cpu_state(CPUARMState *env, target_ulong *pc,
target_ulong *cs_base, int *flags)
{
+ int fpen = extract32(env->cp15.c1_coproc, 20, 2);
+
if (is_a64(env)) {
*pc = env->pc;
*flags = ARM_TBFLAG_AARCH64_STATE_MASK
| (arm_current_pl(env) << ARM_TBFLAG_AA64_EL_SHIFT);
+ if (fpen == 3 || (fpen == 1 && arm_current_pl(env) != 0)) {
+ *flags |= ARM_TBFLAG_AA64_FPEN_MASK;
+ }
} else {
int privmode;
*pc = env->regs[15];
@@ -871,6 +871,23 @@ static void do_vec_ld(DisasContext *s, int destidx, int element,
tcg_temp_free_i64(tcg_tmp);
}
+/* Gate for every A64 FP/Neon instruction.
+ *
+ * Returns true when the translation context says FP/Neon access is
+ * enabled (s->cpacr_fpen). Otherwise emits code that raises EXCP_UDEF
+ * with an FP access trap syndrome and returns false, in which case the
+ * caller must not emit any code for the instruction.
+ *
+ * Call this only after all unallocated-encoding checks have passed;
+ * doing it earlier would report the wrong syndrome information for
+ * the resulting exception.
+ */
+static inline bool fp_access_check(DisasContext *s)
+{
+    bool fp_enabled = s->cpacr_fpen;
+
+    if (!fp_enabled) {
+        gen_exception_insn(s, 4, EXCP_UDEF,
+                           syn_fp_access_trap(1, 0xe, false));
+    }
+
+    return fp_enabled;
+}
+
/*
* This utility function is for doing register extension with an
* optional shift. You will likely want to pass a temporary for the
@@ -1709,6 +1726,9 @@ static void disas_ld_lit(DisasContext *s, uint32_t insn)
return;
}
size = 2 + opc;
+ if (!fp_access_check(s)) {
+ return;
+ }
} else {
if (opc == 3) {
/* PRFM (literal) : prefetch */
@@ -1818,6 +1838,10 @@ static void disas_ldst_pair(DisasContext *s, uint32_t insn)
break;
}
+ if (is_vector && !fp_access_check(s)) {
+ return;
+ }
+
offset <<= size;
if (rn == 31) {
@@ -1909,6 +1933,9 @@ static void disas_ldst_reg_imm9(DisasContext *s, uint32_t insn)
return;
}
is_store = ((opc & 1) == 0);
+ if (!fp_access_check(s)) {
+ return;
+ }
} else {
if (size == 3 && opc == 2) {
/* PRFM - prefetch */
@@ -2024,6 +2051,9 @@ static void disas_ldst_reg_roffset(DisasContext *s, uint32_t insn)
return;
}
is_store = !extract32(opc, 0, 1);
+ if (!fp_access_check(s)) {
+ return;
+ }
} else {
if (size == 3 && opc == 2) {
/* PRFM - prefetch */
@@ -2104,6 +2134,9 @@ static void disas_ldst_reg_unsigned_imm(DisasContext *s, uint32_t insn)
return;
}
is_store = !extract32(opc, 0, 1);
+ if (!fp_access_check(s)) {
+ return;
+ }
} else {
if (size == 3 && opc == 2) {
/* PRFM - prefetch */
@@ -2261,6 +2294,10 @@ static void disas_ldst_multiple_struct(DisasContext *s, uint32_t insn)
return;
}
+ if (!fp_access_check(s)) {
+ return;
+ }
+
if (rn == 31) {
gen_check_sp_alignment(s);
}
@@ -2387,6 +2424,10 @@ static void disas_ldst_single_struct(DisasContext *s, uint32_t insn)
g_assert_not_reached();
}
+ if (!fp_access_check(s)) {
+ return;
+ }
+
ebytes = 1 << scale;
if (rn == 31) {
@@ -3864,6 +3905,10 @@ static void disas_fp_compare(DisasContext *s, uint32_t insn)
return;
}
+ if (!fp_access_check(s)) {
+ return;
+ }
+
handle_fp_compare(s, type, rn, rm, opc & 1, opc & 2);
}
@@ -3892,6 +3937,10 @@ static void disas_fp_ccomp(DisasContext *s, uint32_t insn)
return;
}
+ if (!fp_access_check(s)) {
+ return;
+ }
+
if (cond < 0x0e) { /* not always */
int label_match = gen_new_label();
label_continue = gen_new_label();
@@ -3948,6 +3997,10 @@ static void disas_fp_csel(DisasContext *s, uint32_t insn)
return;
}
+ if (!fp_access_check(s)) {
+ return;
+ }
+
if (cond < 0x0e) { /* not always */
int label_match = gen_new_label();
label_continue = gen_new_label();
@@ -4165,6 +4218,10 @@ static void disas_fp_1src(DisasContext *s, uint32_t insn)
unallocated_encoding(s);
return;
}
+ if (!fp_access_check(s)) {
+ return;
+ }
+
handle_fp_fcvt(s, opcode, rd, rn, dtype, type);
break;
}
@@ -4174,9 +4231,17 @@ static void disas_fp_1src(DisasContext *s, uint32_t insn)
/* 32-to-32 and 64-to-64 ops */
switch (type) {
case 0:
+ if (!fp_access_check(s)) {
+ return;
+ }
+
handle_fp_1src_single(s, opcode, rd, rn);
break;
case 1:
+ if (!fp_access_check(s)) {
+ return;
+ }
+
handle_fp_1src_double(s, opcode, rd, rn);
break;
default:
@@ -4316,9 +4381,15 @@ static void disas_fp_2src(DisasContext *s, uint32_t insn)
switch (type) {
case 0:
+ if (!fp_access_check(s)) {
+ return;
+ }
handle_fp_2src_single(s, opcode, rd, rn, rm);
break;
case 1:
+ if (!fp_access_check(s)) {
+ return;
+ }
handle_fp_2src_double(s, opcode, rd, rn, rm);
break;
default:
@@ -4420,9 +4491,15 @@ static void disas_fp_3src(DisasContext *s, uint32_t insn)
switch (type) {
case 0:
+ if (!fp_access_check(s)) {
+ return;
+ }
handle_fp_3src_single(s, o0, o1, rd, rn, rm, ra);
break;
case 1:
+ if (!fp_access_check(s)) {
+ return;
+ }
handle_fp_3src_double(s, o0, o1, rd, rn, rm, ra);
break;
default:
@@ -4449,6 +4526,10 @@ static void disas_fp_imm(DisasContext *s, uint32_t insn)
return;
}
+ if (!fp_access_check(s)) {
+ return;
+ }
+
/* The imm8 encodes the sign bit, enough bits to represent
* an exponent in the range 01....1xx to 10....0xx,
* and the most significant 4 bits of the mantissa; see
@@ -4635,6 +4716,10 @@ static void disas_fp_fixed_conv(DisasContext *s, uint32_t insn)
return;
}
+ if (!fp_access_check(s)) {
+ return;
+ }
+
handle_fpfpcvt(s, rd, rn, opcode, itof, FPROUNDING_ZERO, scale, sf, type);
}
@@ -4734,6 +4819,9 @@ static void disas_fp_int_conv(DisasContext *s, uint32_t insn)
break;
}
+ if (!fp_access_check(s)) {
+ return;
+ }
handle_fmov(s, rd, rn, type, itof);
} else {
/* actual FP conversions */
@@ -4744,6 +4832,9 @@ static void disas_fp_int_conv(DisasContext *s, uint32_t insn)
return;
}
+ if (!fp_access_check(s)) {
+ return;
+ }
handle_fpfpcvt(s, rd, rn, opcode, itof, rmode, 64, sf, type);
}
}
@@ -4844,6 +4935,10 @@ static void disas_simd_ext(DisasContext *s, uint32_t insn)
return;
}
+ if (!fp_access_check(s)) {
+ return;
+ }
+
tcg_resh = tcg_temp_new_i64();
tcg_resl = tcg_temp_new_i64();
@@ -4914,6 +5009,10 @@ static void disas_simd_tb(DisasContext *s, uint32_t insn)
return;
}
+ if (!fp_access_check(s)) {
+ return;
+ }
+
/* This does a table lookup: for every byte element in the input
* we index into a table formed from up to four vector registers,
* and then the output is the result of the lookups. Our helper
@@ -4984,6 +5083,10 @@ static void disas_simd_zip_trn(DisasContext *s, uint32_t insn)
return;
}
+ if (!fp_access_check(s)) {
+ return;
+ }
+
tcg_resl = tcg_const_i64(0);
tcg_resh = tcg_const_i64(0);
tcg_res = tcg_temp_new_i64();
@@ -5117,6 +5220,10 @@ static void disas_simd_across_lanes(DisasContext *s, uint32_t insn)
return;
}
+ if (!fp_access_check(s)) {
+ return;
+ }
+
esize = 8 << size;
elements = (is_q ? 128 : 64) / esize;
@@ -5249,6 +5356,10 @@ static void handle_simd_dupe(DisasContext *s, int is_q, int rd, int rn,
return;
}
+ if (!fp_access_check(s)) {
+ return;
+ }
+
index = imm5 >> (size + 1);
tmp = tcg_temp_new_i64();
@@ -5283,6 +5394,10 @@ static void handle_simd_dupes(DisasContext *s, int rd, int rn,
return;
}
+ if (!fp_access_check(s)) {
+ return;
+ }
+
index = imm5 >> (size + 1);
/* This instruction just extracts the specified element and
@@ -5315,6 +5430,11 @@ static void handle_simd_dupg(DisasContext *s, int is_q, int rd, int rn,
unallocated_encoding(s);
return;
}
+
+ if (!fp_access_check(s)) {
+ return;
+ }
+
for (i = 0; i < elements; i++) {
write_vec_element(s, cpu_reg(s, rn), rd, i, size);
}
@@ -5344,6 +5464,11 @@ static void handle_simd_inse(DisasContext *s, int rd, int rn,
unallocated_encoding(s);
return;
}
+
+ if (!fp_access_check(s)) {
+ return;
+ }
+
dst_index = extract32(imm5, 1+size, 5);
src_index = extract32(imm4, size, 4);
@@ -5376,6 +5501,10 @@ static void handle_simd_insg(DisasContext *s, int rd, int rn, int imm5)
return;
}
+ if (!fp_access_check(s)) {
+ return;
+ }
+
idx = extract32(imm5, 1 + size, 4 - size);
write_vec_element(s, cpu_reg(s, rn), rd, idx, size);
}
@@ -5413,6 +5542,11 @@ static void handle_simd_umov_smov(DisasContext *s, int is_q, int is_signed,
return;
}
}
+
+ if (!fp_access_check(s)) {
+ return;
+ }
+
element = extract32(imm5, 1+size, 4);
tcg_rd = cpu_reg(s, rd);
@@ -5505,6 +5639,10 @@ static void disas_simd_mod_imm(DisasContext *s, uint32_t insn)
return;
}
+ if (!fp_access_check(s)) {
+ return;
+ }
+
/* See AdvSIMDExpandImm() in ARM ARM */
switch (cmode_3_1) {
case 0: /* Replicate(Zeros(24):imm8, 2) */
@@ -5653,6 +5791,10 @@ static void disas_simd_scalar_pairwise(DisasContext *s, uint32_t insn)
unallocated_encoding(s);
return;
}
+ if (!fp_access_check(s)) {
+ return;
+ }
+
TCGV_UNUSED_PTR(fpst);
break;
case 0xc: /* FMAXNMP */
@@ -5665,6 +5807,10 @@ static void disas_simd_scalar_pairwise(DisasContext *s, uint32_t insn)
unallocated_encoding(s);
return;
}
+ if (!fp_access_check(s)) {
+ return;
+ }
+
size = extract32(size, 0, 1) ? 3 : 2;
fpst = get_fpstatus_ptr();
break;
@@ -5867,6 +6013,10 @@ static void handle_scalar_simd_shri(DisasContext *s,
return;
}
+ if (!fp_access_check(s)) {
+ return;
+ }
+
switch (opcode) {
case 0x02: /* SSRA / USRA (accumulate) */
accumulate = true;
@@ -5917,6 +6067,10 @@ static void handle_scalar_simd_shli(DisasContext *s, bool insert,
return;
}
+ if (!fp_access_check(s)) {
+ return;
+ }
+
tcg_rn = read_fp_dreg(s, rn);
tcg_rd = insert ? read_fp_dreg(s, rd) : tcg_temp_new_i64();
@@ -5995,6 +6149,10 @@ static void disas_simd_scalar_three_reg_diff(DisasContext *s, uint32_t insn)
return;
}
+ if (!fp_access_check(s)) {
+ return;
+ }
+
if (size == 2) {
TCGv_i64 tcg_op1 = tcg_temp_new_i64();
TCGv_i64 tcg_op2 = tcg_temp_new_i64();
@@ -6379,6 +6537,10 @@ static void disas_simd_scalar_three_reg_same(DisasContext *s, uint32_t insn)
return;
}
+ if (!fp_access_check(s)) {
+ return;
+ }
+
handle_3same_float(s, extract32(size, 0, 1), 1, fpopcode, rd, rn, rm);
return;
}
@@ -6411,6 +6573,10 @@ static void disas_simd_scalar_three_reg_same(DisasContext *s, uint32_t insn)
return;
}
+ if (!fp_access_check(s)) {
+ return;
+ }
+
tcg_rd = tcg_temp_new_i64();
if (size == 3) {
@@ -6561,7 +6727,13 @@ static void handle_2misc_fcmp_zero(DisasContext *s, int opcode,
int size, int rn, int rd)
{
bool is_double = (size == 3);
- TCGv_ptr fpst = get_fpstatus_ptr();
+ TCGv_ptr fpst;
+
+ if (!fp_access_check(s)) {
+ return;
+ }
+
+ fpst = get_fpstatus_ptr();
if (is_double) {
TCGv_i64 tcg_op = tcg_temp_new_i64();
@@ -6743,6 +6915,10 @@ static void disas_simd_scalar_two_reg_misc(DisasContext *s, uint32_t insn)
return;
}
+ if (!fp_access_check(s)) {
+ return;
+ }
+
if (size == 3) {
TCGv_i64 tcg_rn = read_fp_dreg(s, rn);
TCGv_i64 tcg_rd = tcg_temp_new_i64();
@@ -6785,6 +6961,10 @@ static void handle_vec_simd_shri(DisasContext *s, bool is_q, bool is_u,
return;
}
+ if (!fp_access_check(s)) {
+ return;
+ }
+
switch (opcode) {
case 0x02: /* SSRA / USRA (accumulate) */
accumulate = true;
@@ -6849,6 +7029,10 @@ static void handle_vec_simd_shli(DisasContext *s, bool is_q, bool insert,
return;
}
+ if (!fp_access_check(s)) {
+ return;
+ }
+
for (i = 0; i < elements; i++) {
read_vec_element(s, tcg_rn, rn, i, size);
if (insert) {
@@ -6884,6 +7068,10 @@ static void handle_vec_simd_wshli(DisasContext *s, bool is_q, bool is_u,
return;
}
+ if (!fp_access_check(s)) {
+ return;
+ }
+
/* For the LL variants the store is larger than the load,
* so if rd == rn we would overwrite parts of our input.
* So load everything right now and use shifts in the main loop.
@@ -7173,6 +7361,10 @@ static void disas_simd_three_reg_diff(DisasContext *s, uint32_t insn)
unallocated_encoding(s);
return;
}
+ if (!fp_access_check(s)) {
+ return;
+ }
+
handle_3rd_widening(s, is_q, is_u, size, opcode, rd, rn, rm);
break;
default:
@@ -7191,11 +7383,15 @@ static void disas_simd_3same_logic(DisasContext *s, uint32_t insn)
int size = extract32(insn, 22, 2);
bool is_u = extract32(insn, 29, 1);
bool is_q = extract32(insn, 30, 1);
- TCGv_i64 tcg_op1 = tcg_temp_new_i64();
- TCGv_i64 tcg_op2 = tcg_temp_new_i64();
- TCGv_i64 tcg_res[2];
+ TCGv_i64 tcg_op1, tcg_op2, tcg_res[2];
int pass;
+ if (!fp_access_check(s)) {
+ return;
+ }
+
+ tcg_op1 = tcg_temp_new_i64();
+ tcg_op2 = tcg_temp_new_i64();
tcg_res[0] = tcg_temp_new_i64();
tcg_res[1] = tcg_temp_new_i64();
@@ -7298,6 +7494,10 @@ static void handle_simd_3same_pair(DisasContext *s, int is_q, int u, int opcode,
TCGV_UNUSED_PTR(fpst);
}
+ if (!fp_access_check(s)) {
+ return;
+ }
+
/* These operations work on the concatenated rm:rn, with each pair of
* adjacent elements being operated on to produce an element in the result.
*/
@@ -7490,6 +7690,10 @@ static void disas_simd_3same_float(DisasContext *s, uint32_t insn)
case 0x5f: /* FDIV */
case 0x7a: /* FABD */
case 0x7c: /* FCMGT */
+ if (!fp_access_check(s)) {
+ return;
+ }
+
handle_3same_float(s, size, elements, fpopcode, rd, rn, rm);
return;
default:
@@ -7544,6 +7748,10 @@ static void disas_simd_3same_int(DisasContext *s, uint32_t insn)
break;
}
+ if (!fp_access_check(s)) {
+ return;
+ }
+
if (size == 3) {
for (pass = 0; pass < (is_q ? 2 : 1); pass++) {
TCGv_i64 tcg_op1 = tcg_temp_new_i64();
@@ -7936,6 +8144,10 @@ static void handle_rev(DisasContext *s, int opcode, bool u,
return;
}
+ if (!fp_access_check(s)) {
+ return;
+ }
+
if (size == 0) {
/* Special case bytes, use bswap op on each group of elements */
int groups = dsize / (8 << grp_size);
@@ -8031,6 +8243,10 @@ static void disas_simd_two_reg_misc(DisasContext *s, uint32_t insn)
unallocated_encoding(s);
return;
}
+ if (!fp_access_check(s)) {
+ return;
+ }
+
handle_2misc_narrow(s, opcode, u, is_q, size, rn, rd);
return;
case 0x2: /* SADDLP, UADDLP */
@@ -8139,6 +8355,10 @@ static void disas_simd_two_reg_misc(DisasContext *s, uint32_t insn)
return;
}
+ if (!fp_access_check(s)) {
+ return;
+ }
+
if (size == 3) {
/* All 64-bit element operations can be shared with scalar 2misc */
int pass;
@@ -8400,6 +8620,10 @@ static void disas_simd_indexed(DisasContext *s, uint32_t insn)
}
}
+ if (!fp_access_check(s)) {
+ return;
+ }
+
if (is_fp) {
fpst = get_fpstatus_ptr();
} else {
@@ -8898,7 +9122,7 @@ void gen_intermediate_code_internal_a64(ARMCPU *cpu,
#if !defined(CONFIG_USER_ONLY)
dc->user = (ARM_TBFLAG_AA64_EL(tb->flags) == 0);
#endif
- dc->vfp_enabled = 0;
+ dc->cpacr_fpen = ARM_TBFLAG_AA64_FPEN(tb->flags);
dc->vec_len = 0;
dc->vec_stride = 0;
dc->cp_regs = cpu->cp_regs;
@@ -20,7 +20,8 @@ typedef struct DisasContext {
#if !defined(CONFIG_USER_ONLY)
int user;
#endif
- int vfp_enabled;
+ bool cpacr_fpen; /* FP enabled via CPACR.FPEN */
+ bool vfp_enabled; /* FP enabled via FPEXC.EN */
int vec_len;
int vec_stride;
/* Immediate value in AArch32 SVC insn; must be set if is_jmp == DISAS_SWI
For the A64 instruction set, the only FP/Neon disable trap is the CPACR FPEN
bits, which may indicate "enabled", "disabled" or "disabled for EL0". Add a
bit to the AArch64 tb flags indicating whether FP/Neon access is currently
enabled and make the decoder emit code to raise exceptions on use of FP/Neon
insns if it is not.

We use a new flag in DisasContext rather than borrowing the existing
vfp_enabled flag because the A32/T32 decoder is going to need both.

Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
---
I'm aware this is a rather hard-to-review patch; sorry. I have done an
exhaustive check that we have fp access checks in all code paths with the
aid of the assertions added in the next patch plus the code-coverage hack
patch I posted to the list earlier.

For the record, this patch is correct for all commits up to:
  target-arm: A64: Implement remaining 3-same instructions
If we add further SIMD instructions before this patch hits master, it will
need additional fp access check hunks...
---
 target-arm/cpu.c           |   7 ++
 target-arm/cpu.h           |  16 ++++
 target-arm/translate-a64.c | 234 ++++++++++++++++++++++++++++++++++++++++++++-
 target-arm/translate.h     |   3 +-
 4 files changed, 254 insertions(+), 6 deletions(-)