Message ID | 20200214181547.21408-12-richard.henderson@linaro.org |
---|---|
State | New |
Headers | show |
Series | target/arm: vfp feature and decodetree cleanup | expand |
On Fri, 14 Feb 2020 at 18:16, Richard Henderson <richard.henderson@linaro.org> wrote: > > All remaining tests for VFP4 are for fused multiply-add insns. > > Signed-off-by: Richard Henderson <richard.henderson@linaro.org> > --- > target/arm/cpu.h | 5 +++++ > target/arm/translate-vfp.inc.c | 12 ++++++++---- > target/arm/translate.c | 2 +- > 3 files changed, 14 insertions(+), 5 deletions(-) > > diff --git a/target/arm/cpu.h b/target/arm/cpu.h > index 4ff28418df..f27b8e35df 100644 > --- a/target/arm/cpu.h > +++ b/target/arm/cpu.h > @@ -3468,6 +3468,11 @@ static inline bool isar_feature_aa32_fp16_dpconv(const ARMISARegisters *id) > return FIELD_EX32(id->mvfr1, MVFR1, FPHP) > 1; > } > > +static inline bool isar_feature_aa32_simdfmac(const ARMISARegisters *id) > +{ > + return FIELD_EX32(id->mvfr1, MVFR1, SIMDFMAC) != 0; > +} This is tricky, because the SIMDFMAC register field indicates "do we have fused-multiply-accumulate for either VFP or Neon", so in a VFP-no-Neon core or a Neon-no-VFP core it will be 1 but can't be used on its own as a gate on "should this insn be present". Currently in the part of arm_cpu_realize() which handles the user having selected vfp=off and/or neon=off we do allow (for AArch32 cores) both of those combinations. trans_VFM_dp already tests aa32_fpdp_v2, so I think the main thing we need to do is add a test on aa32_fpsp_v2 to trans_VFM_sp. We clear the SIMDFMAC field to 0 in the !has_neon condition, and I think that should actually be in the !neon && !vfp part. 
I propose to squash in the following and beef up the commit message: diff --git a/target/arm/cpu.h b/target/arm/cpu.h index f641478fc80..d4c73a20b6a 100644 --- a/target/arm/cpu.h +++ b/target/arm/cpu.h @@ -3506,6 +3506,13 @@ static inline bool isar_feature_aa32_fp16_dpconv(const ARMISARegisters *id) return FIELD_EX32(id->mvfr1, MVFR1, FPHP) > 1; } +/* + * Note that this ID register field covers both VFP and Neon FMAC, + * so should usually be tested in combination with some other + * check that confirms the presence of whichever of VFP or Neon is + * relevant, to avoid accidentally enabling a Neon feature on + * a VFP-no-Neon core or vice-versa. + */ static inline bool isar_feature_aa32_simdfmac(const ARMISARegisters *id) { return FIELD_EX32(id->mvfr1, MVFR1, SIMDFMAC) != 0; diff --git a/target/arm/cpu.c b/target/arm/cpu.c index d5a75c265ac..95ada81ebae 100644 --- a/target/arm/cpu.c +++ b/target/arm/cpu.c @@ -1510,7 +1510,6 @@ static void arm_cpu_realizefn(DeviceState *dev, Error **errp) u = FIELD_DP32(u, MVFR1, SIMDINT, 0); u = FIELD_DP32(u, MVFR1, SIMDSP, 0); u = FIELD_DP32(u, MVFR1, SIMDHP, 0); - u = FIELD_DP32(u, MVFR1, SIMDFMAC, 0); cpu->isar.mvfr1 = u; u = cpu->isar.mvfr2; @@ -1533,6 +1532,11 @@ static void arm_cpu_realizefn(DeviceState *dev, Error **errp) u = cpu->isar.mvfr0; u = FIELD_DP32(u, MVFR0, SIMDREG, 0); cpu->isar.mvfr0 = u; + + /* Despite the name, this field covers both VFP and Neon */ + u = cpu->isar.mvfr1; + u = FIELD_DP32(u, MVFR1, SIMDFMAC, 0); + cpu->isar.mvfr1; } if (arm_feature(env, ARM_FEATURE_M) && !cpu->has_dsp) { diff --git a/target/arm/translate-vfp.inc.c b/target/arm/translate-vfp.inc.c index f6f7601fe2a..69052d840a4 100644 --- a/target/arm/translate-vfp.inc.c +++ b/target/arm/translate-vfp.inc.c @@ -1805,8 +1805,13 @@ static bool trans_VFM_sp(DisasContext *s, arg_VFM_sp *a) * Present in VFPv4 only. * In v7A, UNPREDICTABLE with non-zero vector length/stride; from * v8A, must UNDEF. We choose to UNDEF for both v7A and v8A. 
+ * Note that we can't rely on the SIMDFMAC check alone, because + * in a Neon-no-VFP core that ID register field will be non-zero. */ - if (!dc_isar_feature(aa32_simdfmac, s)) { + if (!dc_isar_feature(aa32_simdfmac, s) || + !dc_isar_feature(aa32_fpsp_v2, s)) { + return false; + } return false; } if (s->vec_len != 0 || s->vec_stride != 0) { thanks -- PMM
On Thu, 20 Feb 2020 at 16:37, Peter Maydell <peter.maydell@linaro.org> wrote: > > On Fri, 14 Feb 2020 at 18:16, Richard Henderson > <richard.henderson@linaro.org> wrote: > > > > All remaining tests for VFP4 are for fused multiply-add insns. > > > > Signed-off-by: Richard Henderson <richard.henderson@linaro.org> > > --- > > target/arm/cpu.h | 5 +++++ > > target/arm/translate-vfp.inc.c | 12 ++++++++---- > > target/arm/translate.c | 2 +- > > 3 files changed, 14 insertions(+), 5 deletions(-) > > > > diff --git a/target/arm/cpu.h b/target/arm/cpu.h > > index 4ff28418df..f27b8e35df 100644 > > --- a/target/arm/cpu.h > > +++ b/target/arm/cpu.h > > @@ -3468,6 +3468,11 @@ static inline bool isar_feature_aa32_fp16_dpconv(const ARMISARegisters *id) > > return FIELD_EX32(id->mvfr1, MVFR1, FPHP) > 1; > > } > > > > +static inline bool isar_feature_aa32_simdfmac(const ARMISARegisters *id) > > +{ > > + return FIELD_EX32(id->mvfr1, MVFR1, SIMDFMAC) != 0; > > +} > > This is tricky, because the SIMDFMAC register > field indicates "do we have fused-multiply-accumulate > for either VFP or Neon", so in a VFP-no-Neon core or > a Neon-no-VFP core it will be 1 but can't be used on its > own as a gate on "should this insn be present". > > Currently in the part of arm_cpu_realize() which handles > the user having selected vfp=off and/or neon=off we > do allow (for AArch32 cores) both of those combinations. > > trans_VFM_dp already tests aa32_fpdp_v2, so I think the > main thing we need to do is add a test on aa32_fpsp_v2 to > trans_VFM_sp. > > We clear the SIMDFMAC field to 0 in the !has_neon condition, > and I think that should actually be in the !neon && !vfp part. > > I propose to squash in the following and beef up the commit message: > > + /* Despite the name, this field covers both VFP and Neon */ > + u = cpu->isar.mvfr1; > + u = FIELD_DP32(u, MVFR1, SIMDFMAC, 0); > + cpu->isar.mvfr1; ... "cpu->isar.mvfr1 = u;", obviously. 
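> - if (!dc_isar_feature(aa32_simdfmac, s)) { > + if (!dc_isar_feature(aa32_simdfmac, s) || > + !dc_isar_feature(aa32_fpsp_v2, s)) { > + return false; > + } and not that extra "return false; }" -- the two added lines duplicate the existing context lines that already close the if, so only the condition itself should change. -- PMM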
On 2/20/20 8:37 AM, Peter Maydell wrote: > This is tricky, because the SIMDFMAC register > field indicates "do we have fused-multiply-accumulate > for either VFP or Neon", so in a VFP-no-Neon core or > a Neon-no-VFP core it will be 1 but can't be used on its > own as a gate on "should this insn be present". > > Currently in the part of arm_cpu_realize() which handles > the user having selected vfp=off and/or neon=off we > do allow (for AArch32 cores) both of those combinations. > > trans_VFM_dp already tests aa32_fpdp_v2, so I think the > main thing we need to do is add a test on aa32_fpsp_v2 to > trans_VFM_sp. > > We clear the SIMDFMAC field to 0 in the !has_neon condition, > and I think that should actually be in the !neon && !vfp part. > > I propose to squash in the following and beef up the commit message: Good catch. Makes sense. r~
diff --git a/target/arm/cpu.h b/target/arm/cpu.h index 4ff28418df..f27b8e35df 100644 --- a/target/arm/cpu.h +++ b/target/arm/cpu.h @@ -3468,6 +3468,11 @@ static inline bool isar_feature_aa32_fp16_dpconv(const ARMISARegisters *id) return FIELD_EX32(id->mvfr1, MVFR1, FPHP) > 1; } +static inline bool isar_feature_aa32_simdfmac(const ARMISARegisters *id) +{ + return FIELD_EX32(id->mvfr1, MVFR1, SIMDFMAC) != 0; +} + static inline bool isar_feature_aa32_vsel(const ARMISARegisters *id) { return FIELD_EX32(id->mvfr2, MVFR2, FPMISC) >= 1; diff --git a/target/arm/translate-vfp.inc.c b/target/arm/translate-vfp.inc.c index 8913320259..f6f7601fe2 100644 --- a/target/arm/translate-vfp.inc.c +++ b/target/arm/translate-vfp.inc.c @@ -1806,8 +1806,10 @@ static bool trans_VFM_sp(DisasContext *s, arg_VFM_sp *a) * In v7A, UNPREDICTABLE with non-zero vector length/stride; from * v8A, must UNDEF. We choose to UNDEF for both v7A and v8A. */ - if (!arm_dc_feature(s, ARM_FEATURE_VFP4) || - (s->vec_len != 0 || s->vec_stride != 0)) { + if (!dc_isar_feature(aa32_simdfmac, s)) { + return false; + } + if (s->vec_len != 0 || s->vec_stride != 0) { return false; } @@ -1864,8 +1866,10 @@ static bool trans_VFM_dp(DisasContext *s, arg_VFM_dp *a) * In v7A, UNPREDICTABLE with non-zero vector length/stride; from * v8A, must UNDEF. We choose to UNDEF for both v7A and v8A. */ - if (!arm_dc_feature(s, ARM_FEATURE_VFP4) || - (s->vec_len != 0 || s->vec_stride != 0)) { + if (!dc_isar_feature(aa32_simdfmac, s)) { + return false; + } + if (s->vec_len != 0 || s->vec_stride != 0) { return false; } diff --git a/target/arm/translate.c b/target/arm/translate.c index 3b9bf13933..0da780102c 100644 --- a/target/arm/translate.c +++ b/target/arm/translate.c @@ -4877,7 +4877,7 @@ static int disas_neon_data_insn(DisasContext *s, uint32_t insn) } break; case NEON_3R_VFM_VQRDMLSH: - if (!arm_dc_feature(s, ARM_FEATURE_VFP4)) { + if (!dc_isar_feature(aa32_simdfmac, s)) { return 1; } break;
All remaining tests for VFP4 are for fused multiply-add insns. Signed-off-by: Richard Henderson <richard.henderson@linaro.org> --- target/arm/cpu.h | 5 +++++ target/arm/translate-vfp.inc.c | 12 ++++++++---- target/arm/translate.c | 2 +- 3 files changed, 14 insertions(+), 5 deletions(-) -- 2.20.1