@@ -1013,8 +1013,36 @@ static int do_recip_sqrt_estimate(int a)
return estimate;
}
+static int do_recip_sqrt_estimate_incprec(int a)
+{
+ /*
+ * The Arm ARM describes the 12-bit precision version of RecipSqrtEstimate
+ * in terms of an infinite-precision floating point calculation of a
+ * square root. We implement this using the same kind of pure integer
+ * algorithm as the 8-bit mantissa, to get the same bit-for-bit result.
+ */
+ int64_t b, estimate;
-static uint64_t recip_sqrt_estimate(int *exp , int exp_off, uint64_t frac)
+ assert(1024 <= a && a < 4096);
+ if (a < 2048) {
+ a = a * 2 + 1;
+ } else {
+ a = (a >> 1) << 1;
+ a = (a + 1) * 2;
+ }
+ b = 8192;
+ while (a * (b + 1) * (b + 1) < (1ULL << 39)) {
+ b += 1;
+ }
+ estimate = (b + 1) / 2;
+
+ assert(4096 <= estimate && estimate < 8192);
+
+ return estimate;
+}
+
+static uint64_t recip_sqrt_estimate(int *exp , int exp_off, uint64_t frac,
+ bool increasedprecision)
{
int estimate;
uint32_t scaled;
@@ -1027,17 +1055,32 @@ static uint64_t recip_sqrt_estimate(int *exp , int exp_off, uint64_t frac)
frac = extract64(frac, 0, 51) << 1;
}
- if (*exp & 1) {
- /* scaled = UInt('01':fraction<51:45>) */
- scaled = deposit32(1 << 7, 0, 7, extract64(frac, 45, 7));
+ if (increasedprecision) {
+ if (*exp & 1) {
+ /* scaled = UInt('01':fraction<51:42>) */
+ scaled = deposit32(1 << 10, 0, 10, extract64(frac, 42, 10));
+ } else {
+ /* scaled = UInt('1':fraction<51:41>) */
+ scaled = deposit32(1 << 11, 0, 11, extract64(frac, 41, 11));
+ }
+ estimate = do_recip_sqrt_estimate_incprec(scaled);
} else {
- /* scaled = UInt('1':fraction<51:44>) */
- scaled = deposit32(1 << 8, 0, 8, extract64(frac, 44, 8));
+ if (*exp & 1) {
+ /* scaled = UInt('01':fraction<51:45>) */
+ scaled = deposit32(1 << 7, 0, 7, extract64(frac, 45, 7));
+ } else {
+ /* scaled = UInt('1':fraction<51:44>) */
+ scaled = deposit32(1 << 8, 0, 8, extract64(frac, 44, 8));
+ }
+ estimate = do_recip_sqrt_estimate(scaled);
}
- estimate = do_recip_sqrt_estimate(scaled);
*exp = (exp_off - *exp) / 2;
- return extract64(estimate, 0, 8) << 44;
+ if (increasedprecision) {
+ return extract64(estimate, 0, 12) << 40;
+ } else {
+ return extract64(estimate, 0, 8) << 44;
+ }
}
uint32_t HELPER(rsqrte_f16)(uint32_t input, float_status *s)
@@ -1076,7 +1119,7 @@ uint32_t HELPER(rsqrte_f16)(uint32_t input, float_status *s)
f64_frac = ((uint64_t) f16_frac) << (52 - 10);
- f64_frac = recip_sqrt_estimate(&f16_exp, 44, f64_frac);
+ f64_frac = recip_sqrt_estimate(&f16_exp, 44, f64_frac, false);
/* result = sign : result_exp<4:0> : estimate<7:0> : Zeros(2) */
val = deposit32(0, 15, 1, f16_sign);
@@ -1125,12 +1168,20 @@ static float32 do_rsqrte_f32(float32 input, float_status *s, bool rpres)
f64_frac = ((uint64_t) f32_frac) << 29;
- f64_frac = recip_sqrt_estimate(&f32_exp, 380, f64_frac);
+ f64_frac = recip_sqrt_estimate(&f32_exp, 380, f64_frac, rpres);
- /* result = sign : result_exp<4:0> : estimate<7:0> : Zeros(15) */
+ /*
+ * result = sign : result_exp<7:0> : estimate<7:0> : Zeros(15)
+ * or for increased precision
+ * result = sign : result_exp<7:0> : estimate<11:0> : Zeros(11)
+ */
val = deposit32(0, 31, 1, f32_sign);
val = deposit32(val, 23, 8, f32_exp);
- val = deposit32(val, 15, 8, extract64(f64_frac, 52 - 8, 8));
+ if (rpres) {
+ val = deposit32(val, 11, 12, extract64(f64_frac, 52 - 12, 12));
+ } else {
+ val = deposit32(val, 15, 8, extract64(f64_frac, 52 - 8, 8));
+ }
return make_float32(val);
}
@@ -1174,7 +1225,7 @@ float64 HELPER(rsqrte_f64)(float64 input, float_status *s)
return float64_zero;
}
- f64_frac = recip_sqrt_estimate(&f64_exp, 3068, f64_frac);
+ f64_frac = recip_sqrt_estimate(&f64_exp, 3068, f64_frac, false);
/* result = sign : result_exp<4:0> : estimate<7:0> : Zeros(44) */
val = deposit64(0, 61, 1, f64_sign);
Implement the increased precision variation of FRSQRTE. In the pseudocode this corresponds to the handling of the "increasedprecision" boolean in the FPRSqrtEstimate() and RecipSqrtEstimate() functions. Signed-off-by: Peter Maydell <peter.maydell@linaro.org> --- target/arm/vfp_helper.c | 77 ++++++++++++++++++++++++++++++++++------- 1 file changed, 64 insertions(+), 13 deletions(-)