From patchwork Tue Feb 15 13:44:41 2011 Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit X-Patchwork-Submitter: Peter Maydell X-Patchwork-Id: 145 Return-Path: Delivered-To: unknown Received: from imap.gmail.com (74.125.159.109) by localhost6.localdomain6 with IMAP4-SSL; 08 Jun 2011 14:40:20 -0000 Delivered-To: patches@linaro.org Received: by 10.146.83.12 with SMTP id g12cs344780yab; Tue, 15 Feb 2011 05:44:55 -0800 (PST) Received: by 10.90.81.19 with SMTP id e19mr6160467agb.69.1297777495656; Tue, 15 Feb 2011 05:44:55 -0800 (PST) Received: from mnementh.archaic.org.uk (mnementh.archaic.org.uk [81.2.115.146]) by mx.google.com with ESMTPS id a16si7434264anp.108.2011.02.15.05.44.54 (version=TLSv1/SSLv3 cipher=OTHER); Tue, 15 Feb 2011 05:44:55 -0800 (PST) Received-SPF: pass (google.com: best guess record for domain of pm215@archaic.org.uk designates 81.2.115.146 as permitted sender) client-ip=81.2.115.146; Authentication-Results: mx.google.com; spf=pass (google.com: best guess record for domain of pm215@archaic.org.uk designates 81.2.115.146 as permitted sender) smtp.mail=pm215@archaic.org.uk Received: from pm215 by mnementh.archaic.org.uk with local (Exim 4.72) (envelope-from ) id 1PpLD0-0001OK-Ce; Tue, 15 Feb 2011 13:44:50 +0000 From: Peter Maydell To: qemu-devel@nongnu.org Cc: patches@linaro.org, Christophe Lyon Subject: [PATCH 01/10] target-arm: Fix rounding constant addition for Neon shifts Date: Tue, 15 Feb 2011 13:44:41 +0000 Message-Id: <1297777490-5323-2-git-send-email-peter.maydell@linaro.org> X-Mailer: git-send-email 1.7.2.3 In-Reply-To: <1297777490-5323-1-git-send-email-peter.maydell@linaro.org> References: <1297777490-5323-1-git-send-email-peter.maydell@linaro.org> From: Christophe Lyon Handle cases where adding the rounding constant could overflow in Neon shift instructions: VRSHR, VRSRA, VQRSHRN, VQRSHRUN, VRSHRN. 
Signed-off-by: Christophe Lyon [peter.maydell@linaro.org: fix handling of large shifts in rshl_s32, calculate signed saturated value as other functions do.] Signed-off-by: Peter Maydell --- target-arm/neon_helper.c | 139 ++++++++++++++++++++++++++++++++++++++++++---- 1 files changed, 127 insertions(+), 12 deletions(-) diff --git a/target-arm/neon_helper.c b/target-arm/neon_helper.c index dc09968..cc63636 100644 --- a/target-arm/neon_helper.c +++ b/target-arm/neon_helper.c @@ -558,9 +558,28 @@ uint64_t HELPER(neon_shl_s64)(uint64_t valop, uint64_t shiftop) }} while (0) NEON_VOP(rshl_s8, neon_s8, 4) NEON_VOP(rshl_s16, neon_s16, 2) -NEON_VOP(rshl_s32, neon_s32, 1) #undef NEON_FN +/* The addition of the rounding constant may overflow, so we use an + * intermediate 64-bit accumulator. */ +uint32_t HELPER(neon_rshl_s32)(uint32_t valop, uint32_t shiftop) +{ + int32_t dest; + int32_t val = (int32_t)valop; + int8_t shift = (int8_t)shiftop; + if ((shift >= 32) || (shift <= -32)) { + dest = 0; + } else if (shift < 0) { + int64_t big_dest = ((int64_t)val + (1 << (-1 - shift))); + dest = big_dest >> -shift; + } else { + dest = val << shift; + } + return dest; +} + +/* Handling addition overflow with 64-bit input values is more + * tricky than with 32-bit values. */ uint64_t HELPER(neon_rshl_s64)(uint64_t valop, uint64_t shiftop) { int8_t shift = (int8_t)shiftop; @@ -574,7 +593,16 @@ uint64_t HELPER(neon_rshl_s64)(uint64_t valop, uint64_t shiftop) val++; val >>= 1; } else if (shift < 0) { - val = (val + ((int64_t)1 << (-1 - shift))) >> -shift; + val >>= (-shift - 1); + if (val == INT64_MAX) { + /* In this case, it means that the rounding constant is 1, + * and the addition would overflow. Return the actual + * result directly. 
*/ + val = 0x4000000000000000LL; + } else { + val++; + val >>= 1; + } } else { val <<= shift; } @@ -596,9 +624,29 @@ uint64_t HELPER(neon_rshl_s64)(uint64_t valop, uint64_t shiftop) }} while (0) NEON_VOP(rshl_u8, neon_u8, 4) NEON_VOP(rshl_u16, neon_u16, 2) -NEON_VOP(rshl_u32, neon_u32, 1) #undef NEON_FN +/* The addition of the rounding constant may overflow, so we use an + * intermediate 64-bit accumulator. */ +uint32_t HELPER(neon_rshl_u32)(uint32_t val, uint32_t shiftop) +{ + uint32_t dest; + int8_t shift = (int8_t)shiftop; + if (shift >= 32 || shift < -32) { + dest = 0; + } else if (shift == -32) { + dest = val >> 31; + } else if (shift < 0) { + uint64_t big_dest = ((uint64_t)val + (1 << (-1 - shift))); + dest = big_dest >> -shift; + } else { + dest = val << shift; + } + return dest; +} + +/* Handling addition overflow with 64-bit input values is more + * tricky than with 32-bit values. */ uint64_t HELPER(neon_rshl_u64)(uint64_t val, uint64_t shiftop) { int8_t shift = (uint8_t)shiftop; @@ -607,9 +655,17 @@ uint64_t HELPER(neon_rshl_u64)(uint64_t val, uint64_t shiftop) } else if (shift == -64) { /* Rounding a 1-bit result just preserves that bit. */ val >>= 63; - } if (shift < 0) { - val = (val + ((uint64_t)1 << (-1 - shift))) >> -shift; - val >>= -shift; + } else if (shift < 0) { + val >>= (-shift - 1); + if (val == UINT64_MAX) { + /* In this case, it means that the rounding constant is 1, + * and the addition would overflow. Return the actual + * result directly. */ + val = 0x8000000000000000ULL; + } else { + val++; + val >>= 1; + } } else { val <<= shift; } @@ -784,14 +840,43 @@ uint64_t HELPER(neon_qshlu_s64)(CPUState *env, uint64_t valop, uint64_t shiftop) }} while (0) NEON_VOP_ENV(qrshl_u8, neon_u8, 4) NEON_VOP_ENV(qrshl_u16, neon_u16, 2) -NEON_VOP_ENV(qrshl_u32, neon_u32, 1) #undef NEON_FN +/* The addition of the rounding constant may overflow, so we use an + * intermediate 64-bit accumulator. 
*/ +uint32_t HELPER(neon_qrshl_u32)(CPUState *env, uint32_t val, uint32_t shiftop) +{ + uint32_t dest; + int8_t shift = (int8_t)shiftop; + if (shift < 0) { + uint64_t big_dest = ((uint64_t)val + (1 << (-1 - shift))); + dest = big_dest >> -shift; + } else { + dest = val << shift; + if ((dest >> shift) != val) { + SET_QC(); + dest = ~0; + } + } + return dest; +} + +/* Handling addition overflow with 64-bit input values is more + * tricky than with 32-bit values. */ uint64_t HELPER(neon_qrshl_u64)(CPUState *env, uint64_t val, uint64_t shiftop) { int8_t shift = (int8_t)shiftop; if (shift < 0) { - val = (val + (1 << (-1 - shift))) >> -shift; + val >>= (-shift - 1); + if (val == UINT64_MAX) { + /* In this case, it means that the rounding constant is 1, + * and the addition would overflow. Return the actual + * result directly. */ + val = 0x8000000000000000ULL; + } else { + val++; + val >>= 1; + } } else { \ uint64_t tmp = val; val <<= shift; @@ -817,22 +902,52 @@ uint64_t HELPER(neon_qrshl_u64)(CPUState *env, uint64_t val, uint64_t shiftop) }} while (0) NEON_VOP_ENV(qrshl_s8, neon_s8, 4) NEON_VOP_ENV(qrshl_s16, neon_s16, 2) -NEON_VOP_ENV(qrshl_s32, neon_s32, 1) #undef NEON_FN +/* The addition of the rounding constant may overflow, so we use an + * intermediate 64-bit accumulator. */ +uint32_t HELPER(neon_qrshl_s32)(CPUState *env, uint32_t valop, uint32_t shiftop) +{ + int32_t dest; + int32_t val = (int32_t)valop; + int8_t shift = (int8_t)shiftop; + if (shift < 0) { + int64_t big_dest = ((int64_t)val + (1 << (-1 - shift))); + dest = big_dest >> -shift; + } else { + dest = val << shift; + if ((dest >> shift) != val) { + SET_QC(); + dest = (val >> 31) ^ ~SIGNBIT; + } + } + return dest; +} + +/* Handling addition overflow with 64-bit input values is more + * tricky than with 32-bit values. 
*/ uint64_t HELPER(neon_qrshl_s64)(CPUState *env, uint64_t valop, uint64_t shiftop) { int8_t shift = (uint8_t)shiftop; int64_t val = valop; if (shift < 0) { - val = (val + (1 << (-1 - shift))) >> -shift; + val >>= (-shift - 1); + if (val == INT64_MAX) { + /* In this case, it means that the rounding constant is 1, + * and the addition would overflow. Return the actual + * result directly. */ + val = 0x4000000000000000ULL; + } else { + val++; + val >>= 1; + } } else { - int64_t tmp = val;; + int64_t tmp = val; val <<= shift; if ((val >> shift) != tmp) { SET_QC(); - val = tmp >> 31; + val = (tmp >> 63) ^ ~SIGNBIT64; } } return val;