From patchwork Sat Dec 9 23:27:47 2017 Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit X-Patchwork-Submitter: Richard Sandiford X-Patchwork-Id: 121305 Delivered-To: patch@linaro.org Received: by 10.140.22.227 with SMTP id 90csp1168953qgn; Sat, 9 Dec 2017 15:28:06 -0800 (PST) X-Google-Smtp-Source: AGs4zMZkpXfwQzhVdATuREdxS0c6HNosf7sARDmnuVBkcD1MlnIl1QWcTtraLTXkcbVPTu125G2o X-Received: by 10.159.194.138 with SMTP id y10mr34510556pln.85.1512862086675; Sat, 09 Dec 2017 15:28:06 -0800 (PST) ARC-Seal: i=1; a=rsa-sha256; t=1512862086; cv=none; d=google.com; s=arc-20160816; b=OZuL8pUvfB56A0s5JpeB3NlYB/Trx2AIFho2rHvw4s+roryL0XKbW1FixMxKDu8Yhp WaDW1OOF3ZL7tgcwJBjc2DX/8q8XMUZ+mpQghtLDUbIsLyRTmIH0MD48t+3n0p76joz2 n4mbRVGXRUnXDsIG0qK+yasge03wa95wnNDFyobh0MYy7JaS0cMu5a7Lo/GH9wsPAp5v EW6vIE/S5niV8K5xO+rrGrsT/YyVd2niuUdPU2us49MSaQpDtEqF6wajHW9KIxthkGWQ +bgxn4hagnPDcHBC5ILYvVqMuDW63ac62uuRruJ655rU/sX6MrLW7h+IDGKIjTA6nG7f Yqeg== ARC-Message-Signature: i=1; a=rsa-sha256; c=relaxed/relaxed; d=google.com; s=arc-20160816; h=mime-version:user-agent:message-id:in-reply-to:date:references :subject:cc:mail-followup-to:to:from:delivered-to:sender:list-help :list-post:list-archive:list-unsubscribe:list-id:precedence :mailing-list:dkim-signature:domainkey-signature :arc-authentication-results; bh=KIXL1kW9NDen3xwvQQirLGdpe+IFK1DodLkapB7Fsbg=; b=kD6bUggII/yIuAOHHbxa9o76vlwnwrRSmoRXFdkJk4bHATl2mPRMBIqsSHqdAZRqbV pBthDh7G11YISwVRqXydjN9wGyHvGRZY92CCbmxLrxdVoI80lsQg3qE8ViHTVk/jA7TA Z6WrD86pIdpoS1FCm2G9nAg7whSZApzrD/wo2a+/drODNOKd914Iy7JErZGdxSne3P6z ryS9OIcIp+1CR/4rlOd6InRCHhE8VcBpAtB+Ka23os/12QxDMffpSUCA53Z+NK27zo9d Hhu3Yg9bAOsk9wwby+RILC3KthD0b0s4PBq3GCANSrAlO3/mJ/V1acjwl1rT0h16H4HS 7sLQ== ARC-Authentication-Results: i=1; mx.google.com; dkim=pass header.i=@gcc.gnu.org header.s=default header.b=qPa0Bfks; spf=pass (google.com: domain of gcc-patches-return-468863-patch=linaro.org@gcc.gnu.org designates 209.132.180.131 as permitted sender) 
smtp.mailfrom=gcc-patches-return-468863-patch=linaro.org@gcc.gnu.org; dmarc=fail (p=NONE sp=NONE dis=NONE) header.from=linaro.org Return-Path: Received: from sourceware.org (server1.sourceware.org. [209.132.180.131]) by mx.google.com with ESMTPS id k63si7728150pgk.199.2017.12.09.15.28.06 for (version=TLS1_2 cipher=ECDHE-RSA-AES128-GCM-SHA256 bits=128/128); Sat, 09 Dec 2017 15:28:06 -0800 (PST) Received-SPF: pass (google.com: domain of gcc-patches-return-468863-patch=linaro.org@gcc.gnu.org designates 209.132.180.131 as permitted sender) client-ip=209.132.180.131; Authentication-Results: mx.google.com; dkim=pass header.i=@gcc.gnu.org header.s=default header.b=qPa0Bfks; spf=pass (google.com: domain of gcc-patches-return-468863-patch=linaro.org@gcc.gnu.org designates 209.132.180.131 as permitted sender) smtp.mailfrom=gcc-patches-return-468863-patch=linaro.org@gcc.gnu.org; dmarc=fail (p=NONE sp=NONE dis=NONE) header.from=linaro.org DomainKey-Signature: a=rsa-sha1; c=nofws; d=gcc.gnu.org; h=list-id :list-unsubscribe:list-archive:list-post:list-help:sender:from :to:cc:subject:references:date:in-reply-to:message-id :mime-version:content-type; q=dns; s=default; b=Q0CVFHnOpW7AudPI HA7ZzbqhcoEd/zlF92x/qtSetDlGsBiHAavyqwS28U11sLFXn7D0LKnvOneoJUnQ jzd/7S2MY+TR8cNaXotg/HmRdq9GUWzgpuB6rZTrexLCID5UqmWiYQzF8HxvBZZH Oqq6DYzF2ibUErELarv7djEf84s= DKIM-Signature: v=1; a=rsa-sha1; c=relaxed; d=gcc.gnu.org; h=list-id :list-unsubscribe:list-archive:list-post:list-help:sender:from :to:cc:subject:references:date:in-reply-to:message-id :mime-version:content-type; s=default; bh=O6FLhJDwvzxnutq6uyL4AE fsLY0=; b=qPa0BfksjsKzYal8MnzWvTRl1oMIhtkKY8ZK11VhdNjrBNmFMU11bf Pm2GZMF6zJNx0XSX0kEh0QVQ+C6bjvhy0M9Mf8KJc75QIQbg9fqvmHJTHzr+OrEd KtmUH1exj5iJIXsXz6bPp4AuCf53/iloov3b70n8/YJkP8/v77YeE= Received: (qmail 112300 invoked by alias); 9 Dec 2017 23:27:53 -0000 Mailing-List: contact gcc-patches-help@gcc.gnu.org; run by ezmlm Precedence: bulk List-Id: List-Unsubscribe: List-Archive: List-Post: List-Help: 
Sender: gcc-patches-owner@gcc.gnu.org Delivered-To: mailing list gcc-patches@gcc.gnu.org Received: (qmail 112290 invoked by uid 89); 9 Dec 2017 23:27:53 -0000 Authentication-Results: sourceware.org; auth=none X-Virus-Found: No X-Spam-SWARE-Status: No, score=-11.7 required=5.0 tests=AWL, BAYES_00, GIT_PATCH_2, GIT_PATCH_3, RCVD_IN_DNSWL_NONE, SPF_PASS autolearn=ham version=3.3.2 spammy= X-HELO: mail-wm0-f46.google.com Received: from mail-wm0-f46.google.com (HELO mail-wm0-f46.google.com) (74.125.82.46) by sourceware.org (qpsmtpd/0.93/v0.84-503-g423c35a) with ESMTP; Sat, 09 Dec 2017 23:27:51 +0000 Received: by mail-wm0-f46.google.com with SMTP id 64so8323895wme.3 for ; Sat, 09 Dec 2017 15:27:51 -0800 (PST) X-Google-DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/relaxed; d=1e100.net; s=20161025; h=x-gm-message-state:from:to:mail-followup-to:cc:subject:references :date:in-reply-to:message-id:user-agent:mime-version; bh=KIXL1kW9NDen3xwvQQirLGdpe+IFK1DodLkapB7Fsbg=; b=m/x2JDJXiEsGW8NkesrM/ae0CzSwgPojYAZXA6JwjA9FHTLgEgInfdVQl6+i2s0nX5 TK9KS7wOWslk86FEgxpjrTnpnrWOY0wVqnjgzVSmw5AbVpxTteZ6A1USatZJ+7qwg5Xn 5yZDS4b6dh/2zRtUmug6kW4z5C/o5qHC1AqmC/kuVnVkWg82GuvSzSlokcB+HNJrNRvi Yp+47tEUQvg4mHlQT1fBBGdajCZPSIMLGyWNtaCKH8wfXYKwt0397gU9JmGjaxatpOqw ShE36LoSTQY3EIKIBbTRzpfh8fJFK3LkamODiwFeYytatQrXnkmEkUkucTfi8i0su4t9 IBgw== X-Gm-Message-State: AKGB3mK8JGL5Mqm69jBjFbprWRViJXr+GPT6l1OmnqVq1ck8k4QW2kTw Kat4fHqv0TVbg7XMeJHmMWH4BA== X-Received: by 10.28.15.201 with SMTP id 192mr6595411wmp.97.1512862069271; Sat, 09 Dec 2017 15:27:49 -0800 (PST) Received: from localhost ([2.25.234.120]) by smtp.gmail.com with ESMTPSA id p200sm5322947wmd.9.2017.12.09.15.27.47 (version=TLS1_2 cipher=ECDHE-RSA-CHACHA20-POLY1305 bits=256/256); Sat, 09 Dec 2017 15:27:48 -0800 (PST) From: Richard Sandiford To: gcc-patches@gcc.gnu.org Mail-Followup-To: gcc-patches@gcc.gnu.org, richard.earnshaw@arm.com, james.greenhalgh@arm.com, marcus.shawcroft@arm.com, richard.sandiford@linaro.org Cc: richard.earnshaw@arm.com, 
james.greenhalgh@arm.com, marcus.shawcroft@arm.com Subject: [13/13] [AArch64] Use vec_perm_indices helper routines References: <87indfmrgt.fsf@linaro.org> Date: Sat, 09 Dec 2017 23:27:47 +0000 In-Reply-To: <87indfmrgt.fsf@linaro.org> (Richard Sandiford's message of "Sat, 09 Dec 2017 23:06:26 +0000") Message-ID: <87shcjjxcc.fsf@linaro.org> User-Agent: Gnus/5.13 (Gnus v5.13) Emacs/25.3 (gnu/linux) MIME-Version: 1.0 This patch makes the AArch64 vec_perm_const code use the new vec_perm_indices routines, instead of checking each element individually. This means that the checks extend naturally to variable-length vectors. Also, aarch64_evpc_dup was the only function that generated rtl when testing_p is true, and that looked accidental. The patch adds the missing check and then replaces the gen_raw_REG/start_sequence/end_sequence stuff with an assert that no rtl is generated. Tested on aarch64-linux-gnu. Also tested by making sure that there were no assembly output differences for aarch64-linux-gnu or aarch64_be-linux-gnu. OK to install? Richard 2017-12-09 Richard Sandiford gcc/ * config/aarch64/aarch64.c (aarch64_evpc_trn): Use d.perm.series_p instead of checking each element individually. (aarch64_evpc_uzp): Likewise. (aarch64_evpc_zip): Likewise. (aarch64_evpc_ext): Likewise. (aarch64_evpc_rev): Likewise. (aarch64_evpc_dup): Test the encoding for a single duplicated element, instead of checking each element individually. Return true without generating rtl if testing_p is true. (aarch64_vectorize_vec_perm_const): Use all_from_input_p to test whether all selected elements come from the same input, instead of checking each element individually. Remove calls to gen_raw_REG, start_sequence and end_sequence and instead assert that no rtl is generated. 
Index: gcc/config/aarch64/aarch64.c =================================================================== --- gcc/config/aarch64/aarch64.c 2017-12-09 22:48:47.535824832 +0000 +++ gcc/config/aarch64/aarch64.c 2017-12-09 22:49:00.139270410 +0000 @@ -13295,7 +13295,7 @@ aarch64_expand_vec_perm (rtx target, rtx static bool aarch64_evpc_trn (struct expand_vec_perm_d *d) { - unsigned int i, odd, mask, nelt = d->perm.length (); + unsigned int odd, nelt = d->perm.length (); rtx out, in0, in1, x; machine_mode vmode = d->vmode; @@ -13304,21 +13304,11 @@ aarch64_evpc_trn (struct expand_vec_perm /* Note that these are little-endian tests. We correct for big-endian later. */ - if (d->perm[0] == 0) - odd = 0; - else if (d->perm[0] == 1) - odd = 1; - else + odd = d->perm[0]; + if ((odd != 0 && odd != 1) + || !d->perm.series_p (0, 2, odd, 2) + || !d->perm.series_p (1, 2, nelt + odd, 2)) return false; - mask = (d->one_vector_p ? nelt - 1 : 2 * nelt - 1); - - for (i = 0; i < nelt; i += 2) - { - if (d->perm[i] != i + odd) - return false; - if (d->perm[i + 1] != ((i + nelt + odd) & mask)) - return false; - } /* Success! */ if (d->testing_p) @@ -13342,7 +13332,7 @@ aarch64_evpc_trn (struct expand_vec_perm static bool aarch64_evpc_uzp (struct expand_vec_perm_d *d) { - unsigned int i, odd, mask, nelt = d->perm.length (); + unsigned int odd; rtx out, in0, in1, x; machine_mode vmode = d->vmode; @@ -13351,20 +13341,10 @@ aarch64_evpc_uzp (struct expand_vec_perm /* Note that these are little-endian tests. We correct for big-endian later. */ - if (d->perm[0] == 0) - odd = 0; - else if (d->perm[0] == 1) - odd = 1; - else + odd = d->perm[0]; + if ((odd != 0 && odd != 1) + || !d->perm.series_p (0, 1, odd, 2)) return false; - mask = (d->one_vector_p ? nelt - 1 : 2 * nelt - 1); - - for (i = 0; i < nelt; i++) - { - unsigned elt = (i * 2 + odd) & mask; - if (d->perm[i] != elt) - return false; - } /* Success! 
*/ if (d->testing_p) @@ -13388,7 +13368,7 @@ aarch64_evpc_uzp (struct expand_vec_perm static bool aarch64_evpc_zip (struct expand_vec_perm_d *d) { - unsigned int i, high, mask, nelt = d->perm.length (); + unsigned int high, nelt = d->perm.length (); rtx out, in0, in1, x; machine_mode vmode = d->vmode; @@ -13397,25 +13377,11 @@ aarch64_evpc_zip (struct expand_vec_perm /* Note that these are little-endian tests. We correct for big-endian later. */ - high = nelt / 2; - if (d->perm[0] == high) - /* Do Nothing. */ - ; - else if (d->perm[0] == 0) - high = 0; - else + high = d->perm[0]; + if ((high != 0 && high * 2 != nelt) + || !d->perm.series_p (0, 2, high, 1) + || !d->perm.series_p (1, 2, high + nelt, 1)) return false; - mask = (d->one_vector_p ? nelt - 1 : 2 * nelt - 1); - - for (i = 0; i < nelt / 2; i++) - { - unsigned elt = (i + high) & mask; - if (d->perm[i * 2] != elt) - return false; - elt = (elt + nelt) & mask; - if (d->perm[i * 2 + 1] != elt) - return false; - } /* Success! */ if (d->testing_p) @@ -13440,23 +13406,14 @@ aarch64_evpc_zip (struct expand_vec_perm static bool aarch64_evpc_ext (struct expand_vec_perm_d *d) { - unsigned int i, nelt = d->perm.length (); + unsigned int nelt = d->perm.length (); rtx offset; unsigned int location = d->perm[0]; /* Always < nelt. */ /* Check if the extracted indices are increasing by one. */ - for (i = 1; i < nelt; i++) - { - unsigned int required = location + i; - if (d->one_vector_p) - { - /* We'll pass the same vector in twice, so allow indices to wrap. */ - required &= (nelt - 1); - } - if (d->perm[i] != required) - return false; - } + if (!d->perm.series_p (0, 1, location, 1)) + return false; /* Success! 
*/ if (d->testing_p) @@ -13488,7 +13445,7 @@ aarch64_evpc_ext (struct expand_vec_perm static bool aarch64_evpc_rev (struct expand_vec_perm_d *d) { - unsigned int i, j, diff, size, unspec, nelt = d->perm.length (); + unsigned int i, diff, size, unspec; if (!d->one_vector_p) return false; @@ -13504,18 +13461,10 @@ aarch64_evpc_rev (struct expand_vec_perm else return false; - for (i = 0; i < nelt ; i += diff + 1) - for (j = 0; j <= diff; j += 1) - { - /* This is guaranteed to be true as the value of diff - is 7, 3, 1 and we should have enough elements in the - queue to generate this. Getting a vector mask with a - value of diff other than these values implies that - something is wrong by the time we get here. */ - gcc_assert (i + j < nelt); - if (d->perm[i + j] != i + diff - j) - return false; - } + unsigned int step = diff + 1; + for (i = 0; i < step; ++i) + if (!d->perm.series_p (i, step, diff - i, step)) + return false; /* Success! */ if (d->testing_p) @@ -13532,15 +13481,17 @@ aarch64_evpc_dup (struct expand_vec_perm rtx out = d->target; rtx in0; machine_mode vmode = d->vmode; - unsigned int i, elt, nelt = d->perm.length (); + unsigned int elt; rtx lane; + if (d->perm.encoding ().encoded_nelts () != 1) + return false; + + /* Success! */ + if (d->testing_p) + return true; + elt = d->perm[0]; - for (i = 1; i < nelt; i++) - { - if (elt != d->perm[i]) - return false; - } /* The generic preparation in aarch64_expand_vec_perm_const_1 swaps the operand order and the permute indices if it finds @@ -13628,61 +13579,37 @@ aarch64_vectorize_vec_perm_const (machin rtx op1, const vec_perm_indices &sel) { struct expand_vec_perm_d d; - unsigned int i, which; - d.vmode = vmode; - d.target = target; - d.op0 = op0; - d.op1 = op1; - d.testing_p = !target; - - /* Calculate whether all elements are in one vector. */ - unsigned int nelt = sel.length (); - for (i = which = 0; i < nelt; ++i) + /* Check whether the mask can be applied to a single vector. 
*/ + if (op0 && rtx_equal_p (op0, op1)) + d.one_vector_p = true; + else if (sel.all_from_input_p (0)) { - unsigned int ei = sel[i] & (2 * nelt - 1); - which |= (ei < nelt ? 1 : 2); + d.one_vector_p = true; + op1 = op0; } - - switch (which) + else if (sel.all_from_input_p (1)) { - default: - gcc_unreachable (); - - case 3: - d.one_vector_p = false; - if (d.testing_p || !rtx_equal_p (op0, op1)) - break; - - /* The elements of PERM do not suggest that only the first operand - is used, but both operands are identical. Allow easier matching - of the permutation by folding the permutation into the single - input vector. */ - /* Fall Through. */ - case 2: - d.op0 = op1; - d.one_vector_p = true; - break; - - case 1: - d.op1 = op0; d.one_vector_p = true; - break; + op0 = op1; } + else + d.one_vector_p = false; - d.perm.new_vector (sel.encoding (), d.one_vector_p ? 1 : 2, nelt); + d.perm.new_vector (sel.encoding (), d.one_vector_p ? 1 : 2, + sel.nelts_per_input ()); + d.vmode = vmode; + d.target = target; + d.op0 = op0; + d.op1 = op1; + d.testing_p = !target; if (!d.testing_p) return aarch64_expand_vec_perm_const_1 (&d); - d.target = gen_raw_REG (d.vmode, LAST_VIRTUAL_REGISTER + 1); - d.op1 = d.op0 = gen_raw_REG (d.vmode, LAST_VIRTUAL_REGISTER + 2); - if (!d.one_vector_p) - d.op1 = gen_raw_REG (d.vmode, LAST_VIRTUAL_REGISTER + 3); - - start_sequence (); + rtx_insn *last = get_last_insn (); bool ret = aarch64_expand_vec_perm_const_1 (&d); - end_sequence (); + gcc_assert (last == get_last_insn ()); return ret; }