@@ -32,6 +32,13 @@ extern size_t strlen(const char *);
VECT_VAR(expected, int, 16, 4) -> expected_int16x4
VECT_VAR_DECL(expected, int, 16, 4) -> int16x4_t expected_int16x4
*/
+/* Some intrinsics are only available on AArch64.
+   Use this macro to guard their uses.  */
+#ifdef __aarch64__
+#define AARCH64_ONLY(X) X
+#else
+#define AARCH64_ONLY(X)
+#endif
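+/* For example, AARCH64_ONLY(TEST_VDUP(, poly, p, 64, 1)) compiles the
+   TEST_VDUP invocation on AArch64 and expands to nothing elsewhere.  */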
#define xSTR(X) #X
#define STR(X) xSTR(X)
@@ -92,6 +99,13 @@ extern size_t strlen(const char *);
fprintf(stderr, "CHECKED %s %s\n", STR(VECT_TYPE(T, W, N)), MSG); \
}
+#if defined (__ARM_FEATURE_CRYPTO)
+#define CHECK_CRYPTO(MSG,T,W,N,FMT,EXPECTED,COMMENT) \
+ CHECK(MSG,T,W,N,FMT,EXPECTED,COMMENT)
+#else
+#define CHECK_CRYPTO(MSG,T,W,N,FMT,EXPECTED,COMMENT)
+#endif
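+/* For example, CHECK_CRYPTO(TEST_MSG, poly, 64, 1, PRIx64, expected, "")
+   performs the usual CHECK only when __ARM_FEATURE_CRYPTO is defined,
+   so callers need no #ifdef of their own.  */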
+
/* Floating-point variant. */
#define CHECK_FP(MSG,T,W,N,FMT,EXPECTED,COMMENT) \
{ \
@@ -184,6 +198,9 @@ extern ARRAY(expected, uint, 32, 2);
extern ARRAY(expected, uint, 64, 1);
extern ARRAY(expected, poly, 8, 8);
extern ARRAY(expected, poly, 16, 4);
+#if defined (__ARM_FEATURE_CRYPTO)
+extern ARRAY(expected, poly, 64, 1);
+#endif
extern ARRAY(expected, hfloat, 16, 4);
extern ARRAY(expected, hfloat, 32, 2);
extern ARRAY(expected, hfloat, 64, 1);
@@ -197,11 +214,14 @@ extern ARRAY(expected, uint, 32, 4);
extern ARRAY(expected, uint, 64, 2);
extern ARRAY(expected, poly, 8, 16);
extern ARRAY(expected, poly, 16, 8);
+#if defined (__ARM_FEATURE_CRYPTO)
+extern ARRAY(expected, poly, 64, 2);
+#endif
extern ARRAY(expected, hfloat, 16, 8);
extern ARRAY(expected, hfloat, 32, 4);
extern ARRAY(expected, hfloat, 64, 2);
-#define CHECK_RESULTS_NAMED_NO_FP16(test_name,EXPECTED,comment) \
+#define CHECK_RESULTS_NAMED_NO_FP16_NO_POLY64(test_name,EXPECTED,comment) \
{ \
CHECK(test_name, int, 8, 8, PRIx8, EXPECTED, comment); \
CHECK(test_name, int, 16, 4, PRIx16, EXPECTED, comment); \
@@ -228,6 +248,13 @@ extern ARRAY(expected, hfloat, 64, 2);
CHECK_FP(test_name, float, 32, 4, PRIx32, EXPECTED, comment); \
} \
+#define CHECK_RESULTS_NAMED_NO_FP16(test_name,EXPECTED,comment) \
+ { \
+ CHECK_RESULTS_NAMED_NO_FP16_NO_POLY64(test_name, EXPECTED, comment); \
+ CHECK_CRYPTO(test_name, poly, 64, 1, PRIx64, EXPECTED, comment); \
+ CHECK_CRYPTO(test_name, poly, 64, 2, PRIx64, EXPECTED, comment); \
+ } \
+
/* Check results against EXPECTED. Operates on all possible vector types. */
#if defined (__ARM_FP16_FORMAT_IEEE) || defined (__ARM_FP16_FORMAT_ALTERNATIVE)
#define CHECK_RESULTS_NAMED(test_name,EXPECTED,comment) \
@@ -398,6 +425,9 @@ static void clean_results (void)
CLEAN(result, uint, 64, 1);
CLEAN(result, poly, 8, 8);
CLEAN(result, poly, 16, 4);
+#if defined (__ARM_FEATURE_CRYPTO)
+ CLEAN(result, poly, 64, 1);
+#endif
#if defined (__ARM_FP16_FORMAT_IEEE) || defined (__ARM_FP16_FORMAT_ALTERNATIVE)
CLEAN(result, float, 16, 4);
#endif
@@ -413,6 +443,9 @@ static void clean_results (void)
CLEAN(result, uint, 64, 2);
CLEAN(result, poly, 8, 16);
CLEAN(result, poly, 16, 8);
+#if defined (__ARM_FEATURE_CRYPTO)
+ CLEAN(result, poly, 64, 2);
+#endif
#if defined (__ARM_FP16_FORMAT_IEEE) || defined (__ARM_FP16_FORMAT_ALTERNATIVE)
CLEAN(result, float, 16, 8);
#endif
@@ -438,6 +471,13 @@ static void clean_results (void)
#define DECL_VARIABLE(VAR, T1, W, N) \
VECT_TYPE(T1, W, N) VECT_VAR(VAR, T1, W, N)
+#if defined (__ARM_FEATURE_CRYPTO)
+#define DECL_VARIABLE_CRYPTO(VAR, T1, W, N) \
+ DECL_VARIABLE(VAR, T1, W, N)
+#else
+#define DECL_VARIABLE_CRYPTO(VAR, T1, W, N)
+#endif
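+/* For example, DECL_VARIABLE_CRYPTO(VAR, poly, 64, 1) declares (roughly)
+   "poly64x1_t VAR_poly64x1" when crypto is available, and nothing
+   otherwise.  */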
+
/* Declare only 64 bits signed variants. */
#define DECL_VARIABLE_64BITS_SIGNED_VARIANTS(VAR) \
DECL_VARIABLE(VAR, int, 8, 8); \
@@ -473,6 +513,7 @@ static void clean_results (void)
DECL_VARIABLE_64BITS_UNSIGNED_VARIANTS(VAR); \
DECL_VARIABLE(VAR, poly, 8, 8); \
DECL_VARIABLE(VAR, poly, 16, 4); \
+ DECL_VARIABLE_CRYPTO(VAR, poly, 64, 1); \
DECL_VARIABLE(VAR, float, 16, 4); \
DECL_VARIABLE(VAR, float, 32, 2)
#else
@@ -481,6 +522,7 @@ static void clean_results (void)
DECL_VARIABLE_64BITS_UNSIGNED_VARIANTS(VAR); \
DECL_VARIABLE(VAR, poly, 8, 8); \
DECL_VARIABLE(VAR, poly, 16, 4); \
+ DECL_VARIABLE_CRYPTO(VAR, poly, 64, 1); \
DECL_VARIABLE(VAR, float, 32, 2)
#endif
@@ -491,6 +533,7 @@ static void clean_results (void)
DECL_VARIABLE_128BITS_UNSIGNED_VARIANTS(VAR); \
DECL_VARIABLE(VAR, poly, 8, 16); \
DECL_VARIABLE(VAR, poly, 16, 8); \
+ DECL_VARIABLE_CRYPTO(VAR, poly, 64, 2); \
DECL_VARIABLE(VAR, float, 16, 8); \
DECL_VARIABLE(VAR, float, 32, 4)
#else
@@ -499,6 +542,7 @@ static void clean_results (void)
DECL_VARIABLE_128BITS_UNSIGNED_VARIANTS(VAR); \
DECL_VARIABLE(VAR, poly, 8, 16); \
DECL_VARIABLE(VAR, poly, 16, 8); \
+ DECL_VARIABLE_CRYPTO(VAR, poly, 64, 2); \
DECL_VARIABLE(VAR, float, 32, 4)
#endif
/* Declare all variants. */
@@ -531,6 +575,13 @@ static void clean_results (void)
/* Helpers to call macros with 1 constant and 5 variable
arguments. */
+#if defined (__ARM_FEATURE_CRYPTO)
+#define MACRO_CRYPTO(MACRO, VAR1, VAR2, T1, T2, T3, W, N) \
+ MACRO(VAR1, VAR2, T1, T2, T3, W, N)
+#else
+#define MACRO_CRYPTO(MACRO, VAR1, VAR2, T1, T2, T3, W, N)
+#endif
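+/* For example, MACRO_CRYPTO(MACRO, VAR1, VAR2, , poly, p, 64, 1) invokes
+   MACRO(VAR1, VAR2, , poly, p, 64, 1) only when crypto is available.  */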
+
#define TEST_MACRO_64BITS_SIGNED_VARIANTS_1_5(MACRO, VAR) \
MACRO(VAR, , int, s, 8, 8); \
MACRO(VAR, , int, s, 16, 4); \
@@ -601,13 +652,15 @@ static void clean_results (void)
TEST_MACRO_64BITS_SIGNED_VARIANTS_2_5(MACRO, VAR1, VAR2); \
TEST_MACRO_64BITS_UNSIGNED_VARIANTS_2_5(MACRO, VAR1, VAR2); \
MACRO(VAR1, VAR2, , poly, p, 8, 8); \
- MACRO(VAR1, VAR2, , poly, p, 16, 4)
+ MACRO(VAR1, VAR2, , poly, p, 16, 4); \
+ MACRO_CRYPTO(MACRO, VAR1, VAR2, , poly, p, 64, 1)
#define TEST_MACRO_128BITS_VARIANTS_2_5(MACRO, VAR1, VAR2) \
TEST_MACRO_128BITS_SIGNED_VARIANTS_2_5(MACRO, VAR1, VAR2); \
TEST_MACRO_128BITS_UNSIGNED_VARIANTS_2_5(MACRO, VAR1, VAR2); \
MACRO(VAR1, VAR2, q, poly, p, 8, 16); \
- MACRO(VAR1, VAR2, q, poly, p, 16, 8)
+ MACRO(VAR1, VAR2, q, poly, p, 16, 8); \
+ MACRO_CRYPTO(MACRO, VAR1, VAR2, q, poly, p, 64, 2)
#define TEST_MACRO_ALL_VARIANTS_2_5(MACRO, VAR1, VAR2) \
TEST_MACRO_64BITS_VARIANTS_2_5(MACRO, VAR1, VAR2); \
new file mode 100644
@@ -0,0 +1,302 @@
+/* This file contains tests for the VLD{X}, VLD{X}_DUP and VSLI intrinsics.  */
+
+#include <arm_neon.h>
+#include "arm-neon-ref.h"
+#include "compute-ref-data.h"
+
+/* { dg-options "-march=armv8-a+crypto" } */
+/* { dg-skip-if "" { arm*-*-* } } */
+
+/* Expected results: vld1. */
+VECT_VAR_DECL (vld1_expected,poly,64,1) [] = { 0xfffffffffffffff0 };
+VECT_VAR_DECL (vld1_expected,poly,64,2) [] = { 0xfffffffffffffff0,
+ 0xfffffffffffffff1 };
+
+/* Expected results: vld1_dup. */
+VECT_VAR_DECL (vld1_dup_expected0,poly,64,1) [] = { 0xfffffffffffffff0 };
+VECT_VAR_DECL (vld1_dup_expected0,poly,64,2) [] = { 0xfffffffffffffff0,
+ 0xfffffffffffffff0 };
+VECT_VAR_DECL (vld1_dup_expected1,poly,64,1) [] = { 0xfffffffffffffff1 };
+VECT_VAR_DECL (vld1_dup_expected1,poly,64,2) [] = { 0xfffffffffffffff1,
+ 0xfffffffffffffff1 };
+VECT_VAR_DECL (vld1_dup_expected2,poly,64,1) [] = { 0xfffffffffffffff2 };
+VECT_VAR_DECL (vld1_dup_expected2,poly,64,2) [] = { 0xfffffffffffffff2,
+ 0xfffffffffffffff2 };
+
+/* Expected results: vldX. */
+VECT_VAR_DECL (vld2_expected_0,poly,64,1) [] = { 0xfffffffffffffff0 };
+VECT_VAR_DECL (vld2_expected_1,poly,64,1) [] = { 0xfffffffffffffff1 };
+VECT_VAR_DECL (vld3_expected_0,poly,64,1) [] = { 0xfffffffffffffff0 };
+VECT_VAR_DECL (vld3_expected_1,poly,64,1) [] = { 0xfffffffffffffff1 };
+VECT_VAR_DECL (vld3_expected_2,poly,64,1) [] = { 0xfffffffffffffff2 };
+VECT_VAR_DECL (vld4_expected_0,poly,64,1) [] = { 0xfffffffffffffff0 };
+VECT_VAR_DECL (vld4_expected_1,poly,64,1) [] = { 0xfffffffffffffff1 };
+VECT_VAR_DECL (vld4_expected_2,poly,64,1) [] = { 0xfffffffffffffff2 };
+VECT_VAR_DECL (vld4_expected_3,poly,64,1) [] = { 0xfffffffffffffff3 };
+
+/* Expected results: vldX_dup. */
+VECT_VAR_DECL (vld2_dup_expected_0,poly,64,1) [] = { 0xfffffffffffffff0 };
+VECT_VAR_DECL (vld2_dup_expected_1,poly,64,1) [] = { 0xfffffffffffffff1 };
+VECT_VAR_DECL (vld3_dup_expected_0,poly,64,1) [] = { 0xfffffffffffffff0 };
+VECT_VAR_DECL (vld3_dup_expected_1,poly,64,1) [] = { 0xfffffffffffffff1 };
+VECT_VAR_DECL (vld3_dup_expected_2,poly,64,1) [] = { 0xfffffffffffffff2 };
+VECT_VAR_DECL (vld4_dup_expected_0,poly,64,1) [] = { 0xfffffffffffffff0 };
+VECT_VAR_DECL (vld4_dup_expected_1,poly,64,1) [] = { 0xfffffffffffffff1 };
+VECT_VAR_DECL (vld4_dup_expected_2,poly,64,1) [] = { 0xfffffffffffffff2 };
+VECT_VAR_DECL (vld4_dup_expected_3,poly,64,1) [] = { 0xfffffffffffffff3 };
+
+/* Expected results: vsli. */
+VECT_VAR_DECL (vsli_expected,poly,64,1) [] = { 0x10 };
+VECT_VAR_DECL (vsli_expected,poly,64,2) [] = { 0x7ffffffffffff0,
+ 0x7ffffffffffff1 };
+VECT_VAR_DECL (vsli_expected_max_shift,poly,64,1) [] = { 0x7ffffffffffffff0 };
+VECT_VAR_DECL (vsli_expected_max_shift,poly,64,2) [] = { 0xfffffffffffffff0,
+ 0xfffffffffffffff1 };
+
+int main (void)
+{
+ int i;
+
+ /* vld1_p64 tests. */
+#undef TEST_MSG
+#define TEST_MSG "VLD1/VLD1Q"
+
+#define TEST_VLD1(VAR, BUF, Q, T1, T2, W, N) \
+ VECT_VAR (VAR, T1, W, N) = vld1##Q##_##T2##W (VECT_VAR (BUF, T1, W, N)); \
+ vst1##Q##_##T2##W (VECT_VAR (result, T1, W, N), VECT_VAR (VAR, T1, W, N))
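+/* For example, TEST_VLD1 (vld1_vector, buffer, , poly, p, 64, 1) expands
+   roughly to:
+     vld1_vector_poly64x1 = vld1_p64 (buffer_poly64x1);
+     vst1_p64 (result_poly64x1, vld1_vector_poly64x1);  */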
+
+ DECL_VARIABLE (vld1_vector, poly, 64, 1);
+ DECL_VARIABLE (vld1_vector, poly, 64, 2);
+
+ CLEAN (result, poly, 64, 1);
+ CLEAN (result, poly, 64, 2);
+
+ VLOAD (vld1_vector, buffer, , poly, p, 64, 1);
+ VLOAD (vld1_vector, buffer, q, poly, p, 64, 2);
+
+ TEST_VLD1 (vld1_vector, buffer, , poly, p, 64, 1);
+ TEST_VLD1 (vld1_vector, buffer, q, poly, p, 64, 2);
+
+ CHECK (TEST_MSG, poly, 64, 1, PRIx64, vld1_expected, "");
+ CHECK (TEST_MSG, poly, 64, 2, PRIx64, vld1_expected, "");
+
+ /* vld1_dup_p64 tests. */
+#undef TEST_MSG
+#define TEST_MSG "VLD1_DUP/VLD1_DUPQ"
+
+#define TEST_VLD1_DUP(VAR, BUF, Q, T1, T2, W, N) \
+ VECT_VAR (VAR, T1, W, N) = \
+ vld1##Q##_dup_##T2##W (&VECT_VAR (BUF, T1, W, N)[i]); \
+ vst1##Q##_##T2##W (VECT_VAR (result, T1, W, N), VECT_VAR (VAR, T1, W, N))
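+/* For example, with i == 1 this loads buffer_dup_poly64x1[1] and
+   replicates it across all lanes of the destination vector.  */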
+
+ DECL_VARIABLE (vld1_dup_vector, poly, 64, 1);
+ DECL_VARIABLE (vld1_dup_vector, poly, 64, 2);
+
+ /* Try reading from different offsets in the input buffer.  */
+ for (i = 0; i < 3; i++)
+ {
+ CLEAN (result, poly, 64, 1);
+ CLEAN (result, poly, 64, 2);
+
+ TEST_VLD1_DUP (vld1_dup_vector, buffer_dup, , poly, p, 64, 1);
+ TEST_VLD1_DUP (vld1_dup_vector, buffer_dup, q, poly, p, 64, 2);
+
+ switch (i)
+ {
+ case 0:
+ CHECK (TEST_MSG, poly, 64, 1, PRIx64, vld1_dup_expected0, "");
+ CHECK (TEST_MSG, poly, 64, 2, PRIx64, vld1_dup_expected0, "");
+ break;
+ case 1:
+ CHECK (TEST_MSG, poly, 64, 1, PRIx64, vld1_dup_expected1, "");
+ CHECK (TEST_MSG, poly, 64, 2, PRIx64, vld1_dup_expected1, "");
+ break;
+ case 2:
+ CHECK (TEST_MSG, poly, 64, 1, PRIx64, vld1_dup_expected2, "");
+ CHECK (TEST_MSG, poly, 64, 2, PRIx64, vld1_dup_expected2, "");
+ break;
+ default:
+ abort ();
+ }
+ }
+
+ /* vldX_p64 tests. */
+#define DECL_VLDX(T1, W, N, X) \
+ VECT_ARRAY_TYPE (T1, W, N, X) VECT_ARRAY_VAR (vldX_vector, T1, W, N, X); \
+ VECT_VAR_DECL (vldX_result_bis_##X, T1, W, N)[X * N]
+
+#define TEST_VLDX(Q, T1, T2, W, N, X) \
+ VECT_ARRAY_VAR (vldX_vector, T1, W, N, X) = \
+ /* Use dedicated init buffer, of size X. */ \
+ vld##X##Q##_##T2##W (VECT_ARRAY_VAR (buffer_vld##X, T1, W, N, X)); \
+ vst##X##Q##_##T2##W (VECT_VAR (vldX_result_bis_##X, T1, W, N), \
+ VECT_ARRAY_VAR (vldX_vector, T1, W, N, X)); \
+ memcpy (VECT_VAR (result, T1, W, N), \
+ VECT_VAR (vldX_result_bis_##X, T1, W, N), \
+ sizeof (VECT_VAR (result, T1, W, N)));
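+/* For example, TEST_VLDX (, poly, p, 64, 1, 2) performs a vld2_p64 from
+   the dedicated buffer_vld2 input, stores the pair back with vst2_p64,
+   and copies the first chunk into "result" for checking.  */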
+
+ /* Overwrite "result" with the contents of "result_bis"[Y]. */
+#define TEST_EXTRA_CHUNK(T1, W, N, X, Y) \
+ memcpy (VECT_VAR (result, T1, W, N), \
+ &(VECT_VAR (vldX_result_bis_##X, T1, W, N)[Y*N]), \
+ sizeof (VECT_VAR (result, T1, W, N)));
+
+ DECL_VLDX (poly, 64, 1, 2);
+ DECL_VLDX (poly, 64, 1, 3);
+ DECL_VLDX (poly, 64, 1, 4);
+
+ VECT_ARRAY_INIT2 (buffer_vld2, poly, 64, 1);
+ PAD (buffer_vld2_pad, poly, 64, 1);
+ VECT_ARRAY_INIT3 (buffer_vld3, poly, 64, 1);
+ PAD (buffer_vld3_pad, poly, 64, 1);
+ VECT_ARRAY_INIT4 (buffer_vld4, poly, 64, 1);
+ PAD (buffer_vld4_pad, poly, 64, 1);
+
+#undef TEST_MSG
+#define TEST_MSG "VLD2/VLD2Q"
+ CLEAN (result, poly, 64, 1);
+ TEST_VLDX (, poly, p, 64, 1, 2);
+ CHECK (TEST_MSG, poly, 64, 1, PRIx64, vld2_expected_0, "chunk 0");
+ CLEAN (result, poly, 64, 1);
+ TEST_EXTRA_CHUNK (poly, 64, 1, 2, 1);
+ CHECK (TEST_MSG, poly, 64, 1, PRIx64, vld2_expected_1, "chunk 1");
+
+#undef TEST_MSG
+#define TEST_MSG "VLD3/VLD3Q"
+ CLEAN (result, poly, 64, 1);
+ TEST_VLDX (, poly, p, 64, 1, 3);
+ CHECK (TEST_MSG, poly, 64, 1, PRIx64, vld3_expected_0, "chunk 0");
+ CLEAN (result, poly, 64, 1);
+ TEST_EXTRA_CHUNK (poly, 64, 1, 3, 1);
+ CHECK (TEST_MSG, poly, 64, 1, PRIx64, vld3_expected_1, "chunk 1");
+ CLEAN (result, poly, 64, 1);
+ TEST_EXTRA_CHUNK (poly, 64, 1, 3, 2);
+ CHECK (TEST_MSG, poly, 64, 1, PRIx64, vld3_expected_2, "chunk 2");
+
+#undef TEST_MSG
+#define TEST_MSG "VLD4/VLD4Q"
+ CLEAN (result, poly, 64, 1);
+ TEST_VLDX (, poly, p, 64, 1, 4);
+ CHECK (TEST_MSG, poly, 64, 1, PRIx64, vld4_expected_0, "chunk 0");
+ CLEAN (result, poly, 64, 1);
+ TEST_EXTRA_CHUNK (poly, 64, 1, 4, 1);
+ CHECK (TEST_MSG, poly, 64, 1, PRIx64, vld4_expected_1, "chunk 1");
+ CLEAN (result, poly, 64, 1);
+ TEST_EXTRA_CHUNK (poly, 64, 1, 4, 2);
+ CHECK (TEST_MSG, poly, 64, 1, PRIx64, vld4_expected_2, "chunk 2");
+ CLEAN (result, poly, 64, 1);
+ TEST_EXTRA_CHUNK (poly, 64, 1, 4, 3);
+ CHECK (TEST_MSG, poly, 64, 1, PRIx64, vld4_expected_3, "chunk 3");
+
+ /* vldX_dup_p64 tests. */
+#define DECL_VLDX_DUP(T1, W, N, X) \
+ VECT_ARRAY_TYPE (T1, W, N, X) VECT_ARRAY_VAR (vldX_dup_vector, T1, W, N, X); \
+ VECT_VAR_DECL (vldX_dup_result_bis_##X, T1, W, N)[X * N]
+
+#define TEST_VLDX_DUP(Q, T1, T2, W, N, X) \
+ VECT_ARRAY_VAR (vldX_dup_vector, T1, W, N, X) = \
+ vld##X##Q##_dup_##T2##W (&VECT_VAR (buffer_dup, T1, W, N)[0]); \
+ \
+ vst##X##Q##_##T2##W (VECT_VAR (vldX_dup_result_bis_##X, T1, W, N), \
+ VECT_ARRAY_VAR (vldX_dup_vector, T1, W, N, X)); \
+ memcpy (VECT_VAR (result, T1, W, N), \
+ VECT_VAR (vldX_dup_result_bis_##X, T1, W, N), \
+ sizeof (VECT_VAR (result, T1, W, N)));
+
+ /* Overwrite "result" with the contents of "result_bis"[Y]. */
+#define TEST_VLDX_DUP_EXTRA_CHUNK(T1, W, N, X, Y) \
+ memcpy (VECT_VAR (result, T1, W, N), \
+ &(VECT_VAR (vldX_dup_result_bis_##X, T1, W, N)[Y*N]), \
+ sizeof (VECT_VAR (result, T1, W, N)));
+
+ DECL_VLDX_DUP (poly, 64, 1, 2);
+ DECL_VLDX_DUP (poly, 64, 1, 3);
+ DECL_VLDX_DUP (poly, 64, 1, 4);
+
+
+#undef TEST_MSG
+#define TEST_MSG "VLD2_DUP/VLD2Q_DUP"
+ CLEAN (result, poly, 64, 1);
+ TEST_VLDX_DUP (, poly, p, 64, 1, 2);
+ CHECK (TEST_MSG, poly, 64, 1, PRIx64, vld2_dup_expected_0, "chunk 0");
+ CLEAN (result, poly, 64, 1);
+ TEST_VLDX_DUP_EXTRA_CHUNK (poly, 64, 1, 2, 1);
+ CHECK (TEST_MSG, poly, 64, 1, PRIx64, vld2_dup_expected_1, "chunk 1");
+
+#undef TEST_MSG
+#define TEST_MSG "VLD3_DUP/VLD3Q_DUP"
+ CLEAN (result, poly, 64, 1);
+ TEST_VLDX_DUP (, poly, p, 64, 1, 3);
+ CHECK (TEST_MSG, poly, 64, 1, PRIx64, vld3_dup_expected_0, "chunk 0");
+ CLEAN (result, poly, 64, 1);
+ TEST_VLDX_DUP_EXTRA_CHUNK (poly, 64, 1, 3, 1);
+ CHECK (TEST_MSG, poly, 64, 1, PRIx64, vld3_dup_expected_1, "chunk 1");
+ CLEAN (result, poly, 64, 1);
+ TEST_VLDX_DUP_EXTRA_CHUNK (poly, 64, 1, 3, 2);
+ CHECK (TEST_MSG, poly, 64, 1, PRIx64, vld3_dup_expected_2, "chunk 2");
+
+#undef TEST_MSG
+#define TEST_MSG "VLD4_DUP/VLD4Q_DUP"
+ CLEAN (result, poly, 64, 1);
+ TEST_VLDX_DUP (, poly, p, 64, 1, 4);
+ CHECK (TEST_MSG, poly, 64, 1, PRIx64, vld4_dup_expected_0, "chunk 0");
+ CLEAN (result, poly, 64, 1);
+ TEST_VLDX_DUP_EXTRA_CHUNK (poly, 64, 1, 4, 1);
+ CHECK (TEST_MSG, poly, 64, 1, PRIx64, vld4_dup_expected_1, "chunk 1");
+ CLEAN (result, poly, 64, 1);
+ TEST_VLDX_DUP_EXTRA_CHUNK (poly, 64, 1, 4, 2);
+ CHECK (TEST_MSG, poly, 64, 1, PRIx64, vld4_dup_expected_2, "chunk 2");
+ CLEAN (result, poly, 64, 1);
+ TEST_VLDX_DUP_EXTRA_CHUNK (poly, 64, 1, 4, 3);
+ CHECK (TEST_MSG, poly, 64, 1, PRIx64, vld4_dup_expected_3, "chunk 3");
+
+ /* vsli_p64 tests. */
+#undef TEST_MSG
+#define TEST_MSG "VSLI"
+
+#define TEST_VSXI1(INSN, Q, T1, T2, W, N, V) \
+ VECT_VAR (vsXi_vector_res, T1, W, N) = \
+ INSN##Q##_n_##T2##W (VECT_VAR (vsXi_vector, T1, W, N), \
+ VECT_VAR (vsXi_vector2, T1, W, N), \
+ V); \
+ vst1##Q##_##T2##W (VECT_VAR (result, T1, W, N), \
+ VECT_VAR (vsXi_vector_res, T1, W, N))
+
+#define TEST_VSXI(INSN, Q, T1, T2, W, N, V) \
+ TEST_VSXI1 (INSN, Q, T1, T2, W, N, V)
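+/* VSLI shifts each element of the second operand left by V bits and
+   inserts the result into the first operand, leaving the V low bits of
+   the first operand unchanged.  E.g. TEST_VSXI (vsli, , poly, p, 64, 1, 3)
+   invokes vsli_n_p64 (..., 3).  */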
+
+ DECL_VARIABLE (vsXi_vector, poly, 64, 1);
+ DECL_VARIABLE (vsXi_vector, poly, 64, 2);
+ DECL_VARIABLE (vsXi_vector2, poly, 64, 1);
+ DECL_VARIABLE (vsXi_vector2, poly, 64, 2);
+ DECL_VARIABLE (vsXi_vector_res, poly, 64, 1);
+ DECL_VARIABLE (vsXi_vector_res, poly, 64, 2);
+
+ CLEAN (result, poly, 64, 1);
+ CLEAN (result, poly, 64, 2);
+
+ VLOAD (vsXi_vector, buffer, , poly, p, 64, 1);
+ VLOAD (vsXi_vector, buffer, q, poly, p, 64, 2);
+
+ VDUP (vsXi_vector2, , poly, p, 64, 1, 2);
+ VDUP (vsXi_vector2, q, poly, p, 64, 2, 3);
+
+ TEST_VSXI (vsli, , poly, p, 64, 1, 3);
+ TEST_VSXI (vsli, q, poly, p, 64, 2, 53);
+
+ CHECK (TEST_MSG, poly, 64, 1, PRIx64, vsli_expected, "");
+ CHECK (TEST_MSG, poly, 64, 2, PRIx64, vsli_expected, "");
+
+ /* Test cases with maximum shift amount. */
+ CLEAN (result, poly, 64, 1);
+ CLEAN (result, poly, 64, 2);
+
+ TEST_VSXI (vsli, , poly, p, 64, 1, 63);
+ TEST_VSXI (vsli, q, poly, p, 64, 2, 63);
+
+#define COMMENT "(max shift amount)"
+ CHECK (TEST_MSG, poly, 64, 1, PRIx64, vsli_expected_max_shift, COMMENT);
+ CHECK (TEST_MSG, poly, 64, 2, PRIx64, vsli_expected_max_shift, COMMENT);
+
+ return 0;
+}
@@ -1,6 +1,7 @@
#include <arm_neon.h>
#include "arm-neon-ref.h"
#include "compute-ref-data.h"
+/* { dg-additional-options "-march=armv8-a+crypto" { target aarch64*-*-* } } */
/* Expected results. */
VECT_VAR_DECL(expected,int,8,16) [] = { 0xf0, 0xf1, 0xf2, 0xf3,
@@ -25,6 +26,9 @@ VECT_VAR_DECL(expected,poly,8,16) [] = { 0xf0, 0xf1, 0xf2, 0xf3,
0x55, 0x55, 0x55, 0x55 };
VECT_VAR_DECL(expected,poly,16,8) [] = { 0xfff0, 0xfff1, 0xfff2, 0xfff3,
0x66, 0x66, 0x66, 0x66 };
+#if defined (__ARM_FEATURE_CRYPTO)
+VECT_VAR_DECL(expected,poly,64,2) [] = { 0xfffffffffffffff0, 0x77 };
+#endif
VECT_VAR_DECL(expected,hfloat,32,4) [] = { 0xc1800000, 0xc1700000,
0x40533333, 0x40533333 };
VECT_VAR_DECL(expected,hfloat,16,8) [] = { 0xcc00, 0xcb80, 0xcb00, 0xca80,
@@ -62,6 +66,9 @@ void exec_vcombine (void)
VDUP(vector64_b, , uint, u, 64, 1, 0x88);
VDUP(vector64_b, , poly, p, 8, 8, 0x55);
VDUP(vector64_b, , poly, p, 16, 4, 0x66);
+#if defined (__ARM_FEATURE_CRYPTO)
+ VDUP(vector64_b, , poly, p, 64, 1, 0x77);
+#endif
#if defined (__ARM_FP16_FORMAT_IEEE) || defined (__ARM_FP16_FORMAT_ALTERNATIVE)
VDUP(vector64_b, , float, f, 16, 4, 2.25);
#endif
@@ -80,6 +87,9 @@ void exec_vcombine (void)
TEST_VCOMBINE(uint, u, 64, 1, 2);
TEST_VCOMBINE(poly, p, 8, 8, 16);
TEST_VCOMBINE(poly, p, 16, 4, 8);
+#if defined (__ARM_FEATURE_CRYPTO)
+ TEST_VCOMBINE(poly, p, 64, 1, 2);
+#endif
#if defined (__ARM_FP16_FORMAT_IEEE) || defined (__ARM_FP16_FORMAT_ALTERNATIVE)
TEST_VCOMBINE(float, f, 16, 4, 8);
#endif
@@ -95,6 +105,9 @@ void exec_vcombine (void)
CHECK(TEST_MSG, uint, 64, 2, PRIx64, expected, "");
CHECK(TEST_MSG, poly, 8, 16, PRIx8, expected, "");
CHECK(TEST_MSG, poly, 16, 8, PRIx16, expected, "");
+#if defined (__ARM_FEATURE_CRYPTO)
+ CHECK(TEST_MSG, poly, 64, 2, PRIx64, expected, "");
+#endif
#if defined (__ARM_FP16_FORMAT_IEEE) || defined (__ARM_FP16_FORMAT_ALTERNATIVE)
CHECK_FP(TEST_MSG, float, 16, 8, PRIx16, expected, "");
#endif
@@ -1,6 +1,7 @@
#include <arm_neon.h>
#include "arm-neon-ref.h"
#include "compute-ref-data.h"
+/* { dg-additional-options "-march=armv8-a+crypto" { target aarch64*-*-* } } */
/* Expected results. */
VECT_VAR_DECL(expected,int,8,8) [] = { 0xf0, 0xde, 0xbc, 0x9a,
@@ -16,6 +17,9 @@ VECT_VAR_DECL(expected,uint,64,1) [] = { 0x123456789abcdef0 };
VECT_VAR_DECL(expected,poly,8,8) [] = { 0xf0, 0xde, 0xbc, 0x9a,
0x78, 0x56, 0x34, 0x12 };
VECT_VAR_DECL(expected,poly,16,4) [] = { 0xdef0, 0x9abc, 0x5678, 0x1234 };
+#if defined (__ARM_FEATURE_CRYPTO)
+VECT_VAR_DECL(expected,poly,64,1) [] = { 0x123456789abcdef0 };
+#endif
VECT_VAR_DECL(expected,hfloat,16,4) [] = { 0xdef0, 0x9abc, 0x5678, 0x1234 };
VECT_VAR_DECL(expected,hfloat,32,2) [] = { 0x9abcdef0, 0x12345678 };
@@ -49,6 +53,9 @@ FNNAME (INSN_NAME)
DECL_VAL(val, uint, 64, 1);
DECL_VAL(val, poly, 8, 8);
DECL_VAL(val, poly, 16, 4);
+#if defined (__ARM_FEATURE_CRYPTO)
+ DECL_VAL(val, poly, 64, 1);
+#endif
DECL_VARIABLE(vector_res, int, 8, 8);
DECL_VARIABLE(vector_res, int, 16, 4);
@@ -64,6 +71,9 @@ FNNAME (INSN_NAME)
DECL_VARIABLE(vector_res, uint, 64, 1);
DECL_VARIABLE(vector_res, poly, 8, 8);
DECL_VARIABLE(vector_res, poly, 16, 4);
+#if defined (__ARM_FEATURE_CRYPTO)
+ DECL_VARIABLE(vector_res, poly, 64, 1);
+#endif
clean_results ();
@@ -82,6 +92,9 @@ FNNAME (INSN_NAME)
VECT_VAR(val, uint, 64, 1) = 0x123456789abcdef0ULL;
VECT_VAR(val, poly, 8, 8) = 0x123456789abcdef0ULL;
VECT_VAR(val, poly, 16, 4) = 0x123456789abcdef0ULL;
+#if defined (__ARM_FEATURE_CRYPTO)
+ VECT_VAR(val, poly, 64, 1) = 0x123456789abcdef0ULL;
+#endif
TEST_VCREATE(int, s, 8, 8);
TEST_VCREATE(int, s, 16, 4);
@@ -97,6 +110,9 @@ FNNAME (INSN_NAME)
TEST_VCREATE(uint, u, 64, 1);
TEST_VCREATE(poly, p, 8, 8);
TEST_VCREATE(poly, p, 16, 4);
+#if defined (__ARM_FEATURE_CRYPTO)
+ TEST_VCREATE(poly, p, 64, 1);
+#endif
CHECK(TEST_MSG, int, 8, 8, PRIx8, expected, "");
CHECK(TEST_MSG, int, 16, 4, PRIx16, expected, "");
@@ -108,6 +124,9 @@ FNNAME (INSN_NAME)
CHECK(TEST_MSG, uint, 64, 1, PRIx64, expected, "");
CHECK(TEST_MSG, poly, 8, 8, PRIx8, expected, "");
CHECK(TEST_MSG, poly, 16, 4, PRIx16, expected, "");
+#if defined (__ARM_FEATURE_CRYPTO)
+ CHECK(TEST_MSG, poly, 64, 1, PRIx64, expected, "");
+#endif
#if defined (__ARM_FP16_FORMAT_IEEE) || defined (__ARM_FP16_FORMAT_ALTERNATIVE)
CHECK_FP(TEST_MSG, float, 16, 4, PRIx16, expected, "");
#endif
@@ -1,6 +1,7 @@
#include <arm_neon.h>
#include "arm-neon-ref.h"
#include "compute-ref-data.h"
+/* { dg-additional-options "-march=armv8-a+crypto" { target aarch64*-*-* } } */
/* We test vdup and vmov in the same place since they are aliases. */
@@ -23,6 +24,11 @@ VECT_VAR_DECL(expected0,poly,16,4) [] = { 0xfff0, 0xfff0, 0xfff0, 0xfff0 };
VECT_VAR_DECL (expected0, hfloat, 16, 4) [] = { 0xcc00, 0xcc00,
0xcc00, 0xcc00 };
#endif
+#if defined (__ARM_FEATURE_CRYPTO) && defined (__aarch64__)
+VECT_VAR_DECL(expected0,poly,64,1) [] = { 0xfffffffffffffff0 };
+VECT_VAR_DECL(expected0,poly,64,2) [] = { 0xfffffffffffffff0,
+ 0xfffffffffffffff0 };
+#endif
VECT_VAR_DECL(expected0,hfloat,32,2) [] = { 0xc1800000, 0xc1800000 };
VECT_VAR_DECL(expected0,int,8,16) [] = { 0xf0, 0xf0, 0xf0, 0xf0,
0xf0, 0xf0, 0xf0, 0xf0,
@@ -77,6 +83,11 @@ VECT_VAR_DECL(expected1,poly,16,4) [] = { 0xfff1, 0xfff1, 0xfff1, 0xfff1 };
VECT_VAR_DECL (expected1, hfloat, 16, 4) [] = { 0xcb80, 0xcb80,
0xcb80, 0xcb80 };
#endif
+#if defined (__ARM_FEATURE_CRYPTO) && defined (__aarch64__)
+VECT_VAR_DECL(expected1,poly,64,1) [] = { 0xfffffffffffffff1 };
+VECT_VAR_DECL(expected1,poly,64,2) [] = { 0xfffffffffffffff1,
+ 0xfffffffffffffff1 };
+#endif
VECT_VAR_DECL(expected1,hfloat,32,2) [] = { 0xc1700000, 0xc1700000 };
VECT_VAR_DECL(expected1,int,8,16) [] = { 0xf1, 0xf1, 0xf1, 0xf1,
0xf1, 0xf1, 0xf1, 0xf1,
@@ -131,6 +142,11 @@ VECT_VAR_DECL(expected2,poly,16,4) [] = { 0xfff2, 0xfff2, 0xfff2, 0xfff2 };
VECT_VAR_DECL (expected2, hfloat, 16, 4) [] = { 0xcb00, 0xcb00,
0xcb00, 0xcb00 };
#endif
+#if defined (__ARM_FEATURE_CRYPTO) && defined (__aarch64__)
+VECT_VAR_DECL(expected2,poly,64,1) [] = { 0xfffffffffffffff2 };
+VECT_VAR_DECL(expected2,poly,64,2) [] = { 0xfffffffffffffff2,
+ 0xfffffffffffffff2 };
+#endif
VECT_VAR_DECL(expected2,hfloat,32,2) [] = { 0xc1600000, 0xc1600000 };
VECT_VAR_DECL(expected2,int,8,16) [] = { 0xf2, 0xf2, 0xf2, 0xf2,
0xf2, 0xf2, 0xf2, 0xf2,
@@ -167,6 +183,12 @@ VECT_VAR_DECL (expected2, hfloat, 16, 8) [] = { 0xcb00, 0xcb00,
VECT_VAR_DECL(expected2,hfloat,32,4) [] = { 0xc1600000, 0xc1600000,
0xc1600000, 0xc1600000 };
+#if defined (__aarch64__)
+#define CHECK_EXEC_RESULTS(M, E, C) CHECK_RESULTS_NAMED_NO_FP16(M, E, C)
+#else
+#define CHECK_EXEC_RESULTS(M, E, C) \
+ CHECK_RESULTS_NAMED_NO_FP16_NO_POLY64(M, E, C)
+#endif
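+/* CHECK_EXEC_RESULTS checks the poly64 variants only on AArch64; on
+   AArch32 the poly64 expected arrays are not declared, so the
+   _NO_POLY64 variant is used instead.  */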
+
#define TEST_MSG "VDUP/VDUPQ"
void exec_vdup_vmov (void)
{
@@ -204,6 +226,9 @@ void exec_vdup_vmov (void)
#if defined (FP16_SUPPORTED)
TEST_VDUP(, float, f, 16, 4);
#endif
+#if defined (__ARM_FEATURE_CRYPTO)
+ AARCH64_ONLY(TEST_VDUP(, poly, p, 64, 1));
+#endif
TEST_VDUP(, float, f, 32, 2);
TEST_VDUP(q, int, s, 8, 16);
@@ -219,18 +244,21 @@ void exec_vdup_vmov (void)
#if defined (FP16_SUPPORTED)
TEST_VDUP(q, float, f, 16, 8);
#endif
+#if defined (__ARM_FEATURE_CRYPTO)
+ AARCH64_ONLY(TEST_VDUP(q, poly, p, 64, 2));
+#endif
TEST_VDUP(q, float, f, 32, 4);
#if defined (FP16_SUPPORTED)
switch (i) {
case 0:
- CHECK_RESULTS_NAMED (TEST_MSG, expected0, "");
+ CHECK_RESULTS_NAMED_NO_FP16 (TEST_MSG, expected0, "");
break;
case 1:
- CHECK_RESULTS_NAMED (TEST_MSG, expected1, "");
+ CHECK_RESULTS_NAMED_NO_FP16 (TEST_MSG, expected1, "");
break;
case 2:
- CHECK_RESULTS_NAMED (TEST_MSG, expected2, "");
+ CHECK_RESULTS_NAMED_NO_FP16 (TEST_MSG, expected2, "");
break;
default:
abort();
@@ -271,6 +299,9 @@ void exec_vdup_vmov (void)
#if defined (FP16_SUPPORTED)
TEST_VMOV(, float, f, 16, 4);
#endif
+#if defined (__ARM_FEATURE_CRYPTO)
+ AARCH64_ONLY(TEST_VMOV(, poly, p, 64, 1));
+#endif
TEST_VMOV(, float, f, 32, 2);
TEST_VMOV(q, int, s, 8, 16);
@@ -286,6 +317,9 @@ void exec_vdup_vmov (void)
#if defined (FP16_SUPPORTED)
TEST_VMOV(q, float, f, 16, 8);
#endif
+#if defined (__ARM_FEATURE_CRYPTO)
+ AARCH64_ONLY(TEST_VMOV(q, poly, p, 64, 2));
+#endif
TEST_VMOV(q, float, f, 32, 4);
#if defined (FP16_SUPPORTED)
@@ -1,6 +1,7 @@
#include <arm_neon.h>
#include "arm-neon-ref.h"
#include "compute-ref-data.h"
+/* { dg-additional-options "-march=armv8-a+crypto" { target aarch64*-*-* } } */
/* Expected results. */
VECT_VAR_DECL(expected,int,8,8) [] = { 0xf1, 0xf1, 0xf1, 0xf1,
@@ -31,6 +32,10 @@ VECT_VAR_DECL(expected,int,32,4) [] = { 0xfffffff1, 0xfffffff1,
0xfffffff1, 0xfffffff1 };
VECT_VAR_DECL(expected,int,64,2) [] = { 0xfffffffffffffff0,
0xfffffffffffffff0 };
+#if defined (__ARM_FEATURE_CRYPTO)
+VECT_VAR_DECL(expected,poly,64,2) [] = { 0xfffffffffffffff0,
+ 0xfffffffffffffff0 };
+#endif
VECT_VAR_DECL(expected,uint,8,16) [] = { 0xf5, 0xf5, 0xf5, 0xf5,
0xf5, 0xf5, 0xf5, 0xf5,
0xf5, 0xf5, 0xf5, 0xf5,
@@ -53,6 +58,9 @@ VECT_VAR_DECL (expected, hfloat, 16, 8) [] = { 0xca80, 0xca80,
0xca80, 0xca80,
0xca80, 0xca80 };
#endif
+#if defined (__ARM_FEATURE_CRYPTO)
+VECT_VAR_DECL(expected,poly,64,1) [] = { 0xfffffffffffffff0 };
+#endif
VECT_VAR_DECL(expected,hfloat,32,4) [] = { 0xc1700000, 0xc1700000,
0xc1700000, 0xc1700000 };
@@ -92,6 +100,9 @@ void exec_vdup_lane (void)
#if defined (FP16_SUPPORTED)
TEST_VDUP_LANE(, float, f, 16, 4, 4, 3);
#endif
+#if defined (__ARM_FEATURE_CRYPTO)
+ TEST_VDUP_LANE(, poly, p, 64, 1, 1, 0);
+#endif
TEST_VDUP_LANE(, float, f, 32, 2, 2, 1);
TEST_VDUP_LANE(q, int, s, 8, 16, 8, 2);
@@ -107,6 +118,9 @@ void exec_vdup_lane (void)
#if defined (FP16_SUPPORTED)
TEST_VDUP_LANE(q, float, f, 16, 8, 4, 3);
#endif
+#if defined (__ARM_FEATURE_CRYPTO)
+ TEST_VDUP_LANE(q, poly, p, 64, 2, 1, 0);
+#endif
TEST_VDUP_LANE(q, float, f, 32, 4, 2, 1);
#if defined (FP16_SUPPORTED)
@@ -131,6 +145,9 @@ VECT_VAR_DECL(expected2,uint,8,8) [] = { 0xff, 0xff, 0xff, 0xff,
VECT_VAR_DECL(expected2,uint,16,4) [] = { 0xfff3, 0xfff3, 0xfff3, 0xfff3 };
VECT_VAR_DECL(expected2,uint,32,2) [] = { 0xfffffff1, 0xfffffff1 };
VECT_VAR_DECL(expected2,uint,64,1) [] = { 0xfffffffffffffff0 };
+#if defined (__ARM_FEATURE_CRYPTO)
+VECT_VAR_DECL(expected2,poly,64,1) [] = { 0xfffffffffffffff0 };
+#endif
VECT_VAR_DECL(expected2,poly,8,8) [] = { 0xf7, 0xf7, 0xf7, 0xf7,
0xf7, 0xf7, 0xf7, 0xf7 };
VECT_VAR_DECL(expected2,poly,16,4) [] = { 0xfff3, 0xfff3, 0xfff3, 0xfff3 };
@@ -159,6 +176,10 @@ VECT_VAR_DECL(expected2,uint,32,4) [] = { 0xfffffff0, 0xfffffff0,
0xfffffff0, 0xfffffff0 };
VECT_VAR_DECL(expected2,uint,64,2) [] = { 0xfffffffffffffff0,
0xfffffffffffffff0 };
+#if defined (__ARM_FEATURE_CRYPTO)
+VECT_VAR_DECL(expected2,poly,64,2) [] = { 0xfffffffffffffff0,
+ 0xfffffffffffffff0 };
+#endif
VECT_VAR_DECL(expected2,poly,8,16) [] = { 0xf5, 0xf5, 0xf5, 0xf5,
0xf5, 0xf5, 0xf5, 0xf5,
0xf5, 0xf5, 0xf5, 0xf5,
@@ -204,6 +225,9 @@ VECT_VAR_DECL(expected2,hfloat,32,4) [] = { 0xc1700000, 0xc1700000,
TEST_VDUP_LANEQ(, uint, u, 64, 1, 2, 0);
TEST_VDUP_LANEQ(, poly, p, 8, 8, 16, 7);
TEST_VDUP_LANEQ(, poly, p, 16, 4, 8, 3);
+#if defined (__ARM_FEATURE_CRYPTO)
+ AARCH64_ONLY(TEST_VDUP_LANEQ(, poly, p, 64, 1, 2, 0));
+#endif
#if defined (FP16_SUPPORTED)
TEST_VDUP_LANEQ(, float, f, 16, 4, 8, 3);
#endif
@@ -219,6 +243,9 @@ VECT_VAR_DECL(expected2,hfloat,32,4) [] = { 0xc1700000, 0xc1700000,
TEST_VDUP_LANEQ(q, uint, u, 64, 2, 2, 0);
TEST_VDUP_LANEQ(q, poly, p, 8, 16, 16, 5);
TEST_VDUP_LANEQ(q, poly, p, 16, 8, 8, 1);
+#if defined (__ARM_FEATURE_CRYPTO)
+ AARCH64_ONLY(TEST_VDUP_LANEQ(q, poly, p, 64, 2, 2, 0));
+#endif
#if defined (FP16_SUPPORTED)
TEST_VDUP_LANEQ(q, float, f, 16, 8, 8, 7);
#endif
@@ -1,6 +1,7 @@
#include <arm_neon.h>
#include "arm-neon-ref.h"
#include "compute-ref-data.h"
+/* { dg-additional-options "-march=armv8-a+crypto" { target aarch64*-*-* } } */
/* Expected results. */
VECT_VAR_DECL(expected,int,8,8) [] = { 0xf8, 0xf9, 0xfa, 0xfb,
@@ -16,6 +17,9 @@ VECT_VAR_DECL(expected,uint,64,1) [] = { 0xfffffffffffffff1 };
VECT_VAR_DECL(expected,poly,8,8) [] = { 0xf8, 0xf9, 0xfa, 0xfb,
0xfc, 0xfd, 0xfe, 0xff };
VECT_VAR_DECL(expected,poly,16,4) [] = { 0xfff4, 0xfff5, 0xfff6, 0xfff7 };
+#if defined (__ARM_FEATURE_CRYPTO)
+VECT_VAR_DECL(expected,poly,64,1) [] = { 0xfffffffffffffff1 };
+#endif
VECT_VAR_DECL(expected,hfloat,16,4) [] = { 0xca00, 0xc980, 0xc900, 0xc880 };
VECT_VAR_DECL(expected,hfloat,32,2) [] = { 0xc1600000, 0xc1500000 };
@@ -50,6 +54,9 @@ void exec_vget_high (void)
TEST_VGET_HIGH(uint, u, 64, 1, 2);
TEST_VGET_HIGH(poly, p, 8, 8, 16);
TEST_VGET_HIGH(poly, p, 16, 4, 8);
+#if defined (__ARM_FEATURE_CRYPTO)
+ TEST_VGET_HIGH(poly, p, 64, 1, 2);
+#endif
#if defined (__ARM_FP16_FORMAT_IEEE) || defined (__ARM_FP16_FORMAT_ALTERNATIVE)
TEST_VGET_HIGH(float, f, 16, 4, 8);
#endif
@@ -65,6 +72,7 @@ void exec_vget_high (void)
CHECK(TEST_MSG, uint, 64, 1, PRIx64, expected, "");
CHECK(TEST_MSG, poly, 8, 8, PRIx8, expected, "");
CHECK(TEST_MSG, poly, 16, 4, PRIx16, expected, "");
+ CHECK_CRYPTO(TEST_MSG, poly, 64, 1, PRIx64, expected, "");
CHECK_FP(TEST_MSG, float, 32, 2, PRIx32, expected, "");
}
@@ -1,6 +1,7 @@
#include <arm_neon.h>
#include "arm-neon-ref.h"
#include "compute-ref-data.h"
+/* { dg-additional-options "-march=armv8-a+crypto" { target aarch64*-*-* } } */
/* Expected results. */
int8_t expected_s8 = 0xf7;
@@ -13,6 +14,9 @@ uint32_t expected_u32 = 0xfffffff1;
uint64_t expected_u64 = 0xfffffffffffffff0;
poly8_t expected_p8 = 0xf6;
poly16_t expected_p16 = 0xfff2;
+#if defined (__ARM_FEATURE_CRYPTO)
+poly64_t expected_p64 = 0xfffffffffffffff0;
+#endif
hfloat16_t expected_f16 = 0xcb80;
hfloat32_t expected_f32 = 0xc1700000;
@@ -26,6 +30,9 @@ uint32_t expectedq_u32 = 0xfffffff2;
uint64_t expectedq_u64 = 0xfffffffffffffff1;
poly8_t expectedq_p8 = 0xfe;
poly16_t expectedq_p16 = 0xfff6;
+#if defined (__ARM_FEATURE_CRYPTO)
+poly64_t expectedq_p64 = 0xfffffffffffffff1;
+#endif
hfloat16_t expectedq_f16 = 0xca80;
hfloat32_t expectedq_f32 = 0xc1500000;
@@ -89,6 +96,9 @@ void exec_vget_lane (void)
VAR_DECL(var, uint, 64);
VAR_DECL(var, poly, 8);
VAR_DECL(var, poly, 16);
+#if defined (__ARM_FEATURE_CRYPTO)
+ VAR_DECL(var, poly, 64);
+#endif
#if defined (__ARM_FP16_FORMAT_IEEE) || defined (__ARM_FP16_FORMAT_ALTERNATIVE)
VAR_DECL(var, float, 16);
#endif
@@ -114,6 +124,9 @@ void exec_vget_lane (void)
TEST_VGET_LANE(, uint, u, 64, 1, 0);
TEST_VGET_LANE(, poly, p, 8, 8, 6);
TEST_VGET_LANE(, poly, p, 16, 4, 2);
+#if defined (__ARM_FEATURE_CRYPTO)
+ TEST_VGET_LANE(, poly, p, 64, 1, 0);
+#endif
#if defined (__ARM_FP16_FORMAT_IEEE) || defined (__ARM_FP16_FORMAT_ALTERNATIVE)
TEST_VGET_LANE_FP(, float, f, 16, 4, 1);
#endif
@@ -129,6 +142,9 @@ void exec_vget_lane (void)
TEST_VGET_LANE(q, uint, u, 64, 2, 1);
TEST_VGET_LANE(q, poly, p, 8, 16, 14);
TEST_VGET_LANE(q, poly, p, 16, 8, 6);
+#if defined (__ARM_FEATURE_CRYPTO)
+ AARCH64_ONLY(TEST_VGET_LANE(q, poly, p, 64, 2, 1));
+#endif
#if defined (__ARM_FP16_FORMAT_IEEE) || defined (__ARM_FP16_FORMAT_ALTERNATIVE)
TEST_VGET_LANE_FP(q, float, f, 16, 8, 3);
#endif
@@ -1,6 +1,7 @@
#include <arm_neon.h>
#include "arm-neon-ref.h"
#include "compute-ref-data.h"
+/* { dg-additional-options "-march=armv8-a+crypto" { target aarch64*-*-* } } */
/* Expected results. */
VECT_VAR_DECL(expected,int,8,8) [] = { 0xf0, 0xf1, 0xf2, 0xf3,
@@ -16,6 +17,9 @@ VECT_VAR_DECL(expected,uint,64,1) [] = { 0xfffffffffffffff0 };
VECT_VAR_DECL(expected,poly,8,8) [] = { 0xf0, 0xf1, 0xf2, 0xf3,
0xf4, 0xf5, 0xf6, 0xf7 };
VECT_VAR_DECL(expected,poly,16,4) [] = { 0xfff0, 0xfff1, 0xfff2, 0xfff3 };
+#if defined (__ARM_FEATURE_CRYPTO)
+VECT_VAR_DECL(expected,poly,64,1) [] = { 0xfffffffffffffff0 };
+#endif
VECT_VAR_DECL(expected,hfloat,16,4) [] = { 0xcc00, 0xcb80, 0xcb00, 0xca80 };
VECT_VAR_DECL(expected,hfloat,32,2) [] = { 0xc1800000, 0xc1700000 };
@@ -50,6 +54,9 @@ void exec_vget_low (void)
TEST_VGET_LOW(uint, u, 64, 1, 2);
TEST_VGET_LOW(poly, p, 8, 8, 16);
TEST_VGET_LOW(poly, p, 16, 4, 8);
+#if defined (__ARM_FEATURE_CRYPTO)
+ TEST_VGET_LOW(poly, p, 64, 1, 2);
+#endif
#if defined (__ARM_FP16_FORMAT_IEEE) || defined (__ARM_FP16_FORMAT_ALTERNATIVE)
TEST_VGET_LOW(float, f, 16, 4, 8);
#endif
@@ -65,6 +72,9 @@ void exec_vget_low (void)
CHECK(TEST_MSG, uint, 64, 1, PRIx64, expected, "");
CHECK(TEST_MSG, poly, 8, 8, PRIx8, expected, "");
CHECK(TEST_MSG, poly, 16, 4, PRIx16, expected, "");
+#if defined (__ARM_FEATURE_CRYPTO)
+ CHECK(TEST_MSG, poly, 64, 1, PRIx64, expected, "");
+#endif
#if defined (__ARM_FP16_FORMAT_IEEE) || defined (__ARM_FP16_FORMAT_ALTERNATIVE)
CHECK_FP(TEST_MSG, float, 16, 4, PRIx16, expected, "");
#endif
@@ -1,6 +1,7 @@
#include <arm_neon.h>
#include "arm-neon-ref.h"
#include "compute-ref-data.h"
+/* { dg-additional-options "-march=armv8-a+crypto" { target aarch64*-*-* } } */
/* Expected results. */
VECT_VAR_DECL(expected,int,8,8) [] = { 0xf0, 0xf1, 0xf2, 0xf3,
@@ -16,6 +17,9 @@ VECT_VAR_DECL(expected,uint,64,1) [] = { 0xfffffffffffffff0 };
VECT_VAR_DECL(expected,poly,8,8) [] = { 0xf0, 0xf1, 0xf2, 0xf3,
0xf4, 0xf5, 0xf6, 0xf7 };
VECT_VAR_DECL(expected,poly,16,4) [] = { 0xfff0, 0xfff1, 0xfff2, 0xfff3 };
+#if defined (__ARM_FEATURE_CRYPTO)
+VECT_VAR_DECL(expected,poly,64,1) [] = { 0xfffffffffffffff0 };
+#endif
VECT_VAR_DECL(expected,hfloat,16,4) [] = { 0xcc00, 0xcb80, 0xcb00, 0xca80 };
VECT_VAR_DECL(expected,hfloat,32,2) [] = { 0xc1800000, 0xc1700000 };
VECT_VAR_DECL(expected,int,8,16) [] = { 0xf0, 0xf1, 0xf2, 0xf3,
@@ -33,7 +37,7 @@ VECT_VAR_DECL(expected,uint,8,16) [] = { 0xf0, 0xf1, 0xf2, 0xf3,
0xf8, 0xf9, 0xfa, 0xfb,
0xfc, 0xfd, 0xfe, 0xff };
VECT_VAR_DECL(expected,uint,16,8) [] = { 0xfff0, 0xfff1, 0xfff2,
- 0xfff3, 0xfff4, 0xfff5,
+ 0xfff3, 0xfff4, 0xfff5,
0xfff6, 0xfff7 };
VECT_VAR_DECL(expected,uint,32,4) [] = { 0xfffffff0, 0xfffffff1,
0xfffffff2, 0xfffffff3 };
@@ -45,6 +49,10 @@ VECT_VAR_DECL(expected,poly,8,16) [] = { 0xf0, 0xf1, 0xf2, 0xf3,
0xfc, 0xfd, 0xfe, 0xff };
VECT_VAR_DECL(expected,poly,16,8) [] = { 0xfff0, 0xfff1, 0xfff2, 0xfff3,
0xfff4, 0xfff5, 0xfff6, 0xfff7 };
+#if defined (__ARM_FEATURE_CRYPTO)
+VECT_VAR_DECL(expected,poly,64,2) [] = { 0xfffffffffffffff0,
+ 0xfffffffffffffff1 };
+#endif
VECT_VAR_DECL(expected,hfloat,16,8) [] = { 0xcc00, 0xcb80, 0xcb00, 0xca80,
0xca00, 0xc980, 0xc900, 0xc880 };
VECT_VAR_DECL(expected,hfloat,32,4) [] = { 0xc1800000, 0xc1700000,
@@ -1,6 +1,7 @@
#include <arm_neon.h>
#include "arm-neon-ref.h"
#include "compute-ref-data.h"
+/* { dg-additional-options "-march=armv8-a+crypto" { target aarch64*-*-* } } */
/* Expected results. */
/* Chunk 0. */
@@ -17,6 +18,9 @@ VECT_VAR_DECL(expected0,uint,64,1) [] = { 0xfffffffffffffff0 };
VECT_VAR_DECL(expected0,poly,8,8) [] = { 0xf0, 0xf0, 0xf0, 0xf0,
0xf0, 0xf0, 0xf0, 0xf0 };
VECT_VAR_DECL(expected0,poly,16,4) [] = { 0xfff0, 0xfff0, 0xfff0, 0xfff0 };
+#if defined (__ARM_FEATURE_CRYPTO)
+VECT_VAR_DECL(expected0,poly,64,1) [] = { 0xfffffffffffffff0 };
+#endif
VECT_VAR_DECL(expected0,hfloat,16,4) [] = { 0xcc00, 0xcc00, 0xcc00, 0xcc00 };
VECT_VAR_DECL(expected0,hfloat,32,2) [] = { 0xc1800000, 0xc1800000 };
VECT_VAR_DECL(expected0,int,8,16) [] = { 0xf0, 0xf0, 0xf0, 0xf0,
@@ -45,6 +49,10 @@ VECT_VAR_DECL(expected0,poly,8,16) [] = { 0xf0, 0xf0, 0xf0, 0xf0,
0xf0, 0xf0, 0xf0, 0xf0 };
VECT_VAR_DECL(expected0,poly,16,8) [] = { 0xfff0, 0xfff0, 0xfff0, 0xfff0,
0xfff0, 0xfff0, 0xfff0, 0xfff0 };
+#if defined (__ARM_FEATURE_CRYPTO)
+VECT_VAR_DECL(expected0,poly,64,2) [] = { 0xfffffffffffffff0,
+ 0xfffffffffffffff0 };
+#endif
VECT_VAR_DECL(expected0,hfloat,16,8) [] = { 0xcc00, 0xcc00, 0xcc00, 0xcc00,
0xcc00, 0xcc00, 0xcc00, 0xcc00 };
VECT_VAR_DECL(expected0,hfloat,32,4) [] = { 0xc1800000, 0xc1800000,
@@ -64,6 +72,9 @@ VECT_VAR_DECL(expected1,uint,64,1) [] = { 0xfffffffffffffff1 };
VECT_VAR_DECL(expected1,poly,8,8) [] = { 0xf1, 0xf1, 0xf1, 0xf1,
0xf1, 0xf1, 0xf1, 0xf1 };
VECT_VAR_DECL(expected1,poly,16,4) [] = { 0xfff1, 0xfff1, 0xfff1, 0xfff1 };
+#if defined (__ARM_FEATURE_CRYPTO)
+VECT_VAR_DECL(expected1,poly,64,1) [] = { 0xfffffffffffffff1 };
+#endif
VECT_VAR_DECL(expected1,hfloat,16,4) [] = { 0xcb80, 0xcb80, 0xcb80, 0xcb80 };
VECT_VAR_DECL(expected1,hfloat,32,2) [] = { 0xc1700000, 0xc1700000 };
VECT_VAR_DECL(expected1,int,8,16) [] = { 0xf1, 0xf1, 0xf1, 0xf1,
@@ -92,6 +103,10 @@ VECT_VAR_DECL(expected1,poly,8,16) [] = { 0xf1, 0xf1, 0xf1, 0xf1,
0xf1, 0xf1, 0xf1, 0xf1 };
VECT_VAR_DECL(expected1,poly,16,8) [] = { 0xfff1, 0xfff1, 0xfff1, 0xfff1,
0xfff1, 0xfff1, 0xfff1, 0xfff1 };
+#if defined (__ARM_FEATURE_CRYPTO)
+VECT_VAR_DECL(expected1,poly,64,2) [] = { 0xfffffffffffffff1,
+ 0xfffffffffffffff1 };
+#endif
VECT_VAR_DECL(expected1,hfloat,16,8) [] = { 0xcb80, 0xcb80, 0xcb80, 0xcb80,
0xcb80, 0xcb80, 0xcb80, 0xcb80 };
VECT_VAR_DECL(expected1,hfloat,32,4) [] = { 0xc1700000, 0xc1700000,
@@ -111,6 +126,9 @@ VECT_VAR_DECL(expected2,uint,64,1) [] = { 0xfffffffffffffff2 };
VECT_VAR_DECL(expected2,poly,8,8) [] = { 0xf2, 0xf2, 0xf2, 0xf2,
0xf2, 0xf2, 0xf2, 0xf2 };
VECT_VAR_DECL(expected2,poly,16,4) [] = { 0xfff2, 0xfff2, 0xfff2, 0xfff2 };
+#if defined (__ARM_FEATURE_CRYPTO)
+VECT_VAR_DECL(expected2,poly,64,1) [] = { 0xfffffffffffffff2 };
+#endif
VECT_VAR_DECL(expected2,hfloat,16,4) [] = { 0xcb00, 0xcb00, 0xcb00, 0xcb00 };
VECT_VAR_DECL(expected2,hfloat,32,2) [] = { 0xc1600000, 0xc1600000 };
VECT_VAR_DECL(expected2,int,8,16) [] = { 0xf2, 0xf2, 0xf2, 0xf2,
@@ -139,6 +157,10 @@ VECT_VAR_DECL(expected2,poly,8,16) [] = { 0xf2, 0xf2, 0xf2, 0xf2,
0xf2, 0xf2, 0xf2, 0xf2 };
VECT_VAR_DECL(expected2,poly,16,8) [] = { 0xfff2, 0xfff2, 0xfff2, 0xfff2,
0xfff2, 0xfff2, 0xfff2, 0xfff2 };
+#if defined (__ARM_FEATURE_CRYPTO)
+VECT_VAR_DECL(expected2,poly,64,2) [] = { 0xfffffffffffffff2,
+ 0xfffffffffffffff2 };
+#endif
VECT_VAR_DECL(expected2,hfloat,16,8) [] = { 0xcb00, 0xcb00, 0xcb00, 0xcb00,
0xcb00, 0xcb00, 0xcb00, 0xcb00 };
VECT_VAR_DECL(expected2,hfloat,32,4) [] = { 0xc1600000, 0xc1600000,
@@ -1,6 +1,7 @@
#include <arm_neon.h>
#include "arm-neon-ref.h"
#include "compute-ref-data.h"
+/* { dg-additional-options "-march=armv8-a+crypto" { target aarch64*-*-* } } */
/* Expected results. */
@@ -18,6 +19,11 @@ VECT_VAR_DECL(expected_vld2_0,uint,64,1) [] = { 0xfffffffffffffff0 };
VECT_VAR_DECL(expected_vld2_0,poly,8,8) [] = { 0xf0, 0xf1, 0xf2, 0xf3,
0xf4, 0xf5, 0xf6, 0xf7 };
VECT_VAR_DECL(expected_vld2_0,poly,16,4) [] = { 0xfff0, 0xfff1, 0xfff2, 0xfff3 };
+#if defined (__ARM_FEATURE_CRYPTO)
+VECT_VAR_DECL(expected_vld2_0,poly,64,1) [] = { 0xfffffffffffffff0 };
+VECT_VAR_DECL(expected_vld2_0,poly,64,2) [] = { 0xfffffffffffffff0,
+ 0xfffffffffffffff1 };
+#endif
VECT_VAR_DECL(expected_vld2_0,hfloat,16,4) [] = { 0xcc00, 0xcb80, 0xcb00, 0xca80 };
VECT_VAR_DECL(expected_vld2_0,hfloat,32,2) [] = { 0xc1800000, 0xc1700000 };
VECT_VAR_DECL(expected_vld2_0,int,8,16) [] = { 0xf0, 0xf1, 0xf2, 0xf3,
@@ -61,6 +67,11 @@ VECT_VAR_DECL(expected_vld2_1,uint,64,1) [] = { 0xfffffffffffffff1 };
VECT_VAR_DECL(expected_vld2_1,poly,8,8) [] = { 0xf8, 0xf9, 0xfa, 0xfb,
0xfc, 0xfd, 0xfe, 0xff };
VECT_VAR_DECL(expected_vld2_1,poly,16,4) [] = { 0xfff4, 0xfff5, 0xfff6, 0xfff7 };
+#if defined (__ARM_FEATURE_CRYPTO)
+VECT_VAR_DECL(expected_vld2_1,poly,64,1) [] = { 0xfffffffffffffff1 };
+VECT_VAR_DECL(expected_vld2_1,poly,64,2) [] = { 0xfffffffffffffff2,
+ 0xfffffffffffffff3 };
+#endif
VECT_VAR_DECL(expected_vld2_1,hfloat,16,4) [] = { 0xca00, 0xc980, 0xc900, 0xc880 };
VECT_VAR_DECL(expected_vld2_1,hfloat,32,2) [] = { 0xc1600000, 0xc1500000 };
VECT_VAR_DECL(expected_vld2_1,int,8,16) [] = { 0x0, 0x1, 0x2, 0x3,
@@ -104,6 +115,11 @@ VECT_VAR_DECL(expected_vld3_0,uint,64,1) [] = { 0xfffffffffffffff0 };
VECT_VAR_DECL(expected_vld3_0,poly,8,8) [] = { 0xf0, 0xf1, 0xf2, 0xf3,
0xf4, 0xf5, 0xf6, 0xf7 };
VECT_VAR_DECL(expected_vld3_0,poly,16,4) [] = { 0xfff0, 0xfff1, 0xfff2, 0xfff3 };
+#if defined (__ARM_FEATURE_CRYPTO)
+VECT_VAR_DECL(expected_vld3_0,poly,64,1) [] = { 0xfffffffffffffff0 };
+VECT_VAR_DECL(expected_vld3_0,poly,64,2) [] = { 0xfffffffffffffff0,
+ 0xfffffffffffffff1 };
+#endif
VECT_VAR_DECL(expected_vld3_0,hfloat,16,4) [] = { 0xcc00, 0xcb80, 0xcb00, 0xca80 };
VECT_VAR_DECL(expected_vld3_0,hfloat,32,2) [] = { 0xc1800000, 0xc1700000 };
VECT_VAR_DECL(expected_vld3_0,int,8,16) [] = { 0xf0, 0xf1, 0xf2, 0xf3,
@@ -147,6 +163,11 @@ VECT_VAR_DECL(expected_vld3_1,uint,64,1) [] = { 0xfffffffffffffff1 };
VECT_VAR_DECL(expected_vld3_1,poly,8,8) [] = { 0xf8, 0xf9, 0xfa, 0xfb,
0xfc, 0xfd, 0xfe, 0xff };
VECT_VAR_DECL(expected_vld3_1,poly,16,4) [] = { 0xfff4, 0xfff5, 0xfff6, 0xfff7 };
+#if defined (__ARM_FEATURE_CRYPTO)
+VECT_VAR_DECL(expected_vld3_1,poly,64,1) [] = { 0xfffffffffffffff1 };
+VECT_VAR_DECL(expected_vld3_1,poly,64,2) [] = { 0xfffffffffffffff2,
+ 0xfffffffffffffff3 };
+#endif
VECT_VAR_DECL(expected_vld3_1,hfloat,16,4) [] = { 0xca00, 0xc980, 0xc900, 0xc880 };
VECT_VAR_DECL(expected_vld3_1,hfloat,32,2) [] = { 0xc1600000, 0xc1500000 };
VECT_VAR_DECL(expected_vld3_1,int,8,16) [] = { 0x0, 0x1, 0x2, 0x3,
@@ -193,6 +214,11 @@ VECT_VAR_DECL(expected_vld3_2,poly,8,8) [] = { 0x0, 0x1, 0x2, 0x3,
0x4, 0x5, 0x6, 0x7 };
VECT_VAR_DECL(expected_vld3_2,poly,16,4) [] = { 0xfff8, 0xfff9,
0xfffa, 0xfffb };
+#if defined (__ARM_FEATURE_CRYPTO)
+VECT_VAR_DECL(expected_vld3_2,poly,64,1) [] = { 0xfffffffffffffff2 };
+VECT_VAR_DECL(expected_vld3_2,poly,64,2) [] = { 0xfffffffffffffff4,
+ 0xfffffffffffffff5 };
+#endif
VECT_VAR_DECL(expected_vld3_2,hfloat,16,4) [] = { 0xc800, 0xc700, 0xc600, 0xc500 };
VECT_VAR_DECL(expected_vld3_2,hfloat,32,2) [] = { 0xc1400000, 0xc1300000 };
VECT_VAR_DECL(expected_vld3_2,int,8,16) [] = { 0x10, 0x11, 0x12, 0x13,
@@ -238,6 +264,11 @@ VECT_VAR_DECL(expected_vld4_0,uint,64,1) [] = { 0xfffffffffffffff0 };
VECT_VAR_DECL(expected_vld4_0,poly,8,8) [] = { 0xf0, 0xf1, 0xf2, 0xf3,
0xf4, 0xf5, 0xf6, 0xf7 };
VECT_VAR_DECL(expected_vld4_0,poly,16,4) [] = { 0xfff0, 0xfff1, 0xfff2, 0xfff3 };
+#if defined (__ARM_FEATURE_CRYPTO)
+VECT_VAR_DECL(expected_vld4_0,poly,64,1) [] = { 0xfffffffffffffff0 };
+VECT_VAR_DECL(expected_vld4_0,poly,64,2) [] = { 0xfffffffffffffff0,
+ 0xfffffffffffffff1 };
+#endif
VECT_VAR_DECL(expected_vld4_0,hfloat,16,4) [] = { 0xcc00, 0xcb80, 0xcb00, 0xca80 };
VECT_VAR_DECL(expected_vld4_0,hfloat,32,2) [] = { 0xc1800000, 0xc1700000 };
VECT_VAR_DECL(expected_vld4_0,int,8,16) [] = { 0xf0, 0xf1, 0xf2, 0xf3,
@@ -281,6 +312,11 @@ VECT_VAR_DECL(expected_vld4_1,uint,64,1) [] = { 0xfffffffffffffff1 };
VECT_VAR_DECL(expected_vld4_1,poly,8,8) [] = { 0xf8, 0xf9, 0xfa, 0xfb,
0xfc, 0xfd, 0xfe, 0xff };
VECT_VAR_DECL(expected_vld4_1,poly,16,4) [] = { 0xfff4, 0xfff5, 0xfff6, 0xfff7 };
+#if defined (__ARM_FEATURE_CRYPTO)
+VECT_VAR_DECL(expected_vld4_1,poly,64,1) [] = { 0xfffffffffffffff1 };
+VECT_VAR_DECL(expected_vld4_1,poly,64,2) [] = { 0xfffffffffffffff2,
+ 0xfffffffffffffff3 };
+#endif
VECT_VAR_DECL(expected_vld4_1,hfloat,16,4) [] = { 0xca00, 0xc980, 0xc900, 0xc880 };
VECT_VAR_DECL(expected_vld4_1,hfloat,32,2) [] = { 0xc1600000, 0xc1500000 };
VECT_VAR_DECL(expected_vld4_1,int,8,16) [] = { 0x0, 0x1, 0x2, 0x3,
@@ -324,6 +360,11 @@ VECT_VAR_DECL(expected_vld4_2,uint,64,1) [] = { 0xfffffffffffffff2 };
VECT_VAR_DECL(expected_vld4_2,poly,8,8) [] = { 0x0, 0x1, 0x2, 0x3,
0x4, 0x5, 0x6, 0x7 };
VECT_VAR_DECL(expected_vld4_2,poly,16,4) [] = { 0xfff8, 0xfff9, 0xfffa, 0xfffb };
+#if defined (__ARM_FEATURE_CRYPTO)
+VECT_VAR_DECL(expected_vld4_2,poly,64,1) [] = { 0xfffffffffffffff2 };
+VECT_VAR_DECL(expected_vld4_2,poly,64,2) [] = { 0xfffffffffffffff4,
+ 0xfffffffffffffff5 };
+#endif
VECT_VAR_DECL(expected_vld4_2,hfloat,16,4) [] = { 0xc800, 0xc700, 0xc600, 0xc500 };
VECT_VAR_DECL(expected_vld4_2,hfloat,32,2) [] = { 0xc1400000, 0xc1300000 };
VECT_VAR_DECL(expected_vld4_2,int,8,16) [] = { 0x10, 0x11, 0x12, 0x13,
@@ -367,6 +408,11 @@ VECT_VAR_DECL(expected_vld4_3,uint,64,1) [] = { 0xfffffffffffffff3 };
VECT_VAR_DECL(expected_vld4_3,poly,8,8) [] = { 0x8, 0x9, 0xa, 0xb,
0xc, 0xd, 0xe, 0xf };
VECT_VAR_DECL(expected_vld4_3,poly,16,4) [] = { 0xfffc, 0xfffd, 0xfffe, 0xffff };
+#if defined (__ARM_FEATURE_CRYPTO)
+VECT_VAR_DECL(expected_vld4_3,poly,64,1) [] = { 0xfffffffffffffff3 };
+VECT_VAR_DECL(expected_vld4_3,poly,64,2) [] = { 0xfffffffffffffff6,
+ 0xfffffffffffffff7 };
+#endif
VECT_VAR_DECL(expected_vld4_3,hfloat,16,4) [] = { 0xc400, 0xc200, 0xc000, 0xbc00 };
VECT_VAR_DECL(expected_vld4_3,hfloat,32,2) [] = { 0xc1200000, 0xc1100000 };
VECT_VAR_DECL(expected_vld4_3,int,8,16) [] = { 0x20, 0x21, 0x22, 0x23,
@@ -424,27 +470,42 @@ void exec_vldX (void)
&(VECT_VAR(result_bis_##X, T1, W, N)[Y*N]), \
sizeof(VECT_VAR(result, T1, W, N)));
+#if defined (__ARM_FEATURE_CRYPTO)
+#define DECL_VLDX_CRYPTO(T1, W, N, X) \
+ DECL_VLDX(T1, W, N, X)
+#define TEST_VLDX_CRYPTO(Q, T1, T2, W, N, X) \
+ TEST_VLDX(Q, T1, T2, W, N, X)
+#define TEST_EXTRA_CHUNK_CRYPTO(T1, W, N, X, Y) \
+ TEST_EXTRA_CHUNK(T1, W, N, X, Y)
+#else
+#define DECL_VLDX_CRYPTO(T1, W, N, X)
+#define TEST_VLDX_CRYPTO(Q, T1, T2, W, N, X)
+#define TEST_EXTRA_CHUNK_CRYPTO(T1, W, N, X, Y)
+#endif
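+/* For example, TEST_VLDX_CRYPTO (, poly, p, 64, 1, 2) runs the vld2_p64
+   test when crypto is available and expands to nothing otherwise.  */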
+
/* We need all variants in 64 bits, but there is no 64x2 variant. */
-#define DECL_ALL_VLDX_NO_FP16(X) \
- DECL_VLDX(int, 8, 8, X); \
- DECL_VLDX(int, 16, 4, X); \
- DECL_VLDX(int, 32, 2, X); \
- DECL_VLDX(int, 64, 1, X); \
- DECL_VLDX(uint, 8, 8, X); \
- DECL_VLDX(uint, 16, 4, X); \
- DECL_VLDX(uint, 32, 2, X); \
- DECL_VLDX(uint, 64, 1, X); \
- DECL_VLDX(poly, 8, 8, X); \
- DECL_VLDX(poly, 16, 4, X); \
- DECL_VLDX(float, 32, 2, X); \
- DECL_VLDX(int, 8, 16, X); \
- DECL_VLDX(int, 16, 8, X); \
- DECL_VLDX(int, 32, 4, X); \
- DECL_VLDX(uint, 8, 16, X); \
- DECL_VLDX(uint, 16, 8, X); \
- DECL_VLDX(uint, 32, 4, X); \
- DECL_VLDX(poly, 8, 16, X); \
- DECL_VLDX(poly, 16, 8, X); \
+#define DECL_ALL_VLDX_NO_FP16(X) \
+ DECL_VLDX(int, 8, 8, X); \
+ DECL_VLDX(int, 16, 4, X); \
+ DECL_VLDX(int, 32, 2, X); \
+ DECL_VLDX(int, 64, 1, X); \
+ DECL_VLDX(uint, 8, 8, X); \
+ DECL_VLDX(uint, 16, 4, X); \
+ DECL_VLDX(uint, 32, 2, X); \
+ DECL_VLDX(uint, 64, 1, X); \
+ DECL_VLDX(poly, 8, 8, X); \
+ DECL_VLDX(poly, 16, 4, X); \
+ DECL_VLDX_CRYPTO(poly, 64, 1, X); \
+ DECL_VLDX(float, 32, 2, X); \
+ DECL_VLDX(int, 8, 16, X); \
+ DECL_VLDX(int, 16, 8, X); \
+ DECL_VLDX(int, 32, 4, X); \
+ DECL_VLDX(uint, 8, 16, X); \
+ DECL_VLDX(uint, 16, 8, X); \
+ DECL_VLDX(uint, 32, 4, X); \
+ DECL_VLDX(poly, 8, 16, X); \
+ DECL_VLDX(poly, 16, 8, X); \
+ AARCH64_ONLY(DECL_VLDX_CRYPTO(poly, 64, 2, X)); \
DECL_VLDX(float, 32, 4, X)
#if defined (__ARM_FP16_FORMAT_IEEE) || defined (__ARM_FP16_FORMAT_ALTERNATIVE)
@@ -456,26 +517,28 @@ void exec_vldX (void)
#define DECL_ALL_VLDX(X) DECL_ALL_VLDX_NO_FP16(X)
#endif
-#define TEST_ALL_VLDX_NO_FP16(X) \
- TEST_VLDX(, int, s, 8, 8, X); \
- TEST_VLDX(, int, s, 16, 4, X); \
- TEST_VLDX(, int, s, 32, 2, X); \
- TEST_VLDX(, int, s, 64, 1, X); \
- TEST_VLDX(, uint, u, 8, 8, X); \
- TEST_VLDX(, uint, u, 16, 4, X); \
- TEST_VLDX(, uint, u, 32, 2, X); \
- TEST_VLDX(, uint, u, 64, 1, X); \
- TEST_VLDX(, poly, p, 8, 8, X); \
- TEST_VLDX(, poly, p, 16, 4, X); \
- TEST_VLDX(, float, f, 32, 2, X); \
- TEST_VLDX(q, int, s, 8, 16, X); \
- TEST_VLDX(q, int, s, 16, 8, X); \
- TEST_VLDX(q, int, s, 32, 4, X); \
- TEST_VLDX(q, uint, u, 8, 16, X); \
- TEST_VLDX(q, uint, u, 16, 8, X); \
- TEST_VLDX(q, uint, u, 32, 4, X); \
- TEST_VLDX(q, poly, p, 8, 16, X); \
- TEST_VLDX(q, poly, p, 16, 8, X); \
+#define TEST_ALL_VLDX_NO_FP16(X) \
+ TEST_VLDX(, int, s, 8, 8, X); \
+ TEST_VLDX(, int, s, 16, 4, X); \
+ TEST_VLDX(, int, s, 32, 2, X); \
+ TEST_VLDX(, int, s, 64, 1, X); \
+ TEST_VLDX(, uint, u, 8, 8, X); \
+ TEST_VLDX(, uint, u, 16, 4, X); \
+ TEST_VLDX(, uint, u, 32, 2, X); \
+ TEST_VLDX(, uint, u, 64, 1, X); \
+ TEST_VLDX(, poly, p, 8, 8, X); \
+ TEST_VLDX(, poly, p, 16, 4, X); \
+ TEST_VLDX_CRYPTO(, poly, p, 64, 1, X); \
+ TEST_VLDX(, float, f, 32, 2, X); \
+ TEST_VLDX(q, int, s, 8, 16, X); \
+ TEST_VLDX(q, int, s, 16, 8, X); \
+ TEST_VLDX(q, int, s, 32, 4, X); \
+ TEST_VLDX(q, uint, u, 8, 16, X); \
+ TEST_VLDX(q, uint, u, 16, 8, X); \
+ TEST_VLDX(q, uint, u, 32, 4, X); \
+ TEST_VLDX(q, poly, p, 8, 16, X); \
+ TEST_VLDX(q, poly, p, 16, 8, X); \
+ AARCH64_ONLY(TEST_VLDX_CRYPTO(q, poly, p, 64, 2, X)); \
TEST_VLDX(q, float, f, 32, 4, X)
#if defined (__ARM_FP16_FORMAT_IEEE) || defined (__ARM_FP16_FORMAT_ALTERNATIVE)
@@ -487,26 +550,28 @@ void exec_vldX (void)
#define TEST_ALL_VLDX(X) TEST_ALL_VLDX_NO_FP16(X)
#endif
-#define TEST_ALL_EXTRA_CHUNKS_NO_FP16(X, Y) \
- TEST_EXTRA_CHUNK(int, 8, 8, X, Y); \
- TEST_EXTRA_CHUNK(int, 16, 4, X, Y); \
- TEST_EXTRA_CHUNK(int, 32, 2, X, Y); \
- TEST_EXTRA_CHUNK(int, 64, 1, X, Y); \
- TEST_EXTRA_CHUNK(uint, 8, 8, X, Y); \
- TEST_EXTRA_CHUNK(uint, 16, 4, X, Y); \
- TEST_EXTRA_CHUNK(uint, 32, 2, X, Y); \
- TEST_EXTRA_CHUNK(uint, 64, 1, X, Y); \
- TEST_EXTRA_CHUNK(poly, 8, 8, X, Y); \
- TEST_EXTRA_CHUNK(poly, 16, 4, X, Y); \
- TEST_EXTRA_CHUNK(float, 32, 2, X, Y); \
- TEST_EXTRA_CHUNK(int, 8, 16, X, Y); \
- TEST_EXTRA_CHUNK(int, 16, 8, X, Y); \
- TEST_EXTRA_CHUNK(int, 32, 4, X, Y); \
- TEST_EXTRA_CHUNK(uint, 8, 16, X, Y); \
- TEST_EXTRA_CHUNK(uint, 16, 8, X, Y); \
- TEST_EXTRA_CHUNK(uint, 32, 4, X, Y); \
- TEST_EXTRA_CHUNK(poly, 8, 16, X, Y); \
- TEST_EXTRA_CHUNK(poly, 16, 8, X, Y); \
+#define TEST_ALL_EXTRA_CHUNKS_NO_FP16(X, Y) \
+ TEST_EXTRA_CHUNK(int, 8, 8, X, Y); \
+ TEST_EXTRA_CHUNK(int, 16, 4, X, Y); \
+ TEST_EXTRA_CHUNK(int, 32, 2, X, Y); \
+ TEST_EXTRA_CHUNK(int, 64, 1, X, Y); \
+ TEST_EXTRA_CHUNK(uint, 8, 8, X, Y); \
+ TEST_EXTRA_CHUNK(uint, 16, 4, X, Y); \
+ TEST_EXTRA_CHUNK(uint, 32, 2, X, Y); \
+ TEST_EXTRA_CHUNK(uint, 64, 1, X, Y); \
+ TEST_EXTRA_CHUNK(poly, 8, 8, X, Y); \
+ TEST_EXTRA_CHUNK(poly, 16, 4, X, Y); \
+ TEST_EXTRA_CHUNK_CRYPTO(poly, 64, 1, X, Y); \
+ TEST_EXTRA_CHUNK(float, 32, 2, X, Y); \
+ TEST_EXTRA_CHUNK(int, 8, 16, X, Y); \
+ TEST_EXTRA_CHUNK(int, 16, 8, X, Y); \
+ TEST_EXTRA_CHUNK(int, 32, 4, X, Y); \
+ TEST_EXTRA_CHUNK(uint, 8, 16, X, Y); \
+ TEST_EXTRA_CHUNK(uint, 16, 8, X, Y); \
+ TEST_EXTRA_CHUNK(uint, 32, 4, X, Y); \
+ TEST_EXTRA_CHUNK(poly, 8, 16, X, Y); \
+ TEST_EXTRA_CHUNK(poly, 16, 8, X, Y); \
+ AARCH64_ONLY(TEST_EXTRA_CHUNK_CRYPTO(poly, 64, 2, X, Y)); \
TEST_EXTRA_CHUNK(float, 32, 4, X, Y)
#if defined (__ARM_FP16_FORMAT_IEEE) || defined (__ARM_FP16_FORMAT_ALTERNATIVE)
@@ -519,27 +584,29 @@ void exec_vldX (void)
#endif
/* vldX supports all vector types except [u]int64x2. */
-#define CHECK_RESULTS_VLDX_NO_FP16(test_name,EXPECTED,comment) \
- CHECK(test_name, int, 8, 8, PRIx8, EXPECTED, comment); \
- CHECK(test_name, int, 16, 4, PRIx16, EXPECTED, comment); \
- CHECK(test_name, int, 32, 2, PRIx32, EXPECTED, comment); \
- CHECK(test_name, int, 64, 1, PRIx64, EXPECTED, comment); \
- CHECK(test_name, uint, 8, 8, PRIx8, EXPECTED, comment); \
- CHECK(test_name, uint, 16, 4, PRIx16, EXPECTED, comment); \
- CHECK(test_name, uint, 32, 2, PRIx32, EXPECTED, comment); \
- CHECK(test_name, uint, 64, 1, PRIx64, EXPECTED, comment); \
- CHECK(test_name, poly, 8, 8, PRIx8, EXPECTED, comment); \
- CHECK(test_name, poly, 16, 4, PRIx16, EXPECTED, comment); \
- CHECK_FP(test_name, float, 32, 2, PRIx32, EXPECTED, comment); \
- \
- CHECK(test_name, int, 8, 16, PRIx8, EXPECTED, comment); \
- CHECK(test_name, int, 16, 8, PRIx16, EXPECTED, comment); \
- CHECK(test_name, int, 32, 4, PRIx32, EXPECTED, comment); \
- CHECK(test_name, uint, 8, 16, PRIx8, EXPECTED, comment); \
- CHECK(test_name, uint, 16, 8, PRIx16, EXPECTED, comment); \
- CHECK(test_name, uint, 32, 4, PRIx32, EXPECTED, comment); \
- CHECK(test_name, poly, 8, 16, PRIx8, EXPECTED, comment); \
- CHECK(test_name, poly, 16, 8, PRIx16, EXPECTED, comment); \
+#define CHECK_RESULTS_VLDX_NO_FP16(test_name,EXPECTED,comment) \
+ CHECK(test_name, int, 8, 8, PRIx8, EXPECTED, comment); \
+ CHECK(test_name, int, 16, 4, PRIx16, EXPECTED, comment); \
+ CHECK(test_name, int, 32, 2, PRIx32, EXPECTED, comment); \
+ CHECK(test_name, int, 64, 1, PRIx64, EXPECTED, comment); \
+ CHECK(test_name, uint, 8, 8, PRIx8, EXPECTED, comment); \
+ CHECK(test_name, uint, 16, 4, PRIx16, EXPECTED, comment); \
+ CHECK(test_name, uint, 32, 2, PRIx32, EXPECTED, comment); \
+ CHECK(test_name, uint, 64, 1, PRIx64, EXPECTED, comment); \
+ CHECK(test_name, poly, 8, 8, PRIx8, EXPECTED, comment); \
+ CHECK(test_name, poly, 16, 4, PRIx16, EXPECTED, comment); \
+ CHECK_CRYPTO(test_name, poly, 64, 1, PRIx64, EXPECTED, comment); \
+ CHECK_FP(test_name, float, 32, 2, PRIx32, EXPECTED, comment); \
+ \
+ CHECK(test_name, int, 8, 16, PRIx8, EXPECTED, comment); \
+ CHECK(test_name, int, 16, 8, PRIx16, EXPECTED, comment); \
+ CHECK(test_name, int, 32, 4, PRIx32, EXPECTED, comment); \
+ CHECK(test_name, uint, 8, 16, PRIx8, EXPECTED, comment); \
+ CHECK(test_name, uint, 16, 8, PRIx16, EXPECTED, comment); \
+ CHECK(test_name, uint, 32, 4, PRIx32, EXPECTED, comment); \
+ CHECK(test_name, poly, 8, 16, PRIx8, EXPECTED, comment); \
+ CHECK(test_name, poly, 16, 8, PRIx16, EXPECTED, comment); \
+ AARCH64_ONLY(CHECK_CRYPTO(test_name, poly, 64, 2, PRIx64, EXPECTED, comment)); \
CHECK_FP(test_name, float, 32, 4, PRIx32, EXPECTED, comment)
#if defined (__ARM_FP16_FORMAT_IEEE) || defined (__ARM_FP16_FORMAT_ALTERNATIVE)
@@ -580,6 +647,10 @@ void exec_vldX (void)
PAD(buffer_vld2_pad, poly, 8, 8);
VECT_ARRAY_INIT2(buffer_vld2, poly, 16, 4);
PAD(buffer_vld2_pad, poly, 16, 4);
+#if defined (__ARM_FEATURE_CRYPTO)
+ VECT_ARRAY_INIT2(buffer_vld2, poly, 64, 1);
+ PAD(buffer_vld2_pad, poly, 64, 1);
+#endif
#if defined (__ARM_FP16_FORMAT_IEEE) || defined (__ARM_FP16_FORMAT_ALTERNATIVE)
VECT_ARRAY_INIT2(buffer_vld2, float, 16, 4);
PAD(buffer_vld2_pad, float, 16, 4);
@@ -607,6 +678,10 @@ void exec_vldX (void)
PAD(buffer_vld2_pad, poly, 8, 16);
VECT_ARRAY_INIT2(buffer_vld2, poly, 16, 8);
PAD(buffer_vld2_pad, poly, 16, 8);
+#if defined (__ARM_FEATURE_CRYPTO) && defined (__aarch64__)
+ VECT_ARRAY_INIT2(buffer_vld2, poly, 64, 2);
+ PAD(buffer_vld2_pad, poly, 64, 2);
+#endif
#if defined (__ARM_FP16_FORMAT_IEEE) || defined (__ARM_FP16_FORMAT_ALTERNATIVE)
VECT_ARRAY_INIT2(buffer_vld2, float, 16, 8);
PAD(buffer_vld2_pad, float, 16, 8);
@@ -635,6 +710,10 @@ void exec_vldX (void)
PAD(buffer_vld3_pad, poly, 8, 8);
VECT_ARRAY_INIT3(buffer_vld3, poly, 16, 4);
PAD(buffer_vld3_pad, poly, 16, 4);
+#if defined (__ARM_FEATURE_CRYPTO)
+ VECT_ARRAY_INIT3(buffer_vld3, poly, 64, 1);
+ PAD(buffer_vld3_pad, poly, 64, 1);
+#endif
#if defined (__ARM_FP16_FORMAT_IEEE) || defined (__ARM_FP16_FORMAT_ALTERNATIVE)
VECT_ARRAY_INIT3(buffer_vld3, float, 16, 4);
PAD(buffer_vld3_pad, float, 16, 4);
@@ -662,6 +741,10 @@ void exec_vldX (void)
PAD(buffer_vld3_pad, poly, 8, 16);
VECT_ARRAY_INIT3(buffer_vld3, poly, 16, 8);
PAD(buffer_vld3_pad, poly, 16, 8);
+#if defined (__ARM_FEATURE_CRYPTO) && defined (__aarch64__)
+ VECT_ARRAY_INIT3(buffer_vld3, poly, 64, 2);
+ PAD(buffer_vld3_pad, poly, 64, 2);
+#endif
#if defined (__ARM_FP16_FORMAT_IEEE) || defined (__ARM_FP16_FORMAT_ALTERNATIVE)
VECT_ARRAY_INIT3(buffer_vld3, float, 16, 8);
PAD(buffer_vld3_pad, float, 16, 8);
@@ -690,6 +773,10 @@ void exec_vldX (void)
PAD(buffer_vld4_pad, poly, 8, 8);
VECT_ARRAY_INIT4(buffer_vld4, poly, 16, 4);
PAD(buffer_vld4_pad, poly, 16, 4);
+#if defined (__ARM_FEATURE_CRYPTO)
+ VECT_ARRAY_INIT4(buffer_vld4, poly, 64, 1);
+ PAD(buffer_vld4_pad, poly, 64, 1);
+#endif
#if defined (__ARM_FP16_FORMAT_IEEE) || defined (__ARM_FP16_FORMAT_ALTERNATIVE)
VECT_ARRAY_INIT4(buffer_vld4, float, 16, 4);
PAD(buffer_vld4_pad, float, 16, 4);
@@ -717,6 +804,10 @@ void exec_vldX (void)
PAD(buffer_vld4_pad, poly, 8, 16);
VECT_ARRAY_INIT4(buffer_vld4, poly, 16, 8);
PAD(buffer_vld4_pad, poly, 16, 8);
+#if defined (__ARM_FEATURE_CRYPTO) && defined (__aarch64__)
+ VECT_ARRAY_INIT4(buffer_vld4, poly, 64, 2);
+ PAD(buffer_vld4_pad, poly, 64, 2);
+#endif
#if defined (__ARM_FP16_FORMAT_IEEE) || defined (__ARM_FP16_FORMAT_ALTERNATIVE)
VECT_ARRAY_INIT4(buffer_vld4, float, 16, 8);
PAD(buffer_vld4_pad, float, 16, 8);
@@ -1,6 +1,7 @@
#include <arm_neon.h>
#include "arm-neon-ref.h"
#include "compute-ref-data.h"
+/* { dg-additional-options "-march=armv8-a+crypto" { target aarch64*-*-* } } */
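+/* With +crypto, __ARM_FEATURE_CRYPTO is defined, so the poly64
+   expected results below are compiled in.  */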
/* Expected results. */
@@ -18,6 +19,9 @@ VECT_VAR_DECL(expected_vld2_0,uint,64,1) [] = { 0xfffffffffffffff0 };
VECT_VAR_DECL(expected_vld2_0,poly,8,8) [] = { 0xf0, 0xf1, 0xf0, 0xf1,
0xf0, 0xf1, 0xf0, 0xf1 };
VECT_VAR_DECL(expected_vld2_0,poly,16,4) [] = { 0xfff0, 0xfff1, 0xfff0, 0xfff1 };
+#if defined (__ARM_FEATURE_CRYPTO)
+VECT_VAR_DECL(expected_vld2_0,poly,64,1) [] = { 0xfffffffffffffff0 };
+#endif
VECT_VAR_DECL(expected_vld2_0,hfloat,16,4) [] = {0xcc00, 0xcb80, 0xcc00, 0xcb80 };
VECT_VAR_DECL(expected_vld2_0,hfloat,32,2) [] = { 0xc1800000, 0xc1700000 };
@@ -36,6 +40,9 @@ VECT_VAR_DECL(expected_vld2_1,poly,8,8) [] = { 0xf0, 0xf1, 0xf0, 0xf1,
0xf0, 0xf1, 0xf0, 0xf1 };
VECT_VAR_DECL(expected_vld2_1,poly,16,4) [] = { 0xfff0, 0xfff1,
0xfff0, 0xfff1 };
+#if defined (__ARM_FEATURE_CRYPTO)
+VECT_VAR_DECL(expected_vld2_1,poly,64,1) [] = { 0xfffffffffffffff1 };
+#endif
VECT_VAR_DECL(expected_vld2_1,hfloat,16,4) [] = { 0xcc00, 0xcb80, 0xcc00, 0xcb80 };
VECT_VAR_DECL(expected_vld2_1,hfloat,32,2) [] = { 0xc1800000, 0xc1700000 };
@@ -56,6 +63,9 @@ VECT_VAR_DECL(expected_vld3_0,poly,8,8) [] = { 0xf0, 0xf1, 0xf2, 0xf0,
0xf1, 0xf2, 0xf0, 0xf1 };
VECT_VAR_DECL(expected_vld3_0,poly,16,4) [] = { 0xfff0, 0xfff1,
0xfff2, 0xfff0 };
+#if defined (__ARM_FEATURE_CRYPTO)
+VECT_VAR_DECL(expected_vld3_0,poly,64,1) [] = { 0xfffffffffffffff0 };
+#endif
VECT_VAR_DECL(expected_vld3_0,hfloat,16,4) [] = { 0xcc00, 0xcb80, 0xcb00, 0xcc00 };
VECT_VAR_DECL(expected_vld3_0,hfloat,32,2) [] = { 0xc1800000, 0xc1700000 };
@@ -76,6 +86,9 @@ VECT_VAR_DECL(expected_vld3_1,poly,8,8) [] = { 0xf2, 0xf0, 0xf1, 0xf2,
0xf0, 0xf1, 0xf2, 0xf0 };
VECT_VAR_DECL(expected_vld3_1,poly,16,4) [] = { 0xfff1, 0xfff2,
0xfff0, 0xfff1 };
+#if defined (__ARM_FEATURE_CRYPTO)
+VECT_VAR_DECL(expected_vld3_1,poly,64,1) [] = { 0xfffffffffffffff1 };
+#endif
VECT_VAR_DECL(expected_vld3_1,hfloat,16,4) [] = { 0xcb80, 0xcb00, 0xcc00, 0xcb80 };
VECT_VAR_DECL(expected_vld3_1,hfloat,32,2) [] = { 0xc1600000, 0xc1800000 };
@@ -96,6 +109,9 @@ VECT_VAR_DECL(expected_vld3_2,poly,8,8) [] = { 0xf1, 0xf2, 0xf0, 0xf1,
0xf2, 0xf0, 0xf1, 0xf2 };
VECT_VAR_DECL(expected_vld3_2,poly,16,4) [] = { 0xfff2, 0xfff0,
0xfff1, 0xfff2 };
+#if defined (__ARM_FEATURE_CRYPTO)
+VECT_VAR_DECL(expected_vld3_2,poly,64,1) [] = { 0xfffffffffffffff2 };
+#endif
VECT_VAR_DECL(expected_vld3_2,hfloat,16,4) [] = { 0xcb00, 0xcc00, 0xcb80, 0xcb00 };
VECT_VAR_DECL(expected_vld3_2,hfloat,32,2) [] = { 0xc1700000, 0xc1600000 };
@@ -114,6 +130,9 @@ VECT_VAR_DECL(expected_vld4_0,uint,64,1) [] = { 0xfffffffffffffff0 };
VECT_VAR_DECL(expected_vld4_0,poly,8,8) [] = { 0xf0, 0xf1, 0xf2, 0xf3,
0xf0, 0xf1, 0xf2, 0xf3 };
VECT_VAR_DECL(expected_vld4_0,poly,16,4) [] = { 0xfff0, 0xfff1, 0xfff2, 0xfff3 };
+#if defined (__ARM_FEATURE_CRYPTO)
+VECT_VAR_DECL(expected_vld4_0,poly,64,1) [] = { 0xfffffffffffffff0 };
+#endif
VECT_VAR_DECL(expected_vld4_0,hfloat,16,4) [] = { 0xcc00, 0xcb80, 0xcb00, 0xca80 };
VECT_VAR_DECL(expected_vld4_0,hfloat,32,2) [] = { 0xc1800000, 0xc1700000 };
@@ -131,6 +150,9 @@ VECT_VAR_DECL(expected_vld4_1,uint,64,1) [] = { 0xfffffffffffffff1 };
VECT_VAR_DECL(expected_vld4_1,poly,8,8) [] = { 0xf0, 0xf1, 0xf2, 0xf3,
0xf0, 0xf1, 0xf2, 0xf3 };
VECT_VAR_DECL(expected_vld4_1,poly,16,4) [] = { 0xfff0, 0xfff1, 0xfff2, 0xfff3 };
+#if defined (__ARM_FEATURE_CRYPTO)
+VECT_VAR_DECL(expected_vld4_1,poly,64,1) [] = { 0xfffffffffffffff1 };
+#endif
VECT_VAR_DECL(expected_vld4_1,hfloat,16,4) [] = { 0xcc00, 0xcb80, 0xcb00, 0xca80 };
VECT_VAR_DECL(expected_vld4_1,hfloat,32,2) [] = { 0xc1600000, 0xc1500000 };
@@ -148,6 +170,9 @@ VECT_VAR_DECL(expected_vld4_2,uint,64,1) [] = { 0xfffffffffffffff2 };
VECT_VAR_DECL(expected_vld4_2,poly,8,8) [] = { 0xf0, 0xf1, 0xf2, 0xf3,
0xf0, 0xf1, 0xf2, 0xf3 };
VECT_VAR_DECL(expected_vld4_2,poly,16,4) [] = { 0xfff0, 0xfff1, 0xfff2, 0xfff3 };
+#if defined (__ARM_FEATURE_CRYPTO)
+VECT_VAR_DECL(expected_vld4_2,poly,64,1) [] = { 0xfffffffffffffff2 };
+#endif
VECT_VAR_DECL(expected_vld4_2,hfloat,16,4) [] = { 0xcc00, 0xcb80, 0xcb00, 0xca80 };
VECT_VAR_DECL(expected_vld4_2,hfloat,32,2) [] = { 0xc1800000, 0xc1700000 };
@@ -165,6 +190,9 @@ VECT_VAR_DECL(expected_vld4_3,uint,64,1) [] = { 0xfffffffffffffff3 };
VECT_VAR_DECL(expected_vld4_3,poly,8,8) [] = { 0xf0, 0xf1, 0xf2, 0xf3,
0xf0, 0xf1, 0xf2, 0xf3 };
VECT_VAR_DECL(expected_vld4_3,poly,16,4) [] = { 0xfff0, 0xfff1, 0xfff2, 0xfff3 };
+#if defined (__ARM_FEATURE_CRYPTO)
+VECT_VAR_DECL(expected_vld4_3,poly,64,1) [] = { 0xfffffffffffffff3 };
+#endif
VECT_VAR_DECL(expected_vld4_3,hfloat,16,4) [] = { 0xcc00, 0xcb80, 0xcb00, 0xca80 };
VECT_VAR_DECL(expected_vld4_3,hfloat,32,2) [] = { 0xc1600000, 0xc1500000 };
@@ -197,6 +225,16 @@ void exec_vldX_dup (void)
&(VECT_VAR(result_bis_##X, T1, W, N)[Y*N]), \
sizeof(VECT_VAR(result, T1, W, N)));
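+/* poly64 variants are only available with the crypto extension;
+   without it these wrappers expand to nothing.  */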
+#if defined (__ARM_FEATURE_CRYPTO)
+#define TEST_VLDX_DUP_CRYPTO(Q, T1, T2, W, N, X) TEST_VLDX_DUP(Q, T1, T2, W, N, X)
+#define TEST_EXTRA_CHUNK_CRYPTO(T1, W, N, X,Y) TEST_EXTRA_CHUNK(T1, W, N, X,Y)
+#define DECL_VLDX_DUP_CRYPTO(T1, W, N, X) DECL_VLDX_DUP(T1, W, N, X)
+#else
+#define TEST_VLDX_DUP_CRYPTO(Q, T1, T2, W, N, X)
+#define TEST_EXTRA_CHUNK_CRYPTO(T1, W, N, X,Y)
+#define DECL_VLDX_DUP_CRYPTO(T1, W, N, X)
+#endif
+
#define DECL_ALL_VLDX_DUP_NO_FP16(X) \
DECL_VLDX_DUP(int, 8, 8, X); \
DECL_VLDX_DUP(int, 16, 4, X); \
@@ -208,6 +246,7 @@ void exec_vldX_dup (void)
DECL_VLDX_DUP(uint, 64, 1, X); \
DECL_VLDX_DUP(poly, 8, 8, X); \
DECL_VLDX_DUP(poly, 16, 4, X); \
+ DECL_VLDX_DUP_CRYPTO(poly, 64, 1, X); \
DECL_VLDX_DUP(float, 32, 2, X)
#if defined (__ARM_FP16_FORMAT_IEEE) || defined (__ARM_FP16_FORMAT_ALTERNATIVE)
@@ -229,6 +268,7 @@ void exec_vldX_dup (void)
TEST_VLDX_DUP(, uint, u, 64, 1, X); \
TEST_VLDX_DUP(, poly, p, 8, 8, X); \
TEST_VLDX_DUP(, poly, p, 16, 4, X); \
+ TEST_VLDX_DUP_CRYPTO(, poly, p, 64, 1, X); \
TEST_VLDX_DUP(, float, f, 32, 2, X)
#if defined (__ARM_FP16_FORMAT_IEEE) || defined (__ARM_FP16_FORMAT_ALTERNATIVE)
@@ -250,6 +290,7 @@ void exec_vldX_dup (void)
TEST_EXTRA_CHUNK(uint, 64, 1, X, Y); \
TEST_EXTRA_CHUNK(poly, 8, 8, X, Y); \
TEST_EXTRA_CHUNK(poly, 16, 4, X, Y); \
+ TEST_EXTRA_CHUNK_CRYPTO(poly, 64, 1, X, Y); \
TEST_EXTRA_CHUNK(float, 32, 2, X, Y)
#if defined (__ARM_FP16_FORMAT_IEEE) || defined (__ARM_FP16_FORMAT_ALTERNATIVE)
@@ -272,6 +313,7 @@ void exec_vldX_dup (void)
CHECK(test_name, uint, 64, 1, PRIx64, EXPECTED, comment); \
CHECK(test_name, poly, 8, 8, PRIx8, EXPECTED, comment); \
CHECK(test_name, poly, 16, 4, PRIx16, EXPECTED, comment); \
+ CHECK_CRYPTO(test_name, poly, 64, 1, PRIx64, EXPECTED, comment); \
CHECK_FP(test_name, float, 32, 2, PRIx32, EXPECTED, comment)
#if defined (__ARM_FP16_FORMAT_IEEE) || defined (__ARM_FP16_FORMAT_ALTERNATIVE)
@@ -313,6 +355,10 @@ void exec_vldX_dup (void)
PAD(buffer_vld2_pad, poly, 8, 8);
VECT_ARRAY_INIT2(buffer_vld2, poly, 16, 4);
PAD(buffer_vld2_pad, poly, 16, 4);
+#if defined (__ARM_FEATURE_CRYPTO)
+ VECT_ARRAY_INIT2(buffer_vld2, poly, 64, 1);
+ PAD(buffer_vld2_pad, poly, 64, 1);
+#endif
#if defined (__ARM_FP16_FORMAT_IEEE) || defined (__ARM_FP16_FORMAT_ALTERNATIVE)
VECT_ARRAY_INIT2(buffer_vld2, float, 16, 4);
PAD(buffer_vld2_pad, float, 16, 4);
@@ -340,6 +386,10 @@ void exec_vldX_dup (void)
PAD(buffer_vld2_pad, poly, 8, 16);
VECT_ARRAY_INIT2(buffer_vld2, poly, 16, 8);
PAD(buffer_vld2_pad, poly, 16, 8);
+#if defined (__ARM_FEATURE_CRYPTO)
+ VECT_ARRAY_INIT2(buffer_vld2, poly, 64, 2);
+ PAD(buffer_vld2_pad, poly, 64, 2);
+#endif
#if defined (__ARM_FP16_FORMAT_IEEE) || defined (__ARM_FP16_FORMAT_ALTERNATIVE)
VECT_ARRAY_INIT2(buffer_vld2, float, 16, 8);
PAD(buffer_vld2_pad, float, 16, 8);
@@ -368,6 +418,10 @@ void exec_vldX_dup (void)
PAD(buffer_vld3_pad, poly, 8, 8);
VECT_ARRAY_INIT3(buffer_vld3, poly, 16, 4);
PAD(buffer_vld3_pad, poly, 16, 4);
+#if defined (__ARM_FEATURE_CRYPTO)
+ VECT_ARRAY_INIT3(buffer_vld3, poly, 64, 1);
+ PAD(buffer_vld3_pad, poly, 64, 1);
+#endif
#if defined (__ARM_FP16_FORMAT_IEEE) || defined (__ARM_FP16_FORMAT_ALTERNATIVE)
VECT_ARRAY_INIT3(buffer_vld3, float, 16, 4);
PAD(buffer_vld3_pad, float, 16, 4);
@@ -395,6 +449,10 @@ void exec_vldX_dup (void)
PAD(buffer_vld3_pad, poly, 8, 16);
VECT_ARRAY_INIT3(buffer_vld3, poly, 16, 8);
PAD(buffer_vld3_pad, poly, 16, 8);
+#if defined (__ARM_FEATURE_CRYPTO)
+ VECT_ARRAY_INIT3(buffer_vld3, poly, 64, 2);
+ PAD(buffer_vld3_pad, poly, 64, 2);
+#endif
#if defined (__ARM_FP16_FORMAT_IEEE) || defined (__ARM_FP16_FORMAT_ALTERNATIVE)
VECT_ARRAY_INIT3(buffer_vld3, float, 16, 8);
PAD(buffer_vld3_pad, float, 16, 8);
@@ -423,6 +481,10 @@ void exec_vldX_dup (void)
PAD(buffer_vld4_pad, poly, 8, 8);
VECT_ARRAY_INIT4(buffer_vld4, poly, 16, 4);
PAD(buffer_vld4_pad, poly, 16, 4);
+#if defined (__ARM_FEATURE_CRYPTO)
+ VECT_ARRAY_INIT4(buffer_vld4, poly, 64, 1);
+ PAD(buffer_vld4_pad, poly, 64, 1);
+#endif
#if defined (__ARM_FP16_FORMAT_IEEE) || defined (__ARM_FP16_FORMAT_ALTERNATIVE)
VECT_ARRAY_INIT4(buffer_vld4, float, 16, 4);
PAD(buffer_vld4_pad, float, 16, 4);
@@ -450,6 +512,10 @@ void exec_vldX_dup (void)
PAD(buffer_vld4_pad, poly, 8, 16);
VECT_ARRAY_INIT4(buffer_vld4, poly, 16, 8);
PAD(buffer_vld4_pad, poly, 16, 8);
+#if defined (__ARM_FEATURE_CRYPTO)
+ VECT_ARRAY_INIT4(buffer_vld4, poly, 64, 2);
+ PAD(buffer_vld4_pad, poly, 64, 2);
+#endif
#if defined (__ARM_FP16_FORMAT_IEEE) || defined (__ARM_FP16_FORMAT_ALTERNATIVE)
VECT_ARRAY_INIT4(buffer_vld4, float, 16, 8);
PAD(buffer_vld4_pad, float, 16, 8);
@@ -1,6 +1,7 @@
#include <arm_neon.h>
#include "arm-neon-ref.h"
#include "compute-ref-data.h"
+/* { dg-additional-options "-march=armv8-a+crypto" { target aarch64*-*-* } } */
/* Expected results. */
@@ -18,6 +19,11 @@ VECT_VAR_DECL(expected_vld2_0,poly,8,8) [] = { 0xaa, 0xaa, 0xaa, 0xaa,
0xaa, 0xaa, 0xaa, 0xaa };
VECT_VAR_DECL(expected_vld2_0,poly,16,4) [] = { 0xaaaa, 0xaaaa,
0xaaaa, 0xaaaa };
+#if defined (__ARM_FEATURE_CRYPTO)
+VECT_VAR_DECL(expected_vld2_0,poly,64,1) [] = { 0xfffffffffffffff0 };
+VECT_VAR_DECL(expected_vld2_0,poly,64,2) [] = { 0xfffffffffffffff0,
+ 0xfffffffffffffff1 };
+#endif
VECT_VAR_DECL(expected_vld2_0,hfloat,16,4) [] = { 0xaaaa, 0xaaaa, 0xaaaa, 0xaaaa };
VECT_VAR_DECL(expected_vld2_0,hfloat,32,2) [] = { 0xc1800000, 0xc1700000 };
VECT_VAR_DECL(expected_vld2_0,int,16,8) [] = { 0xaaaa, 0xaaaa, 0xaaaa, 0xaaaa,
@@ -47,6 +53,11 @@ VECT_VAR_DECL(expected_vld2_1,uint,32,2) [] = { 0xfffffff0, 0xfffffff1 };
VECT_VAR_DECL(expected_vld2_1,poly,8,8) [] = { 0xf0, 0xf1, 0xaa, 0xaa,
0xaa, 0xaa, 0xaa, 0xaa };
VECT_VAR_DECL(expected_vld2_1,poly,16,4) [] = { 0xaaaa, 0xaaaa, 0xfff0, 0xfff1 };
+#if defined (__ARM_FEATURE_CRYPTO)
+VECT_VAR_DECL(expected_vld2_1,poly,64,1) [] = { 0xfffffffffffffff1 };
+VECT_VAR_DECL(expected_vld2_1,poly,64,2) [] = { 0xaaaaaaaaaaaaaaaa,
+ 0xaaaaaaaaaaaaaaaa };
+#endif
VECT_VAR_DECL(expected_vld2_1,hfloat,16,4) [] = { 0xcc00, 0xcb80, 0xaaaa, 0xaaaa };
VECT_VAR_DECL(expected_vld2_1,hfloat,32,2) [] = { 0xaaaaaaaa, 0xaaaaaaaa };
VECT_VAR_DECL(expected_vld2_1,int,16,8) [] = { 0xaaaa, 0xaaaa, 0xaaaa, 0xaaaa,
@@ -76,6 +87,11 @@ VECT_VAR_DECL(expected_vld3_0,uint,32,2) [] = { 0xaaaaaaaa, 0xaaaaaaaa };
VECT_VAR_DECL(expected_vld3_0,poly,8,8) [] = { 0xaa, 0xaa, 0xaa, 0xaa,
0xaa, 0xaa, 0xaa, 0xaa };
VECT_VAR_DECL(expected_vld3_0,poly,16,4) [] = { 0xaaaa, 0xaaaa, 0xaaaa, 0xaaaa };
+#if defined (__ARM_FEATURE_CRYPTO)
+VECT_VAR_DECL(expected_vld3_0,poly,64,1) [] = { 0xfffffffffffffff0 };
+VECT_VAR_DECL(expected_vld3_0,poly,64,2) [] = { 0xfffffffffffffff0,
+ 0xfffffffffffffff1 };
+#endif
VECT_VAR_DECL(expected_vld3_0,hfloat,16,4) [] = { 0xaaaa, 0xaaaa, 0xaaaa, 0xaaaa };
VECT_VAR_DECL(expected_vld3_0,hfloat,32,2) [] = { 0xc1800000, 0xc1700000 };
VECT_VAR_DECL(expected_vld3_0,int,16,8) [] = { 0xaaaa, 0xaaaa, 0xaaaa, 0xaaaa,
@@ -105,6 +121,11 @@ VECT_VAR_DECL(expected_vld3_1,uint,32,2) [] = { 0xaaaaaaaa, 0xfffffff0 };
VECT_VAR_DECL(expected_vld3_1,poly,8,8) [] = { 0xaa, 0xaa, 0xaa, 0xaa,
0xf0, 0xf1, 0xf2, 0xaa };
VECT_VAR_DECL(expected_vld3_1,poly,16,4) [] = { 0xaaaa, 0xaaaa, 0xaaaa, 0xaaaa };
+#if defined (__ARM_FEATURE_CRYPTO)
+VECT_VAR_DECL(expected_vld3_1,poly,64,1) [] = { 0xfffffffffffffff1 };
+VECT_VAR_DECL(expected_vld3_1,poly,64,2) [] = { 0xfffffffffffffff2,
+ 0xaaaaaaaaaaaaaaaa };
+#endif
VECT_VAR_DECL(expected_vld3_1,hfloat,16,4) [] = { 0xaaaa, 0xaaaa, 0xcc00, 0xcb80 };
VECT_VAR_DECL(expected_vld3_1,hfloat,32,2) [] = { 0xc1600000, 0xaaaaaaaa };
VECT_VAR_DECL(expected_vld3_1,int,16,8) [] = { 0xaaaa, 0xaaaa, 0xaaaa, 0xaaaa,
@@ -134,6 +155,11 @@ VECT_VAR_DECL(expected_vld3_2,uint,32,2) [] = { 0xfffffff1, 0xfffffff2 };
VECT_VAR_DECL(expected_vld3_2,poly,8,8) [] = { 0xaa, 0xaa, 0xaa, 0xaa,
0xaa, 0xaa, 0xaa, 0xaa };
VECT_VAR_DECL(expected_vld3_2,poly,16,4) [] = { 0xaaaa, 0xfff0, 0xfff1, 0xfff2 };
+#if defined (__ARM_FEATURE_CRYPTO)
+VECT_VAR_DECL(expected_vld3_2,poly,64,1) [] = { 0xfffffffffffffff2 };
+VECT_VAR_DECL(expected_vld3_2,poly,64,2) [] = { 0xaaaaaaaaaaaaaaaa,
+ 0xaaaaaaaaaaaaaaaa };
+#endif
VECT_VAR_DECL(expected_vld3_2,hfloat,16,4) [] = { 0xcb00, 0xaaaa, 0xaaaa, 0xaaaa };
VECT_VAR_DECL(expected_vld3_2,hfloat,32,2) [] = { 0xaaaaaaaa, 0xaaaaaaaa };
VECT_VAR_DECL(expected_vld3_2,int,16,8) [] = { 0xaaaa, 0xaaaa, 0xfff0, 0xfff1,
@@ -163,6 +189,11 @@ VECT_VAR_DECL(expected_vld4_0,uint,32,2) [] = { 0xaaaaaaaa, 0xaaaaaaaa };
VECT_VAR_DECL(expected_vld4_0,poly,8,8) [] = { 0xaa, 0xaa, 0xaa, 0xaa,
0xaa, 0xaa, 0xaa, 0xaa };
VECT_VAR_DECL(expected_vld4_0,poly,16,4) [] = { 0xaaaa, 0xaaaa, 0xaaaa, 0xaaaa };
+#if defined (__ARM_FEATURE_CRYPTO)
+VECT_VAR_DECL(expected_vld4_0,poly,64,1) [] = { 0xfffffffffffffff0 };
+VECT_VAR_DECL(expected_vld4_0,poly,64,2) [] = { 0xfffffffffffffff0,
+ 0xfffffffffffffff1 };
+#endif
VECT_VAR_DECL(expected_vld4_0,hfloat,16,4) [] = { 0xaaaa, 0xaaaa, 0xaaaa, 0xaaaa };
VECT_VAR_DECL(expected_vld4_0,hfloat,32,2) [] = { 0xc1800000, 0xc1700000 };
VECT_VAR_DECL(expected_vld4_0,int,16,8) [] = { 0xaaaa, 0xaaaa, 0xaaaa, 0xaaaa,
@@ -192,6 +223,11 @@ VECT_VAR_DECL(expected_vld4_1,uint,32,2) [] = { 0xaaaaaaaa, 0xaaaaaaaa };
VECT_VAR_DECL(expected_vld4_1,poly,8,8) [] = { 0xaa, 0xaa, 0xaa, 0xaa,
0xaa, 0xaa, 0xaa, 0xaa };
VECT_VAR_DECL(expected_vld4_1,poly,16,4) [] = { 0xaaaa, 0xaaaa, 0xaaaa, 0xaaaa };
+#if defined (__ARM_FEATURE_CRYPTO)
+VECT_VAR_DECL(expected_vld4_1,poly,64,1) [] = { 0xfffffffffffffff1 };
+VECT_VAR_DECL(expected_vld4_1,poly,64,2) [] = { 0xfffffffffffffff2,
+ 0xfffffffffffffff3 };
+#endif
VECT_VAR_DECL(expected_vld4_1,hfloat,16,4) [] = { 0xaaaa, 0xaaaa, 0xaaaa, 0xaaaa };
VECT_VAR_DECL(expected_vld4_1,hfloat,32,2) [] = { 0xc1600000, 0xc1500000 };
VECT_VAR_DECL(expected_vld4_1,int,16,8) [] = { 0xaaaa, 0xaaaa, 0xaaaa, 0xaaaa,
@@ -221,6 +257,11 @@ VECT_VAR_DECL(expected_vld4_2,uint,32,2) [] = { 0xfffffff0, 0xfffffff1 };
VECT_VAR_DECL(expected_vld4_2,poly,8,8) [] = { 0xf0, 0xf1, 0xf2, 0xf3,
0xaa, 0xaa, 0xaa, 0xaa };
VECT_VAR_DECL(expected_vld4_2,poly,16,4) [] = { 0xaaaa, 0xaaaa, 0xaaaa, 0xaaaa };
+#if defined (__ARM_FEATURE_CRYPTO)
+VECT_VAR_DECL(expected_vld4_2,poly,64,1) [] = { 0xfffffffffffffff2 };
+VECT_VAR_DECL(expected_vld4_2,poly,64,2) [] = { 0xaaaaaaaaaaaaaaaa,
+ 0xaaaaaaaaaaaaaaaa };
+#endif
VECT_VAR_DECL(expected_vld4_2,hfloat,16,4) [] = { 0xcc00, 0xcb80, 0xcb00, 0xca80 };
VECT_VAR_DECL(expected_vld4_2,hfloat,32,2) [] = { 0xaaaaaaaa, 0xaaaaaaaa };
VECT_VAR_DECL(expected_vld4_2,int,16,8) [] = { 0xaaaa, 0xaaaa, 0xaaaa, 0xaaaa,
@@ -250,6 +291,11 @@ VECT_VAR_DECL(expected_vld4_3,uint,32,2) [] = { 0xfffffff2, 0xfffffff3 };
VECT_VAR_DECL(expected_vld4_3,poly,8,8) [] = { 0xaa, 0xaa, 0xaa, 0xaa,
0xaa, 0xaa, 0xaa, 0xaa };
VECT_VAR_DECL(expected_vld4_3,poly,16,4) [] = { 0xfff0, 0xfff1, 0xfff2, 0xfff3 };
+#if defined (__ARM_FEATURE_CRYPTO)
+VECT_VAR_DECL(expected_vld4_3,poly,64,1) [] = { 0xfffffffffffffff3 };
+VECT_VAR_DECL(expected_vld4_3,poly,64,2) [] = { 0xaaaaaaaaaaaaaaaa,
+ 0xaaaaaaaaaaaaaaaa };
+#endif
VECT_VAR_DECL(expected_vld4_3,hfloat,16,4) [] = { 0xaaaa, 0xaaaa, 0xaaaa, 0xaaaa };
VECT_VAR_DECL(expected_vld4_3,hfloat,32,2) [] = { 0xaaaaaaaa, 0xaaaaaaaa };
VECT_VAR_DECL(expected_vld4_3,int,16,8) [] = { 0xfff0, 0xfff1, 0xfff2, 0xfff3,
@@ -279,6 +325,9 @@ VECT_VAR_DECL_INIT(buffer_vld2_lane, uint, 32, 2);
VECT_VAR_DECL_INIT(buffer_vld2_lane, uint, 64, 2);
VECT_VAR_DECL_INIT(buffer_vld2_lane, poly, 8, 2);
VECT_VAR_DECL_INIT(buffer_vld2_lane, poly, 16, 2);
+#if defined (__ARM_FEATURE_CRYPTO)
+VECT_VAR_DECL_INIT(buffer_vld2_lane, poly, 64, 2);
+#endif
#if defined (__ARM_FP16_FORMAT_IEEE) || defined (__ARM_FP16_FORMAT_ALTERNATIVE)
VECT_VAR_DECL_INIT(buffer_vld2_lane, float, 16, 2);
#endif
@@ -295,6 +344,9 @@ VECT_VAR_DECL_INIT(buffer_vld3_lane, uint, 32, 3);
VECT_VAR_DECL_INIT(buffer_vld3_lane, uint, 64, 3);
VECT_VAR_DECL_INIT(buffer_vld3_lane, poly, 8, 3);
VECT_VAR_DECL_INIT(buffer_vld3_lane, poly, 16, 3);
+#if defined (__ARM_FEATURE_CRYPTO)
+VECT_VAR_DECL_INIT(buffer_vld3_lane, poly, 64, 3);
+#endif
#if defined (__ARM_FP16_FORMAT_IEEE) || defined (__ARM_FP16_FORMAT_ALTERNATIVE)
VECT_VAR_DECL_INIT(buffer_vld3_lane, float, 16, 3);
#endif
@@ -311,6 +363,9 @@ VECT_VAR_DECL_INIT(buffer_vld4_lane, uint, 32, 4);
VECT_VAR_DECL_INIT(buffer_vld4_lane, uint, 64, 4);
VECT_VAR_DECL_INIT(buffer_vld4_lane, poly, 8, 4);
VECT_VAR_DECL_INIT(buffer_vld4_lane, poly, 16, 4);
+#if defined (__ARM_FEATURE_CRYPTO)
+VECT_VAR_DECL_INIT(buffer_vld4_lane, poly, 64, 4);
+#endif
#if defined (__ARM_FP16_FORMAT_IEEE) || defined (__ARM_FP16_FORMAT_ALTERNATIVE)
VECT_VAR_DECL_INIT(buffer_vld4_lane, float, 16, 4);
#endif
@@ -356,6 +411,16 @@ void exec_vldX_lane (void)
&(VECT_VAR(result_bis_##X, T1, W, N)[Y*N]), \
sizeof(VECT_VAR(result, T1, W, N)));
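+/* poly64 lane variants require the crypto extension; these wrappers
+   expand to nothing when it is absent.  */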
+#if defined (__ARM_FEATURE_CRYPTO)
+#define DECL_VLDX_LANE_CRYPTO(T1, W, N, X) DECL_VLDX_LANE(T1, W, N, X)
+#define TEST_VLDX_LANE_CRYPTO(Q, T1, T2, W, N, X, L) TEST_VLDX_LANE(Q, T1, T2, W, N, X, L)
+#define TEST_EXTRA_CHUNK_CRYPTO(T1, W, N, X, Y) TEST_EXTRA_CHUNK(T1, W, N, X, Y)
+#else
+#define DECL_VLDX_LANE_CRYPTO(T1, W, N, X)
+#define TEST_VLDX_LANE_CRYPTO(Q, T1, T2, W, N, X, L)
+#define TEST_EXTRA_CHUNK_CRYPTO(T1, W, N, X, Y)
+#endif
+
/* We need all variants in 64 bits, but there is no 64x2 variant. */
#define DECL_ALL_VLDX_LANE_NO_FP16(X) \
DECL_VLDX_LANE(int, 8, 8, X); \
@@ -366,11 +431,13 @@ void exec_vldX_lane (void)
DECL_VLDX_LANE(uint, 32, 2, X); \
DECL_VLDX_LANE(poly, 8, 8, X); \
DECL_VLDX_LANE(poly, 16, 4, X); \
+ AARCH64_ONLY(DECL_VLDX_LANE_CRYPTO(poly, 64, 1, X)); \
DECL_VLDX_LANE(int, 16, 8, X); \
DECL_VLDX_LANE(int, 32, 4, X); \
DECL_VLDX_LANE(uint, 16, 8, X); \
DECL_VLDX_LANE(uint, 32, 4, X); \
DECL_VLDX_LANE(poly, 16, 8, X); \
+ AARCH64_ONLY(DECL_VLDX_LANE_CRYPTO(poly, 64, 2, X)); \
DECL_VLDX_LANE(float, 32, 2, X); \
DECL_VLDX_LANE(float, 32, 4, X)
@@ -400,11 +467,13 @@ void exec_vldX_lane (void)
TEST_VLDX_LANE(, uint, u, 32, 2, X, 1); \
TEST_VLDX_LANE(, poly, p, 8, 8, X, 4); \
TEST_VLDX_LANE(, poly, p, 16, 4, X, 3); \
+ AARCH64_ONLY(TEST_VLDX_LANE_CRYPTO(, poly, p, 64, 1, X, 0)); \
TEST_VLDX_LANE(q, int, s, 16, 8, X, 6); \
TEST_VLDX_LANE(q, int, s, 32, 4, X, 2); \
TEST_VLDX_LANE(q, uint, u, 16, 8, X, 5); \
TEST_VLDX_LANE(q, uint, u, 32, 4, X, 0); \
TEST_VLDX_LANE(q, poly, p, 16, 8, X, 5); \
+ AARCH64_ONLY(TEST_VLDX_LANE_CRYPTO(q, poly, p, 64, 2, X, 0)); \
TEST_VLDX_LANE(, float, f, 32, 2, X, 0); \
TEST_VLDX_LANE(q, float, f, 32, 4, X, 2)
@@ -426,11 +495,13 @@ void exec_vldX_lane (void)
TEST_EXTRA_CHUNK(uint, 32, 2, X, Y); \
TEST_EXTRA_CHUNK(poly, 8, 8, X, Y); \
TEST_EXTRA_CHUNK(poly, 16, 4, X, Y); \
+ AARCH64_ONLY(TEST_EXTRA_CHUNK_CRYPTO(poly, 64, 1, X, Y)); \
TEST_EXTRA_CHUNK(int, 16, 8, X, Y); \
TEST_EXTRA_CHUNK(int, 32, 4, X, Y); \
TEST_EXTRA_CHUNK(uint, 16, 8, X, Y); \
TEST_EXTRA_CHUNK(uint, 32, 4, X, Y); \
TEST_EXTRA_CHUNK(poly, 16, 8, X, Y); \
+ AARCH64_ONLY(TEST_EXTRA_CHUNK_CRYPTO(poly, 64, 2, X, Y)); \
TEST_EXTRA_CHUNK(float, 32, 2, X, Y); \
TEST_EXTRA_CHUNK(float, 32, 4, X, Y)
@@ -453,12 +524,14 @@ void exec_vldX_lane (void)
CHECK(test_name, uint, 32, 2, PRIx32, EXPECTED, comment); \
CHECK(test_name, poly, 8, 8, PRIx8, EXPECTED, comment); \
CHECK(test_name, poly, 16, 4, PRIx16, EXPECTED, comment); \
+ AARCH64_ONLY(CHECK_CRYPTO(test_name, poly, 64, 1, PRIx64, EXPECTED, comment)); \
CHECK_FP(test_name, float, 32, 2, PRIx32, EXPECTED, comment); \
CHECK(test_name, int, 16, 8, PRIx16, EXPECTED, comment); \
CHECK(test_name, int, 32, 4, PRIx32, EXPECTED, comment); \
CHECK(test_name, uint, 16, 8, PRIx16, EXPECTED, comment); \
CHECK(test_name, uint, 32, 4, PRIx32, EXPECTED, comment); \
CHECK(test_name, poly, 16, 8, PRIx16, EXPECTED, comment); \
+ AARCH64_ONLY(CHECK_CRYPTO(test_name, poly, 64, 2, PRIx64, EXPECTED, comment)); \
CHECK_FP(test_name, float, 32, 4, PRIx32, EXPECTED, comment)
#if defined (__ARM_FP16_FORMAT_IEEE) || defined (__ARM_FP16_FORMAT_ALTERNATIVE)
@@ -489,11 +562,17 @@ void exec_vldX_lane (void)
DUMMY_ARRAY(buffer_src, uint, 32, 2, 4);
DUMMY_ARRAY(buffer_src, poly, 8, 8, 4);
DUMMY_ARRAY(buffer_src, poly, 16, 4, 4);
+#if defined (__ARM_FEATURE_CRYPTO)
+ AARCH64_ONLY(DUMMY_ARRAY(buffer_src, poly, 64, 1, 4));
+#endif
DUMMY_ARRAY(buffer_src, int, 16, 8, 4);
DUMMY_ARRAY(buffer_src, int, 32, 4, 4);
DUMMY_ARRAY(buffer_src, uint, 16, 8, 4);
DUMMY_ARRAY(buffer_src, uint, 32, 4, 4);
DUMMY_ARRAY(buffer_src, poly, 16, 8, 4);
+#if defined (__ARM_FEATURE_CRYPTO)
+ AARCH64_ONLY(DUMMY_ARRAY(buffer_src, poly, 64, 2, 4));
+#endif
#if defined (__ARM_FP16_FORMAT_IEEE) || defined (__ARM_FP16_FORMAT_ALTERNATIVE)
DUMMY_ARRAY(buffer_src, float, 16, 4, 4);
DUMMY_ARRAY(buffer_src, float, 16, 8, 4);
@@ -1,6 +1,7 @@
/* This file contains tests for the vreinterpret *p128 intrinsics. */
/* { dg-require-effective-target arm_crypto_ok } */
+/* { dg-additional-options "-march=armv8-a+crypto" { target aarch64*-*-* } } */
/* { dg-add-options arm_crypto } */
#include <arm_neon.h>
@@ -78,9 +79,7 @@ VECT_VAR_DECL(vreint_expected_q_f16_p128,hfloat,16,8) [] = { 0xfff0, 0xffff,
int main (void)
{
DECL_VARIABLE_128BITS_VARIANTS(vreint_vector);
- DECL_VARIABLE(vreint_vector, poly, 64, 2);
DECL_VARIABLE_128BITS_VARIANTS(vreint_vector_res);
- DECL_VARIABLE(vreint_vector_res, poly, 64, 2);
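+  /* poly64x2 is covered by DECL_VARIABLE_128BITS_VARIANTS.  */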
clean_results ();
@@ -1,6 +1,7 @@
/* This file contains tests for the vreinterpret *p64 intrinsics. */
/* { dg-require-effective-target arm_crypto_ok } */
+/* { dg-additional-options "-march=armv8-a+crypto" { target aarch64*-*-* } } */
/* { dg-add-options arm_crypto } */
#include <arm_neon.h>
@@ -121,11 +122,7 @@ int main (void)
CHECK_FP(TEST_MSG, T1, W, N, PRIx##W, EXPECTED, "");
DECL_VARIABLE_ALL_VARIANTS(vreint_vector);
- DECL_VARIABLE(vreint_vector, poly, 64, 1);
- DECL_VARIABLE(vreint_vector, poly, 64, 2);
DECL_VARIABLE_ALL_VARIANTS(vreint_vector_res);
- DECL_VARIABLE(vreint_vector_res, poly, 64, 1);
- DECL_VARIABLE(vreint_vector_res, poly, 64, 2);
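+  /* The poly64 variants are covered by DECL_VARIABLE_ALL_VARIANTS.  */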
clean_results ();
@@ -1,6 +1,7 @@
#include <arm_neon.h>
#include "arm-neon-ref.h"
#include "compute-ref-data.h"
+/* { dg-additional-options "-march=armv8-a+crypto" { target aarch64*-*-* } } */
/* Expected results. */
VECT_VAR_DECL(expected,int,8,8) [] = { 0xf7, 0x33, 0x33, 0x33,
@@ -16,6 +17,9 @@ VECT_VAR_DECL(expected,uint,64,1) [] = { 0xfffffffffffffff0 };
VECT_VAR_DECL(expected,poly,8,8) [] = { 0xf6, 0x33, 0x33, 0x33,
0x33, 0x33, 0x33, 0x33 };
VECT_VAR_DECL(expected,poly,16,4) [] = { 0xfff2, 0x3333, 0x3333, 0x3333 };
+#if defined (__ARM_FEATURE_CRYPTO)
+VECT_VAR_DECL(expected,poly,64,1) [] = { 0xfffffffffffffff0 };
+#endif
VECT_VAR_DECL(expected,hfloat,16,4) [] = { 0xcb80, 0x3333, 0x3333, 0x3333 };
VECT_VAR_DECL(expected,hfloat,32,2) [] = { 0xc1700000, 0x33333333 };
VECT_VAR_DECL(expected,int,8,16) [] = { 0xff, 0x33, 0x33, 0x33,
@@ -25,7 +29,7 @@ VECT_VAR_DECL(expected,int,8,16) [] = { 0xff, 0x33, 0x33, 0x33,
VECT_VAR_DECL(expected,int,16,8) [] = { 0xfff5, 0x3333, 0x3333, 0x3333,
0x3333, 0x3333, 0x3333, 0x3333 };
VECT_VAR_DECL(expected,int,32,4) [] = { 0xfffffff1, 0x33333333,
- 0x33333333, 0x33333333 };
+ 0x33333333, 0x33333333 };
VECT_VAR_DECL(expected,int,64,2) [] = { 0xfffffffffffffff1, 0x3333333333333333 };
VECT_VAR_DECL(expected,uint,8,16) [] = { 0xfa, 0x33, 0x33, 0x33,
0x33, 0x33, 0x33, 0x33,
@@ -43,6 +47,10 @@ VECT_VAR_DECL(expected,poly,8,16) [] = { 0xfa, 0x33, 0x33, 0x33,
0x33, 0x33, 0x33, 0x33 };
VECT_VAR_DECL(expected,poly,16,8) [] = { 0xfff4, 0x3333, 0x3333, 0x3333,
0x3333, 0x3333, 0x3333, 0x3333 };
+#if defined (__ARM_FEATURE_CRYPTO)
+VECT_VAR_DECL(expected,poly,64,2) [] = { 0xfffffffffffffff0,
+ 0x3333333333333333 };
+#endif
VECT_VAR_DECL(expected,hfloat,16,8) [] = { 0xc900, 0x3333, 0x3333, 0x3333,
0x3333, 0x3333, 0x3333, 0x3333 };
VECT_VAR_DECL(expected,hfloat,32,4) [] = { 0xc1700000, 0x33333333,
@@ -72,6 +80,9 @@ void exec_vst1_lane (void)
TEST_VST1_LANE(, uint, u, 64, 1, 0);
TEST_VST1_LANE(, poly, p, 8, 8, 6);
TEST_VST1_LANE(, poly, p, 16, 4, 2);
+#if defined (__ARM_FEATURE_CRYPTO)
+ TEST_VST1_LANE(, poly, p, 64, 1, 0);
+#endif
#if defined (__ARM_FP16_FORMAT_IEEE) || defined (__ARM_FP16_FORMAT_ALTERNATIVE)
TEST_VST1_LANE(, float, f, 16, 4, 1);
#endif
@@ -87,6 +98,9 @@ void exec_vst1_lane (void)
TEST_VST1_LANE(q, uint, u, 64, 2, 0);
TEST_VST1_LANE(q, poly, p, 8, 16, 10);
TEST_VST1_LANE(q, poly, p, 16, 8, 4);
+#if defined (__ARM_FEATURE_CRYPTO)
+ TEST_VST1_LANE(q, poly, p, 64, 2, 0);
+#endif
#if defined (__ARM_FP16_FORMAT_IEEE) || defined (__ARM_FP16_FORMAT_ALTERNATIVE)
TEST_VST1_LANE(q, float, f, 16, 8, 6);
#endif
@@ -1,6 +1,7 @@
#include <arm_neon.h>
#include "arm-neon-ref.h"
#include "compute-ref-data.h"
+/* { dg-additional-options "-march=armv8-a+crypto" { target aarch64*-*-* } } */
/* Expected results for vst2, chunk 0. */
VECT_VAR_DECL(expected_st2_0,int,8,8) [] = { 0xf0, 0xf1, 0x0, 0x0,
@@ -14,6 +15,9 @@ VECT_VAR_DECL(expected_st2_0,uint,32,2) [] = { 0xfffffff0, 0xfffffff1 };
VECT_VAR_DECL(expected_st2_0,poly,8,8) [] = { 0xf0, 0xf1, 0x0, 0x0,
0x0, 0x0, 0x0, 0x0 };
VECT_VAR_DECL(expected_st2_0,poly,16,4) [] = { 0xfff0, 0xfff1, 0x0, 0x0 };
+#if defined (__ARM_FEATURE_CRYPTO)
+VECT_VAR_DECL(expected_st2_0,poly,64,1) [] = { 0xfffffffffffffff0 };
+#endif
VECT_VAR_DECL(expected_st2_0,hfloat,16,4) [] = { 0xcc00, 0xcb80, 0x0, 0x0 };
VECT_VAR_DECL(expected_st2_0,hfloat,32,2) [] = { 0xc1800000, 0xc1700000 };
VECT_VAR_DECL(expected_st2_0,int,16,8) [] = { 0xfff0, 0xfff1, 0x0, 0x0,
@@ -42,6 +46,9 @@ VECT_VAR_DECL(expected_st2_1,uint,32,2) [] = { 0x0, 0x0 };
VECT_VAR_DECL(expected_st2_1,poly,8,8) [] = { 0x0, 0x0, 0x0, 0x0,
0x0, 0x0, 0x0, 0x0 };
VECT_VAR_DECL(expected_st2_1,poly,16,4) [] = { 0x0, 0x0, 0x0, 0x0 };
+#if defined (__ARM_FEATURE_CRYPTO)
+VECT_VAR_DECL(expected_st2_1,poly,64,1) [] = { 0xfffffffffffffff1 };
+#endif
VECT_VAR_DECL(expected_st2_1,hfloat,16,4) [] = { 0x0, 0x0, 0x0, 0x0 };
VECT_VAR_DECL(expected_st2_1,hfloat,32,2) [] = { 0x0, 0x0 };
VECT_VAR_DECL(expected_st2_1,int,16,8) [] = { 0x0, 0x0, 0x0, 0x0,
@@ -68,6 +75,9 @@ VECT_VAR_DECL(expected_st3_0,uint,32,2) [] = { 0xfffffff0, 0xfffffff1 };
VECT_VAR_DECL(expected_st3_0,poly,8,8) [] = { 0xf0, 0xf1, 0xf2, 0x0,
0x0, 0x0, 0x0, 0x0 };
VECT_VAR_DECL(expected_st3_0,poly,16,4) [] = { 0xfff0, 0xfff1, 0xfff2, 0x0 };
+#if defined (__ARM_FEATURE_CRYPTO)
+VECT_VAR_DECL(expected_st3_0,poly,64,1) [] = { 0xfffffffffffffff0 };
+#endif
VECT_VAR_DECL(expected_st3_0,hfloat,16,4) [] = { 0xcc00, 0xcb80, 0xcb00, 0x0 };
VECT_VAR_DECL(expected_st3_0,hfloat,32,2) [] = { 0xc1800000, 0xc1700000 };
VECT_VAR_DECL(expected_st3_0,int,16,8) [] = { 0xfff0, 0xfff1, 0xfff2, 0x0,
@@ -97,6 +107,9 @@ VECT_VAR_DECL(expected_st3_1,uint,32,2) [] = { 0xfffffff2, 0x0 };
VECT_VAR_DECL(expected_st3_1,poly,8,8) [] = { 0x0, 0x0, 0x0, 0x0,
0x0, 0x0, 0x0, 0x0 };
VECT_VAR_DECL(expected_st3_1,poly,16,4) [] = { 0x0, 0x0, 0x0, 0x0 };
+#if defined (__ARM_FEATURE_CRYPTO)
+VECT_VAR_DECL(expected_st3_1,poly,64,1) [] = { 0xfffffffffffffff1 };
+#endif
VECT_VAR_DECL(expected_st3_1,hfloat,16,4) [] = { 0x0, 0x0, 0x0, 0x0 };
VECT_VAR_DECL(expected_st3_1,hfloat,32,2) [] = { 0xc1600000, 0x0 };
VECT_VAR_DECL(expected_st3_1,int,16,8) [] = { 0x0, 0x0, 0x0, 0x0,
@@ -123,6 +136,9 @@ VECT_VAR_DECL(expected_st3_2,uint,32,2) [] = { 0x0, 0x0 };
VECT_VAR_DECL(expected_st3_2,poly,8,8) [] = { 0x0, 0x0, 0x0, 0x0,
0x0, 0x0, 0x0, 0x0 };
VECT_VAR_DECL(expected_st3_2,poly,16,4) [] = { 0x0, 0x0, 0x0, 0x0 };
+#if defined (__ARM_FEATURE_CRYPTO)
+VECT_VAR_DECL(expected_st3_2,poly,64,1) [] = { 0xfffffffffffffff2 };
+#endif
VECT_VAR_DECL(expected_st3_2,hfloat,16,4) [] = { 0x0, 0x0, 0x0, 0x0 };
VECT_VAR_DECL(expected_st3_2,hfloat,32,2) [] = { 0x0, 0x0 };
VECT_VAR_DECL(expected_st3_2,int,16,8) [] = { 0x0, 0x0, 0x0, 0x0,
@@ -149,6 +165,9 @@ VECT_VAR_DECL(expected_st4_0,uint,32,2) [] = { 0xfffffff0, 0xfffffff1 };
VECT_VAR_DECL(expected_st4_0,poly,8,8) [] = { 0xf0, 0xf1, 0xf2, 0xf3,
0x0, 0x0, 0x0, 0x0 };
VECT_VAR_DECL(expected_st4_0,poly,16,4) [] = { 0xfff0, 0xfff1, 0xfff2, 0xfff3 };
+#if defined (__ARM_FEATURE_CRYPTO)
+VECT_VAR_DECL(expected_st4_0,poly,64,1) [] = { 0xfffffffffffffff0 };
+#endif
VECT_VAR_DECL(expected_st4_0,hfloat,16,4) [] = { 0xcc00, 0xcb80, 0xcb00, 0xca80 };
VECT_VAR_DECL(expected_st4_0,hfloat,32,2) [] = { 0xc1800000, 0xc1700000 };
VECT_VAR_DECL(expected_st4_0,int,16,8) [] = { 0xfff0, 0xfff1, 0xfff2, 0xfff3,
@@ -178,6 +197,9 @@ VECT_VAR_DECL(expected_st4_1,uint,32,2) [] = { 0xfffffff2, 0xfffffff3 };
VECT_VAR_DECL(expected_st4_1,poly,8,8) [] = { 0x0, 0x0, 0x0, 0x0,
0x0, 0x0, 0x0, 0x0 };
VECT_VAR_DECL(expected_st4_1,poly,16,4) [] = { 0x0, 0x0, 0x0, 0x0 };
+#if defined (__ARM_FEATURE_CRYPTO)
+VECT_VAR_DECL(expected_st4_1,poly,64,1) [] = { 0xfffffffffffffff1 };
+#endif
VECT_VAR_DECL(expected_st4_1,hfloat,16,4) [] = { 0x0, 0x0, 0x0, 0x0 };
VECT_VAR_DECL(expected_st4_1,hfloat,32,2) [] = { 0xc1600000, 0xc1500000 };
VECT_VAR_DECL(expected_st4_1,int,16,8) [] = { 0x0, 0x0, 0x0, 0x0,
@@ -204,6 +226,9 @@ VECT_VAR_DECL(expected_st4_2,uint,32,2) [] = { 0x0, 0x0 };
VECT_VAR_DECL(expected_st4_2,poly,8,8) [] = { 0x0, 0x0, 0x0, 0x0,
0x0, 0x0, 0x0, 0x0 };
VECT_VAR_DECL(expected_st4_2,poly,16,4) [] = { 0x0, 0x0, 0x0, 0x0 };
+#if defined (__ARM_FEATURE_CRYPTO)
+VECT_VAR_DECL(expected_st4_2,poly,64,1) [] = { 0xfffffffffffffff2 };
+#endif
VECT_VAR_DECL(expected_st4_2,hfloat,16,4) [] = { 0x0, 0x0, 0x0, 0x0 };
VECT_VAR_DECL(expected_st4_2,hfloat,32,2) [] = { 0x0, 0x0 };
VECT_VAR_DECL(expected_st4_2,int,16,8) [] = { 0x0, 0x0, 0x0, 0x0,
@@ -230,6 +255,9 @@ VECT_VAR_DECL(expected_st4_3,uint,32,2) [] = { 0x0, 0x0 };
VECT_VAR_DECL(expected_st4_3,poly,8,8) [] = { 0x0, 0x0, 0x0, 0x0,
0x0, 0x0, 0x0, 0x0 };
VECT_VAR_DECL(expected_st4_3,poly,16,4) [] = { 0x0, 0x0, 0x0, 0x0 };
+#if defined (__ARM_FEATURE_CRYPTO)
+VECT_VAR_DECL(expected_st4_3,poly,64,1) [] = { 0xfffffffffffffff3 };
+#endif
VECT_VAR_DECL(expected_st4_3,hfloat,16,4) [] = { 0x0, 0x0, 0x0, 0x0 };
VECT_VAR_DECL(expected_st4_3,hfloat,32,2) [] = { 0x0, 0x0 };
VECT_VAR_DECL(expected_st4_3,int,16,8) [] = { 0x0, 0x0, 0x0, 0x0,
@@ -256,6 +284,9 @@ VECT_VAR_DECL_INIT(buffer_vld2_lane, uint, 32, 2);
VECT_VAR_DECL_INIT(buffer_vld2_lane, uint, 64, 2);
VECT_VAR_DECL_INIT(buffer_vld2_lane, poly, 8, 2);
VECT_VAR_DECL_INIT(buffer_vld2_lane, poly, 16, 2);
+#if defined (__ARM_FEATURE_CRYPTO)
+VECT_VAR_DECL_INIT(buffer_vld2_lane, poly, 64, 2);
+#endif
#if defined (__ARM_FP16_FORMAT_IEEE) || defined (__ARM_FP16_FORMAT_ALTERNATIVE)
VECT_VAR_DECL_INIT(buffer_vld2_lane, float, 16, 2);
#endif
@@ -272,6 +303,9 @@ VECT_VAR_DECL_INIT(buffer_vld3_lane, uint, 32, 3);
VECT_VAR_DECL_INIT(buffer_vld3_lane, uint, 64, 3);
VECT_VAR_DECL_INIT(buffer_vld3_lane, poly, 8, 3);
VECT_VAR_DECL_INIT(buffer_vld3_lane, poly, 16, 3);
+#if defined (__ARM_FEATURE_CRYPTO)
+VECT_VAR_DECL_INIT(buffer_vld3_lane, poly, 64, 3);
+#endif
#if defined (__ARM_FP16_FORMAT_IEEE) || defined (__ARM_FP16_FORMAT_ALTERNATIVE)
VECT_VAR_DECL_INIT(buffer_vld3_lane, float, 16, 3);
#endif
@@ -288,6 +322,9 @@ VECT_VAR_DECL_INIT(buffer_vld4_lane, uint, 32, 4);
VECT_VAR_DECL_INIT(buffer_vld4_lane, uint, 64, 4);
VECT_VAR_DECL_INIT(buffer_vld4_lane, poly, 8, 4);
VECT_VAR_DECL_INIT(buffer_vld4_lane, poly, 16, 4);
+#if defined (__ARM_FEATURE_CRYPTO)
+VECT_VAR_DECL_INIT(buffer_vld4_lane, poly, 64, 4);
+#endif
#if defined (__ARM_FP16_FORMAT_IEEE) || defined (__ARM_FP16_FORMAT_ALTERNATIVE)
VECT_VAR_DECL_INIT(buffer_vld4_lane, float, 16, 4);
#endif
@@ -336,6 +373,19 @@ void exec_vstX_lane (void)
&(VECT_VAR(result_bis_##X, T1, W, N)[Y*N]), \
sizeof(VECT_VAR(result, T1, W, N)));
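+/* As with the load tests, the poly64 store variants are guarded by
+   the crypto extension and expand to nothing without it.  */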
+#if defined (__ARM_FEATURE_CRYPTO)
+#define TEST_EXTRA_CHUNK_CRYPTO(T1, W, N, X, Y) \
+ TEST_EXTRA_CHUNK(T1, W, N, X, Y)
+#define TEST_VSTX_LANE_CRYPTO(Q, T1, T2, W, N, X, L) \
+ TEST_VSTX_LANE(Q, T1, T2, W, N, X, L)
+#define DECL_VSTX_LANE_CRYPTO(T1, W, N, X) \
+ DECL_VSTX_LANE(T1, W, N, X)
+#else
+#define TEST_EXTRA_CHUNK_CRYPTO(T1, W, N, X, Y)
+#define TEST_VSTX_LANE_CRYPTO(Q, T1, T2, W, N, X, L)
+#define DECL_VSTX_LANE_CRYPTO(T1, W, N, X)
+#endif
+
/* We need all variants in 64 bits, but there is no 64x2 variant,
nor 128 bits vectors of int8/uint8/poly8. */
#define DECL_ALL_VSTX_LANE_NO_FP16(X) \
@@ -347,12 +397,14 @@ void exec_vstX_lane (void)
DECL_VSTX_LANE(uint, 32, 2, X); \
DECL_VSTX_LANE(poly, 8, 8, X); \
DECL_VSTX_LANE(poly, 16, 4, X); \
+ DECL_VSTX_LANE_CRYPTO(poly, 64, 1, X); \
DECL_VSTX_LANE(float, 32, 2, X); \
DECL_VSTX_LANE(int, 16, 8, X); \
DECL_VSTX_LANE(int, 32, 4, X); \
DECL_VSTX_LANE(uint, 16, 8, X); \
DECL_VSTX_LANE(uint, 32, 4, X); \
DECL_VSTX_LANE(poly, 16, 8, X); \
+ DECL_VSTX_LANE_CRYPTO(poly, 64, 2, X); \
DECL_VSTX_LANE(float, 32, 4, X)
#if defined (__ARM_FP16_FORMAT_IEEE) || defined (__ARM_FP16_FORMAT_ALTERNATIVE)
@@ -378,6 +430,7 @@ void exec_vstX_lane (void)
TEST_VSTX_LANE(, uint, u, 32, 2, X, 1); \
TEST_VSTX_LANE(, poly, p, 8, 8, X, 4); \
TEST_VSTX_LANE(, poly, p, 16, 4, X, 3); \
+ AARCH64_ONLY(TEST_VSTX_LANE_CRYPTO(, poly, p, 64, 1, X, 0)); \
TEST_VSTX_LANE(q, int, s, 16, 8, X, 6); \
TEST_VSTX_LANE(q, int, s, 32, 4, X, 2); \
TEST_VSTX_LANE(q, uint, u, 16, 8, X, 5); \
@@ -403,6 +456,7 @@ void exec_vstX_lane (void)
TEST_EXTRA_CHUNK(uint, 32, 2, X, Y); \
TEST_EXTRA_CHUNK(poly, 8, 8, X, Y); \
TEST_EXTRA_CHUNK(poly, 16, 4, X, Y); \
+ AARCH64_ONLY(TEST_EXTRA_CHUNK_CRYPTO(poly, 64, 1, X, Y)); \
TEST_EXTRA_CHUNK(float, 32, 2, X, Y); \
TEST_EXTRA_CHUNK(int, 16, 8, X, Y); \
TEST_EXTRA_CHUNK(int, 32, 4, X, Y); \
@@ -434,6 +488,9 @@ void exec_vstX_lane (void)
DUMMY_ARRAY(buffer_src, uint, 32, 2, 4);
DUMMY_ARRAY(buffer_src, poly, 8, 8, 4);
DUMMY_ARRAY(buffer_src, poly, 16, 4, 4);
+#if defined (__ARM_FEATURE_CRYPTO)
+ DUMMY_ARRAY(buffer_src, poly, 64, 1, 4);
+#endif
#if defined (__ARM_FP16_FORMAT_IEEE) || defined (__ARM_FP16_FORMAT_ALTERNATIVE)
DUMMY_ARRAY(buffer_src, float, 16, 4, 4);
#endif
@@ -462,6 +519,7 @@ void exec_vstX_lane (void)
CHECK(TEST_MSG, uint, 32, 2, PRIx32, expected_st2_0, CMT);
CHECK(TEST_MSG, poly, 8, 8, PRIx8, expected_st2_0, CMT);
CHECK(TEST_MSG, poly, 16, 4, PRIx16, expected_st2_0, CMT);
+ AARCH64_ONLY(CHECK_CRYPTO(TEST_MSG, poly, 64, 1, PRIx64, expected_st2_0, CMT));
CHECK_FP(TEST_MSG, float, 32, 2, PRIx32, expected_st2_0, CMT);
CHECK(TEST_MSG, int, 16, 8, PRIx16, expected_st2_0, CMT);
CHECK(TEST_MSG, int, 32, 4, PRIx32, expected_st2_0, CMT);
@@ -485,6 +543,7 @@ void exec_vstX_lane (void)
CHECK(TEST_MSG, uint, 32, 2, PRIx32, expected_st2_1, CMT);
CHECK(TEST_MSG, poly, 8, 8, PRIx8, expected_st2_1, CMT);
CHECK(TEST_MSG, poly, 16, 4, PRIx16, expected_st2_1, CMT);
+ AARCH64_ONLY(CHECK_CRYPTO(TEST_MSG, poly, 64, 1, PRIx64, expected_st2_1, CMT));
CHECK_FP(TEST_MSG, float, 32, 2, PRIx32, expected_st2_1, CMT);
CHECK(TEST_MSG, int, 16, 8, PRIx16, expected_st2_1, CMT);
CHECK(TEST_MSG, int, 32, 4, PRIx32, expected_st2_1, CMT);
@@ -514,6 +573,7 @@ void exec_vstX_lane (void)
CHECK(TEST_MSG, uint, 32, 2, PRIx32, expected_st3_0, CMT);
CHECK(TEST_MSG, poly, 8, 8, PRIx8, expected_st3_0, CMT);
CHECK(TEST_MSG, poly, 16, 4, PRIx16, expected_st3_0, CMT);
+ AARCH64_ONLY(CHECK_CRYPTO(TEST_MSG, poly, 64, 1, PRIx64, expected_st3_0, CMT));
CHECK_FP(TEST_MSG, float, 32, 2, PRIx32, expected_st3_0, CMT);
CHECK(TEST_MSG, int, 16, 8, PRIx16, expected_st3_0, CMT);
CHECK(TEST_MSG, int, 32, 4, PRIx32, expected_st3_0, CMT);
@@ -538,6 +598,7 @@ void exec_vstX_lane (void)
CHECK(TEST_MSG, uint, 32, 2, PRIx32, expected_st3_1, CMT);
CHECK(TEST_MSG, poly, 8, 8, PRIx8, expected_st3_1, CMT);
CHECK(TEST_MSG, poly, 16, 4, PRIx16, expected_st3_1, CMT);
+ AARCH64_ONLY(CHECK_CRYPTO(TEST_MSG, poly, 64, 1, PRIx64, expected_st3_1, CMT));
CHECK_FP(TEST_MSG, float, 32, 2, PRIx32, expected_st3_1, CMT);
CHECK(TEST_MSG, int, 16, 8, PRIx16, expected_st3_1, CMT);
CHECK(TEST_MSG, int, 32, 4, PRIx32, expected_st3_1, CMT);
@@ -562,6 +623,7 @@ void exec_vstX_lane (void)
CHECK(TEST_MSG, uint, 32, 2, PRIx32, expected_st3_2, CMT);
CHECK(TEST_MSG, poly, 8, 8, PRIx8, expected_st3_2, CMT);
CHECK(TEST_MSG, poly, 16, 4, PRIx16, expected_st3_2, CMT);
+ AARCH64_ONLY(CHECK_CRYPTO(TEST_MSG, poly, 64, 1, PRIx64, expected_st3_2, CMT));
CHECK_FP(TEST_MSG, float, 32, 2, PRIx32, expected_st3_2, CMT);
CHECK(TEST_MSG, int, 16, 8, PRIx16, expected_st3_2, CMT);
CHECK(TEST_MSG, int, 32, 4, PRIx32, expected_st3_2, CMT);
@@ -591,6 +653,7 @@ void exec_vstX_lane (void)
CHECK(TEST_MSG, uint, 32, 2, PRIx32, expected_st4_0, CMT);
CHECK(TEST_MSG, poly, 8, 8, PRIx8, expected_st4_0, CMT);
CHECK(TEST_MSG, poly, 16, 4, PRIx16, expected_st4_0, CMT);
+ AARCH64_ONLY(CHECK_CRYPTO(TEST_MSG, poly, 64, 1, PRIx64, expected_st4_0, CMT));
CHECK_FP(TEST_MSG, float, 32, 2, PRIx32, expected_st4_0, CMT);
CHECK(TEST_MSG, int, 16, 8, PRIx16, expected_st4_0, CMT);
CHECK(TEST_MSG, int, 32, 4, PRIx32, expected_st4_0, CMT);
@@ -615,6 +678,7 @@ void exec_vstX_lane (void)
CHECK(TEST_MSG, uint, 32, 2, PRIx32, expected_st4_1, CMT);
CHECK(TEST_MSG, poly, 8, 8, PRIx8, expected_st4_1, CMT);
CHECK(TEST_MSG, poly, 16, 4, PRIx16, expected_st4_1, CMT);
+ AARCH64_ONLY(CHECK_CRYPTO(TEST_MSG, poly, 64, 1, PRIx64, expected_st4_1, CMT));
CHECK_FP(TEST_MSG, float, 32, 2, PRIx32, expected_st4_1, CMT);
CHECK(TEST_MSG, int, 16, 8, PRIx16, expected_st4_1, CMT);
CHECK(TEST_MSG, int, 32, 4, PRIx32, expected_st4_1, CMT);
@@ -639,6 +703,7 @@ void exec_vstX_lane (void)
CHECK(TEST_MSG, uint, 32, 2, PRIx32, expected_st4_2, CMT);
CHECK(TEST_MSG, poly, 8, 8, PRIx8, expected_st4_2, CMT);
CHECK(TEST_MSG, poly, 16, 4, PRIx16, expected_st4_2, CMT);
+ AARCH64_ONLY(CHECK_CRYPTO(TEST_MSG, poly, 64, 1, PRIx64, expected_st4_2, CMT));
CHECK_FP(TEST_MSG, float, 32, 2, PRIx32, expected_st4_2, CMT);
CHECK(TEST_MSG, int, 16, 8, PRIx16, expected_st4_2, CMT);
CHECK(TEST_MSG, int, 32, 4, PRIx32, expected_st4_2, CMT);
@@ -663,6 +728,7 @@ void exec_vstX_lane (void)
CHECK(TEST_MSG, uint, 32, 2, PRIx32, expected_st4_3, CMT);
CHECK(TEST_MSG, poly, 8, 8, PRIx8, expected_st4_3, CMT);
CHECK(TEST_MSG, poly, 16, 4, PRIx16, expected_st4_3, CMT);
+ AARCH64_ONLY(CHECK_CRYPTO(TEST_MSG, poly, 64, 1, PRIx64, expected_st4_3, CMT));
CHECK_FP(TEST_MSG, float, 32, 2, PRIx32, expected_st4_3, CMT);
CHECK(TEST_MSG, int, 16, 8, PRIx16, expected_st4_3, CMT);
CHECK(TEST_MSG, int, 32, 4, PRIx32, expected_st4_3, CMT);