@@ -59,6 +59,13 @@ static inline uint64_t expand_pred_b(uint8_t byte)
return expand_pred_b_data[byte];
}
+/* Similarly for half-word elements; only bits 0, 2, 4, 6 of byte are used. */
+extern const uint64_t expand_pred_h_data[0x55 + 1];
+static inline uint64_t expand_pred_h(uint8_t byte)
+{
+ return expand_pred_h_data[byte & 0x55];
+}
+
static inline void clear_tail(void *vd, uintptr_t opr_sz, uintptr_t max_sz)
{
uint64_t *d = vd + opr_sz;
@@ -103,35 +103,6 @@ uint32_t HELPER(sve_predtest)(void *vd, void *vg, uint32_t words)
return flags;
}
-/* Similarly for half-word elements.
- * for (i = 0; i < 256; ++i) {
- * unsigned long m = 0;
- * if (i & 0xaa) {
- * continue;
- * }
- * for (j = 0; j < 8; j += 2) {
- * if ((i >> j) & 1) {
- * m |= 0xfffful << (j << 3);
- * }
- * }
- * printf("[0x%x] = 0x%016lx,\n", i, m);
- * }
- */
-static inline uint64_t expand_pred_h(uint8_t byte)
-{
- static const uint64_t word[] = {
- [0x01] = 0x000000000000ffff, [0x04] = 0x00000000ffff0000,
- [0x05] = 0x00000000ffffffff, [0x10] = 0x0000ffff00000000,
- [0x11] = 0x0000ffff0000ffff, [0x14] = 0x0000ffffffff0000,
- [0x15] = 0x0000ffffffffffff, [0x40] = 0xffff000000000000,
- [0x41] = 0xffff00000000ffff, [0x44] = 0xffff0000ffff0000,
- [0x45] = 0xffff0000ffffffff, [0x50] = 0xffffffff00000000,
- [0x51] = 0xffffffff0000ffff, [0x54] = 0xffffffffffff0000,
- [0x55] = 0xffffffffffffffff,
- };
- return word[byte & 0x55];
-}
-
/* Similarly for single word elements. */
static inline uint64_t expand_pred_s(uint8_t byte)
{
@@ -127,6 +127,32 @@ const uint64_t expand_pred_b_data[256] = {
0xffffffffffffffff,
};
+/*
+ * Similarly for half-word elements.  Nonzero entries are as printed by:
+ * for (i = 0; i < 256; ++i) {
+ * unsigned long m = 0;
+ * if (i & 0xaa) {
+ * continue;
+ * }
+ * for (j = 0; j < 8; j += 2) {
+ * if ((i >> j) & 1) {
+ * m |= 0xfffful << (j << 3);
+ * }
+ * }
+ * printf("[0x%x] = 0x%016lx,\n", i, m);
+ * }
+ */
+const uint64_t expand_pred_h_data[0x55 + 1] = {
+ [0x01] = 0x000000000000ffff, [0x04] = 0x00000000ffff0000,
+ [0x05] = 0x00000000ffffffff, [0x10] = 0x0000ffff00000000,
+ [0x11] = 0x0000ffff0000ffff, [0x14] = 0x0000ffffffff0000,
+ [0x15] = 0x0000ffffffffffff, [0x40] = 0xffff000000000000,
+ [0x41] = 0xffff00000000ffff, [0x44] = 0xffff0000ffff0000,
+ [0x45] = 0xffff0000ffffffff, [0x50] = 0xffffffff00000000,
+ [0x51] = 0xffffffff0000ffff, [0x54] = 0xffffffffffff0000,
+ [0x55] = 0xffffffffffffffff,
+};
+
/* Signed saturating rounding doubling multiply-accumulate high half, 8-bit */
int8_t do_sqrdmlah_b(int8_t src1, int8_t src2, int8_t src3,
bool neg, bool round)