@@ -1321,6 +1321,8 @@ FIELD(MTEDESC, TSIZE, 14, 10) /* mte_checkN only */
bool mte_probe1(CPUARMState *env, uint32_t desc, uint64_t ptr);
uint64_t mte_check1(CPUARMState *env, uint32_t desc,
uint64_t ptr, uintptr_t ra);
+uint64_t mte_checkN(CPUARMState *env, uint32_t desc,
+ uint64_t ptr, uintptr_t ra);

static inline int allocation_tag_from_addr(uint64_t ptr)
{
@@ -500,7 +500,183 @@ uint64_t HELPER(mte_check1)(CPUARMState *env, uint32_t desc, uint64_t ptr)
/*
* Perform an MTE checked access for multiple logical accesses.
*/
+
+/**
+ * checkN:
+ * @tag: tag memory to test
+ * @odd: true to begin testing at the odd tag nibble
+ * @cmp: the tag to compare against
+ * @count: number of tags to test
+ *
+ * Return the number of successful tests.
+ * Thus a return value < @count indicates a failure.
+ *
+ * A note about sizes: count is expected to be small.
+ *
+ * The most common use will be LDP/STP of two integer registers,
+ * which means 16 bytes of memory touching at most 2 tags, but
+ * often the access is aligned and thus just 1 tag.
+ *
+ * Using AdvSIMD LD/ST (multiple), one can access 64 bytes of memory,
+ * touching at most 5 tags. SVE LDR/STR (vector) with the default
+ * vector length is also 64 bytes; the maximum architectural length
+ * is 256 bytes touching at most 17 tags.
+ *
+ * The loop below uses 7 logical operations and 1 memory operation
+ * per tag pair. An implementation that loads an aligned word and
+ * uses masking to ignore adjacent tags requires 18 logical operations
+ * and thus does not begin to pay off until 6 tags, which, according
+ * to the survey above, is unlikely to be common.
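+ *
+ * A worked example: with tags { 0x33, 0x43 }, odd false, cmp 0x3 and
+ * count 4, the replicated tag byte is 0x33.  Both nibbles of the first
+ * byte match, the even (low) nibble of the second byte matches, and its
+ * odd (high) nibble (0x4) does not, so the return value is 3.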
+ */
+static int checkN(uint8_t *mem, int odd, int cmp, int count)
+{
+ int n = 0, diff;
+
+    /*
+     * Replicate the 4-bit tag into both nibbles of cmp, so that a
+     * single byte-wide XOR compares a pair of adjacent tags at once.
+     */
+ cmp *= 0x11;
+ diff = *mem++ ^ cmp;
+
+ if (odd) {
+ goto start_odd;
+ }
+
+ while (1) {
+ /* Test even tag. */
+        if (unlikely(diff & 0x0f)) {
+ break;
+ }
+ if (++n == count) {
+ break;
+ }
+
+ start_odd:
+ /* Test odd tag. */
+        if (unlikely(diff & 0xf0)) {
+ break;
+ }
+ if (++n == count) {
+ break;
+ }
+
+ diff = *mem++ ^ cmp;
+ }
+ return n;
+}
+
+uint64_t mte_checkN(CPUARMState *env, uint32_t desc,
+ uint64_t ptr, uintptr_t ra)
+{
+ int mmu_idx, ptr_tag, bit55;
+ uint64_t ptr_last, ptr_end, prev_page, next_page;
+ uint64_t tag_first, tag_end;
+ uint64_t tag_byte_first, tag_byte_end;
+ uint32_t esize, total, tag_count, tag_size, n, c;
+ uint8_t *mem1, *mem2;
+ MMUAccessType type;
+
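+    /* Bit 55 selects between the TBI0/TCMA0 and TBI1/TCMA1 controls. */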
+ bit55 = extract64(ptr, 55, 1);
+
+ /* If TBI is disabled, the access is unchecked, and ptr is not dirty. */
+ if (unlikely(!tbi_check(desc, bit55))) {
+ return ptr;
+ }
+
+ ptr_tag = allocation_tag_from_addr(ptr);
+
+ if (tcma_check(desc, bit55, ptr_tag)) {
+ goto done;
+ }
+
+ mmu_idx = FIELD_EX32(desc, MTEDESC, MIDX);
+ type = FIELD_EX32(desc, MTEDESC, WRITE) ? MMU_DATA_STORE : MMU_DATA_LOAD;
+ esize = FIELD_EX32(desc, MTEDESC, ESIZE);
+ total = FIELD_EX32(desc, MTEDESC, TSIZE);
+
+ /* Find the addr of the end of the access, and of the last element. */
+ ptr_end = ptr + total;
+ ptr_last = ptr_end - esize;
+
+ /* Round the bounds to the tag granule, and compute the number of tags. */
+ tag_first = QEMU_ALIGN_DOWN(ptr, TAG_GRANULE);
+ tag_end = QEMU_ALIGN_UP(ptr_last, TAG_GRANULE);
+ tag_count = (tag_end - tag_first) / TAG_GRANULE;
+
+ /* Round the bounds to twice the tag granule, and compute the bytes. */
+ tag_byte_first = QEMU_ALIGN_DOWN(ptr, 2 * TAG_GRANULE);
+ tag_byte_end = QEMU_ALIGN_UP(ptr_last, 2 * TAG_GRANULE);
+
+ /* Locate the page boundaries. */
+ prev_page = ptr & TARGET_PAGE_MASK;
+ next_page = prev_page + TARGET_PAGE_SIZE;
+
+ if (likely(tag_end - prev_page <= TARGET_PAGE_SIZE)) {
+ /* Memory access stays on one page. */
+ tag_size = (tag_byte_end - tag_byte_first) / (2 * TAG_GRANULE);
+ mem1 = allocation_tag_mem(env, mmu_idx, ptr, type, total,
+ MMU_DATA_LOAD, tag_size, ra);
+ if (!mem1) {
+ goto done;
+ }
+ /* Perform all of the comparisons. */
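+        /* ptr & TAG_GRANULE != 0 means the first tag is an odd nibble. */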
+ n = checkN(mem1, ptr & TAG_GRANULE, ptr_tag, tag_count);
+ } else {
+ /* Memory access crosses to next page. */
+ tag_size = (next_page - tag_byte_first) / (2 * TAG_GRANULE);
+ mem1 = allocation_tag_mem(env, mmu_idx, ptr, type, next_page - ptr,
+ MMU_DATA_LOAD, tag_size, ra);
+
+ tag_size = (tag_byte_end - next_page) / (2 * TAG_GRANULE);
+ mem2 = allocation_tag_mem(env, mmu_idx, next_page, type,
+ ptr_end - next_page,
+ MMU_DATA_LOAD, tag_size, ra);
+
+ /*
+ * Perform all of the comparisons.
+ * Note the possible but unlikely case of the operation spanning
+ * two pages that do not both have tagging enabled.
+ */
+ n = c = (next_page - tag_first) / TAG_GRANULE;
+ if (mem1) {
+ n = checkN(mem1, ptr & TAG_GRANULE, ptr_tag, c);
+ }
+ if (n == c) {
+ if (!mem2) {
+ goto done;
+ }
+ n += checkN(mem2, 0, ptr_tag, tag_count - c);
+ }
+ }
+
+ /*
+ * If we failed, we know which granule. Compute the element that
+ * is first in that granule, and signal failure on that element.
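+     * For example, an LDP (esize 8, total 16) at a ptr 8 bytes into a
+     * granule spans two granules; if the second granule fails (n == 1),
+     * fail_ofs rounds to 8 and the fault is reported at ptr + 8.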
+ */
+ if (unlikely(n < tag_count)) {
+ uint64_t fail_ofs;
+
+ fail_ofs = tag_first + n * TAG_GRANULE - ptr;
+ fail_ofs = ROUND_UP(fail_ofs, esize);
+ mte_check_fail(env, mmu_idx, ptr + fail_ofs, ra);
+ }
+
+ done:
+ return useronly_clean_ptr(ptr);
+}
+
uint64_t HELPER(mte_checkN)(CPUARMState *env, uint32_t desc, uint64_t ptr)
{
- return ptr;
+ return mte_checkN(env, desc, ptr, GETPC());
}
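
For illustration only, not part of the patch: before invoking the helper, a
caller packs the access parameters into desc using the MTEDESC fields above.
A minimal sketch for a 16-byte store of two 8-byte elements, assuming an
mmu_idx is in hand (the TBI and TCMA fields, which must also be filled in
from the current translation regime, are elided):

    uint32_t desc = 0;
    desc = FIELD_DP32(desc, MTEDESC, MIDX, mmu_idx); /* translation regime */
    desc = FIELD_DP32(desc, MTEDESC, WRITE, 1);      /* this is a store */
    desc = FIELD_DP32(desc, MTEDESC, ESIZE, 8);      /* bytes per element */
    desc = FIELD_DP32(desc, MTEDESC, TSIZE, 16);     /* total bytes accessed */
    ptr = mte_checkN(env, desc, ptr, GETPC());       /* returns cleaned ptr */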