Message ID | 20210416183106.1516563-2-richard.henderson@linaro.org |
---|---|
State | Superseded |
Headers | show |
Series | target/arm mte fixes | expand |
On Fri, 16 Apr 2021 at 19:31, Richard Henderson <richard.henderson@linaro.org> wrote: > > We were incorrectly assuming that only the first byte of an MTE access > is checked against the tags. But per the ARM, unaligned accesses are > pre-decomposed into single-byte accesses. So by the time we reach the > actual MTE check in the ARM pseudocode, all accesses are aligned. > > Therefore, the first failure is always either the first byte of the > access, or the first byte of the granule. > > In addition, some of the arithmetic is off for last-first -> count. > This does not become directly visible until a later patch that passes > single bytes into this function, so ptr == ptr_last. > > Buglink: https://bugs.launchpad.net/bugs/1921948 > Signed-off-by: Richard Henderson <richard.henderson@linaro.org> > --- > target/arm/mte_helper.c | 38 +++++++++++++++++--------------------- > 1 file changed, 17 insertions(+), 21 deletions(-) > > diff --git a/target/arm/mte_helper.c b/target/arm/mte_helper.c > index 8be17e1b70..c87717127c 100644 > --- a/target/arm/mte_helper.c > +++ b/target/arm/mte_helper.c > @@ -757,10 +757,10 @@ uint64_t mte_checkN(CPUARMState *env, uint32_t desc, > uint64_t ptr, uintptr_t ra) > { > int mmu_idx, ptr_tag, bit55; > - uint64_t ptr_last, ptr_end, prev_page, next_page; > - uint64_t tag_first, tag_end; > - uint64_t tag_byte_first, tag_byte_end; > - uint32_t esize, total, tag_count, tag_size, n, c; > + uint64_t ptr_last, prev_page, next_page; > + uint64_t tag_first, tag_last; > + uint64_t tag_byte_first, tag_byte_last; > + uint32_t total, tag_count, tag_size, n, c; > uint8_t *mem1, *mem2; > MMUAccessType type; > > @@ -779,29 +779,27 @@ uint64_t mte_checkN(CPUARMState *env, uint32_t desc, > > mmu_idx = FIELD_EX32(desc, MTEDESC, MIDX); > type = FIELD_EX32(desc, MTEDESC, WRITE) ? MMU_DATA_STORE : MMU_DATA_LOAD; > - esize = FIELD_EX32(desc, MTEDESC, ESIZE); > total = FIELD_EX32(desc, MTEDESC, TSIZE); > > /* Find the addr of the end of the access, and of the last element. */ > - ptr_end = ptr + total; > - ptr_last = ptr_end - esize; > + ptr_last = ptr + total - 1; Code change means the comment needs updating, since we're no longer finding two things. Change to just /* Find the addr of the end of the access */ ? > > /* Round the bounds to the tag granule, and compute the number of tags. */ > tag_first = QEMU_ALIGN_DOWN(ptr, TAG_GRANULE); > - tag_end = QEMU_ALIGN_UP(ptr_last, TAG_GRANULE); > - tag_count = (tag_end - tag_first) / TAG_GRANULE; > + tag_last = QEMU_ALIGN_DOWN(ptr_last, TAG_GRANULE); > + tag_count = ((tag_last - tag_first) / TAG_GRANULE) + 1; > > /* Round the bounds to twice the tag granule, and compute the bytes. */ > tag_byte_first = QEMU_ALIGN_DOWN(ptr, 2 * TAG_GRANULE); > - tag_byte_end = QEMU_ALIGN_UP(ptr_last, 2 * TAG_GRANULE); > + tag_byte_last = QEMU_ALIGN_DOWN(ptr_last, 2 * TAG_GRANULE); > > /* Locate the page boundaries. */ > prev_page = ptr & TARGET_PAGE_MASK; > next_page = prev_page + TARGET_PAGE_SIZE; > > - if (likely(tag_end - prev_page <= TARGET_PAGE_SIZE)) { > + if (likely(tag_last - prev_page <= TARGET_PAGE_SIZE)) { > /* Memory access stays on one page. */ > - tag_size = (tag_byte_end - tag_byte_first) / (2 * TAG_GRANULE); > + tag_size = ((tag_byte_last - tag_byte_first) / (2 * TAG_GRANULE)) + 1; > mem1 = allocation_tag_mem(env, mmu_idx, ptr, type, total, > MMU_DATA_LOAD, tag_size, ra); > if (!mem1) { > @@ -815,9 +813,9 @@ uint64_t mte_checkN(CPUARMState *env, uint32_t desc, > mem1 = allocation_tag_mem(env, mmu_idx, ptr, type, next_page - ptr, > MMU_DATA_LOAD, tag_size, ra); > > - tag_size = (tag_byte_end - next_page) / (2 * TAG_GRANULE); > + tag_size = ((tag_byte_last - next_page) / (2 * TAG_GRANULE)) + 1; > mem2 = allocation_tag_mem(env, mmu_idx, next_page, type, > - ptr_end - next_page, > + ptr_last - next_page + 1, > MMU_DATA_LOAD, tag_size, ra); > > /* > @@ -838,15 +836,13 @@ uint64_t mte_checkN(CPUARMState *env, uint32_t desc, > } > > /* > - * If we failed, we know which granule. Compute the element that > - * is first in that granule, and signal failure on that element. > + * If we failed, we know which granule. For the first granule, the > + * failure address is @ptr, the first byte accessed. Otherwise the > + * failure address is the first byte of the nth granule. > */ > if (unlikely(n < tag_count)) { > - uint64_t fail_ofs; > - > - fail_ofs = tag_first + n * TAG_GRANULE - ptr; > - fail_ofs = ROUND_UP(fail_ofs, esize); > - mte_check_fail(env, desc, ptr + fail_ofs, ra); > + uint64_t fault = (n == 0 ? ptr : tag_first + n * TAG_GRANULE); > + mte_check_fail(env, desc, fault, ra); > } > This pointer arithmetic makes my head hurt. But I think it's right now. Reviewed-by: Peter Maydell <peter.maydell@linaro.org> thanks -- PMM
On 4/19/21 9:26 AM, Peter Maydell wrote: >> @@ -779,29 +779,27 @@ uint64_t mte_checkN(CPUARMState *env, uint32_t desc, >> >> mmu_idx = FIELD_EX32(desc, MTEDESC, MIDX); >> type = FIELD_EX32(desc, MTEDESC, WRITE) ? MMU_DATA_STORE : MMU_DATA_LOAD; >> - esize = FIELD_EX32(desc, MTEDESC, ESIZE); >> total = FIELD_EX32(desc, MTEDESC, TSIZE); >> >> /* Find the addr of the end of the access, and of the last element. */ >> - ptr_end = ptr + total; >> - ptr_last = ptr_end - esize; >> + ptr_last = ptr + total - 1; > > Code change means the comment needs updating, since we're no longer > finding two things. Change to just > /* Find the addr of the end of the access */ > > ? Yep, good catch. r~
diff --git a/target/arm/mte_helper.c b/target/arm/mte_helper.c index 8be17e1b70..c87717127c 100644 --- a/target/arm/mte_helper.c +++ b/target/arm/mte_helper.c @@ -757,10 +757,10 @@ uint64_t mte_checkN(CPUARMState *env, uint32_t desc, uint64_t ptr, uintptr_t ra) { int mmu_idx, ptr_tag, bit55; - uint64_t ptr_last, ptr_end, prev_page, next_page; - uint64_t tag_first, tag_end; - uint64_t tag_byte_first, tag_byte_end; - uint32_t esize, total, tag_count, tag_size, n, c; + uint64_t ptr_last, prev_page, next_page; + uint64_t tag_first, tag_last; + uint64_t tag_byte_first, tag_byte_last; + uint32_t total, tag_count, tag_size, n, c; uint8_t *mem1, *mem2; MMUAccessType type; @@ -779,29 +779,27 @@ uint64_t mte_checkN(CPUARMState *env, uint32_t desc, mmu_idx = FIELD_EX32(desc, MTEDESC, MIDX); type = FIELD_EX32(desc, MTEDESC, WRITE) ? MMU_DATA_STORE : MMU_DATA_LOAD; - esize = FIELD_EX32(desc, MTEDESC, ESIZE); total = FIELD_EX32(desc, MTEDESC, TSIZE); /* Find the addr of the end of the access, and of the last element. */ - ptr_end = ptr + total; - ptr_last = ptr_end - esize; + ptr_last = ptr + total - 1; /* Round the bounds to the tag granule, and compute the number of tags. */ tag_first = QEMU_ALIGN_DOWN(ptr, TAG_GRANULE); - tag_end = QEMU_ALIGN_UP(ptr_last, TAG_GRANULE); - tag_count = (tag_end - tag_first) / TAG_GRANULE; + tag_last = QEMU_ALIGN_DOWN(ptr_last, TAG_GRANULE); + tag_count = ((tag_last - tag_first) / TAG_GRANULE) + 1; /* Round the bounds to twice the tag granule, and compute the bytes. */ tag_byte_first = QEMU_ALIGN_DOWN(ptr, 2 * TAG_GRANULE); - tag_byte_end = QEMU_ALIGN_UP(ptr_last, 2 * TAG_GRANULE); + tag_byte_last = QEMU_ALIGN_DOWN(ptr_last, 2 * TAG_GRANULE); /* Locate the page boundaries. */ prev_page = ptr & TARGET_PAGE_MASK; next_page = prev_page + TARGET_PAGE_SIZE; - if (likely(tag_end - prev_page <= TARGET_PAGE_SIZE)) { + if (likely(tag_last - prev_page <= TARGET_PAGE_SIZE)) { /* Memory access stays on one page. */ - tag_size = (tag_byte_end - tag_byte_first) / (2 * TAG_GRANULE); + tag_size = ((tag_byte_last - tag_byte_first) / (2 * TAG_GRANULE)) + 1; mem1 = allocation_tag_mem(env, mmu_idx, ptr, type, total, MMU_DATA_LOAD, tag_size, ra); if (!mem1) { @@ -815,9 +813,9 @@ uint64_t mte_checkN(CPUARMState *env, uint32_t desc, mem1 = allocation_tag_mem(env, mmu_idx, ptr, type, next_page - ptr, MMU_DATA_LOAD, tag_size, ra); - tag_size = (tag_byte_end - next_page) / (2 * TAG_GRANULE); + tag_size = ((tag_byte_last - next_page) / (2 * TAG_GRANULE)) + 1; mem2 = allocation_tag_mem(env, mmu_idx, next_page, type, - ptr_end - next_page, + ptr_last - next_page + 1, MMU_DATA_LOAD, tag_size, ra); /* @@ -838,15 +836,13 @@ uint64_t mte_checkN(CPUARMState *env, uint32_t desc, } /* - * If we failed, we know which granule. Compute the element that - * is first in that granule, and signal failure on that element. + * If we failed, we know which granule. For the first granule, the + * failure address is @ptr, the first byte accessed. Otherwise the + * failure address is the first byte of the nth granule. */ if (unlikely(n < tag_count)) { - uint64_t fail_ofs; - - fail_ofs = tag_first + n * TAG_GRANULE - ptr; - fail_ofs = ROUND_UP(fail_ofs, esize); - mte_check_fail(env, desc, ptr + fail_ofs, ra); + uint64_t fault = (n == 0 ? ptr : tag_first + n * TAG_GRANULE); + mte_check_fail(env, desc, fault, ra); } done:
We were incorrectly assuming that only the first byte of an MTE access is checked against the tags. But per the ARM, unaligned accesses are pre-decomposed into single-byte accesses. So by the time we reach the actual MTE check in the ARM pseudocode, all accesses are aligned. Therefore, the first failure is always either the first byte of the access, or the first byte of the granule. In addition, some of the arithmetic is off for last-first -> count. This does not become directly visible until a later patch that passes single bytes into this function, so ptr == ptr_last. Buglink: https://bugs.launchpad.net/bugs/1921948 Signed-off-by: Richard Henderson <richard.henderson@linaro.org> --- target/arm/mte_helper.c | 38 +++++++++++++++++--------------------- 1 file changed, 17 insertions(+), 21 deletions(-) -- 2.25.1