@@ -2654,17 +2654,47 @@ static void gen_store_exclusive(DisasContext *s, int rd, int rt, int rt2,
*/
TCGLabel *fail_label = gen_new_label();
TCGLabel *done_label = gen_new_label();
- TCGv_i64 tmp, dirty_addr, clean_addr;
+ TCGv_i64 tmp, clean_addr;
MemOp memop;
- memop = (size + is_pair) | MO_ALIGN;
- memop = finalize_memop(s, memop);
-
- dirty_addr = cpu_reg_sp(s, rn);
- clean_addr = gen_mte_check1(s, dirty_addr, true, rn != 31, memop);
+ /*
+ * FIXME: We are out of spec here. We have recorded only the address
+ * from load_exclusive, not the entire range, and we assume that the
+ * access sizes on both sides match. The architecture allows the
+ * store to be smaller than the load, so long as the stored bytes are
+ * within the range recorded by the load.
+ */
+ /* See AArch64.ExclusiveMonitorsPass() and AArch64.IsExclusiveVA(). */
+ clean_addr = clean_data_tbi(s, cpu_reg_sp(s, rn));
tcg_gen_brcond_i64(TCG_COND_NE, clean_addr, cpu_exclusive_addr, fail_label);
+ /*
+ * The write, and any associated faults, only happen if the virtual
+ * and physical addresses pass the exclusive monitor check. These
+ * faults are exceedingly unlikely, because normally the guest uses
+ * the exact same address register for the load_exclusive, and we
+ * would have recognized these faults there.
+ *
+ * It is possible to trigger an alignment fault pre-LSE2, e.g. with an
+ * unaligned 4-byte write within the range of an aligned 8-byte load.
+ * With LSE2, the store would need to cross a 16-byte boundary when the
+ * load did not, which would mean the store is outside the range
+ * recorded for the monitor, which would have failed a corrected monitor
+ * check above. For now, we assume no size change and retain the
+ * MO_ALIGN to let tcg know what we checked in the load_exclusive.
+ *
+ * It is possible to trigger an MTE fault, by performing the load with
+ * a virtual address with a valid tag and performing the store with the
+ * same virtual address and a different invalid tag.
+ */
+ memop = size + is_pair;
+ if (memop == MO_128 || !dc_isar_feature(aa64_lse2, s)) {
+ memop |= MO_ALIGN;
+ }
+ memop = finalize_memop(s, memop);
+ gen_mte_check1(s, cpu_reg_sp(s, rn), true, rn != 31, memop);
+
tmp = tcg_temp_new_i64();
if (is_pair) {
if (size == 2) {
Push the mte check behind the exclusive_addr check. Document the several ways that we are still out of spec with this implementation. Signed-off-by: Richard Henderson <richard.henderson@linaro.org> --- target/arm/translate-a64.c | 42 ++++++++++++++++++++++++++++++++------ 1 file changed, 36 insertions(+), 6 deletions(-)