@@ -100,6 +100,16 @@ typedef enum {
#endif
#define TCG_TARGET_CALL_ARG_I32 TCG_CALL_ARG_NORMAL
#define TCG_TARGET_CALL_ARG_I64 TCG_CALL_ARG_NORMAL
+#if defined(_WIN64)
+# define TCG_TARGET_CALL_ARG_I128 TCG_CALL_ARG_BY_REF
+# define TCG_TARGET_CALL_RET_I128 TCG_CALL_RET_BY_VEC
+#elif TCG_TARGET_REG_BITS == 64
+# define TCG_TARGET_CALL_ARG_I128 TCG_CALL_ARG_NORMAL
+# define TCG_TARGET_CALL_RET_I128 TCG_CALL_RET_NORMAL
+#else
+# define TCG_TARGET_CALL_ARG_I128 TCG_CALL_ARG_NORMAL
+# define TCG_TARGET_CALL_RET_I128 TCG_CALL_RET_BY_REF
+#endif
extern bool have_bmi1;
extern bool have_popcnt;
@@ -115,6 +115,11 @@ static TCGReg tcg_target_call_oarg_reg(TCGCallReturnKind kind, int slot)
case TCG_CALL_RET_NORMAL:
tcg_debug_assert(slot >= 0 && slot <= 1);
return slot ? TCG_REG_EDX : TCG_REG_EAX;
+#ifdef _WIN64
+ case TCG_CALL_RET_BY_VEC:
+ tcg_debug_assert(slot == 0);
+ return TCG_REG_XMM0;
+#endif
default:
g_assert_not_reached();
}
@@ -1188,9 +1193,16 @@ static void tcg_out_st(TCGContext *s, TCGType type, TCGReg arg,
* The gvec infrastructure is asserts that v128 vector loads
* and stores use a 16-byte aligned offset. Validate that the
* final pointer is aligned by using an insn that will SIGSEGV.
+ *
+ * This specific instance is also used by TCG_CALL_RET_BY_VEC,
+ * for _WIN64, which must have SSE2 but may not have AVX.
*/
tcg_debug_assert(arg >= 16);
- tcg_out_vex_modrm_offset(s, OPC_MOVDQA_WxVx, arg, 0, arg1, arg2);
+ if (have_avx1) {
+ tcg_out_vex_modrm_offset(s, OPC_MOVDQA_WxVx, arg, 0, arg1, arg2);
+ } else {
+ tcg_out_modrm_offset(s, OPC_MOVDQA_WxVx, arg, arg1, arg2);
+ }
break;
case TCG_TYPE_V256:
/*
@@ -1677,6 +1689,22 @@ static void tcg_out_call(TCGContext *s, const tcg_insn_unit *dest,
const TCGHelperInfo *info)
{
tcg_out_branch(s, 1, dest);
+
+#ifndef _WIN32
+ if (TCG_TARGET_REG_BITS == 32 && info->out_kind == TCG_CALL_RET_BY_REF) {
+ /*
+ * The SysV i386 ABI for struct return passes a reference as the
+ * first argument on the stack, and the callee pops that argument
+ * when it returns. Since we want to retain the aligned stack
+ * pointer for the callee, we do not want to actually push that
+ * argument before the call but rely on the normal store to the
+ * stack slot. But we do need to compensate for the pop in order
+ * to restore our correct stack pointer value.
+ * Pushing a garbage value back onto the stack is quickest.
+ */
+ tcg_out_push(s, TCG_REG_EAX);
+ }
+#endif
}
static void tcg_out_jmp(TCGContext *s, const tcg_insn_unit *dest)