Message ID | 20190522222821.23850-17-richard.henderson@linaro.org |
---|---|
State | Accepted |
Commit | 11e2bfef799024be4a08fcf6797fe0b22fb16b58 |
Headers | show |
Series | tcg queued patches | expand |
On 23.05.19 00:28, Richard Henderson wrote: > This instruction raises #GP, aka SIGSEGV, if the effective address > is not aligned to 16-bytes. > > We have assertions in tcg-op-gvec.c that the offset from ENV is > aligned, for vector types <= V128. But the offset itself does not > validate that the final pointer is aligned -- one must also remember > to use the QEMU_ALIGNED() attribute on the vector member within ENV. > > PowerPC Altivec has vector load/store instructions that silently > discard the low 4 bits of the address, making alignment mistakes > difficult to discover. Aid that by making the most popular host > visibly signal the error. > > Signed-off-by: Richard Henderson <richard.henderson@linaro.org> > --- > tcg/i386/tcg-target.inc.c | 24 ++++++++++++++++++++++-- > 1 file changed, 22 insertions(+), 2 deletions(-) > > diff --git a/tcg/i386/tcg-target.inc.c b/tcg/i386/tcg-target.inc.c > index 6ec5e60448..c0443da4af 100644 > --- a/tcg/i386/tcg-target.inc.c > +++ b/tcg/i386/tcg-target.inc.c > @@ -1082,14 +1082,24 @@ static void tcg_out_ld(TCGContext *s, TCGType type, TCGReg ret, > } > /* FALLTHRU */ > case TCG_TYPE_V64: > + /* There is no instruction that can validate 8-byte alignment. */ > tcg_debug_assert(ret >= 16); > tcg_out_vex_modrm_offset(s, OPC_MOVQ_VqWq, ret, 0, arg1, arg2); > break; > case TCG_TYPE_V128: > + /* > + * The gvec infrastructure is asserts that v128 vector loads > + * and stores use a 16-byte aligned offset. Validate that the > + * final pointer is aligned by using an insn that will SIGSEGV. > + */ > tcg_debug_assert(ret >= 16); > - tcg_out_vex_modrm_offset(s, OPC_MOVDQU_VxWx, ret, 0, arg1, arg2); > + tcg_out_vex_modrm_offset(s, OPC_MOVDQA_VxWx, ret, 0, arg1, arg2); > break; > case TCG_TYPE_V256: > + /* > + * The gvec infrastructure only requires 16-byte alignment, > + * so here we must use an unaligned load. > + */ > tcg_debug_assert(ret >= 16); > tcg_out_vex_modrm_offset(s, OPC_MOVDQU_VxWx | P_VEXL, > ret, 0, arg1, arg2); > @@ -1117,14 +1127,24 @@ static void tcg_out_st(TCGContext *s, TCGType type, TCGReg arg, > } > /* FALLTHRU */ > case TCG_TYPE_V64: > + /* There is no instruction that can validate 8-byte alignment. */ > tcg_debug_assert(arg >= 16); > tcg_out_vex_modrm_offset(s, OPC_MOVQ_WqVq, arg, 0, arg1, arg2); > break; > case TCG_TYPE_V128: > + /* > + * The gvec infrastructure is asserts that v128 vector loads > + * and stores use a 16-byte aligned offset. Validate that the > + * final pointer is aligned by using an insn that will SIGSEGV. > + */ > tcg_debug_assert(arg >= 16); > - tcg_out_vex_modrm_offset(s, OPC_MOVDQU_WxVx, arg, 0, arg1, arg2); > + tcg_out_vex_modrm_offset(s, OPC_MOVDQA_WxVx, arg, 0, arg1, arg2); > break; > case TCG_TYPE_V256: > + /* > + * The gvec infrastructure only requires 16-byte alignment, > + * so here we must use an unaligned store. > + */ > tcg_debug_assert(arg >= 16); > tcg_out_vex_modrm_offset(s, OPC_MOVDQU_WxVx | P_VEXL, > arg, 0, arg1, arg2); > This is the problematic patch. Haven't looked into the details yet, so I can't tell what's wrong. Maybe really an alignemnt issue? -- Thanks, David / dhildenb
On 28.05.19 19:28, David Hildenbrand wrote: > On 23.05.19 00:28, Richard Henderson wrote: >> This instruction raises #GP, aka SIGSEGV, if the effective address >> is not aligned to 16-bytes. >> >> We have assertions in tcg-op-gvec.c that the offset from ENV is >> aligned, for vector types <= V128. But the offset itself does not >> validate that the final pointer is aligned -- one must also remember >> to use the QEMU_ALIGNED() attribute on the vector member within ENV. >> >> PowerPC Altivec has vector load/store instructions that silently >> discard the low 4 bits of the address, making alignment mistakes >> difficult to discover. Aid that by making the most popular host >> visibly signal the error. >> >> Signed-off-by: Richard Henderson <richard.henderson@linaro.org> >> --- >> tcg/i386/tcg-target.inc.c | 24 ++++++++++++++++++++++-- >> 1 file changed, 22 insertions(+), 2 deletions(-) >> >> diff --git a/tcg/i386/tcg-target.inc.c b/tcg/i386/tcg-target.inc.c >> index 6ec5e60448..c0443da4af 100644 >> --- a/tcg/i386/tcg-target.inc.c >> +++ b/tcg/i386/tcg-target.inc.c >> @@ -1082,14 +1082,24 @@ static void tcg_out_ld(TCGContext *s, TCGType type, TCGReg ret, >> } >> /* FALLTHRU */ >> case TCG_TYPE_V64: >> + /* There is no instruction that can validate 8-byte alignment. */ >> tcg_debug_assert(ret >= 16); >> tcg_out_vex_modrm_offset(s, OPC_MOVQ_VqWq, ret, 0, arg1, arg2); >> break; >> case TCG_TYPE_V128: >> + /* >> + * The gvec infrastructure is asserts that v128 vector loads >> + * and stores use a 16-byte aligned offset. Validate that the >> + * final pointer is aligned by using an insn that will SIGSEGV. >> + */ >> tcg_debug_assert(ret >= 16); >> - tcg_out_vex_modrm_offset(s, OPC_MOVDQU_VxWx, ret, 0, arg1, arg2); >> + tcg_out_vex_modrm_offset(s, OPC_MOVDQA_VxWx, ret, 0, arg1, arg2); >> break; >> case TCG_TYPE_V256: >> + /* >> + * The gvec infrastructure only requires 16-byte alignment, >> + * so here we must use an unaligned load. >> + */ >> tcg_debug_assert(ret >= 16); >> tcg_out_vex_modrm_offset(s, OPC_MOVDQU_VxWx | P_VEXL, >> ret, 0, arg1, arg2); >> @@ -1117,14 +1127,24 @@ static void tcg_out_st(TCGContext *s, TCGType type, TCGReg arg, >> } >> /* FALLTHRU */ >> case TCG_TYPE_V64: >> + /* There is no instruction that can validate 8-byte alignment. */ >> tcg_debug_assert(arg >= 16); >> tcg_out_vex_modrm_offset(s, OPC_MOVQ_WqVq, arg, 0, arg1, arg2); >> break; >> case TCG_TYPE_V128: >> + /* >> + * The gvec infrastructure is asserts that v128 vector loads >> + * and stores use a 16-byte aligned offset. Validate that the >> + * final pointer is aligned by using an insn that will SIGSEGV. >> + */ >> tcg_debug_assert(arg >= 16); >> - tcg_out_vex_modrm_offset(s, OPC_MOVDQU_WxVx, arg, 0, arg1, arg2); >> + tcg_out_vex_modrm_offset(s, OPC_MOVDQA_WxVx, arg, 0, arg1, arg2); >> break; >> case TCG_TYPE_V256: >> + /* >> + * The gvec infrastructure only requires 16-byte alignment, >> + * so here we must use an unaligned store. >> + */ >> tcg_debug_assert(arg >= 16); >> tcg_out_vex_modrm_offset(s, OPC_MOVDQU_WxVx | P_VEXL, >> arg, 0, arg1, arg2); >> > > This is the problematic patch. Haven't looked into the details yet, so I > can't tell what's wrong. Maybe really an alignemnt issue? > Okay, looks like "vregs" in "struct CPUS390XState" is always aligned to 8, but not to 16 bytes. And that in return is the case, because "CPUS390XState env" is not aligned to 16 bytes in "struct S390CPU" !!!!!!!! CPU: 0x55a5e3046ef0 !!!!!!!! ENV: 0x55a5e304f1a8 !!!!!!!! VREGS: 0x55a5e304f228 !!!!!!!! CPU: 0x55a5e3070bb0 !!!!!!!! ENV: 0x55a5e3078e68 !!!!!!!! VREGS: 0x55a5e3078ee8 !!!!!!!! CPU: 0x55a5e3098310 !!!!!!!! ENV: 0x55a5e30a05c8 !!!!!!!! VREGS: 0x55a5e30a0648 !!!!!!!! CPU: 0x55a5e30c0730 !!!!!!!! ENV: 0x55a5e30c89e8 !!!!!!!! VREGS: 0x55a5e30c8a68 !!!!!!!! CPU: 0x55a5e30e7c90 !!!!!!!! ENV: 0x55a5e30eff48 !!!!!!!! VREGS: 0x55a5e30effc8 !!!!!!!! CPU: 0x55a5e310eea0 !!!!!!!! ENV: 0x55a5e3117158 !!!!!!!! VREGS: 0x55a5e31171d8 !!!!!!!! CPU: 0x55a5e31361e0 !!!!!!!! ENV: 0x55a5e313e498 !!!!!!!! VREGS: 0x55a5e313e518 !!!!!!!! CPU: 0x55a5e315d520 !!!!!!!! ENV: 0x55a5e31657d8 !!!!!!!! VREGS: 0x55a5e3165858 vregs is defined as: CPU_DoubleU vregs[32][2]; We either have to switch to a type that has a natural alignment of 16 bytes, or enforce alignment of "CPUS390XState env" to 16 bytes. What do you suggest? -- Thanks, David / dhildenb
On 28.05.19 20:33, David Hildenbrand wrote: > On 28.05.19 19:28, David Hildenbrand wrote: >> On 23.05.19 00:28, Richard Henderson wrote: >>> This instruction raises #GP, aka SIGSEGV, if the effective address >>> is not aligned to 16-bytes. >>> >>> We have assertions in tcg-op-gvec.c that the offset from ENV is >>> aligned, for vector types <= V128. But the offset itself does not >>> validate that the final pointer is aligned -- one must also remember >>> to use the QEMU_ALIGNED() attribute on the vector member within ENV. >>> >>> PowerPC Altivec has vector load/store instructions that silently >>> discard the low 4 bits of the address, making alignment mistakes >>> difficult to discover. Aid that by making the most popular host >>> visibly signal the error. >>> >>> Signed-off-by: Richard Henderson <richard.henderson@linaro.org> >>> --- >>> tcg/i386/tcg-target.inc.c | 24 ++++++++++++++++++++++-- >>> 1 file changed, 22 insertions(+), 2 deletions(-) >>> >>> diff --git a/tcg/i386/tcg-target.inc.c b/tcg/i386/tcg-target.inc.c >>> index 6ec5e60448..c0443da4af 100644 >>> --- a/tcg/i386/tcg-target.inc.c >>> +++ b/tcg/i386/tcg-target.inc.c >>> @@ -1082,14 +1082,24 @@ static void tcg_out_ld(TCGContext *s, TCGType type, TCGReg ret, >>> } >>> /* FALLTHRU */ >>> case TCG_TYPE_V64: >>> + /* There is no instruction that can validate 8-byte alignment. */ >>> tcg_debug_assert(ret >= 16); >>> tcg_out_vex_modrm_offset(s, OPC_MOVQ_VqWq, ret, 0, arg1, arg2); >>> break; >>> case TCG_TYPE_V128: >>> + /* >>> + * The gvec infrastructure is asserts that v128 vector loads >>> + * and stores use a 16-byte aligned offset. Validate that the >>> + * final pointer is aligned by using an insn that will SIGSEGV. >>> + */ >>> tcg_debug_assert(ret >= 16); >>> - tcg_out_vex_modrm_offset(s, OPC_MOVDQU_VxWx, ret, 0, arg1, arg2); >>> + tcg_out_vex_modrm_offset(s, OPC_MOVDQA_VxWx, ret, 0, arg1, arg2); >>> break; >>> case TCG_TYPE_V256: >>> + /* >>> + * The gvec infrastructure only requires 16-byte alignment, >>> + * so here we must use an unaligned load. >>> + */ >>> tcg_debug_assert(ret >= 16); >>> tcg_out_vex_modrm_offset(s, OPC_MOVDQU_VxWx | P_VEXL, >>> ret, 0, arg1, arg2); >>> @@ -1117,14 +1127,24 @@ static void tcg_out_st(TCGContext *s, TCGType type, TCGReg arg, >>> } >>> /* FALLTHRU */ >>> case TCG_TYPE_V64: >>> + /* There is no instruction that can validate 8-byte alignment. */ >>> tcg_debug_assert(arg >= 16); >>> tcg_out_vex_modrm_offset(s, OPC_MOVQ_WqVq, arg, 0, arg1, arg2); >>> break; >>> case TCG_TYPE_V128: >>> + /* >>> + * The gvec infrastructure is asserts that v128 vector loads >>> + * and stores use a 16-byte aligned offset. Validate that the >>> + * final pointer is aligned by using an insn that will SIGSEGV. >>> + */ >>> tcg_debug_assert(arg >= 16); >>> - tcg_out_vex_modrm_offset(s, OPC_MOVDQU_WxVx, arg, 0, arg1, arg2); >>> + tcg_out_vex_modrm_offset(s, OPC_MOVDQA_WxVx, arg, 0, arg1, arg2); >>> break; >>> case TCG_TYPE_V256: >>> + /* >>> + * The gvec infrastructure only requires 16-byte alignment, >>> + * so here we must use an unaligned store. >>> + */ >>> tcg_debug_assert(arg >= 16); >>> tcg_out_vex_modrm_offset(s, OPC_MOVDQU_WxVx | P_VEXL, >>> arg, 0, arg1, arg2); >>> >> >> This is the problematic patch. Haven't looked into the details yet, so I >> can't tell what's wrong. Maybe really an alignemnt issue? >> > > Okay, looks like "vregs" in "struct CPUS390XState" is always aligned to > 8, but not to 16 bytes. > > And that in return is the case, because "CPUS390XState env" is not > aligned to 16 bytes in "struct S390CPU" > > > !!!!!!!! CPU: 0x55a5e3046ef0 > !!!!!!!! ENV: 0x55a5e304f1a8 > !!!!!!!! VREGS: 0x55a5e304f228 > !!!!!!!! CPU: 0x55a5e3070bb0 > !!!!!!!! ENV: 0x55a5e3078e68 > !!!!!!!! VREGS: 0x55a5e3078ee8 > !!!!!!!! CPU: 0x55a5e3098310 > !!!!!!!! ENV: 0x55a5e30a05c8 > !!!!!!!! VREGS: 0x55a5e30a0648 > !!!!!!!! CPU: 0x55a5e30c0730 > !!!!!!!! ENV: 0x55a5e30c89e8 > !!!!!!!! VREGS: 0x55a5e30c8a68 > !!!!!!!! CPU: 0x55a5e30e7c90 > !!!!!!!! ENV: 0x55a5e30eff48 > !!!!!!!! VREGS: 0x55a5e30effc8 > !!!!!!!! CPU: 0x55a5e310eea0 > !!!!!!!! ENV: 0x55a5e3117158 > !!!!!!!! VREGS: 0x55a5e31171d8 > !!!!!!!! CPU: 0x55a5e31361e0 > !!!!!!!! ENV: 0x55a5e313e498 > !!!!!!!! VREGS: 0x55a5e313e518 > !!!!!!!! CPU: 0x55a5e315d520 > !!!!!!!! ENV: 0x55a5e31657d8 > !!!!!!!! VREGS: 0x55a5e3165858 > > vregs is defined as: > > CPU_DoubleU vregs[32][2]; > > We either have to switch to a type that has a natural alignment of 16 > bytes, or enforce alignment of "CPUS390XState env" to 16 bytes. > > What do you suggest? FWIW, this seems to be the easiest way: diff --git a/target/s390x/cpu.h b/target/s390x/cpu.h index f0d9a6a36d..d363ae0fb3 100644 --- a/target/s390x/cpu.h +++ b/target/s390x/cpu.h @@ -66,7 +66,7 @@ struct CPUS390XState { * The floating point registers are part of the vector registers. * vregs[0][0] -> vregs[15][0] are 16 floating point registers */ - CPU_DoubleU vregs[32][2]; /* vector registers */ + CPU_DoubleU vregs[32][2] QEMU_ALIGNED(16); /* vector registers */ uint32_t aregs[16]; /* access registers */ uint8_t riccb[64]; /* runtime instrumentation control */ uint64_t gscb[4]; /* guarded storage control */ Makes it work for me again. -- Thanks, David / dhildenb
On 5/28/19 1:46 PM, David Hildenbrand wrote: > FWIW, this seems to be the easiest way: > > diff --git a/target/s390x/cpu.h b/target/s390x/cpu.h > index f0d9a6a36d..d363ae0fb3 100644 > --- a/target/s390x/cpu.h > +++ b/target/s390x/cpu.h > @@ -66,7 +66,7 @@ struct CPUS390XState { > * The floating point registers are part of the vector registers. > * vregs[0][0] -> vregs[15][0] are 16 floating point registers > */ > - CPU_DoubleU vregs[32][2]; /* vector registers */ > + CPU_DoubleU vregs[32][2] QEMU_ALIGNED(16); /* vector registers */ > uint32_t aregs[16]; /* access registers */ > uint8_t riccb[64]; /* runtime instrumentation control */ > uint64_t gscb[4]; /* guarded storage control */ > > > Makes it work for me again. That's the right fix, and exactly the bug that I was hoping to find with 11e2bfef7990 ("tcg/i386: Use MOVDQA for TCG_TYPE_V128 load/store"). r~
diff --git a/tcg/i386/tcg-target.inc.c b/tcg/i386/tcg-target.inc.c index 6ec5e60448..c0443da4af 100644 --- a/tcg/i386/tcg-target.inc.c +++ b/tcg/i386/tcg-target.inc.c @@ -1082,14 +1082,24 @@ static void tcg_out_ld(TCGContext *s, TCGType type, TCGReg ret, } /* FALLTHRU */ case TCG_TYPE_V64: + /* There is no instruction that can validate 8-byte alignment. */ tcg_debug_assert(ret >= 16); tcg_out_vex_modrm_offset(s, OPC_MOVQ_VqWq, ret, 0, arg1, arg2); break; case TCG_TYPE_V128: + /* + * The gvec infrastructure is asserts that v128 vector loads + * and stores use a 16-byte aligned offset. Validate that the + * final pointer is aligned by using an insn that will SIGSEGV. + */ tcg_debug_assert(ret >= 16); - tcg_out_vex_modrm_offset(s, OPC_MOVDQU_VxWx, ret, 0, arg1, arg2); + tcg_out_vex_modrm_offset(s, OPC_MOVDQA_VxWx, ret, 0, arg1, arg2); break; case TCG_TYPE_V256: + /* + * The gvec infrastructure only requires 16-byte alignment, + * so here we must use an unaligned load. + */ tcg_debug_assert(ret >= 16); tcg_out_vex_modrm_offset(s, OPC_MOVDQU_VxWx | P_VEXL, ret, 0, arg1, arg2); @@ -1117,14 +1127,24 @@ static void tcg_out_st(TCGContext *s, TCGType type, TCGReg arg, } /* FALLTHRU */ case TCG_TYPE_V64: + /* There is no instruction that can validate 8-byte alignment. */ tcg_debug_assert(arg >= 16); tcg_out_vex_modrm_offset(s, OPC_MOVQ_WqVq, arg, 0, arg1, arg2); break; case TCG_TYPE_V128: + /* + * The gvec infrastructure is asserts that v128 vector loads + * and stores use a 16-byte aligned offset. Validate that the + * final pointer is aligned by using an insn that will SIGSEGV. + */ tcg_debug_assert(arg >= 16); - tcg_out_vex_modrm_offset(s, OPC_MOVDQU_WxVx, arg, 0, arg1, arg2); + tcg_out_vex_modrm_offset(s, OPC_MOVDQA_WxVx, arg, 0, arg1, arg2); break; case TCG_TYPE_V256: + /* + * The gvec infrastructure only requires 16-byte alignment, + * so here we must use an unaligned store. + */ tcg_debug_assert(arg >= 16); tcg_out_vex_modrm_offset(s, OPC_MOVDQU_WxVx | P_VEXL, arg, 0, arg1, arg2);
This instruction raises #GP, aka SIGSEGV, if the effective address is not aligned to 16-bytes. We have assertions in tcg-op-gvec.c that the offset from ENV is aligned, for vector types <= V128. But the offset itself does not validate that the final pointer is aligned -- one must also remember to use the QEMU_ALIGNED() attribute on the vector member within ENV. PowerPC Altivec has vector load/store instructions that silently discard the low 4 bits of the address, making alignment mistakes difficult to discover. Aid that by making the most popular host visibly signal the error. Signed-off-by: Richard Henderson <richard.henderson@linaro.org> --- tcg/i386/tcg-target.inc.c | 24 ++++++++++++++++++++++-- 1 file changed, 22 insertions(+), 2 deletions(-) -- 2.17.1