| Message ID | 20200812065339.2030527-3-richard.henderson@linaro.org |
|---|---|
| State | New |
| Series | target/arm: Implement an IMPDEF pauth algorithm |
Richard Henderson <richard.henderson@linaro.org> writes:

> Without hardware acceleration, a cryptographically strong
> algorithm is too expensive for pauth_computepac.
>
> Even with hardware accel, we are not currently expecting
> to link the linux-user binaries to any crypto libraries,
> and doing so would generally make the --static build fail.
>
> So choose XXH64 as a reasonably quick and decent hash.
>
> Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
> ---
>  target/arm/pauth_helper.c | 75 ++++++++++++++++++++++++++++++++++++---
>  1 file changed, 70 insertions(+), 5 deletions(-)
[...]

You might find it easier to #include "qemu/xxhash.h" which we use for tb
hashing amongst other things.

--
Alex Bennée
On 8/12/20 2:49 AM, Alex Bennée wrote:
> Richard Henderson <richard.henderson@linaro.org> writes:
>
[...]
>
> You might find it easier to #include "qemu/xxhash.h" which we use for tb
> hashing amongst other things.

First, that's the 32-bit version, XXH32.

Second, we define xxhash7 there; we would need xxhash8 here.


r~
Richard Henderson <richard.henderson@linaro.org> writes:

> On 8/12/20 2:49 AM, Alex Bennée wrote:
>> You might find it easier to #include "qemu/xxhash.h" which we use for tb
>> hashing amongst other things.
>
> First, that's the 32-bit version, XXH32.

Ahh I missed that detail.

> Second, we define xxhash7 there; we would need xxhash8 here.

We could at least put the code in the header with the others.

--
Alex Bennée
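[Editor's note: for illustration, a 64-bit four-word helper along the lines
discussed could be factored straight out of the patch. This is only a sketch:
it reuses the XXH64_round/XXH64_mergeround/XXH64_avalanche helpers and
PRIME64_* constants from the patch below, plus rol64 from qemu/bitops.h; the
name qemu_xxhash64_4 and its placement in include/qemu/xxhash.h are
assumptions, not something this thread settled.]

/*
 * Hypothetical 64-bit companion to the XXH32-based qemu_xxhash* helpers
 * in include/qemu/xxhash.h, which hash four fixed 64-bit words with the
 * usual QEMU_XXHASH_SEED of 1. Depends on the XXH64_* helpers and
 * PRIME64_* constants defined in the patch below.
 */
static inline uint64_t qemu_xxhash64_4(uint64_t ab, uint64_t cd,
                                       uint64_t ef, uint64_t gh)
{
    uint64_t v1 = QEMU_XXHASH_SEED + PRIME64_1 + PRIME64_2;
    uint64_t v2 = QEMU_XXHASH_SEED + PRIME64_2;
    uint64_t v3 = QEMU_XXHASH_SEED + 0;
    uint64_t v4 = QEMU_XXHASH_SEED - PRIME64_1;
    uint64_t h64;

    /* One round per 64-bit input word, one accumulator each. */
    v1 = XXH64_round(v1, ab);
    v2 = XXH64_round(v2, cd);
    v3 = XXH64_round(v3, ef);
    v4 = XXH64_round(v4, gh);

    /* Combine the four accumulators, then mix thoroughly. */
    h64 = rol64(v1, 1) + rol64(v2, 7) + rol64(v3, 12) + rol64(v4, 18);
    h64 = XXH64_mergeround(h64, v1);
    h64 = XXH64_mergeround(h64, v2);
    h64 = XXH64_mergeround(h64, v3);
    h64 = XXH64_mergeround(h64, v4);

    return XXH64_avalanche(h64);
}

With such a helper in the header, pauth_computepac_impdef would reduce to a
single call: qemu_xxhash64_4(data, modifier, key.lo, key.hi).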
diff --git a/target/arm/pauth_helper.c b/target/arm/pauth_helper.c
index 6dbab03768..f1a4389465 100644
--- a/target/arm/pauth_helper.c
+++ b/target/arm/pauth_helper.c
@@ -207,8 +207,8 @@ static uint64_t tweak_inv_shuffle(uint64_t i)
     return o;
 }
 
-static uint64_t pauth_computepac(uint64_t data, uint64_t modifier,
-                                 ARMPACKey key)
+static uint64_t __attribute__((noinline))
+pauth_computepac_architected(uint64_t data, uint64_t modifier, ARMPACKey key)
 {
     static const uint64_t RC[5] = {
         0x0000000000000000ull,
@@ -272,6 +272,71 @@ static uint64_t pauth_computepac(uint64_t data, uint64_t modifier,
     return workingval;
 }
 
+/*
+ * The XXH64 algorithm from
+ * https://github.com/Cyan4973/xxHash/blob/v0.8.0/xxhash.h
+ */
+#define PRIME64_1 0x9E3779B185EBCA87ULL
+#define PRIME64_2 0xC2B2AE3D27D4EB4FULL
+#define PRIME64_3 0x165667B19E3779F9ULL
+#define PRIME64_4 0x85EBCA77C2B2AE63ULL
+#define PRIME64_5 0x27D4EB2F165667C5ULL
+
+static inline uint64_t XXH64_round(uint64_t acc, uint64_t input)
+{
+    return rol64(acc + input * PRIME64_2, 31) * PRIME64_1;
+}
+
+static inline uint64_t XXH64_mergeround(uint64_t acc, uint64_t val)
+{
+    return (acc ^ XXH64_round(0, val)) * PRIME64_1 + PRIME64_4;
+}
+
+static inline uint64_t XXH64_avalanche(uint64_t h64)
+{
+    h64 ^= h64 >> 33;
+    h64 *= PRIME64_2;
+    h64 ^= h64 >> 29;
+    h64 *= PRIME64_3;
+    /* h64 ^= h64 >> 32; -- does not affect high 64 for pauth */
+    return h64;
+}
+
+static uint64_t __attribute__((noinline))
+pauth_computepac_impdef(uint64_t data, uint64_t modifier, ARMPACKey key)
+{
+    uint64_t v1 = 1 + PRIME64_1 + PRIME64_2;
+    uint64_t v2 = 1 + PRIME64_2;
+    uint64_t v3 = 1 + 0;
+    uint64_t v4 = 1 - PRIME64_1;
+    uint64_t h64;
+
+    v1 = XXH64_round(v1, data);
+    v2 = XXH64_round(v2, modifier);
+    v3 = XXH64_round(v3, key.lo);
+    v4 = XXH64_round(v4, key.hi);
+
+    h64 = rol64(v1, 1) + rol64(v2, 7) + rol64(v3, 12) + rol64(v4, 18);
+    h64 = XXH64_mergeround(h64, v1);
+    h64 = XXH64_mergeround(h64, v2);
+    h64 = XXH64_mergeround(h64, v3);
+    h64 = XXH64_mergeround(h64, v4);
+
+    return XXH64_avalanche(h64);
+}
+
+static uint64_t pauth_computepac(CPUARMState *env, uint64_t data,
+                                 uint64_t modifier, ARMPACKey key)
+{
+    ARMCPU *cpu = env_archcpu(env);
+
+    if (FIELD_EX64(cpu->isar.id_aa64isar1, ID_AA64ISAR1, APA)) {
+        return pauth_computepac_architected(data, modifier, key);
+    } else {
+        return pauth_computepac_impdef(data, modifier, key);
+    }
+}
+
 static uint64_t pauth_addpac(CPUARMState *env, uint64_t ptr, uint64_t modifier,
                              ARMPACKey *key, bool data)
 {
@@ -292,7 +357,7 @@ static uint64_t pauth_addpac(CPUARMState *env, uint64_t ptr, uint64_t modifier,
     bot_bit = 64 - param.tsz;
     ext_ptr = deposit64(ptr, bot_bit, top_bit - bot_bit, ext);
 
-    pac = pauth_computepac(ext_ptr, modifier, *key);
+    pac = pauth_computepac(env, ext_ptr, modifier, *key);
 
     /*
      * Check if the ptr has good extension bits and corrupt the
@@ -341,7 +406,7 @@ static uint64_t pauth_auth(CPUARMState *env, uint64_t ptr, uint64_t modifier,
     uint64_t pac, orig_ptr, test;
 
     orig_ptr = pauth_original_ptr(ptr, param);
-    pac = pauth_computepac(orig_ptr, modifier, *key);
+    pac = pauth_computepac(env, orig_ptr, modifier, *key);
 
     bot_bit = 64 - param.tsz;
     top_bit = 64 - 8 * param.tbi;
@@ -442,7 +507,7 @@ uint64_t HELPER(pacga)(CPUARMState *env, uint64_t x, uint64_t y)
     uint64_t pac;
 
     pauth_check_trap(env, arm_current_el(env), GETPC());
-    pac = pauth_computepac(x, y, env->keys.apga);
+    pac = pauth_computepac(env, x, y, env->keys.apga);
 
     return pac & 0xffffffff00000000ull;
 }
Without hardware acceleration, a cryptographically strong
algorithm is too expensive for pauth_computepac.

Even with hardware accel, we are not currently expecting
to link the linux-user binaries to any crypto libraries,
and doing so would generally make the --static build fail.

So choose XXH64 as a reasonably quick and decent hash.

Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
---
 target/arm/pauth_helper.c | 75 ++++++++++++++++++++++++++++++++++++---
 1 file changed, 70 insertions(+), 5 deletions(-)

--
2.25.1
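[Editor's note: to experiment with the impdef PAC outside QEMU, the core of
the patch lifts out into a free-standing program. This is a sketch, not part
of the series: rol64 is re-implemented locally (QEMU provides it in
qemu/bitops.h), the key is passed as two plain 64-bit words instead of
ARMPACKey, PRIME64_5 is omitted as unused on this path, and the inputs in
main are arbitrary.]

#include <inttypes.h>
#include <stdint.h>
#include <stdio.h>

/* Local stand-in for QEMU's rol64 from include/qemu/bitops.h. */
static inline uint64_t rol64(uint64_t word, unsigned shift)
{
    return (word << shift) | (word >> ((64 - shift) & 63));
}

#define PRIME64_1 0x9E3779B185EBCA87ULL
#define PRIME64_2 0xC2B2AE3D27D4EB4FULL
#define PRIME64_3 0x165667B19E3779F9ULL
#define PRIME64_4 0x85EBCA77C2B2AE63ULL

static inline uint64_t XXH64_round(uint64_t acc, uint64_t input)
{
    return rol64(acc + input * PRIME64_2, 31) * PRIME64_1;
}

static inline uint64_t XXH64_mergeround(uint64_t acc, uint64_t val)
{
    return (acc ^ XXH64_round(0, val)) * PRIME64_1 + PRIME64_4;
}

static inline uint64_t XXH64_avalanche(uint64_t h64)
{
    h64 ^= h64 >> 33;
    h64 *= PRIME64_2;
    h64 ^= h64 >> 29;
    h64 *= PRIME64_3;
    /* Final h64 ^= h64 >> 32 omitted, as in the patch. */
    return h64;
}

/* The impdef PAC core from the patch, key passed as two words. */
static uint64_t pac_impdef(uint64_t data, uint64_t modifier,
                           uint64_t key_lo, uint64_t key_hi)
{
    uint64_t v1 = 1 + PRIME64_1 + PRIME64_2;
    uint64_t v2 = 1 + PRIME64_2;
    uint64_t v3 = 1 + 0;
    uint64_t v4 = 1 - PRIME64_1;
    uint64_t h64;

    v1 = XXH64_round(v1, data);
    v2 = XXH64_round(v2, modifier);
    v3 = XXH64_round(v3, key_lo);
    v4 = XXH64_round(v4, key_hi);

    h64 = rol64(v1, 1) + rol64(v2, 7) + rol64(v3, 12) + rol64(v4, 18);
    h64 = XXH64_mergeround(h64, v1);
    h64 = XXH64_mergeround(h64, v2);
    h64 = XXH64_mergeround(h64, v3);
    h64 = XXH64_mergeround(h64, v4);

    return XXH64_avalanche(h64);
}

int main(void)
{
    /* Arbitrary example inputs. */
    printf("pac = %016" PRIx64 "\n",
           pac_impdef(0x1234, 0x5678, 0xdead, 0xbeef));
    return 0;
}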