Message ID | 20230609022401.684157-39-richard.henderson@linaro.org |
---|---|
State | New |
Series | crypto: Provide aes-round.h and host accel |
On 6/8/23 23:24, Richard Henderson wrote:
> Detect CRYPTO in cpuinfo; implement the accel hooks.
>
> Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
> ---

Reviewed-by: Daniel Henrique Barboza <danielhb413@gmail.com>
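For context on the "Detect CRYPTO in cpuinfo" part: on Linux the kernel advertises the Power8 vector crypto facility through the auxiliary vector, which is what the util/cpuinfo-ppc.c hunk in the patch below tests. The standalone sketch that follows is illustrative only and is not QEMU code; it assumes glibc's getauxval(), and the fallback hwcap values are assumptions used only if the system headers do not define the named constants.

```c
/*
 * Illustrative sketch only (not QEMU code): query the auxiliary vector
 * for the Power8 vector-crypto hwcap bit, the same runtime information
 * the util/cpuinfo-ppc.c hunk below records in CPUINFO_CRYPTO.
 */
#include <stdbool.h>
#include <sys/auxv.h>   /* getauxval(), AT_HWCAP, AT_HWCAP2 */

#ifndef PPC_FEATURE_HAS_VSX
#define PPC_FEATURE_HAS_VSX          0x00000080  /* assumed fallback value */
#endif
#ifndef PPC_FEATURE2_HAS_VEC_CRYPTO
#define PPC_FEATURE2_HAS_VEC_CRYPTO  0x02000000  /* assumed fallback value */
#endif

static bool host_has_vec_crypto(void)
{
    unsigned long hwcap  = getauxval(AT_HWCAP);
    unsigned long hwcap2 = getauxval(AT_HWCAP2);

    /* Crypto arrived with Power8 and VSX with Power7, so expect both. */
    return (hwcap & PPC_FEATURE_HAS_VSX) &&
           (hwcap2 & PPC_FEATURE2_HAS_VEC_CRYPTO);
}
```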
```diff
diff --git a/host/include/ppc/host/aes-round.h b/host/include/ppc/host/aes-round.h
new file mode 100644
index 0000000000..9b5a15d1e5
--- /dev/null
+++ b/host/include/ppc/host/aes-round.h
@@ -0,0 +1,181 @@
+/*
+ * Power v2.07 specific aes acceleration.
+ * SPDX-License-Identifier: GPL-2.0-or-later
+ */
+
+#ifndef PPC_HOST_AES_ROUND_H
+#define PPC_HOST_AES_ROUND_H
+
+#ifndef __ALTIVEC__
+/* Without ALTIVEC, we can't even write inline assembly. */
+#include "host/include/generic/host/aes-round.h"
+#else
+#include "host/cpuinfo.h"
+
+#ifdef __CRYPTO__
+# define HAVE_AES_ACCEL  true
+#else
+# define HAVE_AES_ACCEL  likely(cpuinfo & CPUINFO_CRYPTO)
+#endif
+#define ATTR_AES_ACCEL
+
+/*
+ * While there is <altivec.h>, both gcc and clang "aid" with the
+ * endianness issues in different ways. Just use inline asm instead.
+ */
+
+/* Bytes in memory are host-endian; bytes in register are @be. */
+static inline AESStateVec aes_accel_ld(const AESState *p, bool be)
+{
+    AESStateVec r;
+
+    if (be) {
+        asm("lvx %0, 0, %1" : "=v"(r) : "r"(p), "m"(*p));
+    } else if (HOST_BIG_ENDIAN) {
+        AESStateVec rev = {
+            15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0,
+        };
+        asm("lvx %0, 0, %1\n\t"
+            "vperm %0, %0, %0, %2"
+            : "=v"(r) : "r"(p), "v"(rev), "m"(*p));
+    } else {
+#ifdef __POWER9_VECTOR__
+        asm("lxvb16x %x0, 0, %1" : "=v"(r) : "r"(p), "m"(*p));
+#else
+        asm("lxvd2x %x0, 0, %1\n\t"
+            "xxpermdi %x0, %x0, %x0, 2"
+            : "=v"(r) : "r"(p), "m"(*p));
+#endif
+    }
+    return r;
+}
+
+static void aes_accel_st(AESState *p, AESStateVec r, bool be)
+{
+    if (be) {
+        asm("stvx %1, 0, %2" : "=m"(*p) : "v"(r), "r"(p));
+    } else if (HOST_BIG_ENDIAN) {
+        AESStateVec rev = {
+            15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0,
+        };
+        asm("vperm %1, %1, %1, %2\n\t"
+            "stvx %1, 0, %3"
+            : "=m"(*p), "+v"(r) : "v"(rev), "r"(p));
+    } else {
+#ifdef __POWER9_VECTOR__
+        asm("stxvb16x %x1, 0, %2" : "=m"(*p) : "v"(r), "r"(p));
+#else
+        asm("xxpermdi %x1, %x1, %x1, 2\n\t"
+            "stxvd2x %x1, 0, %2"
+            : "=m"(*p), "+v"(r) : "r"(p));
+#endif
+    }
+}
+
+static inline AESStateVec aes_accel_vcipher(AESStateVec d, AESStateVec k)
+{
+    asm("vcipher %0, %0, %1" : "+v"(d) : "v"(k));
+    return d;
+}
+
+static inline AESStateVec aes_accel_vncipher(AESStateVec d, AESStateVec k)
+{
+    asm("vncipher %0, %0, %1" : "+v"(d) : "v"(k));
+    return d;
+}
+
+static inline AESStateVec aes_accel_vcipherlast(AESStateVec d, AESStateVec k)
+{
+    asm("vcipherlast %0, %0, %1" : "+v"(d) : "v"(k));
+    return d;
+}
+
+static inline AESStateVec aes_accel_vncipherlast(AESStateVec d, AESStateVec k)
+{
+    asm("vncipherlast %0, %0, %1" : "+v"(d) : "v"(k));
+    return d;
+}
+
+static inline void
+aesenc_MC_accel(AESState *ret, const AESState *st, bool be)
+{
+    AESStateVec t, z = { };
+
+    t = aes_accel_ld(st, be);
+    t = aes_accel_vncipherlast(t, z);
+    t = aes_accel_vcipher(t, z);
+    aes_accel_st(ret, t, be);
+}
+
+static inline void
+aesenc_SB_SR_AK_accel(AESState *ret, const AESState *st,
+                      const AESState *rk, bool be)
+{
+    AESStateVec t, k;
+
+    t = aes_accel_ld(st, be);
+    k = aes_accel_ld(rk, be);
+    t = aes_accel_vcipherlast(t, k);
+    aes_accel_st(ret, t, be);
+}
+
+static inline void
+aesenc_SB_SR_MC_AK_accel(AESState *ret, const AESState *st,
+                         const AESState *rk, bool be)
+{
+    AESStateVec t, k;
+
+    t = aes_accel_ld(st, be);
+    k = aes_accel_ld(rk, be);
+    t = aes_accel_vcipher(t, k);
+    aes_accel_st(ret, t, be);
+}
+
+static inline void
+aesdec_IMC_accel(AESState *ret, const AESState *st, bool be)
+{
+    AESStateVec t, z = { };
+
+    t = aes_accel_ld(st, be);
+    t = aes_accel_vcipherlast(t, z);
+    t = aes_accel_vncipher(t, z);
+    aes_accel_st(ret, t, be);
+}
+
+static inline void
+aesdec_ISB_ISR_AK_accel(AESState *ret, const AESState *st,
+                        const AESState *rk, bool be)
+{
+    AESStateVec t, k;
+
+    t = aes_accel_ld(st, be);
+    k = aes_accel_ld(rk, be);
+    t = aes_accel_vncipherlast(t, k);
+    aes_accel_st(ret, t, be);
+}
+
+static inline void
+aesdec_ISB_ISR_AK_IMC_accel(AESState *ret, const AESState *st,
+                            const AESState *rk, bool be)
+{
+    AESStateVec t, k;
+
+    t = aes_accel_ld(st, be);
+    k = aes_accel_ld(rk, be);
+    t = aes_accel_vncipher(t, k);
+    aes_accel_st(ret, t, be);
+}
+
+static inline void
+aesdec_ISB_ISR_IMC_AK_accel(AESState *ret, const AESState *st,
+                            const AESState *rk, bool be)
+{
+    AESStateVec t, k, z = { };
+
+    t = aes_accel_ld(st, be);
+    k = aes_accel_ld(rk, be);
+    t = aes_accel_vncipher(t, z);
+    aes_accel_st(ret, t ^ k, be);
+}
+#endif /* __ALTIVEC__ */
+#endif /* PPC_HOST_AES_ROUND_H */
diff --git a/host/include/ppc/host/cpuinfo.h b/host/include/ppc/host/cpuinfo.h
index 7ec252ef52..6cc727dba7 100644
--- a/host/include/ppc/host/cpuinfo.h
+++ b/host/include/ppc/host/cpuinfo.h
@@ -16,6 +16,7 @@
 #define CPUINFO_ISEL            (1u << 5)
 #define CPUINFO_ALTIVEC         (1u << 6)
 #define CPUINFO_VSX             (1u << 7)
+#define CPUINFO_CRYPTO          (1u << 8)
 
 /* Initialized with a constructor. */
 extern unsigned cpuinfo;
diff --git a/host/include/ppc64/host/aes-round.h b/host/include/ppc64/host/aes-round.h
new file mode 100644
index 0000000000..4a78d94de8
--- /dev/null
+++ b/host/include/ppc64/host/aes-round.h
@@ -0,0 +1 @@
+#include "host/include/ppc/host/aes-round.h"
diff --git a/util/cpuinfo-ppc.c b/util/cpuinfo-ppc.c
index ee761de33a..053b383720 100644
--- a/util/cpuinfo-ppc.c
+++ b/util/cpuinfo-ppc.c
@@ -49,6 +49,14 @@ unsigned __attribute__((constructor)) cpuinfo_init(void)
         /* We only care about the portion of VSX that overlaps Altivec. */
         if (hwcap & PPC_FEATURE_HAS_VSX) {
             info |= CPUINFO_VSX;
+            /*
+             * We use VSX especially for little-endian, but we should
+             * always have both anyway, since VSX came with Power7
+             * and crypto came with Power8.
+             */
+            if (hwcap2 & PPC_FEATURE2_HAS_VEC_CRYPTO) {
+                info |= CPUINFO_CRYPTO;
+            }
         }
     }
```
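One non-obvious detail in the new header is how aesenc_MC_accel() and aesdec_IMC_accel() isolate the (Inv)MixColumns step, for which Power has no standalone instruction: they run a "last" round in one direction and a full round in the other, both with an all-zero round key. As a reading aid only, here is the encrypt-side helper again with the algebra spelled out; this restates code from the patch above and is not additional patch content.

```c
/*
 * Restatement of aesenc_MC_accel() from the header above.  With a
 * zero round key k = 0:
 *   vncipherlast(t, 0) = InvSubBytes(InvShiftRows(t))
 *   vcipher(x, 0)      = MixColumns(ShiftRows(SubBytes(x)))
 * so composing the two cancels the (Inv)SubBytes and (Inv)ShiftRows
 * pairs and leaves exactly MixColumns(t).
 */
static inline void
aesenc_MC_accel_annotated(AESState *ret, const AESState *st, bool be)
{
    AESStateVec t, z = { };              /* z is the all-zero round key */

    t = aes_accel_ld(st, be);            /* state into a vector register */
    t = aes_accel_vncipherlast(t, z);    /* undo SubBytes and ShiftRows  */
    t = aes_accel_vcipher(t, z);         /* redo them, plus MixColumns   */
    aes_accel_st(ret, t, be);            /* back to memory               */
}
```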
Detect CRYPTO in cpuinfo; implement the accel hooks.

Signed-off-by: Richard Henderson <richard.henderson@linaro.org>

```
---
 host/include/ppc/host/aes-round.h   | 181 ++++++++++++++++++++++++++++
 host/include/ppc/host/cpuinfo.h     |   1 +
 host/include/ppc64/host/aes-round.h |   1 +
 util/cpuinfo-ppc.c                  |   8 ++
 4 files changed, 191 insertions(+)
 create mode 100644 host/include/ppc/host/aes-round.h
 create mode 100644 host/include/ppc64/host/aes-round.h
```
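The header itself only supplies the *_accel hooks plus the HAVE_AES_ACCEL predicate and the (empty) ATTR_AES_ACCEL attribute; choosing between the accelerated and generic implementations happens in the common AES code, which this patch does not touch. A rough sketch of how such a caller might gate on the predicate is below; the wrapper and the _gen fallback are hypothetical names for illustration, not the actual QEMU interface.

```c
/*
 * Hypothetical caller, for illustration only: dispatch on the
 * HAVE_AES_ACCEL predicate defined by the header above.
 */
void aesenc_SB_SR_MC_AK_gen(AESState *r, const AESState *st,
                            const AESState *rk, bool be);  /* hypothetical portable fallback */

static inline void aesenc_SB_SR_MC_AK(AESState *r, const AESState *st,
                                      const AESState *rk, bool be)
{
    if (HAVE_AES_ACCEL) {
        aesenc_SB_SR_MC_AK_accel(r, st, rk, be);   /* vcipher path */
    } else {
        aesenc_SB_SR_MC_AK_gen(r, st, rk, be);     /* table-based path */
    }
}
```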