Message ID: 20200302175829.2183-6-richard.henderson@linaro.org
State:      New
Series:     target/arm: Misc cleanups surrounding TBI
On 3/2/20 6:58 PM, Richard Henderson wrote:
> This is an aarch64-only function. Move it out of the shared file.
> This patch is code movement only.
>
> Reviewed-by: Peter Maydell <peter.maydell@linaro.org>
> Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
> ---
>  target/arm/helper-a64.h |  1 +
>  target/arm/helper.h     |  1 -
>  target/arm/helper-a64.c | 91 ++++++++++++++++++++++++++++++++++++++++
>  target/arm/op_helper.c  | 93 -----------------------------------------
>  4 files changed, 92 insertions(+), 94 deletions(-)
[...]

Reviewed-by: Philippe Mathieu-Daudé <philmd@redhat.com>
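A note on the arithmetic the helper relies on: QEMU's ARMCPU::dcz_blocksize
stores log2 of the block size in 32-bit words (mirroring the DCZID_EL0.BS
field), so "4 << dcz_blocksize" gives the block length in bytes, and because
that length is always a power of two, "vaddr_in & ~(blocklen - 1)" aligns the
input address down to a block boundary. The following is a minimal standalone
C sketch of that computation, not QEMU code; the dcz_blocksize value of 4
(a 64-byte block, as used by QEMU's Cortex-A57 model) and the sample address
are illustrative assumptions:

#include <assert.h>
#include <inttypes.h>
#include <stdint.h>
#include <stdio.h>

int main(void)
{
    unsigned dcz_blocksize = 4;                  /* assumed: log2(words), A57-like */
    uint64_t blocklen = 4 << dcz_blocksize;      /* 4 << 4 = 64 bytes */
    uint64_t vaddr_in = 0x123456789abcULL;       /* arbitrary unaligned address */

    /* The masking trick is only valid for power-of-two block sizes. */
    assert((blocklen & (blocklen - 1)) == 0);

    uint64_t vaddr = vaddr_in & ~(blocklen - 1); /* align down to block start */
    printf("blocklen=%" PRIu64 " vaddr_in=%#" PRIx64 " vaddr=%#" PRIx64 "\n",
           blocklen, vaddr_in, vaddr);
    return 0;
}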
diff --git a/target/arm/helper-a64.h b/target/arm/helper-a64.h
index a915c1247f..b1a5935f61 100644
--- a/target/arm/helper-a64.h
+++ b/target/arm/helper-a64.h
@@ -90,6 +90,7 @@ DEF_HELPER_2(advsimd_f16touinth, i32, f16, ptr)
 DEF_HELPER_2(sqrt_f16, f16, f16, ptr)
 
 DEF_HELPER_2(exception_return, void, env, i64)
+DEF_HELPER_2(dc_zva, void, env, i64)
 
 DEF_HELPER_FLAGS_3(pacia, TCG_CALL_NO_WG, i64, env, i64, i64)
 DEF_HELPER_FLAGS_3(pacib, TCG_CALL_NO_WG, i64, env, i64, i64)
diff --git a/target/arm/helper.h b/target/arm/helper.h
index fcbf504121..72eb9e6a1a 100644
--- a/target/arm/helper.h
+++ b/target/arm/helper.h
@@ -559,7 +559,6 @@ DEF_HELPER_FLAGS_3(crypto_sm4ekey, TCG_CALL_NO_RWG, void, ptr, ptr, ptr)
 
 DEF_HELPER_FLAGS_3(crc32, TCG_CALL_NO_RWG_SE, i32, i32, i32, i32)
 DEF_HELPER_FLAGS_3(crc32c, TCG_CALL_NO_RWG_SE, i32, i32, i32, i32)
-DEF_HELPER_2(dc_zva, void, env, i64)
 
 DEF_HELPER_FLAGS_5(gvec_qrdmlah_s16, TCG_CALL_NO_RWG,
                    void, ptr, ptr, ptr, ptr, i32)
diff --git a/target/arm/helper-a64.c b/target/arm/helper-a64.c
index 123ce50e7a..bc0649a44a 100644
--- a/target/arm/helper-a64.c
+++ b/target/arm/helper-a64.c
@@ -18,6 +18,7 @@
  */
 
 #include "qemu/osdep.h"
+#include "qemu/units.h"
 #include "cpu.h"
 #include "exec/gdbstub.h"
 #include "exec/helper-proto.h"
@@ -1109,4 +1110,94 @@ uint32_t HELPER(sqrt_f16)(uint32_t a, void *fpstp)
     return float16_sqrt(a, s);
 }
 
+void HELPER(dc_zva)(CPUARMState *env, uint64_t vaddr_in)
+{
+    /*
+     * Implement DC ZVA, which zeroes a fixed-length block of memory.
+     * Note that we do not implement the (architecturally mandated)
+     * alignment fault for attempts to use this on Device memory
+     * (which matches the usual QEMU behaviour of not implementing either
+     * alignment faults or any memory attribute handling).
+     */
+    ARMCPU *cpu = env_archcpu(env);
+    uint64_t blocklen = 4 << cpu->dcz_blocksize;
+    uint64_t vaddr = vaddr_in & ~(blocklen - 1);
+
+#ifndef CONFIG_USER_ONLY
+    {
+        /*
+         * Slightly awkwardly, QEMU's TARGET_PAGE_SIZE may be less than
+         * the block size so we might have to do more than one TLB lookup.
+         * We know that in fact for any v8 CPU the page size is at least 4K
+         * and the block size must be 2K or less, but TARGET_PAGE_SIZE is only
+         * 1K as an artefact of legacy v5 subpage support being present in the
+         * same QEMU executable. So in practice the hostaddr[] array has
+         * two entries, given the current setting of TARGET_PAGE_BITS_MIN.
+         */
+        int maxidx = DIV_ROUND_UP(blocklen, TARGET_PAGE_SIZE);
+        void *hostaddr[DIV_ROUND_UP(2 * KiB, 1 << TARGET_PAGE_BITS_MIN)];
+        int try, i;
+        unsigned mmu_idx = cpu_mmu_index(env, false);
+        TCGMemOpIdx oi = make_memop_idx(MO_UB, mmu_idx);
+
+        assert(maxidx <= ARRAY_SIZE(hostaddr));
+
+        for (try = 0; try < 2; try++) {
+
+            for (i = 0; i < maxidx; i++) {
+                hostaddr[i] = tlb_vaddr_to_host(env,
+                                                vaddr + TARGET_PAGE_SIZE * i,
+                                                1, mmu_idx);
+                if (!hostaddr[i]) {
+                    break;
+                }
+            }
+            if (i == maxidx) {
+                /*
+                 * If it's all in the TLB it's fair game for just writing to;
+                 * we know we don't need to update dirty status, etc.
+                 */
+                for (i = 0; i < maxidx - 1; i++) {
+                    memset(hostaddr[i], 0, TARGET_PAGE_SIZE);
+                }
+                memset(hostaddr[i], 0, blocklen - (i * TARGET_PAGE_SIZE));
+                return;
+            }
+            /*
+             * OK, try a store and see if we can populate the tlb. This
+             * might cause an exception if the memory isn't writable,
+             * in which case we will longjmp out of here. We must for
+             * this purpose use the actual register value passed to us
+             * so that we get the fault address right.
+             */
+            helper_ret_stb_mmu(env, vaddr_in, 0, oi, GETPC());
+            /* Now we can populate the other TLB entries, if any */
+            for (i = 0; i < maxidx; i++) {
+                uint64_t va = vaddr + TARGET_PAGE_SIZE * i;
+                if (va != (vaddr_in & TARGET_PAGE_MASK)) {
+                    helper_ret_stb_mmu(env, va, 0, oi, GETPC());
+                }
+            }
+        }
+
+        /*
+         * Slow path (probably attempt to do this to an I/O device or
+         * similar, or clearing of a block of code we have translations
+         * cached for). Just do a series of byte writes as the architecture
+         * demands. It's not worth trying to use a cpu_physical_memory_map(),
+         * memset(), unmap() sequence here because:
+         *  + we'd need to account for the blocksize being larger than a page
+         *  + the direct-RAM access case is almost always going to be dealt
+         *    with in the fastpath code above, so there's no speed benefit
+         *  + we would have to deal with the map returning NULL because the
+         *    bounce buffer was in use
+         */
+        for (i = 0; i < blocklen; i++) {
+            helper_ret_stb_mmu(env, vaddr + i, 0, oi, GETPC());
+        }
+    }
+#else
+    memset(g2h(vaddr), 0, blocklen);
+#endif
+}
diff --git a/target/arm/op_helper.c b/target/arm/op_helper.c
index af3020b78f..eb0de080f1 100644
--- a/target/arm/op_helper.c
+++ b/target/arm/op_helper.c
@@ -17,7 +17,6 @@
  * License along with this library; if not, see <http://www.gnu.org/licenses/>.
  */
 #include "qemu/osdep.h"
-#include "qemu/units.h"
 #include "qemu/log.h"
 #include "qemu/main-loop.h"
 #include "cpu.h"
@@ -936,95 +935,3 @@ uint32_t HELPER(ror_cc)(CPUARMState *env, uint32_t x, uint32_t i)
         return ((uint32_t)x >> shift) | (x << (32 - shift));
     }
 }
-
-void HELPER(dc_zva)(CPUARMState *env, uint64_t vaddr_in)
-{
-    /*
-     * Implement DC ZVA, which zeroes a fixed-length block of memory.
-     * Note that we do not implement the (architecturally mandated)
-     * alignment fault for attempts to use this on Device memory
-     * (which matches the usual QEMU behaviour of not implementing either
-     * alignment faults or any memory attribute handling).
-     */
-
-    ARMCPU *cpu = env_archcpu(env);
-    uint64_t blocklen = 4 << cpu->dcz_blocksize;
-    uint64_t vaddr = vaddr_in & ~(blocklen - 1);
-
-#ifndef CONFIG_USER_ONLY
-    {
-        /*
-         * Slightly awkwardly, QEMU's TARGET_PAGE_SIZE may be less than
-         * the block size so we might have to do more than one TLB lookup.
-         * We know that in fact for any v8 CPU the page size is at least 4K
-         * and the block size must be 2K or less, but TARGET_PAGE_SIZE is only
-         * 1K as an artefact of legacy v5 subpage support being present in the
-         * same QEMU executable. So in practice the hostaddr[] array has
-         * two entries, given the current setting of TARGET_PAGE_BITS_MIN.
-         */
-        int maxidx = DIV_ROUND_UP(blocklen, TARGET_PAGE_SIZE);
-        void *hostaddr[DIV_ROUND_UP(2 * KiB, 1 << TARGET_PAGE_BITS_MIN)];
-        int try, i;
-        unsigned mmu_idx = cpu_mmu_index(env, false);
-        TCGMemOpIdx oi = make_memop_idx(MO_UB, mmu_idx);
-
-        assert(maxidx <= ARRAY_SIZE(hostaddr));
-
-        for (try = 0; try < 2; try++) {
-
-            for (i = 0; i < maxidx; i++) {
-                hostaddr[i] = tlb_vaddr_to_host(env,
-                                                vaddr + TARGET_PAGE_SIZE * i,
-                                                1, mmu_idx);
-                if (!hostaddr[i]) {
-                    break;
-                }
-            }
-            if (i == maxidx) {
-                /*
-                 * If it's all in the TLB it's fair game for just writing to;
-                 * we know we don't need to update dirty status, etc.
-                 */
-                for (i = 0; i < maxidx - 1; i++) {
-                    memset(hostaddr[i], 0, TARGET_PAGE_SIZE);
-                }
-                memset(hostaddr[i], 0, blocklen - (i * TARGET_PAGE_SIZE));
-                return;
-            }
-            /*
-             * OK, try a store and see if we can populate the tlb. This
-             * might cause an exception if the memory isn't writable,
-             * in which case we will longjmp out of here. We must for
-             * this purpose use the actual register value passed to us
-             * so that we get the fault address right.
-             */
-            helper_ret_stb_mmu(env, vaddr_in, 0, oi, GETPC());
-            /* Now we can populate the other TLB entries, if any */
-            for (i = 0; i < maxidx; i++) {
-                uint64_t va = vaddr + TARGET_PAGE_SIZE * i;
-                if (va != (vaddr_in & TARGET_PAGE_MASK)) {
-                    helper_ret_stb_mmu(env, va, 0, oi, GETPC());
-                }
-            }
-        }
-
-        /*
-         * Slow path (probably attempt to do this to an I/O device or
-         * similar, or clearing of a block of code we have translations
-         * cached for). Just do a series of byte writes as the architecture
-         * demands. It's not worth trying to use a cpu_physical_memory_map(),
-         * memset(), unmap() sequence here because:
-         *  + we'd need to account for the blocksize being larger than a page
-         *  + the direct-RAM access case is almost always going to be dealt
-         *    with in the fastpath code above, so there's no speed benefit
-         *  + we would have to deal with the map returning NULL because the
-         *    bounce buffer was in use
-         */
-        for (i = 0; i < blocklen; i++) {
-            helper_ret_stb_mmu(env, vaddr + i, 0, oi, GETPC());
-        }
-    }
-#else
-    memset(g2h(vaddr), 0, blocklen);
-#endif
-}
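For context on what guest code expects from this helper, the usual aarch64
zeroing idiom is sketched below: read DCZID_EL0 to learn the block size, fall
back to a plain memset when the DZP bit says DC ZVA is prohibited, and
otherwise issue one DC ZVA per block. This is an illustrative sketch, not code
from the patch: the function names are invented, it assumes an aarch64
compiler, and the caller is assumed to pass a buffer whose start and length
are block-aligned.

#include <stddef.h>
#include <stdint.h>
#include <string.h>

static inline uint64_t read_dczid_el0(void)
{
    uint64_t dczid;
    /* DCZID_EL0 is readable at EL0: BS in bits [3:0], DZP in bit 4 */
    __asm__ volatile("mrs %0, dczid_el0" : "=r"(dczid));
    return dczid;
}

void zero_aligned_buffer(void *buf, size_t len)
{
    uint64_t dczid = read_dczid_el0();
    size_t blocklen = (size_t)4 << (dczid & 0xf); /* BS is log2(words) */
    char *p = buf;

    if (dczid & (1u << 4)) {
        /* DZP set: DC ZVA would trap, so fall back to memset */
        memset(buf, 0, len);
        return;
    }
    for (size_t off = 0; off < len; off += blocklen) {
        __asm__ volatile("dc zva, %0" : : "r"(p + off) : "memory");
    }
}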