| Message ID | 20190719210326.15466-3-richard.henderson@linaro.org |
|---|---|
| State | New |
| Series | target/arm: Implement ARMv8.1-VHE |
Richard Henderson <richard.henderson@linaro.org> writes:

> Since we have remembered ASIDs, we can further minimize flushing
> by comparing against the one we want to flush.
>
> Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
> ---
>  include/exec/exec-all.h | 16 +++++++++++++
>  include/qom/cpu.h       |  1 +
>  accel/tcg/cputlb.c      | 51 +++++++++++++++++++++++++++++++++++++++++
>  3 files changed, 68 insertions(+)
>
> diff --git a/include/exec/exec-all.h b/include/exec/exec-all.h
> index 9c77aa5bf9..0d890e1e60 100644
> --- a/include/exec/exec-all.h
> +++ b/include/exec/exec-all.h
> @@ -240,6 +240,22 @@ void tlb_flush_by_mmuidx_all_cpus_synced(CPUState *cpu, uint16_t idxmap);
>   */
>  void tlb_set_asid_for_mmuidx(CPUState *cpu, uint32_t asid,
>                               uint16_t idxmap, uint16_t dep_idxmap);
> +/**
> + * tlb_flush_asid_by_mmuidx:
> + * @cpu: Originating CPU of the flush
> + * @asid: Address Space Identifier
> + * @idxmap: bitmap of MMU indexes to flush if asid matches
> + *
> + * For each mmu index, if @asid matches the value previously saved via
> + * tlb_set_asid_for_mmuidx, flush the index.
> + */
> +void tlb_flush_asid_by_mmuidx(CPUState *cpu, uint32_t asid, uint16_t idxmap);
> +/* Similarly, broadcasting to all cpus. */
> +void tlb_flush_asid_by_mmuidx_all_cpus(CPUState *cpu, uint32_t asid,
> +                                       uint16_t idxmap);
> +/* Similarly, waiting for the broadcast to complete. */
> +void tlb_flush_asid_by_mmuidx_all_cpus_synced(CPUState *cpu, uint32_t asid,
> +                                              uint16_t idxmap);
>  /**
>   * tlb_set_page_with_attrs:
>   * @cpu: CPU to add this TLB entry for
> diff --git a/include/qom/cpu.h b/include/qom/cpu.h
> index 5ee0046b62..4ae6ea3e1d 100644
> --- a/include/qom/cpu.h
> +++ b/include/qom/cpu.h
> @@ -283,6 +283,7 @@ struct hax_vcpu_state;
>  typedef union {
>      int host_int;
>      unsigned long host_ulong;
> +    uint64_t host_uint64;

This is missing an access helper. Also, the host_ prefix doesn't make
sense for uint64_t — 64 bits is 64 bits on any host:

  #define RUN_ON_CPU_UINT64(i) ((run_on_cpu_data){.uint64 = (i)})

>      void *host_ptr;
>      vaddr target_ptr;
>  } run_on_cpu_data;
> diff --git a/accel/tcg/cputlb.c b/accel/tcg/cputlb.c
> index c68f57755b..3ef68a11bf 100644
> --- a/accel/tcg/cputlb.c
> +++ b/accel/tcg/cputlb.c
> @@ -540,6 +540,57 @@ void tlb_flush_page_all_cpus_synced(CPUState *src, target_ulong addr)
>      tlb_flush_page_by_mmuidx_all_cpus_synced(src, addr, ALL_MMUIDX_BITS);
>  }
>
> +static void tlb_flush_asid_by_mmuidx_async_work(CPUState *cpu,
> +                                                run_on_cpu_data data)
> +{
> +    CPUTLB *tlb = cpu_tlb(cpu);
> +    uint32_t asid = data.host_uint64;
> +    uint16_t idxmap = data.host_uint64 >> 32;
> +    uint16_t to_flush = 0, work;
> +
> +    assert_cpu_is_self(cpu);
> +
> +    for (work = idxmap; work != 0; work &= work - 1) {
> +        int mmu_idx = ctz32(work);
> +        if (tlb->d[mmu_idx].asid == asid) {
> +            to_flush |= 1 << mmu_idx;
> +        }
> +    }
> +
> +    if (to_flush) {
> +        tlb_flush_by_mmuidx_async_work(cpu, RUN_ON_CPU_HOST_INT(to_flush));
> +    }
> +}
> +
> +void tlb_flush_asid_by_mmuidx(CPUState *cpu, uint32_t asid, uint16_t idxmap)
> +{
> +    run_on_cpu_data data = { .host_uint64 = deposit64(asid, 32, 32, idxmap) };

Then this would be:

  uint64_t asid_idx_map = deposit64(asid, 32, 32, idxmap);
  ...
  async_run_on_cpu(cpu, tlb_flush_asid_by_mmuidx_async_work,
                   RUN_ON_CPU_UINT64(asid_idx_map));

Not a massive win, but it is consistent with the other *_run_on calls
and easier to grep.
> +
> +    if (cpu->created && !qemu_cpu_is_self(cpu)) {
> +        async_run_on_cpu(cpu, tlb_flush_asid_by_mmuidx_async_work, data);
> +    } else {
> +        tlb_flush_asid_by_mmuidx_async_work(cpu, data);
> +    }
> +}
> +
> +void tlb_flush_asid_by_mmuidx_all_cpus(CPUState *src_cpu,
> +                                       uint32_t asid, uint16_t idxmap)
> +{
> +    run_on_cpu_data data = { .host_uint64 = deposit64(asid, 32, 32, idxmap) };
> +
> +    flush_all_helper(src_cpu, tlb_flush_asid_by_mmuidx_async_work, data);
> +    tlb_flush_asid_by_mmuidx_async_work(src_cpu, data);
> +}
> +
> +void tlb_flush_asid_by_mmuidx_all_cpus_synced(CPUState *src_cpu,
> +                                              uint32_t asid, uint16_t idxmap)
> +{
> +    run_on_cpu_data data = { .host_uint64 = deposit64(asid, 32, 32, idxmap) };
> +
> +    flush_all_helper(src_cpu, tlb_flush_asid_by_mmuidx_async_work, data);
> +    async_safe_run_on_cpu(src_cpu, tlb_flush_asid_by_mmuidx_async_work, data);
> +}
> +
>  void tlb_set_asid_for_mmuidx(CPUState *cpu, uint32_t asid, uint16_t idxmap,
>                               uint16_t depmap)
>  {

Otherwise:

Reviewed-by: Alex Bennée <alex.bennee@linaro.org>

-- 
Alex Bennée
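Concretely, if the union member were named plain uint64 and the
suggested accessor added, the entry point would read something like the
sketch below. This follows Alex's suggestion rather than the patch as
posted, so the member and macro names are hypothetical:

/* Suggested accessor, following the pattern of RUN_ON_CPU_HOST_INT etc. */
#define RUN_ON_CPU_UINT64(i) ((run_on_cpu_data){.uint64 = (i)})

void tlb_flush_asid_by_mmuidx(CPUState *cpu, uint32_t asid, uint16_t idxmap)
{
    uint64_t asid_idx_map = deposit64(asid, 32, 32, idxmap);

    if (cpu->created && !qemu_cpu_is_self(cpu)) {
        /* Defer to the target vCPU's thread, as the posted patch does. */
        async_run_on_cpu(cpu, tlb_flush_asid_by_mmuidx_async_work,
                         RUN_ON_CPU_UINT64(asid_idx_map));
    } else {
        tlb_flush_asid_by_mmuidx_async_work(cpu,
                                            RUN_ON_CPU_UINT64(asid_idx_map));
    }
}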
diff --git a/include/exec/exec-all.h b/include/exec/exec-all.h
index 9c77aa5bf9..0d890e1e60 100644
--- a/include/exec/exec-all.h
+++ b/include/exec/exec-all.h
@@ -240,6 +240,22 @@ void tlb_flush_by_mmuidx_all_cpus_synced(CPUState *cpu, uint16_t idxmap);
  */
 void tlb_set_asid_for_mmuidx(CPUState *cpu, uint32_t asid,
                              uint16_t idxmap, uint16_t dep_idxmap);
+/**
+ * tlb_flush_asid_by_mmuidx:
+ * @cpu: Originating CPU of the flush
+ * @asid: Address Space Identifier
+ * @idxmap: bitmap of MMU indexes to flush if asid matches
+ *
+ * For each mmu index, if @asid matches the value previously saved via
+ * tlb_set_asid_for_mmuidx, flush the index.
+ */
+void tlb_flush_asid_by_mmuidx(CPUState *cpu, uint32_t asid, uint16_t idxmap);
+/* Similarly, broadcasting to all cpus. */
+void tlb_flush_asid_by_mmuidx_all_cpus(CPUState *cpu, uint32_t asid,
+                                       uint16_t idxmap);
+/* Similarly, waiting for the broadcast to complete. */
+void tlb_flush_asid_by_mmuidx_all_cpus_synced(CPUState *cpu, uint32_t asid,
+                                              uint16_t idxmap);
 /**
  * tlb_set_page_with_attrs:
  * @cpu: CPU to add this TLB entry for
diff --git a/include/qom/cpu.h b/include/qom/cpu.h
index 5ee0046b62..4ae6ea3e1d 100644
--- a/include/qom/cpu.h
+++ b/include/qom/cpu.h
@@ -283,6 +283,7 @@ struct hax_vcpu_state;
 typedef union {
     int host_int;
     unsigned long host_ulong;
+    uint64_t host_uint64;
     void *host_ptr;
     vaddr target_ptr;
 } run_on_cpu_data;
diff --git a/accel/tcg/cputlb.c b/accel/tcg/cputlb.c
index c68f57755b..3ef68a11bf 100644
--- a/accel/tcg/cputlb.c
+++ b/accel/tcg/cputlb.c
@@ -540,6 +540,57 @@ void tlb_flush_page_all_cpus_synced(CPUState *src, target_ulong addr)
     tlb_flush_page_by_mmuidx_all_cpus_synced(src, addr, ALL_MMUIDX_BITS);
 }
 
+static void tlb_flush_asid_by_mmuidx_async_work(CPUState *cpu,
+                                                run_on_cpu_data data)
+{
+    CPUTLB *tlb = cpu_tlb(cpu);
+    uint32_t asid = data.host_uint64;
+    uint16_t idxmap = data.host_uint64 >> 32;
+    uint16_t to_flush = 0, work;
+
+    assert_cpu_is_self(cpu);
+
+    for (work = idxmap; work != 0; work &= work - 1) {
+        int mmu_idx = ctz32(work);
+        if (tlb->d[mmu_idx].asid == asid) {
+            to_flush |= 1 << mmu_idx;
+        }
+    }
+
+    if (to_flush) {
+        tlb_flush_by_mmuidx_async_work(cpu, RUN_ON_CPU_HOST_INT(to_flush));
+    }
+}
+
+void tlb_flush_asid_by_mmuidx(CPUState *cpu, uint32_t asid, uint16_t idxmap)
+{
+    run_on_cpu_data data = { .host_uint64 = deposit64(asid, 32, 32, idxmap) };
+
+    if (cpu->created && !qemu_cpu_is_self(cpu)) {
+        async_run_on_cpu(cpu, tlb_flush_asid_by_mmuidx_async_work, data);
+    } else {
+        tlb_flush_asid_by_mmuidx_async_work(cpu, data);
+    }
+}
+
+void tlb_flush_asid_by_mmuidx_all_cpus(CPUState *src_cpu,
+                                       uint32_t asid, uint16_t idxmap)
+{
+    run_on_cpu_data data = { .host_uint64 = deposit64(asid, 32, 32, idxmap) };
+
+    flush_all_helper(src_cpu, tlb_flush_asid_by_mmuidx_async_work, data);
+    tlb_flush_asid_by_mmuidx_async_work(src_cpu, data);
+}
+
+void tlb_flush_asid_by_mmuidx_all_cpus_synced(CPUState *src_cpu,
+                                              uint32_t asid, uint16_t idxmap)
+{
+    run_on_cpu_data data = { .host_uint64 = deposit64(asid, 32, 32, idxmap) };
+
+    flush_all_helper(src_cpu, tlb_flush_asid_by_mmuidx_async_work, data);
+    async_safe_run_on_cpu(src_cpu, tlb_flush_asid_by_mmuidx_async_work, data);
+}
+
 void tlb_set_asid_for_mmuidx(CPUState *cpu, uint32_t asid, uint16_t idxmap,
                              uint16_t depmap)
 {
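The two host-side tricks in the new worker are easy to check in
isolation: deposit64() packs the 32-bit ASID and the 16-bit idxmap into
one run_on_cpu_data payload, and the work &= work - 1 loop visits each
set mmu index exactly once. The sketch below is a standalone program;
deposit64 and ctz32 are re-implemented here to mirror QEMU's helpers
from include/qemu/bitops.h and include/qemu/host-utils.h:

#include <assert.h>
#include <stdint.h>
#include <stdio.h>

/* Mirrors deposit64() from include/qemu/bitops.h: replace bits
 * [start, start+length) of value with fieldval. */
static uint64_t deposit64(uint64_t value, int start, int length,
                          uint64_t fieldval)
{
    uint64_t mask = (~0ULL >> (64 - length)) << start;
    return (value & ~mask) | ((fieldval << start) & mask);
}

/* Mirrors ctz32() from include/qemu/host-utils.h. */
static int ctz32(uint32_t val)
{
    return val ? __builtin_ctz(val) : 32;
}

int main(void)
{
    uint32_t asid = 0xbeef;
    uint16_t idxmap = 0x0035;   /* mmu indexes 0, 2, 4 and 5 */

    /* Pack as tlb_flush_asid_by_mmuidx() does: asid in bits [31:0],
     * idxmap in bits [47:32]. */
    uint64_t data = deposit64(asid, 32, 32, idxmap);

    /* Unpack exactly as the async worker does. */
    assert((uint32_t)data == asid);
    assert((uint16_t)(data >> 32) == idxmap);

    /* Clearing the lowest set bit each iteration walks the set mmu
     * indexes in ascending order: prints 0, 2, 4, 5. */
    for (uint16_t work = idxmap; work != 0; work &= work - 1) {
        printf("would compare ASID for mmu_idx %d\n", ctz32(work));
    }
    return 0;
}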
Since we have remembered ASIDs, we can further minimize flushing
by comparing against the one we want to flush.

Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
---
 include/exec/exec-all.h | 16 +++++++++++++
 include/qom/cpu.h       |  1 +
 accel/tcg/cputlb.c      | 51 +++++++++++++++++++++++++++++++++++++++++
 3 files changed, 68 insertions(+)

-- 
2.17.1
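For context, a target would drive the new API roughly as sketched
below. The handler name and mmu-index bitmap are made up for
illustration and are not taken from this series:

/* Hypothetical TLB-invalidate-by-ASID handler (illustrative only). */
static void example_tlbi_asid(CPUState *cs, uint32_t asid)
{
    /* Made-up bitmap: whichever mmu indexes the target tags with ASIDs. */
    const uint16_t idxmap = (1 << 0) | (1 << 1);

    /* Only indexes whose remembered ASID matches @asid are flushed;
     * the _synced variant waits for the cross-CPU broadcast to land. */
    tlb_flush_asid_by_mmuidx_all_cpus_synced(cs, asid, idxmap);
}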