Message ID | 20190923230004.9231-11-richard.henderson@linaro.org |
---|---|
State | Superseded |
Headers | show |
Series | Move rom and notdirty handling to cputlb | expand |
On 24.09.19 00:59, Richard Henderson wrote: > There is only one caller, tlb_set_page_with_attrs. We cannot > inline the entire function because the AddressSpaceDispatch > structure is private to exec.c, and cannot easily be moved to > include/exec/memory-internal.h. > > Compute is_ram and is_romd once within tlb_set_page_with_attrs. > Fold the number of tests against these predicates. Compute > cpu_physical_memory_is_clean outside of the tlb lock region. > > Signed-off-by: Richard Henderson <richard.henderson@linaro.org> > --- > include/exec/exec-all.h | 6 +--- > accel/tcg/cputlb.c | 68 ++++++++++++++++++++++++++--------------- > exec.c | 22 ++----------- > 3 files changed, 47 insertions(+), 49 deletions(-) > > diff --git a/include/exec/exec-all.h b/include/exec/exec-all.h > index 81b02eb2fe..49db07ba0b 100644 > --- a/include/exec/exec-all.h > +++ b/include/exec/exec-all.h > @@ -509,11 +509,7 @@ address_space_translate_for_iotlb(CPUState *cpu, int asidx, hwaddr addr, > hwaddr *xlat, hwaddr *plen, > MemTxAttrs attrs, int *prot); > hwaddr memory_region_section_get_iotlb(CPUState *cpu, > - MemoryRegionSection *section, > - target_ulong vaddr, > - hwaddr paddr, hwaddr xlat, > - int prot, > - target_ulong *address); > + MemoryRegionSection *section); > #endif > > /* vl.c */ > diff --git a/accel/tcg/cputlb.c b/accel/tcg/cputlb.c > index 05212ff244..05530a8b0c 100644 > --- a/accel/tcg/cputlb.c > +++ b/accel/tcg/cputlb.c > @@ -704,13 +704,14 @@ void tlb_set_page_with_attrs(CPUState *cpu, target_ulong vaddr, > MemoryRegionSection *section; > unsigned int index; > target_ulong address; > - target_ulong code_address; > + target_ulong write_address; > uintptr_t addend; > CPUTLBEntry *te, tn; > hwaddr iotlb, xlat, sz, paddr_page; > target_ulong vaddr_page; > int asidx = cpu_asidx_from_attrs(cpu, attrs); > int wp_flags; > + bool is_ram, is_romd; > > assert_cpu_is_self(cpu); > > @@ -739,18 +740,46 @@ void tlb_set_page_with_attrs(CPUState *cpu, target_ulong vaddr, > if (attrs.byte_swap) { > address |= TLB_BSWAP; > } > - if (!memory_region_is_ram(section->mr) && > - !memory_region_is_romd(section->mr)) { > - /* IO memory case */ > - address |= TLB_MMIO; > - addend = 0; > - } else { > + > + is_ram = memory_region_is_ram(section->mr); > + is_romd = memory_region_is_romd(section->mr); > + > + if (is_ram || is_romd) { > + /* RAM and ROMD both have associated host memory. */ > addend = (uintptr_t)memory_region_get_ram_ptr(section->mr) + xlat; > + } else { > + /* I/O does not; force the host address to NULL. */ > + addend = 0; > + } > + > + write_address = address; I guess the only "suboptimal" change is that you now have two checks for "prot & PAGE_WRITE" twice in the case of ram instead of one. > + if (is_ram) { > + iotlb = memory_region_get_ram_addr(section->mr) + xlat; > + /* > + * Computing is_clean is expensive; avoid all that unless > + * the page is actually writable. > + */ > + if (prot & PAGE_WRITE) { > + if (section->readonly) { > + write_address |= TLB_ROM; > + } else if (cpu_physical_memory_is_clean(iotlb)) { > + write_address |= TLB_NOTDIRTY; > + } > + } > + } else { > + /* I/O or ROMD */ > + iotlb = memory_region_section_get_iotlb(cpu, section) + xlat; > + /* > + * Writes to romd devices must go through MMIO to enable write. > + * Reads to romd devices go through the ram_ptr found above, > + * but of course reads to I/O must go through MMIO. > + */ > + write_address |= TLB_MMIO; ... and here you calculate write_address even if probably unused. Can your move the calculation of the write_address completely into the "prot & PAGE_WRITE" case below? (I'm not looking at the full code, so could as well be that I am missing something :) ) -- Thanks, David / dhildenb
Richard Henderson <richard.henderson@linaro.org> writes: > There is only one caller, tlb_set_page_with_attrs. We cannot > inline the entire function because the AddressSpaceDispatch > structure is private to exec.c, and cannot easily be moved to > include/exec/memory-internal.h. > > Compute is_ram and is_romd once within tlb_set_page_with_attrs. > Fold the number of tests against these predicates. Compute > cpu_physical_memory_is_clean outside of the tlb lock region. > > Signed-off-by: Richard Henderson <richard.henderson@linaro.org> Reviewed-by: Alex Bennée <alex.bennee@linaro.org> > --- > include/exec/exec-all.h | 6 +--- > accel/tcg/cputlb.c | 68 ++++++++++++++++++++++++++--------------- > exec.c | 22 ++----------- > 3 files changed, 47 insertions(+), 49 deletions(-) > > diff --git a/include/exec/exec-all.h b/include/exec/exec-all.h > index 81b02eb2fe..49db07ba0b 100644 > --- a/include/exec/exec-all.h > +++ b/include/exec/exec-all.h > @@ -509,11 +509,7 @@ address_space_translate_for_iotlb(CPUState *cpu, int asidx, hwaddr addr, > hwaddr *xlat, hwaddr *plen, > MemTxAttrs attrs, int *prot); > hwaddr memory_region_section_get_iotlb(CPUState *cpu, > - MemoryRegionSection *section, > - target_ulong vaddr, > - hwaddr paddr, hwaddr xlat, > - int prot, > - target_ulong *address); > + MemoryRegionSection *section); > #endif > > /* vl.c */ > diff --git a/accel/tcg/cputlb.c b/accel/tcg/cputlb.c > index 05212ff244..05530a8b0c 100644 > --- a/accel/tcg/cputlb.c > +++ b/accel/tcg/cputlb.c > @@ -704,13 +704,14 @@ void tlb_set_page_with_attrs(CPUState *cpu, target_ulong vaddr, > MemoryRegionSection *section; > unsigned int index; > target_ulong address; > - target_ulong code_address; > + target_ulong write_address; > uintptr_t addend; > CPUTLBEntry *te, tn; > hwaddr iotlb, xlat, sz, paddr_page; > target_ulong vaddr_page; > int asidx = cpu_asidx_from_attrs(cpu, attrs); > int wp_flags; > + bool is_ram, is_romd; > > assert_cpu_is_self(cpu); > > @@ -739,18 +740,46 @@ void tlb_set_page_with_attrs(CPUState *cpu, target_ulong vaddr, > if (attrs.byte_swap) { > address |= TLB_BSWAP; > } > - if (!memory_region_is_ram(section->mr) && > - !memory_region_is_romd(section->mr)) { > - /* IO memory case */ > - address |= TLB_MMIO; > - addend = 0; > - } else { > + > + is_ram = memory_region_is_ram(section->mr); > + is_romd = memory_region_is_romd(section->mr); > + > + if (is_ram || is_romd) { > + /* RAM and ROMD both have associated host memory. */ > addend = (uintptr_t)memory_region_get_ram_ptr(section->mr) + xlat; > + } else { > + /* I/O does not; force the host address to NULL. */ > + addend = 0; > + } > + > + write_address = address; > + if (is_ram) { > + iotlb = memory_region_get_ram_addr(section->mr) + xlat; > + /* > + * Computing is_clean is expensive; avoid all that unless > + * the page is actually writable. > + */ > + if (prot & PAGE_WRITE) { > + if (section->readonly) { > + write_address |= TLB_ROM; > + } else if (cpu_physical_memory_is_clean(iotlb)) { > + write_address |= TLB_NOTDIRTY; > + } > + } > + } else { > + /* I/O or ROMD */ > + iotlb = memory_region_section_get_iotlb(cpu, section) + xlat; > + /* > + * Writes to romd devices must go through MMIO to enable write. > + * Reads to romd devices go through the ram_ptr found above, > + * but of course reads to I/O must go through MMIO. > + */ > + write_address |= TLB_MMIO; > + if (!is_romd) { > + address = write_address; > + } > } > > - code_address = address; > - iotlb = memory_region_section_get_iotlb(cpu, section, vaddr_page, > - paddr_page, xlat, prot, &address); > wp_flags = cpu_watchpoint_address_matches(cpu, vaddr_page, > TARGET_PAGE_SIZE); > > @@ -790,8 +819,8 @@ void tlb_set_page_with_attrs(CPUState *cpu, target_ulong vaddr, > /* > * At this point iotlb contains a physical section number in the lower > * TARGET_PAGE_BITS, and either > - * + the ram_addr_t of the page base of the target RAM (if NOTDIRTY or ROM) > - * + the offset within section->mr of the page base (otherwise) > + * + the ram_addr_t of the page base of the target RAM (RAM) > + * + the offset within section->mr of the page base (I/O, ROMD) > * We subtract the vaddr_page (which is page aligned and thus won't > * disturb the low bits) to give an offset which can be added to the > * (non-page-aligned) vaddr of the eventual memory access to get > @@ -814,25 +843,14 @@ void tlb_set_page_with_attrs(CPUState *cpu, target_ulong vaddr, > } > > if (prot & PAGE_EXEC) { > - tn.addr_code = code_address; > + tn.addr_code = address; > } else { > tn.addr_code = -1; > } > > tn.addr_write = -1; > if (prot & PAGE_WRITE) { > - tn.addr_write = address; > - if (memory_region_is_romd(section->mr)) { > - /* Use the MMIO path so that the device can switch states. */ > - tn.addr_write |= TLB_MMIO; > - } else if (memory_region_is_ram(section->mr)) { > - if (section->readonly) { > - tn.addr_write |= TLB_ROM; > - } else if (cpu_physical_memory_is_clean( > - memory_region_get_ram_addr(section->mr) + xlat)) { > - tn.addr_write |= TLB_NOTDIRTY; > - } > - } > + tn.addr_write = write_address; > if (prot & PAGE_WRITE_INV) { > tn.addr_write |= TLB_INVALID_MASK; > } > diff --git a/exec.c b/exec.c > index dc7001f115..961d7d6497 100644 > --- a/exec.c > +++ b/exec.c > @@ -1459,26 +1459,10 @@ bool cpu_physical_memory_snapshot_get_dirty(DirtyBitmapSnapshot *snap, > > /* Called from RCU critical section */ > hwaddr memory_region_section_get_iotlb(CPUState *cpu, > - MemoryRegionSection *section, > - target_ulong vaddr, > - hwaddr paddr, hwaddr xlat, > - int prot, > - target_ulong *address) > + MemoryRegionSection *section) > { > - hwaddr iotlb; > - > - if (memory_region_is_ram(section->mr)) { > - /* Normal RAM. */ > - iotlb = memory_region_get_ram_addr(section->mr) + xlat; > - } else { > - AddressSpaceDispatch *d; > - > - d = flatview_to_dispatch(section->fv); > - iotlb = section - d->map.sections; > - iotlb += xlat; > - } > - > - return iotlb; > + AddressSpaceDispatch *d = flatview_to_dispatch(section->fv); > + return section - d->map.sections; > } > #endif /* defined(CONFIG_USER_ONLY) */ -- Alex Bennée
On 9/24/19 12:59 AM, David Hildenbrand wrote: >> + is_ram = memory_region_is_ram(section->mr); >> + is_romd = memory_region_is_romd(section->mr); >> + >> + if (is_ram || is_romd) { >> + /* RAM and ROMD both have associated host memory. */ >> addend = (uintptr_t)memory_region_get_ram_ptr(section->mr) + xlat; >> + } else { >> + /* I/O does not; force the host address to NULL. */ >> + addend = 0; >> + } >> + >> + write_address = address; > > I guess the only "suboptimal" change is that you now have two checks for > "prot & PAGE_WRITE" twice in the case of ram instead of one. It's a single bit test on a register operand -- as cheap as can be. If you look at the entire code, there *must* be more than one test. You can rearrange the code to choose exactly where those tests are, but you'll have to have them somewhere. >> + /* I/O or ROMD */ >> + iotlb = memory_region_section_get_iotlb(cpu, section) + xlat; >> + /* >> + * Writes to romd devices must go through MMIO to enable write. >> + * Reads to romd devices go through the ram_ptr found above, >> + * but of course reads to I/O must go through MMIO. >> + */ >> + write_address |= TLB_MMIO; > > ... and here you calculate write_address even if probably unused. Well... while the page might not be writable (but I'd bet that it is -- I/O memory is almost never read-only), and therefore write_address is technically unused, the variable is practically used in the next line: if (!is_romd) { address = write_address } which will compile to a conditional move. > Can your move the calculation of the write_address completely into the > "prot & PAGE_WRITE" case below? We'd prefer not to, since the code below is within the cpu tlb lock region. We'd prefer to keep all of the expensive operations outside that. r~
On 25.09.19 19:55, Richard Henderson wrote: > On 9/24/19 12:59 AM, David Hildenbrand wrote: >>> + is_ram = memory_region_is_ram(section->mr); >>> + is_romd = memory_region_is_romd(section->mr); >>> + >>> + if (is_ram || is_romd) { >>> + /* RAM and ROMD both have associated host memory. */ >>> addend = (uintptr_t)memory_region_get_ram_ptr(section->mr) + xlat; >>> + } else { >>> + /* I/O does not; force the host address to NULL. */ >>> + addend = 0; >>> + } >>> + >>> + write_address = address; >> >> I guess the only "suboptimal" change is that you now have two checks for >> "prot & PAGE_WRITE" twice in the case of ram instead of one. > > It's a single bit test on a register operand -- as cheap as can be. If you > look at the entire code, there *must* be more than one test. You can rearrange > the code to choose exactly where those tests are, but you'll have to have them > somewhere. > >>> + /* I/O or ROMD */ >>> + iotlb = memory_region_section_get_iotlb(cpu, section) + xlat; >>> + /* >>> + * Writes to romd devices must go through MMIO to enable write. >>> + * Reads to romd devices go through the ram_ptr found above, >>> + * but of course reads to I/O must go through MMIO. >>> + */ >>> + write_address |= TLB_MMIO; >> >> ... and here you calculate write_address even if probably unused. > > Well... while the page might not be writable (but I'd bet that it is -- I/O > memory is almost never read-only), and therefore write_address is technically > unused, the variable is practically used in the next line: > > if (!is_romd) { > address = write_address > } > > which will compile to a conditional move. > >> Can your move the calculation of the write_address completely into the >> "prot & PAGE_WRITE" case below? > > We'd prefer not to, since the code below is within the cpu tlb lock region. > We'd prefer to keep all of the expensive operations outside that. Makes all sense to me then and looks sane :) > > > r~ > -- Thanks, David / dhildenb
diff --git a/include/exec/exec-all.h b/include/exec/exec-all.h index 81b02eb2fe..49db07ba0b 100644 --- a/include/exec/exec-all.h +++ b/include/exec/exec-all.h @@ -509,11 +509,7 @@ address_space_translate_for_iotlb(CPUState *cpu, int asidx, hwaddr addr, hwaddr *xlat, hwaddr *plen, MemTxAttrs attrs, int *prot); hwaddr memory_region_section_get_iotlb(CPUState *cpu, - MemoryRegionSection *section, - target_ulong vaddr, - hwaddr paddr, hwaddr xlat, - int prot, - target_ulong *address); + MemoryRegionSection *section); #endif /* vl.c */ diff --git a/accel/tcg/cputlb.c b/accel/tcg/cputlb.c index 05212ff244..05530a8b0c 100644 --- a/accel/tcg/cputlb.c +++ b/accel/tcg/cputlb.c @@ -704,13 +704,14 @@ void tlb_set_page_with_attrs(CPUState *cpu, target_ulong vaddr, MemoryRegionSection *section; unsigned int index; target_ulong address; - target_ulong code_address; + target_ulong write_address; uintptr_t addend; CPUTLBEntry *te, tn; hwaddr iotlb, xlat, sz, paddr_page; target_ulong vaddr_page; int asidx = cpu_asidx_from_attrs(cpu, attrs); int wp_flags; + bool is_ram, is_romd; assert_cpu_is_self(cpu); @@ -739,18 +740,46 @@ void tlb_set_page_with_attrs(CPUState *cpu, target_ulong vaddr, if (attrs.byte_swap) { address |= TLB_BSWAP; } - if (!memory_region_is_ram(section->mr) && - !memory_region_is_romd(section->mr)) { - /* IO memory case */ - address |= TLB_MMIO; - addend = 0; - } else { + + is_ram = memory_region_is_ram(section->mr); + is_romd = memory_region_is_romd(section->mr); + + if (is_ram || is_romd) { + /* RAM and ROMD both have associated host memory. */ addend = (uintptr_t)memory_region_get_ram_ptr(section->mr) + xlat; + } else { + /* I/O does not; force the host address to NULL. */ + addend = 0; + } + + write_address = address; + if (is_ram) { + iotlb = memory_region_get_ram_addr(section->mr) + xlat; + /* + * Computing is_clean is expensive; avoid all that unless + * the page is actually writable. + */ + if (prot & PAGE_WRITE) { + if (section->readonly) { + write_address |= TLB_ROM; + } else if (cpu_physical_memory_is_clean(iotlb)) { + write_address |= TLB_NOTDIRTY; + } + } + } else { + /* I/O or ROMD */ + iotlb = memory_region_section_get_iotlb(cpu, section) + xlat; + /* + * Writes to romd devices must go through MMIO to enable write. + * Reads to romd devices go through the ram_ptr found above, + * but of course reads to I/O must go through MMIO. + */ + write_address |= TLB_MMIO; + if (!is_romd) { + address = write_address; + } } - code_address = address; - iotlb = memory_region_section_get_iotlb(cpu, section, vaddr_page, - paddr_page, xlat, prot, &address); wp_flags = cpu_watchpoint_address_matches(cpu, vaddr_page, TARGET_PAGE_SIZE); @@ -790,8 +819,8 @@ void tlb_set_page_with_attrs(CPUState *cpu, target_ulong vaddr, /* * At this point iotlb contains a physical section number in the lower * TARGET_PAGE_BITS, and either - * + the ram_addr_t of the page base of the target RAM (if NOTDIRTY or ROM) - * + the offset within section->mr of the page base (otherwise) + * + the ram_addr_t of the page base of the target RAM (RAM) + * + the offset within section->mr of the page base (I/O, ROMD) * We subtract the vaddr_page (which is page aligned and thus won't * disturb the low bits) to give an offset which can be added to the * (non-page-aligned) vaddr of the eventual memory access to get @@ -814,25 +843,14 @@ void tlb_set_page_with_attrs(CPUState *cpu, target_ulong vaddr, } if (prot & PAGE_EXEC) { - tn.addr_code = code_address; + tn.addr_code = address; } else { tn.addr_code = -1; } tn.addr_write = -1; if (prot & PAGE_WRITE) { - tn.addr_write = address; - if (memory_region_is_romd(section->mr)) { - /* Use the MMIO path so that the device can switch states. */ - tn.addr_write |= TLB_MMIO; - } else if (memory_region_is_ram(section->mr)) { - if (section->readonly) { - tn.addr_write |= TLB_ROM; - } else if (cpu_physical_memory_is_clean( - memory_region_get_ram_addr(section->mr) + xlat)) { - tn.addr_write |= TLB_NOTDIRTY; - } - } + tn.addr_write = write_address; if (prot & PAGE_WRITE_INV) { tn.addr_write |= TLB_INVALID_MASK; } diff --git a/exec.c b/exec.c index dc7001f115..961d7d6497 100644 --- a/exec.c +++ b/exec.c @@ -1459,26 +1459,10 @@ bool cpu_physical_memory_snapshot_get_dirty(DirtyBitmapSnapshot *snap, /* Called from RCU critical section */ hwaddr memory_region_section_get_iotlb(CPUState *cpu, - MemoryRegionSection *section, - target_ulong vaddr, - hwaddr paddr, hwaddr xlat, - int prot, - target_ulong *address) + MemoryRegionSection *section) { - hwaddr iotlb; - - if (memory_region_is_ram(section->mr)) { - /* Normal RAM. */ - iotlb = memory_region_get_ram_addr(section->mr) + xlat; - } else { - AddressSpaceDispatch *d; - - d = flatview_to_dispatch(section->fv); - iotlb = section - d->map.sections; - iotlb += xlat; - } - - return iotlb; + AddressSpaceDispatch *d = flatview_to_dispatch(section->fv); + return section - d->map.sections; } #endif /* defined(CONFIG_USER_ONLY) */
There is only one caller, tlb_set_page_with_attrs. We cannot inline the entire function because the AddressSpaceDispatch structure is private to exec.c, and cannot easily be moved to include/exec/memory-internal.h. Compute is_ram and is_romd once within tlb_set_page_with_attrs. Fold the number of tests against these predicates. Compute cpu_physical_memory_is_clean outside of the tlb lock region. Signed-off-by: Richard Henderson <richard.henderson@linaro.org> --- include/exec/exec-all.h | 6 +--- accel/tcg/cputlb.c | 68 ++++++++++++++++++++++++++--------------- exec.c | 22 ++----------- 3 files changed, 47 insertions(+), 49 deletions(-) -- 2.17.1