diff mbox series

[v4,10/16] cputlb: Partially inline memory_region_section_get_iotlb

Message ID 20190923230004.9231-11-richard.henderson@linaro.org
State Superseded
Headers show
Series Move rom and notdirty handling to cputlb | expand

Commit Message

Richard Henderson Sept. 23, 2019, 10:59 p.m. UTC
There is only one caller, tlb_set_page_with_attrs.  We cannot
inline the entire function because the AddressSpaceDispatch
structure is private to exec.c, and cannot easily be moved to
include/exec/memory-internal.h.

Compute is_ram and is_romd once within tlb_set_page_with_attrs.
Fold the number of tests against these predicates.  Compute
cpu_physical_memory_is_clean outside of the tlb lock region.

Signed-off-by: Richard Henderson <richard.henderson@linaro.org>

---
 include/exec/exec-all.h |  6 +---
 accel/tcg/cputlb.c      | 68 ++++++++++++++++++++++++++---------------
 exec.c                  | 22 ++-----------
 3 files changed, 47 insertions(+), 49 deletions(-)

-- 
2.17.1

Comments

David Hildenbrand Sept. 24, 2019, 7:59 a.m. UTC | #1
On 24.09.19 00:59, Richard Henderson wrote:
> There is only one caller, tlb_set_page_with_attrs.  We cannot

> inline the entire function because the AddressSpaceDispatch

> structure is private to exec.c, and cannot easily be moved to

> include/exec/memory-internal.h.

> 

> Compute is_ram and is_romd once within tlb_set_page_with_attrs.

> Fold the number of tests against these predicates.  Compute

> cpu_physical_memory_is_clean outside of the tlb lock region.

> 

> Signed-off-by: Richard Henderson <richard.henderson@linaro.org>

> ---

>  include/exec/exec-all.h |  6 +---

>  accel/tcg/cputlb.c      | 68 ++++++++++++++++++++++++++---------------

>  exec.c                  | 22 ++-----------

>  3 files changed, 47 insertions(+), 49 deletions(-)

> 

> diff --git a/include/exec/exec-all.h b/include/exec/exec-all.h

> index 81b02eb2fe..49db07ba0b 100644

> --- a/include/exec/exec-all.h

> +++ b/include/exec/exec-all.h

> @@ -509,11 +509,7 @@ address_space_translate_for_iotlb(CPUState *cpu, int asidx, hwaddr addr,

>                                    hwaddr *xlat, hwaddr *plen,

>                                    MemTxAttrs attrs, int *prot);

>  hwaddr memory_region_section_get_iotlb(CPUState *cpu,

> -                                       MemoryRegionSection *section,

> -                                       target_ulong vaddr,

> -                                       hwaddr paddr, hwaddr xlat,

> -                                       int prot,

> -                                       target_ulong *address);

> +                                       MemoryRegionSection *section);

>  #endif

>  

>  /* vl.c */

> diff --git a/accel/tcg/cputlb.c b/accel/tcg/cputlb.c

> index 05212ff244..05530a8b0c 100644

> --- a/accel/tcg/cputlb.c

> +++ b/accel/tcg/cputlb.c

> @@ -704,13 +704,14 @@ void tlb_set_page_with_attrs(CPUState *cpu, target_ulong vaddr,

>      MemoryRegionSection *section;

>      unsigned int index;

>      target_ulong address;

> -    target_ulong code_address;

> +    target_ulong write_address;

>      uintptr_t addend;

>      CPUTLBEntry *te, tn;

>      hwaddr iotlb, xlat, sz, paddr_page;

>      target_ulong vaddr_page;

>      int asidx = cpu_asidx_from_attrs(cpu, attrs);

>      int wp_flags;

> +    bool is_ram, is_romd;

>  

>      assert_cpu_is_self(cpu);

>  

> @@ -739,18 +740,46 @@ void tlb_set_page_with_attrs(CPUState *cpu, target_ulong vaddr,

>      if (attrs.byte_swap) {

>          address |= TLB_BSWAP;

>      }

> -    if (!memory_region_is_ram(section->mr) &&

> -        !memory_region_is_romd(section->mr)) {

> -        /* IO memory case */

> -        address |= TLB_MMIO;

> -        addend = 0;

> -    } else {

> +

> +    is_ram = memory_region_is_ram(section->mr);

> +    is_romd = memory_region_is_romd(section->mr);

> +

> +    if (is_ram || is_romd) {

> +        /* RAM and ROMD both have associated host memory. */

>          addend = (uintptr_t)memory_region_get_ram_ptr(section->mr) + xlat;

> +    } else {

> +        /* I/O does not; force the host address to NULL. */

> +        addend = 0;

> +    }

> +

> +    write_address = address;


I guess the only "suboptimal" change is that you now have two checks for
"prot & PAGE_WRITE" twice in the case of ram instead of one.

> +    if (is_ram) {

> +        iotlb = memory_region_get_ram_addr(section->mr) + xlat;

> +        /*

> +         * Computing is_clean is expensive; avoid all that unless

> +         * the page is actually writable.

> +         */

> +        if (prot & PAGE_WRITE) {

> +            if (section->readonly) {

> +                write_address |= TLB_ROM;

> +            } else if (cpu_physical_memory_is_clean(iotlb)) {

> +                write_address |= TLB_NOTDIRTY;

> +            }

> +        }

> +    } else {

> +        /* I/O or ROMD */

> +        iotlb = memory_region_section_get_iotlb(cpu, section) + xlat;

> +        /*

> +         * Writes to romd devices must go through MMIO to enable write.

> +         * Reads to romd devices go through the ram_ptr found above,

> +         * but of course reads to I/O must go through MMIO.

> +         */

> +        write_address |= TLB_MMIO;


... and here you calculate write_address even if probably unused.

Can your move the calculation of the write_address completely into the
"prot & PAGE_WRITE" case below?

(I'm not looking at the full code, so could as well be that I am missing
something :) )


-- 

Thanks,

David / dhildenb
Alex Bennée Sept. 25, 2019, 4:12 p.m. UTC | #2
Richard Henderson <richard.henderson@linaro.org> writes:

> There is only one caller, tlb_set_page_with_attrs.  We cannot

> inline the entire function because the AddressSpaceDispatch

> structure is private to exec.c, and cannot easily be moved to

> include/exec/memory-internal.h.

>

> Compute is_ram and is_romd once within tlb_set_page_with_attrs.

> Fold the number of tests against these predicates.  Compute

> cpu_physical_memory_is_clean outside of the tlb lock region.

>

> Signed-off-by: Richard Henderson <richard.henderson@linaro.org>


Reviewed-by: Alex Bennée <alex.bennee@linaro.org>


> ---

>  include/exec/exec-all.h |  6 +---

>  accel/tcg/cputlb.c      | 68 ++++++++++++++++++++++++++---------------

>  exec.c                  | 22 ++-----------

>  3 files changed, 47 insertions(+), 49 deletions(-)

>

> diff --git a/include/exec/exec-all.h b/include/exec/exec-all.h

> index 81b02eb2fe..49db07ba0b 100644

> --- a/include/exec/exec-all.h

> +++ b/include/exec/exec-all.h

> @@ -509,11 +509,7 @@ address_space_translate_for_iotlb(CPUState *cpu, int asidx, hwaddr addr,

>                                    hwaddr *xlat, hwaddr *plen,

>                                    MemTxAttrs attrs, int *prot);

>  hwaddr memory_region_section_get_iotlb(CPUState *cpu,

> -                                       MemoryRegionSection *section,

> -                                       target_ulong vaddr,

> -                                       hwaddr paddr, hwaddr xlat,

> -                                       int prot,

> -                                       target_ulong *address);

> +                                       MemoryRegionSection *section);

>  #endif

>

>  /* vl.c */

> diff --git a/accel/tcg/cputlb.c b/accel/tcg/cputlb.c

> index 05212ff244..05530a8b0c 100644

> --- a/accel/tcg/cputlb.c

> +++ b/accel/tcg/cputlb.c

> @@ -704,13 +704,14 @@ void tlb_set_page_with_attrs(CPUState *cpu, target_ulong vaddr,

>      MemoryRegionSection *section;

>      unsigned int index;

>      target_ulong address;

> -    target_ulong code_address;

> +    target_ulong write_address;

>      uintptr_t addend;

>      CPUTLBEntry *te, tn;

>      hwaddr iotlb, xlat, sz, paddr_page;

>      target_ulong vaddr_page;

>      int asidx = cpu_asidx_from_attrs(cpu, attrs);

>      int wp_flags;

> +    bool is_ram, is_romd;

>

>      assert_cpu_is_self(cpu);

>

> @@ -739,18 +740,46 @@ void tlb_set_page_with_attrs(CPUState *cpu, target_ulong vaddr,

>      if (attrs.byte_swap) {

>          address |= TLB_BSWAP;

>      }

> -    if (!memory_region_is_ram(section->mr) &&

> -        !memory_region_is_romd(section->mr)) {

> -        /* IO memory case */

> -        address |= TLB_MMIO;

> -        addend = 0;

> -    } else {

> +

> +    is_ram = memory_region_is_ram(section->mr);

> +    is_romd = memory_region_is_romd(section->mr);

> +

> +    if (is_ram || is_romd) {

> +        /* RAM and ROMD both have associated host memory. */

>          addend = (uintptr_t)memory_region_get_ram_ptr(section->mr) + xlat;

> +    } else {

> +        /* I/O does not; force the host address to NULL. */

> +        addend = 0;

> +    }

> +

> +    write_address = address;

> +    if (is_ram) {

> +        iotlb = memory_region_get_ram_addr(section->mr) + xlat;

> +        /*

> +         * Computing is_clean is expensive; avoid all that unless

> +         * the page is actually writable.

> +         */

> +        if (prot & PAGE_WRITE) {

> +            if (section->readonly) {

> +                write_address |= TLB_ROM;

> +            } else if (cpu_physical_memory_is_clean(iotlb)) {

> +                write_address |= TLB_NOTDIRTY;

> +            }

> +        }

> +    } else {

> +        /* I/O or ROMD */

> +        iotlb = memory_region_section_get_iotlb(cpu, section) + xlat;

> +        /*

> +         * Writes to romd devices must go through MMIO to enable write.

> +         * Reads to romd devices go through the ram_ptr found above,

> +         * but of course reads to I/O must go through MMIO.

> +         */

> +        write_address |= TLB_MMIO;

> +        if (!is_romd) {

> +            address = write_address;

> +        }

>      }

>

> -    code_address = address;

> -    iotlb = memory_region_section_get_iotlb(cpu, section, vaddr_page,

> -                                            paddr_page, xlat, prot, &address);

>      wp_flags = cpu_watchpoint_address_matches(cpu, vaddr_page,

>                                                TARGET_PAGE_SIZE);

>

> @@ -790,8 +819,8 @@ void tlb_set_page_with_attrs(CPUState *cpu, target_ulong vaddr,

>      /*

>       * At this point iotlb contains a physical section number in the lower

>       * TARGET_PAGE_BITS, and either

> -     *  + the ram_addr_t of the page base of the target RAM (if NOTDIRTY or ROM)

> -     *  + the offset within section->mr of the page base (otherwise)

> +     *  + the ram_addr_t of the page base of the target RAM (RAM)

> +     *  + the offset within section->mr of the page base (I/O, ROMD)

>       * We subtract the vaddr_page (which is page aligned and thus won't

>       * disturb the low bits) to give an offset which can be added to the

>       * (non-page-aligned) vaddr of the eventual memory access to get

> @@ -814,25 +843,14 @@ void tlb_set_page_with_attrs(CPUState *cpu, target_ulong vaddr,

>      }

>

>      if (prot & PAGE_EXEC) {

> -        tn.addr_code = code_address;

> +        tn.addr_code = address;

>      } else {

>          tn.addr_code = -1;

>      }

>

>      tn.addr_write = -1;

>      if (prot & PAGE_WRITE) {

> -        tn.addr_write = address;

> -        if (memory_region_is_romd(section->mr)) {

> -            /* Use the MMIO path so that the device can switch states. */

> -            tn.addr_write |= TLB_MMIO;

> -        } else if (memory_region_is_ram(section->mr)) {

> -            if (section->readonly) {

> -                tn.addr_write |= TLB_ROM;

> -            } else if (cpu_physical_memory_is_clean(

> -                        memory_region_get_ram_addr(section->mr) + xlat)) {

> -                tn.addr_write |= TLB_NOTDIRTY;

> -            }

> -        }

> +        tn.addr_write = write_address;

>          if (prot & PAGE_WRITE_INV) {

>              tn.addr_write |= TLB_INVALID_MASK;

>          }

> diff --git a/exec.c b/exec.c

> index dc7001f115..961d7d6497 100644

> --- a/exec.c

> +++ b/exec.c

> @@ -1459,26 +1459,10 @@ bool cpu_physical_memory_snapshot_get_dirty(DirtyBitmapSnapshot *snap,

>

>  /* Called from RCU critical section */

>  hwaddr memory_region_section_get_iotlb(CPUState *cpu,

> -                                       MemoryRegionSection *section,

> -                                       target_ulong vaddr,

> -                                       hwaddr paddr, hwaddr xlat,

> -                                       int prot,

> -                                       target_ulong *address)

> +                                       MemoryRegionSection *section)

>  {

> -    hwaddr iotlb;

> -

> -    if (memory_region_is_ram(section->mr)) {

> -        /* Normal RAM.  */

> -        iotlb = memory_region_get_ram_addr(section->mr) + xlat;

> -    } else {

> -        AddressSpaceDispatch *d;

> -

> -        d = flatview_to_dispatch(section->fv);

> -        iotlb = section - d->map.sections;

> -        iotlb += xlat;

> -    }

> -

> -    return iotlb;

> +    AddressSpaceDispatch *d = flatview_to_dispatch(section->fv);

> +    return section - d->map.sections;

>  }

>  #endif /* defined(CONFIG_USER_ONLY) */



--
Alex Bennée
Richard Henderson Sept. 25, 2019, 5:55 p.m. UTC | #3
On 9/24/19 12:59 AM, David Hildenbrand wrote:
>> +    is_ram = memory_region_is_ram(section->mr);

>> +    is_romd = memory_region_is_romd(section->mr);

>> +

>> +    if (is_ram || is_romd) {

>> +        /* RAM and ROMD both have associated host memory. */

>>          addend = (uintptr_t)memory_region_get_ram_ptr(section->mr) + xlat;

>> +    } else {

>> +        /* I/O does not; force the host address to NULL. */

>> +        addend = 0;

>> +    }

>> +

>> +    write_address = address;

> 

> I guess the only "suboptimal" change is that you now have two checks for

> "prot & PAGE_WRITE" twice in the case of ram instead of one.


It's a single bit test on a register operand -- as cheap as can be.  If you
look at the entire code, there *must* be more than one test.  You can rearrange
the code to choose exactly where those tests are, but you'll have to have them
somewhere.

>> +        /* I/O or ROMD */

>> +        iotlb = memory_region_section_get_iotlb(cpu, section) + xlat;

>> +        /*

>> +         * Writes to romd devices must go through MMIO to enable write.

>> +         * Reads to romd devices go through the ram_ptr found above,

>> +         * but of course reads to I/O must go through MMIO.

>> +         */

>> +        write_address |= TLB_MMIO;

> 

> ... and here you calculate write_address even if probably unused.


Well... while the page might not be writable (but I'd bet that it is -- I/O
memory is almost never read-only), and therefore write_address is technically
unused, the variable is practically used in the next line:

    if (!is_romd) {
        address = write_address
    }

which will compile to a conditional move.

> Can your move the calculation of the write_address completely into the

> "prot & PAGE_WRITE" case below?


We'd prefer not to, since the code below is within the cpu tlb lock region.
We'd prefer to keep all of the expensive operations outside that.


r~
David Hildenbrand Sept. 25, 2019, 7:40 p.m. UTC | #4
On 25.09.19 19:55, Richard Henderson wrote:
> On 9/24/19 12:59 AM, David Hildenbrand wrote:

>>> +    is_ram = memory_region_is_ram(section->mr);

>>> +    is_romd = memory_region_is_romd(section->mr);

>>> +

>>> +    if (is_ram || is_romd) {

>>> +        /* RAM and ROMD both have associated host memory. */

>>>          addend = (uintptr_t)memory_region_get_ram_ptr(section->mr) + xlat;

>>> +    } else {

>>> +        /* I/O does not; force the host address to NULL. */

>>> +        addend = 0;

>>> +    }

>>> +

>>> +    write_address = address;

>>

>> I guess the only "suboptimal" change is that you now have two checks for

>> "prot & PAGE_WRITE" twice in the case of ram instead of one.

> 

> It's a single bit test on a register operand -- as cheap as can be.  If you

> look at the entire code, there *must* be more than one test.  You can rearrange

> the code to choose exactly where those tests are, but you'll have to have them

> somewhere.

> 

>>> +        /* I/O or ROMD */

>>> +        iotlb = memory_region_section_get_iotlb(cpu, section) + xlat;

>>> +        /*

>>> +         * Writes to romd devices must go through MMIO to enable write.

>>> +         * Reads to romd devices go through the ram_ptr found above,

>>> +         * but of course reads to I/O must go through MMIO.

>>> +         */

>>> +        write_address |= TLB_MMIO;

>>

>> ... and here you calculate write_address even if probably unused.

> 

> Well... while the page might not be writable (but I'd bet that it is -- I/O

> memory is almost never read-only), and therefore write_address is technically

> unused, the variable is practically used in the next line:

> 

>     if (!is_romd) {

>         address = write_address

>     }

> 

> which will compile to a conditional move.

> 

>> Can your move the calculation of the write_address completely into the

>> "prot & PAGE_WRITE" case below?

> 

> We'd prefer not to, since the code below is within the cpu tlb lock region.

> We'd prefer to keep all of the expensive operations outside that.


Makes all sense to me then and looks sane :)

> 

> 

> r~

> 



-- 

Thanks,

David / dhildenb
diff mbox series

Patch

diff --git a/include/exec/exec-all.h b/include/exec/exec-all.h
index 81b02eb2fe..49db07ba0b 100644
--- a/include/exec/exec-all.h
+++ b/include/exec/exec-all.h
@@ -509,11 +509,7 @@  address_space_translate_for_iotlb(CPUState *cpu, int asidx, hwaddr addr,
                                   hwaddr *xlat, hwaddr *plen,
                                   MemTxAttrs attrs, int *prot);
 hwaddr memory_region_section_get_iotlb(CPUState *cpu,
-                                       MemoryRegionSection *section,
-                                       target_ulong vaddr,
-                                       hwaddr paddr, hwaddr xlat,
-                                       int prot,
-                                       target_ulong *address);
+                                       MemoryRegionSection *section);
 #endif
 
 /* vl.c */
diff --git a/accel/tcg/cputlb.c b/accel/tcg/cputlb.c
index 05212ff244..05530a8b0c 100644
--- a/accel/tcg/cputlb.c
+++ b/accel/tcg/cputlb.c
@@ -704,13 +704,14 @@  void tlb_set_page_with_attrs(CPUState *cpu, target_ulong vaddr,
     MemoryRegionSection *section;
     unsigned int index;
     target_ulong address;
-    target_ulong code_address;
+    target_ulong write_address;
     uintptr_t addend;
     CPUTLBEntry *te, tn;
     hwaddr iotlb, xlat, sz, paddr_page;
     target_ulong vaddr_page;
     int asidx = cpu_asidx_from_attrs(cpu, attrs);
     int wp_flags;
+    bool is_ram, is_romd;
 
     assert_cpu_is_self(cpu);
 
@@ -739,18 +740,46 @@  void tlb_set_page_with_attrs(CPUState *cpu, target_ulong vaddr,
     if (attrs.byte_swap) {
         address |= TLB_BSWAP;
     }
-    if (!memory_region_is_ram(section->mr) &&
-        !memory_region_is_romd(section->mr)) {
-        /* IO memory case */
-        address |= TLB_MMIO;
-        addend = 0;
-    } else {
+
+    is_ram = memory_region_is_ram(section->mr);
+    is_romd = memory_region_is_romd(section->mr);
+
+    if (is_ram || is_romd) {
+        /* RAM and ROMD both have associated host memory. */
         addend = (uintptr_t)memory_region_get_ram_ptr(section->mr) + xlat;
+    } else {
+        /* I/O does not; force the host address to NULL. */
+        addend = 0;
+    }
+
+    write_address = address;
+    if (is_ram) {
+        iotlb = memory_region_get_ram_addr(section->mr) + xlat;
+        /*
+         * Computing is_clean is expensive; avoid all that unless
+         * the page is actually writable.
+         */
+        if (prot & PAGE_WRITE) {
+            if (section->readonly) {
+                write_address |= TLB_ROM;
+            } else if (cpu_physical_memory_is_clean(iotlb)) {
+                write_address |= TLB_NOTDIRTY;
+            }
+        }
+    } else {
+        /* I/O or ROMD */
+        iotlb = memory_region_section_get_iotlb(cpu, section) + xlat;
+        /*
+         * Writes to romd devices must go through MMIO to enable write.
+         * Reads to romd devices go through the ram_ptr found above,
+         * but of course reads to I/O must go through MMIO.
+         */
+        write_address |= TLB_MMIO;
+        if (!is_romd) {
+            address = write_address;
+        }
     }
 
-    code_address = address;
-    iotlb = memory_region_section_get_iotlb(cpu, section, vaddr_page,
-                                            paddr_page, xlat, prot, &address);
     wp_flags = cpu_watchpoint_address_matches(cpu, vaddr_page,
                                               TARGET_PAGE_SIZE);
 
@@ -790,8 +819,8 @@  void tlb_set_page_with_attrs(CPUState *cpu, target_ulong vaddr,
     /*
      * At this point iotlb contains a physical section number in the lower
      * TARGET_PAGE_BITS, and either
-     *  + the ram_addr_t of the page base of the target RAM (if NOTDIRTY or ROM)
-     *  + the offset within section->mr of the page base (otherwise)
+     *  + the ram_addr_t of the page base of the target RAM (RAM)
+     *  + the offset within section->mr of the page base (I/O, ROMD)
      * We subtract the vaddr_page (which is page aligned and thus won't
      * disturb the low bits) to give an offset which can be added to the
      * (non-page-aligned) vaddr of the eventual memory access to get
@@ -814,25 +843,14 @@  void tlb_set_page_with_attrs(CPUState *cpu, target_ulong vaddr,
     }
 
     if (prot & PAGE_EXEC) {
-        tn.addr_code = code_address;
+        tn.addr_code = address;
     } else {
         tn.addr_code = -1;
     }
 
     tn.addr_write = -1;
     if (prot & PAGE_WRITE) {
-        tn.addr_write = address;
-        if (memory_region_is_romd(section->mr)) {
-            /* Use the MMIO path so that the device can switch states. */
-            tn.addr_write |= TLB_MMIO;
-        } else if (memory_region_is_ram(section->mr)) {
-            if (section->readonly) {
-                tn.addr_write |= TLB_ROM;
-            } else if (cpu_physical_memory_is_clean(
-                        memory_region_get_ram_addr(section->mr) + xlat)) {
-                tn.addr_write |= TLB_NOTDIRTY;
-            }
-        }
+        tn.addr_write = write_address;
         if (prot & PAGE_WRITE_INV) {
             tn.addr_write |= TLB_INVALID_MASK;
         }
diff --git a/exec.c b/exec.c
index dc7001f115..961d7d6497 100644
--- a/exec.c
+++ b/exec.c
@@ -1459,26 +1459,10 @@  bool cpu_physical_memory_snapshot_get_dirty(DirtyBitmapSnapshot *snap,
 
 /* Called from RCU critical section */
 hwaddr memory_region_section_get_iotlb(CPUState *cpu,
-                                       MemoryRegionSection *section,
-                                       target_ulong vaddr,
-                                       hwaddr paddr, hwaddr xlat,
-                                       int prot,
-                                       target_ulong *address)
+                                       MemoryRegionSection *section)
 {
-    hwaddr iotlb;
-
-    if (memory_region_is_ram(section->mr)) {
-        /* Normal RAM.  */
-        iotlb = memory_region_get_ram_addr(section->mr) + xlat;
-    } else {
-        AddressSpaceDispatch *d;
-
-        d = flatview_to_dispatch(section->fv);
-        iotlb = section - d->map.sections;
-        iotlb += xlat;
-    }
-
-    return iotlb;
+    AddressSpaceDispatch *d = flatview_to_dispatch(section->fv);
+    return section - d->map.sections;
 }
 #endif /* defined(CONFIG_USER_ONLY) */