diff mbox series

[v2,13/13] target/riscv: Simplify probing in vext_ldff

Message ID 20240710032814.104643-14-richard.henderson@linaro.org
State New
Headers show
Series Fixes for user-only munmap races | expand

Commit Message

Richard Henderson July 10, 2024, 3:28 a.m. UTC
The current pairing of tlb_vaddr_to_host with extra is either
inefficient (user-only, with page_check_range) or incorrect
(system, with probe_pages).

For proper non-fault behaviour, use probe_access_flags with
its nonfault parameter set to true.

Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
---
 target/riscv/vector_helper.c | 34 ++++++++++++++++++----------------
 1 file changed, 18 insertions(+), 16 deletions(-)

Comments

Alistair Francis July 10, 2024, 4:09 a.m. UTC | #1
On Wed, Jul 10, 2024 at 1:30 PM Richard Henderson
<richard.henderson@linaro.org> wrote:
>
> The current pairing of tlb_vaddr_to_host with extra is either
> inefficient (user-only, with page_check_range) or incorrect
> (system, with probe_pages).
>
> For proper non-fault behaviour, use probe_access_flags with
> its nonfault parameter set to true.
>
> Signed-off-by: Richard Henderson <richard.henderson@linaro.org>

Acked-by: Alistair Francis <alistair.francis@wdc.com>

Alistair

> ---
>  target/riscv/vector_helper.c | 34 ++++++++++++++++++----------------
>  1 file changed, 18 insertions(+), 16 deletions(-)
>
> diff --git a/target/riscv/vector_helper.c b/target/riscv/vector_helper.c
> index 1b4d5a8e37..4d72eb74d3 100644
> --- a/target/riscv/vector_helper.c
> +++ b/target/riscv/vector_helper.c
> @@ -474,7 +474,6 @@ vext_ldff(void *vd, void *v0, target_ulong base,
>            vext_ldst_elem_fn *ldst_elem,
>            uint32_t log2_esz, uintptr_t ra)
>  {
> -    void *host;
>      uint32_t i, k, vl = 0;
>      uint32_t nf = vext_nf(desc);
>      uint32_t vm = vext_vm(desc);
> @@ -493,27 +492,31 @@ vext_ldff(void *vd, void *v0, target_ulong base,
>          }
>          addr = adjust_addr(env, base + i * (nf << log2_esz));
>          if (i == 0) {
> +            /* Allow fault on first element. */
>              probe_pages(env, addr, nf << log2_esz, ra, MMU_DATA_LOAD);
>          } else {
> -            /* if it triggers an exception, no need to check watchpoint */
>              remain = nf << log2_esz;
>              while (remain > 0) {
> +                void *host;
> +                int flags;
> +
>                  offset = -(addr | TARGET_PAGE_MASK);
> -                host = tlb_vaddr_to_host(env, addr, MMU_DATA_LOAD, mmu_index);
> -                if (host) {
> -#ifdef CONFIG_USER_ONLY
> -                    if (!page_check_range(addr, offset, PAGE_READ)) {
> -                        vl = i;
> -                        goto ProbeSuccess;
> -                    }
> -#else
> -                    probe_pages(env, addr, offset, ra, MMU_DATA_LOAD);
> -#endif
> -                } else {
> +
> +                /* Probe nonfault on subsequent elements. */
> +                flags = probe_access_flags(env, addr, offset, MMU_DATA_LOAD,
> +                                           mmu_index, true, &host, 0);
> +                if (flags) {
> +                    /*
> +                     * Stop any flag bit set:
> +                     *   invalid (unmapped)
> +                     *   mmio (transaction failed)
> +                     *   watchpoint (debug)
> +                     * In all cases, handle as the first load next time.
> +                     */
>                      vl = i;
> -                    goto ProbeSuccess;
> +                    break;
>                  }
> -                if (remain <=  offset) {
> +                if (remain <= offset) {
>                      break;
>                  }
>                  remain -= offset;
> @@ -521,7 +524,6 @@ vext_ldff(void *vd, void *v0, target_ulong base,
>              }
>          }
>      }
> -ProbeSuccess:
>      /* load bytes from guest memory */
>      if (vl != 0) {
>          env->vl = vl;
> --
> 2.43.0
>
>
Max Chou July 15, 2024, 7:06 a.m. UTC | #2
On 2024/7/10 11:28 AM, Richard Henderson wrote:
> The current pairing of tlb_vaddr_to_host with extra is either
> inefficient (user-only, with page_check_range) or incorrect
> (system, with probe_pages).
>
> For proper non-fault behaviour, use probe_access_flags with
> its nonfault parameter set to true.
>
> Signed-off-by: Richard Henderson<richard.henderson@linaro.org>
> ---
>   target/riscv/vector_helper.c | 34 ++++++++++++++++++----------------
>   1 file changed, 18 insertions(+), 16 deletions(-)
>
> diff --git a/target/riscv/vector_helper.c b/target/riscv/vector_helper.c
> index 1b4d5a8e37..4d72eb74d3 100644
> --- a/target/riscv/vector_helper.c
> +++ b/target/riscv/vector_helper.c
> @@ -474,7 +474,6 @@ vext_ldff(void *vd, void *v0, target_ulong base,
>             vext_ldst_elem_fn *ldst_elem,
>             uint32_t log2_esz, uintptr_t ra)
>   {
> -    void *host;
>       uint32_t i, k, vl = 0;
>       uint32_t nf = vext_nf(desc);
>       uint32_t vm = vext_vm(desc);
> @@ -493,27 +492,31 @@ vext_ldff(void *vd, void *v0, target_ulong base,
>           }
>           addr = adjust_addr(env, base + i * (nf << log2_esz));
>           if (i == 0) {
> +            /* Allow fault on first element. */
>               probe_pages(env, addr, nf << log2_esz, ra, MMU_DATA_LOAD);
>           } else {
> -            /* if it triggers an exception, no need to check watchpoint */
>               remain = nf << log2_esz;
>               while (remain > 0) {
> +                void *host;
> +                int flags;
> +
>                   offset = -(addr | TARGET_PAGE_MASK);
> -                host = tlb_vaddr_to_host(env, addr, MMU_DATA_LOAD, mmu_index);
> -                if (host) {
> -#ifdef CONFIG_USER_ONLY
> -                    if (!page_check_range(addr, offset, PAGE_READ)) {
> -                        vl = i;
> -                        goto ProbeSuccess;
> -                    }
> -#else
> -                    probe_pages(env, addr, offset, ra, MMU_DATA_LOAD);
> -#endif
> -                } else {
> +
> +                /* Probe nonfault on subsequent elements. */
> +                flags = probe_access_flags(env, addr, offset, MMU_DATA_LOAD,
> +                                           mmu_index, true, &host, 0);
> +                if (flags) {
According to the section 7.7. Unit-stride Fault-Only-First Loads in the 
v spec (v1.0)

      When the fault-only-first instruction would trigger a debug 
data-watchpoint trap on an element after the first,
      implementations should not reduce vl but instead should trigger 
the debug trap as otherwise the event might be lost.

I think that we need to filter out the watchpoint bit in the flags here, 
like below:
+ if (flags & ~TLB_WATCHPOINT) {


> +                    /*
> +                     * Stop any flag bit set:
> +                     *   invalid (unmapped)
> +                     *   mmio (transaction failed)
> +                     *   watchpoint (debug)
> +                     * In all cases, handle as the first load next time.
> +                     */
>                       vl = i;
> -                    goto ProbeSuccess;
> +                    break;
This break will only exit the while loop, so the outer for loop will 
continue checking the following elements, and we may get an unexpected vl.


>                   }
> -                if (remain <=  offset) {
> +                if (remain <= offset) {
>                       break;
>                   }
>                   remain -= offset;
> @@ -521,7 +524,6 @@ vext_ldff(void *vd, void *v0, target_ulong base,
>               }
>           }
>       }
> -ProbeSuccess:
>       /* load bytes from guest memory */
>       if (vl != 0) {
>           env->vl = vl;


Thanks for this patch series. I’m trying to rebase the RVV 
optimization RFC patch set onto this series and then optimize the vext_ldff part.
And I think that there is a potential issue in the original 
implementation that maybe we can fix in this patch.

We need to pass the correct element load size to the 
probe_access_internal function, which is called by tlb_vaddr_to_host in the 
original implementation and called directly in this patch.
The size parameter will be used by the pmp_hart_has_privs function to do 
the physical memory protection (PMP) checking.
If we set the size parameter to the remaining page size, we may get an 
unexpected trap caused by PMP rules that cover the regions of 
masked-off elements.

Maybe we can replace the while loop like below.


vext_ldff(void *vd, void *v0, target_ulong base,
           ...
{
     ...
     uint32_t size = nf << log2_esz;

     VSTART_CHECK_EARLY_EXIT(env);

     /* probe every access */
     for (i = env->vstart; i < env->vl; i++) {
         if (!vm && !vext_elem_mask(v0, i)) {
             continue;
         }
         addr = adjust_addr(env, base + i * size);
         if (i == 0) {
             probe_pages(env, addr, size, ra, MMU_DATA_LOAD);
         } else {
             /* if it triggers an exception, no need to check watchpoint */
             void *host;
             int flags;

             /* Probe nonfault on subsequent elements. */
             flags = probe_access_flags(env, addr, size, MMU_DATA_LOAD,
                     mmu_index, true, &host, 0);
             if (flags & ~TLB_WATCHPOINT) {
                 /*
                  * Stop any flag bit set:
                  *   invalid (unmapped)
                  *   mmio (transaction failed)
                  * In all cases, handle as the first load next time.
                  */
                 vl = i;
                 break;
             }
         }
     }

     /* load bytes from guest memory */
     if (vl != 0) {
         env->vl = vl;
     }
     ...
}
Richard Henderson July 15, 2024, 9:42 p.m. UTC | #3
On 7/15/24 17:06, Max Chou wrote:
>> +                /* Probe nonfault on subsequent elements. */
>> +                flags = probe_access_flags(env, addr, offset, MMU_DATA_LOAD,
>> +                                           mmu_index, true, &host, 0);
>> +                if (flags) {
> According to the section 7.7. Unit-stride Fault-Only-First Loads in the v spec (v1.0)
> 
>       When the fault-only-first instruction would trigger a debug
> data-watchpoint trap on an element after the first, implementations
> should not reduce vl but instead should trigger the debug trap as
> otherwise the event might be lost.

Hmm, ok.  Interesting.


> And I think that there is a potential issue in the original implementation that maybe we 
> can fix in this patch.
> 
> We need to assign the correct element load size to the probe_access_internal function 
> called by tlb_vaddr_to_host in original implementation or is called directly in this patch.
> The size parameter will be used by the pmp_hart_has_privs function to do the physical 
> memory protection (PMP) checking.
> If we set the size parameter to the remaining page size, we may get an unexpected trap caused by 
> the PMP rules that cover the regions of masked-off elements.
> 
> Maybe we can replace the while loop like below.
> 
> 
> vext_ldff(void *vd, void *v0, target_ulong base,
>            ...
> {
>      ...
>      uint32_t size = nf << log2_esz;
> 
>      VSTART_CHECK_EARLY_EXIT(env);
> 
>      /* probe every access */
>      for (i = env->vstart; i < env->vl; i++) {
>          if (!vm && !vext_elem_mask(v0, i)) {
>              continue;
>          }
>          addr = adjust_addr(env, base + i * size);
>          if (i == 0) {
>              probe_pages(env, addr, size, ra, MMU_DATA_LOAD);
>          } else {
>              /* if it triggers an exception, no need to check watchpoint */
>              void *host;
>              int flags;
> 
>              /* Probe nonfault on subsequent elements. */
>              flags = probe_access_flags(env, addr, size, MMU_DATA_LOAD,
>                      mmu_index, true, &host, 0);
>              if (flags & ~TLB_WATCHPOINT) {
>                  /*
>                   * Stop any flag bit set:
>                   *   invalid (unmapped)
>                   *   mmio (transaction failed)
>                   * In all cases, handle as the first load next time.
>                   */
>                  vl = i;
>                  break;
>              }
>          }
>      }

No, I don't think repeated probing is a good idea.
You'll lose everything you attempted to gain with the other improvements.

It seems, to handle watchpoints, you need to start by probing the entire length non-fault. 
  That will tell you if any portion of the length has any of the problem cases.  The fast 
path will not, of course.

After probing, you have flags for the 1 or two pages, and you can make a choice about the 
actual load length:

   - invalid on first page: either the first element faults,
     or you need to check PMP via some alternate mechanism.
     Do not be afraid to add something to CPUTLBEntryFull.extra.riscv
     during tlb_fill in order to accelerate this, if needed.

   - mmio on first page: just one element, as the second might fault
     during the transaction.

     It would be possible to enhance riscv_cpu_do_transaction_failed to
     suppress the fault and set a flag noting the fault.  This would allow
     multiple elements to be loaded, at the expense of another check after
     each element within the slow tlb-load path.  I don't know if this is
     desirable, really.  Using vector operations on mmio is usually a
     programming error.  :-)

   - invalid or mmio on second page, continue to the end of the first page.

Once we have the actual load length, handle watchpoints by hand.
See sve_cont_ldst_watchpoints.

Finally, the loop loading the elements, likely in ram via host pointer.


r~
diff mbox series

Patch

diff --git a/target/riscv/vector_helper.c b/target/riscv/vector_helper.c
index 1b4d5a8e37..4d72eb74d3 100644
--- a/target/riscv/vector_helper.c
+++ b/target/riscv/vector_helper.c
@@ -474,7 +474,6 @@  vext_ldff(void *vd, void *v0, target_ulong base,
           vext_ldst_elem_fn *ldst_elem,
           uint32_t log2_esz, uintptr_t ra)
 {
-    void *host;
     uint32_t i, k, vl = 0;
     uint32_t nf = vext_nf(desc);
     uint32_t vm = vext_vm(desc);
@@ -493,27 +492,31 @@  vext_ldff(void *vd, void *v0, target_ulong base,
         }
         addr = adjust_addr(env, base + i * (nf << log2_esz));
         if (i == 0) {
+            /* Allow fault on first element. */
             probe_pages(env, addr, nf << log2_esz, ra, MMU_DATA_LOAD);
         } else {
-            /* if it triggers an exception, no need to check watchpoint */
             remain = nf << log2_esz;
             while (remain > 0) {
+                void *host;
+                int flags;
+
                 offset = -(addr | TARGET_PAGE_MASK);
-                host = tlb_vaddr_to_host(env, addr, MMU_DATA_LOAD, mmu_index);
-                if (host) {
-#ifdef CONFIG_USER_ONLY
-                    if (!page_check_range(addr, offset, PAGE_READ)) {
-                        vl = i;
-                        goto ProbeSuccess;
-                    }
-#else
-                    probe_pages(env, addr, offset, ra, MMU_DATA_LOAD);
-#endif
-                } else {
+
+                /* Probe nonfault on subsequent elements. */
+                flags = probe_access_flags(env, addr, offset, MMU_DATA_LOAD,
+                                           mmu_index, true, &host, 0);
+                if (flags) {
+                    /*
+                     * Stop any flag bit set:
+                     *   invalid (unmapped)
+                     *   mmio (transaction failed)
+                     *   watchpoint (debug)
+                     * In all cases, handle as the first load next time.
+                     */
                     vl = i;
-                    goto ProbeSuccess;
+                    break;
                 }
-                if (remain <=  offset) {
+                if (remain <= offset) {
                     break;
                 }
                 remain -= offset;
@@ -521,7 +524,6 @@  vext_ldff(void *vd, void *v0, target_ulong base,
             }
         }
     }
-ProbeSuccess:
     /* load bytes from guest memory */
     if (vl != 0) {
         env->vl = vl;