[PULL,47/47] tests/tcg/multiarch: add vma-pthread.c

Message ID 20221230000221.2764875-48-richard.henderson@linaro.org
State Superseded
Series [PULL,01/47] tcg: convert tcg/README to rst

Commit Message

Richard Henderson Dec. 30, 2022, 12:02 a.m. UTC
From: Ilya Leoshkevich <iii@linux.ibm.com>

Add a test that locklessly changes and exercises page protection bits
from various threads. This helps catch race conditions in the VMA
handling.
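
For readers skimming before the full patch below, the shape of the race is
roughly this (a condensed, illustrative sketch only — not the committed test):

    /*
     * Condensed sketch (illustration only): one thread flips protection
     * bits with mprotect() while another touches the same page, with no
     * lock between them.  PROT_READ is always kept set so the racing
     * read stays legal.
     */
    #include <assert.h>
    #include <pthread.h>
    #include <stdlib.h>
    #include <sys/mman.h>
    #include <unistd.h>

    static char *page;
    static volatile int running = 1;

    static void *mutate(void *arg)
    {
        int i, ret;

        (void)arg;
        for (i = 0; i < 10000; i++) {
            int prot = PROT_READ | ((i & 1) ? PROT_WRITE : 0);

            ret = mprotect(page, getpagesize(), prot);
            assert(ret == 0);
        }
        running = 0;
        return NULL;
    }

    int main(void)
    {
        pthread_t thread;
        char c = 0;
        int ret;

        page = mmap(NULL, getpagesize(), PROT_READ | PROT_WRITE,
                    MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
        assert(page != MAP_FAILED);
        ret = pthread_create(&thread, NULL, mutate, NULL);
        assert(ret == 0);
        while (running) {
            c ^= *(volatile char *)page;   /* racing read; always legal */
        }
        ret = pthread_join(thread, NULL);
        assert(ret == 0);
        (void)c;
        return EXIT_SUCCESS;
    }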

Acked-by: Alex Bennée <alex.bennee@linaro.org>
Signed-off-by: Ilya Leoshkevich <iii@linux.ibm.com>
Message-Id: <20221223120252.513319-1-iii@linux.ibm.com>
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
---
 tests/tcg/multiarch/nop_func.h       |  25 ++++
 tests/tcg/multiarch/munmap-pthread.c |  16 +--
 tests/tcg/multiarch/vma-pthread.c    | 207 +++++++++++++++++++++++++++
 tests/tcg/multiarch/Makefile.target  |   3 +
 4 files changed, 236 insertions(+), 15 deletions(-)
 create mode 100644 tests/tcg/multiarch/nop_func.h
 create mode 100644 tests/tcg/multiarch/vma-pthread.c

Comments

Richard Henderson Jan. 5, 2023, 12:26 a.m. UTC | #1
On 12/29/22 16:02, Richard Henderson wrote:
> From: Ilya Leoshkevich <iii@linux.ibm.com>
> 
> Add a test that locklessly changes and exercises page protection bits
> from various threads. This helps catch race conditions in the VMA
> handling.
> 
> Acked-by: Alex Bennée <alex.bennee@linaro.org>
> Signed-off-by: Ilya Leoshkevich <iii@linux.ibm.com>
> Message-Id: <20221223120252.513319-1-iii@linux.ibm.com>
> Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
> ---
>   tests/tcg/multiarch/nop_func.h       |  25 ++++
>   tests/tcg/multiarch/munmap-pthread.c |  16 +--
>   tests/tcg/multiarch/vma-pthread.c    | 207 +++++++++++++++++++++++++++
>   tests/tcg/multiarch/Makefile.target  |   3 +
>   4 files changed, 236 insertions(+), 15 deletions(-)
>   create mode 100644 tests/tcg/multiarch/nop_func.h
>   create mode 100644 tests/tcg/multiarch/vma-pthread.c

Hi Peter,

From the failures I see on the gitlab merge job, I think I need to resubmit with this new
test adjusted to loop less,

> +    for (i = 0; i < 50000; i++) {

here.

The failing jobs are --enable-debug, and take about 115 seconds to run manually on our 
aarch64 test host, exceeding the 90 second timeout.

I'll cut this down to 10000 loops and double-check times before resubmitting.
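
i.e. presumably just this one-liner in thread_mutate:

    -    for (i = 0; i < 50000; i++) {
    +    for (i = 0; i < 10000; i++) {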


r~
Richard Henderson Jan. 5, 2023, 12:37 a.m. UTC | #2
On 1/4/23 16:26, Richard Henderson wrote:
> From the failures I see on the gitlab merge job, I think I need to resubmit with this new
> test adjusted to loop less,
> 
>> +    for (i = 0; i < 50000; i++) {
> 
> here.
> 
> The failing jobs are --enable-debug, and take about 115 seconds to run manually on our 
> aarch64 test host, exceeding the 90 second timeout.
> 
> I'll cut this down to 10000 loops and double-check times before resubmitting.

Hmm.  Even this only reduced the runtime to 98 seconds. Curiously, the test runs in 1.8 
seconds with optimization enabled (--enable-debug-tcg). I'm not sure where all that extra 
time is going...


r~
Richard Henderson Jan. 5, 2023, 5:24 a.m. UTC | #3
On 1/4/23 16:37, Richard Henderson wrote:
> On 1/4/23 16:26, Richard Henderson wrote:
>> From the failures I see on the gitlab merge job, I think I need to resubmit with this
>> new test adjusted to loop less,
>>
>>> +    for (i = 0; i < 50000; i++) {
>>
>> here.
>>
>> The failing jobs are --enable-debug, and take about 115 seconds to run manually on our 
>> aarch64 test host, exceeding the 90 second timeout.
>>
>> I'll cut this down to 10000 loops and double-check times before resubmitting.
> 
> Hmm.  Even this only reduced the runtime to 98 seconds.

Bah.  The testcase didn't rebuild as expected.  Building from clean, the 10k loop 
completes in 20 seconds with optimization disabled.

I do wonder what the build time / test time trade-off is here, and whether we should be 
doing much -O0 testing in CI...


r~
Alex Bennée Jan. 5, 2023, 9:10 a.m. UTC | #4
Richard Henderson <richard.henderson@linaro.org> writes:

> On 1/4/23 16:37, Richard Henderson wrote:
>> On 1/4/23 16:26, Richard Henderson wrote:
>>> From the failures I see on the gitlab merge job, I think I need to
>>> resubmit with this new test adjusted to loop less,
>>>
>>>> +    for (i = 0; i < 50000; i++) {
>>>
>>> here.
>>>
>>> The failing jobs are --enable-debug, and take about 115 seconds to
>>> run manually on our aarch64 test host, exceeding the 90 second
>>> timeout.
>>>
>>> I'll cut this down to 10000 loops and double-check times before resubmitting.
>> Hmm.  Even this only reduced the runtime to 98 seconds.
>
> Bah.  The testcase didn't rebuild as expected.  Building from clean,
> the 10k loop completes in 20 seconds with optimization disabled.
>
> I do wonder what the build time / test time trade-off is here, and
> whether we should be doing much -O0 testing in CI...

I think the main argument for --enable-debug is less about the -O0 and
more about the extra asserts. Can we have -O3 with --enable-debug-tcg?
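
Something like (untested sketch, assuming --extra-cflags is appended after the
default optimisation flags):

    ./configure --enable-debug-tcg --extra-cflags='-O3'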

>
>
> r~

Patch

diff --git a/tests/tcg/multiarch/nop_func.h b/tests/tcg/multiarch/nop_func.h
new file mode 100644
index 0000000000..f714d21000
--- /dev/null
+++ b/tests/tcg/multiarch/nop_func.h
@@ -0,0 +1,25 @@ 
+/*
+ * No-op functions that can be safely copied.
+ *
+ * SPDX-License-Identifier: GPL-2.0-or-later
+ */
+#ifndef NOP_FUNC_H
+#define NOP_FUNC_H
+
+static const char nop_func[] = {
+#if defined(__aarch64__)
+    0xc0, 0x03, 0x5f, 0xd6,     /* ret */
+#elif defined(__alpha__)
+    0x01, 0x80, 0xFA, 0x6B,     /* ret */
+#elif defined(__arm__)
+    0x1e, 0xff, 0x2f, 0xe1,     /* bx lr */
+#elif defined(__riscv)
+    0x67, 0x80, 0x00, 0x00,     /* ret */
+#elif defined(__s390__)
+    0x07, 0xfe,                 /* br %r14 */
+#elif defined(__i386__) || defined(__x86_64__)
+    0xc3,                       /* ret */
+#endif
+};
+
+#endif
diff --git a/tests/tcg/multiarch/munmap-pthread.c b/tests/tcg/multiarch/munmap-pthread.c
index d7143b00d5..1c79005846 100644
--- a/tests/tcg/multiarch/munmap-pthread.c
+++ b/tests/tcg/multiarch/munmap-pthread.c
@@ -7,21 +7,7 @@ 
 #include <sys/mman.h>
 #include <unistd.h>
 
-static const char nop_func[] = {
-#if defined(__aarch64__)
-    0xc0, 0x03, 0x5f, 0xd6,     /* ret */
-#elif defined(__alpha__)
-    0x01, 0x80, 0xFA, 0x6B,     /* ret */
-#elif defined(__arm__)
-    0x1e, 0xff, 0x2f, 0xe1,     /* bx lr */
-#elif defined(__riscv)
-    0x67, 0x80, 0x00, 0x00,     /* ret */
-#elif defined(__s390__)
-    0x07, 0xfe,                 /* br %r14 */
-#elif defined(__i386__) || defined(__x86_64__)
-    0xc3,                       /* ret */
-#endif
-};
+#include "nop_func.h"
 
 static void *thread_mmap_munmap(void *arg)
 {
diff --git a/tests/tcg/multiarch/vma-pthread.c b/tests/tcg/multiarch/vma-pthread.c
new file mode 100644
index 0000000000..35e63cd6cd
--- /dev/null
+++ b/tests/tcg/multiarch/vma-pthread.c
@@ -0,0 +1,207 @@ 
+/*
+ * Test that VMA updates do not race.
+ *
+ * SPDX-License-Identifier: GPL-2.0-or-later
+ *
+ * Map a contiguous chunk of RWX memory. Split it into 8 equally sized
+ * regions, each of which is guaranteed to have a certain combination of
+ * protection bits set.
+ *
+ * Reader, writer and executor threads perform the respective operations on
+ * pages, which are guaranteed to have the respective protection bit set.
+ * Two mutator threads change the non-fixed protection bits randomly.
+ */
+#include <assert.h>
+#include <fcntl.h>
+#include <pthread.h>
+#include <stdbool.h>
+#include <stdlib.h>
+#include <string.h>
+#include <stdio.h>
+#include <sys/mman.h>
+#include <unistd.h>
+
+#include "nop_func.h"
+
+#define PAGE_IDX_BITS 10
+#define PAGE_COUNT (1 << PAGE_IDX_BITS)
+#define PAGE_IDX_MASK (PAGE_COUNT - 1)
+#define REGION_IDX_BITS 3
+#define PAGE_IDX_R_MASK (1 << 7)
+#define PAGE_IDX_W_MASK (1 << 8)
+#define PAGE_IDX_X_MASK (1 << 9)
+#define REGION_MASK (PAGE_IDX_R_MASK | PAGE_IDX_W_MASK | PAGE_IDX_X_MASK)
+#define PAGES_PER_REGION (1 << (PAGE_IDX_BITS - REGION_IDX_BITS))
+
+struct context {
+    int pagesize;
+    char *ptr;
+    int dev_null_fd;
+    volatile int mutator_count;
+};
+
+static void *thread_read(void *arg)
+{
+    struct context *ctx = arg;
+    ssize_t sret;
+    size_t i, j;
+    int ret;
+
+    for (i = 0; ctx->mutator_count; i++) {
+        char *p;
+
+        j = (i & PAGE_IDX_MASK) | PAGE_IDX_R_MASK;
+        p = &ctx->ptr[j * ctx->pagesize];
+
+        /* Read directly. */
+        ret = memcmp(p, nop_func, sizeof(nop_func));
+        if (ret != 0) {
+            fprintf(stderr, "fail direct read %p\n", p);
+            abort();
+        }
+
+        /* Read indirectly. */
+        sret = write(ctx->dev_null_fd, p, 1);
+        if (sret != 1) {
+            if (sret < 0) {
+                fprintf(stderr, "fail indirect read %p (%m)\n", p);
+            } else {
+                fprintf(stderr, "fail indirect read %p (%zd)\n", p, sret);
+            }
+            abort();
+        }
+    }
+
+    return NULL;
+}
+
+static void *thread_write(void *arg)
+{
+    struct context *ctx = arg;
+    struct timespec *ts;
+    size_t i, j;
+    int ret;
+
+    for (i = 0; ctx->mutator_count; i++) {
+        j = (i & PAGE_IDX_MASK) | PAGE_IDX_W_MASK;
+
+        /* Write directly. */
+        memcpy(&ctx->ptr[j * ctx->pagesize], nop_func, sizeof(nop_func));
+
+        /* Write using a syscall. */
+        ts = (struct timespec *)(&ctx->ptr[(j + 1) * ctx->pagesize] -
+                                 sizeof(struct timespec));
+        ret = clock_gettime(CLOCK_REALTIME, ts);
+        if (ret != 0) {
+            fprintf(stderr, "fail indirect write %p (%m)\n", ts);
+            abort();
+        }
+    }
+
+    return NULL;
+}
+
+static void *thread_execute(void *arg)
+{
+    struct context *ctx = arg;
+    size_t i, j;
+
+    for (i = 0; ctx->mutator_count; i++) {
+        j = (i & PAGE_IDX_MASK) | PAGE_IDX_X_MASK;
+        ((void(*)(void))&ctx->ptr[j * ctx->pagesize])();
+    }
+
+    return NULL;
+}
+
+static void *thread_mutate(void *arg)
+{
+    size_t i, start_idx, end_idx, page_idx, tmp;
+    struct context *ctx = arg;
+    unsigned int seed;
+    int prot, ret;
+
+    seed = (unsigned int)time(NULL);
+    for (i = 0; i < 50000; i++) {
+        start_idx = rand_r(&seed) & PAGE_IDX_MASK;
+        end_idx = rand_r(&seed) & PAGE_IDX_MASK;
+        if (start_idx > end_idx) {
+            tmp = start_idx;
+            start_idx = end_idx;
+            end_idx = tmp;
+        }
+        prot = rand_r(&seed) & (PROT_READ | PROT_WRITE | PROT_EXEC);
+        for (page_idx = start_idx & REGION_MASK; page_idx <= end_idx;
+             page_idx += PAGES_PER_REGION) {
+            if (page_idx & PAGE_IDX_R_MASK) {
+                prot |= PROT_READ;
+            }
+            if (page_idx & PAGE_IDX_W_MASK) {
+                /* FIXME: qemu syscalls check for both read+write. */
+                prot |= PROT_WRITE | PROT_READ;
+            }
+            if (page_idx & PAGE_IDX_X_MASK) {
+                prot |= PROT_EXEC;
+            }
+        }
+        ret = mprotect(&ctx->ptr[start_idx * ctx->pagesize],
+                       (end_idx - start_idx + 1) * ctx->pagesize, prot);
+        assert(ret == 0);
+    }
+
+    __atomic_fetch_sub(&ctx->mutator_count, 1, __ATOMIC_SEQ_CST);
+
+    return NULL;
+}
+
+int main(void)
+{
+    pthread_t threads[5];
+    struct context ctx;
+    size_t i;
+    int ret;
+
+    /* Without a template, nothing to test. */
+    if (sizeof(nop_func) == 0) {
+        return EXIT_SUCCESS;
+    }
+
+    /* Initialize memory chunk. */
+    ctx.pagesize = getpagesize();
+    ctx.ptr = mmap(NULL, PAGE_COUNT * ctx.pagesize,
+                   PROT_READ | PROT_WRITE | PROT_EXEC,
+                   MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
+    assert(ctx.ptr != MAP_FAILED);
+    for (i = 0; i < PAGE_COUNT; i++) {
+        memcpy(&ctx.ptr[i * ctx.pagesize], nop_func, sizeof(nop_func));
+    }
+    ctx.dev_null_fd = open("/dev/null", O_WRONLY);
+    assert(ctx.dev_null_fd >= 0);
+    ctx.mutator_count = 2;
+
+    /* Start threads. */
+    ret = pthread_create(&threads[0], NULL, thread_read, &ctx);
+    assert(ret == 0);
+    ret = pthread_create(&threads[1], NULL, thread_write, &ctx);
+    assert(ret == 0);
+    ret = pthread_create(&threads[2], NULL, thread_execute, &ctx);
+    assert(ret == 0);
+    for (i = 3; i <= 4; i++) {
+        ret = pthread_create(&threads[i], NULL, thread_mutate, &ctx);
+        assert(ret == 0);
+    }
+
+    /* Wait for threads to stop. */
+    for (i = 0; i < sizeof(threads) / sizeof(threads[0]); i++) {
+        ret = pthread_join(threads[i], NULL);
+        assert(ret == 0);
+    }
+
+    /* Destroy memory chunk. */
+    ret = close(ctx.dev_null_fd);
+    assert(ret == 0);
+    ret = munmap(ctx.ptr, PAGE_COUNT * ctx.pagesize);
+    assert(ret == 0);
+
+    return EXIT_SUCCESS;
+}
diff --git a/tests/tcg/multiarch/Makefile.target b/tests/tcg/multiarch/Makefile.target
index 5f0fee1aad..e7213af492 100644
--- a/tests/tcg/multiarch/Makefile.target
+++ b/tests/tcg/multiarch/Makefile.target
@@ -39,6 +39,9 @@  signals: LDFLAGS+=-lrt -lpthread
 munmap-pthread: CFLAGS+=-pthread
 munmap-pthread: LDFLAGS+=-pthread
 
+vma-pthread: CFLAGS+=-pthread
+vma-pthread: LDFLAGS+=-pthread
+
 # We define the runner for test-mmap after the individual
 # architectures have defined their supported pages sizes. If no
 # additional page sizes are defined we only run the default test.