From 497ed0672f7fe08d9654a0e5c11b682bea43a59e Mon Sep 17 00:00:00 2001
From: Paolo Bonzini <pbonzini@redhat.com>
Date: Wed, 5 Oct 2011 08:29:39 +0200
Subject: [PATCH 0/3] Add TLS wrappers and make cpu_single_env thread-local
Paolo Bonzini (3):
qemu-threads: add TLS wrappers
Prepare Windows port for thread-local cpu_single_env
Make cpu_single_env thread-local
configure | 20 +++++++++++++++++
coroutine-win32.c | 7 ++++-
cpu-all.h | 4 ++-
cpus.c | 13 +++++++---
exec.c | 2 +-
qemu-thread-posix.c | 42 ++++++++++++++++++++++++++++++++---
qemu-thread-win32.c | 16 +++++++++++++
qemu-tls-gcc.h | 25 +++++++++++++++++++++
qemu-tls-pthread.h | 58 ++++++++++++++++++++++++++++++++++++++++++++++++++
qemu-tls-win32.h | 59 +++++++++++++++++++++++++++++++++++++++++++++++++++
10 files changed, 234 insertions(+), 12 deletions(-)
create mode 100644 qemu-tls-gcc.h
create mode 100644 qemu-tls-pthread.h
create mode 100644 qemu-tls-win32.h
--
1.7.6
From d8c3c4e789f9b86a66042a9181333e1a096b6b93 Mon Sep 17 00:00:00 2001
From: Paolo Bonzini <pbonzini@redhat.com>
Date: Tue, 16 Aug 2011 10:37:44 -0700
Subject: [PATCH 1/3] qemu-threads: add TLS wrappers
Win32 emulated TLS is slow and is not available on all versions of GCC;
some versions of Unix only have pthread_getspecific as a means to access
TLS.
Actually, Win32 does have support for decent TLS, but GCC does not map
__thread to it. Unlike ELF TLS, however, it is perfectly possible
to declare TLS variables with simple C code! For pthread_getspecific
we similarly allocate a memory block; we have to compute all the offsets
at load time, which is also cheaper than doing a pthread_key_create for
each variable. Not optimal, but it works.
This patch adds wrappers to qemu-thread that will use __thread or
pthread_getspecific on POSIX systems, and the .tls segment on Windows.
It does make the declarations somewhat uglier, but not by much.
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
configure | 20 +++++++++++++++++
coroutine-win32.c | 7 ++++-
qemu-thread-posix.c | 42 ++++++++++++++++++++++++++++++++---
qemu-thread-win32.c | 16 +++++++++++++
qemu-tls-gcc.h | 25 +++++++++++++++++++++
qemu-tls-pthread.h | 58 ++++++++++++++++++++++++++++++++++++++++++++++++++
qemu-tls-win32.h | 59 +++++++++++++++++++++++++++++++++++++++++++++++++++
7 files changed, 221 insertions(+), 6 deletions(-)
create mode 100644 qemu-tls-gcc.h
create mode 100644 qemu-tls-pthread.h
create mode 100644 qemu-tls-win32.h
@@ -1215,6 +1215,23 @@ EOF
fi
##########################################
+# __thread check
+
+if test "$mingw32" = "yes" ; then
+ tls_model=win32
+else
+ cat > $TMPC << EOF
+__thread int x;
+int main() { return x; }
+EOF
+ if compile_prog "" "" ; then
+ tls_model=gcc
+ else
+ tls_model=pthread
+ fi
+fi
+
+##########################################
# zlib check
if test "$zlib" != "no" ; then
@@ -2697,6 +2714,7 @@ echo "Documentation $docs"
[ ! -z "$uname_release" ] && \
echo "uname -r $uname_release"
echo "NPTL support $nptl"
+echo "TLS support $tls_model"
echo "GUEST_BASE $guest_base"
echo "PIE user targets $user_pie"
echo "vde support $vde"
@@ -3580,6 +3598,8 @@ if test "$target_linux_user" = "yes" -o "$target_bsd_user" = "yes" ; then
esac
fi
+symlink $source_path/qemu-tls-$tls_model.h qemu-tls.h
+
# use included Linux headers
if test "$linux" = "yes" ; then
includes="-I\$(SRC_PATH)/linux-headers $includes"
@@ -24,6 +24,7 @@
#include "qemu-common.h"
#include "qemu-coroutine-int.h"
+#include "qemu-tls.h"
typedef struct
{
@@ -33,8 +34,10 @@ typedef struct
CoroutineAction action;
} CoroutineWin32;
-static __thread CoroutineWin32 leader;
-static __thread Coroutine *current;
+static DEFINE_TLS(CoroutineWin32, tls_leader);
+static DEFINE_TLS(Coroutine *, tls_current);
+#define leader get_tls(tls_leader)
+#define current get_tls(tls_current)
CoroutineAction qemu_coroutine_switch(Coroutine *from_, Coroutine *to_,
CoroutineAction action)
@@ -18,6 +18,9 @@
#include <stdint.h>
#include <string.h>
#include "qemu-thread.h"
+#include "qemu-common.h"
+#include "qemu-tls.h"
+#include "qemu-barrier.h"
static void error_exit(int err, const char *msg)
{
@@ -115,18 +118,44 @@ void qemu_cond_wait(QemuCond *cond, QemuMutex *mutex)
error_exit(err, __func__);
}
+size_t tls_size;
+pthread_key_t tls_key;
+
+static void __attribute__((constructor(102))) tls_init_thread(void)
+{
+ /* It's easier to always create the key, even if using GCC tls. */
+ pthread_key_create(&tls_key, g_free);
+ _tls_init_thread();
+}
+
+typedef struct QemuThreadData {
+ void *(*start_routine)(void *);
+ void *arg;
+} QemuThreadData;
+
+static void *start_routine_wrapper(void *arg)
+{
+ QemuThreadData args = *(QemuThreadData *) arg;
+ g_free(arg);
+ _tls_init_thread();
+ return args.start_routine(args.arg);
+}
+
void qemu_thread_create(QemuThread *thread,
- void *(*start_routine)(void*),
+ void *(*start_routine)(void *),
void *arg)
{
+ sigset_t set, oldset;
+ QemuThreadData *args = g_malloc(sizeof(QemuThreadData));
int err;
- /* Leave signal handling to the iothread. */
- sigset_t set, oldset;
+ args->start_routine = start_routine;
+ args->arg = arg;
+ /* Leave signal handling to the iothread. */
sigfillset(&set);
pthread_sigmask(SIG_SETMASK, &set, &oldset);
- err = pthread_create(&thread->thread, NULL, start_routine, arg);
+ err = pthread_create(&thread->thread, NULL, start_routine_wrapper, args);
if (err)
error_exit(err, __func__);
@@ -16,6 +16,22 @@
#include <assert.h>
#include <limits.h>
+/* TLS support. */
+
+int __attribute__((section(".tls$000"))) _tls_start = 0;
+int __attribute__((section(".tls$ZZZ"))) _tls_end = 0;
+int _tls_index = 0;
+
+const IMAGE_TLS_DIRECTORY _tls_used __attribute__((used, section(".rdata$T"))) = {
+ (ULONG_PTR) &_tls_start, /* start of tls data (pointer-sized, no truncation) */
+ (ULONG_PTR) &_tls_end, /* end of tls data */
+ (ULONG_PTR) &_tls_index, /* address of tls_index, filled in by the loader */
+ 0, /* pointer to callbacks (none) */
+ 0, /* size of tls zero fill */
+ 0 /* characteristics */
+};
+
+
static void error_exit(int err, const char *msg)
{
char *pstr;
new file mode 100644
@@ -0,0 +1,24 @@
+/*
+ * TLS with __thread
+ *
+ * Copyright Red Hat, Inc. 2011
+ *
+ * Authors:
+ * Paolo Bonzini <pbonzini@redhat.com>
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2. See
+ * the COPYING file in the top-level directory.
+ *
+ */
+
+#ifndef QEMU_TLS_GCC_H
+#define QEMU_TLS_GCC_H
+
+#define DECLARE_TLS(type, x) extern __thread type tls__##x
+#define DEFINE_TLS(type, x) __thread type tls__##x
+#define get_tls(x) tls__##x
+
+static inline size_t tls_init(size_t size, size_t alignment) { return 0; }
+static inline void _tls_init_thread(void) {}
+
+#endif
new file mode 100644
@@ -0,0 +1,57 @@
+/*
+ * TLS with pthread_getspecific
+ *
+ * Copyright Red Hat, Inc. 2011
+ *
+ * Authors:
+ * Paolo Bonzini <pbonzini@redhat.com>
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2. See
+ * the COPYING file in the top-level directory.
+ *
+ */
+
+#ifndef QEMU_TLS_PTHREAD_H
+#define QEMU_TLS_PTHREAD_H
+
+#include <pthread.h>
+#include <glib.h>
+
+#define DECLARE_TLS(type, x) \
+ extern size_t tls_offset__##x; \
+ extern type tls_dummy__##x
+
+#define DEFINE_TLS(type, x) \
+ size_t tls_offset__##x; \
+ static void __attribute__((constructor(101))) tls_init__##x(void) \
+ { \
+ tls_offset__##x = tls_init(sizeof(type), __alignof__(type)); \
+ } \
+ extern type tls_dummy__##x
+
+extern size_t tls_size;
+extern pthread_key_t tls_key;
+
+static inline size_t tls_init(size_t size, size_t alignment)
+{
+ size_t tls_offset = (tls_size + alignment - 1) & -alignment;
+ tls_size = tls_offset + size;
+ return tls_offset;
+}
+
+static inline void _tls_init_thread(void)
+{
+ void *mem = tls_size == 0 ? NULL : g_malloc0(tls_size);
+ pthread_setspecific(tls_key, mem);
+}
+
+static inline __attribute__((__const__)) void *_get_tls(size_t offset)
+{
+ char *base = pthread_getspecific(tls_key);
+ return &base[offset];
+}
+
+#define get_tls(x) \
+ (*(__typeof__(&tls_dummy__##x)) _get_tls(tls_offset__##x))
+
+#endif
new file mode 100644
@@ -0,0 +1,59 @@
+/*
+ * TLS with Win32 .tls sections
+ *
+ * Copyright Red Hat, Inc. 2011
+ *
+ * Authors:
+ * Paolo Bonzini <pbonzini@redhat.com>
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2. See
+ * the COPYING file in the top-level directory.
+ *
+ */
+
+#ifndef QEMU_TLS_WIN32_H
+#define QEMU_TLS_WIN32_H
+
+#include <windows.h>
+#include <winnt.h>
+
+typedef struct _TEB {
+ NT_TIB NtTib;
+ void *EnvironmentPointer;
+ void *x[3];
+ char **ThreadLocalStoragePointer;
+} TEB, *PTEB;
+
+/* 1) The initial contents of TLS variables are placed in the .tls section. */
+
+#define DECLARE_TLS(type, x) extern DEFINE_TLS(type, x)
+#define DEFINE_TLS(type, x) type tls__##x __attribute__((section(".tls$AAA")))
+
+/* 2) _tls_index holds the number of our module. The executable should be
+ zero, DLLs are numbered 1 and up. The loader fills it in for us. */
+
+extern int _tls_index;
+extern int _tls_start;
+static inline void _tls_init_thread(void) {}
+
+/* 3) Thus, Teb->ThreadLocalStoragePointer[_tls_index] is the base of
+ the TLS segment for this (thread, module) pair. Each segment has
+ the same layout as this module's .tls segment and is initialized
+ with the content of the .tls segment; offset 0 is the _tls_start variable.
+ So, get_tls passes us the offset of the passed variable relative to
+ _tls_start, and we return that same offset plus the base of segment. */
+
+static inline __attribute__((__const__)) void *_get_tls(size_t offset)
+{
+ PTEB Teb = NtCurrentTeb();
+ return (char *)(Teb->ThreadLocalStoragePointer[_tls_index]) + offset;
+}
+
+/* 4) get_tls, in addition to computing the offset, returns an lvalue.
+ "I got it. Magic." */
+
+#define get_tls(x) \
+ (*(__typeof__(tls__##x) *) \
+ _get_tls((ULONG_PTR)&(tls__##x) - (ULONG_PTR)&_tls_start))
+
+#endif
--
1.7.6
From b10531473a833cf5e925f00461134b0bcd2295bb Mon Sep 17 00:00:00 2001
From: Paolo Bonzini <pbonzini@redhat.com>
Date: Mon, 29 Aug 2011 17:03:55 +0200
Subject: [PATCH 2/3] Prepare Windows port for thread-local cpu_single_env
Windows does not execute cpu_signal in VCPU-thread context,
so it won't be able to use cpu_single_env there. However,
it has the CPUState available, so nothing is lost.
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
cpus.c | 13 +++++++++----
1 files changed, 9 insertions(+), 4 deletions(-)
@@ -176,10 +176,10 @@ static void cpu_handle_guest_debug(CPUState *env)
env->stopped = 1;
}
-static void cpu_signal(int sig)
+static inline void do_cpu_kick(CPUState *env)
{
- if (cpu_single_env) {
- cpu_exit(cpu_single_env);
+ if (env) {
+ cpu_exit(env);
}
exit_request = 1;
}
@@ -437,6 +437,11 @@ static void qemu_kvm_init_cpu_signals(CPUState *env)
}
}
+static void cpu_signal(int sig)
+{
+ do_cpu_kick(cpu_single_env);
+}
+
static void qemu_tcg_init_cpu_signals(void)
{
sigset_t set;
@@ -708,7 +713,7 @@ static void qemu_cpu_kick_thread(CPUState *env)
#else /* _WIN32 */
if (!qemu_cpu_is_self(env)) {
SuspendThread(env->thread->thread);
- cpu_signal(0);
+ do_cpu_kick(env);
ResumeThread(env->thread->thread);
}
#endif
--
1.7.6
From 497ed0672f7fe08d9654a0e5c11b682bea43a59e Mon Sep 17 00:00:00 2001
From: Paolo Bonzini <pbonzini@redhat.com>
Date: Mon, 29 Aug 2011 17:04:01 +0200
Subject: [PATCH 3/3] Make cpu_single_env thread-local
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
cpu-all.h | 4 +++-
exec.c | 2 +-
2 files changed, 4 insertions(+), 2 deletions(-)
@@ -20,6 +20,7 @@
#define CPU_ALL_H
#include "qemu-common.h"
+#include "qemu-tls.h"
#include "cpu-common.h"
/* some important defines:
@@ -334,7 +335,8 @@ void cpu_dump_statistics(CPUState *env, FILE *f, fprintf_function cpu_fprintf,
void QEMU_NORETURN cpu_abort(CPUState *env, const char *fmt, ...)
GCC_FMT_ATTR(2, 3);
extern CPUState *first_cpu;
-extern CPUState *cpu_single_env;
+DECLARE_TLS(CPUState *,tls_cpu_single_env);
+#define cpu_single_env get_tls(tls_cpu_single_env)
/* Flags for use in ENV->INTERRUPT_PENDING.
@@ -120,7 +120,7 @@ static MemoryRegion *system_io;
CPUState *first_cpu;
/* current CPU in the current thread. It is only valid inside
cpu_exec() */
-CPUState *cpu_single_env;
+DEFINE_TLS(CPUState *,tls_cpu_single_env); /* see DECLARE_TLS in cpu-all.h */
/* 0 = Do not count executed instructions.
1 = Precise instruction counting.
2 = Adaptive rate instruction counting. */
--
1.7.6