@@ -1317,6 +1317,8 @@ static void prepare_icount_for_run(CPUState *cpu)
insns_left = MIN(0xffff, cpu->icount_budget);
cpu->icount_decr.u16.low = insns_left;
cpu->icount_extra = cpu->icount_budget - insns_left;
+
+ replay_mutex_lock();
}
}
@@ -1332,6 +1334,8 @@ static void process_icount_data(CPUState *cpu)
cpu->icount_budget = 0;
replay_account_executed_instructions();
+
+ replay_mutex_unlock();
}
}
@@ -1346,11 +1350,9 @@ static int tcg_cpu_exec(CPUState *cpu)
#ifdef CONFIG_PROFILER
ti = profile_getclock();
#endif
- qemu_mutex_unlock_iothread();
cpu_exec_start(cpu);
ret = cpu_exec(cpu);
cpu_exec_end(cpu);
- qemu_mutex_lock_iothread();
#ifdef CONFIG_PROFILER
tcg_time += profile_getclock() - ti;
#endif
@@ -1417,6 +1419,9 @@ static void *qemu_tcg_rr_cpu_thread_fn(void *arg)
cpu->exit_request = 1;
while (1) {
+ qemu_mutex_unlock_iothread();
+ replay_mutex_lock();
+ qemu_mutex_lock_iothread();
/* Account partial waits to QEMU_CLOCK_VIRTUAL. */
qemu_account_warp_timer();
@@ -1425,6 +1430,8 @@ static void *qemu_tcg_rr_cpu_thread_fn(void *arg)
*/
handle_icount_deadline();
+ replay_mutex_unlock();
+
if (!cpu) {
cpu = first_cpu;
}
@@ -1440,11 +1447,13 @@ static void *qemu_tcg_rr_cpu_thread_fn(void *arg)
if (cpu_can_run(cpu)) {
int r;
+ qemu_mutex_unlock_iothread();
prepare_icount_for_run(cpu);
r = tcg_cpu_exec(cpu);
process_icount_data(cpu);
+ qemu_mutex_lock_iothread();
if (r == EXCP_DEBUG) {
cpu_handle_guest_debug(cpu);
@@ -1634,7 +1643,9 @@ static void *qemu_tcg_cpu_thread_fn(void *arg)
while (1) {
if (cpu_can_run(cpu)) {
int r;
+ qemu_mutex_unlock_iothread();
r = tcg_cpu_exec(cpu);
+ qemu_mutex_lock_iothread();
switch (r) {
case EXCP_DEBUG:
cpu_handle_guest_debug(cpu);
@@ -1781,12 +1792,21 @@ void pause_all_vcpus(void)
}
}
+ /* We need to drop the replay_lock so any vCPU threads woken up
+ * can finish their replay tasks
+ */
+ replay_mutex_unlock();
+
while (!all_vcpus_paused()) {
qemu_cond_wait(&qemu_pause_cond, &qemu_global_mutex);
CPU_FOREACH(cpu) {
qemu_cpu_kick(cpu);
}
}
+
+ qemu_mutex_unlock_iothread();
+ replay_mutex_lock();
+ qemu_mutex_lock_iothread();
}
void cpu_resume(CPUState *cpu)
@@ -49,6 +49,28 @@ Modifications of qemu include:
* recording/replaying user input (mouse and keyboard)
* adding internal checkpoints for cpu and io synchronization
+Locking and thread synchronisation
+----------------------------------
+
+Previously the synchronisation of the main thread and the vCPU thread
+was ensured by the holding of the BQL. However the trend has been to
+reduce the time the BQL was held across the system including under TCG
+system emulation. As it is important that batches of events are kept
+in sequence (e.g. expiring timers and checkpoints in the main thread
+while instruction checkpoints are written by the vCPU thread) we need
+another lock to keep things in lock-step. This role is now handled by
+the replay_mutex_lock. It used to be held only for each event being
+written but now it is held for a whole execution period. This results
+in a deterministic ping-pong between the two main threads.
+
+As the BQL is now a finer grained lock than the replay_lock it is almost
+certainly a bug, and a source of deadlocks, to take the
+replay_mutex_lock while the BQL is held. This is enforced by an assert.
+While the unlocks are usually in the reverse order, this is not
+necessary; you can drop the replay_lock while holding the BQL, without
+doing a more complicated unlock_iothread/replay_unlock/lock_iothread
+sequence.
+
Non-deterministic events
------------------------
@@ -19,20 +19,17 @@
void replay_audio_out(int *played)
{
if (replay_mode == REPLAY_MODE_RECORD) {
+ g_assert(replay_mutex_locked());
replay_save_instructions();
- replay_mutex_lock();
replay_put_event(EVENT_AUDIO_OUT);
replay_put_dword(*played);
- replay_mutex_unlock();
} else if (replay_mode == REPLAY_MODE_PLAY) {
+ g_assert(replay_mutex_locked());
replay_account_executed_instructions();
- replay_mutex_lock();
if (replay_next_event_is(EVENT_AUDIO_OUT)) {
*played = replay_get_dword();
replay_finish_event();
- replay_mutex_unlock();
} else {
- replay_mutex_unlock();
error_report("Missing audio out event in the replay log");
abort();
}
@@ -44,8 +41,8 @@ void replay_audio_in(int *recorded, void *samples, int *wpos, int size)
int pos;
uint64_t left, right;
if (replay_mode == REPLAY_MODE_RECORD) {
+ g_assert(replay_mutex_locked());
replay_save_instructions();
- replay_mutex_lock();
replay_put_event(EVENT_AUDIO_IN);
replay_put_dword(*recorded);
replay_put_dword(*wpos);
@@ -55,10 +52,9 @@ void replay_audio_in(int *recorded, void *samples, int *wpos, int size)
replay_put_qword(left);
replay_put_qword(right);
}
- replay_mutex_unlock();
} else if (replay_mode == REPLAY_MODE_PLAY) {
+ g_assert(replay_mutex_locked());
replay_account_executed_instructions();
- replay_mutex_lock();
if (replay_next_event_is(EVENT_AUDIO_IN)) {
*recorded = replay_get_dword();
*wpos = replay_get_dword();
@@ -69,9 +65,7 @@ void replay_audio_in(int *recorded, void *samples, int *wpos, int size)
audio_sample_from_uint64(samples, pos, left, right);
}
replay_finish_event();
- replay_mutex_unlock();
} else {
- replay_mutex_unlock();
error_report("Missing audio in event in the replay log");
abort();
}
@@ -96,25 +96,24 @@ void *replay_event_char_read_load(void)
void replay_char_write_event_save(int res, int offset)
{
+ g_assert(replay_mutex_locked());
+
replay_save_instructions();
- replay_mutex_lock();
replay_put_event(EVENT_CHAR_WRITE);
replay_put_dword(res);
replay_put_dword(offset);
- replay_mutex_unlock();
}
void replay_char_write_event_load(int *res, int *offset)
{
+ g_assert(replay_mutex_locked());
+
replay_account_executed_instructions();
- replay_mutex_lock();
if (replay_next_event_is(EVENT_CHAR_WRITE)) {
*res = replay_get_dword();
*offset = replay_get_dword();
replay_finish_event();
- replay_mutex_unlock();
} else {
- replay_mutex_unlock();
error_report("Missing character write event in the replay log");
exit(1);
}
@@ -122,23 +121,21 @@ void replay_char_write_event_load(int *res, int *offset)
int replay_char_read_all_load(uint8_t *buf)
{
- replay_mutex_lock();
+ g_assert(replay_mutex_locked());
+
if (replay_next_event_is(EVENT_CHAR_READ_ALL)) {
size_t size;
int res;
replay_get_array(buf, &size);
replay_finish_event();
- replay_mutex_unlock();
res = (int)size;
assert(res >= 0);
return res;
} else if (replay_next_event_is(EVENT_CHAR_READ_ALL_ERROR)) {
int res = replay_get_dword();
replay_finish_event();
- replay_mutex_unlock();
return res;
} else {
- replay_mutex_unlock();
error_report("Missing character read all event in the replay log");
exit(1);
}
@@ -146,19 +143,17 @@ int replay_char_read_all_load(uint8_t *buf)
void replay_char_read_all_save_error(int res)
{
+ g_assert(replay_mutex_locked());
assert(res < 0);
replay_save_instructions();
- replay_mutex_lock();
replay_put_event(EVENT_CHAR_READ_ALL_ERROR);
replay_put_dword(res);
- replay_mutex_unlock();
}
void replay_char_read_all_save_buf(uint8_t *buf, int offset)
{
+ g_assert(replay_mutex_locked());
replay_save_instructions();
- replay_mutex_lock();
replay_put_event(EVENT_CHAR_READ_ALL);
replay_put_array(buf, offset);
- replay_mutex_unlock();
}
@@ -79,16 +79,14 @@ bool replay_has_events(void)
void replay_flush_events(void)
{
- replay_mutex_lock();
+ g_assert(replay_mutex_locked());
+
while (!QTAILQ_EMPTY(&events_list)) {
Event *event = QTAILQ_FIRST(&events_list);
- replay_mutex_unlock();
replay_run_event(event);
- replay_mutex_lock();
QTAILQ_REMOVE(&events_list, event, events);
g_free(event);
}
- replay_mutex_unlock();
}
void replay_disable_events(void)
@@ -102,14 +100,14 @@ void replay_disable_events(void)
void replay_clear_events(void)
{
- replay_mutex_lock();
+ g_assert(replay_mutex_locked());
+
while (!QTAILQ_EMPTY(&events_list)) {
Event *event = QTAILQ_FIRST(&events_list);
QTAILQ_REMOVE(&events_list, event, events);
g_free(event);
}
- replay_mutex_unlock();
}
/*! Adds specified async event to the queue */
@@ -136,9 +134,8 @@ void replay_add_event(ReplayAsyncEventKind event_kind,
event->opaque2 = opaque2;
event->id = id;
- replay_mutex_lock();
+ g_assert(replay_mutex_locked());
QTAILQ_INSERT_TAIL(&events_list, event, events);
- replay_mutex_unlock();
}
void replay_bh_schedule_event(QEMUBH *bh)
@@ -207,13 +204,11 @@ static void replay_save_event(Event *event, int checkpoint)
/* Called with replay mutex locked */
void replay_save_events(int checkpoint)
{
+ g_assert(replay_mutex_locked());
while (!QTAILQ_EMPTY(&events_list)) {
Event *event = QTAILQ_FIRST(&events_list);
replay_save_event(event, checkpoint);
-
- replay_mutex_unlock();
replay_run_event(event);
- replay_mutex_lock();
QTAILQ_REMOVE(&events_list, event, events);
g_free(event);
}
@@ -292,6 +287,7 @@ static Event *replay_read_event(int checkpoint)
/* Called with replay mutex locked */
void replay_read_events(int checkpoint)
{
+ g_assert(replay_mutex_locked());
while (replay_state.data_kind == EVENT_ASYNC) {
Event *event = replay_read_event(checkpoint);
if (!event) {
@@ -299,9 +295,7 @@ void replay_read_events(int checkpoint)
}
replay_finish_event();
read_event_kind = -1;
- replay_mutex_unlock();
replay_run_event(event);
- replay_mutex_lock();
g_free(event);
}
@@ -174,6 +174,9 @@ static __thread bool replay_locked;
void replay_mutex_init(void)
{
qemu_mutex_init(&lock);
+ /* Hold the mutex while we start-up */
+ qemu_mutex_lock(&lock);
+ replay_locked = true;
}
bool replay_mutex_locked(void)
@@ -181,25 +184,31 @@ bool replay_mutex_locked(void)
return replay_locked;
}
+/* Ordering constraints, replay_lock must be taken before BQL */
void replay_mutex_lock(void)
{
- g_assert(!replay_mutex_locked());
- qemu_mutex_lock(&lock);
- replay_locked = true;
+ if (replay_mode != REPLAY_MODE_NONE) {
+ g_assert(!qemu_mutex_iothread_locked());
+ g_assert(!replay_mutex_locked());
+ qemu_mutex_lock(&lock);
+ replay_locked = true;
+ }
}
void replay_mutex_unlock(void)
{
- g_assert(replay_mutex_locked());
- replay_locked = false;
- qemu_mutex_unlock(&lock);
+ if (replay_mode != REPLAY_MODE_NONE) {
+ g_assert(replay_mutex_locked());
+ replay_locked = false;
+ qemu_mutex_unlock(&lock);
+ }
}
/*! Saves cached instructions. */
void replay_save_instructions(void)
{
if (replay_file && replay_mode == REPLAY_MODE_RECORD) {
- replay_mutex_lock();
+ g_assert(replay_mutex_locked());
int diff = (int)(replay_get_current_step() - replay_state.current_step);
/* Time can only go forward */
@@ -210,6 +219,5 @@ void replay_save_instructions(void)
replay_put_dword(diff);
replay_state.current_step += diff;
}
- replay_mutex_unlock();
}
}
@@ -17,13 +17,13 @@
int64_t replay_save_clock(ReplayClockKind kind, int64_t clock)
{
- replay_save_instructions();
if (replay_file) {
- replay_mutex_lock();
+ g_assert(replay_mutex_locked());
+
+ replay_save_instructions();
replay_put_event(EVENT_CLOCK + kind);
replay_put_qword(clock);
- replay_mutex_unlock();
}
return clock;
@@ -46,16 +46,16 @@ void replay_read_next_clock(ReplayClockKind kind)
/*! Reads next clock event from the input. */
int64_t replay_read_clock(ReplayClockKind kind)
{
+ g_assert(replay_file && replay_mutex_locked());
+
replay_account_executed_instructions();
if (replay_file) {
int64_t ret;
- replay_mutex_lock();
if (replay_next_event_is(EVENT_CLOCK + kind)) {
replay_read_next_clock(kind);
}
ret = replay_state.cached_clock[kind];
- replay_mutex_unlock();
return ret;
}
@@ -81,7 +81,7 @@ int replay_get_instructions(void)
void replay_account_executed_instructions(void)
{
if (replay_mode == REPLAY_MODE_PLAY) {
- replay_mutex_lock();
+ g_assert(replay_mutex_locked());
if (replay_state.instructions_count > 0) {
int count = (int)(replay_get_current_step()
- replay_state.current_step);
@@ -100,24 +100,22 @@ void replay_account_executed_instructions(void)
qemu_notify_event();
}
}
- replay_mutex_unlock();
}
}
bool replay_exception(void)
{
+
if (replay_mode == REPLAY_MODE_RECORD) {
+ g_assert(replay_mutex_locked());
replay_save_instructions();
- replay_mutex_lock();
replay_put_event(EVENT_EXCEPTION);
- replay_mutex_unlock();
return true;
} else if (replay_mode == REPLAY_MODE_PLAY) {
+ g_assert(replay_mutex_locked());
bool res = replay_has_exception();
if (res) {
- replay_mutex_lock();
replay_finish_event();
- replay_mutex_unlock();
}
return res;
}
@@ -129,10 +127,9 @@ bool replay_has_exception(void)
{
bool res = false;
if (replay_mode == REPLAY_MODE_PLAY) {
+ g_assert(replay_mutex_locked());
replay_account_executed_instructions();
- replay_mutex_lock();
res = replay_next_event_is(EVENT_EXCEPTION);
- replay_mutex_unlock();
}
return res;
@@ -141,17 +138,15 @@ bool replay_has_exception(void)
bool replay_interrupt(void)
{
if (replay_mode == REPLAY_MODE_RECORD) {
+ g_assert(replay_mutex_locked());
replay_save_instructions();
- replay_mutex_lock();
replay_put_event(EVENT_INTERRUPT);
- replay_mutex_unlock();
return true;
} else if (replay_mode == REPLAY_MODE_PLAY) {
+ g_assert(replay_mutex_locked());
bool res = replay_has_interrupt();
if (res) {
- replay_mutex_lock();
replay_finish_event();
- replay_mutex_unlock();
}
return res;
}
@@ -163,10 +158,9 @@ bool replay_has_interrupt(void)
{
bool res = false;
if (replay_mode == REPLAY_MODE_PLAY) {
+ g_assert(replay_mutex_locked());
replay_account_executed_instructions();
- replay_mutex_lock();
res = replay_next_event_is(EVENT_INTERRUPT);
- replay_mutex_unlock();
}
return res;
}
@@ -174,9 +168,8 @@ bool replay_has_interrupt(void)
void replay_shutdown_request(ShutdownCause cause)
{
if (replay_mode == REPLAY_MODE_RECORD) {
- replay_mutex_lock();
+ g_assert(replay_mutex_locked());
replay_put_event(EVENT_SHUTDOWN + cause);
- replay_mutex_unlock();
}
}
@@ -190,9 +183,9 @@ bool replay_checkpoint(ReplayCheckpoint checkpoint)
return true;
}
- replay_mutex_lock();
if (replay_mode == REPLAY_MODE_PLAY) {
+ g_assert(replay_mutex_locked());
if (replay_next_event_is(EVENT_CHECKPOINT + checkpoint)) {
replay_finish_event();
} else if (replay_state.data_kind != EVENT_ASYNC) {
@@ -205,12 +198,12 @@ bool replay_checkpoint(ReplayCheckpoint checkpoint)
checkpoint were processed */
res = replay_state.data_kind != EVENT_ASYNC;
} else if (replay_mode == REPLAY_MODE_RECORD) {
+ g_assert(replay_mutex_locked());
replay_put_event(EVENT_CHECKPOINT + checkpoint);
replay_save_events(checkpoint);
res = true;
}
out:
- replay_mutex_unlock();
return res;
}
@@ -233,8 +226,6 @@ static void replay_enable(const char *fname, int mode)
atexit(replay_finish);
- replay_mutex_init();
-
replay_file = fopen(fname, fmode);
if (replay_file == NULL) {
fprintf(stderr, "Replay: open %s: %s\n", fname, strerror(errno));
@@ -242,8 +233,9 @@ static void replay_enable(const char *fname, int mode)
}
replay_filename = g_strdup(fname);
-
replay_mode = mode;
+ replay_mutex_init();
+
replay_state.data_kind = -1;
replay_state.instructions_count = 0;
replay_state.current_step = 0;
@@ -29,6 +29,7 @@
#include "qemu/sockets.h" // struct in_addr needed for libslirp.h
#include "sysemu/qtest.h"
#include "sysemu/cpus.h"
+#include "sysemu/replay.h"
#include "slirp/libslirp.h"
#include "qemu/main-loop.h"
#include "block/aio.h"
@@ -245,18 +246,19 @@ static int os_host_main_loop_wait(int64_t timeout)
timeout = SCALE_MS;
}
+
if (timeout) {
spin_counter = 0;
- qemu_mutex_unlock_iothread();
} else {
spin_counter++;
}
+ qemu_mutex_unlock_iothread();
+ replay_mutex_unlock();
ret = qemu_poll_ns((GPollFD *)gpollfds->data, gpollfds->len, timeout);
- if (timeout) {
- qemu_mutex_lock_iothread();
- }
+ replay_mutex_lock();
+ qemu_mutex_lock_iothread();
glib_pollfds_poll();
@@ -463,8 +465,13 @@ static int os_host_main_loop_wait(int64_t timeout)
poll_timeout_ns = qemu_soonest_timeout(poll_timeout_ns, timeout);
qemu_mutex_unlock_iothread();
+
+ replay_mutex_unlock();
+
g_poll_ret = qemu_poll_ns(poll_fds, n_poll_fds + w->num, poll_timeout_ns);
+ replay_mutex_lock();
+
qemu_mutex_lock_iothread();
if (g_poll_ret > 0) {
for (i = 0; i < w->num; i++) {
@@ -3058,6 +3058,7 @@ int main(int argc, char **argv, char **envp)
qemu_init_cpu_list();
qemu_init_cpu_loop();
+
qemu_mutex_lock_iothread();
atexit(qemu_run_exit_notifiers);