Message ID | 20231205204106.95531-9-alex.bennee@linaro.org |
---|---|
State | New |
Headers | show |
Series | record/replay fixes, maybe for 8.2 or for post release stable? | expand |
Hi Alex, On 5/12/23 21:41, Alex Bennée wrote: > Figuring out why replay has failed is tricky at the best of times. > Lets centralise the reporting of a replay sync error and add a little > bit of extra information to help with debugging. > > Signed-off-by: Alex Bennée <alex.bennee@linaro.org> > --- > replay/replay-internal.h | 12 ++++++++++++ > replay/replay-char.c | 6 ++---- > replay/replay-internal.c | 1 + > replay/replay.c | 9 +++++++++ > 4 files changed, 24 insertions(+), 4 deletions(-) > > diff --git a/replay/replay-internal.h b/replay/replay-internal.h > index 1bc8fd5086..709e2eb4cb 100644 > --- a/replay/replay-internal.h > +++ b/replay/replay-internal.h > @@ -74,6 +74,7 @@ enum ReplayEvents { > * @cached_clock: Cached clocks values > * @current_icount: number of processed instructions > * @instruction_count: number of instructions until next event > + * @current_event: current event index > * @data_kind: current event > * @has_unread_data: true if event not yet processed > * @file_offset: offset into replay log at replay snapshot > @@ -84,6 +85,7 @@ typedef struct ReplayState { > int64_t cached_clock[REPLAY_CLOCK_COUNT]; > uint64_t current_icount; > int instruction_count; > + unsigned int current_event; > unsigned int data_kind; > bool has_unread_data; > uint64_t file_offset; Shouldn't this field be migrated?
On 12/5/23 12:41, Alex Bennée wrote: > Figuring out why replay has failed is tricky at the best of times. > Lets centralise the reporting of a replay sync error and add a little > bit of extra information to help with debugging. > > Signed-off-by: Alex Bennée <alex.bennee@linaro.org> > --- > replay/replay-internal.h | 12 ++++++++++++ > replay/replay-char.c | 6 ++---- > replay/replay-internal.c | 1 + > replay/replay.c | 9 +++++++++ > 4 files changed, 24 insertions(+), 4 deletions(-) Reviewed-by: Richard Henderson <richard.henderson@linaro.org> r~
On 12/6/23 03:35, Philippe Mathieu-Daudé wrote: > Hi Alex, > > On 5/12/23 21:41, Alex Bennée wrote: >> Figuring out why replay has failed is tricky at the best of times. >> Lets centralise the reporting of a replay sync error and add a little >> bit of extra information to help with debugging. >> >> Signed-off-by: Alex Bennée <alex.bennee@linaro.org> >> --- >> replay/replay-internal.h | 12 ++++++++++++ >> replay/replay-char.c | 6 ++---- >> replay/replay-internal.c | 1 + >> replay/replay.c | 9 +++++++++ >> 4 files changed, 24 insertions(+), 4 deletions(-) >> >> diff --git a/replay/replay-internal.h b/replay/replay-internal.h >> index 1bc8fd5086..709e2eb4cb 100644 >> --- a/replay/replay-internal.h >> +++ b/replay/replay-internal.h >> @@ -74,6 +74,7 @@ enum ReplayEvents { >> * @cached_clock: Cached clocks values >> * @current_icount: number of processed instructions >> * @instruction_count: number of instructions until next event >> + * @current_event: current event index >> * @data_kind: current event >> * @has_unread_data: true if event not yet processed >> * @file_offset: offset into replay log at replay snapshot >> @@ -84,6 +85,7 @@ typedef struct ReplayState { >> int64_t cached_clock[REPLAY_CLOCK_COUNT]; >> uint64_t current_icount; >> int instruction_count; >> + unsigned int current_event; >> unsigned int data_kind; >> bool has_unread_data; >> uint64_t file_offset; > Shouldn't this field be migrated? No, it's for diagnostic use only. r~
On 06.12.2023 19:48, Richard Henderson wrote: > On 12/6/23 03:35, Philippe Mathieu-Daudé wrote: >> Hi Alex, >> >> On 5/12/23 21:41, Alex Bennée wrote: >>> Figuring out why replay has failed is tricky at the best of times. >>> Lets centralise the reporting of a replay sync error and add a little >>> bit of extra information to help with debugging. >>> >>> Signed-off-by: Alex Bennée <alex.bennee@linaro.org> >>> --- >>> replay/replay-internal.h | 12 ++++++++++++ >>> replay/replay-char.c | 6 ++---- >>> replay/replay-internal.c | 1 + >>> replay/replay.c | 9 +++++++++ >>> 4 files changed, 24 insertions(+), 4 deletions(-) >>> >>> diff --git a/replay/replay-internal.h b/replay/replay-internal.h >>> index 1bc8fd5086..709e2eb4cb 100644 >>> --- a/replay/replay-internal.h >>> +++ b/replay/replay-internal.h >>> @@ -74,6 +74,7 @@ enum ReplayEvents { >>> * @cached_clock: Cached clocks values >>> * @current_icount: number of processed instructions >>> * @instruction_count: number of instructions until next event >>> + * @current_event: current event index >>> * @data_kind: current event >>> * @has_unread_data: true if event not yet processed >>> * @file_offset: offset into replay log at replay snapshot >>> @@ -84,6 +85,7 @@ typedef struct ReplayState { >>> int64_t cached_clock[REPLAY_CLOCK_COUNT]; >>> uint64_t current_icount; >>> int instruction_count; >>> + unsigned int current_event; >>> unsigned int data_kind; >>> bool has_unread_data; >>> uint64_t file_offset; >> Shouldn't this field be migrated? > > No, it's for diagnostic use only. It should be migrated, because RR may be started from the snapshot, which references the middle of replayed scenario. Pavel Dovgalyuk
diff --git a/replay/replay-internal.h b/replay/replay-internal.h index 1bc8fd5086..709e2eb4cb 100644 --- a/replay/replay-internal.h +++ b/replay/replay-internal.h @@ -74,6 +74,7 @@ enum ReplayEvents { * @cached_clock: Cached clocks values * @current_icount: number of processed instructions * @instruction_count: number of instructions until next event + * @current_event: current event index * @data_kind: current event * @has_unread_data: true if event not yet processed * @file_offset: offset into replay log at replay snapshot @@ -84,6 +85,7 @@ typedef struct ReplayState { int64_t cached_clock[REPLAY_CLOCK_COUNT]; uint64_t current_icount; int instruction_count; + unsigned int current_event; unsigned int data_kind; bool has_unread_data; uint64_t file_offset; @@ -188,6 +190,16 @@ void replay_event_net_save(void *opaque); /*! Reads network from the file. */ void *replay_event_net_load(void); +/* Diagnostics */ + +/** + * replay_sync_error(): report sync error and exit + * + * When we reach an error condition we want to report it centrally so + * we can also dump some useful information into the logs. + */ +G_NORETURN void replay_sync_error(const char *error); + /* VMState-related functions */ /* Registers replay VMState. diff --git a/replay/replay-char.c b/replay/replay-char.c index a31aded032..72b1f832dd 100644 --- a/replay/replay-char.c +++ b/replay/replay-char.c @@ -113,8 +113,7 @@ void replay_char_write_event_load(int *res, int *offset) *offset = replay_get_dword(); replay_finish_event(); } else { - error_report("Missing character write event in the replay log"); - exit(1); + replay_sync_error("Missing character write event in the replay log"); } } @@ -135,8 +134,7 @@ int replay_char_read_all_load(uint8_t *buf) replay_finish_event(); return res; } else { - error_report("Missing character read all event in the replay log"); - exit(1); + replay_sync_error("Missing character read all event in the replay log"); } } diff --git a/replay/replay-internal.c b/replay/replay-internal.c index 634025096e..654b99cfb5 100644 --- a/replay/replay-internal.c +++ b/replay/replay-internal.c @@ -175,6 +175,7 @@ void replay_fetch_data_kind(void) if (replay_file) { if (!replay_state.has_unread_data) { replay_state.data_kind = replay_get_byte(); + replay_state.current_event++; if (replay_state.data_kind == EVENT_INSTRUCTION) { replay_state.instruction_count = replay_get_dword(); } diff --git a/replay/replay.c b/replay/replay.c index d729214197..e83c01285c 100644 --- a/replay/replay.c +++ b/replay/replay.c @@ -226,6 +226,14 @@ bool replay_has_event(void) return res; } +G_NORETURN void replay_sync_error(const char *error) +{ + error_report("%s (insn total %"PRId64"/%d left, event %u/%u)", error, + replay_state.current_icount, replay_state.instruction_count, + replay_state.current_event, replay_state.data_kind); + abort(); +} + static void replay_enable(const char *fname, int mode) { const char *fmode = NULL; @@ -258,6 +266,7 @@ static void replay_enable(const char *fname, int mode) replay_state.data_kind = -1; replay_state.instruction_count = 0; replay_state.current_icount = 0; + replay_state.current_event = 0; replay_state.has_unread_data = false; /* skip file header for RECORD and check it for PLAY */
Figuring out why replay has failed is tricky at the best of times. Lets centralise the reporting of a replay sync error and add a little bit of extra information to help with debugging. Signed-off-by: Alex Bennée <alex.bennee@linaro.org> --- replay/replay-internal.h | 12 ++++++++++++ replay/replay-char.c | 6 ++---- replay/replay-internal.c | 1 + replay/replay.c | 9 +++++++++ 4 files changed, 24 insertions(+), 4 deletions(-)