diff mbox series

[PATCH-for-8.2,v4,10/10] hw/char/pl011: Implement TX FIFO

Message ID 20231109192814.95977-11-philmd@linaro.org
State New
Headers show
Series hw/char/pl011: Implement TX (async) FIFO to avoid blocking the main loop | expand

Commit Message

Philippe Mathieu-Daudé Nov. 9, 2023, 7:28 p.m. UTC
If the UART back-end chardev doesn't drain data as fast as stdout
does or blocks, buffer in the TX FIFO to try again later.

This avoids having the IO-thread busy waiting on chardev back-ends,
reported recently when testing the Trusted Reference Stack and
using the socket backend:
https://linaro.atlassian.net/browse/TRS-149?focusedCommentId=149574

Implement registering a front-end 'watch' callback on back-end
events, so we can resume transmitting when the back-end is writable
again, not blocking the main loop.

Similarly to the RX FIFO path, FIFO level selection is not
implemented (interrupt is triggered when a single byte is available
in the FIFO).

We only migrate the TX FIFO if it is in use.

Reported-by: Mikko Rapeli <mikko.rapeli@linaro.org>
Suggested-by: Alex Bennée <alex.bennee@linaro.org>
Signed-off-by: Philippe Mathieu-Daudé <philmd@linaro.org>
---
 hw/char/pl011.c      | 107 ++++++++++++++++++++++++++++++++++++++++---
 hw/char/trace-events |   4 ++
 2 files changed, 105 insertions(+), 6 deletions(-)

Comments

Richard Henderson Nov. 9, 2023, 11:34 p.m. UTC | #1
On 11/9/23 11:28, Philippe Mathieu-Daudé wrote:
> If the UART back-end chardev doesn't drain data as fast as stdout
> does or blocks, buffer in the TX FIFO to try again later.
> 
> This avoids having the IO-thread busy waiting on chardev back-ends,
> reported recently when testing the Trusted Reference Stack and
> using the socket backend:
> https://linaro.atlassian.net/browse/TRS-149?focusedCommentId=149574
> 
> Implement registering a front-end 'watch' callback on back-end
> events, so we can resume transmitting when the back-end is writable
> again, not blocking the main loop.
> 
> Similarly to the RX FIFO path, FIFO level selection is not
> implemented (interrupt is triggered when a single byte is available
> in the FIFO).
> 
> We only migrate the TX FIFO if it is in use.
> 
> Reported-by: Mikko Rapeli <mikko.rapeli@linaro.org>
> Suggested-by: Alex Bennée <alex.bennee@linaro.org>
> Signed-off-by: Philippe Mathieu-Daudé <philmd@linaro.org>
> ---
>   hw/char/pl011.c      | 107 ++++++++++++++++++++++++++++++++++++++++---
>   hw/char/trace-events |   4 ++
>   2 files changed, 105 insertions(+), 6 deletions(-)
> 
> diff --git a/hw/char/pl011.c b/hw/char/pl011.c
> index f474f56780..a14ece4f07 100644
> --- a/hw/char/pl011.c
> +++ b/hw/char/pl011.c
> @@ -57,6 +57,9 @@ DeviceState *pl011_create(hwaddr addr, qemu_irq irq, Chardev *chr)
>   /* Data Register, UARTDR */
>   #define DR_BE   (1 << 10)
>   
> +/* Receive Status Register/Error Clear Register, UARTRSR/UARTECR */
> +#define RSR_OE  (1 << 3)
> +
>   /* Interrupt status bits in UARTRIS, UARTMIS, UARTIMSC */
>   #define INT_OE (1 << 10)
>   #define INT_BE (1 << 9)
> @@ -156,6 +159,68 @@ static void pl011_reset_tx_fifo(PL011State *s)
>       fifo8_reset(&s->xmit_fifo);
>   }
>   
> +static gboolean pl011_drain_tx(PL011State *s)
> +{
> +    trace_pl011_fifo_tx_drain(fifo8_num_used(&s->xmit_fifo));
> +    pl011_reset_tx_fifo(s);
> +    s->rsr &= ~RSR_OE;
> +    return G_SOURCE_REMOVE;
> +}
> +
> +static gboolean pl011_xmit(void *do_not_use, GIOCondition cond, void *opaque)
> +{
> +    PL011State *s = opaque;
> +    int ret;
> +    const uint8_t *buf;
> +    uint32_t buflen;
> +    uint32_t count;
> +    bool tx_enabled;
> +
> +    tx_enabled = (s->cr & CR_UARTEN) && (s->cr & CR_TXE);
> +    if (!tx_enabled) {
> +        /*
> +         * If TX is disabled, nothing to do.
> +         * Keep the potentially used FIFO as is.
> +         */
> +        return G_SOURCE_REMOVE;
> +    }
> +
> +    if (!qemu_chr_fe_backend_connected(&s->chr)) {
> +        /* Instant drain the fifo when there's no back-end */
> +        return pl011_drain_tx(s);
> +    }
> +
> +    count = fifo8_num_used(&s->xmit_fifo);
> +    if (count < 1) {
> +        /* FIFO empty */
> +        return G_SOURCE_REMOVE;
> +    }

Could swap these two blocks.  Certainly the fifo does not need draining if it is empty...

> +    if (!fifo8_is_empty(&s->xmit_fifo)) {
> +        /* Reschedule another transmission if we couldn't transmit all */
> +        guint r = qemu_chr_fe_add_watch(&s->chr, G_IO_OUT | G_IO_HUP,
> +                                        pl011_xmit, s);
> +        if (!r) {
> +            /* Error in back-end? */
> +            return pl011_drain_tx(s);
> +        }
> +    }
> +
> +    pl011_update(s);
> +
> +    return G_SOURCE_REMOVE;

The documentation for FEWatchFunc says you should be returning G_SOURCE_CONTINUE to leave 
the source in the main loop.  That certainly makes more sense than re-adding a watch when 
the fifo is not empty.  There's also no error handling to do then.  Just


     pl011_update(s);
     return fifo8_is_empty(&s->xmit_fifo) ? G_SOURCE_REMOVE : G_SOURCE_CONTINUE;

> +    case 12: /* UARTCR */ {
> +        uint16_t en_bits = s->cr & (CR_UARTEN | CR_TXE | CR_RXE);
> +        uint16_t dis_bits = value & (CR_UARTEN | CR_TXE | CR_RXE);
> +        if (en_bits ^ dis_bits && !fifo8_is_empty(&s->xmit_fifo)) {
> +            /*
> +             * If the UART is disabled in the middle of transmission
> +             * or reception, it completes the current character before
> +             * stopping.
> +             */
> +            pl011_xmit(NULL, G_IO_OUT, s);

This seems wrong.  You don't know that the host device is ready.

We will never transmit a partial character because the host will never accept less than 
one character at a time.  Therefore there's absolutely nothing we need to do in order to 
"complete the current character", which is the one we started with the *previous* pl011_xmit.

Just set the CR bits with no further comment.


r~
Marc-André Lureau Nov. 22, 2023, 10:31 a.m. UTC | #2
Hi

On Thu, Nov 9, 2023 at 11:30 PM Philippe Mathieu-Daudé
<philmd@linaro.org> wrote:
>
> If the UART back-end chardev doesn't drain data as fast as stdout
> does or blocks, buffer in the TX FIFO to try again later.
>
> This avoids having the IO-thread busy waiting on chardev back-ends,
> reported recently when testing the Trusted Reference Stack and
> using the socket backend:
> https://linaro.atlassian.net/browse/TRS-149?focusedCommentId=149574
>
> Implement registering a front-end 'watch' callback on back-end
> events, so we can resume transmitting when the back-end is writable
> again, not blocking the main loop.

I do not have access to that Jira issue.

In general, chardev backends should have some buffering already
(socket, files etc).

If we want more, or extra control over buffering, maybe this should be
implemented at the chardev level, rather than each frontend implement
its own extra buffering logic...

Regardless, I think frontends should have an option to "drop" data
when the chardev/buffer is full, rather than hanging.


>
> Similarly to the RX FIFO path, FIFO level selection is not
> implemented (interrupt is triggered when a single byte is available
> in the FIFO).
>
> We only migrate the TX FIFO if it is in use.
>
> Reported-by: Mikko Rapeli <mikko.rapeli@linaro.org>
> Suggested-by: Alex Bennée <alex.bennee@linaro.org>
> Signed-off-by: Philippe Mathieu-Daudé <philmd@linaro.org>
> ---
>  hw/char/pl011.c      | 107 ++++++++++++++++++++++++++++++++++++++++---
>  hw/char/trace-events |   4 ++
>  2 files changed, 105 insertions(+), 6 deletions(-)
>
> diff --git a/hw/char/pl011.c b/hw/char/pl011.c
> index f474f56780..a14ece4f07 100644
> --- a/hw/char/pl011.c
> +++ b/hw/char/pl011.c
> @@ -57,6 +57,9 @@ DeviceState *pl011_create(hwaddr addr, qemu_irq irq, Chardev *chr)
>  /* Data Register, UARTDR */
>  #define DR_BE   (1 << 10)
>
> +/* Receive Status Register/Error Clear Register, UARTRSR/UARTECR */
> +#define RSR_OE  (1 << 3)
> +
>  /* Interrupt status bits in UARTRIS, UARTMIS, UARTIMSC */
>  #define INT_OE (1 << 10)
>  #define INT_BE (1 << 9)
> @@ -156,6 +159,68 @@ static void pl011_reset_tx_fifo(PL011State *s)
>      fifo8_reset(&s->xmit_fifo);
>  }
>
> +static gboolean pl011_drain_tx(PL011State *s)
> +{
> +    trace_pl011_fifo_tx_drain(fifo8_num_used(&s->xmit_fifo));
> +    pl011_reset_tx_fifo(s);
> +    s->rsr &= ~RSR_OE;
> +    return G_SOURCE_REMOVE;
> +}
> +
> +static gboolean pl011_xmit(void *do_not_use, GIOCondition cond, void *opaque)
> +{
> +    PL011State *s = opaque;
> +    int ret;
> +    const uint8_t *buf;
> +    uint32_t buflen;
> +    uint32_t count;
> +    bool tx_enabled;
> +
> +    tx_enabled = (s->cr & CR_UARTEN) && (s->cr & CR_TXE);
> +    if (!tx_enabled) {
> +        /*
> +         * If TX is disabled, nothing to do.
> +         * Keep the potentially used FIFO as is.
> +         */
> +        return G_SOURCE_REMOVE;
> +    }
> +
> +    if (!qemu_chr_fe_backend_connected(&s->chr)) {
> +        /* Instant drain the fifo when there's no back-end */
> +        return pl011_drain_tx(s);
> +    }
> +
> +    count = fifo8_num_used(&s->xmit_fifo);
> +    if (count < 1) {
> +        /* FIFO empty */
> +        return G_SOURCE_REMOVE;
> +    }
> +
> +    /* Transmit as much data as we can */
> +    buf = fifo8_peek_buf(&s->xmit_fifo, count, &buflen);
> +    ret = qemu_chr_fe_write(&s->chr, buf, buflen);
> +    if (ret >= 0) {
> +        /* Pop the data we could transmit */
> +        trace_pl011_fifo_tx_xmit(ret);
> +        fifo8_pop_buf(&s->xmit_fifo, ret, NULL);
> +        s->int_level |= INT_TX;
> +    }
> +
> +    if (!fifo8_is_empty(&s->xmit_fifo)) {
> +        /* Reschedule another transmission if we couldn't transmit all */
> +        guint r = qemu_chr_fe_add_watch(&s->chr, G_IO_OUT | G_IO_HUP,
> +                                        pl011_xmit, s);
> +        if (!r) {
> +            /* Error in back-end? */
> +            return pl011_drain_tx(s);
> +        }
> +    }
> +
> +    pl011_update(s);
> +
> +    return G_SOURCE_REMOVE;
> +}
> +
>  static void pl011_write_txdata(PL011State *s, uint8_t data)
>  {
>      if (!(s->cr & CR_UARTEN)) {
> @@ -165,11 +230,25 @@ static void pl011_write_txdata(PL011State *s, uint8_t data)
>          qemu_log_mask(LOG_GUEST_ERROR, "PL011 data written to disabled TX UART\n");
>      }
>
> -    /* XXX this blocks entire thread. Rewrite to use
> -     * qemu_chr_fe_write and background I/O callbacks */
> -    qemu_chr_fe_write_all(&s->chr, &data, 1);
> -    s->int_level |= INT_TX;
> -    pl011_update(s);
> +    if (fifo8_is_full(&s->xmit_fifo)) {
> +        /*
> +         * The FIFO contents remain valid because no more data is
> +         * written when the FIFO is full, only the contents of the
> +         * shift register are overwritten. The CPU must now read
> +         * the data, to empty the FIFO.
> +         */
> +        trace_pl011_fifo_tx_overrun();
> +        s->rsr |= RSR_OE;
> +        return;
> +    }
> +
> +    trace_pl011_fifo_tx_put(data);
> +    fifo8_push(&s->xmit_fifo, data);
> +    if (fifo8_is_full(&s->xmit_fifo)) {
> +        s->flags |= PL011_FLAG_TXFF;
> +    }
> +
> +    pl011_xmit(NULL, G_IO_OUT, s);
>  }
>
>  static uint32_t pl011_read_rxdata(PL011State *s)
> @@ -331,10 +410,21 @@ static void pl011_write(void *opaque, hwaddr offset,
>          s->lcr = value;
>          pl011_set_read_trigger(s);
>          break;
> -    case 12: /* UARTCR */
> +    case 12: /* UARTCR */ {
> +        uint16_t en_bits = s->cr & (CR_UARTEN | CR_TXE | CR_RXE);
> +        uint16_t dis_bits = value & (CR_UARTEN | CR_TXE | CR_RXE);
> +        if (en_bits ^ dis_bits && !fifo8_is_empty(&s->xmit_fifo)) {
> +            /*
> +             * If the UART is disabled in the middle of transmission
> +             * or reception, it completes the current character before
> +             * stopping.
> +             */
> +            pl011_xmit(NULL, G_IO_OUT, s);
> +        }
>          /* ??? Need to implement the enable and loopback bits.  */
>          s->cr = value;
>          break;
> +    }
>      case 13: /* UARTIFS */
>          s->ifl = value;
>          pl011_set_read_trigger(s);
> @@ -477,6 +567,11 @@ static int pl011_post_load(void *opaque, int version_id)
>          s->read_pos = 0;
>      }
>
> +    if (!fifo8_is_empty(&s->xmit_fifo)) {
> +        /* Reschedule another transmission */
> +        qemu_chr_fe_add_watch(&s->chr, G_IO_OUT | G_IO_HUP, pl011_xmit, s);
> +    }
> +
>      return 0;
>  }
>
> diff --git a/hw/char/trace-events b/hw/char/trace-events
> index bc9e84261f..ee00af0c66 100644
> --- a/hw/char/trace-events
> +++ b/hw/char/trace-events
> @@ -60,6 +60,10 @@ pl011_write(uint32_t addr, uint32_t value, const char *regname) "addr 0x%03x val
>  pl011_can_receive(uint32_t lcr, int read_count, int r) "LCR 0x%08x read_count %d returning %d"
>  pl011_fifo_rx_put(uint32_t c, int read_count) "new char 0x%02x read_count now %d"
>  pl011_fifo_rx_full(void) "RX FIFO now full, RXFF set"
> +pl011_fifo_tx_put(uint8_t byte) "TX FIFO push [0x%02x]"
> +pl011_fifo_tx_xmit(int count) "TX FIFO pop %d"
> +pl011_fifo_tx_overrun(void) "TX FIFO overrun"
> +pl011_fifo_tx_drain(unsigned drained) "TX FIFO draining %u"
>  pl011_baudrate_change(unsigned int baudrate, uint64_t clock, uint32_t ibrd, uint32_t fbrd) "new baudrate %u (clk: %" PRIu64 "hz, ibrd: %" PRIu32 ", fbrd: %" PRIu32 ")"
>
>  # cmsdk-apb-uart.c
> --
> 2.41.0
>
>


--
Marc-André Lureau
Daniel P. Berrangé Nov. 22, 2023, 10:38 a.m. UTC | #3
On Wed, Nov 22, 2023 at 02:31:29PM +0400, Marc-André Lureau wrote:
> Hi
> 
> On Thu, Nov 9, 2023 at 11:30 PM Philippe Mathieu-Daudé
> <philmd@linaro.org> wrote:
> >
> > If the UART back-end chardev doesn't drain data as fast as stdout
> > does or blocks, buffer in the TX FIFO to try again later.
> >
> > This avoids having the IO-thread busy waiting on chardev back-ends,
> > reported recently when testing the Trusted Reference Stack and
> > using the socket backend:
> > https://linaro.atlassian.net/browse/TRS-149?focusedCommentId=149574
> >
> > Implement registering a front-end 'watch' callback on back-end
> > events, so we can resume transmitting when the back-end is writable
> > again, not blocking the main loop.
> 
> I do not have access to that Jira issue.
> 
> In general, chardev backends should have some buffering already
> (socket, files etc).
> 
> If we want more, or extra control over buffering, maybe this should be
> implemented at the chardev level, rather than each frontend implement
> its own extra buffering logic...
> 
> Regardless, I think frontends should have an option to "drop" data
> when the chardev/buffer is full, rather than hanging.

Does anyone really want data to be dropped by QEMU ? Every time I've seen
a scenario where data has been dropped or lost, it has been considered
a bug to be solved.

Sure, we don't want QEMU to block on chardev writes, but we want that
more than throwing away data.

What's the use case for capturing data from the serial port, but throwing
it away if the other end of a socket doesn't read quickly enough ?

If someone does want lossy serial ports, they could configure the UDP
charedev backend already.

With regards,
Daniel
Philippe Mathieu-Daudé Nov. 24, 2023, 12:47 p.m. UTC | #4
On 22/11/23 11:38, Daniel P. Berrangé wrote:
> On Wed, Nov 22, 2023 at 02:31:29PM +0400, Marc-André Lureau wrote:
>> Hi
>>
>> On Thu, Nov 9, 2023 at 11:30 PM Philippe Mathieu-Daudé
>> <philmd@linaro.org> wrote:
>>>
>>> If the UART back-end chardev doesn't drain data as fast as stdout
>>> does or blocks, buffer in the TX FIFO to try again later.
>>>
>>> This avoids having the IO-thread busy waiting on chardev back-ends,
>>> reported recently when testing the Trusted Reference Stack and
>>> using the socket backend:
>>> https://linaro.atlassian.net/browse/TRS-149?focusedCommentId=149574
>>>
>>> Implement registering a front-end 'watch' callback on back-end
>>> events, so we can resume transmitting when the back-end is writable
>>> again, not blocking the main loop.
>>
>> I do not have access to that Jira issue.
>>
>> In general, chardev backends should have some buffering already
>> (socket, files etc).
>>
>> If we want more, or extra control over buffering, maybe this should be
>> implemented at the chardev level, rather than each frontend implement
>> its own extra buffering logic...
>>
>> Regardless, I think frontends should have an option to "drop" data
>> when the chardev/buffer is full, rather than hanging.
> 
> Does anyone really want data to be dropped by QEMU ? Every time I've seen
> a scenario where data has been dropped or lost, it has been considered
> a bug to be solved.

A kind of counter example is the RX UART model, which is used in
embedded world and respects the baudrate timing. I guess some scripts
were working with the QEMU UART chardev, but them the same script
failed when using HW UART, so realistic HW baudrate was emulated using
the timer API. See the chardev frontend handlers:

static int can_receive(void *opaque)
{
     RSCIState *sci = RSCI(opaque);
     if (sci->rx_next > qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL)) {
         return 0;
     } else {
         return FIELD_EX8(sci->scr, SCR, RE);
     }
}

The TX path also use a timer:

static void send_byte(RSCIState *sci)
{
     if (qemu_chr_fe_backend_connected(&sci->chr)) {
         qemu_chr_fe_write_all(&sci->chr, &sci->tdr, 1);
     }
     timer_mod(&sci->timer,
              qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL) + sci->trtime);

The more complex 16550A UART model also use timer in FIFO mode.

> Sure, we don't want QEMU to block on chardev writes, but we want that
> more than throwing away data.
> 
> What's the use case for capturing data from the serial port, but throwing
> it away if the other end of a socket doesn't read quickly enough ?
> 
> If someone does want lossy serial ports, they could configure the UDP
> charedev backend already.
> 
> With regards,
> Daniel
diff mbox series

Patch

diff --git a/hw/char/pl011.c b/hw/char/pl011.c
index f474f56780..a14ece4f07 100644
--- a/hw/char/pl011.c
+++ b/hw/char/pl011.c
@@ -57,6 +57,9 @@  DeviceState *pl011_create(hwaddr addr, qemu_irq irq, Chardev *chr)
 /* Data Register, UARTDR */
 #define DR_BE   (1 << 10)
 
+/* Receive Status Register/Error Clear Register, UARTRSR/UARTECR */
+#define RSR_OE  (1 << 3)
+
 /* Interrupt status bits in UARTRIS, UARTMIS, UARTIMSC */
 #define INT_OE (1 << 10)
 #define INT_BE (1 << 9)
@@ -156,6 +159,68 @@  static void pl011_reset_tx_fifo(PL011State *s)
     fifo8_reset(&s->xmit_fifo);
 }
 
+static gboolean pl011_drain_tx(PL011State *s)
+{
+    trace_pl011_fifo_tx_drain(fifo8_num_used(&s->xmit_fifo));
+    pl011_reset_tx_fifo(s);
+    s->rsr &= ~RSR_OE;
+    return G_SOURCE_REMOVE;
+}
+
+static gboolean pl011_xmit(void *do_not_use, GIOCondition cond, void *opaque)
+{
+    PL011State *s = opaque;
+    int ret;
+    const uint8_t *buf;
+    uint32_t buflen;
+    uint32_t count;
+    bool tx_enabled;
+
+    tx_enabled = (s->cr & CR_UARTEN) && (s->cr & CR_TXE);
+    if (!tx_enabled) {
+        /*
+         * If TX is disabled, nothing to do.
+         * Keep the potentially used FIFO as is.
+         */
+        return G_SOURCE_REMOVE;
+    }
+
+    if (!qemu_chr_fe_backend_connected(&s->chr)) {
+        /* Instant drain the fifo when there's no back-end */
+        return pl011_drain_tx(s);
+    }
+
+    count = fifo8_num_used(&s->xmit_fifo);
+    if (count < 1) {
+        /* FIFO empty */
+        return G_SOURCE_REMOVE;
+    }
+
+    /* Transmit as much data as we can */
+    buf = fifo8_peek_buf(&s->xmit_fifo, count, &buflen);
+    ret = qemu_chr_fe_write(&s->chr, buf, buflen);
+    if (ret >= 0) {
+        /* Pop the data we could transmit */
+        trace_pl011_fifo_tx_xmit(ret);
+        fifo8_pop_buf(&s->xmit_fifo, ret, NULL);
+        s->int_level |= INT_TX;
+    }
+
+    if (!fifo8_is_empty(&s->xmit_fifo)) {
+        /* Reschedule another transmission if we couldn't transmit all */
+        guint r = qemu_chr_fe_add_watch(&s->chr, G_IO_OUT | G_IO_HUP,
+                                        pl011_xmit, s);
+        if (!r) {
+            /* Error in back-end? */
+            return pl011_drain_tx(s);
+        }
+    }
+
+    pl011_update(s);
+
+    return G_SOURCE_REMOVE;
+}
+
 static void pl011_write_txdata(PL011State *s, uint8_t data)
 {
     if (!(s->cr & CR_UARTEN)) {
@@ -165,11 +230,25 @@  static void pl011_write_txdata(PL011State *s, uint8_t data)
         qemu_log_mask(LOG_GUEST_ERROR, "PL011 data written to disabled TX UART\n");
     }
 
-    /* XXX this blocks entire thread. Rewrite to use
-     * qemu_chr_fe_write and background I/O callbacks */
-    qemu_chr_fe_write_all(&s->chr, &data, 1);
-    s->int_level |= INT_TX;
-    pl011_update(s);
+    if (fifo8_is_full(&s->xmit_fifo)) {
+        /*
+         * The FIFO contents remain valid because no more data is
+         * written when the FIFO is full, only the contents of the
+         * shift register are overwritten. The CPU must now read
+         * the data, to empty the FIFO.
+         */
+        trace_pl011_fifo_tx_overrun();
+        s->rsr |= RSR_OE;
+        return;
+    }
+
+    trace_pl011_fifo_tx_put(data);
+    fifo8_push(&s->xmit_fifo, data);
+    if (fifo8_is_full(&s->xmit_fifo)) {
+        s->flags |= PL011_FLAG_TXFF;
+    }
+
+    pl011_xmit(NULL, G_IO_OUT, s);
 }
 
 static uint32_t pl011_read_rxdata(PL011State *s)
@@ -331,10 +410,21 @@  static void pl011_write(void *opaque, hwaddr offset,
         s->lcr = value;
         pl011_set_read_trigger(s);
         break;
-    case 12: /* UARTCR */
+    case 12: /* UARTCR */ {
+        uint16_t en_bits = s->cr & (CR_UARTEN | CR_TXE | CR_RXE);
+        uint16_t dis_bits = value & (CR_UARTEN | CR_TXE | CR_RXE);
+        if (en_bits ^ dis_bits && !fifo8_is_empty(&s->xmit_fifo)) {
+            /*
+             * If the UART is disabled in the middle of transmission
+             * or reception, it completes the current character before
+             * stopping.
+             */
+            pl011_xmit(NULL, G_IO_OUT, s);
+        }
         /* ??? Need to implement the enable and loopback bits.  */
         s->cr = value;
         break;
+    }
     case 13: /* UARTIFS */
         s->ifl = value;
         pl011_set_read_trigger(s);
@@ -477,6 +567,11 @@  static int pl011_post_load(void *opaque, int version_id)
         s->read_pos = 0;
     }
 
+    if (!fifo8_is_empty(&s->xmit_fifo)) {
+        /* Reschedule another transmission */
+        qemu_chr_fe_add_watch(&s->chr, G_IO_OUT | G_IO_HUP, pl011_xmit, s);
+    }
+
     return 0;
 }
 
diff --git a/hw/char/trace-events b/hw/char/trace-events
index bc9e84261f..ee00af0c66 100644
--- a/hw/char/trace-events
+++ b/hw/char/trace-events
@@ -60,6 +60,10 @@  pl011_write(uint32_t addr, uint32_t value, const char *regname) "addr 0x%03x val
 pl011_can_receive(uint32_t lcr, int read_count, int r) "LCR 0x%08x read_count %d returning %d"
 pl011_fifo_rx_put(uint32_t c, int read_count) "new char 0x%02x read_count now %d"
 pl011_fifo_rx_full(void) "RX FIFO now full, RXFF set"
+pl011_fifo_tx_put(uint8_t byte) "TX FIFO push [0x%02x]"
+pl011_fifo_tx_xmit(int count) "TX FIFO pop %d"
+pl011_fifo_tx_overrun(void) "TX FIFO overrun"
+pl011_fifo_tx_drain(unsigned drained) "TX FIFO draining %u"
 pl011_baudrate_change(unsigned int baudrate, uint64_t clock, uint32_t ibrd, uint32_t fbrd) "new baudrate %u (clk: %" PRIu64 "hz, ibrd: %" PRIu32 ", fbrd: %" PRIu32 ")"
 
 # cmsdk-apb-uart.c