diff mbox series

[tty-next,v3,1/6] serial: 8250: Adjust the timeout for FIFO mode

Message ID 20241025105728.602310-2-john.ogness@linutronix.de
State New
Headers show
Series convert 8250 to nbcon | expand

Commit Message

John Ogness Oct. 25, 2024, 10:57 a.m. UTC
After a console has fed a line into TX, it uses wait_for_xmitr()
to wait until the data has been sent out before returning to the
printk code. However, wait_for_xmitr() will timeout after 10ms,
regardless if the data has been transmitted or not.

For single bytes, this timeout is sufficient even at very slow
baud rates, such as 1200bps. However, when FIFO mode is used,
there may be 64 bytes pushed into the FIFO at once. At a baud
rate of 115200bps, the 10ms timeout is still sufficient.
However, when using lower baud rates (such as 57600bps), the
timeout is _not_ sufficient. This causes longer lines to be cut
off, resulting in lost and horribly misformatted output on the
console.

When using FIFO mode, take the number of bytes into account to
determine an appropriate max timeout. Increasing the timeout
does not affect performance since ideally the timeout never
occurs.

Fixes: 8f3631f0f6eb ("serial/8250: Use fifo in 8250 console driver")
Signed-off-by: John Ogness <john.ogness@linutronix.de>
---
 drivers/tty/serial/8250/8250_port.c | 24 ++++++++++++++++++++----
 1 file changed, 20 insertions(+), 4 deletions(-)

Comments

Jiri Slaby (SUSE) Oct. 30, 2024, 6:05 a.m. UTC | #1
On 25. 10. 24, 12:57, John Ogness wrote:
> After a console has fed a line into TX, it uses wait_for_xmitr()
> to wait until the data has been sent out before returning to the
> printk code. However, wait_for_xmitr() will timeout after 10ms,
> regardless if the data has been transmitted or not.
> 
> For single bytes, this timeout is sufficient even at very slow
> baud rates, such as 1200bps. However, when FIFO mode is used,
> there may be 64 bytes pushed into the FIFO at once. At a baud
> rate of 115200bps, the 10ms timeout is still sufficient.
> However, when using lower baud rates (such as 57600bps), the
> timeout is _not_ sufficient. This causes longer lines to be cut
> off, resulting in lost and horribly misformatted output on the
> console.
> 
> When using FIFO mode, take the number of bytes into account to
> determine an appropriate max timeout. Increasing the timeout
> does not affect performance since ideally the timeout never
> occurs.
> 
> Fixes: 8f3631f0f6eb ("serial/8250: Use fifo in 8250 console driver")
> Signed-off-by: John Ogness <john.ogness@linutronix.de>
> ---
>   drivers/tty/serial/8250/8250_port.c | 24 ++++++++++++++++++++----
>   1 file changed, 20 insertions(+), 4 deletions(-)
> 
> diff --git a/drivers/tty/serial/8250/8250_port.c b/drivers/tty/serial/8250/8250_port.c
> index 3509af7dc52b..adc48eeeac2b 100644
> --- a/drivers/tty/serial/8250/8250_port.c
> +++ b/drivers/tty/serial/8250/8250_port.c
> @@ -2059,11 +2059,12 @@ static void serial8250_break_ctl(struct uart_port *port, int break_state)
>   	serial8250_rpm_put(up);
>   }
>   
> -static void wait_for_lsr(struct uart_8250_port *up, int bits)
> +/* Returns true if @bits were set, false on timeout. */
> +static bool wait_for_lsr(struct uart_8250_port *up, int bits)
>   {
>   	unsigned int status, tmout = 10000;
>   
> -	/* Wait up to 10ms for the character(s) to be sent. */
> +	/* Wait up to 10ms for the character to be sent. */
>   	for (;;) {
>   		status = serial_lsr_in(up);
>   
> @@ -2074,10 +2075,13 @@ static void wait_for_lsr(struct uart_8250_port *up, int bits)
>   		udelay(1);
>   		touch_nmi_watchdog();
>   	}
> +
> +	return (tmout != 0);
>   }
>   
>   /*
>    *	Wait for transmitter & holding register to empty
> + *	with timeout
>    */
>   static void wait_for_xmitr(struct uart_8250_port *up, int bits)
>   {
> @@ -3306,13 +3310,18 @@ static void serial8250_console_restore(struct uart_8250_port *up)
>   static void serial8250_console_fifo_write(struct uart_8250_port *up,
>   					  const char *s, unsigned int count)
>   {
> -	int i;
>   	const char *end = s + count;
>   	unsigned int fifosize = up->tx_loadsz;
> +	unsigned int tx_count = 0;
>   	bool cr_sent = false;
> +	unsigned int i;
>   
>   	while (s != end) {
> -		wait_for_lsr(up, UART_LSR_THRE);
> +		/* Allow timeout for each byte of a possibly full FIFO. */
> +		for (i = 0; i < fifosize; i++) {
> +			if (wait_for_lsr(up, UART_LSR_THRE))
> +				break;
> +		}

THRE only signals there is a space for one character. Multiplying it 
with fifosize does not make much sense to me. You perhaps want only to 
increase the timeout. Or somehow incorporate port->frame_time into the 
accounting (I am not sure it is available at this point already).

>   		for (i = 0; i < fifosize && s != end; ++i) {
>   			if (*s == '\n' && !cr_sent) {
> @@ -3323,6 +3332,13 @@ static void serial8250_console_fifo_write(struct uart_8250_port *up,
>   				cr_sent = false;
>   			}
>   		}
> +		tx_count = i;
> +	}
> +
> +	/* Allow timeout for each byte written. */
> +	for (i = 0; i < tx_count; i++) {
> +		if (wait_for_lsr(up, UART_LSR_THRE))

This ensures you sent one character from the FIFO. The FIFO still can 
contain plenty of them. Did you want UART_LSR_TEMT?

But what's the purpose of spinning _here_? The kernel can run and FIFO 
too. Without the kernel waiting for the FIFO.

thanks,
Maciej W. Rozycki Oct. 31, 2024, 4:44 a.m. UTC | #2
On Wed, 30 Oct 2024, Jiri Slaby wrote:

> > @@ -3306,13 +3310,18 @@ static void serial8250_console_restore(struct
> > uart_8250_port *up)
> >   static void serial8250_console_fifo_write(struct uart_8250_port *up,
> >   					  const char *s, unsigned int count)
> >   {
> > -	int i;
> >   	const char *end = s + count;
> >   	unsigned int fifosize = up->tx_loadsz;
> > +	unsigned int tx_count = 0;
> >   	bool cr_sent = false;
> > +	unsigned int i;
> >     	while (s != end) {
> > -		wait_for_lsr(up, UART_LSR_THRE);
> > +		/* Allow timeout for each byte of a possibly full FIFO. */
> > +		for (i = 0; i < fifosize; i++) {
> > +			if (wait_for_lsr(up, UART_LSR_THRE))
> > +				break;
> > +		}
> 
> THRE only signals there is a space for one character.

 Nope[1]:

"In the FIFO mode, THRE is set when the transmit FIFO is empty; it is 
cleared when at least one byte is written to the transmit FIFO."

It seems common enough a misconception that once I actually had to fix the 
bad interpretation of THRE in an unpublished platform driver to get decent 
performance out of it at higher rates such as 230400bps, as it only pushed 
one byte at a time to the FIFO while it had it all available once THRE has 
been set.

> > +	/* Allow timeout for each byte written. */
> > +	for (i = 0; i < tx_count; i++) {
> > +		if (wait_for_lsr(up, UART_LSR_THRE))
> 
> This ensures you sent one character from the FIFO. The FIFO still can contain
> plenty of them. Did you want UART_LSR_TEMT?

 The difference between THRE and TEMT is the state of the shift register 
only[2]:

"In the FIFO mode, TEMT is set when the transmitter FIFO and shift 
register are both empty."

References:

[1] "TL16C550C, TL16C550CI Asynchronous Communications Element with 
    Autoflow Control", Texas Instruments, SLLS177F -- March 1994 -- 
    Revised March 2001, p. 30

[2] same

  Maciej
John Ogness Oct. 31, 2024, 8:49 a.m. UTC | #3
Hi Maciej,

Thanks for jumping in with some ref-manual quotes. Some more comments
from me below...

On 2024-10-31, "Maciej W. Rozycki" <macro@orcam.me.uk> wrote:
> On Wed, 30 Oct 2024, Jiri Slaby wrote:
>> > @@ -3306,13 +3310,18 @@ static void serial8250_console_restore(struct
>> > uart_8250_port *up)
>> >   static void serial8250_console_fifo_write(struct uart_8250_port *up,
>> >   					  const char *s, unsigned int count)
>> >   {
>> > -	int i;
>> >   	const char *end = s + count;
>> >   	unsigned int fifosize = up->tx_loadsz;
>> > +	unsigned int tx_count = 0;
>> >   	bool cr_sent = false;
>> > +	unsigned int i;
>> >     	while (s != end) {
>> > -		wait_for_lsr(up, UART_LSR_THRE);
>> > +		/* Allow timeout for each byte of a possibly full FIFO. */
>> > +		for (i = 0; i < fifosize; i++) {
>> > +			if (wait_for_lsr(up, UART_LSR_THRE))
>> > +				break;
>> > +		}
>> 
>> THRE only signals there is a space for one character.
>
>  Nope[1]:
>
> "In the FIFO mode, THRE is set when the transmit FIFO is empty; it is 
> cleared when at least one byte is written to the transmit FIFO."
>
> It seems common enough a misconception that once I actually had to fix the 
> bad interpretation of THRE in an unpublished platform driver to get decent 
> performance out of it at higher rates such as 230400bps, as it only pushed 
> one byte at a time to the FIFO while it had it all available once THRE has 
> been set.

I do not know if this is true for all 8250-variants. If there is some
variant where it functions as Jiri expected, then it would mean
significant text loss during longer messages. But that would already be
a problem in the current mainline driver.

>> > +	/* Allow timeout for each byte written. */
>> > +	for (i = 0; i < tx_count; i++) {
>> > +		if (wait_for_lsr(up, UART_LSR_THRE))
>> 
>> This ensures you sent one character from the FIFO. The FIFO still can contain
>> plenty of them. Did you want UART_LSR_TEMT?
>
>  The difference between THRE and TEMT is the state of the shift register 
> only[2]:
>
> "In the FIFO mode, TEMT is set when the transmitter FIFO and shift 
> register are both empty."

If we wait for TEMT, we lose significant advantages of having the FIFO.

>> But what's the purpose of spinning _here_? The kernel can run and FIFO
>> too. Without the kernel waiting for the FIFO.

When serial8250_console_fifo_write() exits, the caller just does a
single wait_for_xmitr() ... with a 10ms timeout. In the FIFO case, for
<=56k baudrates, it can easily hit the timeout and thus continue before
the FIFO has been emptied.

By waiting on UART_LSR_THRE after filling the FIFO,
serial8250_console_fifo_write() waits until the hardware has had a
chance to shift out all the data. Then the final wait_for_xmitr() in the
caller only waits for the final byte to go out on the line.

Please keep in mind that none of these timeouts should trigger during
normal operation.

For v4 I am doing some refactoring (as suggested by Andy) so that the
wait-code looks a bit cleaner.

John

> References:
>
> [1] "TL16C550C, TL16C550CI Asynchronous Communications Element with 
>     Autoflow Control", Texas Instruments, SLLS177F -- March 1994 -- 
>     Revised March 2001, p. 30
>
> [2] same
Maciej W. Rozycki Nov. 1, 2024, 1:24 a.m. UTC | #4
On Thu, 31 Oct 2024, John Ogness wrote:

> >> THRE only signals there is a space for one character.
> >
> >  Nope[1]:
> >
> > "In the FIFO mode, THRE is set when the transmit FIFO is empty; it is 
> > cleared when at least one byte is written to the transmit FIFO."
> >
> > It seems common enough a misconception that once I actually had to fix the 
> > bad interpretation of THRE in an unpublished platform driver to get decent 
> > performance out of it at higher rates such as 230400bps, as it only pushed 
> > one byte at a time to the FIFO while it had it all available once THRE has 
> > been set.
> 
> I do not know if this is true for all 8250-variants. If there is some
> variant where it functions as Jiri expected, then it would mean
> significant text loss during longer messages. But that would already be
> a problem in the current mainline driver.

 Or rather in my case it would prevent communication from working at all; 
I actually had to fix the issue for networking over a serial line rather 
than just exchanging text messages, and hence a particular need to make it 
run fast.

 I don't expect any 550 clone to work in a different manner, but I find 
the TI manual particularly unambiguous and well-written, and also old 
enough for the 550 to be the state of the art rather than just legacy.

  Maciej
Andy Shevchenko Nov. 1, 2024, 8:21 a.m. UTC | #5
On Fri, Nov 01, 2024 at 01:24:53AM +0000, Maciej W. Rozycki wrote:
> On Thu, 31 Oct 2024, John Ogness wrote:

>  I don't expect any 550 clone to work in a different manner, but I find
> the TI manual particularly unambiguous and well-written, and also old
> enough for the 550 to be the state of the art rather than just legacy.

Oh, one is living in the ideal world... :-)
Jiri Slaby (SUSE) Nov. 4, 2024, 6:34 a.m. UTC | #6
On 31. 10. 24, 5:44, Maciej W. Rozycki wrote:
> On Wed, 30 Oct 2024, Jiri Slaby wrote:
> 
>>> @@ -3306,13 +3310,18 @@ static void serial8250_console_restore(struct
>>> uart_8250_port *up)
>>>    static void serial8250_console_fifo_write(struct uart_8250_port *up,
>>>    					  const char *s, unsigned int count)
>>>    {
>>> -	int i;
>>>    	const char *end = s + count;
>>>    	unsigned int fifosize = up->tx_loadsz;
>>> +	unsigned int tx_count = 0;
>>>    	bool cr_sent = false;
>>> +	unsigned int i;
>>>      	while (s != end) {
>>> -		wait_for_lsr(up, UART_LSR_THRE);
>>> +		/* Allow timeout for each byte of a possibly full FIFO. */
>>> +		for (i = 0; i < fifosize; i++) {
>>> +			if (wait_for_lsr(up, UART_LSR_THRE))
>>> +				break;
>>> +		}
>>
>> THRE only signals there is a space for one character.
> 
>   Nope[1]:
> 
> "In the FIFO mode, THRE is set when the transmit FIFO is empty; it is
> cleared when at least one byte is written to the transmit FIFO."

Hmm, I was confused by NXP's 16c650b [1] datasheet then (or I cannot parse):
===
The THR empty flag in the LSR register will be set to a logic 1 when the 
transmitter is empty or when data is transferred to the TSR. Note that a 
write operation can be performed when the THR empty flag is set
(logic 0 = FIFO full; logic 1 = at least one FIFO location available).
===

But indeed in the LSR[5] bit description, they state:
===
In the FIFO mode, this bit is set when the transmit FIFO is
empty; it is cleared when at least 1 byte is written to the transmit FIFO.
===

Anyway, it still does not answer the original question: Instead of 
looping fifosize multiplied by random timeout, can we re-use 
port->frame_time?

[1] SC16C650B -- 5 V, 3.3 V and 2.5 V UART with 32-byte FIFOs and 
infrared (IrDA) encoder/decoder; Rev. 04 — 14 September 2009; Product 
data sheet

>>> +	/* Allow timeout for each byte written. */
>>> +	for (i = 0; i < tx_count; i++) {
>>> +		if (wait_for_lsr(up, UART_LSR_THRE))
>>
>> This ensures you sent one character from the FIFO. The FIFO still can contain
>> plenty of them. Did you want UART_LSR_TEMT?
> 
>   The difference between THRE and TEMT is the state of the shift register
> only[2]:
> 
> "In the FIFO mode, TEMT is set when the transmitter FIFO and shift
> register are both empty."

Sure. The question still holds:

 > But what's the purpose of spinning _here_? The kernel can run and 
FIFO too. Without the kernel waiting for the FIFO.

If we want to wait for fifo to empty, why not *also* the TSR. Meaning:

 > Did you want UART_LSR_TEMT?

thanks,
Jiri Slaby (SUSE) Nov. 4, 2024, 6:44 a.m. UTC | #7
On 31. 10. 24, 9:49, John Ogness wrote:
>>>> +	/* Allow timeout for each byte written. */
>>>> +	for (i = 0; i < tx_count; i++) {
>>>> +		if (wait_for_lsr(up, UART_LSR_THRE))
>>>
>>> This ensures you sent one character from the FIFO. The FIFO still can contain
>>> plenty of them. Did you want UART_LSR_TEMT?
>>
>>   The difference between THRE and TEMT is the state of the shift register
>> only[2]:
>>
>> "In the FIFO mode, TEMT is set when the transmitter FIFO and shift
>> register are both empty."
> 
> If we wait for TEMT, we lose significant advantages of having the FIFO.

But you wait for THRE, so effectively waiting for FIFO to flush. The 
difference is only one byte (TSR), or what am I missing?

>>> But what's the purpose of spinning _here_? The kernel can run and FIFO
>>> too. Without the kernel waiting for the FIFO.
> 
> When serial8250_console_fifo_write() exits, the caller just does a
> single wait_for_xmitr() ... with a 10ms timeout. In the FIFO case, for
> <=56k baudrates, it can easily hit the timeout and thus continue before
> the FIFO has been emptied.
>> By waiting on UART_LSR_THRE after filling the FIFO,
> serial8250_console_fifo_write() waits until the hardware has had a
> chance to shift out all the data. Then the final wait_for_xmitr() in the
> caller only waits for the final byte to go out on the line.

For the first loop, that's all right. But why would you want to wait for 
the FIFO to flush at the end of the function? It's not only the last 
byte, it's the last batch (aka 'tx_count'), right?

> Please keep in mind that none of these timeouts should trigger during
> normal operation.
> 
> For v4 I am doing some refactoring (as suggested by Andy) so that the
> wait-code looks a bit cleaner.

OK, let's see then :).

thanks,
John Ogness Nov. 4, 2024, 2:13 p.m. UTC | #8
On 2024-11-04, Jiri Slaby <jirislaby@kernel.org> wrote:
> Instead of looping fifosize multiplied by random timeout, can we
> re-use port->frame_time?

Rather than 10k loops, we could loop

	(port->frame_time * some_scaled_padding) / 1000

times. The padding is important because we should not timeout in the
normal scenario. Perhaps using ~2 as @some_padding. Something like:

	port->frame_time >> 9

?

>>   The difference between THRE and TEMT is the state of the shift register
>> only[2]:
>> 
>> "In the FIFO mode, TEMT is set when the transmitter FIFO and shift
>> register are both empty."
>
> But what's the purpose of spinning _here_? The kernel can run and
> FIFO too. Without the kernel waiting for the FIFO.
>
> If we want to wait for fifo to empty, why not *also* the TSR. Meaning:
>
> Did you want UART_LSR_TEMT?

Let us assume we have a line with 640 characters and a FIFO of 64
bytes. For this line, we must wait for the FIFO to empty 10 times. It is
enough to wait for THRE for each of the 64-byte blocks because we are
only interested in refilling the FIFO at this point. Only at the very
end (in the caller...  serial8250_console_write()) do we need to wait
for everything to flush to the wire (TEMT).

By waiting on TEMT for each of the 64-byte blocks, we are waiting longer
than necessary. This creates a small window where the FIFO is empty and
there is nothing being transmitted.

I did a simple test on my beaglebone-black hardware, sending 100 lines
of 924 bytes at 9600 bps. Since my hardware uses a 64-byte FIFO, each
line would have 14 such windows.

And indeed, waiting for TEMT rather than only THRE for the 64-byte
blocks resulted in an extra 30ms total transfer for all 92400
bytes. That is about 20us lost in each window by unnecessarily waiting
for TEMT.

Of course, we are only talking about console output, which is horribly
inefficient on system resources. But I would argue, if we do not care
about unnecessary waiting, then why even have the FIFO optimization in
the first place?

John
Maciej W. Rozycki Dec. 1, 2024, 12:04 a.m. UTC | #9
On Mon, 4 Nov 2024, Jiri Slaby wrote:

> > > THRE only signals there is a space for one character.
> > 
> >   Nope[1]:
> > 
> > "In the FIFO mode, THRE is set when the transmit FIFO is empty; it is
> > cleared when at least one byte is written to the transmit FIFO."
> 
> Hmm, I was confused by NXP's 16c650b [1] datasheet then (or I cannot parse):
> ===
> The THR empty flag in the LSR register will be set to a logic 1 when the
> transmitter is empty or when data is transferred to the TSR. Note that a write
> operation can be performed when the THR empty flag is set
> (logic 0 = FIFO full; logic 1 = at least one FIFO location available).
> ===

 This description seems broken indeed and I find your interpretation of it
correct.  I do hope this is just an editorial mistake with the NXP device.

> But indeed in the LSR[5] bit description, they state:
> ===
> In the FIFO mode, this bit is set when the transmit FIFO is
> empty; it is cleared when at least 1 byte is written to the transmit FIFO.
> ===

 FWIW I chased documentation for the original 16550A device and it uses 
analogous wording[1]:

"Bit 5: This bit is the Transmitter Holding Register Empty (THRE)
indicator. Bit 5 indicates that the UART is ready to
accept a new character for transmission. In addition, this bit
causes the UART to issue an interrupt to the CPU when the
Transmit Holding Register Empty Interrupt enable is set
high. The THRE bit is set to a logic 1 when a character is
transferred from the Transmitter Holding Register into the
Transmitter Shift Register. The bit is reset to logic 0 concur-
rently with the loading of the Transmitter Holding Register
by the CPU. In the FIFO mode this bit is set when the XMIT
FIFO is empty; it is cleared when at least 1 byte is written to
the XMIT FIFO."

It further documents polled operation for THRE[2]:

"LSR5 will indicate when the XMIT FIFO is empty."

and provides further details as to the transmit FIFO[3]:

"The NS16550A transmitter optimizes the CPU/UART data
transaction via the following features:

"1. The depth of the Transmitter (Tx) FIFO ensures that as
    many as 16 characters can be transferred when the
    CPU services the Tx interrupt. Once again, this effec-
    tively buffers the CPU transfer rate from the serial data
    rate.

"2. The Transmitter (Tx) FIFO is similar in structure to
    FIFOs the user may have previously set up in RAM. The
    Tx depth allows the CPU to load 16 characters each
    time it switches context to the service routine. This re-
    duces the impact of the CPU time lost in context switch-
    ing.

"3. Since a time lag in servicing an asynchronous transmit-
    ter usually has no penalty, CPU latency time is of no
    concern to transmitter operation."

This design choice may result in "choppy" transmission due to the transmit 
handler invocation latency, but as noted above it is intentional.  DMA can 
be used with the NS16550A if it is required to avoid this issue, and later 
compatible UART designs such as the 650 give more flexibility as to the Tx 
FIFO fill level trigger threshold.

References:

[1] National Semiconductor Corporation, "Microcommunication Elements 
    Databook, 1987 Edition", "NS16550A Universal Asynchronous 
    Receiver/Transmitter with FIFOs", Section 8.4 "LINE STATUS REGISTER", 
    p. 2-73

[2] same, Section 8.12 "FIFO POLLED MODE OPERATION", p. 2-75

[3] same, Martin S. Michael, Daniel G. Durich, Application Note 491, "The 
    NS 16550A: UART Design and Application Considerations", Section 1.0 
    "Design Considerations and Operation of the New UART Features", p. 5-4

Available from: 
<http://bitsavers.org/components/national/_dataBooks/1987_National_Microcommunications_Elements_Data_Book.pdf>.

 HTH,

  Maciej
Jiri Slaby (SUSE) Dec. 2, 2024, 6:12 a.m. UTC | #10
On 04. 11. 24, 15:13, John Ogness wrote:
> On 2024-11-04, Jiri Slaby <jirislaby@kernel.org> wrote:
>> Instead of looping fifosize multiplied by random timeout, can we
>> re-use port->frame_time?
> 
> Rather than 10k loops, we could loop
> 
> 	(port->frame_time * some_scaled_padding) / 1000
> 
> times. The padding is important because we should not timeout in the
> normal scenario. Perhaps using ~2 as @some_padding. Something like:
> 
> 	port->frame_time >> 9
> 
> ?

No, spell it out as you did above:
   port->frame_time * 2 / NSEC_PER_USEC

>>>    The difference between THRE and TEMT is the state of the shift register
>>> only[2]:
>>>
>>> "In the FIFO mode, TEMT is set when the transmitter FIFO and shift
>>> register are both empty."
>>
>> But what's the purpose of spinning _here_? The kernel can run and
>> FIFO too. Without the kernel waiting for the FIFO.
>>
>> If we want to wait for fifo to empty, why not *also* the TSR. Meaning:
>>
>> Did you want UART_LSR_TEMT?
> 
> Let us assume we have a line with 640 characters and a FIFO of 64
> bytes. For this line, we must wait for the FIFO to empty 10 times. It is
> enough to wait for THRE for each of the 64-byte blocks because we are
> only interested in refilling the FIFO at this point. Only at the very
> end (in the caller...  serial8250_console_write()) do we need to wait
> for everything to flush to the wire (TEMT).
> 
> By waiting on TEMT for each of the 64-byte blocks, we are waiting longer
> than necessary. This creates a small window where the FIFO is empty and
> there is nothing being transmitted.
> 
> I did a simple test on my beaglebone-black hardware, sending 100 lines
> of 924 bytes at 9600 bps. Since my hardware uses a 64-byte FIFO, each
> line would have 14 such windows.
> 
> And indeed, waiting for TEMT rather than only THRE for the 64-byte
> blocks resulted in an extra 30ms total transfer for all 92400
> bytes. That is about 20us lost in each window by unnecessarily waiting
> for TEMT.

Sure -- you still misunderstand me. I am still asking why do you want to 
wait for the TX machinery at the *end* (for the last 64 B of the 640 B 
line) of transmission at all? It occurs to me as wasted cycles.

thanks,
John Ogness Dec. 2, 2024, 4:41 p.m. UTC | #11
On 2024-12-02, Jiri Slaby <jirislaby@kernel.org> wrote:
> I am still asking why do you want to wait for the TX machinery at the
> *end* (for the last 64 B of the 640 B line) of transmission at all? It
> occurs to me as wasted cycles.

The printk-framework has always expected that when console->write()
returns, the data has been flushed out of the hardware. I am guessing
because it is easiest to avoid possible data loss, for example, due to
suspending hardware.

If you want me to change the current behavior, I can do that in a
separate patch. I would like this patch to only be about fixing the FIFO
timeout issue.

John
diff mbox series

Patch

diff --git a/drivers/tty/serial/8250/8250_port.c b/drivers/tty/serial/8250/8250_port.c
index 3509af7dc52b..adc48eeeac2b 100644
--- a/drivers/tty/serial/8250/8250_port.c
+++ b/drivers/tty/serial/8250/8250_port.c
@@ -2059,11 +2059,12 @@  static void serial8250_break_ctl(struct uart_port *port, int break_state)
 	serial8250_rpm_put(up);
 }
 
-static void wait_for_lsr(struct uart_8250_port *up, int bits)
+/* Returns true if @bits were set, false on timeout. */
+static bool wait_for_lsr(struct uart_8250_port *up, int bits)
 {
 	unsigned int status, tmout = 10000;
 
-	/* Wait up to 10ms for the character(s) to be sent. */
+	/* Wait up to 10ms for the character to be sent. */
 	for (;;) {
 		status = serial_lsr_in(up);
 
@@ -2074,10 +2075,13 @@  static void wait_for_lsr(struct uart_8250_port *up, int bits)
 		udelay(1);
 		touch_nmi_watchdog();
 	}
+
+	return (tmout != 0);
 }
 
 /*
  *	Wait for transmitter & holding register to empty
+ *	with timeout
  */
 static void wait_for_xmitr(struct uart_8250_port *up, int bits)
 {
@@ -3306,13 +3310,18 @@  static void serial8250_console_restore(struct uart_8250_port *up)
 static void serial8250_console_fifo_write(struct uart_8250_port *up,
 					  const char *s, unsigned int count)
 {
-	int i;
 	const char *end = s + count;
 	unsigned int fifosize = up->tx_loadsz;
+	unsigned int tx_count = 0;
 	bool cr_sent = false;
+	unsigned int i;
 
 	while (s != end) {
-		wait_for_lsr(up, UART_LSR_THRE);
+		/* Allow timeout for each byte of a possibly full FIFO. */
+		for (i = 0; i < fifosize; i++) {
+			if (wait_for_lsr(up, UART_LSR_THRE))
+				break;
+		}
 
 		for (i = 0; i < fifosize && s != end; ++i) {
 			if (*s == '\n' && !cr_sent) {
@@ -3323,6 +3332,13 @@  static void serial8250_console_fifo_write(struct uart_8250_port *up,
 				cr_sent = false;
 			}
 		}
+		tx_count = i;
+	}
+
+	/* Allow timeout for each byte written. */
+	for (i = 0; i < tx_count; i++) {
+		if (wait_for_lsr(up, UART_LSR_THRE))
+			break;
 	}
 }