mbox series

[net-next,0/2] tcp: exponential backoff in tcp_send_ack()

Message ID 20200930125457.1579469-1-eric.dumazet@gmail.com
Headers show
Series tcp: exponential backoff in tcp_send_ack() | expand

Message

Eric Dumazet Sept. 30, 2020, 12:54 p.m. UTC
From: Eric Dumazet <edumazet@google.com>

We had outages caused by repeated skb allocation failures in tcp_send_ack().

It is time to add exponential backoff to reduce the number of attempts.
Before doing so, the first patch removes icsk_ack.blocked to make
room for a new field (icsk_ack.retry).

Eric Dumazet (2):
  inet: remove icsk_ack.blocked
  tcp: add exponential backoff in __tcp_send_ack()

 include/net/inet_connection_sock.h |  5 +++--
 net/dccp/timer.c                   |  1 -
 net/ipv4/inet_connection_sock.c    |  2 +-
 net/ipv4/tcp.c                     |  6 ++----
 net/ipv4/tcp_output.c              | 18 ++++++++++--------
 net/ipv4/tcp_timer.c               |  1 -
 6 files changed, 16 insertions(+), 17 deletions(-)

Comments

Soheil Hassas Yeganeh Sept. 30, 2020, 1:45 p.m. UTC | #1
On Wed, Sep 30, 2020 at 8:55 AM Eric Dumazet <eric.dumazet@gmail.com> wrote:
>
> From: Eric Dumazet <edumazet@google.com>
>
> TCP has been using it to work around the possibility of tcp_delack_timer()
> finding the socket owned by user.
>
> After commit 6f458dfb4092 ("tcp: improve latencies of timer triggered events")
> we added TCP_DELACK_TIMER_DEFERRED atomic bit for more immediate recovery,
> so we can get rid of icsk_ack.blocked
>
> This frees space that following patch will reuse.
>
> Signed-off-by: Eric Dumazet <edumazet@google.com>

Acked-by: Soheil Hassas Yeganeh <soheil@google.com>

> ---
>  include/net/inet_connection_sock.h | 4 ++--
>  net/dccp/timer.c                   | 1 -
>  net/ipv4/inet_connection_sock.c    | 2 +-
>  net/ipv4/tcp.c                     | 6 ++----
>  net/ipv4/tcp_output.c              | 7 ++-----
>  net/ipv4/tcp_timer.c               | 1 -
>  6 files changed, 7 insertions(+), 14 deletions(-)
>
> diff --git a/include/net/inet_connection_sock.h b/include/net/inet_connection_sock.h
> index dc763ca9413cc9c6279a59f9d1776cf2dbb1e853..79875f976190750819948425e63dd0309c699050 100644
> --- a/include/net/inet_connection_sock.h
> +++ b/include/net/inet_connection_sock.h
> @@ -110,7 +110,7 @@ struct inet_connection_sock {
>                 __u8              pending;       /* ACK is pending                         */
>                 __u8              quick;         /* Scheduled number of quick acks         */
>                 __u8              pingpong;      /* The session is interactive             */
> -               __u8              blocked;       /* Delayed ACK was blocked by socket lock */
> +               /* one byte hole. */
>                 __u32             ato;           /* Predicted tick of soft clock           */
>                 unsigned long     timeout;       /* Currently scheduled timeout            */
>                 __u32             lrcvtime;      /* timestamp of last received data packet */
> @@ -198,7 +198,7 @@ static inline void inet_csk_clear_xmit_timer(struct sock *sk, const int what)
>                 sk_stop_timer(sk, &icsk->icsk_retransmit_timer);
>  #endif
>         } else if (what == ICSK_TIME_DACK) {
> -               icsk->icsk_ack.blocked = icsk->icsk_ack.pending = 0;
> +               icsk->icsk_ack.pending = 0;
>  #ifdef INET_CSK_CLEAR_TIMERS
>                 sk_stop_timer(sk, &icsk->icsk_delack_timer);
>  #endif
> diff --git a/net/dccp/timer.c b/net/dccp/timer.c
> index 927c796d76825439a35c4deb3fb2e45e4313f9b3..a934d293237366aeca87bd3c32241880639291c5 100644
> --- a/net/dccp/timer.c
> +++ b/net/dccp/timer.c
> @@ -176,7 +176,6 @@ static void dccp_delack_timer(struct timer_list *t)
>         bh_lock_sock(sk);
>         if (sock_owned_by_user(sk)) {
>                 /* Try again later. */
> -               icsk->icsk_ack.blocked = 1;
>                 __NET_INC_STATS(sock_net(sk), LINUX_MIB_DELAYEDACKLOCKED);
>                 sk_reset_timer(sk, &icsk->icsk_delack_timer,
>                                jiffies + TCP_DELACK_MIN);
> diff --git a/net/ipv4/inet_connection_sock.c b/net/ipv4/inet_connection_sock.c
> index b457dd2d6c75b2f63bc7849474ac909adb14d603..4148f5f78f313cde1e0596b9eb3696df16e3f990 100644
> --- a/net/ipv4/inet_connection_sock.c
> +++ b/net/ipv4/inet_connection_sock.c
> @@ -564,7 +564,7 @@ void inet_csk_clear_xmit_timers(struct sock *sk)
>  {
>         struct inet_connection_sock *icsk = inet_csk(sk);
>
> -       icsk->icsk_pending = icsk->icsk_ack.pending = icsk->icsk_ack.blocked = 0;
> +       icsk->icsk_pending = icsk->icsk_ack.pending = 0;
>
>         sk_stop_timer(sk, &icsk->icsk_retransmit_timer);
>         sk_stop_timer(sk, &icsk->icsk_delack_timer);
> diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c
> index 2a8bfa89a5159837e3687e4e0f8cddba7fe54899..ed2805564424a90f003eed867bbed7f5ac4ae833 100644
> --- a/net/ipv4/tcp.c
> +++ b/net/ipv4/tcp.c
> @@ -1538,10 +1538,8 @@ void tcp_cleanup_rbuf(struct sock *sk, int copied)
>
>         if (inet_csk_ack_scheduled(sk)) {
>                 const struct inet_connection_sock *icsk = inet_csk(sk);
> -                  /* Delayed ACKs frequently hit locked sockets during bulk
> -                   * receive. */
> -               if (icsk->icsk_ack.blocked ||
> -                   /* Once-per-two-segments ACK was not sent by tcp_input.c */
> +
> +               if (/* Once-per-two-segments ACK was not sent by tcp_input.c */
>                     tp->rcv_nxt - tp->rcv_wup > icsk->icsk_ack.rcv_mss ||
>                     /*
>                      * If this read emptied read buffer, we send ACK, if
> diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c
> index 386978dcd318d84af486d0d1a5bb1786f4a493cf..6bd4e383030ea20441332a30e98fbda8cd90f84a 100644
> --- a/net/ipv4/tcp_output.c
> +++ b/net/ipv4/tcp_output.c
> @@ -3911,11 +3911,8 @@ void tcp_send_delayed_ack(struct sock *sk)
>
>         /* Use new timeout only if there wasn't a older one earlier. */
>         if (icsk->icsk_ack.pending & ICSK_ACK_TIMER) {
> -               /* If delack timer was blocked or is about to expire,
> -                * send ACK now.
> -                */
> -               if (icsk->icsk_ack.blocked ||
> -                   time_before_eq(icsk->icsk_ack.timeout, jiffies + (ato >> 2))) {
> +               /* If delack timer is about to expire, send ACK now. */
> +               if (time_before_eq(icsk->icsk_ack.timeout, jiffies + (ato >> 2))) {
>                         tcp_send_ack(sk);
>                         return;
>                 }
> diff --git a/net/ipv4/tcp_timer.c b/net/ipv4/tcp_timer.c
> index 0c08c420fbc21a98dedf72148ea2a6f85bf3ff7a..6c62b9ea1320d9bbd26ed86b9f41de02fee6c491 100644
> --- a/net/ipv4/tcp_timer.c
> +++ b/net/ipv4/tcp_timer.c
> @@ -331,7 +331,6 @@ static void tcp_delack_timer(struct timer_list *t)
>         if (!sock_owned_by_user(sk)) {
>                 tcp_delack_timer_handler(sk);
>         } else {
> -               icsk->icsk_ack.blocked = 1;
>                 __NET_INC_STATS(sock_net(sk), LINUX_MIB_DELAYEDACKLOCKED);
>                 /* deleguate our work to tcp_release_cb() */
>                 if (!test_and_set_bit(TCP_DELACK_TIMER_DEFERRED, &sk->sk_tsq_flags))
> --
> 2.28.0.806.g8561365e88-goog
>
David Miller Sept. 30, 2020, 9:21 p.m. UTC | #2
From: Eric Dumazet <eric.dumazet@gmail.com>
Date: Wed, 30 Sep 2020 05:54:55 -0700

> We had outages caused by repeated skb allocation failures in tcp_send_ack().
> 
> It is time to add exponential backoff to reduce the number of attempts.
> Before doing so, the first patch removes icsk_ack.blocked to make
> room for a new field (icsk_ack.retry).

Series applied, thanks Eric.