diff mbox series

migration/multifd: fix hangup with TLS-Multifd due to blocking handshake

Message ID 1604643893-8223-1-git-send-email-zhengchuan@huawei.com
State Accepted
Commit a1af605bd5ade1a6dd571f553a6746b97f3d6869
Headers show
Series migration/multifd: fix hangup with TLS-Multifd due to blocking handshake | expand

Commit Message

Zheng Chuan Nov. 6, 2020, 6:24 a.m. UTC
The qemu main loop could hang up forever when we enable TLS+Multifd.
The Src multifd_send_0 invokes the TLS handshake: it sends hello to the server
and waits for the response.
However, the Dst main qemu loop has been waiting recvmsg() for multifd_recv_1.
Both the Src and Dst main qemu loops are blocking and waiting for a response, which
results in hanging up forever.

Src: (multifd_send_0)                                              Dst: (multifd_recv_1)
multifd_channel_connect                                            migration_channel_process_incoming
  multifd_tls_channel_connect                                        migration_tls_channel_process_incoming
    multifd_tls_channel_connect                                        qio_channel_tls_handshake_task
       qio_channel_tls_handshake                                         gnutls_handshake
          qio_channel_tls_handshake_task                                       ...
            qcrypto_tls_session_handshake                                      ...
              gnutls_handshake                                                 ...
                   ...                                                         ...
                recvmsg (Blocking I/O waiting for response)                recvmsg (Blocking I/O waiting for response)

Fix this by offloading handshake work to a background thread.

Reported-by: Yan Jin <jinyan12@huawei.com>
Suggested-by: Daniel P. Berrangé <berrange@redhat.com>
Signed-off-by: Chuan Zheng <zhengchuan@huawei.com>
---
 migration/multifd.c | 23 +++++++++++++++++------
 1 file changed, 17 insertions(+), 6 deletions(-)

Comments

Dr. David Alan Gilbert Nov. 12, 2020, 2:35 p.m. UTC | #1
* Chuan Zheng (zhengchuan@huawei.com) wrote:
> The qemu main loop could hang up forever when we enable TLS+Multifd.

> The Src multifd_send_0 invokes tls handshake, it sends hello to sever

> and wait response.

> However, the Dst main qemu loop has been waiting recvmsg() for multifd_recv_1.

> Both of Src and Dst main qemu loop are blocking and waiting for reponse which

> results in hanging up forever.

> 

> Src: (multifd_send_0)                                              Dst: (multifd_recv_1)

> multifd_channel_connect                                            migration_channel_process_incoming

>   multifd_tls_channel_connect                                        migration_tls_channel_process_incoming

>     multifd_tls_channel_connect                                        qio_channel_tls_handshake_task

>        qio_channel_tls_handshake                                         gnutls_handshake

>           qio_channel_tls_handshake_task                                       ...

>             qcrypto_tls_session_handshake                                      ...

>               gnutls_handshake                                                 ...

>                    ...                                                         ...

>                 recvmsg (Blocking I/O waiting for response)                recvmsg (Blocking I/O waiting for response)

> 

> Fix this by offloadinig handshake work to a background thread.

> 

> Reported-by: Yan Jin <jinyan12@huawei.com>

> Suggested-by: Daniel P. Berrangé <berrange@redhat.com>

> Signed-off-by: Chuan Zheng <zhengchuan@huawei.com>


Queued

> ---

>  migration/multifd.c | 23 +++++++++++++++++------

>  1 file changed, 17 insertions(+), 6 deletions(-)

> 

> diff --git a/migration/multifd.c b/migration/multifd.c

> index 68b171f..88486b9 100644

> --- a/migration/multifd.c

> +++ b/migration/multifd.c

> @@ -739,6 +739,19 @@ static void multifd_tls_outgoing_handshake(QIOTask *task,

>      multifd_channel_connect(p, ioc, err);

>  }

>  

> +static void *multifd_tls_handshake_thread(void *opaque)

> +{

> +    MultiFDSendParams *p = opaque;

> +    QIOChannelTLS *tioc = QIO_CHANNEL_TLS(p->c);

> +

> +    qio_channel_tls_handshake(tioc,

> +                              multifd_tls_outgoing_handshake,

> +                              p,

> +                              NULL,

> +                              NULL);

> +    return NULL;

> +}

> +

>  static void multifd_tls_channel_connect(MultiFDSendParams *p,

>                                          QIOChannel *ioc,

>                                          Error **errp)

> @@ -754,12 +767,10 @@ static void multifd_tls_channel_connect(MultiFDSendParams *p,

>  

>      trace_multifd_tls_outgoing_handshake_start(ioc, tioc, hostname);

>      qio_channel_set_name(QIO_CHANNEL(tioc), "multifd-tls-outgoing");

> -    qio_channel_tls_handshake(tioc,

> -                              multifd_tls_outgoing_handshake,

> -                              p,

> -                              NULL,

> -                              NULL);

> -

> +    p->c = QIO_CHANNEL(tioc);

> +    qemu_thread_create(&p->thread, "multifd-tls-handshake-worker",

> +                       multifd_tls_handshake_thread, p,

> +                       QEMU_THREAD_JOINABLE);

>  }

>  

>  static bool multifd_channel_connect(MultiFDSendParams *p,

> -- 

> 1.8.3.1

> 

> 

-- 
Dr. David Alan Gilbert / dgilbert@redhat.com / Manchester, UK
diff mbox series

Patch

diff --git a/migration/multifd.c b/migration/multifd.c
index 68b171f..88486b9 100644
--- a/migration/multifd.c
+++ b/migration/multifd.c
@@ -739,6 +739,19 @@  static void multifd_tls_outgoing_handshake(QIOTask *task,
     multifd_channel_connect(p, ioc, err);
 }
 
+static void *multifd_tls_handshake_thread(void *opaque)
+{
+    MultiFDSendParams *p = opaque;
+    QIOChannelTLS *tioc = QIO_CHANNEL_TLS(p->c);
+
+    qio_channel_tls_handshake(tioc,
+                              multifd_tls_outgoing_handshake,
+                              p,
+                              NULL,
+                              NULL);
+    return NULL;
+}
+
 static void multifd_tls_channel_connect(MultiFDSendParams *p,
                                         QIOChannel *ioc,
                                         Error **errp)
@@ -754,12 +767,10 @@  static void multifd_tls_channel_connect(MultiFDSendParams *p,
 
     trace_multifd_tls_outgoing_handshake_start(ioc, tioc, hostname);
     qio_channel_set_name(QIO_CHANNEL(tioc), "multifd-tls-outgoing");
-    qio_channel_tls_handshake(tioc,
-                              multifd_tls_outgoing_handshake,
-                              p,
-                              NULL,
-                              NULL);
-
+    p->c = QIO_CHANNEL(tioc);
+    qemu_thread_create(&p->thread, "multifd-tls-handshake-worker",
+                       multifd_tls_handshake_thread, p,
+                       QEMU_THREAD_JOINABLE);
 }
 
 static bool multifd_channel_connect(MultiFDSendParams *p,