diff mbox series

[PULL,3/6] qemu/osdep: Split qemu_close_all_open_fd() and add fallback

Message ID 20240805003130.1421051-5-richard.henderson@linaro.org
State New
Headers show
Series [PULL,1/6] linux-user/elfload: Fix pr_pid values in core files | expand

Commit Message

Richard Henderson Aug. 5, 2024, 12:31 a.m. UTC
From: Clément Léger <cleger@rivosinc.com>

In order to make it cleaner, split qemu_close_all_open_fd() logic into
multiple subfunctions (close with close_range(), with /proc/self/fd and
fallback).

Signed-off-by: Clément Léger <cleger@rivosinc.com>
Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
Message-ID: <20240802145423.3232974-3-cleger@rivosinc.com>
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
---
 util/oslib-posix.c | 50 ++++++++++++++++++++++++++++++++++------------
 1 file changed, 37 insertions(+), 13 deletions(-)

Comments

Daniel P. Berrangé Aug. 28, 2024, 12:48 p.m. UTC | #1
This is already merged, but I have two comments - one improvement
and one bug which we should probably fix before release.

On Mon, Aug 05, 2024 at 10:31:26AM +1000, Richard Henderson wrote:
> From: Clément Léger <cleger@rivosinc.com>
> 
> In order to make it cleaner, split qemu_close_all_open_fd() logic into
> multiple subfunctions (close with close_range(), with /proc/self/fd and
> fallback).
> 
> Signed-off-by: Clément Léger <cleger@rivosinc.com>
> Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
> Message-ID: <20240802145423.3232974-3-cleger@rivosinc.com>
> Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
> ---
>  util/oslib-posix.c | 50 ++++++++++++++++++++++++++++++++++------------
>  1 file changed, 37 insertions(+), 13 deletions(-)
> 
> diff --git a/util/oslib-posix.c b/util/oslib-posix.c
> index 1e867efa47..9b79fc7cff 100644
> --- a/util/oslib-posix.c
> +++ b/util/oslib-posix.c
> @@ -808,27 +808,16 @@ int qemu_msync(void *addr, size_t length, int fd)
>      return msync(addr, length, MS_SYNC);
>  }
>  
> -/*
> - * Close all open file descriptors.
> - */
> -void qemu_close_all_open_fd(void)
> +static bool qemu_close_all_open_fd_proc(void)
>  {
>      struct dirent *de;
>      int fd, dfd;
>      DIR *dir;
>  
> -#ifdef CONFIG_CLOSE_RANGE
> -    int r = close_range(0, ~0U, 0);
> -    if (!r) {
> -        /* Success, no need to try other ways. */
> -        return;
> -    }
> -#endif
> -
>      dir = opendir("/proc/self/fd");

IIUC from previous threads this is valid on Linux and on Solaris.

On FreeBSD & macOS, you need /dev/fd though.

>      if (!dir) {
>          /* If /proc is not mounted, there is nothing that can be done. */
> -        return;
> +        return false;
>      }
>      /* Avoid closing the directory. */
>      dfd = dirfd(dir);
> @@ -840,4 +829,39 @@ void qemu_close_all_open_fd(void)
>          }
>      }
>      closedir(dir);
> +
> +    return true;
> +}
> +
> +static bool qemu_close_all_open_fd_close_range(void)
> +{
> +#ifdef CONFIG_CLOSE_RANGE
> +    int r = close_range(0, ~0U, 0);
> +    if (!r) {
> +        /* Success, no need to try other ways. */
> +        return true;
> +    }
> +#endif
> +    return false;
> +}
> +
> +static void qemu_close_all_open_fd_fallback(void)
> +{
> +    int open_max = sysconf(_SC_OPEN_MAX), i;
> +
> +    /* Fallback */
> +    for (i = 0; i < open_max; i++) {
> +        close(i);
> +    }

I'm told that sysconf(_SC_OPEN_MAX) returns -1 on some versions of
macOS. "Luckily" since we assigned to 'int' rather than 'unsigned int'
this will result in us not closing any FDs in this fallback path,
rather than trying to close several billion FDs (an effective hang).

If _SC_OPEN_MAX returns -1, we should fallback to the OPEN_MAX
constant on macOS (see commit de448e0f26e710e9d2b7fc91393c40ac24b75847
which tackled a similar issue wrt getrlimit), and fallback to perhaps
a hardcoded 1024 on non-macOS.


> +}
> +
> +/*
> + * Close all open file descriptors.
> + */
> +void qemu_close_all_open_fd(void)
> +{
> +    if (!qemu_close_all_open_fd_close_range() &&
> +        !qemu_close_all_open_fd_proc()) {
> +        qemu_close_all_open_fd_fallback();
> +    }
>  }
> -- 
> 2.43.0
> 
> 

With regards,
Daniel

[1] https://github.com/open-mpi/ompi/issues/10358
Clément Léger Aug. 28, 2024, 1:09 p.m. UTC | #2
On 28/08/2024 14:48, Daniel P. Berrangé wrote:
> This is already merged, but I have two comments - one improvement
> and one bug which we should probably fix before release.
> 
> On Mon, Aug 05, 2024 at 10:31:26AM +1000, Richard Henderson wrote:
>> From: Clément Léger <cleger@rivosinc.com>
>>
>> In order to make it cleaner, split qemu_close_all_open_fd() logic into
>> multiple subfunctions (close with close_range(), with /proc/self/fd and
>> fallback).
>>
>> Signed-off-by: Clément Léger <cleger@rivosinc.com>
>> Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
>> Message-ID: <20240802145423.3232974-3-cleger@rivosinc.com>
>> Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
>> ---
>>  util/oslib-posix.c | 50 ++++++++++++++++++++++++++++++++++------------
>>  1 file changed, 37 insertions(+), 13 deletions(-)
>>
>> diff --git a/util/oslib-posix.c b/util/oslib-posix.c
>> index 1e867efa47..9b79fc7cff 100644
>> --- a/util/oslib-posix.c
>> +++ b/util/oslib-posix.c
>> @@ -808,27 +808,16 @@ int qemu_msync(void *addr, size_t length, int fd)
>>      return msync(addr, length, MS_SYNC);
>>  }
>>  
>> -/*
>> - * Close all open file descriptors.
>> - */
>> -void qemu_close_all_open_fd(void)
>> +static bool qemu_close_all_open_fd_proc(void)
>>  {
>>      struct dirent *de;
>>      int fd, dfd;
>>      DIR *dir;
>>  
>> -#ifdef CONFIG_CLOSE_RANGE
>> -    int r = close_range(0, ~0U, 0);
>> -    if (!r) {
>> -        /* Success, no need to try other ways. */
>> -        return;
>> -    }
>> -#endif
>> -
>>      dir = opendir("/proc/self/fd");
> 
> IIUC from previous threads this is valid on Linux and on Solaris.
> 
> On FreeBSD & macOS, you need /dev/fd though.

Acked.

> 
>>      if (!dir) {
>>          /* If /proc is not mounted, there is nothing that can be done. */
>> -        return;
>> +        return false;
>>      }
>>      /* Avoid closing the directory. */
>>      dfd = dirfd(dir);
>> @@ -840,4 +829,39 @@ void qemu_close_all_open_fd(void)
>>          }
>>      }
>>      closedir(dir);
>> +
>> +    return true;
>> +}
>> +
>> +static bool qemu_close_all_open_fd_close_range(void)
>> +{
>> +#ifdef CONFIG_CLOSE_RANGE
>> +    int r = close_range(0, ~0U, 0);
>> +    if (!r) {
>> +        /* Success, no need to try other ways. */
>> +        return true;
>> +    }
>> +#endif
>> +    return false;
>> +}
>> +
>> +static void qemu_close_all_open_fd_fallback(void)
>> +{
>> +    int open_max = sysconf(_SC_OPEN_MAX), i;
>> +
>> +    /* Fallback */
>> +    for (i = 0; i < open_max; i++) {
>> +        close(i);
>> +    }
> 
> I'm told that sysconf(_SC_OPEN_MAX) returns -1 on some versions of
> macOS. "Luckily" since we assigned to 'int' rather than 'unsigned int'
> this will result in us not closing any FDs in this fallback path,
> rather than trying to close several billion FDs (an effective hang).
> 
> If _SC_OPEN_MAX returns -1, we should fallback to the OPEN_MAX
> constant on macOS (see commit de448e0f26e710e9d2b7fc91393c40ac24b75847
> which tackled a similar issue wrt getrlimit), and fallback to perhaps
> a hardcoded 1024 on non-macOS.

Thanks for catching this, I can submit these fixes except if you already
prepared something though.

Clément

> 
> 
>> +}
>> +
>> +/*
>> + * Close all open file descriptors.
>> + */
>> +void qemu_close_all_open_fd(void)
>> +{
>> +    if (!qemu_close_all_open_fd_close_range() &&
>> +        !qemu_close_all_open_fd_proc()) {
>> +        qemu_close_all_open_fd_fallback();
>> +    }
>>  }
>> -- 
>> 2.43.0
>>
>>
> 
> With regards,
> Daniel
> 
> [1] https://github.com/open-mpi/ompi/issues/10358
Richard Henderson Aug. 28, 2024, 10:47 p.m. UTC | #3
On 8/28/24 22:48, Daniel P. Berrangé wrote:
>>       dir = opendir("/proc/self/fd");
> 
> IIUC from previous threads this is valid on Linux and on Solaris.
> 
> On FreeBSD & macOS, you need /dev/fd though.

Fair, but importantly, it doesn't do anything *incorrect* those systems: it merely skips 
this method with ENOENT.

>> +    int open_max = sysconf(_SC_OPEN_MAX), i;
>> +
>> +    /* Fallback */
>> +    for (i = 0; i < open_max; i++) {
>> +        close(i);
>> +    }
> 
> I'm told that sysconf(_SC_OPEN_MAX) returns -1 on some versions of
> macOS. "Luckily" since we assigned to 'int' rather than 'unsigned int'
> this will result in us not closing any FDs in this fallback path,
> rather than trying to close several billion FDs (an effective hang).

Ouch.

> 
> If _SC_OPEN_MAX returns -1, we should fallback to the OPEN_MAX
> constant on macOS (see commit de448e0f26e710e9d2b7fc91393c40ac24b75847
> which tackled a similar issue wrt getrlimit), and fallback to perhaps
> a hardcoded 1024 on non-macOS.

I wish the timing on this had been better -- 25 minutes earlier and I would have delayed rc4.

Since macOS simply doesn't close fds, I'm of a mind to release 9.1.0 without this, and fix 
it for 9.1.1.  Thoughts?


r~
Daniel P. Berrangé Aug. 29, 2024, 7:14 a.m. UTC | #4
On Thu, Aug 29, 2024 at 08:47:27AM +1000, Richard Henderson wrote:
> On 8/28/24 22:48, Daniel P. Berrangé wrote:
> > >       dir = opendir("/proc/self/fd");
> > 
> > IIUC from previous threads this is valid on Linux and on Solaris.
> > 
> > On FreeBSD & macOS, you need /dev/fd though.
> 
> Fair, but importantly, it doesn't do anything *incorrect* those systems: it
> merely skips this method with ENOENT.
> 
> > > +    int open_max = sysconf(_SC_OPEN_MAX), i;
> > > +
> > > +    /* Fallback */
> > > +    for (i = 0; i < open_max; i++) {
> > > +        close(i);
> > > +    }
> > 
> > I'm told that sysconf(_SC_OPEN_MAX) returns -1 on some versions of
> > macOS. "Luckily" since we assigned to 'int' rather than 'unsigned int'
> > this will result in us not closing any FDs in this fallback path,
> > rather than trying to close several billion FDs (an effective hang).
> 
> Ouch.
> 
> > 
> > If _SC_OPEN_MAX returns -1, we should fallback to the OPEN_MAX
> > constant on macOS (see commit de448e0f26e710e9d2b7fc91393c40ac24b75847
> > which tackled a similar issue wrt getrlimit), and fallback to perhaps
> > a hardcoded 1024 on non-macOS.
> 
> I wish the timing on this had been better -- 25 minutes earlier and I would have delayed rc4.
> 
> Since macOS simply doesn't close fds, I'm of a mind to release 9.1.0 without
> this, and fix it for 9.1.1.  Thoughts?

The original net/tap.c code for closing FDs has the same bug, so in that
area we do NOT have a regression.

The original async teardown code would fail to close FDs as it is looking
for close_range or /proc/$PID/fd, and has no sysconf fallback, so again
no regression there.

IOW, I think this is acceptable to fix in 9.1 stable.

With regards,
Daniel
diff mbox series

Patch

diff --git a/util/oslib-posix.c b/util/oslib-posix.c
index 1e867efa47..9b79fc7cff 100644
--- a/util/oslib-posix.c
+++ b/util/oslib-posix.c
@@ -808,27 +808,16 @@  int qemu_msync(void *addr, size_t length, int fd)
     return msync(addr, length, MS_SYNC);
 }
 
-/*
- * Close all open file descriptors.
- */
-void qemu_close_all_open_fd(void)
+static bool qemu_close_all_open_fd_proc(void)
 {
     struct dirent *de;
     int fd, dfd;
     DIR *dir;
 
-#ifdef CONFIG_CLOSE_RANGE
-    int r = close_range(0, ~0U, 0);
-    if (!r) {
-        /* Success, no need to try other ways. */
-        return;
-    }
-#endif
-
     dir = opendir("/proc/self/fd");
     if (!dir) {
         /* If /proc is not mounted, there is nothing that can be done. */
-        return;
+        return false;
     }
     /* Avoid closing the directory. */
     dfd = dirfd(dir);
@@ -840,4 +829,39 @@  void qemu_close_all_open_fd(void)
         }
     }
     closedir(dir);
+
+    return true;
+}
+
+static bool qemu_close_all_open_fd_close_range(void)
+{
+#ifdef CONFIG_CLOSE_RANGE
+    int r = close_range(0, ~0U, 0);
+    if (!r) {
+        /* Success, no need to try other ways. */
+        return true;
+    }
+#endif
+    return false;
+}
+
+static void qemu_close_all_open_fd_fallback(void)
+{
+    int open_max = sysconf(_SC_OPEN_MAX), i;
+
+    /* Fallback */
+    for (i = 0; i < open_max; i++) {
+        close(i);
+    }
+}
+
+/*
+ * Close all open file descriptors.
+ */
+void qemu_close_all_open_fd(void)
+{
+    if (!qemu_close_all_open_fd_close_range() &&
+        !qemu_close_all_open_fd_proc()) {
+        qemu_close_all_open_fd_fallback();
+    }
 }