diff mbox series

[v2,4/4] selftests/mm: Fix test result reporting in gup_longterm

Message ID 20250527-selftests-mm-cow-dedupe-v2-4-ff198df8e38e@kernel.org
State New
Headers show
Series selftests/mm: cow and gup_longterm cleanups | expand

Commit Message

Mark Brown May 27, 2025, 4:04 p.m. UTC
The kselftest framework uses the string logged when a test result is
reported as the unique identifier for a test, using it to track test
results between runs. The gup_longterm test fails to follow this
pattern, it runs a single test function repeatedly with various
parameters but each result report is a string logging an error message
which is fixed between runs.

Since the code already logs each test uniquely before it starts refactor
to also print this to a buffer, then use that name as the test result.
This isn't especially pretty but is relatively straightforward and is a
great help to tooling.

Signed-off-by: Mark Brown <broonie@kernel.org>
---
 tools/testing/selftests/mm/gup_longterm.c | 150 +++++++++++++++++++-----------
 1 file changed, 94 insertions(+), 56 deletions(-)

Comments

Mark Brown June 3, 2025, 1:05 p.m. UTC | #1
On Tue, Jun 03, 2025 at 02:36:07PM +0200, David Hildenbrand wrote:
> On 27.05.25 18:04, Mark Brown wrote:

> > +	int result = KSFT_PASS;
> >   	int ret;
> > +	if (fd < 0) {
> > +		result = KSFT_FAIL;
> > +		goto report;
> > +	}

> Not a fan of that, especially as it suddenly converts
> ksft_test_result_skip() -- e.g., on the memfd path -- to KSFT_FAIL.

It looks like the memfd path was an outlier here, the others all failed
if they couldn't allocate the FD.

> Can we just do the log_test_result(KSFT_FAIL/KSFT_SKIP) in the caller?

Nothing stopping that, or doing it for the cases where we want to skip.
IIRC this simplified some of the other callers a little.
Lorenzo Stoakes June 5, 2025, 4 p.m. UTC | #2
This seems to be causing tests to fail rather than be skipped if hugetlb
isn't configured. I bisected the problem to this patch so it's definitely
changed how things are handled (though of course it might just be
_revealing_ some previously existing bug in this test...).

Using a couple of tests as an example:

Before this patch:

# [RUN] R/O longterm GUP-fast pin in MAP_PRIVATE file mapping ... with memfd hugetlb (2048 kB)
ok 39 # SKIP memfd_create() failed (Cannot allocate memory)
# [RUN] R/O longterm GUP-fast pin in MAP_PRIVATE file mapping ... with memfd hugetlb (1048576 kB)
ok 40 # SKIP memfd_create() failed (Cannot allocate memory)

After:

# [RUN] R/O longterm GUP-fast pin in MAP_PRIVATE file mapping ... with memfd hugetlb (2048 kB)
# memfd_create() failed (Cannot allocate memory)
not ok 39 R/O longterm GUP-fast pin in MAP_PRIVATE file mapping ... with memfd hugetlb (2048 kB)
# [RUN] R/O longterm GUP-fast pin in MAP_PRIVATE file mapping ... with memfd hugetlb (1048576 kB)
# memfd_create() failed (Cannot allocate memory)
not ok 40 R/O longterm GUP-fast pin in MAP_PRIVATE file mapping ... with memfd hugetlb (1048576 kB)

See below, I think I've found where it happens...

On Tue, May 27, 2025 at 05:04:48PM +0100, Mark Brown wrote:
> The kselftest framework uses the string logged when a test result is
> reported as the unique identifier for a test, using it to track test
> results between runs. The gup_longterm test fails to follow this
> pattern, it runs a single test function repeatedly with various
> parameters but each result report is a string logging an error message
> which is fixed between runs.
>
> Since the code already logs each test uniquely before it starts refactor
> to also print this to a buffer, then use that name as the test result.
> This isn't especially pretty but is relatively straightforward and is a
> great help to tooling.
>
> Signed-off-by: Mark Brown <broonie@kernel.org>
> ---
>  tools/testing/selftests/mm/gup_longterm.c | 150 +++++++++++++++++++-----------
>  1 file changed, 94 insertions(+), 56 deletions(-)
>
> diff --git a/tools/testing/selftests/mm/gup_longterm.c b/tools/testing/selftests/mm/gup_longterm.c
> index e60e62809186..f84ea97c2543 100644
> --- a/tools/testing/selftests/mm/gup_longterm.c
> +++ b/tools/testing/selftests/mm/gup_longterm.c
> @@ -93,33 +93,48 @@ static void do_test(int fd, size_t size, enum test_type type, bool shared)
>  	__fsword_t fs_type = get_fs_type(fd);
>  	bool should_work;
>  	char *mem;
> +	int result = KSFT_PASS;
>  	int ret;
>
> +	if (fd < 0) {
> +		result = KSFT_FAIL;
> +		goto report;
> +	}
> +
>  	if (ftruncate(fd, size)) {
>  		if (errno == ENOENT) {
>  			skip_test_dodgy_fs("ftruncate()");
>  		} else {
> -			ksft_test_result_fail("ftruncate() failed (%s)\n", strerror(errno));
> +			ksft_print_msg("ftruncate() failed (%s)\n",
> +				       strerror(errno));
> +			result = KSFT_FAIL;
> +			goto report;
>  		}
>  		return;
>  	}
>
>  	if (fallocate(fd, 0, 0, size)) {
> -		if (size == pagesize)
> -			ksft_test_result_fail("fallocate() failed (%s)\n", strerror(errno));
> -		else
> -			ksft_test_result_skip("need more free huge pages\n");
> -		return;
> +		if (size == pagesize) {
> +			ksft_print_msg("fallocate() failed (%s)\n", strerror(errno));
> +			result = KSFT_FAIL;
> +		} else {
> +			ksft_print_msg("need more free huge pages\n");
> +			result = KSFT_SKIP;
> +		}
> +		goto report;
>  	}
>
>  	mem = mmap(NULL, size, PROT_READ | PROT_WRITE,
>  		   shared ? MAP_SHARED : MAP_PRIVATE, fd, 0);
>  	if (mem == MAP_FAILED) {
> -		if (size == pagesize || shared)
> -			ksft_test_result_fail("mmap() failed (%s)\n", strerror(errno));
> -		else
> -			ksft_test_result_skip("need more free huge pages\n");
> -		return;
> +		if (size == pagesize || shared) {
> +			ksft_print_msg("mmap() failed (%s)\n", strerror(errno));
> +			result = KSFT_FAIL;
> +		} else {
> +			ksft_print_msg("need more free huge pages\n");
> +			result = KSFT_SKIP;
> +		}
> +		goto report;
>  	}
>
>  	/* Fault in the page such that GUP-fast can pin it directly. */
> @@ -134,7 +149,8 @@ static void do_test(int fd, size_t size, enum test_type type, bool shared)
>  		 */
>  		ret = mprotect(mem, size, PROT_READ);
>  		if (ret) {
> -			ksft_test_result_fail("mprotect() failed (%s)\n", strerror(errno));
> +			ksft_print_msg("mprotect() failed (%s)\n", strerror(errno));
> +			result = KSFT_FAIL;
>  			goto munmap;
>  		}
>  		/* FALLTHROUGH */
> @@ -147,12 +163,14 @@ static void do_test(int fd, size_t size, enum test_type type, bool shared)
>  				type == TEST_TYPE_RW_FAST;
>
>  		if (gup_fd < 0) {
> -			ksft_test_result_skip("gup_test not available\n");
> +			ksft_print_msg("gup_test not available\n");
> +			result = KSFT_SKIP;
>  			break;
>  		}
>
>  		if (rw && shared && fs_is_unknown(fs_type)) {
> -			ksft_test_result_skip("Unknown filesystem\n");
> +			ksft_print_msg("Unknown filesystem\n");
> +			result = KSFT_SKIP;
>  			return;
>  		}
>  		/*
> @@ -169,14 +187,19 @@ static void do_test(int fd, size_t size, enum test_type type, bool shared)
>  		args.flags |= rw ? PIN_LONGTERM_TEST_FLAG_USE_WRITE : 0;
>  		ret = ioctl(gup_fd, PIN_LONGTERM_TEST_START, &args);
>  		if (ret && errno == EINVAL) {
> -			ksft_test_result_skip("PIN_LONGTERM_TEST_START failed (EINVAL)n");
> +			ksft_print_msg("PIN_LONGTERM_TEST_START failed (EINVAL)n");
> +			result = KSFT_SKIP;
>  			break;
>  		} else if (ret && errno == EFAULT) {
> -			ksft_test_result(!should_work, "Should have failed\n");
> +			if (should_work)
> +				result = KSFT_FAIL;
> +			else
> +				result = KSFT_PASS;
>  			break;
>  		} else if (ret) {
> -			ksft_test_result_fail("PIN_LONGTERM_TEST_START failed (%s)\n",
> -					      strerror(errno));
> +			ksft_print_msg("PIN_LONGTERM_TEST_START failed (%s)\n",
> +				       strerror(errno));
> +			result = KSFT_FAIL;
>  			break;
>  		}
>
> @@ -189,7 +212,10 @@ static void do_test(int fd, size_t size, enum test_type type, bool shared)
>  		 * some previously unsupported filesystems, we might want to
>  		 * perform some additional tests for possible data corruptions.
>  		 */
> -		ksft_test_result(should_work, "Should have worked\n");
> +		if (should_work)
> +			result = KSFT_PASS;
> +		else
> +			result = KSFT_FAIL;
>  		break;
>  	}
>  #ifdef LOCAL_CONFIG_HAVE_LIBURING
> @@ -199,8 +225,9 @@ static void do_test(int fd, size_t size, enum test_type type, bool shared)
>
>  		/* io_uring always pins pages writable. */
>  		if (shared && fs_is_unknown(fs_type)) {
> -			ksft_test_result_skip("Unknown filesystem\n");
> -			return;
> +			ksft_print_msg("Unknown filesystem\n");
> +			result = KSFT_SKIP;
> +			goto report;
>  		}
>  		should_work = !shared ||
>  			      fs_supports_writable_longterm_pinning(fs_type);
> @@ -208,8 +235,9 @@ static void do_test(int fd, size_t size, enum test_type type, bool shared)
>  		/* Skip on errors, as we might just lack kernel support. */
>  		ret = io_uring_queue_init(1, &ring, 0);
>  		if (ret < 0) {
> -			ksft_test_result_skip("io_uring_queue_init() failed (%s)\n",
> -					      strerror(-ret));
> +			ksft_print_msg("io_uring_queue_init() failed (%s)\n",
> +				       strerror(-ret));
> +			result = KSFT_SKIP;
>  			break;
>  		}
>  		/*
> @@ -222,17 +250,28 @@ static void do_test(int fd, size_t size, enum test_type type, bool shared)
>  		/* Only new kernels return EFAULT. */
>  		if (ret && (errno == ENOSPC || errno == EOPNOTSUPP ||
>  			    errno == EFAULT)) {
> -			ksft_test_result(!should_work, "Should have failed (%s)\n",
> -					 strerror(errno));
> +			if (should_work) {
> +				ksft_print_msg("Should have failed (%s)\n",
> +					       strerror(errno));
> +				result = KSFT_FAIL;
> +			} else {
> +				result = KSFT_PASS;
> +			}
>  		} else if (ret) {
>  			/*
>  			 * We might just lack support or have insufficient
>  			 * MEMLOCK limits.
>  			 */
> -			ksft_test_result_skip("io_uring_register_buffers() failed (%s)\n",
> -					      strerror(-ret));
> +			ksft_print_msg("io_uring_register_buffers() failed (%s)\n",
> +				       strerror(-ret));
> +			result = KSFT_SKIP;
>  		} else {
> -			ksft_test_result(should_work, "Should have worked\n");
> +			if (should_work) {
> +				result = KSFT_PASS;
> +			} else {
> +				ksft_print_msg("Should have worked\n");
> +				result = KSFT_FAIL;
> +			}
>  			io_uring_unregister_buffers(&ring);
>  		}
>
> @@ -246,6 +285,8 @@ static void do_test(int fd, size_t size, enum test_type type, bool shared)
>
>  munmap:
>  	munmap(mem, size);
> +report:
> +	log_test_result(result);
>  }
>
>  typedef void (*test_fn)(int fd, size_t size);
> @@ -254,13 +295,11 @@ static void run_with_memfd(test_fn fn, const char *desc)
>  {
>  	int fd;
>
> -	ksft_print_msg("[RUN] %s ... with memfd\n", desc);
> +	log_test_start("%s ... with memfd", desc);
>
>  	fd = memfd_create("test", 0);
> -	if (fd < 0) {
> -		ksft_test_result_fail("memfd_create() failed (%s)\n", strerror(errno));
> -		return;
> -	}
> +	if (fd < 0)
> +		ksft_print_msg("memfd_create() failed (%s)\n", strerror(errno));
>
>  	fn(fd, pagesize);
>  	close(fd);
> @@ -271,23 +310,23 @@ static void run_with_tmpfile(test_fn fn, const char *desc)
>  	FILE *file;
>  	int fd;
>
> -	ksft_print_msg("[RUN] %s ... with tmpfile\n", desc);
> +	log_test_start("%s ... with tmpfile", desc);
>
>  	file = tmpfile();
>  	if (!file) {
> -		ksft_test_result_fail("tmpfile() failed (%s)\n", strerror(errno));
> -		return;
> -	}
> -
> -	fd = fileno(file);
> -	if (fd < 0) {
> -		ksft_test_result_fail("fileno() failed (%s)\n", strerror(errno));
> -		goto close;
> +		ksft_print_msg("tmpfile() failed (%s)\n", strerror(errno));
> +		fd = -1;
> +	} else {
> +		fd = fileno(file);
> +		if (fd < 0) {
> +			ksft_print_msg("fileno() failed (%s)\n", strerror(errno));
> +		}
>  	}
>
>  	fn(fd, pagesize);
> -close:
> -	fclose(file);
> +
> +	if (file)
> +		fclose(file);
>  }
>
>  static void run_with_local_tmpfile(test_fn fn, const char *desc)
> @@ -295,22 +334,22 @@ static void run_with_local_tmpfile(test_fn fn, const char *desc)
>  	char filename[] = __FILE__"_tmpfile_XXXXXX";
>  	int fd;
>
> -	ksft_print_msg("[RUN] %s ... with local tmpfile\n", desc);
> +	log_test_start("%s ... with local tmpfile", desc);
>
>  	fd = mkstemp(filename);
> -	if (fd < 0) {
> -		ksft_test_result_fail("mkstemp() failed (%s)\n", strerror(errno));
> -		return;
> -	}
> +	if (fd < 0)
> +		ksft_print_msg("mkstemp() failed (%s)\n", strerror(errno));
>
>  	if (unlink(filename)) {
> -		ksft_test_result_fail("unlink() failed (%s)\n", strerror(errno));
> -		goto close;
> +		ksft_print_msg("unlink() failed (%s)\n", strerror(errno));
> +		close(fd);
> +		fd = -1;
>  	}
>
>  	fn(fd, pagesize);
> -close:
> -	close(fd);
> +
> +	if (fd >= 0)
> +		close(fd);
>  }
>
>  static void run_with_memfd_hugetlb(test_fn fn, const char *desc,
> @@ -319,15 +358,14 @@ static void run_with_memfd_hugetlb(test_fn fn, const char *desc,
>  	int flags = MFD_HUGETLB;
>  	int fd;
>
> -	ksft_print_msg("[RUN] %s ... with memfd hugetlb (%zu kB)\n", desc,
> +	log_test_start("%s ... with memfd hugetlb (%zu kB)", desc,
>  		       hugetlbsize / 1024);
>
>  	flags |= __builtin_ctzll(hugetlbsize) << MFD_HUGE_SHIFT;
>
>  	fd = memfd_create("test", flags);
>  	if (fd < 0) {
> -		ksft_test_result_skip("memfd_create() failed (%s)\n", strerror(errno));
> -		return;
> +		ksft_print_msg("memfd_create() failed (%s)\n", strerror(errno));

I think it's this change that does it (probably).

I wonder if it's worth checking for errno == ENOMEM and skipping in this
case? Or is that too general?

>  	}
>
>  	fn(fd, hugetlbsize);
>
> --
> 2.39.5
>
Mark Brown June 5, 2025, 4:15 p.m. UTC | #3
On Thu, Jun 05, 2025 at 05:00:49PM +0100, Lorenzo Stoakes wrote:

> This seems to be causing tests to fail rather than be skipped if hugetlb
> isn't configured. I bisected the problem to this patch so it's definitely
> changed how things are handled (though of course it might just be
> _revealing_ some previously existing bug in this test...).

> Using a couple of tests as an example:

> Before this patch:

> # [RUN] R/O longterm GUP-fast pin in MAP_PRIVATE file mapping ... with memfd hugetlb (2048 kB)
> # memfd_create() failed (Cannot allocate memory)
> not ok 39 R/O longterm GUP-fast pin in MAP_PRIVATE file mapping ... with memfd hugetlb (2048 kB)
> # [RUN] R/O longterm GUP-fast pin in MAP_PRIVATE file mapping ... with memfd hugetlb (1048576 kB)
> # memfd_create() failed (Cannot allocate memory)
> not ok 40 R/O longterm GUP-fast pin in MAP_PRIVATE file mapping ... with memfd hugetlb (1048576 kB)

That's the thing with memfd being special and skipping on setup failure
that David mentioned, I've got a patch as part of the formatting series
I was going to send after the merge window.
Lorenzo Stoakes June 5, 2025, 4:26 p.m. UTC | #4
On Thu, Jun 05, 2025 at 05:15:51PM +0100, Mark Brown wrote:
> On Thu, Jun 05, 2025 at 05:00:49PM +0100, Lorenzo Stoakes wrote:
>
> > This seems to be causing tests to fail rather than be skipped if hugetlb
> > isn't configured. I bisected the problem to this patch so it's definitely
> > changed how things are handled (though of course it might just be
> > _revealing_ some previously existing bug in this test...).
>
> > Using a couple of tests as an example:
>
> > Before this patch:
>
> > # [RUN] R/O longterm GUP-fast pin in MAP_PRIVATE file mapping ... with memfd hugetlb (2048 kB)
> > # memfd_create() failed (Cannot allocate memory)
> > not ok 39 R/O longterm GUP-fast pin in MAP_PRIVATE file mapping ... with memfd hugetlb (2048 kB)
> > # [RUN] R/O longterm GUP-fast pin in MAP_PRIVATE file mapping ... with memfd hugetlb (1048576 kB)
> > # memfd_create() failed (Cannot allocate memory)
> > not ok 40 R/O longterm GUP-fast pin in MAP_PRIVATE file mapping ... with memfd hugetlb (1048576 kB)
>
> That's the thing with memfd being special and skipping on setup failure
> that David mentioned, I've got a patch as part of the formatting series
> I was going to send after the merge window.

where did he mention this?

I mean I'd argue that making a test that previously worked now fail due to how
somebody's set up their system is a reason not to merge that patch.

Better to do all of these formating fixes and maintain the _same behaviour_ then
separately tackle whether or not we should skip.

Obviously the better option would be to somehow determine if hugetlb is
available in advance (of course, theoretically somebody could come in and
reserve pages but that's not veyr likely).

Anyway, mm-new accepts patches during the merge window, one of the advantages of
having this separated out from mm-unstable, so there's no reason not to send
something now.
Mark Brown June 5, 2025, 4:42 p.m. UTC | #5
On Thu, Jun 05, 2025 at 05:26:05PM +0100, Lorenzo Stoakes wrote:
> On Thu, Jun 05, 2025 at 05:15:51PM +0100, Mark Brown wrote:

> > That's the thing with memfd being special and skipping on setup failure
> > that David mentioned, I've got a patch as part of the formatting series
> > I was going to send after the merge window.

> where did he mention this?

I can't remember off hand, sorry.

> I mean I'd argue that making a test that previously worked now fail due to how
> somebody's set up their system is a reason not to merge that patch.

Well, it's a bit late now given that this is in Linus' tree and actually
it turns out this was the only update for gup_longterm so I just rebased
it onto Linus' tree and kicked off my tests.

> Better to do all of these formating fixes and maintain the _same behaviour_ then
> separately tackle whether or not we should skip.

I'm confused, that's generally the opposite of the standard advice for
the kernel - usually it's fixes first, then deal with anything cosmetic
or new?

> Obviously the better option would be to somehow determine if hugetlb is
> available in advance (of course, theoretically somebody could come in and
> reserve pages but that's not veyr likely).

The tests do enumerate the set of available hugepage sizes at runtime
(see the loop in run_test_case()) but detect_hugetlb_page_sizes() just
looks in /sys/kernel/mm/hugepages/ for subdirectories and doesn't look
inside those directories to see if there are actually any huge pages
available for the huge page sizes advertised.  There's probably utility
in at least a version of that function that checks.
David Hildenbrand June 5, 2025, 4:48 p.m. UTC | #6
On 05.06.25 18:15, Mark Brown wrote:
> On Thu, Jun 05, 2025 at 05:00:49PM +0100, Lorenzo Stoakes wrote:
> 
>> This seems to be causing tests to fail rather than be skipped if hugetlb
>> isn't configured. I bisected the problem to this patch so it's definitely
>> changed how things are handled (though of course it might just be
>> _revealing_ some previously existing bug in this test...).
> 
>> Using a couple of tests as an example:
> 
>> Before this patch:
> 
>> # [RUN] R/O longterm GUP-fast pin in MAP_PRIVATE file mapping ... with memfd hugetlb (2048 kB)
>> # memfd_create() failed (Cannot allocate memory)
>> not ok 39 R/O longterm GUP-fast pin in MAP_PRIVATE file mapping ... with memfd hugetlb (2048 kB)
>> # [RUN] R/O longterm GUP-fast pin in MAP_PRIVATE file mapping ... with memfd hugetlb (1048576 kB)
>> # memfd_create() failed (Cannot allocate memory)
>> not ok 40 R/O longterm GUP-fast pin in MAP_PRIVATE file mapping ... with memfd hugetlb (1048576 kB)
> 
> That's the thing with memfd being special and skipping on setup failure
> that David mentioned, I've got a patch as part of the formatting series
> I was going to send after the merge window.

@Andew, why did this series get merged already?
David Hildenbrand June 5, 2025, 4:55 p.m. UTC | #7
On 05.06.25 18:42, Mark Brown wrote:
> On Thu, Jun 05, 2025 at 05:26:05PM +0100, Lorenzo Stoakes wrote:
>> On Thu, Jun 05, 2025 at 05:15:51PM +0100, Mark Brown wrote:
> 
>>> That's the thing with memfd being special and skipping on setup failure
>>> that David mentioned, I've got a patch as part of the formatting series
>>> I was going to send after the merge window.
> 
>> where did he mention this?
> 
> I can't remember off hand, sorry.

I assume in ... my review to patch #4?

What an unpleasant upstream experience.
Lorenzo Stoakes June 5, 2025, 5:09 p.m. UTC | #8
On Thu, Jun 05, 2025 at 05:42:55PM +0100, Mark Brown wrote:
> On Thu, Jun 05, 2025 at 05:26:05PM +0100, Lorenzo Stoakes wrote:
> > On Thu, Jun 05, 2025 at 05:15:51PM +0100, Mark Brown wrote:
>
> > > That's the thing with memfd being special and skipping on setup failure
> > > that David mentioned, I've got a patch as part of the formatting series
> > > I was going to send after the merge window.
>
> > where did he mention this?
>
> I can't remember off hand, sorry.
>

I see his reply here and I guess it's because you now determine the result at
the top level, I see you are doing this in do_test():

+       int result = KSFT_PASS;
        int ret;

+       if (fd < 0) {
+               result = KSFT_FAIL;
+               goto report;
+       }
+

And in run_with_memfd_hugetlb():

        fd = memfd_create("test", flags);
        if (fd < 0) {
-               ksft_test_result_skip("memfd_create() failed (%s)\n", strerror(errno));
-               return;
+               ksft_print_msg("memfd_create() failed (%s)\n", strerror(errno));
        }

So previously this would skip, now it fails.

I've not double, triple checked this is it, but seems likely!

You said you had a fix

> > I mean I'd argue that making a test that previously worked now fail due to how
> > somebody's set up their system is a reason not to merge that patch.
>
> Well, it's a bit late now given that this is in Linus' tree and actually
> it turns out this was the only update for gup_longterm so I just rebased
> it onto Linus' tree and kicked off my tests.

Ack.

>
> > Better to do all of these formating fixes and maintain the _same behaviour_ then
> > separately tackle whether or not we should skip.
>
> I'm confused, that's generally the opposite of the standard advice for
> the kernel - usually it's fixes first, then deal with anything cosmetic
> or new?

I mean the crux is that the 'cosmetic' changes also included a 'this might
break things' change.

I'm saying do the cosmetic things in _isolation_, or fix the brokenness
before doing the whole lot.

Anyway there's no point dwelling on this, let's just get a fix sorted.

>
> > Obviously the better option would be to somehow determine if hugetlb is
> > available in advance (of course, theoretically somebody could come in and
> > reserve pages but that's not veyr likely).
>
> The tests do enumerate the set of available hugepage sizes at runtime
> (see the loop in run_test_case()) but detect_hugetlb_page_sizes() just
> looks in /sys/kernel/mm/hugepages/ for subdirectories and doesn't look
> inside those directories to see if there are actually any huge pages
> available for the huge page sizes advertised.  There's probably utility
> in at least a version of that function that checks.

Right yes, I mean obviously this whole thing is a mess already that's not
your fault, and ideally we'd have some general way of looking this up
across _all_ tests and just switch things on/off accordingly.

There's a whole Pandora's box about what the tests should assume/not and
yeah. Anyway. Maybe leave it closed for now :)

Cheers, Lorenzo
Mark Brown June 5, 2025, 5:19 p.m. UTC | #9
On Thu, Jun 05, 2025 at 06:55:53PM +0200, David Hildenbrand wrote:
> On 05.06.25 18:42, Mark Brown wrote:

> > I can't remember off hand, sorry.

> I assume in ... my review to patch #4?

Oh, yeah - it's there.  I did look there but the "not a fan" bit made me
think it was one of the stylistic things as I quickly scanned through.

> What an unpleasant upstream experience.

TBH this has been a lot better than the more common failure mode with
working on selftests where people just completely ignore or are openly
dismissive about them :/ .  Probably room for a middle ground though.
David Hildenbrand June 5, 2025, 5:34 p.m. UTC | #10
On 05.06.25 19:19, Mark Brown wrote:
> On Thu, Jun 05, 2025 at 06:55:53PM +0200, David Hildenbrand wrote:
>> On 05.06.25 18:42, Mark Brown wrote:
> 
>>> I can't remember off hand, sorry.
> 
>> I assume in ... my review to patch #4?
> 
> Oh, yeah - it's there.  I did look there but the "not a fan" bit made me
> think it was one of the stylistic things as I quickly scanned through.
> 
>> What an unpleasant upstream experience.
> 
> TBH this has been a lot better than the more common failure mode with
> working on selftests where people just completely ignore or are openly
> dismissive about them :/ .  Probably room for a middle ground though.

Can we *please* limit such reworks to mechanical changes in the future?

It's just absolutely hard to spot these things during review (I did at 
least on patch #4).

And Andrew apparently just merges them -- and I am left with the feeling 
that we create more mess by "accident".

Anyhow, thanks for working on these tests ...
Mark Brown June 5, 2025, 5:38 p.m. UTC | #11
On Thu, Jun 05, 2025 at 06:09:09PM +0100, Lorenzo Stoakes wrote:
> On Thu, Jun 05, 2025 at 05:42:55PM +0100, Mark Brown wrote:

> > > Better to do all of these formating fixes and maintain the _same behaviour_ then
> > > separately tackle whether or not we should skip.

> > I'm confused, that's generally the opposite of the standard advice for
> > the kernel - usually it's fixes first, then deal with anything cosmetic
> > or new?

> I mean the crux is that the 'cosmetic' changes also included a 'this might
> break things' change.

No, the cosmetic changes are separate.  I'm just saying I have a small
bunch of stuff based on David's feedback to send out after the merge
window.

> I'm saying do the cosmetic things in _isolation_, or fix the brokenness
> before doing the whole lot.

Some subsystems will complain if you send anything that isn't urgent
during the merge window, this looked more like an "I suppose you could
configure the kernel that way" problem than a "people will routinely run
into this" one, I was expecting it (or something) to go in as a fix but
that it was safer to wait for -rc1 to send.

> > > Obviously the better option would be to somehow determine if hugetlb is
> > > available in advance (of course, theoretically somebody could come in and
> > > reserve pages but that's not veyr likely).

> > The tests do enumerate the set of available hugepage sizes at runtime
> > (see the loop in run_test_case()) but detect_hugetlb_page_sizes() just
> > looks in /sys/kernel/mm/hugepages/ for subdirectories and doesn't look
> > inside those directories to see if there are actually any huge pages
> > available for the huge page sizes advertised.  There's probably utility
> > in at least a version of that function that checks.

> Right yes, I mean obviously this whole thing is a mess already that's not
> your fault, and ideally we'd have some general way of looking this up
> across _all_ tests and just switch things on/off accordingly.

That is at least library code so it'd get the three tests that use it,
though possibly one of them actually wants the current behaviour for
some reason?

> There's a whole Pandora's box about what the tests should assume/not and
> yeah. Anyway. Maybe leave it closed for now :)

It's separate, yeah.  It'd also be good to document what you need to
enable all the tests somewhere as well - there's the config fragment
already which is good, but you also at least need a bunch of command
line options to set up huge pages and enable secretmem.
Lorenzo Stoakes June 5, 2025, 5:47 p.m. UTC | #12
Mark, I'm not finding this productive.

Bottom line is you've broken the tests, please fix them or if you're not
willing to I'll send a fix.

Thanks.

On Thu, Jun 05, 2025 at 06:38:36PM +0100, Mark Brown wrote:
> On Thu, Jun 05, 2025 at 06:09:09PM +0100, Lorenzo Stoakes wrote:
> > On Thu, Jun 05, 2025 at 05:42:55PM +0100, Mark Brown wrote:
>
> > > > Better to do all of these formating fixes and maintain the _same behaviour_ then
> > > > separately tackle whether or not we should skip.
>
> > > I'm confused, that's generally the opposite of the standard advice for
> > > the kernel - usually it's fixes first, then deal with anything cosmetic
> > > or new?
>
> > I mean the crux is that the 'cosmetic' changes also included a 'this might
> > break things' change.
>
> No, the cosmetic changes are separate.  I'm just saying I have a small
> bunch of stuff based on David's feedback to send out after the merge
> window.
>
> > I'm saying do the cosmetic things in _isolation_, or fix the brokenness
> > before doing the whole lot.
>
> Some subsystems will complain if you send anything that isn't urgent
> during the merge window, this looked more like an "I suppose you could
> configure the kernel that way" problem than a "people will routinely run
> into this" one, I was expecting it (or something) to go in as a fix but
> that it was safer to wait for -rc1 to send.
>
> > > > Obviously the better option would be to somehow determine if hugetlb is
> > > > available in advance (of course, theoretically somebody could come in and
> > > > reserve pages but that's not veyr likely).
>
> > > The tests do enumerate the set of available hugepage sizes at runtime
> > > (see the loop in run_test_case()) but detect_hugetlb_page_sizes() just
> > > looks in /sys/kernel/mm/hugepages/ for subdirectories and doesn't look
> > > inside those directories to see if there are actually any huge pages
> > > available for the huge page sizes advertised.  There's probably utility
> > > in at least a version of that function that checks.
>
> > Right yes, I mean obviously this whole thing is a mess already that's not
> > your fault, and ideally we'd have some general way of looking this up
> > across _all_ tests and just switch things on/off accordingly.
>
> That is at least library code so it'd get the three tests that use it,
> though possibly one of them actually wants the current behaviour for
> some reason?
>
> > There's a whole Pandora's box about what the tests should assume/not and
> > yeah. Anyway. Maybe leave it closed for now :)
>
> It's separate, yeah.  It'd also be good to document what you need to
> enable all the tests somewhere as well - there's the config fragment
> already which is good, but you also at least need a bunch of command
> line options to set up huge pages and enable secretmem.
Mark Brown June 5, 2025, 6:24 p.m. UTC | #13
On Thu, Jun 05, 2025 at 07:34:28PM +0200, David Hildenbrand wrote:
> On 05.06.25 19:19, Mark Brown wrote:

> > TBH this has been a lot better than the more common failure mode with
> > working on selftests where people just completely ignore or are openly
> > dismissive about them :/ .  Probably room for a middle ground though.

> Can we *please* limit such reworks to mechanical changes in the future?

Yes, that's better in general.
Mark Brown June 5, 2025, 6:29 p.m. UTC | #14
On Thu, Jun 05, 2025 at 06:47:28PM +0100, Lorenzo Stoakes wrote:

> Mark, I'm not finding this productive.

> Bottom line is you've broken the tests, please fix them or if you're not
> willing to I'll send a fix.

Sure, like I said further up I'm just running my patch through testing
at the minute.
Lorenzo Stoakes June 5, 2025, 6:35 p.m. UTC | #15
On Thu, Jun 05, 2025 at 07:29:58PM +0100, Mark Brown wrote:
> On Thu, Jun 05, 2025 at 06:47:28PM +0100, Lorenzo Stoakes wrote:
>
> > Mark, I'm not finding this productive.
>
> > Bottom line is you've broken the tests, please fix them or if you're not
> > willing to I'll send a fix.
>
> Sure, like I said further up I'm just running my patch through testing
> at the minute.

Awesome thanks, appreciate it!
Andrew Morton June 5, 2025, 8:32 p.m. UTC | #16
On Thu, 5 Jun 2025 18:48:49 +0200 David Hildenbrand <david@redhat.com> wrote:

> On 05.06.25 18:15, Mark Brown wrote:
> > On Thu, Jun 05, 2025 at 05:00:49PM +0100, Lorenzo Stoakes wrote:
> > 
> >> This seems to be causing tests to fail rather than be skipped if hugetlb
> >> isn't configured. I bisected the problem to this patch so it's definitely
> >> changed how things are handled (though of course it might just be
> >> _revealing_ some previously existing bug in this test...).
> > 
> >> Using a couple of tests as an example:
> > 
> >> Before this patch:
> > 
> >> # [RUN] R/O longterm GUP-fast pin in MAP_PRIVATE file mapping ... with memfd hugetlb (2048 kB)
> >> # memfd_create() failed (Cannot allocate memory)
> >> not ok 39 R/O longterm GUP-fast pin in MAP_PRIVATE file mapping ... with memfd hugetlb (2048 kB)
> >> # [RUN] R/O longterm GUP-fast pin in MAP_PRIVATE file mapping ... with memfd hugetlb (1048576 kB)
> >> # memfd_create() failed (Cannot allocate memory)
> >> not ok 40 R/O longterm GUP-fast pin in MAP_PRIVATE file mapping ... with memfd hugetlb (1048576 kB)
> > 
> > That's the thing with memfd being special and skipping on setup failure
> > that David mentioned, I've got a patch as part of the formatting series
> > I was going to send after the merge window.
> 
> @Andew, why did this series get merged already?

Late merge window hastiness :(  And I saw nothing worrisome in the review.
diff mbox series

Patch

diff --git a/tools/testing/selftests/mm/gup_longterm.c b/tools/testing/selftests/mm/gup_longterm.c
index e60e62809186..f84ea97c2543 100644
--- a/tools/testing/selftests/mm/gup_longterm.c
+++ b/tools/testing/selftests/mm/gup_longterm.c
@@ -93,33 +93,48 @@  static void do_test(int fd, size_t size, enum test_type type, bool shared)
 	__fsword_t fs_type = get_fs_type(fd);
 	bool should_work;
 	char *mem;
+	int result = KSFT_PASS;
 	int ret;
 
+	if (fd < 0) {
+		result = KSFT_FAIL;
+		goto report;
+	}
+
 	if (ftruncate(fd, size)) {
 		if (errno == ENOENT) {
 			skip_test_dodgy_fs("ftruncate()");
 		} else {
-			ksft_test_result_fail("ftruncate() failed (%s)\n", strerror(errno));
+			ksft_print_msg("ftruncate() failed (%s)\n",
+				       strerror(errno));
+			result = KSFT_FAIL;
+			goto report;
 		}
 		return;
 	}
 
 	if (fallocate(fd, 0, 0, size)) {
-		if (size == pagesize)
-			ksft_test_result_fail("fallocate() failed (%s)\n", strerror(errno));
-		else
-			ksft_test_result_skip("need more free huge pages\n");
-		return;
+		if (size == pagesize) {
+			ksft_print_msg("fallocate() failed (%s)\n", strerror(errno));
+			result = KSFT_FAIL;
+		} else {
+			ksft_print_msg("need more free huge pages\n");
+			result = KSFT_SKIP;
+		}
+		goto report;
 	}
 
 	mem = mmap(NULL, size, PROT_READ | PROT_WRITE,
 		   shared ? MAP_SHARED : MAP_PRIVATE, fd, 0);
 	if (mem == MAP_FAILED) {
-		if (size == pagesize || shared)
-			ksft_test_result_fail("mmap() failed (%s)\n", strerror(errno));
-		else
-			ksft_test_result_skip("need more free huge pages\n");
-		return;
+		if (size == pagesize || shared) {
+			ksft_print_msg("mmap() failed (%s)\n", strerror(errno));
+			result = KSFT_FAIL;
+		} else {
+			ksft_print_msg("need more free huge pages\n");
+			result = KSFT_SKIP;
+		}
+		goto report;
 	}
 
 	/* Fault in the page such that GUP-fast can pin it directly. */
@@ -134,7 +149,8 @@  static void do_test(int fd, size_t size, enum test_type type, bool shared)
 		 */
 		ret = mprotect(mem, size, PROT_READ);
 		if (ret) {
-			ksft_test_result_fail("mprotect() failed (%s)\n", strerror(errno));
+			ksft_print_msg("mprotect() failed (%s)\n", strerror(errno));
+			result = KSFT_FAIL;
 			goto munmap;
 		}
 		/* FALLTHROUGH */
@@ -147,12 +163,14 @@  static void do_test(int fd, size_t size, enum test_type type, bool shared)
 				type == TEST_TYPE_RW_FAST;
 
 		if (gup_fd < 0) {
-			ksft_test_result_skip("gup_test not available\n");
+			ksft_print_msg("gup_test not available\n");
+			result = KSFT_SKIP;
 			break;
 		}
 
 		if (rw && shared && fs_is_unknown(fs_type)) {
-			ksft_test_result_skip("Unknown filesystem\n");
+			ksft_print_msg("Unknown filesystem\n");
+			result = KSFT_SKIP;
 			return;
 		}
 		/*
@@ -169,14 +187,19 @@  static void do_test(int fd, size_t size, enum test_type type, bool shared)
 		args.flags |= rw ? PIN_LONGTERM_TEST_FLAG_USE_WRITE : 0;
 		ret = ioctl(gup_fd, PIN_LONGTERM_TEST_START, &args);
 		if (ret && errno == EINVAL) {
-			ksft_test_result_skip("PIN_LONGTERM_TEST_START failed (EINVAL)n");
+			ksft_print_msg("PIN_LONGTERM_TEST_START failed (EINVAL)n");
+			result = KSFT_SKIP;
 			break;
 		} else if (ret && errno == EFAULT) {
-			ksft_test_result(!should_work, "Should have failed\n");
+			if (should_work)
+				result = KSFT_FAIL;
+			else
+				result = KSFT_PASS;
 			break;
 		} else if (ret) {
-			ksft_test_result_fail("PIN_LONGTERM_TEST_START failed (%s)\n",
-					      strerror(errno));
+			ksft_print_msg("PIN_LONGTERM_TEST_START failed (%s)\n",
+				       strerror(errno));
+			result = KSFT_FAIL;
 			break;
 		}
 
@@ -189,7 +212,10 @@  static void do_test(int fd, size_t size, enum test_type type, bool shared)
 		 * some previously unsupported filesystems, we might want to
 		 * perform some additional tests for possible data corruptions.
 		 */
-		ksft_test_result(should_work, "Should have worked\n");
+		if (should_work)
+			result = KSFT_PASS;
+		else
+			result = KSFT_FAIL;
 		break;
 	}
 #ifdef LOCAL_CONFIG_HAVE_LIBURING
@@ -199,8 +225,9 @@  static void do_test(int fd, size_t size, enum test_type type, bool shared)
 
 		/* io_uring always pins pages writable. */
 		if (shared && fs_is_unknown(fs_type)) {
-			ksft_test_result_skip("Unknown filesystem\n");
-			return;
+			ksft_print_msg("Unknown filesystem\n");
+			result = KSFT_SKIP;
+			goto report;
 		}
 		should_work = !shared ||
 			      fs_supports_writable_longterm_pinning(fs_type);
@@ -208,8 +235,9 @@  static void do_test(int fd, size_t size, enum test_type type, bool shared)
 		/* Skip on errors, as we might just lack kernel support. */
 		ret = io_uring_queue_init(1, &ring, 0);
 		if (ret < 0) {
-			ksft_test_result_skip("io_uring_queue_init() failed (%s)\n",
-					      strerror(-ret));
+			ksft_print_msg("io_uring_queue_init() failed (%s)\n",
+				       strerror(-ret));
+			result = KSFT_SKIP;
 			break;
 		}
 		/*
@@ -222,17 +250,28 @@  static void do_test(int fd, size_t size, enum test_type type, bool shared)
 		/* Only new kernels return EFAULT. */
 		if (ret && (errno == ENOSPC || errno == EOPNOTSUPP ||
 			    errno == EFAULT)) {
-			ksft_test_result(!should_work, "Should have failed (%s)\n",
-					 strerror(errno));
+			if (should_work) {
+				ksft_print_msg("Should have failed (%s)\n",
+					       strerror(errno));
+				result = KSFT_FAIL;
+			} else {
+				result = KSFT_PASS;
+			}
 		} else if (ret) {
 			/*
 			 * We might just lack support or have insufficient
 			 * MEMLOCK limits.
 			 */
-			ksft_test_result_skip("io_uring_register_buffers() failed (%s)\n",
-					      strerror(-ret));
+			ksft_print_msg("io_uring_register_buffers() failed (%s)\n",
+				       strerror(-ret));
+			result = KSFT_SKIP;
 		} else {
-			ksft_test_result(should_work, "Should have worked\n");
+			if (should_work) {
+				result = KSFT_PASS;
+			} else {
+				ksft_print_msg("Should have worked\n");
+				result = KSFT_FAIL;
+			}
 			io_uring_unregister_buffers(&ring);
 		}
 
@@ -246,6 +285,8 @@  static void do_test(int fd, size_t size, enum test_type type, bool shared)
 
 munmap:
 	munmap(mem, size);
+report:
+	log_test_result(result);
 }
 
 typedef void (*test_fn)(int fd, size_t size);
@@ -254,13 +295,11 @@  static void run_with_memfd(test_fn fn, const char *desc)
 {
 	int fd;
 
-	ksft_print_msg("[RUN] %s ... with memfd\n", desc);
+	log_test_start("%s ... with memfd", desc);
 
 	fd = memfd_create("test", 0);
-	if (fd < 0) {
-		ksft_test_result_fail("memfd_create() failed (%s)\n", strerror(errno));
-		return;
-	}
+	if (fd < 0)
+		ksft_print_msg("memfd_create() failed (%s)\n", strerror(errno));
 
 	fn(fd, pagesize);
 	close(fd);
@@ -271,23 +310,23 @@  static void run_with_tmpfile(test_fn fn, const char *desc)
 	FILE *file;
 	int fd;
 
-	ksft_print_msg("[RUN] %s ... with tmpfile\n", desc);
+	log_test_start("%s ... with tmpfile", desc);
 
 	file = tmpfile();
 	if (!file) {
-		ksft_test_result_fail("tmpfile() failed (%s)\n", strerror(errno));
-		return;
-	}
-
-	fd = fileno(file);
-	if (fd < 0) {
-		ksft_test_result_fail("fileno() failed (%s)\n", strerror(errno));
-		goto close;
+		ksft_print_msg("tmpfile() failed (%s)\n", strerror(errno));
+		fd = -1;
+	} else {
+		fd = fileno(file);
+		if (fd < 0) {
+			ksft_print_msg("fileno() failed (%s)\n", strerror(errno));
+		}
 	}
 
 	fn(fd, pagesize);
-close:
-	fclose(file);
+
+	if (file)
+		fclose(file);
 }
 
 static void run_with_local_tmpfile(test_fn fn, const char *desc)
@@ -295,22 +334,22 @@  static void run_with_local_tmpfile(test_fn fn, const char *desc)
 	char filename[] = __FILE__"_tmpfile_XXXXXX";
 	int fd;
 
-	ksft_print_msg("[RUN] %s ... with local tmpfile\n", desc);
+	log_test_start("%s ... with local tmpfile", desc);
 
 	fd = mkstemp(filename);
-	if (fd < 0) {
-		ksft_test_result_fail("mkstemp() failed (%s)\n", strerror(errno));
-		return;
-	}
+	if (fd < 0)
+		ksft_print_msg("mkstemp() failed (%s)\n", strerror(errno));
 
 	if (unlink(filename)) {
-		ksft_test_result_fail("unlink() failed (%s)\n", strerror(errno));
-		goto close;
+		ksft_print_msg("unlink() failed (%s)\n", strerror(errno));
+		close(fd);
+		fd = -1;
 	}
 
 	fn(fd, pagesize);
-close:
-	close(fd);
+
+	if (fd >= 0)
+		close(fd);
 }
 
 static void run_with_memfd_hugetlb(test_fn fn, const char *desc,
@@ -319,15 +358,14 @@  static void run_with_memfd_hugetlb(test_fn fn, const char *desc,
 	int flags = MFD_HUGETLB;
 	int fd;
 
-	ksft_print_msg("[RUN] %s ... with memfd hugetlb (%zu kB)\n", desc,
+	log_test_start("%s ... with memfd hugetlb (%zu kB)", desc,
 		       hugetlbsize / 1024);
 
 	flags |= __builtin_ctzll(hugetlbsize) << MFD_HUGE_SHIFT;
 
 	fd = memfd_create("test", flags);
 	if (fd < 0) {
-		ksft_test_result_skip("memfd_create() failed (%s)\n", strerror(errno));
-		return;
+		ksft_print_msg("memfd_create() failed (%s)\n", strerror(errno));
 	}
 
 	fn(fd, hugetlbsize);