Message ID | 20230523074535.249802-15-hch@lst.de |
---|---|
State | Superseded |
Headers | show |
Series | [01/24] driver core: return bool from driver_probe_done | expand |
Hi, On Tue, May 23, 2023 at 09:45:25AM +0200, Christoph Hellwig wrote: > Instead of only clearing root_wait in devt_from_partuuid when the UUID > format was invalid, do that in parse_root_device for all strings that > failed to parse. > > Signed-off-by: Christoph Hellwig <hch@lst.de> In linux-next, almost all of my boot tests from usb drives as well as a few others fail with "Disabling rootwait; root= is invalid." in the log. Bisect points to this patch. It can not easily be reverted, but the following change fixes the problem. --- a/init/do_mounts.c +++ b/init/do_mounts.c @@ -413,10 +413,12 @@ static dev_t __init parse_root_device(char *root_device_name) error = early_lookup_bdev(root_device_name, &dev); if (error) { +#if 0 if (error == -EINVAL && root_wait) { pr_err("Disabling rootwait; root= is invalid.\n"); root_wait = 0; } +#endif return 0; } return dev; Debugging shows that early_lookup_bdev() indeed returns -EINVAL. Looking into it further, it turns out that devt_from_devname() returns -EINVAL for root devices such as root=/dev/sda if the device is not found, making it impossible to rootwait for such a device (this might for example be a raw USB drive without partitions, or any qemu drive with format=raw). Guenter --- # bad: [15e71592dbae49a674429c618a10401d7f992ac3] Add linux-next specific files for 20230621 # good: [45a3e24f65e90a047bef86f927ebdc4c710edaa1] Linux 6.4-rc7 git bisect start 'HEAD' 'v6.4-rc7' # good: [e867e67cd55ae460c860ffd896c7fc96add2821c] Merge branch 'master' of git://git.kernel.org/pub/scm/linux/kernel/git/herbert/cryptodev-2.6.git git bisect good e867e67cd55ae460c860ffd896c7fc96add2821c # bad: [0ab4015a11182e2a19c3dd52db85418f370cef39] Merge branch 'for-next' of git://git.kernel.dk/linux-block.git git bisect bad 0ab4015a11182e2a19c3dd52db85418f370cef39 # good: [901bdf5ea1a836400ee69aa32b04e9c209271ec7] Merge tag 'amd-drm-next-6.5-2023-06-09' of https://gitlab.freedesktop.org/agd5f/linux into drm-next git bisect good 901bdf5ea1a836400ee69aa32b04e9c209271ec7 # good: [07164956fbc26eff280f3a044a489460ae36413c] Merge branch 'for-next' of https://git.kernel.org/pub/scm/linux/kernel/git/ieee1394/linux1394.git git bisect good 07164956fbc26eff280f3a044a489460ae36413c # good: [3067e020d361ed346957eb5e253911f7a3e18f59] add snd_soc_{of_}get_dlc() git bisect good 3067e020d361ed346957eb5e253911f7a3e18f59 # bad: [0dbbd269fb6a8799f312dfc9b1ae1244a144cfc6] Merge branch 'for-6.5/block' into for-next git bisect bad 0dbbd269fb6a8799f312dfc9b1ae1244a144cfc6 # good: [6c500000af037f74b66dd01b565c8ee1b501cc1b] block: mark bio_add_folio as __must_check git bisect good 6c500000af037f74b66dd01b565c8ee1b501cc1b # bad: [1a0ddd56e545b743af510b5a1b8dbdfe7d35cd3b] pktcdvd: replace sscanf() by kstrtoul() git bisect bad 1a0ddd56e545b743af510b5a1b8dbdfe7d35cd3b # good: [e3102722ffe77094ba9e7e46380792b3dd8a7abd] init: rename mount_block_root to mount_root_generic git bisect good e3102722ffe77094ba9e7e46380792b3dd8a7abd # bad: [d4a28d7defe79006e59293a4b43d518ba8483fb0] dm: remove dm_get_dev_t git bisect bad d4a28d7defe79006e59293a4b43d518ba8483fb0 # good: [c0c1a7dcb6f5db4500e6574294674213bc24940c] init: move the nfs/cifs/ram special cases out of name_to_dev_t git bisect good c0c1a7dcb6f5db4500e6574294674213bc24940c # bad: [702f3189e454b3c3c2f3c99dbf30acf41aab707c] block: move the code to do early boot lookup of block devices to block/ git bisect bad 702f3189e454b3c3c2f3c99dbf30acf41aab707c # bad: [079caa35f7863cd9958b4555ae873ea4d352a502] init: clear root_wait on all invalid root= strings git bisect bad 079caa35f7863cd9958b4555ae873ea4d352a502 # good: [cf056a43121559d3642419917d405c3237ded90a] init: improve the name_to_dev_t interface git bisect good cf056a43121559d3642419917d405c3237ded90a # first bad commit: [079caa35f7863cd9958b4555ae873ea4d352a502] init: clear root_wait on all invalid root= strings
On Wed, Jun 21, 2023 at 02:07:13PM -0700, Guenter Roeck wrote: > On Tue, May 23, 2023 at 09:45:25AM +0200, Christoph Hellwig wrote: > > Instead of only clearing root_wait in devt_from_partuuid when the UUID > > format was invalid, do that in parse_root_device for all strings that > > failed to parse. > > > > Signed-off-by: Christoph Hellwig <hch@lst.de> > > In linux-next, almost all of my boot tests from usb drives as well > as a few others fail with "Disabling rootwait; root= is invalid." > in the log. Bisect points to this patch. Can you send such a log, and the command line you've used?
On 6/21/23 20:51, Christoph Hellwig wrote: > On Wed, Jun 21, 2023 at 02:07:13PM -0700, Guenter Roeck wrote: >> On Tue, May 23, 2023 at 09:45:25AM +0200, Christoph Hellwig wrote: >>> Instead of only clearing root_wait in devt_from_partuuid when the UUID >>> format was invalid, do that in parse_root_device for all strings that >>> failed to parse. >>> >>> Signed-off-by: Christoph Hellwig <hch@lst.de> >> >> In linux-next, almost all of my boot tests from usb drives as well >> as a few others fail with "Disabling rootwait; root= is invalid." >> in the log. Bisect points to this patch. > > Can you send such a log, and the command line you've used? > There are lots of logs at https://kerneltests.org/builders, in the 'next' column of qemu tests. One example is https://kerneltests.org/builders/qemu-arm-v7-next/builds/511/steps/qemubuildcommand/logs/stdio Sample command line: qemu-system-arm -M mcimx7d-sabre \ -kernel arch/arm/boot/zImage -no-reboot -snapshot \ -usb -device usb-storage,drive=d0,bus=usb-bus.1 \ -drive file=rootfs-armv7a.ext2,if=none,id=d0,format=raw \ -m 256 -nic user -display none \ --append "root=/dev/sda rootwait earlycon=ec_imx6q,mmio,0x30860000,115200n8 console=ttymxc0,115200" \ -dtb arch/arm/boot/dts/imx7d-sdb.dtb \ -nographic -monitor null -serial stdio This is with a modified imx_v6_v7_defconfig and root file system from https://github.com/groeck/linux-build-test/blob/master/rootfs/arm-v7/rootfs-armv7a.ext2.gz The -EINVAL return value is from if (p < s + 2 || !isdigit(p[-2]) || p[-1] != 'p') in devt_from_devname(). Guenter
Hi Guenter, can you try this patch? diff --git a/block/early-lookup.c b/block/early-lookup.c index a5be3c68ed079c..66e4514d671179 100644 --- a/block/early-lookup.c +++ b/block/early-lookup.c @@ -174,7 +174,7 @@ static int __init devt_from_devname(const char *name, dev_t *devt) while (p > s && isdigit(p[-1])) p--; if (p == s || !*p || *p == '0') - return -EINVAL; + return -ENODEV; /* try disk name without <part number> */ part = simple_strtoul(p, NULL, 10);
On 6/21/23 23:00, Christoph Hellwig wrote: > Hi Guenter, > > can you try this patch? > > diff --git a/block/early-lookup.c b/block/early-lookup.c > index a5be3c68ed079c..66e4514d671179 100644 > --- a/block/early-lookup.c > +++ b/block/early-lookup.c > @@ -174,7 +174,7 @@ static int __init devt_from_devname(const char *name, dev_t *devt) > while (p > s && isdigit(p[-1])) > p--; > if (p == s || !*p || *p == '0') > - return -EINVAL; > + return -ENODEV; > > /* try disk name without <part number> */ > part = simple_strtoul(p, NULL, 10); Not completely. Tests with root=/dev/sda still fail. "name" passed to devt_from_devname() is "sda". for (p = s; *p; p++) { if (*p == '/') *p = '!'; } advances 'p' to the end of the string. while (p > s && isdigit(p[-1])) p--; moves it back to point to the first digit (if there is one). if (p == s || !*p || *p == '0') return -EINVAL; then fails because *p is 0. In other words, the function only accepts drive names with digits at the end (and the first digit must not be '0'). I don't recall how I hit the other condition earlier. I have various "/dev/mmcblkX" in my tests, where X can be any number including 0. Maybe those fail randomly as well. Overall I am not sure though what an "invalid" devicename is supposed to be in this context. I have "sda", "sr0", "vda", "mtdblkX", "nvme0n1", "mmcblkX", and "hda". Why would any of those not be eligible for "rootwait" ? In practice, everything not ending with a digit, or ending with '0', fails the first test. Everything ending with a digit > 0 fails the second test. But "humptydump3p4" passes all those tests. Guenter --- #include <stdio.h> #include <string.h> #include <ctype.h> #include <stdlib.h> #define EINVAL1 1 #define EINVAL2 2 #define EINVAL3 3 #define ENODEV 4 static int devt_from_devname(const char *name) { int part; char s[32]; char *p; if (strlen(name) > 31) return EINVAL1; strcpy(s, name); for (p = s; *p; p++) { if (*p == '/') *p = '!'; } /* * Try non-existent, but valid partition, which may only exist after * opening the device, like partitioned md devices. */ while (p > s && isdigit(p[-1])) p--; if (p == s || !*p || *p == '0') { return EINVAL2; } /* try disk name without <part number> */ part = strtoul(p, NULL, 10); *p = '\0'; /* try disk name without p<part number> */ if (p < s + 2 || !isdigit(p[-2]) || p[-1] != 'p') { return EINVAL3; } return ENODEV; } char *devnames[] = { "sda", "sda1", "mmcblk0", "mmcblk1", "mtdblk0", "mtdblk1", "vda", "hda", "nvme0n1", "sr0", "sr1", "humptydump3p4", NULL }; int main(int argc, char **argv) { char *str; int i; for (i = 0, str = devnames[0]; str; str = devnames[++i]) { printf("%s: %d\n", str, devt_from_devname(str)); } }
On Thu, Jun 22, 2023 at 06:54:41AM -0700, Guenter Roeck wrote: > On 6/21/23 23:00, Christoph Hellwig wrote: >> Hi Guenter, >> >> can you try this patch? >> >> diff --git a/block/early-lookup.c b/block/early-lookup.c >> index a5be3c68ed079c..66e4514d671179 100644 >> --- a/block/early-lookup.c >> +++ b/block/early-lookup.c >> @@ -174,7 +174,7 @@ static int __init devt_from_devname(const char *name, dev_t *devt) >> while (p > s && isdigit(p[-1])) >> p--; >> if (p == s || !*p || *p == '0') >> - return -EINVAL; >> + return -ENODEV; >> /* try disk name without <part number> */ >> part = simple_strtoul(p, NULL, 10); > > Not completely. Tests with root=/dev/sda still fail. > > "name" passed to devt_from_devname() is "sda". > > for (p = s; *p; p++) { > if (*p == '/') > *p = '!'; > } > > advances 'p' to the end of the string. > > while (p > s && isdigit(p[-1])) > p--; > > moves it back to point to the first digit (if there is one). > > if (p == s || !*p || *p == '0') > return -EINVAL; > > then fails because *p is 0. In other words, the function only accepts > drive names with digits at the end (and the first digit must not be '0'). > > I don't recall how I hit the other condition earlier. I have various > "/dev/mmcblkX" in my tests, where X can be any number including 0. > Maybe those fail randomly as well. > > Overall I am not sure though what an "invalid" devicename is supposed > to be in this context. I have "sda", "sr0", "vda", "mtdblkX", > "nvme0n1", "mmcblkX", and "hda". Why would any of those not be eligible > for "rootwait" ? > > In practice, everything not ending with a digit, or ending with > '0', fails the first test. Everything ending with a digit > 0 > fails the second test. But "humptydump3p4" passes all those tests. Yeah. I guess I should give up on the idea of error out in this particular parser. The idea sounded good, but I guess it doesn't work. So we'll probably want his fix: diff --git a/block/early-lookup.c b/block/early-lookup.c index a5be3c68ed079c..9e2d5a19de1b3b 100644 --- a/block/early-lookup.c +++ b/block/early-lookup.c @@ -174,7 +174,7 @@ static int __init devt_from_devname(const char *name, dev_t *devt) while (p > s && isdigit(p[-1])) p--; if (p == s || !*p || *p == '0') - return -EINVAL; + return -ENODEV; /* try disk name without <part number> */ part = simple_strtoul(p, NULL, 10); @@ -185,7 +185,7 @@ static int __init devt_from_devname(const char *name, dev_t *devt) /* try disk name without p<part number> */ if (p < s + 2 || !isdigit(p[-2]) || p[-1] != 'p') - return -EINVAL; + return -ENODEV; p[-1] = '\0'; *devt = blk_lookup_devt(s, part); if (*devt)
On 6/22/23 07:40, Christoph Hellwig wrote: > On Thu, Jun 22, 2023 at 06:54:41AM -0700, Guenter Roeck wrote: >> On 6/21/23 23:00, Christoph Hellwig wrote: >>> Hi Guenter, >>> >>> can you try this patch? >>> >>> diff --git a/block/early-lookup.c b/block/early-lookup.c >>> index a5be3c68ed079c..66e4514d671179 100644 >>> --- a/block/early-lookup.c >>> +++ b/block/early-lookup.c >>> @@ -174,7 +174,7 @@ static int __init devt_from_devname(const char *name, dev_t *devt) >>> while (p > s && isdigit(p[-1])) >>> p--; >>> if (p == s || !*p || *p == '0') >>> - return -EINVAL; >>> + return -ENODEV; >>> /* try disk name without <part number> */ >>> part = simple_strtoul(p, NULL, 10); >> >> Not completely. Tests with root=/dev/sda still fail. >> >> "name" passed to devt_from_devname() is "sda". >> >> for (p = s; *p; p++) { >> if (*p == '/') >> *p = '!'; >> } >> >> advances 'p' to the end of the string. >> >> while (p > s && isdigit(p[-1])) >> p--; >> >> moves it back to point to the first digit (if there is one). >> >> if (p == s || !*p || *p == '0') >> return -EINVAL; >> >> then fails because *p is 0. In other words, the function only accepts >> drive names with digits at the end (and the first digit must not be '0'). >> >> I don't recall how I hit the other condition earlier. I have various >> "/dev/mmcblkX" in my tests, where X can be any number including 0. >> Maybe those fail randomly as well. >> >> Overall I am not sure though what an "invalid" devicename is supposed >> to be in this context. I have "sda", "sr0", "vda", "mtdblkX", >> "nvme0n1", "mmcblkX", and "hda". Why would any of those not be eligible >> for "rootwait" ? >> >> In practice, everything not ending with a digit, or ending with >> '0', fails the first test. Everything ending with a digit > 0 >> fails the second test. But "humptydump3p4" passes all those tests. > > > Yeah. I guess I should give up on the idea of error out in this > particular parser. The idea sounded good, but I guess it doesn't > work. So we'll probably want his fix: > Yes, that fixes the problem for me. Guenter > > diff --git a/block/early-lookup.c b/block/early-lookup.c > index a5be3c68ed079c..9e2d5a19de1b3b 100644 > --- a/block/early-lookup.c > +++ b/block/early-lookup.c > @@ -174,7 +174,7 @@ static int __init devt_from_devname(const char *name, dev_t *devt) > while (p > s && isdigit(p[-1])) > p--; > if (p == s || !*p || *p == '0') > - return -EINVAL; > + return -ENODEV; > > /* try disk name without <part number> */ > part = simple_strtoul(p, NULL, 10); > @@ -185,7 +185,7 @@ static int __init devt_from_devname(const char *name, dev_t *devt) > > /* try disk name without p<part number> */ > if (p < s + 2 || !isdigit(p[-2]) || p[-1] != 'p') > - return -EINVAL; > + return -ENODEV; > p[-1] = '\0'; > *devt = blk_lookup_devt(s, part); > if (*devt)
diff --git a/init/do_mounts.c b/init/do_mounts.c index f1953aeb321978..0b36a5f39ee8e2 100644 --- a/init/do_mounts.c +++ b/init/do_mounts.c @@ -112,14 +112,14 @@ static int devt_from_partuuid(const char *uuid_str, dev_t *devt) /* Explicitly fail on poor PARTUUID syntax. */ if (sscanf(slash + 1, "PARTNROFF=%d%c", &offset, &c) != 1) - goto clear_root_wait; + goto out_invalid; cmp.len = slash - uuid_str; } else { cmp.len = strlen(uuid_str); } if (!cmp.len) - goto clear_root_wait; + goto out_invalid; dev = class_find_device(&block_class, NULL, &cmp, &match_dev_by_uuid); if (!dev) @@ -139,12 +139,9 @@ static int devt_from_partuuid(const char *uuid_str, dev_t *devt) put_device(dev); return 0; -clear_root_wait: +out_invalid: pr_err("VFS: PARTUUID= is invalid.\n" "Expected PARTUUID=<valid-uuid-id>[/PARTNROFF=%%d]\n"); - if (root_wait) - pr_err("Disabling rootwait; root= is invalid.\n"); - root_wait = 0; return -EINVAL; } @@ -611,6 +608,7 @@ static void __init wait_for_root(char *root_device_name) static dev_t __init parse_root_device(char *root_device_name) { + int error; dev_t dev; if (!strncmp(root_device_name, "mtd", 3) || @@ -623,8 +621,14 @@ static dev_t __init parse_root_device(char *root_device_name) if (strcmp(root_device_name, "/dev/ram") == 0) return Root_RAM0; - if (early_lookup_bdev(root_device_name, &dev)) + error = early_lookup_bdev(root_device_name, &dev); + if (error) { + if (error == -EINVAL && root_wait) { + pr_err("Disabling rootwait; root= is invalid.\n"); + root_wait = 0; + } return 0; + } return dev; }
Instead of only clearing root_wait in devt_from_partuuid when the UUID format was invalid, do that in parse_root_device for all strings that failed to parse. Signed-off-by: Christoph Hellwig <hch@lst.de> --- init/do_mounts.c | 18 +++++++++++------- 1 file changed, 11 insertions(+), 7 deletions(-)