diff mbox series

[RFC,v2,3/3] usb: scan multiple buses simultaneously with coroutines

Message ID 58d6721203ea81b9c20f162ef40f79bc73243a42.1738059345.git.jerome.forissier@linaro.org
State New
Headers show
Series Coroutines | expand

Commit Message

Jerome Forissier Jan. 28, 2025, 10:19 a.m. UTC
Use the coroutines framework to scan USB buses in parallel for better
performance. Tested on arm64 QEMU on a somewhat contrived example
(4 USB buses, each with one audio device, one keyboard, one mouse and
one tablet).

$ make qemu_arm64_defconfig
$ make -j$(nproc) CROSS_COMPILE="ccache aarch64-linux-gnu-"
$ qemu-system-aarch64 -M virt -nographic -cpu max -bios u-boot.bin \
    $(for i in {1..4}; do echo -device qemu-xhci,id=xhci$i \
        -device\ usb-{audio,kbd,mouse,tablet},bus=xhci$i.0; \
    done)

The time spent in usb_init() is reported on the console and shows a
significant improvement with COROUTINES enabled.

** Without COROUTINES

 Bus xhci_pci: Register 8001040 NbrPorts 8
 Starting the controller
 USB XHCI 1.00
 Bus xhci_pci: Register 8001040 NbrPorts 8
 Starting the controller
 USB XHCI 1.00
 Bus xhci_pci: Register 8001040 NbrPorts 8
 Starting the controller
 USB XHCI 1.00
 Bus xhci_pci: Register 8001040 NbrPorts 8
 Starting the controller
 USB XHCI 1.00
 scanning bus xhci_pci for devices... 6 USB Device(s) found
 scanning bus xhci_pci for devices... 6 USB Device(s) found
 scanning bus xhci_pci for devices... 6 USB Device(s) found
 scanning bus xhci_pci for devices... 6 USB Device(s) found
 USB: 4 bus(es) scanned in 5873 ms

** With COROUTINES

 Bus xhci_pci: Register 8001040 NbrPorts 8
 Starting the controller
 USB XHCI 1.00
 Bus xhci_pci: Register 8001040 NbrPorts 8
 Starting the controller
 USB XHCI 1.00
 Bus xhci_pci: Register 8001040 NbrPorts 8
 Starting the controller
 USB XHCI 1.00
 Bus xhci_pci: Register 8001040 NbrPorts 8
 Starting the controller
 USB XHCI 1.00
 Scanning 4 USB bus(es)... done
 Bus xhci_pci: 6 USB device(s) found
 Bus xhci_pci: 6 USB device(s) found
 Bus xhci_pci: 6 USB device(s) found
 Bus xhci_pci: 6 USB device(s) found
 USB: 4 bus(es) scanned in 2213 ms

Signed-off-by: Jerome Forissier <jerome.forissier@linaro.org>
---
 drivers/usb/host/usb-uclass.c | 152 +++++++++++++++++++++++++++++++++-
 1 file changed, 149 insertions(+), 3 deletions(-)

Comments

Michal Simek Jan. 28, 2025, 11:58 a.m. UTC | #1
Hi,

út 28. 1. 2025 v 11:20 odesílatel Jerome Forissier
<jerome.forissier@linaro.org> napsal:
>
> Use the coroutines framework to scan USB buses in parallel for better
> performance. Tested on arm64 QEMU on a somewhat contrived example
> (4 USB buses, each with one audio device, one keyboard, one mouse and
> one tablet).
>
> $ make qemu_arm64_defconfig
> $ make -j$(nproc) CROSS_COMPILE="ccache aarch64-linux-gnu-"
> $ qemu-system-aarch64 -M virt -nographic -cpu max -bios u-boot.bin \
>     $(for i in {1..4}; do echo -device qemu-xhci,id=xhci$i \
>         -device\ usb-{audio,kbd,mouse,tablet},bus=xhci$i.0; \
>     done)
>
> The time spent in usb_init() is reported on the console and shows a
> significant improvement with COROUTINES enabled.
>
> ** Without COROUTINES
>
>  Bus xhci_pci: Register 8001040 NbrPorts 8
>  Starting the controller
>  USB XHCI 1.00
>  Bus xhci_pci: Register 8001040 NbrPorts 8
>  Starting the controller
>  USB XHCI 1.00
>  Bus xhci_pci: Register 8001040 NbrPorts 8
>  Starting the controller
>  USB XHCI 1.00
>  Bus xhci_pci: Register 8001040 NbrPorts 8
>  Starting the controller
>  USB XHCI 1.00
>  scanning bus xhci_pci for devices... 6 USB Device(s) found
>  scanning bus xhci_pci for devices... 6 USB Device(s) found
>  scanning bus xhci_pci for devices... 6 USB Device(s) found
>  scanning bus xhci_pci for devices... 6 USB Device(s) found
>  USB: 4 bus(es) scanned in 5873 ms
>
> ** With COROUTINES
>
>  Bus xhci_pci: Register 8001040 NbrPorts 8
>  Starting the controller
>  USB XHCI 1.00
>  Bus xhci_pci: Register 8001040 NbrPorts 8
>  Starting the controller
>  USB XHCI 1.00
>  Bus xhci_pci: Register 8001040 NbrPorts 8
>  Starting the controller
>  USB XHCI 1.00
>  Bus xhci_pci: Register 8001040 NbrPorts 8
>  Starting the controller
>  USB XHCI 1.00
>  Scanning 4 USB bus(es)... done
>  Bus xhci_pci: 6 USB device(s) found
>  Bus xhci_pci: 6 USB device(s) found
>  Bus xhci_pci: 6 USB device(s) found
>  Bus xhci_pci: 6 USB device(s) found
>  USB: 4 bus(es) scanned in 2213 ms
>
> Signed-off-by: Jerome Forissier <jerome.forissier@linaro.org>
> ---
>  drivers/usb/host/usb-uclass.c | 152 +++++++++++++++++++++++++++++++++-
>  1 file changed, 149 insertions(+), 3 deletions(-)
>
> diff --git a/drivers/usb/host/usb-uclass.c b/drivers/usb/host/usb-uclass.c
> index bfec303e7af..3104efe7f9e 100644
> --- a/drivers/usb/host/usb-uclass.c
> +++ b/drivers/usb/host/usb-uclass.c
> @@ -9,6 +9,7 @@
>  #define LOG_CATEGORY UCLASS_USB
>
>  #include <bootdev.h>
> +#include <coroutines.h>
>  #include <dm.h>
>  #include <errno.h>
>  #include <log.h>
> @@ -18,6 +19,8 @@
>  #include <dm/lists.h>
>  #include <dm/uclass-internal.h>
>
> +#include <time.h>
> +
>  static bool asynch_allowed;
>
>  struct usb_uclass_priv {
> @@ -221,6 +224,40 @@ int usb_stop(void)
>         return err;
>  }
>
> +static int nbus;
> +
> +#if CONFIG_IS_ENABLED(COROUTINES)
> +static void usb_scan_bus(struct udevice *bus, bool recurse)
> +{
> +       struct usb_bus_priv *priv;
> +       struct udevice *dev;
> +       int ret;
> +
> +       priv = dev_get_uclass_priv(bus);
> +
> +       assert(recurse);        /* TODO: Support non-recusive */
> +
> +       debug("\n");
> +       ret = usb_scan_device(bus, 0, USB_SPEED_FULL, &dev);
> +       if (ret)
> +               printf("Scanning bus %s failed, error %d\n", bus->name, ret);
> +}
> +
> +static void usb_report_devices(struct uclass *uc)
> +{
> +       struct usb_bus_priv *priv;
> +       struct udevice *bus;
> +
> +       uclass_foreach_dev(bus, uc) {
> +               priv = dev_get_uclass_priv(bus);
> +               printf("Bus %s: ", bus->name);
> +               if (priv->next_addr == 0)
> +                       printf("No USB device found\n");
> +               else
> +                       printf("%d USB device(s) found\n", priv->next_addr);
> +       }
> +}
> +#else
>  static void usb_scan_bus(struct udevice *bus, bool recurse)
>  {
>         struct usb_bus_priv *priv;
> @@ -240,7 +277,81 @@ static void usb_scan_bus(struct udevice *bus, bool recurse)
>                 printf("No USB Device found\n");
>         else
>                 printf("%d USB Device(s) found\n", priv->next_addr);
> +       nbus++;
>  }
> +#endif
> +
> +#if CONFIG_IS_ENABLED(COROUTINES)
> +extern int udelay_yield;
> +
> +static void usb_scan_bus_co(void)
> +{
> +       usb_scan_bus((struct udevice *)co_get_arg(), true);
> +       co_exit();
> +}
> +
> +static struct co_stack *stk;
> +static struct co *main_co;
> +static struct co **co;
> +static int co_sz = 8;
> +
> +static int add_usb_scan_bus_co(struct udevice *bus)
> +{
> +       if (!co) {
> +               co = malloc(co_sz * sizeof(*co));
> +               if (!co)
> +                       return -ENOMEM;
> +       }
> +       if (nbus == co_sz) {
> +               struct co **nco;
> +
> +               co_sz *= 2;
> +               nco = realloc(co, co_sz * sizeof(*co));
> +               if (!nco)
> +                       return -ENOMEM;
> +               co = nco;
> +       }
> +       if (!main_co) {
> +               main_co = co_create(NULL, NULL, 0, NULL, NULL);
> +               if (!main_co)
> +                       return -ENOMEM;
> +       }
> +       if (!stk) {
> +               stk = co_stack_new(32768);
> +               if (!stk)
> +                       return -ENOMEM;
> +       }
> +       co[nbus] = co_create(main_co, stk, 0, usb_scan_bus_co, bus);
> +       if (!co[nbus])
> +               return -ENOMEM;
> +       nbus++;
> +       return 0;
> +}
> +
> +static void usb_scan_cleanup(void)
> +{
> +       int i;
> +
> +       for (i = 0; i < nbus; i++) {
> +               co_destroy(co[i]);
> +               co[i] = NULL;
> +       }
> +       nbus = 0;
> +       co_destroy(main_co);
> +       main_co = NULL;
> +       co_stack_destroy(stk);
> +       stk = NULL;
> +}
> +#else
> +static int add_usb_scan_bus_co(struct udevice *bus)
> +{
> +       return 0;
> +}
> +
> +static void usb_scan_cleanup(void)
> +{
> +}
> +#endif
>
>  static void remove_inactive_children(struct uclass *uc, struct udevice *bus)
>  {
> @@ -289,6 +400,7 @@ static int usb_probe_companion(struct udevice *bus)
>
>  int usb_init(void)
>  {
> +       unsigned long t0 = timer_get_us();
>         int controllers_initialized = 0;
>         struct usb_uclass_priv *uc_priv;
>         struct usb_bus_priv *priv;
> @@ -355,10 +467,40 @@ int usb_init(void)
>                         continue;
>
>                 priv = dev_get_uclass_priv(bus);
> -               if (!priv->companion)
> -                       usb_scan_bus(bus, true);
> +               if (!priv->companion) {
> +                       if (CONFIG_IS_ENABLED(COROUTINES)) {
> +                               ret = add_usb_scan_bus_co(bus);
> +                               if (ret)
> +                                       goto out;
> +                       } else {
> +                               usb_scan_bus(bus, true);
> +                       }
> +               }
>         }
>
> +#if CONFIG_IS_ENABLED(COROUTINES)
> +       {
> +               bool done;
> +               int i;
> +
> +               printf("Scanning %d USB bus(es)... ", nbus);
> +               udelay_yield = 0xCAFEDECA;
> +               do {
> +                       done = true;
> +                       for (i = 0; i < nbus; i++) {
> +                               if (!co[i]->done) {
> +                                       done = false;
> +                                       co_resume(co[i]);
> +                               }
> +                       }
> +               } while (!done);
> +               udelay_yield = 0;
> +               printf("done\n");
> +
> +               usb_report_devices(uc);
> +       }
> +#endif
> +
>         /*
>          * Now that the primary controllers have been scanned and have handed
>          * over any devices they do not understand to their companions, scan
> @@ -388,7 +530,11 @@ int usb_init(void)
>         /* if we were not able to find at least one working bus, bail out */
>         if (controllers_initialized == 0)
>                 printf("No USB controllers found\n");
> -
> +out:
> +       if (nbus)
> +               printf("USB: %d bus(es) scanned in %ld ms\n", nbus,
> +                      (timer_get_us() - t0) / 1000);
> +       usb_scan_cleanup();
>         return usb_started ? 0 : -ENOENT;
>  }
>
> --
> 2.43.0
>

I have tested it on the KR260, which uses two USB interfaces, and there
is an issue with USB hub initialization.
That board has two hubs connected over I2C, and only one of them is
initialized over I2C.
It means there is some work which still needs to happen, and likely some
locking should be in place.

Thanks,
Michal
Jerome Forissier Jan. 28, 2025, 1:53 p.m. UTC | #2
On 1/28/25 12:58, Michal Simek wrote:
> Hi,
> 
> út 28. 1. 2025 v 11:20 odesílatel Jerome Forissier
> <jerome.forissier@linaro.org> napsal:
>>
>> Use the coroutines framework to scan USB buses in parallel for better
>> performance. Tested on arm64 QEMU on a somewhat contrived example
>> (4 USB buses, each with one audio device, one keyboard, one mouse and
>> one tablet).
>>
>> $ make qemu_arm64_defconfig
>> $ make -j$(nproc) CROSS_COMPILE="ccache aarch64-linux-gnu-"
>> $ qemu-system-aarch64 -M virt -nographic -cpu max -bios u-boot.bin \
>>     $(for i in {1..4}; do echo -device qemu-xhci,id=xhci$i \
>>         -device\ usb-{audio,kbd,mouse,tablet},bus=xhci$i.0; \
>>     done)
>>
>> The time spent in usb_init() is reported on the console and shows a
>> significant improvement with COROUTINES enabled.
>>
>> ** Without COROUTINES
>>
>>  Bus xhci_pci: Register 8001040 NbrPorts 8
>>  Starting the controller
>>  USB XHCI 1.00
>>  Bus xhci_pci: Register 8001040 NbrPorts 8
>>  Starting the controller
>>  USB XHCI 1.00
>>  Bus xhci_pci: Register 8001040 NbrPorts 8
>>  Starting the controller
>>  USB XHCI 1.00
>>  Bus xhci_pci: Register 8001040 NbrPorts 8
>>  Starting the controller
>>  USB XHCI 1.00
>>  scanning bus xhci_pci for devices... 6 USB Device(s) found
>>  scanning bus xhci_pci for devices... 6 USB Device(s) found
>>  scanning bus xhci_pci for devices... 6 USB Device(s) found
>>  scanning bus xhci_pci for devices... 6 USB Device(s) found
>>  USB: 4 bus(es) scanned in 5873 ms
>>
>> ** With COROUTINES
>>
>>  Bus xhci_pci: Register 8001040 NbrPorts 8
>>  Starting the controller
>>  USB XHCI 1.00
>>  Bus xhci_pci: Register 8001040 NbrPorts 8
>>  Starting the controller
>>  USB XHCI 1.00
>>  Bus xhci_pci: Register 8001040 NbrPorts 8
>>  Starting the controller
>>  USB XHCI 1.00
>>  Bus xhci_pci: Register 8001040 NbrPorts 8
>>  Starting the controller
>>  USB XHCI 1.00
>>  Scanning 4 USB bus(es)... done
>>  Bus xhci_pci: 6 USB device(s) found
>>  Bus xhci_pci: 6 USB device(s) found
>>  Bus xhci_pci: 6 USB device(s) found
>>  Bus xhci_pci: 6 USB device(s) found
>>  USB: 4 bus(es) scanned in 2213 ms
>>
>> Signed-off-by: Jerome Forissier <jerome.forissier@linaro.org>
>> ---
>>  drivers/usb/host/usb-uclass.c | 152 +++++++++++++++++++++++++++++++++-
>>  1 file changed, 149 insertions(+), 3 deletions(-)
>>
>> diff --git a/drivers/usb/host/usb-uclass.c b/drivers/usb/host/usb-uclass.c
>> index bfec303e7af..3104efe7f9e 100644
>> --- a/drivers/usb/host/usb-uclass.c
>> +++ b/drivers/usb/host/usb-uclass.c
>> @@ -9,6 +9,7 @@
>>  #define LOG_CATEGORY UCLASS_USB
>>
>>  #include <bootdev.h>
>> +#include <coroutines.h>
>>  #include <dm.h>
>>  #include <errno.h>
>>  #include <log.h>
>> @@ -18,6 +19,8 @@
>>  #include <dm/lists.h>
>>  #include <dm/uclass-internal.h>
>>
>> +#include <time.h>
>> +
>>  static bool asynch_allowed;
>>
>>  struct usb_uclass_priv {
>> @@ -221,6 +224,40 @@ int usb_stop(void)
>>         return err;
>>  }
>>
>> +static int nbus;
>> +
>> +#if CONFIG_IS_ENABLED(COROUTINES)
>> +static void usb_scan_bus(struct udevice *bus, bool recurse)
>> +{
>> +       struct usb_bus_priv *priv;
>> +       struct udevice *dev;
>> +       int ret;
>> +
>> +       priv = dev_get_uclass_priv(bus);
>> +
>> +       assert(recurse);        /* TODO: Support non-recusive */
>> +
>> +       debug("\n");
>> +       ret = usb_scan_device(bus, 0, USB_SPEED_FULL, &dev);
>> +       if (ret)
>> +               printf("Scanning bus %s failed, error %d\n", bus->name, ret);
>> +}
>> +
>> +static void usb_report_devices(struct uclass *uc)
>> +{
>> +       struct usb_bus_priv *priv;
>> +       struct udevice *bus;
>> +
>> +       uclass_foreach_dev(bus, uc) {
>> +               priv = dev_get_uclass_priv(bus);
>> +               printf("Bus %s: ", bus->name);
>> +               if (priv->next_addr == 0)
>> +                       printf("No USB device found\n");
>> +               else
>> +                       printf("%d USB device(s) found\n", priv->next_addr);
>> +       }
>> +}
>> +#else
>>  static void usb_scan_bus(struct udevice *bus, bool recurse)
>>  {
>>         struct usb_bus_priv *priv;
>> @@ -240,7 +277,81 @@ static void usb_scan_bus(struct udevice *bus, bool recurse)
>>                 printf("No USB Device found\n");
>>         else
>>                 printf("%d USB Device(s) found\n", priv->next_addr);
>> +       nbus++;
>>  }
>> +#endif
>> +
>> +#if CONFIG_IS_ENABLED(COROUTINES)
>> +extern int udelay_yield;
>> +
>> +static void usb_scan_bus_co(void)
>> +{
>> +       usb_scan_bus((struct udevice *)co_get_arg(), true);
>> +       co_exit();
>> +}
>> +
>> +static struct co_stack *stk;
>> +static struct co *main_co;
>> +static struct co **co;
>> +static int co_sz = 8;
>> +
>> +static int add_usb_scan_bus_co(struct udevice *bus)
>> +{
>> +       if (!co) {
>> +               co = malloc(co_sz * sizeof(*co));
>> +               if (!co)
>> +                       return -ENOMEM;
>> +       }
>> +       if (nbus == co_sz) {
>> +               struct co **nco;
>> +
>> +               co_sz *= 2;
>> +               nco = realloc(co, co_sz * sizeof(*co));
>> +               if (!nco)
>> +                       return -ENOMEM;
>> +               co = nco;
>> +       }
>> +       if (!main_co) {
>> +               main_co = co_create(NULL, NULL, 0, NULL, NULL);
>> +               if (!main_co)
>> +                       return -ENOMEM;
>> +       }
>> +       if (!stk) {
>> +               stk = co_stack_new(32768);
>> +               if (!stk)
>> +                       return -ENOMEM;
>> +       }
>> +       co[nbus] = co_create(main_co, stk, 0, usb_scan_bus_co, bus);
>> +       if (!co[nbus])
>> +               return -ENOMEM;
>> +       nbus++;
>> +       return 0;
>> +}
>> +
>> +static void usb_scan_cleanup(void)
>> +{
>> +       int i;
>> +
>> +       for (i = 0; i < nbus; i++) {
>> +               co_destroy(co[i]);
>> +               co[i] = NULL;
>> +       }
>> +       nbus = 0;
>> +       co_destroy(main_co);
>> +       main_co = NULL;
>> +       co_stack_destroy(stk);
>> +       stk = NULL;
>> +}
>> +#else
>> +static int add_usb_scan_bus_co(struct udevice *bus)
>> +{
>> +       return 0;
>> +}
>> +
>> +static void usb_scan_cleanup(void)
>> +{
>> +}
>> +#endif
>>
>>  static void remove_inactive_children(struct uclass *uc, struct udevice *bus)
>>  {
>> @@ -289,6 +400,7 @@ static int usb_probe_companion(struct udevice *bus)
>>
>>  int usb_init(void)
>>  {
>> +       unsigned long t0 = timer_get_us();
>>         int controllers_initialized = 0;
>>         struct usb_uclass_priv *uc_priv;
>>         struct usb_bus_priv *priv;
>> @@ -355,10 +467,40 @@ int usb_init(void)
>>                         continue;
>>
>>                 priv = dev_get_uclass_priv(bus);
>> -               if (!priv->companion)
>> -                       usb_scan_bus(bus, true);
>> +               if (!priv->companion) {
>> +                       if (CONFIG_IS_ENABLED(COROUTINES)) {
>> +                               ret = add_usb_scan_bus_co(bus);
>> +                               if (ret)
>> +                                       goto out;
>> +                       } else {
>> +                               usb_scan_bus(bus, true);
>> +                       }
>> +               }
>>         }
>>
>> +#if CONFIG_IS_ENABLED(COROUTINES)
>> +       {
>> +               bool done;
>> +               int i;
>> +
>> +               printf("Scanning %d USB bus(es)... ", nbus);
>> +               udelay_yield = 0xCAFEDECA;
>> +               do {
>> +                       done = true;
>> +                       for (i = 0; i < nbus; i++) {
>> +                               if (!co[i]->done) {
>> +                                       done = false;
>> +                                       co_resume(co[i]);
>> +                               }
>> +                       }
>> +               } while (!done);
>> +               udelay_yield = 0;
>> +               printf("done\n");
>> +
>> +               usb_report_devices(uc);
>> +       }
>> +#endif
>> +
>>         /*
>>          * Now that the primary controllers have been scanned and have handed
>>          * over any devices they do not understand to their companions, scan
>> @@ -388,7 +530,11 @@ int usb_init(void)
>>         /* if we were not able to find at least one working bus, bail out */
>>         if (controllers_initialized == 0)
>>                 printf("No USB controllers found\n");
>> -
>> +out:
>> +       if (nbus)
>> +               printf("USB: %d bus(es) scanned in %ld ms\n", nbus,
>> +                      (timer_get_us() - t0) / 1000);
>> +       usb_scan_cleanup();
>>         return usb_started ? 0 : -ENOENT;
>>  }
>>
>> --
>> 2.43.0
>>
> 
> I have tested it on kr260 which is using 2 usb interfaces and there is
> an issue with usb hub initialization.
> That board has two hubs connected over i2c and only one of them is
> initialized over i2c.
> It means there is some work which needs to happen and likely some
> locking should be in place.

Hmmmm... I was kind of expecting that :-/ Do you think the kv260 has a
similar problem? If so I can use mine to troubleshoot.

Thanks,
Marek Vasut Jan. 28, 2025, 9:36 p.m. UTC | #3
On 1/28/25 11:19 AM, Jerome Forissier wrote:
> Use the coroutines framework to scan USB buses in parallel for better
> performance. Tested on arm64 QEMU on a somewhat contrived example
> (4 USB buses, each with one audio device, one keyboard, one mouse and
> one tablet).
> 
> $ make qemu_arm64_defconfig
> $ make -j$(nproc) CROSS_COMPILE="ccache aarch64-linux-gnu-"
> $ qemu-system-aarch64 -M virt -nographic -cpu max -bios u-boot.bin \
>      $(for i in {1..4}; do echo -device qemu-xhci,id=xhci$i \
>          -device\ usb-{audio,kbd,mouse,tablet},bus=xhci$i.0; \
>      done)
> 
> The time spent in usb_init() is reported on the console and shows a
> significant improvement with COROUTINES enabled.
Have you considered using the cyclic framework (cyclic_register() and
co.) to fully offload USB operations away from the main thread, i.e. to
make the U-Boot shell and e.g. 'usb start' run fully in parallel? That
could then be extended to block transfers, which could run in the
background, and ... we would also get a concept of shell pipes to move
data around like we do in Linux.
diff mbox series

Patch

diff --git a/drivers/usb/host/usb-uclass.c b/drivers/usb/host/usb-uclass.c
index bfec303e7af..3104efe7f9e 100644
--- a/drivers/usb/host/usb-uclass.c
+++ b/drivers/usb/host/usb-uclass.c
@@ -9,6 +9,7 @@ 
 #define LOG_CATEGORY UCLASS_USB
 
 #include <bootdev.h>
+#include <coroutines.h>
 #include <dm.h>
 #include <errno.h>
 #include <log.h>
@@ -18,6 +19,8 @@ 
 #include <dm/lists.h>
 #include <dm/uclass-internal.h>
 
+#include <time.h>
+
 static bool asynch_allowed;
 
 struct usb_uclass_priv {
@@ -221,6 +224,40 @@  int usb_stop(void)
 	return err;
 }
 
+static int nbus;
+
+#if CONFIG_IS_ENABLED(COROUTINES)
+static void usb_scan_bus(struct udevice *bus, bool recurse)
+{
+	struct usb_bus_priv *priv;
+	struct udevice *dev;
+	int ret;
+
+	priv = dev_get_uclass_priv(bus);
+
+	assert(recurse);	/* TODO: Support non-recursive */
+
+	debug("\n");
+	ret = usb_scan_device(bus, 0, USB_SPEED_FULL, &dev);
+	if (ret)
+		printf("Scanning bus %s failed, error %d\n", bus->name, ret);
+}
+
+static void usb_report_devices(struct uclass *uc)
+{
+	struct usb_bus_priv *priv;
+	struct udevice *bus;
+
+	uclass_foreach_dev(bus, uc) {
+		priv = dev_get_uclass_priv(bus);
+		printf("Bus %s: ", bus->name);
+		if (priv->next_addr == 0)
+			printf("No USB device found\n");
+		else
+			printf("%d USB device(s) found\n", priv->next_addr);
+	}
+}
+#else
 static void usb_scan_bus(struct udevice *bus, bool recurse)
 {
 	struct usb_bus_priv *priv;
@@ -240,7 +277,81 @@  static void usb_scan_bus(struct udevice *bus, bool recurse)
 		printf("No USB Device found\n");
 	else
 		printf("%d USB Device(s) found\n", priv->next_addr);
+	nbus++;
 }
+#endif
+
+#if CONFIG_IS_ENABLED(COROUTINES)
+extern int udelay_yield;
+
+static void usb_scan_bus_co(void)
+{
+	usb_scan_bus((struct udevice *)co_get_arg(), true);
+	co_exit();
+}
+
+static struct co_stack *stk;
+static struct co *main_co;
+static struct co **co;
+static int co_sz = 8;
+
+static int add_usb_scan_bus_co(struct udevice *bus)
+{
+	if (!co) {
+		co = malloc(co_sz * sizeof(*co));
+		if (!co)
+			return -ENOMEM;
+	}
+	if (nbus == co_sz) {
+		struct co **nco;
+
+		co_sz *= 2;
+		nco = realloc(co, co_sz * sizeof(*co));
+		if (!nco)
+			return -ENOMEM;
+		co = nco;
+	}
+	if (!main_co) {
+		main_co = co_create(NULL, NULL, 0, NULL, NULL);
+		if (!main_co)
+			return -ENOMEM;
+	}
+	if (!stk) {
+		stk = co_stack_new(32768);
+		if (!stk)
+			return -ENOMEM;
+	}
+	co[nbus] = co_create(main_co, stk, 0, usb_scan_bus_co, bus);
+	if (!co[nbus])
+		return -ENOMEM;
+	nbus++;
+	return 0;
+}
+
+static void usb_scan_cleanup(void)
+{
+	int i;
+
+	for (i = 0; i < nbus; i++) {
+		co_destroy(co[i]);
+		co[i] = NULL;
+	}
+	nbus = 0;
+	co_destroy(main_co);
+	main_co = NULL;
+	co_stack_destroy(stk);
+	stk = NULL;
+}
+#else
+static int add_usb_scan_bus_co(struct udevice *bus)
+{
+	return 0;
+}
+
+static void usb_scan_cleanup(void)
+{
+}
+#endif
 
 static void remove_inactive_children(struct uclass *uc, struct udevice *bus)
 {
@@ -289,6 +400,7 @@  static int usb_probe_companion(struct udevice *bus)
 
 int usb_init(void)
 {
+	unsigned long t0 = timer_get_us();
 	int controllers_initialized = 0;
 	struct usb_uclass_priv *uc_priv;
 	struct usb_bus_priv *priv;
@@ -355,10 +467,40 @@  int usb_init(void)
 			continue;
 
 		priv = dev_get_uclass_priv(bus);
-		if (!priv->companion)
-			usb_scan_bus(bus, true);
+		if (!priv->companion) {
+			if (CONFIG_IS_ENABLED(COROUTINES)) {
+				ret = add_usb_scan_bus_co(bus);
+				if (ret)
+					goto out;
+			} else {
+				usb_scan_bus(bus, true);
+			}
+		}
 	}
 
+#if CONFIG_IS_ENABLED(COROUTINES)
+	{
+		bool done;
+		int i;
+
+		printf("Scanning %d USB bus(es)... ", nbus);
+		udelay_yield = 0xCAFEDECA;
+		do {
+			done = true;
+			for (i = 0; i < nbus; i++) {
+				if (!co[i]->done) {
+					done = false;
+					co_resume(co[i]);
+				}
+			}
+		} while (!done);
+		udelay_yield = 0;
+		printf("done\n");
+
+		usb_report_devices(uc);
+	}
+#endif
+
 	/*
 	 * Now that the primary controllers have been scanned and have handed
 	 * over any devices they do not understand to their companions, scan
@@ -388,7 +530,11 @@  int usb_init(void)
 	/* if we were not able to find at least one working bus, bail out */
 	if (controllers_initialized == 0)
 		printf("No USB controllers found\n");
-
+out:
+	if (nbus)
+		printf("USB: %d bus(es) scanned in %ld ms\n", nbus,
+		       (timer_get_us() - t0) / 1000);
+	usb_scan_cleanup();
 	return usb_started ? 0 : -ENOENT;
 }