Message ID | 3dfe07c7ad08d4dfd7eac7bd54e6b821319abe90.camel@suse.com |
---|---|
State | New |
Headers | show |
Series | circular submissions in cdc-wdm and how to break them on disconnect | expand |
On 2021/01/22 0:30, Oliver Neukum wrote: > Hi, > > you have moved kill_urbs() below > > cancel_work_sync(&desc->rxwork); > cancel_work_sync(&desc->service_outs_intr); > > to close a race, as > > rv = usb_submit_urb(desc->response, GFP_KERNEL); > > in service_outstanding_interrupt() would submit the response URB, > right? Right. Shouldn't remaining kill_urbs(desc); cancel_work_sync(&desc->rxwork); cancel_work_sync(&desc->service_outs_intr); sequence in wdm_suspend() and wdm_pre_reset() be updated as well? > Unfortunately we have in wdm_in_callback() the following code path > > if (desc->rerr) { > /* > * Since there was an error, userspace may decide to not read > * any data after poll'ing. > * We should respond to further attempts from the device to send > * data, so that we can get unstuck. > */ > schedule_work(&desc->service_outs_intr); > > It looks to me like we have a circular dependency here and this needs some > change to break. What do you think about the attached patch? I don't know how poisoning works. But why can't we simply use test_bit() on WDM_SUSPENDING/WDM_RESETTING/WDM_DISCONNECTING flags, for schedule_work() in wdm_in_callback() is called with desc->iuspin (which serializes setting of these flags) held. By the way, since someone might interpret "broken" as "out of order / not working", I expect not using "This needs to be broken." in the commit message. There would be some better idiom.
Am Samstag, den 23.01.2021, 11:57 +0900 schrieb Tetsuo Handa: > On 2021/01/22 0:30, Oliver Neukum wrote: Hi, > Right. Shouldn't remaining > > kill_urbs(desc); > cancel_work_sync(&desc->rxwork); > cancel_work_sync(&desc->service_outs_intr); > > sequence in wdm_suspend() and wdm_pre_reset() be updated as well? Yes, they should. > > Unfortunately we have in wdm_in_callback() the following code path > > > > if (desc->rerr) { > > /* > > * Since there was an error, userspace may decide to not read > > * any data after poll'ing. > > * We should respond to further attempts from the device to send > > * data, so that we can get unstuck. > > */ > > schedule_work(&desc->service_outs_intr); > > > > It looks to me like we have a circular dependency here and this needs some > > change to break. What do you think about the attached patch? > > I don't know how poisoning works. But why can't we simply use test_bit() on It makes subsequent usb_submit_urb() fail. > WDM_SUSPENDING/WDM_RESETTING/WDM_DISCONNECTING flags, for schedule_work() in > wdm_in_callback() is called with desc->iuspin (which serializes setting of > these flags) held. In theory this could be done, yet that would take three additional tests as opposed to the test for poisoning that usbcore does anyway. > By the way, since someone might interpret "broken" as "out of order / not working", > I expect not using "This needs to be broken." in the commit message. There would be > some better idiom. Right. I changed the message. Could you test whether the attached patch fixes your issue? Regards Oliver From 307097e80657ca44ac99da8efc8397070b1aff3f Mon Sep 17 00:00:00 2001 From: Oliver Neukum <oneukum@suse.com> Date: Thu, 18 Feb 2021 13:42:40 +0100 Subject: [PATCH 1/2] cdc-wdm: untangle a circular dependency between callback and softint We have a cycle of callbacks scheduling works which submit URBs with thos callbacks. This needs to be blocked, stopped and unblocked to untangle the circle.. Signed-off-by: Oliver Neukum <oneukum@suse.com> --- drivers/usb/class/cdc-wdm.c | 30 ++++++++++++++++++++++-------- 1 file changed, 22 insertions(+), 8 deletions(-) diff --git a/drivers/usb/class/cdc-wdm.c b/drivers/usb/class/cdc-wdm.c index 508b1c3f8b73..d1e4a7379beb 100644 --- a/drivers/usb/class/cdc-wdm.c +++ b/drivers/usb/class/cdc-wdm.c @@ -321,12 +321,23 @@ static void wdm_int_callback(struct urb *urb) } -static void kill_urbs(struct wdm_device *desc) +static void poison_urbs(struct wdm_device *desc) { /* the order here is essential */ - usb_kill_urb(desc->command); - usb_kill_urb(desc->validity); - usb_kill_urb(desc->response); + usb_poison_urb(desc->command); + usb_poison_urb(desc->validity); + usb_poison_urb(desc->response); +} + +static void unpoison_urbs(struct wdm_device *desc) +{ + /* + * the order here is not essential + * it is symmetrical just to be nice + */ + usb_unpoison_urb(desc->response); + usb_unpoison_urb(desc->validity); + usb_unpoison_urb(desc->command); } static void free_urbs(struct wdm_device *desc) @@ -741,11 +752,12 @@ static int wdm_release(struct inode *inode, struct file *file) if (!desc->count) { if (!test_bit(WDM_DISCONNECTING, &desc->flags)) { dev_dbg(&desc->intf->dev, "wdm_release: cleanup\n"); - kill_urbs(desc); + poison_urbs(desc); spin_lock_irq(&desc->iuspin); desc->resp_count = 0; spin_unlock_irq(&desc->iuspin); desc->manage_power(desc->intf, 0); + unpoison_urbs(desc); } else { /* must avoid dev_printk here as desc->intf is invalid */ pr_debug(KBUILD_MODNAME " %s: device gone - cleaning up\n", __func__); @@ -1037,9 +1049,9 @@ static void wdm_disconnect(struct usb_interface *intf) wake_up_all(&desc->wait); mutex_lock(&desc->rlock); mutex_lock(&desc->wlock); + poison_urbs(desc); cancel_work_sync(&desc->rxwork); cancel_work_sync(&desc->service_outs_intr); - kill_urbs(desc); mutex_unlock(&desc->wlock); mutex_unlock(&desc->rlock); @@ -1080,9 +1092,10 @@ static int wdm_suspend(struct usb_interface *intf, pm_message_t message) set_bit(WDM_SUSPENDING, &desc->flags); spin_unlock_irq(&desc->iuspin); /* callback submits work - order is essential */ - kill_urbs(desc); + poison_urbs(desc); cancel_work_sync(&desc->rxwork); cancel_work_sync(&desc->service_outs_intr); + unpoison_urbs(desc); } if (!PMSG_IS_AUTO(message)) { mutex_unlock(&desc->wlock); @@ -1140,7 +1153,7 @@ static int wdm_pre_reset(struct usb_interface *intf) wake_up_all(&desc->wait); mutex_lock(&desc->rlock); mutex_lock(&desc->wlock); - kill_urbs(desc); + poison_urbs(desc); cancel_work_sync(&desc->rxwork); cancel_work_sync(&desc->service_outs_intr); return 0; @@ -1151,6 +1164,7 @@ static int wdm_post_reset(struct usb_interface *intf) struct wdm_device *desc = wdm_find_device(intf); int rv; + unpoison_urbs(desc); clear_bit(WDM_OVERFLOW, &desc->flags); clear_bit(WDM_RESETTING, &desc->flags); rv = recover_from_urb_loss(desc);
On 2021/02/18 21:55, Oliver Neukum wrote: > Could you test whether the attached patch fixes your issue? "INFO: task hung in wdm_flush" was fixed on 2020/11/16 12:12, and as far as I know syzbot is not reporting any problem with this driver. I don't have issues regardless of your patch. I can't test your patch. > URBs with thos callbacks. This needs to be blocked, stopped s/thos/those/
From efdd52b67f24e4fa2552f8cc2cbedb7118f71291 Mon Sep 17 00:00:00 2001 From: Oliver Neukum <oneukum@suse.com> Date: Mon, 4 Jan 2021 17:26:33 +0100 Subject: [PATCH] cdc-wdm: support for poisoning and unpoisoning the URBs We have a cycle of callbacks scheduling works which submit URBs. This needs to be broken. Signed-off-by: Oliver Neukum <oneukum@suse.com> --- drivers/usb/class/cdc-wdm.c | 30 ++++++++++++++++++++++-------- 1 file changed, 22 insertions(+), 8 deletions(-) diff --git a/drivers/usb/class/cdc-wdm.c b/drivers/usb/class/cdc-wdm.c index 508b1c3f8b73..d1e4a7379beb 100644 --- a/drivers/usb/class/cdc-wdm.c +++ b/drivers/usb/class/cdc-wdm.c @@ -321,12 +321,23 @@ static void wdm_int_callback(struct urb *urb) } -static void kill_urbs(struct wdm_device *desc) +static void poison_urbs(struct wdm_device *desc) { /* the order here is essential */ - usb_kill_urb(desc->command); - usb_kill_urb(desc->validity); - usb_kill_urb(desc->response); + usb_poison_urb(desc->command); + usb_poison_urb(desc->validity); + usb_poison_urb(desc->response); +} + +static void unpoison_urbs(struct wdm_device *desc) +{ + /* + * the order here is not essential + * it is symmetrical just to be nice + */ + usb_unpoison_urb(desc->response); + usb_unpoison_urb(desc->validity); + usb_unpoison_urb(desc->command); } static void free_urbs(struct wdm_device *desc) @@ -741,11 +752,12 @@ static int wdm_release(struct inode *inode, struct file *file) if (!desc->count) { if (!test_bit(WDM_DISCONNECTING, &desc->flags)) { dev_dbg(&desc->intf->dev, "wdm_release: cleanup\n"); - kill_urbs(desc); + poison_urbs(desc); spin_lock_irq(&desc->iuspin); desc->resp_count = 0; spin_unlock_irq(&desc->iuspin); desc->manage_power(desc->intf, 0); + unpoison_urbs(desc); } else { /* must avoid dev_printk here as desc->intf is invalid */ pr_debug(KBUILD_MODNAME " %s: device gone - cleaning up\n", __func__); @@ -1037,9 +1049,9 @@ static void wdm_disconnect(struct usb_interface *intf) wake_up_all(&desc->wait); mutex_lock(&desc->rlock); mutex_lock(&desc->wlock); + poison_urbs(desc); cancel_work_sync(&desc->rxwork); cancel_work_sync(&desc->service_outs_intr); - kill_urbs(desc); mutex_unlock(&desc->wlock); mutex_unlock(&desc->rlock); @@ -1080,9 +1092,10 @@ static int wdm_suspend(struct usb_interface *intf, pm_message_t message) set_bit(WDM_SUSPENDING, &desc->flags); spin_unlock_irq(&desc->iuspin); /* callback submits work - order is essential */ - kill_urbs(desc); + poison_urbs(desc); cancel_work_sync(&desc->rxwork); cancel_work_sync(&desc->service_outs_intr); + unpoison_urbs(desc); } if (!PMSG_IS_AUTO(message)) { mutex_unlock(&desc->wlock); @@ -1140,7 +1153,7 @@ static int wdm_pre_reset(struct usb_interface *intf) wake_up_all(&desc->wait); mutex_lock(&desc->rlock); mutex_lock(&desc->wlock); - kill_urbs(desc); + poison_urbs(desc); cancel_work_sync(&desc->rxwork); cancel_work_sync(&desc->service_outs_intr); return 0; @@ -1151,6 +1164,7 @@ static int wdm_post_reset(struct usb_interface *intf) struct wdm_device *desc = wdm_find_device(intf); int rv; + unpoison_urbs(desc); clear_bit(WDM_OVERFLOW, &desc->flags); clear_bit(WDM_RESETTING, &desc->flags); rv = recover_from_urb_loss(desc); -- 2.26.2