diff mbox series

UVC Gadget Driver shows glitched frames with a Linux host

Message ID CAMHf4WKbi6KBPQztj9FA4kPvESc1fVKrC8G73-cs6tTeQby9=w@mail.gmail.com
State New
Headers show
Series UVC Gadget Driver shows glitched frames with a Linux host | expand

Commit Message

Avichal Rakesh April 14, 2023, 9:03 p.m. UTC
Hey all,

First off, I am very new to the kernel space, so apologies for any
newb mistakes. Please feel free to point them out!

I've been trying to get the UVC gadget driver to work on an Android
device and have been seeing partial/glitched frames when streaming to
a Linux (or Windows) host fairly frequently (once every 2-4s). The UVC
gadget driver shows no error/exception logs, and neither does the
host's UVC driver.

Enabling tracing for the UVC gadget driver and the host's UVC driver
shows that the gadget sees a missed ISOC transfer, and the host sees a
frame with suspiciously low packet count at the same time as the
glitched frame.

```
[691171.704583] usb 1-4: frame 9274 stats: 0/8/8 packets, 0/0/0 pts
(!early !initial), 7/8 scr, last pts/stc/sof 0/259279856/14353
[691171.704602] usb 1-4: Frame complete (EOF found)
[691171.732584] usb 1-4: frame 9275 stats: 0/4/4 packets, 0/0/0 pts
(!early !initial), 3/4 scr, last pts/stc/sof 0/261131744/14661
[691171.732621] usb 1-4: Frame complete (EOF found)
[691171.768578] usb 1-4: frame 9276 stats: 0/8/8 packets, 0/0/0 pts
(!early !initial), 7/8 scr, last pts/stc/sof 0/262525136/14894
[691171.768616] usb 1-4: Frame complete (EOF found)
```

For reference, I am streaming 640x480 MJPEG frames of a (mostly)
static scene, and every frame takes 7-8 packets (~22kB). So 4 packets
for a frame is definitely not normal. From the logs, it looks like the
host sees an EOF header for the video frame that had the ISOC failure
and tries to display the frame with a partial buffer resulting in the
glitched frame.

This can happen because the UVC gadget driver waits for the encode
loop to drop the video frame
(https://lore.kernel.org/all/20221018215044.765044-4-w36195@motorola.com/).
However, because video frames are encoded (to usb_requests)
asynchronously, it is possible that the video frame is completely
encoded before the ISOC transfer failure is reported. In this case,
the next frame would be dropped, but the frame with missed ISOC will
remain in queue.

This problem may be further exaggerated by the DWC3 controller driver
(which is what my device has) not setting the IMI flag when
no_interrupt flag is set
(https://lore.kernel.org/all/ced336c84434571340c07994e3667a0ee284fefe.1666735451.git.Thinh.Nguyen@synopsys.com/)?
UVC Gadget Driver sets the no_interrupt flag for ~3/4 of its queued
usb_request, so an ISOC failure may not immediately interrupt the UVC
gadget driver, leaving more time for the frame to finish encoding.

I couldn't find any concrete error handling rules in the UVC specs, so
I am not sure what the proper solution here is. To try out, I created
a patch (attached below) that dequeues all queued usb_requests from
the endpoint in case of an ISOC failure and clears the uvc buffer
queue. This eliminated the partial frames with no perceivable frame
drops.

So my questions here are:
1. Is this a known issue, and if so are there workarounds for it?
2. If the answer to above is "No", does the explanation and mitigation
seem reasonable?

Patch follows (mostly for illustration, I can formalize it if
needed!). It adds a new 'req_inflight' list to track queued
usb_requests that have not been given back to the gadget driver and
drops all the queued requests in case of an ISOC failure. The other
changes are for the extra bookkeeping required to handle dropping all
frames. I haven't been able to confirm it, but as far as I can tell
the issue exists at ToT as well.

---
drivers/usb/gadget/function/uvc.h | 6 ++-
drivers/usb/gadget/function/uvc_queue.c | 16 ++++--
drivers/usb/gadget/function/uvc_queue.h | 2 +-
drivers/usb/gadget/function/uvc_video.c | 71 +++++++++++++++++++------
4 files changed, 72 insertions(+), 23 deletions(-)

}
@@ -181,15 +180,15 @@ uvc_video_encode_isoc_sg(struct usb_request
*req, struct uvc_video *video,

req->length -= len;
video->queue.buf_used += req->length - header_len;
+ ureq->uvc_buf = buf;

- if (buf->bytesused == video->queue.buf_used || !buf->sg ||
- video->queue.flags & UVC_QUEUE_DROP_INCOMPLETE) {
+ if (buf->bytesused == video->queue.buf_used || !buf->sg) {
video->queue.buf_used = 0;
buf->state = UVC_BUF_STATE_DONE;
buf->offset = 0;
list_del(&buf->queue);
video->fid ^= UVC_STREAM_FID;
- ureq->last_buf = buf;
+ ureq->is_last = true;
}
}

@@ -212,14 +211,14 @@ uvc_video_encode_isoc(struct usb_request *req,
struct uvc_video *video,
len -= ret;

req->length = video->req_size - len;
+ ureq->uvc_buf = buf;

- if (buf->bytesused == video->queue.buf_used ||
- video->queue.flags & UVC_QUEUE_DROP_INCOMPLETE) {
+ if (buf->bytesused == video->queue.buf_used) {
video->queue.buf_used = 0;
buf->state = UVC_BUF_STATE_DONE;
list_del(&buf->queue);
video->fid ^= UVC_STREAM_FID;
- ureq->last_buf = buf;
+ ureq->is_last = true;
}
}

@@ -231,11 +230,13 @@ static int uvcg_video_ep_queue(struct uvc_video
*video, struct usb_request *req)
{
int ret;

+ list_add_tail(&req->list, &video->req_inflight);
ret = usb_ep_queue(video->ep, req, GFP_ATOMIC);
if (ret < 0) {
uvcg_err(&video->uvc->func, "Failed to queue request (%d).\n",
ret);

+ list_del(&req->list);
/* If the endpoint is disabled the descriptor may be NULL. */
if (video->ep->desc) {
/* Isochronous endpoints can't be halted. */
@@ -254,15 +255,46 @@ uvc_video_complete(struct usb_ep *ep, struct
usb_request *req)
struct uvc_video *video = ureq->video;
struct uvc_video_queue *queue = &video->queue;
struct uvc_device *uvc = video->uvc;
+ struct uvc_buffer *uvc_buf = ureq->uvc_buf;
+ struct usb_request *itr;
unsigned long flags;

+ spin_lock_irqsave(&queue->irqlock, flags);
+ list_del(&req->list);
+ spin_unlock_irqrestore(&queue->irqlock, flags);
+
switch (req->status) {
case 0:
break;

case -EXDEV:
uvcg_dbg(&video->uvc->func, "VS request missed xfer.\n");
- queue->flags |= UVC_QUEUE_DROP_INCOMPLETE;
+ spin_lock_irqsave(&queue->irqlock, flags);
+ list_for_each_entry(itr, &video->req_inflight, list) {
+ usb_ep_dequeue(ep, itr);
+ }
+
+ if (uvc_buf->state != UVC_BUF_STATE_DONE) {
+ video->fid = UVC_STREAM_FID;
+ } else {
+ queue->flags |= UVC_QUEUE_DROP_INCOMPLETE;
+ }
+
+ /*
+ * We cancel the entire queue because calling usb_ep_dequeue on any
pending request will
+ * cancel all queued requests (on some USB controllers). ISOC
transfers are "best effort"
+ * and a missed frame or two won't really be noticeable. Rather than
adding extra
+ * bookkeeping to try and find the precise set of frames that need to
be dropped, we drop
+ * the entire queue.
+ */
+ uvcg_queue_cancel_locked(queue, 0);
+ video->req_int_count = 0;
+ spin_unlock_irqrestore(&queue->irqlock, flags);
+ break;
+
+ case -ECONNRESET:
+ uvcg_dbg(&video->uvc->func, "VS request dqed.\n");
+ // Request dequeued using usb_ep_dequeue, no special handling required
break;

case -ESHUTDOWN: /* disconnect from host. */
@@ -277,10 +309,11 @@ uvc_video_complete(struct usb_ep *ep, struct
usb_request *req)
uvcg_queue_cancel(queue, 0);
}

- if (ureq->last_buf) {
- uvcg_complete_buffer(&video->queue, ureq->last_buf);
- ureq->last_buf = NULL;
+ if (ureq->is_last) {
+ uvcg_complete_buffer(&video->queue, ureq->uvc_buf);
}
+ ureq->is_last = false;
+ ureq->uvc_buf = NULL;

spin_lock_irqsave(&video->req_lock, flags);
list_add_tail(&req->list, &video->req_free);
@@ -315,6 +348,7 @@ uvc_video_free_requests(struct uvc_video *video)
}

INIT_LIST_HEAD(&video->req_free);
+ INIT_LIST_HEAD(&video->req_inflight);
video->req_size = 0;
return 0;
}
@@ -350,7 +384,8 @@ uvc_video_alloc_requests(struct uvc_video *video)
video->ureq[i].req->complete = uvc_video_complete;
video->ureq[i].req->context = &video->ureq[i];
video->ureq[i].video = video;
- video->ureq[i].last_buf = NULL;
+ video->ureq[i].uvc_buf = NULL;
+ video->ureq[i].is_last = false;

list_add_tail(&video->ureq[i].req->list, &video->req_free);
/* req_size/PAGE_SIZE + 1 for overruns and + 1 for header */
@@ -385,6 +420,7 @@ static void uvcg_video_pump(struct work_struct *work)
struct usb_request *req = NULL;
struct uvc_buffer *buf;
unsigned long flags;
+ bool must_interrupt = false;
int ret;

while (video->ep->enabled) {
@@ -400,6 +436,7 @@ static void uvcg_video_pump(struct work_struct *work)
req = list_first_entry(&video->req_free, struct usb_request,
list);
list_del(&req->list);
+ must_interrupt = list_empty(&video->req_free);
spin_unlock_irqrestore(&video->req_lock, flags);

/*
@@ -420,14 +457,14 @@ static void uvcg_video_pump(struct work_struct *work)
* interrupt load to a quarter but also catches the corner
* cases, which needs to be handled.
*/
- if (list_empty(&video->req_free) ||
+ if (must_interrupt ||
buf->state == UVC_BUF_STATE_DONE ||
!(video->req_int_count %
DIV_ROUND_UP(video->uvc_num_requests, 4))) {
video->req_int_count = 0;
req->no_interrupt = 0;
} else {
- req->no_interrupt = 1;
+ req->no_interrupt = 0;
}

/* Queue the USB request */
@@ -507,6 +544,7 @@ int uvcg_video_enable(struct uvc_video *video, int enable)
int uvcg_video_init(struct uvc_video *video, struct uvc_device *uvc)
{
INIT_LIST_HEAD(&video->req_free);
+ INIT_LIST_HEAD(&video->req_inflight);
spin_lock_init(&video->req_lock);
INIT_WORK(&video->pump, uvcg_video_pump);

@@ -527,4 +565,3 @@ int uvcg_video_init(struct uvc_video *video,
struct uvc_device *uvc)
V4L2_BUF_TYPE_VIDEO_OUTPUT, &video->mutex);
return 0;
}
-
--

Comments

Greg KH April 17, 2023, 1:49 p.m. UTC | #1
On Fri, Apr 14, 2023 at 02:03:02PM -0700, Avichal Rakesh wrote:
> Hey all,
> 
> First off, I am very new to the kernel space, so apologies for any
> newb mistakes. Please feel free to point them out!
> 
> I've been trying to get the UVC gadget driver to work on an Android
> device and have been seeing partial/glitched frames when streaming to
> a Linux (or Windows) host fairly frequently (once every 2-4s). The UVC
> gadget driver shows no error/exception logs, and neither does the
> host's UVC driver.
> 
> Enabling tracing for the UVC gadget driver and the host's UVC driver
> shows that the gadget sees a missed ISOC transfer, and the host sees a
> frame with suspiciously low packet count at the same time as the
> glitched frame.
> 
> ```
> [691171.704583] usb 1-4: frame 9274 stats: 0/8/8 packets, 0/0/0 pts
> (!early !initial), 7/8 scr, last pts/stc/sof 0/259279856/14353
> [691171.704602] usb 1-4: Frame complete (EOF found)
> [691171.732584] usb 1-4: frame 9275 stats: 0/4/4 packets, 0/0/0 pts
> (!early !initial), 3/4 scr, last pts/stc/sof 0/261131744/14661
> [691171.732621] usb 1-4: Frame complete (EOF found)
> [691171.768578] usb 1-4: frame 9276 stats: 0/8/8 packets, 0/0/0 pts
> (!early !initial), 7/8 scr, last pts/stc/sof 0/262525136/14894
> [691171.768616] usb 1-4: Frame complete (EOF found)
> ```
> 
> For reference, I am streaming 640x480 MJPEG frames of a (mostly)
> static scene, and every frame takes 7-8 packets (~22kB). So 4 packets
> for a frame is definitely not normal. From the logs, it looks like the
> host sees an EOF header for the video frame that had the ISOC failure
> and tries to display the frame with a partial buffer resulting in the
> glitched frame.

But with isoc packets, it's ok to drop them, that's what the protocol is
for.  If you require loss-less data transfer, you can NOT use isoc
endpoints.

So this sounds like it is working correctly, but you need to figure out
why your device can't keep up on the data stream properly.

> This can happen because the UVC gadget driver waits for the encode
> loop to drop the video frame
> (https://lore.kernel.org/all/20221018215044.765044-4-w36195@motorola.com/).
> However, because video frames are encoded (to usb_requests)
> asynchronously, it is possible that the video frame is completely
> encoded before the ISOC transfer failure is reported. In this case,
> the next frame would be dropped, but the frame with missed ISOC will
> remain in queue.

So you need a faster processor?  :)

> This problem may be further exaggerated by the DWC3 controller driver
> (which is what my device has) not setting the IMI flag when
> no_interrupt flag is set
> (https://lore.kernel.org/all/ced336c84434571340c07994e3667a0ee284fefe.1666735451.git.Thinh.Nguyen@synopsys.com/)?
> UVC Gadget Driver sets the no_interrupt flag for ~3/4 of its queued
> usb_request, so an ISOC failure may not immediately interrupt the UVC
> gadget driver, leaving more time for the frame to finish encoding.
> 
> I couldn't find any concrete error handling rules in the UVC specs, so
> I am not sure what the proper solution here is. To try out, I created
> a patch (attached below) that dequeues all queued usb_requests from
> the endpoint in case of an ISOC failure and clears the uvc buffer
> queue. This eliminated the partial frames with no perceivable frame
> drops.
> 
> So my questions here are:
> 1. Is this a known issue, and if so are there workarounds for it?
> 2. If the answer to above is "No", does the explanation and mitigation
> seem reasonable?
> 
> Patch follows (mostly for illustration, I can formalize it if
> needed!). It adds a new 'req_inflight' list to track queued
> usb_requests that have not been given back to the gadget driver and
> drops all the queued requests in case of an ISOC failure. The other
> changes are for the extra bookkeeping required to handle dropping all
> frames. I haven't been able to confirm it, but as far as I can tell
> the issue exists at ToT as well.
> 
> ---
> drivers/usb/gadget/function/uvc.h | 6 ++-
> drivers/usb/gadget/function/uvc_queue.c | 16 ++++--
> drivers/usb/gadget/function/uvc_queue.h | 2 +-
> drivers/usb/gadget/function/uvc_video.c | 71 +++++++++++++++++++------
> 4 files changed, 72 insertions(+), 23 deletions(-)
> 
> diff --git a/drivers/usb/gadget/function/uvc.h
> b/drivers/usb/gadget/function/uvc.h
> index 100475b1363e..d2c837011546 100644
> --- a/drivers/usb/gadget/function/uvc.h
> +++ b/drivers/usb/gadget/function/uvc.h
> @@ -80,7 +80,8 @@ struct uvc_request {
> struct uvc_video *video;
> struct sg_table sgt;
> u8 header[UVCG_REQUEST_HEADER_LEN];
> - struct uvc_buffer *last_buf;
> + struct uvc_buffer *uvc_buf;
> + bool is_last;
> };

The patch is corrupted and can't be applied by anyone, sorry :(

Please fix up your email client and submit it as a real patch?

thanks,

greg k-h
Avichal Rakesh April 18, 2023, 1:34 a.m. UTC | #2
On 4/17/23 06:49, Greg KH wrote:
> On Fri, Apr 14, 2023 at 02:03:02PM -0700, Avichal Rakesh wrote:
>> Hey all,
>>
>> First off, I am very new to the kernel space, so apologies for any
>> newb mistakes. Please feel free to point them out!
>>
>> I've been trying to get the UVC gadget driver to work on an Android
>> device and have been seeing partial/glitched frames when streaming to
>> a Linux (or Windows) host fairly frequently (once every 2-4s). The UVC
>> gadget driver shows no error/exception logs, and neither does the
>> host's UVC driver.
>>
>> Enabling tracing for the UVC gadget driver and the host's UVC driver
>> shows that the gadget sees a missed ISOC transfer, and the host sees a
>> frame with suspiciously low packet count at the same time as the
>> glitched frame.
>>
>> ```
>> [691171.704583] usb 1-4: frame 9274 stats: 0/8/8 packets, 0/0/0 pts
>> (!early !initial), 7/8 scr, last pts/stc/sof 0/259279856/14353
>> [691171.704602] usb 1-4: Frame complete (EOF found)
>> [691171.732584] usb 1-4: frame 9275 stats: 0/4/4 packets, 0/0/0 pts
>> (!early !initial), 3/4 scr, last pts/stc/sof 0/261131744/14661
>> [691171.732621] usb 1-4: Frame complete (EOF found)
>> [691171.768578] usb 1-4: frame 9276 stats: 0/8/8 packets, 0/0/0 pts
>> (!early !initial), 7/8 scr, last pts/stc/sof 0/262525136/14894
>> [691171.768616] usb 1-4: Frame complete (EOF found)
>> ```
>>
>> For reference, I am streaming 640x480 MJPEG frames of a (mostly)
>> static scene, and every frame takes 7-8 packets (~22kB). So 4 packets
>> for a frame is definitely not normal. From the logs, it looks like the
>> host sees an EOF header for the video frame that had the ISOC failure
>> and tries to display the frame with a partial buffer resulting in the
>> glitched frame.
> 
> But with isoc packets, it's ok to drop them, that's what the protocol is
> for.  If you require loss-less data transfer, you can NOT use isoc
> endpoints.

Maybe I am misunderstanding something here: It is okay for isoc packets
to be dropped, but if we are trying to send some contiguous information
over a sequence of isoc packets and one of the packets in the sequence
gets dropped, there should be a mechanism (either on the gadget side, or
the host side) to invalidate the set of packets associated with the
dropped packet?

In this instance, we "encode" one video frame into a sequence of isoc
packets, and queue them up, but if one of those isoc packets is dropped
after video frame is fully "encoded", there doesn't seem to be any
handling to invalidate the other packets that relates to the video
frame. It is okay to drop a video frame in this case as the drop is
invisible to the user, but the host needs to know about the invalidated
frame.

> 
> So this sounds like it is working correctly, but you need to figure out
> why your device can't keep up on the data stream properly.

I haven't profiled this, but as far as I can tell the device can keep up
with the data stream. The frame glitches and all other observations here
were observed at 30fps, and we've successfully pushed 60fps with no
streaming stutters, but the glitched frame still comes up at about the
same frequency (in terms of number of frames). So it doesn't seem like
the data stream is lagging behind.

> 
>> This can happen because the UVC gadget driver waits for the encode
>> loop to drop the video frame
>> (https://lore.kernel.org/all/20221018215044.765044-4-w36195@motorola.com/).
>> However, because video frames are encoded (to usb_requests)
>> asynchronously, it is possible that the video frame is completely
>> encoded before the ISOC transfer failure is reported. In this case,
>> the next frame would be dropped, but the frame with missed ISOC will
>> remain in queue.
> 
> So you need a faster processor?  :)

Haha! I wouldn't say no to that! Each "encode" iteration is effectively
a memcpy, so runs quite a bit faster than the USB controller can
pump out data to the host. The encode loop runs in its own thread and
doesn't wait for the complete callbacks from the usb controller to queue
up new requests. It is inevitable that we will end up with a set of
usb_requests queued up to the controller. In my testing, 5/6
usb_requests being queued up was not uncommon (which is quite the
fraction of one frame considering one frame is 7/8 usb_requests long).
Eventually the usb_request containing the EOF header will be queued up
and be behind other packets of the same video frame. A missed ISOC at
this point won't cancel the usb_request with the EOF header.

> 
>> This problem may be further exaggerated by the DWC3 controller driver
>> (which is what my device has) not setting the IMI flag when
>> no_interrupt flag is set
>> (https://lore.kernel.org/all/ced336c84434571340c07994e3667a0ee284fefe.1666735451.git.Thinh.Nguyen@synopsys.com/)?
>> UVC Gadget Driver sets the no_interrupt flag for ~3/4 of its queued
>> usb_request, so an ISOC failure may not immediately interrupt the UVC
>> gadget driver, leaving more time for the frame to finish encoding.
>>
>> I couldn't find any concrete error handling rules in the UVC specs, so
>> I am not sure what the proper solution here is. To try out, I created
>> a patch (attached below) that dequeues all queued usb_requests from
>> the endpoint in case of an ISOC failure and clears the uvc buffer
>> queue. This eliminated the partial frames with no perceivable frame
>> drops.
>>
>> So my questions here are:
>> 1. Is this a known issue, and if so are there workarounds for it?
>> 2. If the answer to above is "No", does the explanation and mitigation
>> seem reasonable?
>>
>> Patch follows (mostly for illustration, I can formalize it if
>> needed!). It adds a new 'req_inflight' list to track queued
>> usb_requests that have not been given back to the gadget driver and
>> drops all the queued requests in case of an ISOC failure. The other
>> changes are for the extra bookkeeping required to handle dropping all
>> frames. I haven't been able to confirm it, but as far as I can tell
>> the issue exists at ToT as well.
>>
>> ---
>> drivers/usb/gadget/function/uvc.h | 6 ++-
>> drivers/usb/gadget/function/uvc_queue.c | 16 ++++--
>> drivers/usb/gadget/function/uvc_queue.h | 2 +-
>> drivers/usb/gadget/function/uvc_video.c | 71 +++++++++++++++++++------
>> 4 files changed, 72 insertions(+), 23 deletions(-)
>>
>> diff --git a/drivers/usb/gadget/function/uvc.h
>> b/drivers/usb/gadget/function/uvc.h
>> index 100475b1363e..d2c837011546 100644
>> --- a/drivers/usb/gadget/function/uvc.h
>> +++ b/drivers/usb/gadget/function/uvc.h
>> @@ -80,7 +80,8 @@ struct uvc_request {
>> struct uvc_video *video;
>> struct sg_table sgt;
>> u8 header[UVCG_REQUEST_HEADER_LEN];
>> - struct uvc_buffer *last_buf;
>> + struct uvc_buffer *uvc_buf;
>> + bool is_last;
>> };
> 
> The patch is corrupted and can't be applied by anyone, sorry :(
> 
> Please fix up your email client and submit it as a real patch?
> 
> thanks,
> 
> greg k-h

Sorry about the patch, looks like all the tabs were converted to single
spaces :(. Reattaching:

---
 drivers/usb/gadget/function/uvc.h       |  6 ++-
 drivers/usb/gadget/function/uvc_queue.c | 16 ++++--
 drivers/usb/gadget/function/uvc_queue.h |  2 +-
 drivers/usb/gadget/function/uvc_video.c | 71 +++++++++++++++++++------
 4 files changed, 72 insertions(+), 23 deletions(-)

diff --git a/drivers/usb/gadget/function/uvc.h b/drivers/usb/gadget/function/uvc.h
index 100475b1363e..d2c837011546 100644
--- a/drivers/usb/gadget/function/uvc.h
+++ b/drivers/usb/gadget/function/uvc.h
@@ -80,7 +80,8 @@ struct uvc_request {
 	struct uvc_video *video;
 	struct sg_table sgt;
 	u8 header[UVCG_REQUEST_HEADER_LEN];
-	struct uvc_buffer *last_buf;
+	struct uvc_buffer *uvc_buf;
+	bool is_last;
 };
 
 struct uvc_video {
@@ -103,8 +104,9 @@ struct uvc_video {
 	/* Requests */
 	unsigned int req_size;
 	struct uvc_request *ureq;
-	struct list_head req_free;
+	struct list_head req_free; /* protected by req_lock */
 	spinlock_t req_lock;
+	struct list_head req_inflight; /* protected by queue->irqlock */
 
 	unsigned int req_int_count;
 
diff --git a/drivers/usb/gadget/function/uvc_queue.c b/drivers/usb/gadget/function/uvc_queue.c
index 0aa3d7e1f3cc..32fab1e5b32d 100644
--- a/drivers/usb/gadget/function/uvc_queue.c
+++ b/drivers/usb/gadget/function/uvc_queue.c
@@ -250,10 +250,22 @@ unsigned long uvcg_queue_get_unmapped_area(struct uvc_video_queue *queue,
  */
 void uvcg_queue_cancel(struct uvc_video_queue *queue, int disconnect)
 {
-	struct uvc_buffer *buf;
 	unsigned long flags;
 
 	spin_lock_irqsave(&queue->irqlock, flags);
+	uvcg_queue_cancel_locked(queue, disconnect);
+	spin_unlock_irqrestore(&queue->irqlock, flags);
+}
+
+/*
+ * see uvcg_queue_cancel()
+ *
+ * Must be called with &queue_irqlock held.
+ */
+void uvcg_queue_cancel_locked(struct uvc_video_queue *queue, int disconnect)
+{
+	struct uvc_buffer *buf;
+
 	while (!list_empty(&queue->irqqueue)) {
 		buf = list_first_entry(&queue->irqqueue, struct uvc_buffer,
 				       queue);
@@ -272,7 +284,6 @@ void uvcg_queue_cancel(struct uvc_video_queue *queue, int disconnect)
 	 */
 	if (disconnect)
 		queue->flags |= UVC_QUEUE_DISCONNECTED;
-	spin_unlock_irqrestore(&queue->irqlock, flags);
 }
 
 /*
@@ -356,4 +367,3 @@ struct uvc_buffer *uvcg_queue_head(struct uvc_video_queue *queue)
 
 	return buf;
 }
-
diff --git a/drivers/usb/gadget/function/uvc_queue.h b/drivers/usb/gadget/function/uvc_queue.h
index 41f87b917f6b..622c0f146847 100644
--- a/drivers/usb/gadget/function/uvc_queue.h
+++ b/drivers/usb/gadget/function/uvc_queue.h
@@ -89,6 +89,7 @@ unsigned long uvcg_queue_get_unmapped_area(struct uvc_video_queue *queue,
 #endif /* CONFIG_MMU */
 
 void uvcg_queue_cancel(struct uvc_video_queue *queue, int disconnect);
+void uvcg_queue_cancel_locked(struct uvc_video_queue *queue, int disconnect);
 
 int uvcg_queue_enable(struct uvc_video_queue *queue, int enable);
 
@@ -98,4 +99,3 @@ void uvcg_complete_buffer(struct uvc_video_queue *queue,
 struct uvc_buffer *uvcg_queue_head(struct uvc_video_queue *queue);
 
 #endif /* _UVC_QUEUE_H_ */
-
diff --git a/drivers/usb/gadget/function/uvc_video.c b/drivers/usb/gadget/function/uvc_video.c
index dd1c6b2ca7c6..6c2698eca242 100644
--- a/drivers/usb/gadget/function/uvc_video.c
+++ b/drivers/usb/gadget/function/uvc_video.c
@@ -109,19 +109,18 @@ uvc_video_encode_bulk(struct usb_request *req, struct uvc_video *video,
 
 	req->length = video->req_size - len;
 	req->zero = video->payload_size == video->max_payload_size;
+	ureq->uvc_buf = buf;
 
 	if (buf->bytesused == video->queue.buf_used) {
 		video->queue.buf_used = 0;
 		buf->state = UVC_BUF_STATE_DONE;
 		list_del(&buf->queue);
 		video->fid ^= UVC_STREAM_FID;
-		ureq->last_buf = buf;
-
+		ureq->is_last = true;
 		video->payload_size = 0;
 	}
 
 	if (video->payload_size == video->max_payload_size ||
-	    video->queue.flags & UVC_QUEUE_DROP_INCOMPLETE ||
 	    buf->bytesused == video->queue.buf_used)
 		video->payload_size = 0;
 }
@@ -181,15 +180,15 @@ uvc_video_encode_isoc_sg(struct usb_request *req, struct uvc_video *video,
 
 	req->length -= len;
 	video->queue.buf_used += req->length - header_len;
+	ureq->uvc_buf = buf;
 
-	if (buf->bytesused == video->queue.buf_used || !buf->sg ||
-			video->queue.flags & UVC_QUEUE_DROP_INCOMPLETE) {
+	if (buf->bytesused == video->queue.buf_used || !buf->sg) {
 		video->queue.buf_used = 0;
 		buf->state = UVC_BUF_STATE_DONE;
 		buf->offset = 0;
 		list_del(&buf->queue);
 		video->fid ^= UVC_STREAM_FID;
-		ureq->last_buf = buf;
+		ureq->is_last = true;
 	}
 }
 
@@ -212,14 +211,14 @@ uvc_video_encode_isoc(struct usb_request *req, struct uvc_video *video,
 	len -= ret;
 
 	req->length = video->req_size - len;
+	ureq->uvc_buf = buf;
 
-	if (buf->bytesused == video->queue.buf_used ||
-			video->queue.flags & UVC_QUEUE_DROP_INCOMPLETE) {
+	if (buf->bytesused == video->queue.buf_used) {
 		video->queue.buf_used = 0;
 		buf->state = UVC_BUF_STATE_DONE;
 		list_del(&buf->queue);
 		video->fid ^= UVC_STREAM_FID;
-		ureq->last_buf = buf;
+		ureq->is_last = true;
 	}
 }
 
@@ -231,11 +230,13 @@ static int uvcg_video_ep_queue(struct uvc_video *video, struct usb_request *req)
 {
 	int ret;
 
+	list_add_tail(&req->list, &video->req_inflight);
 	ret = usb_ep_queue(video->ep, req, GFP_ATOMIC);
 	if (ret < 0) {
 		uvcg_err(&video->uvc->func, "Failed to queue request (%d).\n",
 			 ret);
 
+		list_del(&req->list);
 		/* If the endpoint is disabled the descriptor may be NULL. */
 		if (video->ep->desc) {
 			/* Isochronous endpoints can't be halted. */
@@ -254,15 +255,46 @@ uvc_video_complete(struct usb_ep *ep, struct usb_request *req)
 	struct uvc_video *video = ureq->video;
 	struct uvc_video_queue *queue = &video->queue;
 	struct uvc_device *uvc = video->uvc;
+	struct uvc_buffer *uvc_buf = ureq->uvc_buf;
+	struct usb_request *itr;
 	unsigned long flags;
 
+	spin_lock_irqsave(&queue->irqlock, flags);
+	list_del(&req->list);
+	spin_unlock_irqrestore(&queue->irqlock, flags);
+
 	switch (req->status) {
 	case 0:
 		break;
 
 	case -EXDEV:
 		uvcg_dbg(&video->uvc->func, "VS request missed xfer.\n");
-		queue->flags |= UVC_QUEUE_DROP_INCOMPLETE;
+		spin_lock_irqsave(&queue->irqlock, flags);
+		list_for_each_entry(itr, &video->req_inflight, list) {
+			usb_ep_dequeue(ep, itr);
+		}
+
+		if (uvc_buf->state != UVC_BUF_STATE_DONE) {
+			video->fid = UVC_STREAM_FID;
+		} else {
+			queue->flags |= UVC_QUEUE_DROP_INCOMPLETE;
+		}
+
+		/*
+		 * We cancel the entire queue because calling usb_ep_dequeue on any pending request will
+		 * cancel all queued requests (on some USB controllers). ISOC transfers are "best effort"
+		 * and a missed frame or two won't really be noticeable. Rather than adding extra
+		 * bookkeeping to try and find the precise set of frames that need to be dropped, we drop
+		 * the entire queue.
+		 */
+		uvcg_queue_cancel_locked(queue, 0);
+		video->req_int_count = 0;
+		spin_unlock_irqrestore(&queue->irqlock, flags);
+		break;
+
+	case -ECONNRESET:
+		uvcg_dbg(&video->uvc->func, "VS request dqed.\n");
+		// Request dequeued using usb_ep_dequeue, no special handling required
 		break;
 
 	case -ESHUTDOWN:	/* disconnect from host. */
@@ -277,10 +309,11 @@ uvc_video_complete(struct usb_ep *ep, struct usb_request *req)
 		uvcg_queue_cancel(queue, 0);
 	}
 
-	if (ureq->last_buf) {
-		uvcg_complete_buffer(&video->queue, ureq->last_buf);
-		ureq->last_buf = NULL;
+	if (ureq->is_last) {
+		uvcg_complete_buffer(&video->queue, ureq->uvc_buf);
 	}
+	ureq->is_last = false;
+	ureq->uvc_buf = NULL;
 
 	spin_lock_irqsave(&video->req_lock, flags);
 	list_add_tail(&req->list, &video->req_free);
@@ -315,6 +348,7 @@ uvc_video_free_requests(struct uvc_video *video)
 	}
 
 	INIT_LIST_HEAD(&video->req_free);
+	INIT_LIST_HEAD(&video->req_inflight);
 	video->req_size = 0;
 	return 0;
 }
@@ -350,7 +384,8 @@ uvc_video_alloc_requests(struct uvc_video *video)
 		video->ureq[i].req->complete = uvc_video_complete;
 		video->ureq[i].req->context = &video->ureq[i];
 		video->ureq[i].video = video;
-		video->ureq[i].last_buf = NULL;
+		video->ureq[i].uvc_buf = NULL;
+		video->ureq[i].is_last = false;
 
 		list_add_tail(&video->ureq[i].req->list, &video->req_free);
 		/* req_size/PAGE_SIZE + 1 for overruns and + 1 for header */
@@ -385,6 +420,7 @@ static void uvcg_video_pump(struct work_struct *work)
 	struct usb_request *req = NULL;
 	struct uvc_buffer *buf;
 	unsigned long flags;
+	bool must_interrupt = false;
 	int ret;
 
 	while (video->ep->enabled) {
@@ -400,6 +436,7 @@ static void uvcg_video_pump(struct work_struct *work)
 		req = list_first_entry(&video->req_free, struct usb_request,
 					list);
 		list_del(&req->list);
+		must_interrupt = list_empty(&video->req_free);
 		spin_unlock_irqrestore(&video->req_lock, flags);
 
 		/*
@@ -420,14 +457,14 @@ static void uvcg_video_pump(struct work_struct *work)
 		 * interrupt load to a quarter but also catches the corner
 		 * cases, which needs to be handled.
 		 */
-		if (list_empty(&video->req_free) ||
+		if (must_interrupt ||
 		    buf->state == UVC_BUF_STATE_DONE ||
 		    !(video->req_int_count %
 		       DIV_ROUND_UP(video->uvc_num_requests, 4))) {
 			video->req_int_count = 0;
 			req->no_interrupt = 0;
 		} else {
-			req->no_interrupt = 1;
+			req->no_interrupt = 0;
 		}
 
 		/* Queue the USB request */
@@ -507,6 +544,7 @@ int uvcg_video_enable(struct uvc_video *video, int enable)
 int uvcg_video_init(struct uvc_video *video, struct uvc_device *uvc)
 {
 	INIT_LIST_HEAD(&video->req_free);
+	INIT_LIST_HEAD(&video->req_inflight);
 	spin_lock_init(&video->req_lock);
 	INIT_WORK(&video->pump, uvcg_video_pump);
 
@@ -527,4 +565,3 @@ int uvcg_video_init(struct uvc_video *video, struct uvc_device *uvc)
 			V4L2_BUF_TYPE_VIDEO_OUTPUT, &video->mutex);
 	return 0;
 }
-
--
Thinh Nguyen April 18, 2023, 2:49 a.m. UTC | #3
On Mon, Apr 17, 2023, Avichal Rakesh wrote:
> 
> On 4/17/23 06:49, Greg KH wrote:
> > On Fri, Apr 14, 2023 at 02:03:02PM -0700, Avichal Rakesh wrote:
> > 
> >> This problem may be further exaggerated by the DWC3 controller driver
> >> (which is what my device has) not setting the IMI flag when
> >> no_interrupt flag is set
> >> (https://urldefense.com/v3/__https://lore.kernel.org/all/ced336c84434571340c07994e3667a0ee284fefe.1666735451.git.Thinh.Nguyen@synopsys.com/)?__;!!A4F2R9G_pg!fB0krUX6qsOXrBsOHzLstVFWqTLL9ncwGFXxlxK0eedLY_29XdwjXRtbO-EWv1eX5okN1rOeBZspZ21KSb5b$ 
> >> UVC Gadget Driver sets the no_interrupt flag for ~3/4 of its queued
> >> usb_request, so an ISOC failure may not immediately interrupt the UVC
> >> gadget driver, leaving more time for the frame to finish encoding.
> >>
> >> I couldn't find any concrete error handling rules in the UVC specs, so
> >> I am not sure what the proper solution here is. To try out, I created
> >> a patch (attached below) that dequeues all queued usb_requests from
> >> the endpoint in case of an ISOC failure and clears the uvc buffer
> >> queue. This eliminated the partial frames with no perceivable frame
> >> drops.
> >>
> >> So my questions here are:
> >> 1. Is this a known issue, and if so are there workarounds for it?
> >> 2. If the answer to above is "No", does the explanation and mitigation
> >> seem reasonable?
> >>
> >> Patch follows (mostly for illustration, I can formalize it if
> >> needed!). It adds a new 'req_inflight' list to track queued
> >> usb_requests that have not been given back to the gadget driver and
> >> drops all the queued requests in case of an ISOC failure. The other
> >> changes are for the extra bookkeeping required to handle dropping all
> >> frames. I haven't been able to confirm it, but as far as I can tell
> >> the issue exists at ToT as well.
> >>

Perhaps this conversation with Jeff may explain the issues you observed:
https://lore.kernel.org/all/20221021164349.fft4yqnxuztsqdeu@synopsys.com/

To summarized the long conversation, the UVC needs to maintain a
constant queue of usb requests. Each request is scheduled for a specific
interval. If the UVC couldn't keep up feeding requests to the
controller, then we will have stale requests and missed isoc.

From the discussion and review, the UVC couldn't queue requests fast
enough. The problem is exacerbated when you reduce the interrupt
frequency with no_interrupt setting. The dwc3 driver has some basic
mechanism to detect for underrun by checking if the queue is empty, but
that's not enough to workaround it.

Your suggestion to dequeue the request would mean the controller driver
will reschedule the isoc transfer again, which reschedule the next
request for a new interval (potentially avoid being stale). That's fine,
but that may not be a great solution because we're still playing catch
up and the missed isoc already happened.

You may try to do the followings to mitigate this issue:
1) Make sure UVC to only queuing rate is constant. Perhaps queue dummy
   requests in between time gaps if need be?

2) Enhance dwc3 to detect underrun from UVC on request queue. ie. If the
   queue is empty and a new request is queue, reschedule the isoc transfer.
   This will break some other devices if time guarantee is required. So
   perhaps an additional flag is needed for this change of behavior.

BR,
Thinh
Avichal Rakesh April 18, 2023, 6:56 a.m. UTC | #4
On Mon, Apr 17, 2023 at 7:49 PM Thinh Nguyen <Thinh.Nguyen@synopsys.com> wrote:
>
> On Mon, Apr 17, 2023, Avichal Rakesh wrote:
> >
> > On 4/17/23 06:49, Greg KH wrote:
> > > On Fri, Apr 14, 2023 at 02:03:02PM -0700, Avichal Rakesh wrote:
> > >
> > >> This problem may be further exaggerated by the DWC3 controller driver
> > >> (which is what my device has) not setting the IMI flag when
> > >> no_interrupt flag is set
> > >> (https://urldefense.com/v3/__https://lore.kernel.org/all/ced336c84434571340c07994e3667a0ee284fefe.1666735451.git.Thinh.Nguyen@synopsys.com/)?__;!!A4F2R9G_pg!fB0krUX6qsOXrBsOHzLstVFWqTLL9ncwGFXxlxK0eedLY_29XdwjXRtbO-EWv1eX5okN1rOeBZspZ21KSb5b$
> > >> UVC Gadget Driver sets the no_interrupt flag for ~3/4 of its queued
> > >> usb_request, so an ISOC failure may not immediately interrupt the UVC
> > >> gadget driver, leaving more time for the frame to finish encoding.
> > >>
> > >> I couldn't find any concrete error handling rules in the UVC specs, so
> > >> I am not sure what the proper solution here is. To try out, I created
> > >> a patch (attached below) that dequeues all queued usb_requests from
> > >> the endpoint in case of an ISOC failure and clears the uvc buffer
> > >> queue. This eliminated the partial frames with no perceivable frame
> > >> drops.
> > >>
> > >> So my questions here are:
> > >> 1. Is this a known issue, and if so are there workarounds for it?
> > >> 2. If the answer to above is "No", does the explanation and mitigation
> > >> seem reasonable?
> > >>
> > >> Patch follows (mostly for illustration, I can formalize it if
> > >> needed!). It adds a new 'req_inflight' list to track queued
> > >> usb_requests that have not been given back to the gadget driver and
> > >> drops all the queued requests in case of an ISOC failure. The other
> > >> changes are for the extra bookkeeping required to handle dropping all
> > >> frames. I haven't been able to confirm it, but as far as I can tell
> > >> the issue exists at ToT as well.
> > >>
>
> Perhaps this conversation with Jeff may explain the issues you observed:
> https://lore.kernel.org/all/20221021164349.fft4yqnxuztsqdeu@synopsys.com/
>
> To summarized the long conversation, the UVC needs to maintain a
> constant queue of usb requests. Each request is scheduled for a specific
> interval. If the UVC couldn't keep up feeding requests to the
> controller, then we will have stale requests and missed isoc.
>
> From the discussion and review, the UVC couldn't queue requests fast
> enough. The problem is exacerbated when you reduce the interrupt
> frequency with no_interrupt setting. The dwc3 driver has some basic
> mechanism to detect for underrun by checking if the queue is empty, but
> that's not enough to workaround it.
>
> Your suggestion to dequeue the request would mean the controller driver
> will reschedule the isoc transfer again, which reschedule the next
> request for a new interval (potentially avoid being stale). That's fine,
> but that may not be a great solution because we're still playing catch
> up and the missed isoc already happened.
>
> You may try to do the followings to mitigate this issue:
> 1) Make sure UVC to only queuing rate is constant. Perhaps queue dummy
>    requests in between time gaps if need be?
>
This is extremely helpful, thank you! I have a somewhat basic
question: For an isoc request, is its "freshness" determined from the
time at which usb_ep_queue is called, or from the time at which the
previous request was transferred to the host? If the "freshness" is
determined from the time 'usb_ep_request' was called, I wonder if the
"underrun" is artificial because UVC Gadget driver pumps all free
usb_requests at once, and then waits around doing nothing while the
usb_requests get stale in the controller's queue. In this case, just
slowing the encode loop to wait a little before queuing more requests
should do effectively what you suggest above?

> 2) Enhance dwc3 to detect underrun from UVC on request queue. ie. If the
>    queue is empty and a new request is queue, reschedule the isoc transfer.
>    This will break some other devices if time guarantee is required. So
>    perhaps an additional flag is needed for this change of behavior.
>

I am not as familiar with the DWC3, but just to clarify: Is your
suggestion to reschedule the stale requests if the controller's send
queue is empty (underrun) and the UVC gadget driver queues a new
request? Depending on how many stale requests there are, would that
run the risk of timing out the new request as well?

> BR,
> Thinh

Best
Avi
Thinh Nguyen April 18, 2023, 7:39 p.m. UTC | #5
On Mon, Apr 17, 2023, Avichal Rakesh wrote:
> On Mon, Apr 17, 2023 at 7:49 PM Thinh Nguyen <Thinh.Nguyen@synopsys.com> wrote:
> >
> > On Mon, Apr 17, 2023, Avichal Rakesh wrote:
> > >
> > > On 4/17/23 06:49, Greg KH wrote:
> > > > On Fri, Apr 14, 2023 at 02:03:02PM -0700, Avichal Rakesh wrote:
> > > >
> > > >> This problem may be further exaggerated by the DWC3 controller driver
> > > >> (which is what my device has) not setting the IMI flag when
> > > >> no_interrupt flag is set
> > > >> (https://urldefense.com/v3/__https://lore.kernel.org/all/ced336c84434571340c07994e3667a0ee284fefe.1666735451.git.Thinh.Nguyen@synopsys.com/)?__;!!A4F2R9G_pg!fB0krUX6qsOXrBsOHzLstVFWqTLL9ncwGFXxlxK0eedLY_29XdwjXRtbO-EWv1eX5okN1rOeBZspZ21KSb5b$
> > > >> UVC Gadget Driver sets the no_interrupt flag for ~3/4 of its queued
> > > >> usb_request, so an ISOC failure may not immediately interrupt the UVC
> > > >> gadget driver, leaving more time for the frame to finish encoding.
> > > >>
> > > >> I couldn't find any concrete error handling rules in the UVC specs, so
> > > >> I am not sure what the proper solution here is. To try out, I created
> > > >> a patch (attached below) that dequeues all queued usb_requests from
> > > >> the endpoint in case of an ISOC failure and clears the uvc buffer
> > > >> queue. This eliminated the partial frames with no perceivable frame
> > > >> drops.
> > > >>
> > > >> So my questions here are:
> > > >> 1. Is this a known issue, and if so are there workarounds for it?
> > > >> 2. If the answer to above is "No", does the explanation and mitigation
> > > >> seem reasonable?
> > > >>
> > > >> Patch follows (mostly for illustration, I can formalize it if
> > > >> needed!). It adds a new 'req_inflight' list to track queued
> > > >> usb_requests that have not been given back to the gadget driver and
> > > >> drops all the queued requests in case of an ISOC failure. The other
> > > >> changes are for the extra bookkeeping required to handle dropping all
> > > >> frames. I haven't been able to confirm it, but as far as I can tell
> > > >> the issue exists at ToT as well.
> > > >>
> >
> > Perhaps this conversation with Jeff may explain the issues you observed:
> > https://urldefense.com/v3/__https://lore.kernel.org/all/20221021164349.fft4yqnxuztsqdeu@synopsys.com/__;!!A4F2R9G_pg!eK3VyAq7vX70vA-WJLA6_bPMbZFx0n33wH39JItHzwCNVqKSkN2aG0izF0GZPWqYxkgL-fNinWlIW71HbGGC$ 
> >
> > To summarized the long conversation, the UVC needs to maintain a
> > constant queue of usb requests. Each request is scheduled for a specific
> > interval. If the UVC couldn't keep up feeding requests to the
> > controller, then we will have stale requests and missed isoc.
> >
> > From the discussion and review, the UVC couldn't queue requests fast
> > enough. The problem is exacerbated when you reduce the interrupt
> > frequency with no_interrupt setting. The dwc3 driver has some basic
> > mechanism to detect for underrun by checking if the queue is empty, but
> > that's not enough to workaround it.
> >
> > Your suggestion to dequeue the request would mean the controller driver
> > will reschedule the isoc transfer again, which reschedule the next
> > request for a new interval (potentially avoid being stale). That's fine,
> > but that may not be a great solution because we're still playing catch
> > up and the missed isoc already happened.
> >
> > You may try to do the followings to mitigate this issue:
> > 1) Make sure UVC to only queuing rate is constant. Perhaps queue dummy
> >    requests in between time gaps if need be?
> >
> This is extremely helpful, thank you! I have a somewhat basic
> question: For an isoc request, is its "freshness" determined from the
> time at which usb_ep_queue is called, or from the time at which the
> previous request was transferred to the host? If the "freshness" is
> determined from the time 'usb_ep_request' was called, I wonder if the
> "underrun" is artificial because UVC Gadget driver pumps all free
> usb_requests at once, and then waits around doing nothing while the
> usb_requests get stale in the controller's queue. In this case, just
> slowing the encode loop to wait a little before queuing more requests
> should do effectively what you suggest above?
> 

Here's a simplified version of how it works:
(Note that I'll be talking/using usb request as if it's TRBs and also
ignore SG. I will also only focus about the current handling of the
gadget driver and dwc3 driver.)

The controller tracks the current uframe. For isoc endpoint, when you
call usb_ep_queue(), the controller doesn't consume the request right
away. The request is scheduled for a specific interval. For UVC, an
interval is a single uframe (125us). As the current uframe keeps
incrementing, each request on a queue is consumed. If there's no request
available for the current uframe, the next queued request is considered
stale or expired. This case is considered underrun.

"freshness" means the request is scheduled for a future uframe. "stale"
means the request is queued for a past uframe. Isoc data is time
sensitive. So if the data isn't gone out at a specific time, then it's
dropped.

The problem with many gadget drivers that use isoc endpoint is that they
only queue more requests when they get notified that the previous set of
requests are completed. This creates time gaps which can starve the
controller of more requests.

We have some basic mechanism in dwc3 to reschedule new requests when
there's missed isoc and the queue is empty for UVC. However that's not
enough. The time the controller consumes the request and the time the
driver handles the interrupt is different. The driver may not know that
queue is empty until it's already too late. The gadget driver just needs
to make sure to keep the request queue to at least X amount of requests.
Note that 125us is relatively fast.

Note that this talking point is assuming that the UVC host behaving
properly and poll for data every uframe. If not, it's a different issue.

> > 2) Enhance dwc3 to detect underrun from UVC on request queue. ie. If the
> >    queue is empty and a new request is queue, reschedule the isoc transfer.
> >    This will break some other devices if time guarantee is required. So
> >    perhaps an additional flag is needed for this change of behavior.
> >
> 
> I am not as familiar with the DWC3, but just to clarify: Is your
> suggestion to reschedule the stale requests if the controller's send
> queue is empty (underrun) and the UVC gadget driver queues a new
> request? Depending on how many stale requests there are, would that
> run the risk of timing out the new request as well?
> 

No, rescheduling here means all the queued requests will be scheduled
for a new future uframe. But also note that the queue empty check is
insufficient as explained above. So this would only mitigate the issue.

BR,
Thinh
Avichal Rakesh April 18, 2023, 10:45 p.m. UTC | #6
On Tue, Apr 18, 2023 at 12:40 PM Thinh Nguyen <Thinh.Nguyen@synopsys.com> wrote:
>
> On Mon, Apr 17, 2023, Avichal Rakesh wrote:
> > On Mon, Apr 17, 2023 at 7:49 PM Thinh Nguyen <Thinh.Nguyen@synopsys.com> wrote:
> > >
> > > On Mon, Apr 17, 2023, Avichal Rakesh wrote:
> > > >
> > > > On 4/17/23 06:49, Greg KH wrote:
> > > > > On Fri, Apr 14, 2023 at 02:03:02PM -0700, Avichal Rakesh wrote:
> > > > >
> > > > >> This problem may be further exaggerated by the DWC3 controller driver
> > > > >> (which is what my device has) not setting the IMI flag when
> > > > >> no_interrupt flag is set
> > > > >> (https://urldefense.com/v3/__https://lore.kernel.org/all/ced336c84434571340c07994e3667a0ee284fefe.1666735451.git.Thinh.Nguyen@synopsys.com/)?__;!!A4F2R9G_pg!fB0krUX6qsOXrBsOHzLstVFWqTLL9ncwGFXxlxK0eedLY_29XdwjXRtbO-EWv1eX5okN1rOeBZspZ21KSb5b$
> > > > >> UVC Gadget Driver sets the no_interrupt flag for ~3/4 of its queued
> > > > >> usb_request, so an ISOC failure may not immediately interrupt the UVC
> > > > >> gadget driver, leaving more time for the frame to finish encoding.
> > > > >>
> > > > >> I couldn't find any concrete error handling rules in the UVC specs, so
> > > > >> I am not sure what the proper solution here is. To try out, I created
> > > > >> a patch (attached below) that dequeues all queued usb_requests from
> > > > >> the endpoint in case of an ISOC failure and clears the uvc buffer
> > > > >> queue. This eliminated the partial frames with no perceivable frame
> > > > >> drops.
> > > > >>
> > > > >> So my questions here are:
> > > > >> 1. Is this a known issue, and if so are there workarounds for it?
> > > > >> 2. If the answer to above is "No", does the explanation and mitigation
> > > > >> seem reasonable?
> > > > >>
> > > > >> Patch follows (mostly for illustration, I can formalize it if
> > > > >> needed!). It adds a new 'req_inflight' list to track queued
> > > > >> usb_requests that have not been given back to the gadget driver and
> > > > >> drops all the queued requests in case of an ISOC failure. The other
> > > > >> changes are for the extra bookkeeping required to handle dropping all
> > > > >> frames. I haven't been able to confirm it, but as far as I can tell
> > > > >> the issue exists at ToT as well.
> > > > >>
> > >
> > > Perhaps this conversation with Jeff may explain the issues you observed:
> > > https://urldefense.com/v3/__https://lore.kernel.org/all/20221021164349.fft4yqnxuztsqdeu@synopsys.com/__;!!A4F2R9G_pg!eK3VyAq7vX70vA-WJLA6_bPMbZFx0n33wH39JItHzwCNVqKSkN2aG0izF0GZPWqYxkgL-fNinWlIW71HbGGC$
> > >
> > > To summarized the long conversation, the UVC needs to maintain a
> > > constant queue of usb requests. Each request is scheduled for a specific
> > > interval. If the UVC couldn't keep up feeding requests to the
> > > controller, then we will have stale requests and missed isoc.
> > >
> > > From the discussion and review, the UVC couldn't queue requests fast
> > > enough. The problem is exacerbated when you reduce the interrupt
> > > frequency with no_interrupt setting. The dwc3 driver has some basic
> > > mechanism to detect for underrun by checking if the queue is empty, but
> > > that's not enough to workaround it.
> > >
> > > Your suggestion to dequeue the request would mean the controller driver
> > > will reschedule the isoc transfer again, which reschedule the next
> > > request for a new interval (potentially avoid being stale). That's fine,
> > > but that may not be a great solution because we're still playing catch
> > > up and the missed isoc already happened.
> > >
> > > You may try to do the followings to mitigate this issue:
> > > 1) Make sure UVC to only queuing rate is constant. Perhaps queue dummy
> > >    requests in between time gaps if need be?
> > >
> > This is extremely helpful, thank you! I have a somewhat basic
> > question: For an isoc request, is its "freshness" determined from the
> > time at which usb_ep_queue is called, or from the time at which the
> > previous request was transferred to the host? If the "freshness" is
> > determined from the time 'usb_ep_request' was called, I wonder if the
> > "underrun" is artificial because UVC Gadget driver pumps all free
> > usb_requests at once, and then waits around doing nothing while the
> > usb_requests get stale in the controller's queue. In this case, just
> > slowing the encode loop to wait a little before queuing more requests
> > should do effectively what you suggest above?
> >
>
> Here's a simplified version of how it works:
> (Note that I'll be talking/using usb request as if it's TRBs and also
> ignore SG. I will also only focus about the current handling of the
> gadget driver and dwc3 driver.)

I appreciate you taking the time to explain this. This is very
enlightening, thank you!

>
> The controller tracks the current uframe. For isoc endpoint, when you
> call usb_ep_queue(), the controller doesn't consume the request right
> away. The request is scheduled for a specific interval. For UVC, an
> interval is a single uframe (125us). As the current uframe keeps
> incrementing, each request on a queue is consumed. If there's no request
> available for the current uframe, the next queued request is considered
> stale or expired. This case is considered underrun.
>
> "freshness" means the request is scheduled for a future uframe. "stale"
> means the request is queued for a past uframe. Isoc data is time
> sensitive. So if the data isn't gone out at a specific time, then it's
> dropped.
>
> The problem with many gadget drivers that use isoc endpoint is that they
> only queue more requests when they get notified that the previous set of
> requests are completed. This creates time gaps which can starve the
> controller of more requests.
>
> We have some basic mechanism in dwc3 to reschedule new requests when
> there's missed isoc and the queue is empty for UVC. However that's not
> enough. The time the controller consumes the request and the time the
> driver handles the interrupt is different. The driver may not know that
> queue is empty until it's already too late. The gadget driver just needs
> to make sure to keep the request queue to at least X amount of requests.
> Note that 125us is relatively fast.

I see, and I think I understand Greg's previous comment better as
well: The UVC driver isn't falling behind on the video stream, it is
falling behind the usb controller's monotonic isoc stream.
Thinh Nguyen April 19, 2023, 12:11 a.m. UTC | #7
On Tue, Apr 18, 2023, Avichal Rakesh wrote:
> On Tue, Apr 18, 2023 at 12:40 PM Thinh Nguyen <Thinh.Nguyen@synopsys.com> wrote:
> >
> > On Mon, Apr 17, 2023, Avichal Rakesh wrote:
> > > On Mon, Apr 17, 2023 at 7:49 PM Thinh Nguyen <Thinh.Nguyen@synopsys.com> wrote:
> > > >
> > > > On Mon, Apr 17, 2023, Avichal Rakesh wrote:
> > > > >
> > > > > On 4/17/23 06:49, Greg KH wrote:
> > > > > > On Fri, Apr 14, 2023 at 02:03:02PM -0700, Avichal Rakesh wrote:
> > > > > >
> > > > > >> This problem may be further exaggerated by the DWC3 controller driver
> > > > > >> (which is what my device has) not setting the IMI flag when
> > > > > >> no_interrupt flag is set
> > > > > >> (https://urldefense.com/v3/__https://lore.kernel.org/all/ced336c84434571340c07994e3667a0ee284fefe.1666735451.git.Thinh.Nguyen@synopsys.com/)?__;!!A4F2R9G_pg!fB0krUX6qsOXrBsOHzLstVFWqTLL9ncwGFXxlxK0eedLY_29XdwjXRtbO-EWv1eX5okN1rOeBZspZ21KSb5b$
> > > > > >> UVC Gadget Driver sets the no_interrupt flag for ~3/4 of its queued
> > > > > >> usb_request, so an ISOC failure may not immediately interrupt the UVC
> > > > > >> gadget driver, leaving more time for the frame to finish encoding.
> > > > > >>
> > > > > >> I couldn't find any concrete error handling rules in the UVC specs, so
> > > > > >> I am not sure what the proper solution here is. To try out, I created
> > > > > >> a patch (attached below) that dequeues all queued usb_requests from
> > > > > >> the endpoint in case of an ISOC failure and clears the uvc buffer
> > > > > >> queue. This eliminated the partial frames with no perceivable frame
> > > > > >> drops.
> > > > > >>
> > > > > >> So my questions here are:
> > > > > >> 1. Is this a known issue, and if so are there workarounds for it?
> > > > > >> 2. If the answer to above is "No", does the explanation and mitigation
> > > > > >> seem reasonable?
> > > > > >>
> > > > > >> Patch follows (mostly for illustration, I can formalize it if
> > > > > >> needed!). It adds a new 'req_inflight' list to track queued
> > > > > >> usb_requests that have not been given back to the gadget driver and
> > > > > >> drops all the queued requests in case of an ISOC failure. The other
> > > > > >> changes are for the extra bookkeeping required to handle dropping all
> > > > > >> frames. I haven't been able to confirm it, but as far as I can tell
> > > > > >> the issue exists at ToT as well.
> > > > > >>
> > > >
> > > > Perhaps this conversation with Jeff may explain the issues you observed:
> > > > https://urldefense.com/v3/__https://lore.kernel.org/all/20221021164349.fft4yqnxuztsqdeu@synopsys.com/__;!!A4F2R9G_pg!eK3VyAq7vX70vA-WJLA6_bPMbZFx0n33wH39JItHzwCNVqKSkN2aG0izF0GZPWqYxkgL-fNinWlIW71HbGGC$
> > > >
> > > > To summarized the long conversation, the UVC needs to maintain a
> > > > constant queue of usb requests. Each request is scheduled for a specific
> > > > interval. If the UVC couldn't keep up feeding requests to the
> > > > controller, then we will have stale requests and missed isoc.
> > > >
> > > > From the discussion and review, the UVC couldn't queue requests fast
> > > > enough. The problem is exacerbated when you reduce the interrupt
> > > > frequency with no_interrupt setting. The dwc3 driver has some basic
> > > > mechanism to detect for underrun by checking if the queue is empty, but
> > > > that's not enough to workaround it.
> > > >
> > > > Your suggestion to dequeue the request would mean the controller driver
> > > > will reschedule the isoc transfer again, which reschedule the next
> > > > request for a new interval (potentially avoid being stale). That's fine,
> > > > but that may not be a great solution because we're still playing catch
> > > > up and the missed isoc already happened.
> > > >
> > > > You may try to do the followings to mitigate this issue:
> > > > 1) Make sure UVC to only queuing rate is constant. Perhaps queue dummy
> > > >    requests in between time gaps if need be?
> > > >
> > > This is extremely helpful, thank you! I have a somewhat basic
> > > question: For an isoc request, is its "freshness" determined from the
> > > time at which usb_ep_queue is called, or from the time at which the
> > > previous request was transferred to the host? If the "freshness" is
> > > determined from the time 'usb_ep_request' was called, I wonder if the
> > > "underrun" is artificial because UVC Gadget driver pumps all free
> > > usb_requests at once, and then waits around doing nothing while the
> > > usb_requests get stale in the controller's queue. In this case, just
> > > slowing the encode loop to wait a little before queuing more requests
> > > should do effectively what you suggest above?
> > >
> >
> > Here's a simplified version of how it works:
> > (Note that I'll be talking/using usb request as if it's TRBs and also
> > ignore SG. I will also only focus about the current handling of the
> > gadget driver and dwc3 driver.)
> 
> I appreciate you taking the time to explain this. This is very
> enlightening, thank you!
> 
> >
> > The controller tracks the current uframe. For isoc endpoint, when you
> > call usb_ep_queue(), the controller doesn't consume the request right
> > away. The request is scheduled for a specific interval. For UVC, an
> > interval is a single uframe (125us). As the current uframe keeps
> > incrementing, each request on a queue is consumed. If there's no request
> > available for the current uframe, the next queued request is considered
> > stale or expired. This case is considered underrun.
> >
> > "freshness" means the request is scheduled for a future uframe. "stale"
> > means the request is queued for a past uframe. Isoc data is time
> > sensitive. So if the data isn't gone out at a specific time, then it's
> > dropped.
> >
> > The problem with many gadget drivers that use isoc endpoint is that they
> > only queue more requests when they get notified that the previous set of
> > requests are completed. This creates time gaps which can starve the
> > controller of more requests.
> >
> > We have some basic mechanism in dwc3 to reschedule new requests when
> > there's missed isoc and the queue is empty for UVC. However that's not
> > enough. The time the controller consumes the request and the time the
> > driver handles the interrupt is different. The driver may not know that
> > queue is empty until it's already too late. The gadget driver just needs
> > to make sure to keep the request queue to at least X amount of requests.
> > Note that 125us is relatively fast.
> 
> I see, and I think I understand Greg's previous comment better as
> well: The UVC driver isn't falling behind on the video stream, it is
> falling behind the usb controller's monotonic isoc stream.
> 
> From what I can see, this leaves us in an interesting place: UVC
> allows the host to configure the camera's output resolution and fps,
> which effectively controls how fast the camera is generating data.
> This is at odds with the UVC gadget driver, which currently packs each
> video frame into as few usb_requests as possible (using the full

Hm... I would spread the data to more requests and not put all the eggs
in one basket. It shouldn't be an issue with dwc3 controller, but some
hosts aren't the greatest when handling isoc at high transfer rate.

> available size in usb_requests). Effectively, the UVC gadget driver
> attempts to use the "full" bandwidth of isoc transfers even when the
> camera isn't generating data fast enough. For example, in my
> observations: 1 video frame is ~22kB. At 30fps, this represents 1/30
> of the amount of data the camera would generate in a second. This 22kB
> is split into 8 usb_requests which is about 1/1000 the number of
> requests UVC driver needs to generate per second to prevent isoc
> failures (assuming 125us monotonic uframes). Assuming some fudge
> factor from the simplifications in your explanation gives the uvc
> driver some extra leeway with request queuing, we're still roughly two
> order of magnitudes out of sync. Even with perfect 'complete'
> callbacks and video frame encodings, an underrun seems inevitable.
> Data is being generated at a far slower rate than it is being
> transferred. Does this reasoning seem valid?
> 
> Just as a test I'll try updating the UVC driver to consume 266
> usb_requests per video frame (~1/30 of 8000), which should be enough
> to keep the usb controller queue occupied for ~1/30s. Ideally, by the
> time the controller queue is empty, the camera would have produced a

You mean queue 266 requests at once? This may work. There should be a
large enough time gap so that the dwc3 driver can properly detect an
empty queue to be able to reschedule new requests for the next video
frame.

> new frame. This doesn't solve the issue with latencies around callback
> and an isoc failure might still happen, hopefully the failure
> frequency is reduced because UVC queues enough requests per video
> frame to not starve the controller's queue while waiting on a new
> frame and the only way they go out of sync is from 'complete' callback
> timings. I am assuming this has been tried before, but my LKML search
> skills are failing and I can't find much on it.
> 
> >
> > Note that this talking point is assuming that the UVC host behaving
> > properly and poll for data every uframe. If not, it's a different issue.
> >
> > > > 2) Enhance dwc3 to detect underrun from UVC on request queue. ie. If the
> > > >    queue is empty and a new request is queue, reschedule the isoc transfer.
> > > >    This will break some other devices if time guarantee is required. So
> > > >    perhaps an additional flag is needed for this change of behavior.
> > > >
> > >
> > > I am not as familiar with the DWC3, but just to clarify: Is your
> > > suggestion to reschedule the stale requests if the controller's send
> > > queue is empty (underrun) and the UVC gadget driver queues a new
> > > request? Depending on how many stale requests there are, would that
> > > run the risk of timing out the new request as well?
> > >
> >
> > No, rescheduling here means all the queued requests will be scheduled
> > for a new future uframe. But also note that the queue empty check is
> > insufficient as explained above. So this would only mitigate the issue.
> >
> 
> This makes a lot more sense after your explanation, thank you!
> 

np.

BR,
Thinh
Thinh Nguyen April 19, 2023, 12:19 a.m. UTC | #8
On Wed, Apr 19, 2023, Thinh Nguyen wrote:
> On Tue, Apr 18, 2023, Avichal Rakesh wrote:
> > On Tue, Apr 18, 2023 at 12:40 PM Thinh Nguyen <Thinh.Nguyen@synopsys.com> wrote:
> > >
> > > On Mon, Apr 17, 2023, Avichal Rakesh wrote:
> > > > On Mon, Apr 17, 2023 at 7:49 PM Thinh Nguyen <Thinh.Nguyen@synopsys.com> wrote:
> > > > >
> > > > > On Mon, Apr 17, 2023, Avichal Rakesh wrote:
> > > > > >
> > > > > > On 4/17/23 06:49, Greg KH wrote:
> > > > > > > On Fri, Apr 14, 2023 at 02:03:02PM -0700, Avichal Rakesh wrote:
> > > > > > >
> > > > > > >> This problem may be further exaggerated by the DWC3 controller driver
> > > > > > >> (which is what my device has) not setting the IMI flag when
> > > > > > >> no_interrupt flag is set
> > > > > > >> (https://urldefense.com/v3/__https://lore.kernel.org/all/ced336c84434571340c07994e3667a0ee284fefe.1666735451.git.Thinh.Nguyen@synopsys.com/)?__;!!A4F2R9G_pg!fB0krUX6qsOXrBsOHzLstVFWqTLL9ncwGFXxlxK0eedLY_29XdwjXRtbO-EWv1eX5okN1rOeBZspZ21KSb5b$
> > > > > > >> UVC Gadget Driver sets the no_interrupt flag for ~3/4 of its queued
> > > > > > >> usb_request, so an ISOC failure may not immediately interrupt the UVC
> > > > > > >> gadget driver, leaving more time for the frame to finish encoding.
> > > > > > >>
> > > > > > >> I couldn't find any concrete error handling rules in the UVC specs, so
> > > > > > >> I am not sure what the proper solution here is. To try out, I created
> > > > > > >> a patch (attached below) that dequeues all queued usb_requests from
> > > > > > >> the endpoint in case of an ISOC failure and clears the uvc buffer
> > > > > > >> queue. This eliminated the partial frames with no perceivable frame
> > > > > > >> drops.
> > > > > > >>
> > > > > > >> So my questions here are:
> > > > > > >> 1. Is this a known issue, and if so are there workarounds for it?
> > > > > > >> 2. If the answer to above is "No", does the explanation and mitigation
> > > > > > >> seem reasonable?
> > > > > > >>
> > > > > > >> Patch follows (mostly for illustration, I can formalize it if
> > > > > > >> needed!). It adds a new 'req_inflight' list to track queued
> > > > > > >> usb_requests that have not been given back to the gadget driver and
> > > > > > >> drops all the queued requests in case of an ISOC failure. The other
> > > > > > >> changes are for the extra bookkeeping required to handle dropping all
> > > > > > >> frames. I haven't been able to confirm it, but as far as I can tell
> > > > > > >> the issue exists at ToT as well.
> > > > > > >>
> > > > >
> > > > > Perhaps this conversation with Jeff may explain the issues you observed:
> > > > > https://urldefense.com/v3/__https://lore.kernel.org/all/20221021164349.fft4yqnxuztsqdeu@synopsys.com/__;!!A4F2R9G_pg!eK3VyAq7vX70vA-WJLA6_bPMbZFx0n33wH39JItHzwCNVqKSkN2aG0izF0GZPWqYxkgL-fNinWlIW71HbGGC$
> > > > >
> > > > > To summarized the long conversation, the UVC needs to maintain a
> > > > > constant queue of usb requests. Each request is scheduled for a specific
> > > > > interval. If the UVC couldn't keep up feeding requests to the
> > > > > controller, then we will have stale requests and missed isoc.
> > > > >
> > > > > From the discussion and review, the UVC couldn't queue requests fast
> > > > > enough. The problem is exacerbated when you reduce the interrupt
> > > > > frequency with no_interrupt setting. The dwc3 driver has some basic
> > > > > mechanism to detect for underrun by checking if the queue is empty, but
> > > > > that's not enough to workaround it.
> > > > >
> > > > > Your suggestion to dequeue the request would mean the controller driver
> > > > > will reschedule the isoc transfer again, which reschedule the next
> > > > > request for a new interval (potentially avoid being stale). That's fine,
> > > > > but that may not be a great solution because we're still playing catch
> > > > > up and the missed isoc already happened.
> > > > >
> > > > > You may try to do the followings to mitigate this issue:
> > > > > 1) Make sure UVC to only queuing rate is constant. Perhaps queue dummy
> > > > >    requests in between time gaps if need be?
> > > > >
> > > > This is extremely helpful, thank you! I have a somewhat basic
> > > > question: For an isoc request, is its "freshness" determined from the
> > > > time at which usb_ep_queue is called, or from the time at which the
> > > > previous request was transferred to the host? If the "freshness" is
> > > > determined from the time 'usb_ep_request' was called, I wonder if the
> > > > "underrun" is artificial because UVC Gadget driver pumps all free
> > > > usb_requests at once, and then waits around doing nothing while the
> > > > usb_requests get stale in the controller's queue. In this case, just
> > > > slowing the encode loop to wait a little before queuing more requests
> > > > should do effectively what you suggest above?
> > > >
> > >
> > > Here's a simplified version of how it works:
> > > (Note that I'll be talking/using usb request as if it's TRBs and also
> > > ignore SG. I will also only focus about the current handling of the
> > > gadget driver and dwc3 driver.)
> > 
> > I appreciate you taking the time to explain this. This is very
> > enlightening, thank you!
> > 
> > >
> > > The controller tracks the current uframe. For isoc endpoint, when you
> > > call usb_ep_queue(), the controller doesn't consume the request right
> > > away. The request is scheduled for a specific interval. For UVC, an
> > > interval is a single uframe (125us). As the current uframe keeps
> > > incrementing, each request on a queue is consumed. If there's no request
> > > available for the current uframe, the next queued request is considered
> > > stale or expired. This case is considered underrun.
> > >
> > > "freshness" means the request is scheduled for a future uframe. "stale"
> > > means the request is queued for a past uframe. Isoc data is time
> > > sensitive. So if the data isn't gone out at a specific time, then it's
> > > dropped.
> > >
> > > The problem with many gadget drivers that use isoc endpoint is that they
> > > only queue more requests when they get notified that the previous set of
> > > requests are completed. This creates time gaps which can starve the
> > > controller of more requests.
> > >
> > > We have some basic mechanism in dwc3 to reschedule new requests when
> > > there's missed isoc and the queue is empty for UVC. However that's not
> > > enough. The time the controller consumes the request and the time the
> > > driver handles the interrupt is different. The driver may not know that
> > > queue is empty until it's already too late. The gadget driver just needs
> > > to make sure to keep the request queue to at least X amount of requests.
> > > Note that 125us is relatively fast.
> > 
> > I see, and I think I understand Greg's previous comment better as
> > well: The UVC driver isn't falling behind on the video stream, it is
> > falling behind the usb controller's monotonic isoc stream.
> > 
> > From what I can see, this leaves us in an interesting place: UVC
> > allows the host to configure the camera's output resolution and fps,
> > which effectively controls how fast the camera is generating data.
> > This is at odds with the UVC gadget driver, which currently packs each
> > video frame into as few usb_requests as possible (using the full
> 
> Hm... I would spread the data to more requests and not put all the eggs
> in one basket. It shouldn't be an issue with dwc3 controller, but some
> hosts aren't the greatest when handling isoc at high transfer rate.
> 
> > available size in usb_requests). Effectively, the UVC gadget driver
> > attempts to use the "full" bandwidth of isoc transfers even when the
> > camera isn't generating data fast enough. For example, in my
> > observations: 1 video frame is ~22kB. At 30fps, this represents 1/30
> > of the amount of data the camera would generate in a second. This 22kB
> > is split into 8 usb_requests which is about 1/1000 the number of
> > requests UVC driver needs to generate per second to prevent isoc
> > failures (assuming 125us monotonic uframes). Assuming some fudge
> > factor from the simplifications in your explanation gives the uvc
> > driver some extra leeway with request queuing, we're still roughly two
> > order of magnitudes out of sync. Even with perfect 'complete'
> > callbacks and video frame encodings, an underrun seems inevitable.
> > Data is being generated at a far slower rate than it is being
> > transferred. Does this reasoning seem valid?
> > 
> > Just as a test I'll try updating the UVC driver to consume 266
> > usb_requests per video frame (~1/30 of 8000), which should be enough
> > to keep the usb controller queue occupied for ~1/30s. Ideally, by the
> > time the controller queue is empty, the camera would have produced a
> 
> You mean queue 266 requests at once? This may work. There should be a
> large enough time gap so that the dwc3 driver can properly detect an
> empty queue to be able to reschedule new requests for the next video
> frame.

That's a lot of requests to allocate. Can't you just make sure to notify
the gadget driver more often to requeue requests and don't set
no_interrupt as often?

> 
> > new frame. This doesn't solve the issue with latencies around callback
> > and an isoc failure might still happen, hopefully the failure
> > frequency is reduced because UVC queues enough requests per video
> > frame to not starve the controller's queue while waiting on a new
> > frame and the only way they go out of sync is from 'complete' callback
> > timings. I am assuming this has been tried before, but my LKML search
> > skills are failing and I can't find much on it.
> > 
> > >

BR,
Thinh
Alan Stern April 19, 2023, 1:07 a.m. UTC | #9
On Tue, Apr 18, 2023 at 03:45:53PM -0700, Avichal Rakesh wrote:
> I see, and I think I understand Greg's previous comment better as
> well: The UVC driver isn't falling behind on the video stream, it is
> falling behind the usb controller's monotonic isoc stream.
> 
> From what I can see, this leaves us in an interesting place: UVC
> allows the host to configure the camera's output resolution and fps,
> which effectively controls how fast the camera is generating data.
> This is at odds with the UVC gadget driver, which currently packs each
> video frame into as few usb_requests as possible (using the full
> available size in usb_requests). Effectively, the UVC gadget driver
> attempts to use the "full" bandwidth of isoc transfers even when the
> camera isn't generating data fast enough. For example, in my
> observations: 1 video frame is ~22kB. At 30fps, this represents 1/30
> of the amount of data the camera would generate in a second. This 22kB
> is split into 8 usb_requests which is about 1/1000 the number of
> requests UVC driver needs to generate per second to prevent isoc
> failures (assuming 125us monotonic uframes). Assuming some fudge
> factor from the simplifications in your explanation gives the uvc
> driver some extra leeway with request queuing, we're still roughly two
> order of magnitudes out of sync. Even with perfect 'complete'
> callbacks and video frame encodings, an underrun seems inevitable.
> Data is being generated at a far slower rate than it is being
> transferred. Does this reasoning seem valid?
> 
> Just as a test I'll try updating the UVC driver to consume 266
> usb_requests per video frame (~1/30 of 8000), which should be enough
> to keep the usb controller queue occupied for ~1/30s. Ideally, by the
> time the controller queue is empty, the camera would have produced a
> new frame. This doesn't solve the issue with latencies around callback
> and an isoc failure might still happen, hopefully the failure
> frequency is reduced because UVC queues enough requests per video
> frame to not starve the controller's queue while waiting on a new
> frame and the only way they go out of sync is from 'complete' callback
> timings. I am assuming this has been tried before, but my LKML search
> skills are failing and I can't find much on it.

Note that there's nothing wrong with submitting a 0-length isochronous 
transfer.  If there's no data left but you still need to send 
_something_ in order to fill out the remaining slots in the controller's 
schedule, this is a good way to do it.

Alan Stern
Avichal Rakesh April 19, 2023, 5:25 a.m. UTC | #10
On Tue, Apr 18, 2023 at 5:19 PM Thinh Nguyen <Thinh.Nguyen@synopsys.com> wrote:
>
> On Wed, Apr 19, 2023, Thinh Nguyen wrote:
> > On Tue, Apr 18, 2023, Avichal Rakesh wrote:
> > > On Tue, Apr 18, 2023 at 12:40 PM Thinh Nguyen <Thinh.Nguyen@synopsys.com> wrote:
> > > >
> > > > On Mon, Apr 17, 2023, Avichal Rakesh wrote:
> > > > > On Mon, Apr 17, 2023 at 7:49 PM Thinh Nguyen <Thinh.Nguyen@synopsys.com> wrote:
> > > > > >
> > > > > > On Mon, Apr 17, 2023, Avichal Rakesh wrote:
> > > > > > >
> > > > > > > On 4/17/23 06:49, Greg KH wrote:
> > > > > > > > On Fri, Apr 14, 2023 at 02:03:02PM -0700, Avichal Rakesh wrote:
> > > > > > > >
> > > > > > > >> This problem may be further exaggerated by the DWC3 controller driver
> > > > > > > >> (which is what my device has) not setting the IMI flag when
> > > > > > > >> no_interrupt flag is set
> > > > > > > >> (https://urldefense.com/v3/__https://lore.kernel.org/all/ced336c84434571340c07994e3667a0ee284fefe.1666735451.git.Thinh.Nguyen@synopsys.com/)?__;!!A4F2R9G_pg!fB0krUX6qsOXrBsOHzLstVFWqTLL9ncwGFXxlxK0eedLY_29XdwjXRtbO-EWv1eX5okN1rOeBZspZ21KSb5b$
> > > > > > > >> UVC Gadget Driver sets the no_interrupt flag for ~3/4 of its queued
> > > > > > > >> usb_request, so an ISOC failure may not immediately interrupt the UVC
> > > > > > > >> gadget driver, leaving more time for the frame to finish encoding.
> > > > > > > >>
> > > > > > > >> I couldn't find any concrete error handling rules in the UVC specs, so
> > > > > > > >> I am not sure what the proper solution here is. To try out, I created
> > > > > > > >> a patch (attached below) that dequeues all queued usb_requests from
> > > > > > > >> the endpoint in case of an ISOC failure and clears the uvc buffer
> > > > > > > >> queue. This eliminated the partial frames with no perceivable frame
> > > > > > > >> drops.
> > > > > > > >>
> > > > > > > >> So my questions here are:
> > > > > > > >> 1. Is this a known issue, and if so are there workarounds for it?
> > > > > > > >> 2. If the answer to above is "No", does the explanation and mitigation
> > > > > > > >> seem reasonable?
> > > > > > > >>
> > > > > > > >> Patch follows (mostly for illustration, I can formalize it if
> > > > > > > >> needed!). It adds a new 'req_inflight' list to track queued
> > > > > > > >> usb_requests that have not been given back to the gadget driver and
> > > > > > > >> drops all the queued requests in case of an ISOC failure. The other
> > > > > > > >> changes are for the extra bookkeeping required to handle dropping all
> > > > > > > >> frames. I haven't been able to confirm it, but as far as I can tell
> > > > > > > >> the issue exists at ToT as well.
> > > > > > > >>
> > > > > >
> > > > > > Perhaps this conversation with Jeff may explain the issues you observed:
> > > > > > https://urldefense.com/v3/__https://lore.kernel.org/all/20221021164349.fft4yqnxuztsqdeu@synopsys.com/__;!!A4F2R9G_pg!eK3VyAq7vX70vA-WJLA6_bPMbZFx0n33wH39JItHzwCNVqKSkN2aG0izF0GZPWqYxkgL-fNinWlIW71HbGGC$
> > > > > >
> > > > > > To summarized the long conversation, the UVC needs to maintain a
> > > > > > constant queue of usb requests. Each request is scheduled for a specific
> > > > > > interval. If the UVC couldn't keep up feeding requests to the
> > > > > > controller, then we will have stale requests and missed isoc.
> > > > > >
> > > > > > From the discussion and review, the UVC couldn't queue requests fast
> > > > > > enough. The problem is exacerbated when you reduce the interrupt
> > > > > > frequency with no_interrupt setting. The dwc3 driver has some basic
> > > > > > mechanism to detect for underrun by checking if the queue is empty, but
> > > > > > that's not enough to workaround it.
> > > > > >
> > > > > > Your suggestion to dequeue the request would mean the controller driver
> > > > > > will reschedule the isoc transfer again, which reschedule the next
> > > > > > request for a new interval (potentially avoid being stale). That's fine,
> > > > > > but that may not be a great solution because we're still playing catch
> > > > > > up and the missed isoc already happened.
> > > > > >
> > > > > > You may try to do the followings to mitigate this issue:
> > > > > > 1) Make sure UVC to only queuing rate is constant. Perhaps queue dummy
> > > > > >    requests in between time gaps if need be?
> > > > > >
> > > > > This is extremely helpful, thank you! I have a somewhat basic
> > > > > question: For an isoc request, is its "freshness" determined from the
> > > > > time at which usb_ep_queue is called, or from the time at which the
> > > > > previous request was transferred to the host? If the "freshness" is
> > > > > determined from the time 'usb_ep_request' was called, I wonder if the
> > > > > "underrun" is artificial because UVC Gadget driver pumps all free
> > > > > usb_requests at once, and then waits around doing nothing while the
> > > > > usb_requests get stale in the controller's queue. In this case, just
> > > > > slowing the encode loop to wait a little before queuing more requests
> > > > > should do effectively what you suggest above?
> > > > >
> > > >
> > > > Here's a simplified version of how it works:
> > > > (Note that I'll be talking/using usb request as if it's TRBs and also
> > > > ignore SG. I will also only focus about the current handling of the
> > > > gadget driver and dwc3 driver.)
> > >
> > > I appreciate you taking the time to explain this. This is very
> > > enlightening, thank you!
> > >
> > > >
> > > > The controller tracks the current uframe. For isoc endpoint, when you
> > > > call usb_ep_queue(), the controller doesn't consume the request right
> > > > away. The request is scheduled for a specific interval. For UVC, an
> > > > interval is a single uframe (125us). As the current uframe keeps
> > > > incrementing, each request on a queue is consumed. If there's no request
> > > > available for the current uframe, the next queued request is considered
> > > > stale or expired. This case is considered underrun.
> > > >
> > > > "freshness" means the request is scheduled for a future uframe. "stale"
> > > > means the request is queued for a past uframe. Isoc data is time
> > > > sensitive. So if the data isn't gone out at a specific time, then it's
> > > > dropped.
> > > >
> > > > The problem with many gadget drivers that use isoc endpoint is that they
> > > > only queue more requests when they get notified that the previous set of
> > > > requests are completed. This creates time gaps which can starve the
> > > > controller of more requests.
> > > >
> > > > We have some basic mechanism in dwc3 to reschedule new requests when
> > > > there's missed isoc and the queue is empty for UVC. However that's not
> > > > enough. The time the controller consumes the request and the time the
> > > > driver handles the interrupt is different. The driver may not know that
> > > > queue is empty until it's already too late. The gadget driver just needs
> > > > to make sure to keep the request queue to at least X amount of requests.
> > > > Note that 125us is relatively fast.
> > >
> > > I see, and I think I understand Greg's previous comment better as
> > > well: The UVC driver isn't falling behind on the video stream, it is
> > > falling behind the usb controller's monotonic isoc stream.
> > >
> > > From what I can see, this leaves us in an interesting place: UVC
> > > allows the host to configure the camera's output resolution and fps,
> > > which effectively controls how fast the camera is generating data.
> > > This is at odds with the UVC gadget driver, which currently packs each
> > > video frame into as few usb_requests as possible (using the full
> >
> > Hm... I would spread the data to more requests and not put all the eggs
> > in one basket. It shouldn't be an issue with dwc3 controller, but some
> > hosts aren't the greatest when handling isoc at high transfer rate.
> >
> > > available size in usb_requests). Effectively, the UVC gadget driver
> > > attempts to use the "full" bandwidth of isoc transfers even when the
> > > camera isn't generating data fast enough. For example, in my
> > > observations: 1 video frame is ~22kB. At 30fps, this represents 1/30
> > > of the amount of data the camera would generate in a second. This 22kB
> > > is split into 8 usb_requests which is about 1/1000 the number of
> > > requests UVC driver needs to generate per second to prevent isoc
> > > failures (assuming 125us monotonic uframes). Assuming some fudge
> > > factor from the simplifications in your explanation gives the uvc
> > > driver some extra leeway with request queuing, we're still roughly two
> > > order of magnitudes out of sync. Even with perfect 'complete'
> > > callbacks and video frame encodings, an underrun seems inevitable.
> > > Data is being generated at a far slower rate than it is being
> > > transferred. Does this reasoning seem valid?
> > >
> > > Just as a test I'll try updating the UVC driver to consume 266
> > > usb_requests per video frame (~1/30 of 8000), which should be enough
> > > to keep the usb controller queue occupied for ~1/30s. Ideally, by the
> > > time the controller queue is empty, the camera would have produced a
> >
> > You mean queue 266 requests at once? This may work. There should be a
> > large enough time gap so that the dwc3 driver can properly detect an
> > empty queue to be able to reschedule new requests for the next video
> > frame.
>
> That's a lot of requests to allocate. Can't you just make sure to notify
> the gadget driver more often to requeue requests and don't set
> no_interrupt as often?

Oh yes, I am hoping UVC gadget driver and the usb controller can reach
a steady state of pumping out requests with less than 266 requests
allocated. I will play around and see how many requests it takes to
reach the steady state. I think the encode loop can maintain the isoc
deadlines as long as there are free requests available.

>
> >
> > > new frame. This doesn't solve the issue with latencies around callback
> > > and an isoc failure might still happen, hopefully the failure
> > > frequency is reduced because UVC queues enough requests per video
> > > frame to not starve the controller's queue while waiting on a new
> > > frame and the only way they go out of sync is from 'complete' callback
> > > timings. I am assuming this has been tried before, but my LKML search
> > > skills are failing and I can't find much on it.
> > >
> > > >
>
> BR,
> Thinh
Avichal Rakesh April 19, 2023, 5:26 a.m. UTC | #11
On Tue, Apr 18, 2023 at 6:07 PM Alan Stern <stern@rowland.harvard.edu> wrote:
>
> On Tue, Apr 18, 2023 at 03:45:53PM -0700, Avichal Rakesh wrote:
> > I see, and I think I understand Greg's previous comment better as
> > well: The UVC driver isn't falling behind on the video stream, it is
> > falling behind the usb controller's monotonic isoc stream.
> >
> > From what I can see, this leaves us in an interesting place: UVC
> > allows the host to configure the camera's output resolution and fps,
> > which effectively controls how fast the camera is generating data.
> > This is at odds with the UVC gadget driver, which currently packs each
> > video frame into as few usb_requests as possible (using the full
> > available size in usb_requests). Effectively, the UVC gadget driver
> > attempts to use the "full" bandwidth of isoc transfers even when the
> > camera isn't generating data fast enough. For example, in my
> > observations: 1 video frame is ~22kB. At 30fps, this represents 1/30
> > of the amount of data the camera would generate in a second. This 22kB
> > is split into 8 usb_requests which is about 1/1000 the number of
> > requests UVC driver needs to generate per second to prevent isoc
> > failures (assuming 125us monotonic uframes). Assuming some fudge
> > factor from the simplifications in your explanation gives the uvc
> > driver some extra leeway with request queuing, we're still roughly two
> > order of magnitudes out of sync. Even with perfect 'complete'
> > callbacks and video frame encodings, an underrun seems inevitable.
> > Data is being generated at a far slower rate than it is being
> > transferred. Does this reasoning seem valid?
> >
> > Just as a test I'll try updating the UVC driver to consume 266
> > usb_requests per video frame (~1/30 of 8000), which should be enough
> > to keep the usb controller queue occupied for ~1/30s. Ideally, by the
> > time the controller queue is empty, the camera would have produced a
> > new frame. This doesn't solve the issue with latencies around callback
> > and an isoc failure might still happen, hopefully the failure
> > frequency is reduced because UVC queues enough requests per video
> > frame to not starve the controller's queue while waiting on a new
> > frame and the only way they go out of sync is from 'complete' callback
> > timings. I am assuming this has been tried before, but my LKML search
> > skills are failing and I can't find much on it.
>
> Note that there's nothing wrong with submitting a 0-length isochronous
> transfer.  If there's no data left but you still need to send
> _something_ in order to fill out the remaining slots in the controller's
> schedule, this is a good way to do it.
>
Oh, this is very good to know, thank you!! We just need to reach a
steady state of UVC queuing enough requests monotonically (even if
they are empty), and the usb controller calling the 'complete'
callback to give it more requests to queue. Although I wonder how the
host's UVC driver would interpret the zero length packets, if it would
even care.

I am unfortunately being pulled into some other work for the next few
days, but I will try out both: splitting one frame into many many
requests and just sending 0 length requests, and see what happens on
the host. Will report back with what I find. Any other insights are
welcome. I want to fix this problem for good if possible, and am happy
to try out whatever it takes!
Thinh Nguyen April 19, 2023, 9:49 p.m. UTC | #12
On Tue, Apr 18, 2023, Avichal Rakesh wrote:
> On Tue, Apr 18, 2023 at 6:07 PM Alan Stern <stern@rowland.harvard.edu> wrote:
> >
> > On Tue, Apr 18, 2023 at 03:45:53PM -0700, Avichal Rakesh wrote:
> > > I see, and I think I understand Greg's previous comment better as
> > > well: The UVC driver isn't falling behind on the video stream, it is
> > > falling behind the usb controller's monotonic isoc stream.
> > >
> > > From what I can see, this leaves us in an interesting place: UVC
> > > allows the host to configure the camera's output resolution and fps,
> > > which effectively controls how fast the camera is generating data.
> > > This is at odds with the UVC gadget driver, which currently packs each
> > > video frame into as few usb_requests as possible (using the full
> > > available size in usb_requests). Effectively, the UVC gadget driver
> > > attempts to use the "full" bandwidth of isoc transfers even when the
> > > camera isn't generating data fast enough. For example, in my
> > > observations: 1 video frame is ~22kB. At 30fps, this represents 1/30
> > > of the amount of data the camera would generate in a second. This 22kB
> > > is split into 8 usb_requests which is about 1/1000 the number of
> > > requests UVC driver needs to generate per second to prevent isoc
> > > failures (assuming 125us monotonic uframes). Assuming some fudge
> > > factor from the simplifications in your explanation gives the uvc
> > > driver some extra leeway with request queuing, we're still roughly two
> > > order of magnitudes out of sync. Even with perfect 'complete'
> > > callbacks and video frame encodings, an underrun seems inevitable.
> > > Data is being generated at a far slower rate than it is being
> > > transferred. Does this reasoning seem valid?
> > >
> > > Just as a test I'll try updating the UVC driver to consume 266
> > > usb_requests per video frame (~1/30 of 8000), which should be enough
> > > to keep the usb controller queue occupied for ~1/30s. Ideally, by the
> > > time the controller queue is empty, the camera would have produced a
> > > new frame. This doesn't solve the issue with latencies around callback
> > > and an isoc failure might still happen, hopefully the failure
> > > frequency is reduced because UVC queues enough requests per video
> > > frame to not starve the controller's queue while waiting on a new
> > > frame and the only way they go out of sync is from 'complete' callback
> > > timings. I am assuming this has been tried before, but my LKML search
> > > skills are failing and I can't find much on it.
> >
> > Note that there's nothing wrong with submitting a 0-length isochronous
> > transfer.  If there's no data left but you still need to send
> > _something_ in order to fill out the remaining slots in the controller's
> > schedule, this is a good way to do it.
> >
> Oh, this is very good to know, thank you!! We just need to reach a
> steady state of UVC queuing enough requests monotonically (even if
> they are empty), and the usb controller calling the 'complete'
> callback to give it more requests to queue. Although I wonder how the
> host's UVC driver would interpret the zero length packets, if it would
> even care.

By the usb spec, for IN direction, if there's no data available and the
host requests for data, then the device will send a zero-length data
packet. This is what the dwc3 controller will do. So regardless whether
you prepare and queue a 0-length request or not, the host will receive
it.

> 
> I am unfortunately being pulled into some other work for the next few
> days, but I will try out both: splitting one frame into many many
> requests and just sending 0 length requests, and see what happens on
> the host. Will report back with what I find. Any other insights are
> welcome. I want to fix this problem for good if possible, and am happy
> to try out whatever it takes!

That would be great. Thanks.

BR,
Thinh
Thinh Nguyen April 19, 2023, 9:58 p.m. UTC | #13
On Tue, Apr 18, 2023, Avichal Rakesh wrote:
> On Tue, Apr 18, 2023 at 5:19 PM Thinh Nguyen <Thinh.Nguyen@synopsys.com> wrote:
> >
> > On Wed, Apr 19, 2023, Thinh Nguyen wrote:
> > > On Tue, Apr 18, 2023, Avichal Rakesh wrote:
> > > > On Tue, Apr 18, 2023 at 12:40 PM Thinh Nguyen <Thinh.Nguyen@synopsys.com> wrote:
> > > > >
> > > > > On Mon, Apr 17, 2023, Avichal Rakesh wrote:
> > > > > > On Mon, Apr 17, 2023 at 7:49 PM Thinh Nguyen <Thinh.Nguyen@synopsys.com> wrote:
> > > > > > >
> > > > > > > On Mon, Apr 17, 2023, Avichal Rakesh wrote:
> > > > > > > >
> > > > > > > > On 4/17/23 06:49, Greg KH wrote:
> > > > > > > > > On Fri, Apr 14, 2023 at 02:03:02PM -0700, Avichal Rakesh wrote:
> > > > > > > > >
> > > > > > > > >> This problem may be further exaggerated by the DWC3 controller driver
> > > > > > > > >> (which is what my device has) not setting the IMI flag when
> > > > > > > > >> no_interrupt flag is set
> > > > > > > > >> (https://urldefense.com/v3/__https://lore.kernel.org/all/ced336c84434571340c07994e3667a0ee284fefe.1666735451.git.Thinh.Nguyen@synopsys.com/)?__;!!A4F2R9G_pg!fB0krUX6qsOXrBsOHzLstVFWqTLL9ncwGFXxlxK0eedLY_29XdwjXRtbO-EWv1eX5okN1rOeBZspZ21KSb5b$
> > > > > > > > >> UVC Gadget Driver sets the no_interrupt flag for ~3/4 of its queued
> > > > > > > > >> usb_request, so an ISOC failure may not immediately interrupt the UVC
> > > > > > > > >> gadget driver, leaving more time for the frame to finish encoding.
> > > > > > > > >>
> > > > > > > > >> I couldn't find any concrete error handling rules in the UVC specs, so
> > > > > > > > >> I am not sure what the proper solution here is. To try out, I created
> > > > > > > > >> a patch (attached below) that dequeues all queued usb_requests from
> > > > > > > > >> the endpoint in case of an ISOC failure and clears the uvc buffer
> > > > > > > > >> queue. This eliminated the partial frames with no perceivable frame
> > > > > > > > >> drops.
> > > > > > > > >>
> > > > > > > > >> So my questions here are:
> > > > > > > > >> 1. Is this a known issue, and if so are there workarounds for it?
> > > > > > > > >> 2. If the answer to above is "No", does the explanation and mitigation
> > > > > > > > >> seem reasonable?
> > > > > > > > >>
> > > > > > > > >> Patch follows (mostly for illustration, I can formalize it if
> > > > > > > > >> needed!). It adds a new 'req_inflight' list to track queued
> > > > > > > > >> usb_requests that have not been given back to the gadget driver and
> > > > > > > > >> drops all the queued requests in case of an ISOC failure. The other
> > > > > > > > >> changes are for the extra bookkeeping required to handle dropping all
> > > > > > > > >> frames. I haven't been able to confirm it, but as far as I can tell
> > > > > > > > >> the issue exists at ToT as well.
> > > > > > > > >>
> > > > > > >
> > > > > > > Perhaps this conversation with Jeff may explain the issues you observed:
> > > > > > > https://urldefense.com/v3/__https://lore.kernel.org/all/20221021164349.fft4yqnxuztsqdeu@synopsys.com/__;!!A4F2R9G_pg!eK3VyAq7vX70vA-WJLA6_bPMbZFx0n33wH39JItHzwCNVqKSkN2aG0izF0GZPWqYxkgL-fNinWlIW71HbGGC$
> > > > > > >
> > > > > > > To summarized the long conversation, the UVC needs to maintain a
> > > > > > > constant queue of usb requests. Each request is scheduled for a specific
> > > > > > > interval. If the UVC couldn't keep up feeding requests to the
> > > > > > > controller, then we will have stale requests and missed isoc.
> > > > > > >
> > > > > > > From the discussion and review, the UVC couldn't queue requests fast
> > > > > > > enough. The problem is exacerbated when you reduce the interrupt
> > > > > > > frequency with no_interrupt setting. The dwc3 driver has some basic
> > > > > > > mechanism to detect for underrun by checking if the queue is empty, but
> > > > > > > that's not enough to workaround it.
> > > > > > >
> > > > > > > Your suggestion to dequeue the request would mean the controller driver
> > > > > > > will reschedule the isoc transfer again, which reschedule the next
> > > > > > > request for a new interval (potentially avoid being stale). That's fine,
> > > > > > > but that may not be a great solution because we're still playing catch
> > > > > > > up and the missed isoc already happened.
> > > > > > >
> > > > > > > You may try to do the followings to mitigate this issue:
> > > > > > > 1) Make sure UVC to only queuing rate is constant. Perhaps queue dummy
> > > > > > >    requests in between time gaps if need be?
> > > > > > >
> > > > > > This is extremely helpful, thank you! I have a somewhat basic
> > > > > > question: For an isoc request, is its "freshness" determined from the
> > > > > > time at which usb_ep_queue is called, or from the time at which the
> > > > > > previous request was transferred to the host? If the "freshness" is
> > > > > > determined from the time 'usb_ep_request' was called, I wonder if the
> > > > > > "underrun" is artificial because UVC Gadget driver pumps all free
> > > > > > usb_requests at once, and then waits around doing nothing while the
> > > > > > usb_requests get stale in the controller's queue. In this case, just
> > > > > > slowing the encode loop to wait a little before queuing more requests
> > > > > > should do effectively what you suggest above?
> > > > > >
> > > > >
> > > > > Here's a simplified version of how it works:
> > > > > (Note that I'll be talking/using usb request as if it's TRBs and also
> > > > > ignore SG. I will also only focus about the current handling of the
> > > > > gadget driver and dwc3 driver.)
> > > >
> > > > I appreciate you taking the time to explain this. This is very
> > > > enlightening, thank you!
> > > >
> > > > >
> > > > > The controller tracks the current uframe. For isoc endpoint, when you
> > > > > call usb_ep_queue(), the controller doesn't consume the request right
> > > > > away. The request is scheduled for a specific interval. For UVC, an
> > > > > interval is a single uframe (125us). As the current uframe keeps
> > > > > incrementing, each request on a queue is consumed. If there's no request
> > > > > available for the current uframe, the next queued request is considered
> > > > > stale or expired. This case is considered underrun.
> > > > >
> > > > > "freshness" means the request is scheduled for a future uframe. "stale"
> > > > > means the request is queued for a past uframe. Isoc data is time
> > > > > sensitive. So if the data isn't gone out at a specific time, then it's
> > > > > dropped.
> > > > >
> > > > > The problem with many gadget drivers that use isoc endpoint is that they
> > > > > only queue more requests when they get notified that the previous set of
> > > > > requests are completed. This creates time gaps which can starve the
> > > > > controller of more requests.
> > > > >
> > > > > We have some basic mechanism in dwc3 to reschedule new requests when
> > > > > there's missed isoc and the queue is empty for UVC. However that's not
> > > > > enough. The time the controller consumes the request and the time the
> > > > > driver handles the interrupt is different. The driver may not know that
> > > > > queue is empty until it's already too late. The gadget driver just needs
> > > > > to make sure to keep the request queue to at least X amount of requests.
> > > > > Note that 125us is relatively fast.
> > > >
> > > > I see, and I think I understand Greg's previous comment better as
> > > > well: The UVC driver isn't falling behind on the video stream, it is
> > > > falling behind the usb controller's monotonic isoc stream.
> > > >
> > > > From what I can see, this leaves us in an interesting place: UVC
> > > > allows the host to configure the camera's output resolution and fps,
> > > > which effectively controls how fast the camera is generating data.
> > > > This is at odds with the UVC gadget driver, which currently packs each
> > > > video frame into as few usb_requests as possible (using the full
> > >
> > > Hm... I would spread the data to more requests and not put all the eggs
> > > in one basket. It shouldn't be an issue with dwc3 controller, but some
> > > hosts aren't the greatest when handling isoc at high transfer rate.
> > >
> > > > available size in usb_requests). Effectively, the UVC gadget driver
> > > > attempts to use the "full" bandwidth of isoc transfers even when the
> > > > camera isn't generating data fast enough. For example, in my
> > > > observations: 1 video frame is ~22kB. At 30fps, this represents 1/30
> > > > of the amount of data the camera would generate in a second. This 22kB
> > > > is split into 8 usb_requests which is about 1/1000 the number of
> > > > requests UVC driver needs to generate per second to prevent isoc
> > > > failures (assuming 125us monotonic uframes). Assuming some fudge
> > > > factor from the simplifications in your explanation gives the uvc
> > > > driver some extra leeway with request queuing, we're still roughly two
> > > > order of magnitudes out of sync. Even with perfect 'complete'
> > > > callbacks and video frame encodings, an underrun seems inevitable.
> > > > Data is being generated at a far slower rate than it is being
> > > > transferred. Does this reasoning seem valid?
> > > >
> > > > Just as a test I'll try updating the UVC driver to consume 266
> > > > usb_requests per video frame (~1/30 of 8000), which should be enough
> > > > to keep the usb controller queue occupied for ~1/30s. Ideally, by the
> > > > time the controller queue is empty, the camera would have produced a
> > >
> > > You mean queue 266 requests at once? This may work. There should be a
> > > large enough time gap so that the dwc3 driver can properly detect an
> > > empty queue to be able to reschedule new requests for the next video
> > > frame.
> >
> > That's a lot of requests to allocate. Can't you just make sure to notify
> > the gadget driver more often to requeue requests and don't set
> > no_interrupt as often?
> 
> Oh yes, I am hoping UVC gadget driver and the usb controller can reach
> a steady state of pumping out requests with less than 266 requests
> allocated. I will play around and see how many requests it takes to
> reach the steady state. I think the encode loop can maintain the isoc
> deadlines as long as there are free requests available.
> 

You need to measure the worst latency of your setup interrupt handling
and how long it takes to prepare and queue the requests. We know that
each request should complete and interrupt after 125us. Taking into
account of all the latency, you can see how many requests you must
maintain in the queue at all time. E.g., if you maintain at least 8
requests in a queue, you have roughly 8*125us of leeway to queue the
next request.

BR,
Thinh
Avichal Rakesh April 19, 2023, 10:26 p.m. UTC | #14
On Wed, Apr 19, 2023 at 2:49 PM Thinh Nguyen <Thinh.Nguyen@synopsys.com> wrote:
>
> On Tue, Apr 18, 2023, Avichal Rakesh wrote:
> > On Tue, Apr 18, 2023 at 6:07 PM Alan Stern <stern@rowland.harvard.edu> wrote:
> > >
> > > On Tue, Apr 18, 2023 at 03:45:53PM -0700, Avichal Rakesh wrote:
> > > > I see, and I think I understand Greg's previous comment better as
> > > > well: The UVC driver isn't falling behind on the video stream, it is
> > > > falling behind the usb controller's monotonic isoc stream.
> > > >
> > > > From what I can see, this leaves us in an interesting place: UVC
> > > > allows the host to configure the camera's output resolution and fps,
> > > > which effectively controls how fast the camera is generating data.
> > > > This is at odds with the UVC gadget driver, which currently packs each
> > > > video frame into as few usb_requests as possible (using the full
> > > > available size in usb_requests). Effectively, the UVC gadget driver
> > > > attempts to use the "full" bandwidth of isoc transfers even when the
> > > > camera isn't generating data fast enough. For example, in my
> > > > observations: 1 video frame is ~22kB. At 30fps, this represents 1/30
> > > > of the amount of data the camera would generate in a second. This 22kB
> > > > is split into 8 usb_requests which is about 1/1000 the number of
> > > > requests UVC driver needs to generate per second to prevent isoc
> > > > failures (assuming 125us monotonic uframes). Assuming some fudge
> > > > factor from the simplifications in your explanation gives the uvc
> > > > driver some extra leeway with request queuing, we're still roughly two
> > > > order of magnitudes out of sync. Even with perfect 'complete'
> > > > callbacks and video frame encodings, an underrun seems inevitable.
> > > > Data is being generated at a far slower rate than it is being
> > > > transferred. Does this reasoning seem valid?
> > > >
> > > > Just as a test I'll try updating the UVC driver to consume 266
> > > > usb_requests per video frame (~1/30 of 8000), which should be enough
> > > > to keep the usb controller queue occupied for ~1/30s. Ideally, by the
> > > > time the controller queue is empty, the camera would have produced a
> > > > new frame. This doesn't solve the issue with latencies around callback
> > > > and an isoc failure might still happen, hopefully the failure
> > > > frequency is reduced because UVC queues enough requests per video
> > > > frame to not starve the controller's queue while waiting on a new
> > > > frame and the only way they go out of sync is from 'complete' callback
> > > > timings. I am assuming this has been tried before, but my LKML search
> > > > skills are failing and I can't find much on it.
> > >
> > > Note that there's nothing wrong with submitting a 0-length isochronous
> > > transfer.  If there's no data left but you still need to send
> > > _something_ in order to fill out the remaining slots in the controller's
> > > schedule, this is a good way to do it.
> > >
> > Oh, this is very good to know, thank you!! We just need to reach a
> > steady state of UVC queuing enough requests monotonically (even if
> > they are empty), and the usb controller calling the 'complete'
> > callback to give it more requests to queue. Although I wonder how the
> > host's UVC driver would interpret the zero length packets, if it would
> > even care.
>
> By the usb spec, for IN direction, if there's no data available and the
> host requests for data, then the device will send a zero-length data
> packet. This is what the dwc3 controller will do. So regardless whether
> you prepare and queue a 0-length request or not, the host will receive
> it.
>
If this is the case: the usb controller sends a 0 length packet to the
host when uvc gadget driver doesn't queue anything, what stops the
controller from assuming that the usb_request queued by a gadget
driver is always for the next available uframe, and not for one in the
past? This is effectively the "always reschedule" suggestion you made
before but as a default instead of specific to uvc. Are there cases
where we would want to tell the gadget driver that it fell behind?
Apologies if I am missing something fundamental here, it feels like I
am :(.
> >
> > I am unfortunately being pulled into some other work for the next few
> > days, but I will try out both: splitting one frame into many many
> > requests and just sending 0 length requests, and see what happens on
> > the host. Will report back with what I find. Any other insights are
> > welcome. I want to fix this problem for good if possible, and am happy
> > to try out whatever it takes!
>
> That would be great. Thanks.

- Avi.
Avichal Rakesh April 19, 2023, 10:35 p.m. UTC | #15
On Wed, Apr 19, 2023 at 2:58 PM Thinh Nguyen <Thinh.Nguyen@synopsys.com> wrote:
>
> On Tue, Apr 18, 2023, Avichal Rakesh wrote:
> > On Tue, Apr 18, 2023 at 5:19 PM Thinh Nguyen <Thinh.Nguyen@synopsys.com> wrote:
> > >
> > > On Wed, Apr 19, 2023, Thinh Nguyen wrote:
> > > > On Tue, Apr 18, 2023, Avichal Rakesh wrote:
> > > > > On Tue, Apr 18, 2023 at 12:40 PM Thinh Nguyen <Thinh.Nguyen@synopsys.com> wrote:
> > > > > >
> > > > > > On Mon, Apr 17, 2023, Avichal Rakesh wrote:
> > > > > > > On Mon, Apr 17, 2023 at 7:49 PM Thinh Nguyen <Thinh.Nguyen@synopsys.com> wrote:
> > > > > > > >
> > > > > > > > On Mon, Apr 17, 2023, Avichal Rakesh wrote:
> > > > > > > > >
> > > > > > > > > On 4/17/23 06:49, Greg KH wrote:
> > > > > > > > > > On Fri, Apr 14, 2023 at 02:03:02PM -0700, Avichal Rakesh wrote:
> > > > > > > > > >
> > > > > > > > > >> This problem may be further exaggerated by the DWC3 controller driver
> > > > > > > > > >> (which is what my device has) not setting the IMI flag when
> > > > > > > > > >> no_interrupt flag is set
> > > > > > > > > >> (https://urldefense.com/v3/__https://lore.kernel.org/all/ced336c84434571340c07994e3667a0ee284fefe.1666735451.git.Thinh.Nguyen@synopsys.com/)?__;!!A4F2R9G_pg!fB0krUX6qsOXrBsOHzLstVFWqTLL9ncwGFXxlxK0eedLY_29XdwjXRtbO-EWv1eX5okN1rOeBZspZ21KSb5b$
> > > > > > > > > >> UVC Gadget Driver sets the no_interrupt flag for ~3/4 of its queued
> > > > > > > > > >> usb_request, so an ISOC failure may not immediately interrupt the UVC
> > > > > > > > > >> gadget driver, leaving more time for the frame to finish encoding.
> > > > > > > > > >>
> > > > > > > > > >> I couldn't find any concrete error handling rules in the UVC specs, so
> > > > > > > > > >> I am not sure what the proper solution here is. To try out, I created
> > > > > > > > > >> a patch (attached below) that dequeues all queued usb_requests from
> > > > > > > > > >> the endpoint in case of an ISOC failure and clears the uvc buffer
> > > > > > > > > >> queue. This eliminated the partial frames with no perceivable frame
> > > > > > > > > >> drops.
> > > > > > > > > >>
> > > > > > > > > >> So my questions here are:
> > > > > > > > > >> 1. Is this a known issue, and if so are there workarounds for it?
> > > > > > > > > >> 2. If the answer to above is "No", does the explanation and mitigation
> > > > > > > > > >> seem reasonable?
> > > > > > > > > >>
> > > > > > > > > >> Patch follows (mostly for illustration, I can formalize it if
> > > > > > > > > >> needed!). It adds a new 'req_inflight' list to track queued
> > > > > > > > > >> usb_requests that have not been given back to the gadget driver and
> > > > > > > > > >> drops all the queued requests in case of an ISOC failure. The other
> > > > > > > > > >> changes are for the extra bookkeeping required to handle dropping all
> > > > > > > > > >> frames. I haven't been able to confirm it, but as far as I can tell
> > > > > > > > > >> the issue exists at ToT as well.
> > > > > > > > > >>
> > > > > > > >
> > > > > > > > Perhaps this conversation with Jeff may explain the issues you observed:
> > > > > > > > https://urldefense.com/v3/__https://lore.kernel.org/all/20221021164349.fft4yqnxuztsqdeu@synopsys.com/__;!!A4F2R9G_pg!eK3VyAq7vX70vA-WJLA6_bPMbZFx0n33wH39JItHzwCNVqKSkN2aG0izF0GZPWqYxkgL-fNinWlIW71HbGGC$
> > > > > > > >
> > > > > > > > To summarized the long conversation, the UVC needs to maintain a
> > > > > > > > constant queue of usb requests. Each request is scheduled for a specific
> > > > > > > > interval. If the UVC couldn't keep up feeding requests to the
> > > > > > > > controller, then we will have stale requests and missed isoc.
> > > > > > > >
> > > > > > > > From the discussion and review, the UVC couldn't queue requests fast
> > > > > > > > enough. The problem is exacerbated when you reduce the interrupt
> > > > > > > > frequency with no_interrupt setting. The dwc3 driver has some basic
> > > > > > > > mechanism to detect for underrun by checking if the queue is empty, but
> > > > > > > > that's not enough to workaround it.
> > > > > > > >
> > > > > > > > Your suggestion to dequeue the request would mean the controller driver
> > > > > > > > will reschedule the isoc transfer again, which reschedule the next
> > > > > > > > request for a new interval (potentially avoid being stale). That's fine,
> > > > > > > > but that may not be a great solution because we're still playing catch
> > > > > > > > up and the missed isoc already happened.
> > > > > > > >
> > > > > > > > You may try to do the followings to mitigate this issue:
> > > > > > > > 1) Make sure UVC to only queuing rate is constant. Perhaps queue dummy
> > > > > > > >    requests in between time gaps if need be?
> > > > > > > >
> > > > > > > This is extremely helpful, thank you! I have a somewhat basic
> > > > > > > question: For an isoc request, is its "freshness" determined from the
> > > > > > > time at which usb_ep_queue is called, or from the time at which the
> > > > > > > previous request was transferred to the host? If the "freshness" is
> > > > > > > determined from the time 'usb_ep_request' was called, I wonder if the
> > > > > > > "underrun" is artificial because UVC Gadget driver pumps all free
> > > > > > > usb_requests at once, and then waits around doing nothing while the
> > > > > > > usb_requests get stale in the controller's queue. In this case, just
> > > > > > > slowing the encode loop to wait a little before queuing more requests
> > > > > > > should do effectively what you suggest above?
> > > > > > >
> > > > > >
> > > > > > Here's a simplified version of how it works:
> > > > > > (Note that I'll be talking/using usb request as if it's TRBs and also
> > > > > > ignore SG. I will also only focus about the current handling of the
> > > > > > gadget driver and dwc3 driver.)
> > > > >
> > > > > I appreciate you taking the time to explain this. This is very
> > > > > enlightening, thank you!
> > > > >
> > > > > >
> > > > > > The controller tracks the current uframe. For isoc endpoint, when you
> > > > > > call usb_ep_queue(), the controller doesn't consume the request right
> > > > > > away. The request is scheduled for a specific interval. For UVC, an
> > > > > > interval is a single uframe (125us). As the current uframe keeps
> > > > > > incrementing, each request on a queue is consumed. If there's no request
> > > > > > available for the current uframe, the next queued request is considered
> > > > > > stale or expired. This case is considered underrun.
> > > > > >
> > > > > > "freshness" means the request is scheduled for a future uframe. "stale"
> > > > > > means the request is queued for a past uframe. Isoc data is time
> > > > > > sensitive. So if the data isn't gone out at a specific time, then it's
> > > > > > dropped.
> > > > > >
> > > > > > The problem with many gadget drivers that use isoc endpoint is that they
> > > > > > only queue more requests when they get notified that the previous set of
> > > > > > requests are completed. This creates time gaps which can starve the
> > > > > > controller of more requests.
> > > > > >
> > > > > > We have some basic mechanism in dwc3 to reschedule new requests when
> > > > > > there's missed isoc and the queue is empty for UVC. However that's not
> > > > > > enough. The time the controller consumes the request and the time the
> > > > > > driver handles the interrupt is different. The driver may not know that
> > > > > > queue is empty until it's already too late. The gadget driver just needs
> > > > > > to make sure to keep the request queue to at least X amount of requests.
> > > > > > Note that 125us is relatively fast.
> > > > >
> > > > > I see, and I think I understand Greg's previous comment better as
> > > > > well: The UVC driver isn't falling behind on the video stream, it is
> > > > > falling behind the usb controller's monotonic isoc stream.
> > > > >
> > > > > From what I can see, this leaves us in an interesting place: UVC
> > > > > allows the host to configure the camera's output resolution and fps,
> > > > > which effectively controls how fast the camera is generating data.
> > > > > This is at odds with the UVC gadget driver, which currently packs each
> > > > > video frame into as few usb_requests as possible (using the full
> > > >
> > > > Hm... I would spread the data to more requests and not put all the eggs
> > > > in one basket. It shouldn't be an issue with dwc3 controller, but some
> > > > hosts aren't the greatest when handling isoc at high transfer rate.
> > > >
> > > > > available size in usb_requests). Effectively, the UVC gadget driver
> > > > > attempts to use the "full" bandwidth of isoc transfers even when the
> > > > > camera isn't generating data fast enough. For example, in my
> > > > > observations: 1 video frame is ~22kB. At 30fps, this represents 1/30
> > > > > of the amount of data the camera would generate in a second. This 22kB
> > > > > is split into 8 usb_requests which is about 1/1000 the number of
> > > > > requests UVC driver needs to generate per second to prevent isoc
> > > > > failures (assuming 125us monotonic uframes). Assuming some fudge
> > > > > factor from the simplifications in your explanation gives the uvc
> > > > > driver some extra leeway with request queuing, we're still roughly two
> > > > > order of magnitudes out of sync. Even with perfect 'complete'
> > > > > callbacks and video frame encodings, an underrun seems inevitable.
> > > > > Data is being generated at a far slower rate than it is being
> > > > > transferred. Does this reasoning seem valid?
> > > > >
> > > > > Just as a test I'll try updating the UVC driver to consume 266
> > > > > usb_requests per video frame (~1/30 of 8000), which should be enough
> > > > > to keep the usb controller queue occupied for ~1/30s. Ideally, by the
> > > > > time the controller queue is empty, the camera would have produced a
> > > >
> > > > You mean queue 266 requests at once? This may work. There should be a
> > > > large enough time gap so that the dwc3 driver can properly detect an
> > > > empty queue to be able to reschedule new requests for the next video
> > > > frame.
> > >
> > > That's a lot of requests to allocate. Can't you just make sure to notify
> > > the gadget driver more often to requeue requests and don't set
> > > no_interrupt as often?
> >
> > Oh yes, I am hoping UVC gadget driver and the usb controller can reach
> > a steady state of pumping out requests with less than 266 requests
> > allocated. I will play around and see how many requests it takes to
> > reach the steady state. I think the encode loop can maintain the isoc
> > deadlines as long as there are free requests available.
> >
>
> You need to measure the worst latency of your setup interrupt handling
> and how long it takes to prepare and queue the requests. We know that
> each request should complete and interrupt after 125us. Taking into
> account of all the latency, you can see how many requests you must
> maintain in the queue at all time. E.g., if you maintain at least 8
> requests in a queue, you have roughly 8*125us of leeway to queue the
> next request.
>
Right, that makes sense! I also wonder if queueing in larger batches
would trigger the rescheduling logic you mentioned more often. If we
only queue in batches of 8 requests, and wait for all of them to come
back, the request queue might be empty for long enough for dwc3 to
realize that the next request should be rescheduled. Adding it to my
'to-test' list

- Avi.
Thinh Nguyen April 19, 2023, 10:38 p.m. UTC | #16
On Wed, Apr 19, 2023, Avichal Rakesh wrote:
> On Wed, Apr 19, 2023 at 2:49 PM Thinh Nguyen <Thinh.Nguyen@synopsys.com> wrote:
> >
> > On Tue, Apr 18, 2023, Avichal Rakesh wrote:
> > > On Tue, Apr 18, 2023 at 6:07 PM Alan Stern <stern@rowland.harvard.edu> wrote:
> > > >
> > > > On Tue, Apr 18, 2023 at 03:45:53PM -0700, Avichal Rakesh wrote:
> > > > > I see, and I think I understand Greg's previous comment better as
> > > > > well: The UVC driver isn't falling behind on the video stream, it is
> > > > > falling behind the usb controller's monotonic isoc stream.
> > > > >
> > > > > From what I can see, this leaves us in an interesting place: UVC
> > > > > allows the host to configure the camera's output resolution and fps,
> > > > > which effectively controls how fast the camera is generating data.
> > > > > This is at odds with the UVC gadget driver, which currently packs each
> > > > > video frame into as few usb_requests as possible (using the full
> > > > > available size in usb_requests). Effectively, the UVC gadget driver
> > > > > attempts to use the "full" bandwidth of isoc transfers even when the
> > > > > camera isn't generating data fast enough. For example, in my
> > > > > observations: 1 video frame is ~22kB. At 30fps, this represents 1/30
> > > > > of the amount of data the camera would generate in a second. This 22kB
> > > > > is split into 8 usb_requests which is about 1/1000 the number of
> > > > > requests UVC driver needs to generate per second to prevent isoc
> > > > > failures (assuming 125us monotonic uframes). Assuming some fudge
> > > > > factor from the simplifications in your explanation gives the uvc
> > > > > driver some extra leeway with request queuing, we're still roughly two
> > > > > order of magnitudes out of sync. Even with perfect 'complete'
> > > > > callbacks and video frame encodings, an underrun seems inevitable.
> > > > > Data is being generated at a far slower rate than it is being
> > > > > transferred. Does this reasoning seem valid?
> > > > >
> > > > > Just as a test I'll try updating the UVC driver to consume 266
> > > > > usb_requests per video frame (~1/30 of 8000), which should be enough
> > > > > to keep the usb controller queue occupied for ~1/30s. Ideally, by the
> > > > > time the controller queue is empty, the camera would have produced a
> > > > > new frame. This doesn't solve the issue with latencies around callback
> > > > > and an isoc failure might still happen, hopefully the failure
> > > > > frequency is reduced because UVC queues enough requests per video
> > > > > frame to not starve the controller's queue while waiting on a new
> > > > > frame and the only way they go out of sync is from 'complete' callback
> > > > > timings. I am assuming this has been tried before, but my LKML search
> > > > > skills are failing and I can't find much on it.
> > > >
> > > > Note that there's nothing wrong with submitting a 0-length isochronous
> > > > transfer.  If there's no data left but you still need to send
> > > > _something_ in order to fill out the remaining slots in the controller's
> > > > schedule, this is a good way to do it.
> > > >
> > > Oh, this is very good to know, thank you!! We just need to reach a
> > > steady state of UVC queuing enough requests monotonically (even if
> > > they are empty), and the usb controller calling the 'complete'
> > > callback to give it more requests to queue. Although I wonder how the
> > > host's UVC driver would interpret the zero length packets, if it would
> > > even care.
> >
> > By the usb spec, for IN direction, if there's no data available and the
> > host requests for data, then the device will send a zero-length data
> > packet. This is what the dwc3 controller will do. So regardless whether
> > you prepare and queue a 0-length request or not, the host will receive
> > it.
> >
> If this is the case: the usb controller sends a 0 length packet to the
> host when uvc gadget driver doesn't queue anything, what stops the
> controller from assuming that the usb_request queued by a gadget
> driver is always for the next available uframe, and not for one in the
> past? This is effectively the "always reschedule" suggestion you made
> before but as a default instead of specific to uvc. Are there cases
> where we would want to tell the gadget driver that it fell behind?
> Apologies if I am missing something fundamental here, it feels like I
> am :(.

No, the controller doesn't assume that. It won't queue for the future
uframe if it's stale. The UVC gadget driver will need to keep feeding
requests until it catches up to the current uframe. Remember that the
isoc data is time sensitive, if it's not gone out at a specific uframe,
then it's dropped.

The controller driver can end the current isoc stream and reschedule new
requests for the future uframe so that they won't be considered "stale".

BR,
Thinh
Avichal Rakesh April 19, 2023, 11:12 p.m. UTC | #17
On Wed, Apr 19, 2023 at 3:38 PM Thinh Nguyen <Thinh.Nguyen@synopsys.com> wrote:
>
> On Wed, Apr 19, 2023, Avichal Rakesh wrote:
> > On Wed, Apr 19, 2023 at 2:49 PM Thinh Nguyen <Thinh.Nguyen@synopsys.com> wrote:
> > >
> > > On Tue, Apr 18, 2023, Avichal Rakesh wrote:
> > > > On Tue, Apr 18, 2023 at 6:07 PM Alan Stern <stern@rowland.harvard.edu> wrote:
> > > > >
> > > > > On Tue, Apr 18, 2023 at 03:45:53PM -0700, Avichal Rakesh wrote:
> > > > > > I see, and I think I understand Greg's previous comment better as
> > > > > > well: The UVC driver isn't falling behind on the video stream, it is
> > > > > > falling behind the usb controller's monotonic isoc stream.
> > > > > >
> > > > > > From what I can see, this leaves us in an interesting place: UVC
> > > > > > allows the host to configure the camera's output resolution and fps,
> > > > > > which effectively controls how fast the camera is generating data.
> > > > > > This is at odds with the UVC gadget driver, which currently packs each
> > > > > > video frame into as few usb_requests as possible (using the full
> > > > > > available size in usb_requests). Effectively, the UVC gadget driver
> > > > > > attempts to use the "full" bandwidth of isoc transfers even when the
> > > > > > camera isn't generating data fast enough. For example, in my
> > > > > > observations: 1 video frame is ~22kB. At 30fps, this represents 1/30
> > > > > > of the amount of data the camera would generate in a second. This 22kB
> > > > > > is split into 8 usb_requests which is about 1/1000 the number of
> > > > > > requests UVC driver needs to generate per second to prevent isoc
> > > > > > failures (assuming 125us monotonic uframes). Assuming some fudge
> > > > > > factor from the simplifications in your explanation gives the uvc
> > > > > > driver some extra leeway with request queuing, we're still roughly two
> > > > > > order of magnitudes out of sync. Even with perfect 'complete'
> > > > > > callbacks and video frame encodings, an underrun seems inevitable.
> > > > > > Data is being generated at a far slower rate than it is being
> > > > > > transferred. Does this reasoning seem valid?
> > > > > >
> > > > > > Just as a test I'll try updating the UVC driver to consume 266
> > > > > > usb_requests per video frame (~1/30 of 8000), which should be enough
> > > > > > to keep the usb controller queue occupied for ~1/30s. Ideally, by the
> > > > > > time the controller queue is empty, the camera would have produced a
> > > > > > new frame. This doesn't solve the issue with latencies around callback
> > > > > > and an isoc failure might still happen, hopefully the failure
> > > > > > frequency is reduced because UVC queues enough requests per video
> > > > > > frame to not starve the controller's queue while waiting on a new
> > > > > > frame and the only way they go out of sync is from 'complete' callback
> > > > > > timings. I am assuming this has been tried before, but my LKML search
> > > > > > skills are failing and I can't find much on it.
> > > > >
> > > > > Note that there's nothing wrong with submitting a 0-length isochronous
> > > > > transfer.  If there's no data left but you still need to send
> > > > > _something_ in order to fill out the remaining slots in the controller's
> > > > > schedule, this is a good way to do it.
> > > > >
> > > > Oh, this is very good to know, thank you!! We just need to reach a
> > > > steady state of UVC queuing enough requests monotonically (even if
> > > > they are empty), and the usb controller calling the 'complete'
> > > > callback to give it more requests to queue. Although I wonder how the
> > > > host's UVC driver would interpret the zero length packets, if it would
> > > > even care.
> > >
> > > By the usb spec, for IN direction, if there's no data available and the
> > > host requests for data, then the device will send a zero-length data
> > > packet. This is what the dwc3 controller will do. So regardless whether
> > > you prepare and queue a 0-length request or not, the host will receive
> > > it.
> > >
> > If this is the case: the usb controller sends a 0 length packet to the
> > host when uvc gadget driver doesn't queue anything, what stops the
> > controller from assuming that the usb_request queued by a gadget
> > driver is always for the next available uframe, and not for one in the
> > past? This is effectively the "always reschedule" suggestion you made
> > before but as a default instead of specific to uvc. Are there cases
> > where we would want to tell the gadget driver that it fell behind?
> > Apologies if I am missing something fundamental here, it feels like I
> > am :(.
>
> No, the controller doesn't assume that. It won't queue for the future
> uframe if it's stale. The UVC gadget driver will need to keep feeding
> requests until it catches up to the current uframe. Remember that the
> isoc data is time sensitive, if it's not gone out at a specific uframe,
> then it's dropped.
>
> The controller driver can end the current isoc stream and reschedule new
> requests for the future uframe so that they won't be considered "stale".
>
Oh I see, the hardware/controller doesn't make that assumption, and to
mimic that behavior the driver will have to detect empty queues and
reschedule requests which runs into its own timing issues with the
callbacks. I think I understand!

- Avi.
Alan Stern April 20, 2023, 1:20 a.m. UTC | #18
On Wed, Apr 19, 2023 at 09:49:35PM +0000, Thinh Nguyen wrote:
> By the usb spec, for IN direction, if there's no data available and the
> host requests for data, then the device will send a zero-length data
> packet.

I'm not aware of any such requirement in the USB-2 spec.  The closest I 
can find is in section 5.6.4:

	An isochronous IN endpoint must return a zero-length packet 
	whenever data is requested at a faster interval than the 
	specified interval and data is not available.

But this specifically refers only to situations where the host asks for 
isochonous data more frequently than the endpoint descriptor's bInterval 
describes.

>  This is what the dwc3 controller will do. So regardless whether
> you prepare and queue a 0-length request or not, the host will receive
> it.

Even so, whether the function driver submits these 0-length isochronous 
requests makes a difference in terms of filling the slots in the 
schedule and preventing later requests from becoming stale.

Alan Stern
Thinh Nguyen April 20, 2023, 2:31 a.m. UTC | #19
On Wed, Apr 19, 2023, Alan Stern wrote:
> On Wed, Apr 19, 2023 at 09:49:35PM +0000, Thinh Nguyen wrote:
> > By the usb spec, for IN direction, if there's no data available and the
> > host requests for data, then the device will send a zero-length data
> > packet.
> 
> I'm not aware of any such requirement in the USB-2 spec.  The closest I 
> can find is in section 5.6.4:
> 
> 	An isochronous IN endpoint must return a zero-length packet 
> 	whenever data is requested at a faster interval than the 
> 	specified interval and data is not available.
> 
> But this specifically refers only to situations where the host asks for 
> isochonous data more frequently than the endpoint descriptor's bInterval 
> describes.

This is from usb 3.2 section 4.4.8.2:

	Note, if an endpoint has no isochronous data to transmit when
	accessed by the host, it shall send a zero length packet in
	response to the request for data.

> 
> >  This is what the dwc3 controller will do. So regardless whether
> > you prepare and queue a 0-length request or not, the host will receive
> > it.
> 
> Even so, whether the function driver submits these 0-length isochronous 
> requests makes a difference in terms of filling the slots in the 
> schedule and preventing later requests from becoming stale.
> 

That's not my point. Avi concern was how the host may interpret 0-length
packet. My point was to note that it should behave the same as before.
Because even without sending 0-length requests, the controller would
already respond with 0-length packet already.

In fact, that's what the UVC driver should be doing, by maintaining the
request queue with dummy requests as suggested in the beginning.

Perhaps there was some misunderstanding. Sending 0-length request you
suggested is a perfectly good suggestion and UVC gadget driver should do
that instead of relying on the workaround in the dwc3 driver.

Thanks,
Thinh
Alan Stern April 20, 2023, 2:31 p.m. UTC | #20
On Thu, Apr 20, 2023 at 02:31:25AM +0000, Thinh Nguyen wrote:
> On Wed, Apr 19, 2023, Alan Stern wrote:
> > On Wed, Apr 19, 2023 at 09:49:35PM +0000, Thinh Nguyen wrote:
> > > By the usb spec, for IN direction, if there's no data available and the
> > > host requests for data, then the device will send a zero-length data
> > > packet.
> > 
> > I'm not aware of any such requirement in the USB-2 spec.  The closest I 
> > can find is in section 5.6.4:
> > 
> > 	An isochronous IN endpoint must return a zero-length packet 
> > 	whenever data is requested at a faster interval than the 
> > 	specified interval and data is not available.
> > 
> > But this specifically refers only to situations where the host asks for 
> > isochonous data more frequently than the endpoint descriptor's bInterval 
> > describes.
> 
> This is from usb 3.2 section 4.4.8.2:
> 
> 	Note, if an endpoint has no isochronous data to transmit when
> 	accessed by the host, it shall send a zero length packet in
> 	response to the request for data.

Ah, but chapter 4 in the USB-3.2 spec describes only the SuperSpeed bus, 
as mentioned in the first paragraph of section 4.1.  So the constraint 
about sending 0-length isochronous packets when no data is available 
applies only to SuperSpeed connections.  If a gadget is connected at 
USB-2 speed, the constraint doesn't apply.

(And in fact, no matter what the connection speed is, there's always a 
possibility that the first packet sent by the host to begin an 
isochronous transfer might get lost or corrupted, in which case the 
device would not send a reply in any case.)

> > >  This is what the dwc3 controller will do. So regardless whether
> > > you prepare and queue a 0-length request or not, the host will receive
> > > it.
> > 
> > Even so, whether the function driver submits these 0-length isochronous 
> > requests makes a difference in terms of filling the slots in the 
> > schedule and preventing later requests from becoming stale.
> > 
> 
> That's not my point. Avi concern was how the host may interpret 0-length
> packet. My point was to note that it should behave the same as before.
> Because even without sending 0-length requests, the controller would
> already respond with 0-length packet already.

It would be good to confirm the reasoning by checking the source code 
for the UVC host driver.  But I expect you are right.

> In fact, that's what the UVC driver should be doing, by maintaining the
> request queue with dummy requests as suggested in the beginning.
> 
> Perhaps there was some misunderstanding. Sending 0-length request you
> suggested is a perfectly good suggestion and UVC gadget driver should do
> that instead of relying on the workaround in the dwc3 driver.

Agreed.

Alan Stern
Laurent Pinchart April 20, 2023, 5:05 p.m. UTC | #21
On Thu, Apr 20, 2023 at 10:31:24AM -0400, Alan Stern wrote:
> On Thu, Apr 20, 2023 at 02:31:25AM +0000, Thinh Nguyen wrote:
> > On Wed, Apr 19, 2023, Alan Stern wrote:
> > > On Wed, Apr 19, 2023 at 09:49:35PM +0000, Thinh Nguyen wrote:
> > > > By the usb spec, for IN direction, if there's no data available and the
> > > > host requests for data, then the device will send a zero-length data
> > > > packet.
> > > 
> > > I'm not aware of any such requirement in the USB-2 spec.  The closest I 
> > > can find is in section 5.6.4:
> > > 
> > > 	An isochronous IN endpoint must return a zero-length packet 
> > > 	whenever data is requested at a faster interval than the 
> > > 	specified interval and data is not available.
> > > 
> > > But this specifically refers only to situations where the host asks for 
> > > isochonous data more frequently than the endpoint descriptor's bInterval 
> > > describes.
> > 
> > This is from usb 3.2 section 4.4.8.2:
> > 
> > 	Note, if an endpoint has no isochronous data to transmit when
> > 	accessed by the host, it shall send a zero length packet in
> > 	response to the request for data.
> 
> Ah, but chapter 4 in the USB-3.2 spec describes only the SuperSpeed bus, 
> as mentioned in the first paragraph of section 4.1.  So the constraint 
> about sending 0-length isochronous packets when no data is available 
> applies only to SuperSpeed connections.  If a gadget is connected at 
> USB-2 speed, the constraint doesn't apply.
> 
> (And in fact, no matter what the connection speed is, there's always a 
> possibility that the first packet sent by the host to begin an 
> isochronous transfer might get lost or corrupted, in which case the 
> device would not send a reply in any case.)
> 
> > > >  This is what the dwc3 controller will do. So regardless whether
> > > > you prepare and queue a 0-length request or not, the host will receive
> > > > it.
> > > 
> > > Even so, whether the function driver submits these 0-length isochronous 
> > > requests makes a difference in terms of filling the slots in the 
> > > schedule and preventing later requests from becoming stale.
> > 
> > That's not my point. Avi concern was how the host may interpret 0-length
> > packet. My point was to note that it should behave the same as before.
> > Because even without sending 0-length requests, the controller would
> > already respond with 0-length packet already.
> 
> It would be good to confirm the reasoning by checking the source code 
> for the UVC host driver.  But I expect you are right.

The uvcvideo host driver should be fine. An isochronous frame descriptor
with actual_length set to 0 will cause the packet to be ignored. The
uvc_video_decode_isoc() function loops over all packets, and calls
uvc_video_decode_start() which starts with

	if (len < 2 || data[0] < 2 || data[0] > len) {
		stream->stats.frame.nb_invalid++;
		return -EINVAL;
	}

The -EINVAL return value causes the caller to ignore the packet.

We probably want to avoid increasing the counter of invalid packets
though, but the counter is used for debug purpose only, so it doesn't
affect operation negatively.

> > In fact, that's what the UVC driver should be doing, by maintaining the
> > request queue with dummy requests as suggested in the beginning.
> > 
> > Perhaps there was some misunderstanding. Sending 0-length request you
> > suggested is a perfectly good suggestion and UVC gadget driver should do
> > that instead of relying on the workaround in the dwc3 driver.
> 
> Agreed.
Laurent Pinchart April 20, 2023, 5:20 p.m. UTC | #22
Hi Avichal,

First of all, thank you for looking into the problem and initiating this
discussion. The whole mail thread was very helpful, Thanks to Thinh and
Alan as well for all the help.

On Tue, Apr 18, 2023 at 10:26:11PM -0700, Avichal Rakesh wrote:
> On Tue, Apr 18, 2023 at 6:07 PM Alan Stern wrote:
> > On Tue, Apr 18, 2023 at 03:45:53PM -0700, Avichal Rakesh wrote:
> > > I see, and I think I understand Greg's previous comment better as
> > > well: The UVC driver isn't falling behind on the video stream, it is
> > > falling behind the usb controller's monotonic isoc stream.
> > >
> > > From what I can see, this leaves us in an interesting place: UVC
> > > allows the host to configure the camera's output resolution and fps,
> > > which effectively controls how fast the camera is generating data.
> > > This is at odds with the UVC gadget driver, which currently packs each
> > > video frame into as few usb_requests as possible (using the full
> > > available size in usb_requests). Effectively, the UVC gadget driver
> > > attempts to use the "full" bandwidth of isoc transfers even when the
> > > camera isn't generating data fast enough. For example, in my
> > > observations: 1 video frame is ~22kB. At 30fps, this represents 1/30
> > > of the amount of data the camera would generate in a second. This 22kB
> > > is split into 8 usb_requests which is about 1/1000 the number of
> > > requests UVC driver needs to generate per second to prevent isoc
> > > failures (assuming 125us monotonic uframes). Assuming some fudge
> > > factor from the simplifications in your explanation gives the uvc
> > > driver some extra leeway with request queuing, we're still roughly two
> > > order of magnitudes out of sync. Even with perfect 'complete'
> > > callbacks and video frame encodings, an underrun seems inevitable.
> > > Data is being generated at a far slower rate than it is being
> > > transferred. Does this reasoning seem valid?
> > >
> > > Just as a test I'll try updating the UVC driver to consume 266
> > > usb_requests per video frame (~1/30 of 8000), which should be enough
> > > to keep the usb controller queue occupied for ~1/30s. Ideally, by the
> > > time the controller queue is empty, the camera would have produced a
> > > new frame. This doesn't solve the issue with latencies around callback
> > > and an isoc failure might still happen, hopefully the failure
> > > frequency is reduced because UVC queues enough requests per video
> > > frame to not starve the controller's queue while waiting on a new
> > > frame and the only way they go out of sync is from 'complete' callback
> > > timings. I am assuming this has been tried before, but my LKML search
> > > skills are failing and I can't find much on it.
> >
> > Note that there's nothing wrong with submitting a 0-length isochronous
> > transfer.  If there's no data left but you still need to send
> > _something_ in order to fill out the remaining slots in the controller's
> > schedule, this is a good way to do it.
>
> Oh, this is very good to know, thank you!! We just need to reach a
> steady state of UVC queuing enough requests monotonically (even if
> they are empty), and the usb controller calling the 'complete'
> callback to give it more requests to queue. Although I wonder how the
> host's UVC driver would interpret the zero length packets, if it would
> even care.
> 
> I am unfortunately being pulled into some other work for the next few
> days, but I will try out both: splitting one frame into many many
> requests and just sending 0 length requests, and see what happens on
> the host. Will report back with what I find.

I'm looking forward to this :-)

> Any other insights are
> welcome. I want to fix this problem for good if possible, and am happy
> to try out whatever it takes!

As far as I understand, we have two ways forward here to avoid running
out of requests to send: sending data as quickly as possible (maximizing
the number of bytes sent in each packet) and filling up with 0-length
requests in-between, and spreading the data across packets. I'll call
the first one burst mode for lack of a better term.

Both mechanisms require a form of dynamic rate adaptation, monitoring
the consumption of bytes or packets by the UDC to decide how to fill the
next packets. In non-burst mode, we may still need to insert zero-length
packets if we really run out of data (for instance in case of a V4L2
buffer queue underrun), so the burst mode implementation may be simpler.

On the host side, both option should work, the uvcvideo driver shouldn't
have any problem with zero-length packets. As the driver offloads memcpy
operations to a work queue, I believe burst mode would be more efficient
on the host side as it would lower the number of times the work queue
needs to be woken up, and overall reduce the work performed in interrupt
context. If burst mode doesn't negatively impact the gadget side (from a
CPU time or a power consumption point of view), it would thus be
preferred.
Thinh Nguyen April 20, 2023, 8:22 p.m. UTC | #23
On Thu, Apr 20, 2023, Alan Stern wrote:
> On Thu, Apr 20, 2023 at 02:31:25AM +0000, Thinh Nguyen wrote:
> > On Wed, Apr 19, 2023, Alan Stern wrote:
> > > On Wed, Apr 19, 2023 at 09:49:35PM +0000, Thinh Nguyen wrote:
> > > > By the usb spec, for IN direction, if there's no data available and the
> > > > host requests for data, then the device will send a zero-length data
> > > > packet.
> > > 
> > > I'm not aware of any such requirement in the USB-2 spec.  The closest I 
> > > can find is in section 5.6.4:
> > > 
> > > 	An isochronous IN endpoint must return a zero-length packet 
> > > 	whenever data is requested at a faster interval than the 
> > > 	specified interval and data is not available.
> > > 
> > > But this specifically refers only to situations where the host asks for 
> > > isochonous data more frequently than the endpoint descriptor's bInterval 
> > > describes.
> > 
> > This is from usb 3.2 section 4.4.8.2:
> > 
> > 	Note, if an endpoint has no isochronous data to transmit when
> > 	accessed by the host, it shall send a zero length packet in
> > 	response to the request for data.
> 
> Ah, but chapter 4 in the USB-3.2 spec describes only the SuperSpeed bus, 
> as mentioned in the first paragraph of section 4.1.  So the constraint 
> about sending 0-length isochronous packets when no data is available 
> applies only to SuperSpeed connections.  If a gadget is connected at 
> USB-2 speed, the constraint doesn't apply.

Right, this may not be a requirement for usb2 speeds. Regarding our dwc3
controller, it keeps the same behavior for both usb2 and usb3x speeds,
which is to automatically respond with a 0-length packet for IN isoc
endpoint if the host requests and no data is available.

BR,
Thinh

> 
> (And in fact, no matter what the connection speed is, there's always a 
> possibility that the first packet sent by the host to begin an 
> isochronous transfer might get lost or corrupted, in which case the 
> device would not send a reply in any case.)
> 
> > > >  This is what the dwc3 controller will do. So regardless whether
> > > > you prepare and queue a 0-length request or not, the host will receive
> > > > it.
> > > 
> > > Even so, whether the function driver submits these 0-length isochronous 
> > > requests makes a difference in terms of filling the slots in the 
> > > schedule and preventing later requests from becoming stale.
> > > 
> > 
> > That's not my point. Avi concern was how the host may interpret 0-length
> > packet. My point was to note that it should behave the same as before.
> > Because even without sending 0-length requests, the controller would
> > already respond with 0-length packet already.
> 
> It would be good to confirm the reasoning by checking the source code 
> for the UVC host driver.  But I expect you are right.
> 
> > In fact, that's what the UVC driver should be doing, by maintaining the
> > request queue with dummy requests as suggested in the beginning.
> > 
> > Perhaps there was some misunderstanding. Sending 0-length request you
> > suggested is a perfectly good suggestion and UVC gadget driver should do
> > that instead of relying on the workaround in the dwc3 driver.
> 
> Agreed.
> 
> Alan Stern
Avichal Rakesh May 5, 2023, 10:39 p.m. UTC | #24
On 4/20/23 10:20, Laurent Pinchart wrote:
> 
> As far as I understand, we have two ways forward here to avoid running
> out of requests to send: sending data as quickly as possible (maximizing
> the number of bytes sent in each packet) and filling up with 0-length
> requests in-between, and spreading the data across packets. I'll call
> the first one burst mode for lack of a better term.
> 

Hey all,

Apologies for the late reply. My not-so-long work took longer than expected.
I tried the 2 (technically 3, but I'll go over it in a bit) approaches we had
discussed above and the "burst" approach works pretty well. I'll attach that
to this email, and send another email out with the other patch.

The first thing I tried was to split one video frame over 266 frames, without
changing the number of requests allocated. And it works! However, as Laurent
mentioned, it does add a fair amount of bookkeeping to split a video frame into
the required number of requests. I also hardcoded the number 266 from our
discussion, but I am not sure how to figure out that number dynamically. 266
also didn't work if the host started sending frames at more than 30fps :/, so
our dynamic calculation would need to take camera's real output fps into
account, which as far as I can tell is not known to the UVC driver.

With those issues I tried what Laurent called the "burst" approach
(attached below), i.e. send the video frames in as few packets as possible,
and then queue up 0 length packets to keep the ISOC queue happy. This approach
works perfectly as far as I can tell. Locally I tried with a Linux, Window,
and MacOS host with no frame drops or ISOC failures on any of them!

In the current patch, UVC gadget driver keeps the ISOC cadence by effectively
maintaining a back-pressure on the USB controller (at least to the best of its
capabilities). Any usb_request available to the UVC gadget gets immediately
queued back to the USB controller. If a video frame is available, the frame is
encoded, if not, the length is set to 0. The idea being that the host's polling
and the controller's 'complete' callback will result in a somewhat consistent
cadence for the uvc driver after the initial burst of packets.

However this does mean that at worst, the new video frames are up to 63
usb_requests behind, but assuming a 125us per usb_request, that amounts to ~8ms
latency at the worst, which seems acceptable?

Another concern I had was about how the back-pressure might affect other USB
controllers. DWC3 doesn't seem to be sweating and in local testing I saw no
EXDEVs or frame drops other than when the stream was being transitioned from
one configuration to another, but I don't know how this interaction might go for
other USB controllers. Would you have any insights into non-DWC3 controllers,
and if they might be negatively affected by having up to 64 requests queued at 
once?

Here's the patch, it doesn't currently handle bulk transfers, but I can upload a
formal patch with it if this approach seems acceptable!

---
 drivers/usb/gadget/function/uvc_video.c | 24 ++++++++++++++++--------
 1 file changed, 16 insertions(+), 8 deletions(-)

diff --git a/drivers/usb/gadget/function/uvc_video.c b/drivers/usb/gadget/function/uvc_video.c
index dd1c6b2ca7c6..d7ad278709d4 100644
--- a/drivers/usb/gadget/function/uvc_video.c
+++ b/drivers/usb/gadget/function/uvc_video.c
@@ -386,6 +386,7 @@ static void uvcg_video_pump(struct work_struct *work)
 	struct uvc_buffer *buf;
 	unsigned long flags;
 	int ret;
+	bool buf_int;

 	while (video->ep->enabled) {
 		/*
@@ -408,20 +409,29 @@ static void uvcg_video_pump(struct work_struct *work)
 		 */
 		spin_lock_irqsave(&queue->irqlock, flags);
 		buf = uvcg_queue_head(queue);
-		if (buf == NULL) {
+
+		if (buf != NULL) {
+			// Encode video frame if we have one.
+			video->encode(req, video, buf);
+			// Always interrupt for the last usb_request of a video frame
+			buf_int = buf->state == UVC_BUF_STATE_DONE;
+		} else if (!(queue->flags & UVC_QUEUE_DISCONNECTED)) {
+			// No video frame, but the queue is still connected
+			// Send an empty USB request to keep ISOC contract...
+			req->length = 0;
+			buf_int = false;
+		} else {
+			// queue no longer connected. Stop processing further.
 			spin_unlock_irqrestore(&queue->irqlock, flags);
 			break;
 		}

-		video->encode(req, video, buf);
-
 		/*
 		 * With usb3 we have more requests. This will decrease the
 		 * interrupt load to a quarter but also catches the corner
 		 * cases, which needs to be handled.
 		 */
-		if (list_empty(&video->req_free) ||
-		    buf->state == UVC_BUF_STATE_DONE ||
+		if (list_empty(&video->req_free) || buf_int ||
 		    !(video->req_int_count %
 		       DIV_ROUND_UP(video->uvc_num_requests, 4))) {
 			video->req_int_count = 0;
@@ -441,8 +451,7 @@ static void uvcg_video_pump(struct work_struct *work)

 		/* Endpoint now owns the request */
 		req = NULL;
-		if (buf->state != UVC_BUF_STATE_DONE)
-			video->req_int_count++;
+		video->req_int_count++;
 	}

 	if (!req)
@@ -527,4 +536,3 @@ int uvcg_video_init(struct uvc_video *video, struct uvc_device *uvc)
 			V4L2_BUF_TYPE_VIDEO_OUTPUT, &video->mutex);
 	return 0;
 }
-
--
Avichal Rakesh May 5, 2023, 10:41 p.m. UTC | #25
On 5/5/23 15:39, Avichal Rakesh wrote:
> 
> 
> On 4/20/23 10:20, Laurent Pinchart wrote:
>>
>> As far as I understand, we have two ways forward here to avoid running
>> out of requests to send: sending data as quickly as possible (maximizing
>> the number of bytes sent in each packet) and filling up with 0-length
>> requests in-between, and spreading the data across packets. I'll call
>> the first one burst mode for lack of a better term.
>>
> 
> Hey all,
> 
> Apologies for the late reply. My not-so-long work took longer than expected.
> I tried the 2 (technically 3, but I'll go over it in a bit) approaches we had
> discussed above and the "burst" approach works pretty well. I'll attach that
> to this email, and send another email out with the other patch.
> 
Here's the second test. This patch does the complete opposite of the previous one
and is based on Thinh mentioning that there was some logic built into DWC3 to
reschedule requests if the uvc driver does not queue a request for some time.

It queues usb_requests in short bursts and waits until all the queued requests
have been returned before queueing up another burst of usb_requests. This was
disastrous as it saw far more ISOC failures than before.

That likely stemmed from timing between the last request being returned and the
new burst of requests being queued, but no amount of arbitrary waiting
between last return and first queue completely eliminated the ISOC failures.

Patch, if anyone is curious:
---
 drivers/usb/gadget/function/uvc.h       |  1 +
 drivers/usb/gadget/function/uvc_video.c | 13 +++++++++++--
 2 files changed, 12 insertions(+), 2 deletions(-)

diff --git a/drivers/usb/gadget/function/uvc.h b/drivers/usb/gadget/function/uvc.h
index 100475b1363e..e29821a80a82 100644
--- a/drivers/usb/gadget/function/uvc.h
+++ b/drivers/usb/gadget/function/uvc.h
@@ -102,6 +102,7 @@ struct uvc_video {

 	/* Requests */
 	unsigned int req_size;
+	unsigned int num_free_req;
 	struct uvc_request *ureq;
 	struct list_head req_free;
 	spinlock_t req_lock;
diff --git a/drivers/usb/gadget/function/uvc_video.c b/drivers/usb/gadget/function/uvc_video.c
index dd1c6b2ca7c6..e3aad9c3f3ca 100644
--- a/drivers/usb/gadget/function/uvc_video.c
+++ b/drivers/usb/gadget/function/uvc_video.c
@@ -255,6 +255,7 @@ uvc_video_complete(struct usb_ep *ep, struct usb_request *req)
 	struct uvc_video_queue *queue = &video->queue;
 	struct uvc_device *uvc = video->uvc;
 	unsigned long flags;
+	bool should_pump = false;

 	switch (req->status) {
 	case 0:
@@ -284,9 +285,12 @@ uvc_video_complete(struct usb_ep *ep, struct usb_request *req)

 	spin_lock_irqsave(&video->req_lock, flags);
 	list_add_tail(&req->list, &video->req_free);
+	video->num_free_req++;
+	// Only pump more requests if we have all our usb_requests back.
+	should_pump = video->num_free_req == video->uvc_num_requests;
 	spin_unlock_irqrestore(&video->req_lock, flags);

-	if (uvc->state == UVC_STATE_STREAMING)
+	if (uvc->state == UVC_STATE_STREAMING && should_pump)
 		queue_work(video->async_wq, &video->pump);
 }

@@ -316,6 +320,7 @@ uvc_video_free_requests(struct uvc_video *video)

 	INIT_LIST_HEAD(&video->req_free);
 	video->req_size = 0;
+	video->num_free_req = 0;
 	return 0;
 }

@@ -332,10 +337,12 @@ uvc_video_alloc_requests(struct uvc_video *video)
 		 * max_t(unsigned int, video->ep->maxburst, 1)
 		 * (video->ep->mult);

+	video->num_free_req = 0;
 	video->ureq = kcalloc(video->uvc_num_requests, sizeof(struct uvc_request), GFP_KERNEL);
 	if (video->ureq == NULL)
 		return -ENOMEM;

+	video->num_free_req = 0;
 	for (i = 0; i < video->uvc_num_requests; ++i) {
 		video->ureq[i].req_buffer = kmalloc(req_size, GFP_KERNEL);
 		if (video->ureq[i].req_buffer == NULL)
@@ -357,6 +364,7 @@ uvc_video_alloc_requests(struct uvc_video *video)
 		sg_alloc_table(&video->ureq[i].sgt,
 			       DIV_ROUND_UP(req_size - UVCG_REQUEST_HEADER_LEN,
 					    PAGE_SIZE) + 2, GFP_KERNEL);
+		video->num_free_req++;
 	}

 	video->req_size = req_size;
@@ -400,6 +408,7 @@ static void uvcg_video_pump(struct work_struct *work)
 		req = list_first_entry(&video->req_free, struct usb_request,
 					list);
 		list_del(&req->list);
+		video->num_free_req--;
 		spin_unlock_irqrestore(&video->req_lock, flags);

 		/*
@@ -450,6 +459,7 @@ static void uvcg_video_pump(struct work_struct *work)

 	spin_lock_irqsave(&video->req_lock, flags);
 	list_add_tail(&req->list, &video->req_free);
+	video->num_free_req++;
 	spin_unlock_irqrestore(&video->req_lock, flags);
 	return;
 }
@@ -527,4 +537,3 @@ int uvcg_video_init(struct uvc_video *video, struct uvc_device *uvc)
 			V4L2_BUF_TYPE_VIDEO_OUTPUT, &video->mutex);
 	return 0;
 }
-
--
Thinh Nguyen May 5, 2023, 11:58 p.m. UTC | #26
On Fri, May 05, 2023, Avichal Rakesh wrote:
> 
> 
> On 5/5/23 15:39, Avichal Rakesh wrote:
> > 
> > 
> > On 4/20/23 10:20, Laurent Pinchart wrote:
> >>
> >> As far as I understand, we have two ways forward here to avoid running
> >> out of requests to send: sending data as quickly as possible (maximizing
> >> the number of bytes sent in each packet) and filling up with 0-length
> >> requests in-between, and spreading the data across packets. I'll call
> >> the first one burst mode for lack of a better term.
> >>
> > 
> > Hey all,
> > 
> > Apologies for the late reply. My not-so-long work took longer than expected.
> > I tried the 2 (technically 3, but I'll go over it in a bit) approaches we had
> > discussed above and the "burst" approach works pretty well. I'll attach that
> > to this email, and send another email out with the other patch.
> > 
> Here's the second test. This patch does the complete opposite of the previous one
> and is based on Thinh mentioning that there was some logic built into DWC3 to
> reschedule requests if the uvc driver does not queue a request for some time.
> 
> It queues usb_requests in short bursts and waits until all the queued requests
> have been returned before queueing up another burst of usb_requests. This was
> disastrous as it saw far more ISOC failures than before.
> 
> That likely stemmed from timing between the last request being returned and the
> new burst of requests being queued, but no amount of arbitrary waiting
> between last return and first queue completely eliminated the ISOC failures.
> 

Right, if the "last request" is set with no_interrupt, then the dwc3's
rescheduling workaround won't kick in, which will fail regardless the
wait time before the next burst. This workaround logic in dwc3 is very
basic, and it does not handle every scenario. We can improve on this
logic, but as noted we should not rely on it.

BR,
Thinh
Thinh Nguyen May 6, 2023, 2:49 a.m. UTC | #27
On Fri, May 05, 2023, Avichal Rakesh wrote:
> 
> 
> On 4/20/23 10:20, Laurent Pinchart wrote:
> > 
> > As far as I understand, we have two ways forward here to avoid running
> > out of requests to send: sending data as quickly as possible (maximizing
> > the number of bytes sent in each packet) and filling up with 0-length
> > requests in-between, and spreading the data across packets. I'll call
> > the first one burst mode for lack of a better term.
> > 
> 
> Hey all,
> 
> Apologies for the late reply. My not-so-long work took longer than expected.
> I tried the 2 (technically 3, but I'll go over it in a bit) approaches we had
> discussed above and the "burst" approach works pretty well. I'll attach that
> to this email, and send another email out with the other patch.
> 
> The first thing I tried was to split one video frame over 266 frames, without
> changing the number of requests allocated. And it works! However, as Laurent
> mentioned, it does add a fair amount of bookkeeping to split a video frame into
> the required number of requests. I also hardcoded the number 266 from our
> discussion, but I am not sure how to figure out that number dynamically. 266
> also didn't work if the host started sending frames at more than 30fps :/, so
> our dynamic calculation would need to take camera's real output fps into
> account, which as far as I can tell is not known to the UVC driver.
> 
> With those issues I tried what Laurent called the "burst" approach
> (attached below), i.e. send the video frames in as few packets as possible,
> and then queue up 0 length packets to keep the ISOC queue happy. This approach
> works perfectly as far as I can tell. Locally I tried with a Linux, Window,
> and MacOS host with no frame drops or ISOC failures on any of them!

That's great!

> 
> In the current patch, UVC gadget driver keeps the ISOC cadence by effectively
> maintaining a back-pressure on the USB controller (at least to the best of its
> capabilities). Any usb_request available to the UVC gadget gets immediately
> queued back to the USB controller. If a video frame is available, the frame is
> encoded, if not, the length is set to 0. The idea being that the host's polling

This is what the dwc3 controller expects -- keeping up with the data to
the negotiated periodic interval. Thanks for the tests.

BR,
Thinh

> and the controller's 'complete' callback will result in a somewhat consistent
> cadence for the uvc driver after the initial burst of packets.
> 
> However this does mean that at worst, the new video frames are up to 63
> usb_requests behind, but assuming a 125us per usb_request, that amounts to ~8ms
> latency at the worst, which seems acceptable?
> 
> Another concern I had was about how the back-pressure might affect other USB
> controllers. DWC3 doesn't seem to be sweating and in local testing I saw no
> EXDEVs or frame drops other than when the stream was being transitioned from
> one configuration to another, but I don't know how this interaction might go for
> other USB controllers. Would you have any insights into non-DWC3 controllers,
> and if they might be negatively affected by having up to 64 requests queued at 
> once?
> 
> Here's the patch, it doesn't currently handle bulk transfers, but I can upload a
> formal patch with it if this approach seems acceptable!
> 
> ---
>  drivers/usb/gadget/function/uvc_video.c | 24 ++++++++++++++++--------
>  1 file changed, 16 insertions(+), 8 deletions(-)
>
Laurent Pinchart May 6, 2023, 12:53 p.m. UTC | #28
Hi Avichal,

(with a question for Dan below)

On Fri, May 05, 2023 at 03:39:41PM -0700, Avichal Rakesh wrote:
> On 4/20/23 10:20, Laurent Pinchart wrote:
> > 
> > As far as I understand, we have two ways forward here to avoid running
> > out of requests to send: sending data as quickly as possible (maximizing
> > the number of bytes sent in each packet) and filling up with 0-length
> > requests in-between, and spreading the data across packets. I'll call
> > the first one burst mode for lack of a better term.
> 
> Hey all,
> 
> Apologies for the late reply. My not-so-long work took longer than expected.

No worries, I know how it feels :-)

> I tried the 2 (technically 3, but I'll go over it in a bit) approaches we had
> discussed above and the "burst" approach works pretty well. I'll attach that
> to this email, and send another email out with the other patch.
> 
> The first thing I tried was to split one video frame over 266 frames, without
> changing the number of requests allocated. And it works! However, as Laurent
> mentioned, it does add a fair amount of bookkeeping to split a video frame into
> the required number of requests. I also hardcoded the number 266 from our
> discussion, but I am not sure how to figure out that number dynamically. 266
> also didn't work if the host started sending frames at more than 30fps :/, so
> our dynamic calculation would need to take camera's real output fps into
> account, which as far as I can tell is not known to the UVC driver.

It would probably need to monitor how full the request queue is, and
adapt the number of bytes it queues in each request accordingly. That's
indeed quite a bit of work, for little gain compared to the option you
describe below.

> With those issues I tried what Laurent called the "burst" approach
> (attached below), i.e. send the video frames in as few packets as possible,
> and then queue up 0 length packets to keep the ISOC queue happy. This approach
> works perfectly as far as I can tell. Locally I tried with a Linux, Window,
> and MacOS host with no frame drops or ISOC failures on any of them!
> 
> In the current patch, UVC gadget driver keeps the ISOC cadence by effectively
> maintaining a back-pressure on the USB controller (at least to the best of its
> capabilities). Any usb_request available to the UVC gadget gets immediately
> queued back to the USB controller. If a video frame is available, the frame is
> encoded, if not, the length is set to 0. The idea being that the host's polling
> and the controller's 'complete' callback will result in a somewhat consistent
> cadence for the uvc driver after the initial burst of packets.
> 
> However this does mean that at worst, the new video frames are up to 63
> usb_requests behind, but assuming a 125us per usb_request, that amounts to ~8ms
> latency at the worst, which seems acceptable?

There's a trade off between latency and the risk of underruns. We could
decrease the number of queued requests to lower the latency, as long as
we ensure the margin is high enough to avoid underruns in higher load
conditions. We could also do so only when queuing 0-size requests, and
queue the data in burst mode with a higher number of requests.

> Another concern I had was about how the back-pressure might affect other USB
> controllers. DWC3 doesn't seem to be sweating and in local testing I saw no
> EXDEVs or frame drops other than when the stream was being transitioned from
> one configuration to another, but I don't know how this interaction might go for
> other USB controllers. Would you have any insights into non-DWC3 controllers,
> and if they might be negatively affected by having up to 64 requests queued at 
> once?

Dan, do I recall correctly you have tested uvc-gadget with dwc2 too ?
Could you test the patch below ? Testing with musb would be nice too.

> Here's the patch, it doesn't currently handle bulk transfers, but I can upload a
> formal patch with it if this approach seems acceptable!
> 
> ---
>  drivers/usb/gadget/function/uvc_video.c | 24 ++++++++++++++++--------
>  1 file changed, 16 insertions(+), 8 deletions(-)
> 
> diff --git a/drivers/usb/gadget/function/uvc_video.c b/drivers/usb/gadget/function/uvc_video.c
> index dd1c6b2ca7c6..d7ad278709d4 100644
> --- a/drivers/usb/gadget/function/uvc_video.c
> +++ b/drivers/usb/gadget/function/uvc_video.c
> @@ -386,6 +386,7 @@ static void uvcg_video_pump(struct work_struct *work)
>  	struct uvc_buffer *buf;
>  	unsigned long flags;
>  	int ret;
> +	bool buf_int;
> 
>  	while (video->ep->enabled) {
>  		/*
> @@ -408,20 +409,29 @@ static void uvcg_video_pump(struct work_struct *work)
>  		 */
>  		spin_lock_irqsave(&queue->irqlock, flags);
>  		buf = uvcg_queue_head(queue);
> -		if (buf == NULL) {
> +
> +		if (buf != NULL) {
> +			// Encode video frame if we have one.

C-style comments please.

> +			video->encode(req, video, buf);
> +			// Always interrupt for the last usb_request of a video frame
> +			buf_int = buf->state == UVC_BUF_STATE_DONE;
> +		} else if (!(queue->flags & UVC_QUEUE_DISCONNECTED)) {
> +			// No video frame, but the queue is still connected
> +			// Send an empty USB request to keep ISOC contract...
> +			req->length = 0;
> +			buf_int = false;
> +		} else {
> +			// queue no longer connected. Stop processing further.
>  			spin_unlock_irqrestore(&queue->irqlock, flags);
>  			break;
>  		}
> 
> -		video->encode(req, video, buf);
> -
>  		/*
>  		 * With usb3 we have more requests. This will decrease the
>  		 * interrupt load to a quarter but also catches the corner
>  		 * cases, which needs to be handled.
>  		 */
> -		if (list_empty(&video->req_free) ||
> -		    buf->state == UVC_BUF_STATE_DONE ||
> +		if (list_empty(&video->req_free) || buf_int ||
>  		    !(video->req_int_count %
>  		       DIV_ROUND_UP(video->uvc_num_requests, 4))) {
>  			video->req_int_count = 0;
> @@ -441,8 +451,7 @@ static void uvcg_video_pump(struct work_struct *work)
> 
>  		/* Endpoint now owns the request */
>  		req = NULL;
> -		if (buf->state != UVC_BUF_STATE_DONE)
> -			video->req_int_count++;
> +		video->req_int_count++;
>  	}
> 
>  	if (!req)
> @@ -527,4 +536,3 @@ int uvcg_video_init(struct uvc_video *video, struct uvc_device *uvc)
>  			V4L2_BUF_TYPE_VIDEO_OUTPUT, &video->mutex);
>  	return 0;
>  }
> -
Avichal Rakesh May 8, 2023, 11:49 p.m. UTC | #29
On Sat, May 6, 2023 at 5:53 AM Laurent Pinchart
<laurent.pinchart@ideasonboard.com> wrote:

> > The first thing I tried was to split one video frame over 266 frames, without
> > changing the number of requests allocated. And it works! However, as Laurent
> > mentioned, it does add a fair amount of bookkeeping to split a video frame into
> > the required number of requests. I also hardcoded the number 266 from our
> > discussion, but I am not sure how to figure out that number dynamically. 266
> > also didn't work if the host started sending frames at more than 30fps :/, so
> > our dynamic calculation would need to take camera's real output fps into
> > account, which as far as I can tell is not known to the UVC driver.
>
> It would probably need to monitor how full the request queue is, and
> adapt the number of bytes it queues in each request accordingly. That's
> indeed quite a bit of work, for little gain compared to the option you
> describe below.
>
Agreed, especially if the hosts already handle 0 length packets.
As long as the usb controllers can keep up, the burst approach seems
more reasonable.

> > With those issues I tried what Laurent called the "burst" approach
> > (attached below), i.e. send the video frames in as few packets as possible,
> > and then queue up 0 length packets to keep the ISOC queue happy. This approach
> > works perfectly as far as I can tell. Locally I tried with a Linux, Window,
> > and MacOS host with no frame drops or ISOC failures on any of them!
> >
> > In the current patch, UVC gadget driver keeps the ISOC cadence by effectively
> > maintaining a back-pressure on the USB controller (at least to the best of its
> > capabilities). Any usb_request available to the UVC gadget gets immediately
> > queued back to the USB controller. If a video frame is available, the frame is
> > encoded, if not, the length is set to 0. The idea being that the host's polling
> > and the controller's 'complete' callback will result in a somewhat consistent
> > cadence for the uvc driver after the initial burst of packets.
> >
> > However this does mean that at worst, the new video frames are up to 63
> > usb_requests behind, but assuming a 125us per usb_request, that amounts to ~8ms
> > latency at the worst, which seems acceptable?
>
> There's a trade off between latency and the risk of underruns. We could
> decrease the number of queued requests to lower the latency, as long as
> we ensure the margin is high enough to avoid underruns in higher load
> conditions. We could also do so only when queuing 0-size requests, and
> queue the data in burst mode with a higher number of requests.

Would 8ms of latency be considered significant? Unless the host asks
for >125fps,
that amounts to less than a frame of latency, so frames should not be dropped
by the host for being "late". Admittedly, I don't know enough about UVC usage to
say if 8ms (at worst) will be problematic for certain usages. The
hosts don't seem to
have any issues when streaming at <=60fps.

> > Another concern I had was about how the back-pressure might affect other USB
> > controllers. DWC3 doesn't seem to be sweating and in local testing I saw no
> > EXDEVs or frame drops other than when the stream was being transitioned from
> > one configuration to another, but I don't know how this interaction might go for
> > other USB controllers. Would you have any insights into non-DWC3 controllers,
> > and if they might be negatively affected by having up to 64 requests queued at
> > once?
>
> Dan, do I recall correctly you have tested uvc-gadget with dwc2 too ?
> Could you test the patch below ? Testing with musb would be nice too.
>
> > Here's the patch, it doesn't currently handle bulk transfers, but I can upload a
> > formal patch with it if this approach seems acceptable!
> >
> > ---
> >  drivers/usb/gadget/function/uvc_video.c | 24 ++++++++++++++++--------
> >  1 file changed, 16 insertions(+), 8 deletions(-)
> >
> > diff --git a/drivers/usb/gadget/function/uvc_video.c b/drivers/usb/gadget/function/uvc_video.c
> > index dd1c6b2ca7c6..d7ad278709d4 100644
> > --- a/drivers/usb/gadget/function/uvc_video.c
> > +++ b/drivers/usb/gadget/function/uvc_video.c
> > @@ -386,6 +386,7 @@ static void uvcg_video_pump(struct work_struct *work)
> >       struct uvc_buffer *buf;
> >       unsigned long flags;
> >       int ret;
> > +     bool buf_int;
> >
> >       while (video->ep->enabled) {
> >               /*
> > @@ -408,20 +409,29 @@ static void uvcg_video_pump(struct work_struct *work)
> >                */
> >               spin_lock_irqsave(&queue->irqlock, flags);
> >               buf = uvcg_queue_head(queue);
> > -             if (buf == NULL) {
> > +
> > +             if (buf != NULL) {
> > +                     // Encode video frame if we have one.
>
> C-style comments please.
>
Addressed this comment and uploaded a formal patch:
https://lore.kernel.org/20230508231103.1621375-1-arakesh@google.com/
It is basically this patch with an extra flag to ensure that we don't
spam a bulk endpoint with 0-length requests.

I wrote a script to detect abrupt size changes in uvc frames on the
host. In my two hours of testing with the above patch, I've recorded
only one "short" frame.
```
[616677.453290] usb 1-6: frame 1 stats: 0/31/31 packets, 0/0/0 pts
(!early !initial), 30/31 scr, last pts/stc/sof 0/4055830784/1298
[617701.585256] usb 1-6: frame 30594 stats: 0/1/1 packets, 0/0/0 pts
(!early !initial), 0/1 scr, last pts/stc/sof 0/1677462368/2636
[617701.621240] usb 1-6: frame 30596 stats: 0/32/32 packets, 0/0/0 pts
(!early !initial), 31/32 scr, last pts/stc/sof 0/1679244656/2933
```
First log is when streaming starts (abrupt change from 0 to 31).
Frame 30594 had only 1 packet for some reason  (I couldn't capture
gadget logs at the time :/), but the stream recovered after one
skipped frame.

Will leave the script running overnight, and report back if there's
anything significant there.

- Avi.
Thinh Nguyen May 9, 2023, 12:21 a.m. UTC | #30
On Mon, May 08, 2023, Avichal Rakesh wrote:
> On Sat, May 6, 2023 at 5:53 AM Laurent Pinchart
> <laurent.pinchart@ideasonboard.com> wrote:
> 
> > > The first thing I tried was to split one video frame over 266 frames, without
> > > changing the number of requests allocated. And it works! However, as Laurent
> > > mentioned, it does add a fair amount of bookkeeping to split a video frame into
> > > the required number of requests. I also hardcoded the number 266 from our
> > > discussion, but I am not sure how to figure out that number dynamically. 266
> > > also didn't work if the host started sending frames at more than 30fps :/, so
> > > our dynamic calculation would need to take camera's real output fps into
> > > account, which as far as I can tell is not known to the UVC driver.
> >
> > It would probably need to monitor how full the request queue is, and
> > adapt the number of bytes it queues in each request accordingly. That's
> > indeed quite a bit of work, for little gain compared to the option you
> > describe below.
> >
> Agreed, especially if the hosts already handle 0 length packets.
> As long as the usb controllers can keep up, the burst approach seems
> more reasonable.
> 
> > > With those issues I tried what Laurent called the "burst" approach
> > > (attached below), i.e. send the video frames in as few packets as possible,
> > > and then queue up 0 length packets to keep the ISOC queue happy. This approach
> > > works perfectly as far as I can tell. Locally I tried with a Linux, Window,
> > > and MacOS host with no frame drops or ISOC failures on any of them!
> > >
> > > In the current patch, UVC gadget driver keeps the ISOC cadence by effectively
> > > maintaining a back-pressure on the USB controller (at least to the best of its
> > > capabilities). Any usb_request available to the UVC gadget gets immediately
> > > queued back to the USB controller. If a video frame is available, the frame is
> > > encoded, if not, the length is set to 0. The idea being that the host's polling
> > > and the controller's 'complete' callback will result in a somewhat consistent
> > > cadence for the uvc driver after the initial burst of packets.
> > >
> > > However this does mean that at worst, the new video frames are up to 63
> > > usb_requests behind, but assuming a 125us per usb_request, that amounts to ~8ms
> > > latency at the worst, which seems acceptable?
> >
> > There's a trade off between latency and the risk of underruns. We could
> > decrease the number of queued requests to lower the latency, as long as
> > we ensure the margin is high enough to avoid underruns in higher load
> > conditions. We could also do so only when queuing 0-size requests, and
> > queue the data in burst mode with a higher number of requests.
> 
> Would 8ms of latency be considered significant? Unless the host asks
> for >125fps,
> that amounts to less than a frame of latency, so frames should not be dropped
> by the host for being "late". Admittedly, I don't know enough about UVC usage to
> say if 8ms (at worst) will be problematic for certain usages. The
> hosts don't seem to
> have any issues when streaming at <=60fps.
> 

Do you only queue a single burst at a time? We don't have to queue all
63 0-length requests as a single "burst" right? We can queue multiple
smaller bursts of 0-length requests. This way, the UVC driver can be
interrupted more often, reducing the video data latency if it arrives
earlier. This keeps the total number of queued requests smaller while
ensuring that the controller isn't starved of requests (as long as this
smaller burst accounts for the system latency). The tradeoff here is
just the UVC gadget driver needs to handle request completion a little
more often. However, we are less likely to be underrun no matter the
video's fps.

Thanks,
Thinh
Avichal Rakesh May 9, 2023, 6:33 p.m. UTC | #31
On Mon, May 8, 2023 at 5:21 PM Thinh Nguyen <Thinh.Nguyen@synopsys.com> wrote:
>
> On Mon, May 08, 2023, Avichal Rakesh wrote:
> > On Sat, May 6, 2023 at 5:53 AM Laurent Pinchart
> > <laurent.pinchart@ideasonboard.com> wrote:
> >
> > > > The first thing I tried was to split one video frame over 266 frames, without
> > > > changing the number of requests allocated. And it works! However, as Laurent
> > > > mentioned, it does add a fair amount of bookkeeping to split a video frame into
> > > > the required number of requests. I also hardcoded the number 266 from our
> > > > discussion, but I am not sure how to figure out that number dynamically. 266
> > > > also didn't work if the host started sending frames at more than 30fps :/, so
> > > > our dynamic calculation would need to take camera's real output fps into
> > > > account, which as far as I can tell is not known to the UVC driver.
> > >
> > > It would probably need to monitor how full the request queue is, and
> > > adapt the number of bytes it queues in each request accordingly. That's
> > > indeed quite a bit of work, for little gain compared to the option you
> > > describe below.
> > >
> > Agreed, especially if the hosts already handle 0 length packets.
> > As long as the usb controllers can keep up, the burst approach seems
> > more reasonable.
> >
> > > > With those issues I tried what Laurent called the "burst" approach
> > > > (attached below), i.e. send the video frames in as few packets as possible,
> > > > and then queue up 0 length packets to keep the ISOC queue happy. This approach
> > > > works perfectly as far as I can tell. Locally I tried with a Linux, Window,
> > > > and MacOS host with no frame drops or ISOC failures on any of them!
> > > >
> > > > In the current patch, UVC gadget driver keeps the ISOC cadence by effectively
> > > > maintaining a back-pressure on the USB controller (at least to the best of its
> > > > capabilities). Any usb_request available to the UVC gadget gets immediately
> > > > queued back to the USB controller. If a video frame is available, the frame is
> > > > encoded, if not, the length is set to 0. The idea being that the host's polling
> > > > and the controller's 'complete' callback will result in a somewhat consistent
> > > > cadence for the uvc driver after the initial burst of packets.
> > > >
> > > > However this does mean that at worst, the new video frames are up to 63
> > > > usb_requests behind, but assuming a 125us per usb_request, that amounts to ~8ms
> > > > latency at the worst, which seems acceptable?
> > >
> > > There's a trade off between latency and the risk of underruns. We could
> > > decrease the number of queued requests to lower the latency, as long as
> > > we ensure the margin is high enough to avoid underruns in higher load
> > > conditions. We could also do so only when queuing 0-size requests, and
> > > queue the data in burst mode with a higher number of requests.
> >
> > Would 8ms of latency be considered significant? Unless the host asks
> > for >125fps,
> > that amounts to less than a frame of latency, so frames should not be dropped
> > by the host for being "late". Admittedly, I don't know enough about UVC usage to
> > say if 8ms (at worst) will be problematic for certain usages. The
> > hosts don't seem to
> > have any issues when streaming at <=60fps.
> >
>
> Do you only queue a single burst at a time? We don't have to queue all
> 63 0-length requests as a single "burst" right? We can queue multiple
> smaller bursts of 0-length requests. This way, the UVC driver can be
> interrupted more often, reducing the video data latency if it arrives
> earlier. This keeps the total number of queued requests smaller while
> ensuring that the controller isn't starved of requests (as long as this
> smaller burst accounts for the system latency). The tradeoff here is
> just the UVC gadget driver needs to handle request completion a little
> more often. However, we are less likely to be underrun no matter the
> video's fps.

The patch does currently queue all 64 0-length packets from the start,
and then relies on completion callbacks to maintain a steady state. It
still sets the `no_interrupt` flag, so the completion callback
interrupts every ~16th request (at worst) which is the same as before.
Ideally, the UVC driver never sits on a video frame for longer than 16
requests, but it does keep the controller queue as full as it can,
which means the video frame could be stuck behind a bunch of 0-length
packets and could hypothetically be sent faster if we were to be
smarter with queuing the 0-length requests. I say hypothetically,
because I have been unable to capture any latency differences with and
without the patch.

I've been trying to think of some way to account for system latency
and only queue a smaller set of 0 length packets, but most of them
boil down to "hope we have enough requests queued". It would be
helpful if we could pin some numbers down that can be relied on by the
gadget driver. For example: Can 125us per request be safely assumed;
or is there a way to get information about how long a usb_request will
last for? It would be very helpful if there was a way to query the
controller's active queue, but I couldn't really find anything in the
APIs. We can't really rely on the complete callbacks because of the
"no_interrupt" flag. I'll try out some things, and get back. If you
have any ideas, I would love to hear them too!

Thank you!
Avi.
Thinh Nguyen May 9, 2023, 10:35 p.m. UTC | #32
On Tue, May 09, 2023, Avichal Rakesh wrote:
> On Mon, May 8, 2023 at 5:21 PM Thinh Nguyen <Thinh.Nguyen@synopsys.com> wrote:
> >
> > On Mon, May 08, 2023, Avichal Rakesh wrote:
> > > On Sat, May 6, 2023 at 5:53 AM Laurent Pinchart
> > > <laurent.pinchart@ideasonboard.com> wrote:
> > >
> > > > > The first thing I tried was to split one video frame over 266 frames, without
> > > > > changing the number of requests allocated. And it works! However, as Laurent
> > > > > mentioned, it does add a fair amount of bookkeeping to split a video frame into
> > > > > the required number of requests. I also hardcoded the number 266 from our
> > > > > discussion, but I am not sure how to figure out that number dynamically. 266
> > > > > also didn't work if the host started sending frames at more than 30fps :/, so
> > > > > our dynamic calculation would need to take camera's real output fps into
> > > > > account, which as far as I can tell is not known to the UVC driver.
> > > >
> > > > It would probably need to monitor how full the request queue is, and
> > > > adapt the number of bytes it queues in each request accordingly. That's
> > > > indeed quite a bit of work, for little gain compared to the option you
> > > > describe below.
> > > >
> > > Agreed, especially if the hosts already handle 0 length packets.
> > > As long as the usb controllers can keep up, the burst approach seems
> > > more reasonable.
> > >
> > > > > With those issues I tried what Laurent called the "burst" approach
> > > > > (attached below), i.e. send the video frames in as few packets as possible,
> > > > > and then queue up 0 length packets to keep the ISOC queue happy. This approach
> > > > > works perfectly as far as I can tell. Locally I tried with a Linux, Window,
> > > > > and MacOS host with no frame drops or ISOC failures on any of them!
> > > > >
> > > > > In the current patch, UVC gadget driver keeps the ISOC cadence by effectively
> > > > > maintaining a back-pressure on the USB controller (at least to the best of its
> > > > > capabilities). Any usb_request available to the UVC gadget gets immediately
> > > > > queued back to the USB controller. If a video frame is available, the frame is
> > > > > encoded, if not, the length is set to 0. The idea being that the host's polling
> > > > > and the controller's 'complete' callback will result in a somewhat consistent
> > > > > cadence for the uvc driver after the initial burst of packets.
> > > > >
> > > > > However this does mean that at worst, the new video frames are up to 63
> > > > > usb_requests behind, but assuming a 125us per usb_request, that amounts to ~8ms
> > > > > latency at the worst, which seems acceptable?
> > > >
> > > > There's a trade off between latency and the risk of underruns. We could
> > > > decrease the number of queued requests to lower the latency, as long as
> > > > we ensure the margin is high enough to avoid underruns in higher load
> > > > conditions. We could also do so only when queuing 0-size requests, and
> > > > queue the data in burst mode with a higher number of requests.
> > >
> > > Would 8ms of latency be considered significant? Unless the host asks
> > > for >125fps,
> > > that amounts to less than a frame of latency, so frames should not be dropped
> > > by the host for being "late". Admittedly, I don't know enough about UVC usage to
> > > say if 8ms (at worst) will be problematic for certain usages. The
> > > hosts don't seem to
> > > have any issues when streaming at <=60fps.
> > >
> >
> > Do you only queue a single burst at a time? We don't have to queue all
> > 63 0-length requests as a single "burst" right? We can queue multiple
> > smaller bursts of 0-length requests. This way, the UVC driver can be
> > interrupted more often, reducing the video data latency if it arrives
> > earlier. This keeps the total number of queued requests smaller while
> > ensuring that the controller isn't starved of requests (as long as this
> > smaller burst accounts for the system latency). The tradeoff here is
> > just the UVC gadget driver needs to handle request completion a little
> > more often. However, we are less likely to be underrun no matter the
> > video's fps.
> 
> The patch does currently queue all 64 0-length packets from the start,
> and then relies on completion callbacks to maintain a steady state. It
> still sets the `no_interrupt` flag, so the completion callback
> interrupts every ~16th request (at worst) which is the same as before.

I see, that's good. I thought there's only 1 out of 64 requests will
have completion interrupt.

> Ideally, the UVC driver never sits on a video frame for longer than 16
> requests, but it does keep the controller queue as full as it can,
> which means the video frame could be stuck behind a bunch of 0-length
> packets and could hypothetically be sent faster if we were to be
> smarter with queuing the 0-length requests. I say hypothetically,
> because I have been unable to capture any latency differences with and
> without the patch.
> 
> I've been trying to think of some way to account for system latency
> and only queue a smaller set of 0 length packets, but most of them
> boil down to "hope we have enough requests queued". It would be
> helpful if we could pin some numbers down that can be relied on by the
> gadget driver. For example: Can 125us per request be safely assumed;
> or is there a way to get information about how long a usb_request will
> last for? It would be very helpful if there was a way to query the
> controller's active queue, but I couldn't really find anything in the
> APIs. We can't really rely on the complete callbacks because of the
> "no_interrupt" flag. I'll try out some things, and get back. If you
> have any ideas, I would love to hear them too!
> 

If "no_interrupt" is set, it just means the gadget driver will not get a
notification of the request completion. It may help to think in term of
the moving current uframe since it's predictable and steadily
progressing. Each queued request is earmarked for a future uframe. Once
the current uframe matches the scheduled uframe of the request, the
request is completed regardless there's a missed isoc or whether the
no_interrupt is set (granted it's not stale).

The system latency introduces delay in the interrupt handler and the
handling of it. In other words, we *know* the minimum time for the
request completion, but we can't guarantee the time the gadget driver
would receive and finish handling the request completion interrupt. This
varies between different setups. An error due to system latency should
not be a frequent occurrence. As long as it's within an acceptable
threshold, it should be fine. I think your 8ms delay is fine, possibly
4ms may be more than enough. I haven't had to deal with more than 3ms
delay in our hardware testings (with multiple endpoints and devices).

I hope this info is new and useful, otherwise I may sound like a broken
record.

Note: we can always enhance UVC again should anyone report new issue
in their testing once your change is upstreamed.

BR,
Thinh
Thinh Nguyen May 9, 2023, 10:42 p.m. UTC | #33
On Tue, May 09, 2023, Thinh Nguyen wrote:
> On Tue, May 09, 2023, Avichal Rakesh wrote:
> > On Mon, May 8, 2023 at 5:21 PM Thinh Nguyen <Thinh.Nguyen@synopsys.com> wrote:
> > >
> > > On Mon, May 08, 2023, Avichal Rakesh wrote:
> > > > On Sat, May 6, 2023 at 5:53 AM Laurent Pinchart
> > > > <laurent.pinchart@ideasonboard.com> wrote:
> > > >
> > > > > > The first thing I tried was to split one video frame over 266 frames, without
> > > > > > changing the number of requests allocated. And it works! However, as Laurent
> > > > > > mentioned, it does add a fair amount of bookkeeping to split a video frame into
> > > > > > the required number of requests. I also hardcoded the number 266 from our
> > > > > > discussion, but I am not sure how to figure out that number dynamically. 266
> > > > > > also didn't work if the host started sending frames at more than 30fps :/, so
> > > > > > our dynamic calculation would need to take camera's real output fps into
> > > > > > account, which as far as I can tell is not known to the UVC driver.
> > > > >
> > > > > It would probably need to monitor how full the request queue is, and
> > > > > adapt the number of bytes it queues in each request accordingly. That's
> > > > > indeed quite a bit of work, for little gain compared to the option you
> > > > > describe below.
> > > > >
> > > > Agreed, especially if the hosts already handle 0 length packets.
> > > > As long as the usb controllers can keep up, the burst approach seems
> > > > more reasonable.
> > > >
> > > > > > With those issues I tried what Laurent called the "burst" approach
> > > > > > (attached below), i.e. send the video frames in as few packets as possible,
> > > > > > and then queue up 0 length packets to keep the ISOC queue happy. This approach
> > > > > > works perfectly as far as I can tell. Locally I tried with a Linux, Window,
> > > > > > and MacOS host with no frame drops or ISOC failures on any of them!
> > > > > >
> > > > > > In the current patch, UVC gadget driver keeps the ISOC cadence by effectively
> > > > > > maintaining a back-pressure on the USB controller (at least to the best of its
> > > > > > capabilities). Any usb_request available to the UVC gadget gets immediately
> > > > > > queued back to the USB controller. If a video frame is available, the frame is
> > > > > > encoded, if not, the length is set to 0. The idea being that the host's polling
> > > > > > and the controller's 'complete' callback will result in a somewhat consistent
> > > > > > cadence for the uvc driver after the initial burst of packets.
> > > > > >
> > > > > > However this does mean that at worst, the new video frames are up to 63
> > > > > > usb_requests behind, but assuming a 125us per usb_request, that amounts to ~8ms
> > > > > > latency at the worst, which seems acceptable?
> > > > >
> > > > > There's a trade off between latency and the risk of underruns. We could
> > > > > decrease the number of queued requests to lower the latency, as long as
> > > > > we ensure the margin is high enough to avoid underruns in higher load
> > > > > conditions. We could also do so only when queuing 0-size requests, and
> > > > > queue the data in burst mode with a higher number of requests.
> > > >
> > > > Would 8ms of latency be considered significant? Unless the host asks
> > > > for >125fps,
> > > > that amounts to less than a frame of latency, so frames should not be dropped
> > > > by the host for being "late". Admittedly, I don't know enough about UVC usage to
> > > > say if 8ms (at worst) will be problematic for certain usages. The
> > > > hosts don't seem to
> > > > have any issues when streaming at <=60fps.
> > > >
> > >
> > > Do you only queue a single burst at a time? We don't have to queue all
> > > 63 0-length requests as a single "burst" right? We can queue multiple
> > > smaller bursts of 0-length requests. This way, the UVC driver can be
> > > interrupted more often, reducing the video data latency if it arrives
> > > earlier. This keeps the total number of queued requests smaller while
> > > ensuring that the controller isn't starved of requests (as long as this
> > > smaller burst accounts for the system latency). The tradeoff here is
> > > just the UVC gadget driver needs to handle request completion a little
> > > more often. However, we are less likely to be underrun no matter the
> > > video's fps.
> > 
> > The patch does currently queue all 64 0-length packets from the start,
> > and then relies on completion callbacks to maintain a steady state. It
> > still sets the `no_interrupt` flag, so the completion callback
> > interrupts every ~16th request (at worst) which is the same as before.
> 
> I see, that's good. I thought there's only 1 out of 64 requests will
> have completion interrupt.
> 
> > Ideally, the UVC driver never sits on a video frame for longer than 16
> > requests, but it does keep the controller queue as full as it can,
> > which means the video frame could be stuck behind a bunch of 0-length
> > packets and could hypothetically be sent faster if we were to be
> > smarter with queuing the 0-length requests. I say hypothetically,
> > because I have been unable to capture any latency differences with and
> > without the patch.
> > 
> > I've been trying to think of some way to account for system latency
> > and only queue a smaller set of 0 length packets, but most of them
> > boil down to "hope we have enough requests queued". It would be
> > helpful if we could pin some numbers down that can be relied on by the
> > gadget driver. For example: Can 125us per request be safely assumed;
> > or is there a way to get information about how long a usb_request will
> > last for? It would be very helpful if there was a way to query the
> > controller's active queue, but I couldn't really find anything in the
> > APIs. We can't really rely on the complete callbacks because of the
> > "no_interrupt" flag. I'll try out some things, and get back. If you
> > have any ideas, I would love to hear them too!
> > 
> 
> If "no_interrupt" is set, it just means the gadget driver will not get a
> notification of the request completion. It may help to think in term of
> the moving current uframe since it's predictable and steadily
> progressing. Each queued request is earmarked for a future uframe. Once
> the current uframe matches the scheduled uframe of the request, the
> request is completed regardless there's a missed isoc or whether the
> no_interrupt is set (granted it's not stale).

Just want to clarify that complete here is from the perspective of the
controller. The controller driver doesn't automatically update the
request and give back the request to the gadget driver yet.

BR,
Thinh

> 
> The system latency introduces delay in the interrupt handler and the
> handling of it. In other words, we *know* the minimum time for the
> request completion, but we can't guarantee the time the gadget driver
> would receive and finish handling the request completion interrupt. This
> varies between different setups. An error due to system latency should
> not be a frequent occurrence. As long as it's within an acceptable
> threshold, it should be fine. I think your 8ms delay is fine, possibly
> 4ms may be more than enough. I haven't had to deal with more than 3ms
> delay in our hardware testings (with multiple endpoints and devices).
> 
> I hope this info is new and useful, otherwise I may sound like a broken
> record.
> 
> Note: we can always enhance UVC again should anyone report new issue
> in their testing once your change is upstreamed.
> 
> BR,
> Thinh
Avichal Rakesh May 16, 2023, 12:34 a.m. UTC | #34
On Mon, May 15, 2023 at 5:29 PM Avichal Rakesh <arakesh@google.com> wrote:

>
> TL;DR: Naively queueing up 0-length requests as fast as possible has the
> consequence of delaying the entire stream by ~8ms, but does not seem to be
> delaying frames individually any more than before. Changing the number of
> 0-length requests has very little consequence on the stream, but increases
> the chances of missed ISOCs.
>

Dan and Laurent, could you review
https://lore.kernel.org/20230508231103.1621375-1-arakesh@google.com/ when
you get the chance?

Thank you!
Avi
Thinh Nguyen May 17, 2023, 11:36 p.m. UTC | #35
Redirect "To:" Dan and Laurent.

On Mon, May 15, 2023, Avichal Rakesh wrote:
> On Mon, May 15, 2023 at 5:29 PM Avichal Rakesh <arakesh@google.com> wrote:
> 
> >
> > TL;DR: Naively queueing up 0-length requests as fast as possible has the
> > consequence of delaying the entire stream by ~8ms, but does not seem to be
> > delaying frames individually any more than before. Changing the number of
> > 0-length requests has very little consequence on the stream, but increases
> > the chances of missed ISOCs.
> >
> 
> Dan and Laurent, could you review
> https://urldefense.com/v3/__https://lore.kernel.org/20230508231103.1621375-1-arakesh@google.com/__;!!A4F2R9G_pg!aHKkn_0z8V8kQMsStnCYcrRFoSXGqvGToeGxe7f-xOyZXzDWLOPTi1CfAVHO8PvnGch1LIW5IUNyfwEsYIN5$  when
> you get the chance?
> 
> Thank you!
> Avi
Daniel Scally June 1, 2023, 9:59 a.m. UTC | #36
Hello all

On 18/05/2023 00:36, Thinh Nguyen wrote:
> Redirect "To:" Dan and Laurent.
>
> On Mon, May 15, 2023, Avichal Rakesh wrote:
>> On Mon, May 15, 2023 at 5:29 PM Avichal Rakesh <arakesh@google.com> wrote:
>>
>>> TL;DR: Naively queueing up 0-length requests as fast as possible has the
>>> consequence of delaying the entire stream by ~8ms, but does not seem to be
>>> delaying frames individually any more than before. Changing the number of
>>> 0-length requests has very little consequence on the stream, but increases
>>> the chances of missed ISOCs.
>>>
>> Dan and Laurent, could you review
>> https://urldefense.com/v3/__https://lore.kernel.org/20230508231103.1621375-1-arakesh@google.com/__;!!A4F2R9G_pg!aHKkn_0z8V8kQMsStnCYcrRFoSXGqvGToeGxe7f-xOyZXzDWLOPTi1CfAVHO8PvnGch1LIW5IUNyfwEsYIN5$  when
>> you get the chance?


I'm sorry - I've not been involved in this thread at all yet which given the length of time it's 
been going on is extremely remiss. I'll take a look now, and also test it out on the dwc2 platforms 
I have.


Thanks
Dan

>>
>> Thank you!
>> Avi
Daniel Scally June 1, 2023, 2:54 p.m. UTC | #37
Hello Avi

On 16/05/2023 01:29, Avichal Rakesh wrote:
>
> On 5/9/23 15:42, Thinh Nguyen wrote:
>> On Tue, May 09, 2023, Thinh Nguyen wrote:
>> Just want to clarify that complete here is from the perspective of the
>> controller. The controller driver doesn't automatically update the
>> request and give back the request to the gadget driver yet.
>>
>> BR,
>> Thinh
>>
> Hey all,
>
> Sorry in advance for the long message. I have been running more tests to
> quantify if (and how) performance is affected with my patch. I wanted to
> quantify what the 8ms latency looks like, so I modified the gadget driver to
> track some stats. Specifically, I tracked the round trip time of
> usb_requests and the time between two consecutive frames being queued.
>
> TL;DR: Naively queueing up 0-length requests as fast as possible has the
> consequence of delaying the entire stream by ~8ms, but does not seem to be
> delaying frames individually any more than before. Changing the number of
> 0-length requests has very little consequence on the stream, but increases
> the chances of missed ISOCs.
>
> For usb_requests' RTTs, I tracked the time it takes between 'usb_ep_queue' and
> the controller's 'complete' callback to return the usb_request. This is
> affected by the 'no_interrupt' flag as the controller will call 'complete'
> callback in batches.
>
> Frame time is defined as the time elapsed between completely queueing two
> consecutive video frames. This is meant to give an idea of how long the
> frame sat around waiting for usb_requests to come back.
>
> These were collected in 4 settings:
>    1. Unpatched: As the name implies, ToT!
>    2. Burst: (Name borrowed from Laurent) Queue requests ASAP. This is the
>          implementation in
>          https://lore.kernel.org/20230508231103.1621375-1-arakesh@google.com/.
>    3. Burst w/ more interrupts: Same as (2) but instead of interrupting every
>          16th 0-length request, interrupt every 8th request. The idea was to
>          increase the gadget's driver responsiveness at the cost of extra
>          interrupts.
>    4. Burst w/ limited 0-length packets: Same as Burst but only queue up at most
>          32 0-length requests. The idea was to reduce the number of empty
>          usb_requests that a video frame could get stuck behind.
>
> Some other things to note before the logs:
>    - All times are reported in ns
>    - Samples were randomly collected during an ongoing 1080p @ 30fps stream
>    - The number of allocated usb_requests was fixed at 64 for all tests.
>
> Okay, so here are the numbers:
>
> 1. Unpatched:
> ```
> usb_request rtt : min/max/mean: 3860718 : 220744/5710327/3372587
> usb_request rtt : min/max/mean: 3857951 : 220744/5710327/3372587
> usb_request rtt : min/max/mean: 3854004 : 220744/5710327/3372587
> usb_request rtt : min/max/mean: 3847575 : 220744/5710327/3372587
> usb_request rtt : min/max/mean: 3841594 : 220744/5710327/3372587
> usb_request rtt : min/max/mean: 3834066 : 220744/5710327/3372587
> usb_request rtt : min/max/mean: 3426879 : 220744/5710327/3372587
> usb_request rtt : min/max/mean: 3414632 : 220744/5710327/3372587
> usb_request rtt : min/max/mean: 3408203 : 220744/5710327/3372587
> usb_request rtt : min/max/mean: 3401489 : 220744/5710327/3372587
> ```
>
> ```
> frame time: curr: min/max/mean: 33385376 : 15103638/60926513/33474315
> frame time: curr: min/max/mean: 31438558 : 15103638/60926513/33472507
> frame time: curr: min/max/mean: 31650391 : 15103638/60926513/33470890
> frame time: curr: min/max/mean: 36657959 : 15103638/60926513/33473715
> frame time: curr: min/max/mean: 31758260 : 15103638/60926513/33472195
> frame time: curr: min/max/mean: 34915934 : 15103638/60926513/33473472
> frame time: curr: min/max/mean: 34599447 : 15103638/60926513/33474467
> frame time: curr: min/max/mean: 33658162 : 15103638/60926513/33474629
> frame time: curr: min/max/mean: 44577637 : 15103638/60926513/33484428
> ```
>
> With no changes, the usb_requests spend ~3ms in flight, and there
> is one video frame dispatched every ~33ms. Unfortunately (or perhaps
> fortunately?), it seems like the system latencies add to >8ms of per frame
> jitter when encoding frames.


I'm really really impressed with the thoroughness of your investigation here. Can you share how you 
tracked the frame times and RTTs? I have some devices with a DWC2 controller and I think a MUSB too 
- I'd be interested in testing those too to see how they compared.


Thanks

Dan

>
>
> 2. Burst:
> ```
> usb_request rtt curr : min/max/mean: 7966878 : 1663778/9449341/7878007
> usb_request rtt curr : min/max/mean: 7964966 : 1663778/9449341/7878008
> usb_request rtt curr : min/max/mean: 7963582 : 1663778/9449341/7878009
> usb_request rtt curr : min/max/mean: 7920817 : 1663778/9449341/7878009
> usb_request rtt curr : min/max/mean: 7923503 : 1663778/9449341/7878009
> usb_request rtt curr : min/max/mean: 7916545 : 1663778/9449341/7878009
> usb_request rtt curr : min/max/mean: 7914021 : 1663778/9449341/7878009
> usb_request rtt curr : min/max/mean: 7914348 : 1663778/9449341/7878009
> usb_request rtt curr : min/max/mean: 7914469 : 1663778/9449341/7878009
> ```
>
> ```
> frame time: curr: min/max/mean: 31675130 : 14732381/121984294/33563117
> frame time: curr: min/max/mean: 33192668 : 14732381/121984294/33563095
> frame time: curr: min/max/mean: 35761963 : 14732381/121984294/33563223
> frame time: curr: min/max/mean: 30776733 : 14732381/121984294/33563059
> frame time: curr: min/max/mean: 31940471 : 14732381/121984294/33562964
> frame time: curr: min/max/mean: 39079834 : 14732381/121984294/33563286
> frame time: curr: min/max/mean: 30279500 : 14732381/121984294/33563093
> frame time: curr: min/max/mean: 38558472 : 14732381/121984294/33563385
> frame time: curr: min/max/mean: 34294149 : 14732381/121984294/33563427
> frame time: curr: min/max/mean: 26993286 : 14732381/121984294/33563042
> ```
>
> As discussed in the thread above, we do in fact see usb_requests taking an
> average of 8ms to be returned to the gadget. This means that the driver is
> generally keeping up the back pressure to the controller, which is consistent
> with no missed ISOCs.
>
> Slightly more interesting is that the frames are still being dispatched at the
> same frequency as (1), albeit a fraction slower than (1). The jitters for
> individual frames are difficult to show here, but they seemed about the same as
> (1). IIUC, this means that the 8ms delay gets applied to the entire the entire
> stream once a steady state is reached. However, with system latencies already
> showing jitter >8ms, it is possible that a slight regression here is being
> covered up.
>
>
> 3. Burst w/ more interrupts
> ```
> usb_request rtt curr : min/max/mean: 7982137 : 906942/11119832/7712539
> usb_request rtt curr : min/max/mean: 7998698 : 906942/11119832/7712539
> usb_request rtt curr : min/max/mean: 7998088 : 906942/11119832/7712539
> usb_request rtt curr : min/max/mean: 7933024 : 906942/11119832/7712539
> usb_request rtt curr : min/max/mean: 7948364 : 906942/11119832/7712539
> usb_request rtt curr : min/max/mean: 7945476 : 906942/11119832/7712539
> usb_request rtt curr : min/max/mean: 7968872 : 906942/11119832/7712539
> usb_request rtt curr : min/max/mean: 7982219 : 906942/11119832/7712539
> usb_request rtt curr : min/max/mean: 7983155 : 906942/11119832/7712539
> usb_request rtt curr : min/max/mean: 7649048 : 906942/11119832/7712538
> ```
>
> ```
> frame time: curr: min/max/mean: 40471069 : 21488363/46635051/33474386
> frame time: curr: min/max/mean: 29503703 : 21488363/46635051/33473695
> frame time: curr: min/max/mean: 27489502 : 21488363/46635051/33472654
> frame time: curr: min/max/mean: 36647908 : 21488363/46635051/33473205
> frame time: curr: min/max/mean: 34343506 : 21488363/46635051/33473356
> frame time: curr: min/max/mean: 31686198 : 21488363/46635051/33473045
> frame time: curr: min/max/mean: 34582601 : 21488363/46635051/33473237
> frame time: curr: min/max/mean: 32652425 : 21488363/46635051/33473094
> frame time: curr: min/max/mean: 31930054 : 21488363/46635051/33472826
> frame time: curr: min/max/mean: 34652669 : 21488363/46635051/33473030
> ```
>
> There were no meaningful differences in usb_request RTTs from (2), which is
> expected, as the driver can still comfortably maintain the back pressure.
>
> However, we do see a very slight increase in responsiveness as the frames are
> queued at approximately the same frequency as (1). However, the per frame
> jitter is consistent with (1) and (2), so this could just be a sampling issue.
>
>
> 4. Burst w/ limited 0-length packets:
> ```
> usb_request rtt curr : min/max/mean: 6591879 : 644246/7986613/4202894
> usb_request rtt curr : min/max/mean: 6585978 : 644246/7986613/4202929
> usb_request rtt curr : min/max/mean: 6582805 : 644246/7986613/4202964
> usb_request rtt curr : min/max/mean: 3980957 : 644246/7986613/4202960
> usb_request rtt curr : min/max/mean: 4000162 : 644246/7986613/4202956
> usb_request rtt curr : min/max/mean: 3995484 : 644246/7986613/4202952
> usb_request rtt curr : min/max/mean: 3989380 : 644246/7986613/4202948
> usb_request rtt curr : min/max/mean: 3998942 : 644246/7986613/4202944
> usb_request rtt curr : min/max/mean: 3998779 : 644246/7986613/4202940
> ```
>
> ```
> frame time: curr: min/max/mean: 35985392 : 21889445/42860636/33474731
> frame time: curr: min/max/mean: 34428670 : 21889445/42860636/33476020
> frame time: curr: min/max/mean: 35499146 : 21889445/42860636/33478750
> frame time: curr: min/max/mean: 40315307 : 21889445/42860636/33487963
> frame time: curr: min/max/mean: 24934611 : 21889445/42860636/33476451
> frame time: curr: min/max/mean: 32321005 : 21889445/42860636/33474897
> frame time: curr: min/max/mean: 33268799 : 21889445/42860636/33474620
> frame time: curr: min/max/mean: 37001586 : 21889445/42860636/33479347
> frame time: curr: min/max/mean: 30968384 : 21889445/42860636/33475985
> ```
>
> This showed the most jitter in usb_request RTTs, likely because controller was
> frequently underrun or on the verge of underrunning. So while the requests were
> delayed by an average of 4ms, there was significant deviation per packet.
>
> The video frames were still encoded at a stable frequency, but there were still
> flickers from when ISOC requests would eventually underrun.
>
> In conclusion, it seems like the naive approach (2) has similar
> characteristics to the current version (1), albeit with an 8ms stream latency,
> and optimizing by means of changing interrupt frequency or number of requests
> result is minor, if any, performance increases.
>
> - Avi.
Avichal Rakesh June 1, 2023, 9:01 p.m. UTC | #38
On 6/1/23 07:54, Dan Scally wrote:

>> With no changes, the usb_requests spend ~3ms in flight, and there
>> is one video frame dispatched every ~33ms. Unfortunately (or perhaps
>> fortunately?), it seems like the system latencies add to >8ms of per frame
>> jitter when encoding frames.
> 
> 
> I'm really really impressed with the thoroughness of your investigation here. Can you share how you tracked the frame times and RTTs? I have some devices with a DWC2 controller and I think a MUSB too - I'd be interested in testing those too to see how they compared.
> 

Hey Dan, I appreciate the kind words! Attaching a dirty patch that I used.

Some things to note: 
- It is terribly hacky, apologies in advance (didn't think anyone would
  want to look at it).
- The patch will likely not apply cleanly to ToT, because I am working off an
  older kernel which also had some of my own debug changes before this
  patch. Hopefully it is still useful!
- It uses the time honored technique of dumping debug logs as error logs so 
  be prepared for a lot of log spam :P
- The metrics mess up if the stream configuration changes mid-stream. I just
  restarted the gadget between tests.
- The stats at stream start and end are all over the place, but should stabilize 
  in a hundred frames or so.

Patch follows below
---
 drivers/usb/gadget/function/uvc.h       | 13 +++++
 drivers/usb/gadget/function/uvc_video.c | 67 ++++++++++++++++++++++---
 2 files changed, 74 insertions(+), 6 deletions(-)

diff --git a/drivers/usb/gadget/function/uvc.h b/drivers/usb/gadget/function/uvc.h
index 40226b1f7e148..2b877ab270536 100644
--- a/drivers/usb/gadget/function/uvc.h
+++ b/drivers/usb/gadget/function/uvc.h
@@ -15,6 +15,7 @@
 #include <linux/usb/composite.h>
 #include <linux/videodev2.h>
 #include <linux/wait.h>
+#include <linux/ktime.h>
 
 #include <media/v4l2-device.h>
 #include <media/v4l2-dev.h>
@@ -81,6 +82,7 @@ struct uvc_request {
 	struct sg_table sgt;
 	u8 header[UVCG_REQUEST_HEADER_LEN];
 	struct uvc_buffer *last_buf;
+	ktime_t ts;
 };
 
 struct uvc_video {
@@ -104,6 +106,11 @@ struct uvc_video {
 	unsigned int req_size;
 	struct uvc_request *ureq;
 	struct list_head req_free;
+	ktime_t max_rtt;
+	ktime_t min_rtt;
+	ktime_t mean_rtt;
+	unsigned int num_reqs;
+	unsigned int inflight_reqs;
 	spinlock_t req_lock;
 
 	unsigned int req_int_count;
@@ -117,6 +124,12 @@ struct uvc_video {
 
 	struct uvc_video_queue queue;
 	unsigned int fid;
+
+	ktime_t frame_ts;
+	ktime_t frame_ts_min;
+	ktime_t frame_ts_max;
+	ktime_t frame_ts_mean;
+	unsigned int num_frames;
 };
 
 enum uvc_state {
diff --git a/drivers/usb/gadget/function/uvc_video.c b/drivers/usb/gadget/function/uvc_video.c
index 465dd07bf66e7..4aaa8077145d2 100644
--- a/drivers/usb/gadget/function/uvc_video.c
+++ b/drivers/usb/gadget/function/uvc_video.c
@@ -230,7 +230,8 @@ uvc_video_encode_isoc(struct usb_request *req, struct uvc_video *video,
 static int uvcg_video_ep_queue(struct uvc_video *video, struct usb_request *req)
 {
 	int ret;
-
+	struct uvc_request *ureq = req->context;
+	ureq->ts = ktime_get();
 	ret = usb_ep_queue(video->ep, req, GFP_ATOMIC);
 	if (ret < 0) {
 		uvcg_err(&video->uvc->func, "Failed to queue request (%d).\n",
@@ -255,6 +256,7 @@ uvc_video_complete(struct usb_ep *ep, struct usb_request *req)
 	struct uvc_video_queue *queue = &video->queue;
 	struct uvc_device *uvc = video->uvc;
 	unsigned long flags;
+	ktime_t rtt;
 
 	switch (req->status) {
 	case 0:
@@ -282,6 +284,22 @@ uvc_video_complete(struct usb_ep *ep, struct usb_request *req)
 		ureq->last_buf = NULL;
 	}
 
+	spin_lock_irqsave(&queue->irqlock, flags);
+	if (!req->length) {
+		video->inflight_reqs--;
+	}
+	rtt = ktime_get();
+	rtt = ktime_sub(rtt, ureq->ts);
+	video->max_rtt = max(video->max_rtt, rtt);
+	video->min_rtt = min(video->min_rtt, rtt);
+	video->num_reqs++;
+	video->mean_rtt = ((video->mean_rtt * (video->num_reqs - 1)) + rtt) / video->num_reqs;
+	printk(KERN_ERR "%s: ARLog: rtt : min/max/mean: "
+			"%lld : %lld/%lld/%lld goLRA", __FUNCTION__, rtt,
+			video->min_rtt, video->max_rtt, video->mean_rtt);
+	// printk(KERN_ERR "%s: ARLog: video->inflight_reqs = %u", __FUNCTION__, video->inflight_reqs);
+	spin_unlock_irqrestore(&queue->irqlock, flags);
+
 	spin_lock_irqsave(&video->req_lock, flags);
 	list_add_tail(&req->list, &video->req_free);
 	spin_unlock_irqrestore(&video->req_lock, flags);
@@ -315,6 +333,10 @@ uvc_video_free_requests(struct uvc_video *video)
 	}
 
 	INIT_LIST_HEAD(&video->req_free);
+	video->max_rtt = 0;
+	video->min_rtt = 2147483647;
+	video->num_reqs = 0;
+	video->inflight_reqs = 0;
 	video->req_size = 0;
 	return 0;
 }
@@ -360,6 +382,10 @@ uvc_video_alloc_requests(struct uvc_video *video)
 	}
 
 	video->req_size = req_size;
+	video->max_rtt = 0;
+	video->min_rtt = 2147483647;
+	video->inflight_reqs = 0;
+	video->num_reqs = 0;
 
 	return 0;
 
@@ -389,6 +415,11 @@ static void uvcg_video_pump(struct work_struct *work)
 	bool buf_int;
 	/* video->max_payload_size is only set when using bulk transfer */
 	bool is_bulk = video->max_payload_size;
+	unsigned int tmp = 0;
+	unsigned int *req_ctr;
+	// unsigned int target_empty_reqs = video->uvc_num_requests + 2;
+	unsigned int num_sampled_frames;
+	ktime_t frame_ts;
 
 	while (video->ep->enabled) {
 		/*
@@ -416,14 +447,31 @@ static void uvcg_video_pump(struct work_struct *work)
 			video->encode(req, video, buf);
 			/* Always interrupt for the last request of a video buffer */
 			buf_int = buf->state == UVC_BUF_STATE_DONE;
+
+			if (buf->state == UVC_BUF_STATE_DONE) {
+				frame_ts = video->frame_ts;
+				video->frame_ts = ktime_get();
+				frame_ts = ktime_sub(video->frame_ts, frame_ts);
+				if (video->num_frames >= 100) {
+					num_sampled_frames = video->num_frames - 100;
+					video->frame_ts_min = min(video->frame_ts_min, frame_ts);
+					video->frame_ts_max = max(video->frame_ts_max, frame_ts);
+					video->frame_ts_mean = ((video->frame_ts_mean * num_sampled_frames) + frame_ts) / (num_sampled_frames + 1);
+					printk(KERN_ERR "%s: ARLog: frame encode time: curr: min/max/mean: %lld : %lld/%lld/%lld", __FUNCTION__, frame_ts, video->frame_ts_min, video->frame_ts_max, video->frame_ts_mean);
+				}
+				video->num_frames++;
+			}
+			req_ctr = &tmp;
 		} else if (!(queue->flags & UVC_QUEUE_DISCONNECTED) && !is_bulk) {
 			/*
-			 * No video buffer available; the queue is still connected and
-			 * we're traferring over ISOC. Send a 0 length request to
-			 * prevent missed ISOC transfers.
-			 */
+			* No video buffer available; the queue is still connected and
+			* we're traferring over ISOC. Send a 0 length request to
+			* prevent missed ISOC transfers.
+			*/
 			req->length = 0;
-			buf_int = false;
+			// Double the interrupt frequency for empty requests.
+			buf_int = !(video->req_int_count % DIV_ROUND_UP(video->uvc_num_requests, 8));
+			req_ctr = &video->inflight_reqs;
 		} else {
 			/*
 			 * Either queue has been disconnected or no video buffer
@@ -449,7 +497,11 @@ static void uvcg_video_pump(struct work_struct *work)
 		}
 
 		/* Queue the USB request */
+
 		ret = uvcg_video_ep_queue(video, req);
+		if (ret == 0) {
+			(*req_ctr)++;
+		}
 		spin_unlock_irqrestore(&queue->irqlock, flags);
 
 		if (ret < 0) {
@@ -512,6 +564,9 @@ int uvcg_video_enable(struct uvc_video *video, int enable)
 			uvc_video_encode_isoc_sg : uvc_video_encode_isoc;
 
 	video->req_int_count = 0;
+	video->frame_ts_min = 9720988367;
+	video->frame_ts_max = 0;
+	video->frame_ts_mean = 0;
 
 	queue_work(video->async_wq, &video->pump);
 
--
diff mbox series

Patch

diff --git a/drivers/usb/gadget/function/uvc.h
b/drivers/usb/gadget/function/uvc.h
index 100475b1363e..d2c837011546 100644
--- a/drivers/usb/gadget/function/uvc.h
+++ b/drivers/usb/gadget/function/uvc.h
@@ -80,7 +80,8 @@  struct uvc_request {
struct uvc_video *video;
struct sg_table sgt;
u8 header[UVCG_REQUEST_HEADER_LEN];
- struct uvc_buffer *last_buf;
+ struct uvc_buffer *uvc_buf;
+ bool is_last;
};

struct uvc_video {
@@ -103,8 +104,9 @@  struct uvc_video {
/* Requests */
unsigned int req_size;
struct uvc_request *ureq;
- struct list_head req_free;
+ struct list_head req_free; /* protected by req_lock */
spinlock_t req_lock;
+ struct list_head req_inflight; /* protected by queue->irqlock */

unsigned int req_int_count;

diff --git a/drivers/usb/gadget/function/uvc_queue.c
b/drivers/usb/gadget/function/uvc_queue.c
index 0aa3d7e1f3cc..32fab1e5b32d 100644
--- a/drivers/usb/gadget/function/uvc_queue.c
+++ b/drivers/usb/gadget/function/uvc_queue.c
@@ -250,10 +250,22 @@  unsigned long
uvcg_queue_get_unmapped_area(struct uvc_video_queue *queue,
*/
void uvcg_queue_cancel(struct uvc_video_queue *queue, int disconnect)
{
- struct uvc_buffer *buf;
unsigned long flags;

spin_lock_irqsave(&queue->irqlock, flags);
+ uvcg_queue_cancel_locked(queue, disconnect);
+ spin_unlock_irqrestore(&queue->irqlock, flags);
+}
+
+/*
+ * see uvcg_queue_cancel()
+ *
+ * Must be called with &queue_irqlock held.
+ */
+void uvcg_queue_cancel_locked(struct uvc_video_queue *queue, int disconnect)
+{
+ struct uvc_buffer *buf;
+
while (!list_empty(&queue->irqqueue)) {
buf = list_first_entry(&queue->irqqueue, struct uvc_buffer,
queue);
@@ -272,7 +284,6 @@  void uvcg_queue_cancel(struct uvc_video_queue
*queue, int disconnect)
*/
if (disconnect)
queue->flags |= UVC_QUEUE_DISCONNECTED;
- spin_unlock_irqrestore(&queue->irqlock, flags);
}

/*
@@ -356,4 +367,3 @@  struct uvc_buffer *uvcg_queue_head(struct
uvc_video_queue *queue)

return buf;
}
-
diff --git a/drivers/usb/gadget/function/uvc_queue.h
b/drivers/usb/gadget/function/uvc_queue.h
index 41f87b917f6b..622c0f146847 100644
--- a/drivers/usb/gadget/function/uvc_queue.h
+++ b/drivers/usb/gadget/function/uvc_queue.h
@@ -89,6 +89,7 @@  unsigned long uvcg_queue_get_unmapped_area(struct
uvc_video_queue *queue,
#endif /* CONFIG_MMU */

void uvcg_queue_cancel(struct uvc_video_queue *queue, int disconnect);
+void uvcg_queue_cancel_locked(struct uvc_video_queue *queue, int disconnect);

int uvcg_queue_enable(struct uvc_video_queue *queue, int enable);

@@ -98,4 +99,3 @@  void uvcg_complete_buffer(struct uvc_video_queue *queue,
struct uvc_buffer *uvcg_queue_head(struct uvc_video_queue *queue);

#endif /* _UVC_QUEUE_H_ */
-
diff --git a/drivers/usb/gadget/function/uvc_video.c
b/drivers/usb/gadget/function/uvc_video.c
index dd1c6b2ca7c6..6c2698eca242 100644
--- a/drivers/usb/gadget/function/uvc_video.c
+++ b/drivers/usb/gadget/function/uvc_video.c
@@ -109,19 +109,18 @@  uvc_video_encode_bulk(struct usb_request *req,
struct uvc_video *video,

req->length = video->req_size - len;
req->zero = video->payload_size == video->max_payload_size;
+ ureq->uvc_buf = buf;

if (buf->bytesused == video->queue.buf_used) {
video->queue.buf_used = 0;
buf->state = UVC_BUF_STATE_DONE;
list_del(&buf->queue);
video->fid ^= UVC_STREAM_FID;
- ureq->last_buf = buf;
-
+ ureq->is_last = true;
video->payload_size = 0;
}

if (video->payload_size == video->max_payload_size ||
- video->queue.flags & UVC_QUEUE_DROP_INCOMPLETE ||
buf->bytesused == video->queue.buf_used)
video->payload_size = 0;