Message ID | 1616008439-15494-1-git-send-email-sparmar@cadence.com |
---|---|
State | Superseded |
Headers | show |
Series | [v2] usb: cdns3: Optimize DMA request buffer allocation | expand |
On 21-03-18 07:32:45, Christoph Hellwig wrote: > On Wed, Mar 17, 2021 at 08:13:59PM +0100, Sanket Parmar wrote: > > dma_alloc_coherent() might fail on the platform with a small > > DMA region. > > > > To avoid such failure in cdns3_prepare_aligned_request_buf(), > > dma_alloc_coherent() is replaced with dma_alloc_noncoherent() > > to allocate aligned request buffer of dynamic length. > > > > Reported-by: Aswath Govindraju <a-govindraju@ti.com> > > Signed-off-by: Sanket Parmar <sparmar@cadence.com> > > Looks good to me: > > Reviewed-by: Christoph Hellwig <hch@lst.de> Hi Christoph, I would like to confirm: does dma_alloc_noncoherent() allocate less than PAGE_SIZE of memory when the requested buffer size is small (e.g., 64 bytes)? -- Thanks, Peter Chen
On 21-03-17 20:13:59, Sanket Parmar wrote: > dma_alloc_coherent() might fail on the platform with a small > DMA region. > > To avoid such failure in cdns3_prepare_aligned_request_buf(), > dma_alloc_coherent() is replaced with dma_alloc_noncoherent() > to allocate aligned request buffer of dynamic length. > > Reported-by: Aswath Govindraju <a-govindraju@ti.com> > Signed-off-by: Sanket Parmar <sparmar@cadence.com> > --- > > Changelog: > v2: > - used dma_*_noncoherent() APIs > - changed the commit log > > drivers/usb/cdns3/cdns3-gadget.c | 30 ++++++++++++++++++++++++------ > drivers/usb/cdns3/cdns3-gadget.h | 2 ++ > 2 files changed, 26 insertions(+), 6 deletions(-) > > diff --git a/drivers/usb/cdns3/cdns3-gadget.c b/drivers/usb/cdns3/cdns3-gadget.c > index 0b892a2..126087b 100644 > --- a/drivers/usb/cdns3/cdns3-gadget.c > +++ b/drivers/usb/cdns3/cdns3-gadget.c > @@ -819,9 +819,15 @@ void cdns3_gadget_giveback(struct cdns3_endpoint *priv_ep, > priv_ep->dir); > > if ((priv_req->flags & REQUEST_UNALIGNED) && > - priv_ep->dir == USB_DIR_OUT && !request->status) > + priv_ep->dir == USB_DIR_OUT && !request->status) { > + /* Make DMA buffer CPU accessible */ > + dma_sync_single_for_cpu(priv_dev->sysdev, > + priv_req->aligned_buf->dma, > + priv_req->aligned_buf->size, > + priv_req->aligned_buf->dir); > memcpy(request->buf, priv_req->aligned_buf->buf, > request->length); > + } > > priv_req->flags &= ~(REQUEST_PENDING | REQUEST_UNALIGNED); > /* All TRBs have finished, clear the counter */ > @@ -883,8 +889,8 @@ static void cdns3_free_aligned_request_buf(struct work_struct *work) > * interrupts. 
> */ > spin_unlock_irqrestore(&priv_dev->lock, flags); > - dma_free_coherent(priv_dev->sysdev, buf->size, > - buf->buf, buf->dma); > + dma_free_noncoherent(priv_dev->sysdev, buf->size, > + buf->buf, buf->dma, buf->dir); > kfree(buf); > spin_lock_irqsave(&priv_dev->lock, flags); > } > @@ -911,10 +917,13 @@ static int cdns3_prepare_aligned_request_buf(struct cdns3_request *priv_req) > return -ENOMEM; > > buf->size = priv_req->request.length; > + buf->dir = usb_endpoint_dir_in(priv_ep->endpoint.desc) ? > + DMA_TO_DEVICE : DMA_FROM_DEVICE; > > - buf->buf = dma_alloc_coherent(priv_dev->sysdev, > + buf->buf = dma_alloc_noncoherent(priv_dev->sysdev, > buf->size, > &buf->dma, > + buf->dir, > GFP_ATOMIC); > if (!buf->buf) { > kfree(buf); > @@ -936,10 +945,18 @@ static int cdns3_prepare_aligned_request_buf(struct cdns3_request *priv_req) > } > > if (priv_ep->dir == USB_DIR_IN) { > + /* Make DMA buffer CPU accessible */ > + dma_sync_single_for_cpu(priv_dev->sysdev, > + buf->dma, buf->size, buf->dir); > memcpy(buf->buf, priv_req->request.buf, > priv_req->request.length); > } > > + /* Transfer DMA buffer ownership back to device */ > + dma_sync_single_for_device(priv_dev->sysdev, > + buf->dma, buf->size, buf->dir); > + > + One more blank line. Otherwise, it seems OK for me. 
> priv_req->flags |= REQUEST_UNALIGNED; > trace_cdns3_prepare_aligned_request(priv_req); > > @@ -3088,9 +3105,10 @@ static void cdns3_gadget_exit(struct cdns *cdns) > struct cdns3_aligned_buf *buf; > > buf = cdns3_next_align_buf(&priv_dev->aligned_buf_list); > - dma_free_coherent(priv_dev->sysdev, buf->size, > + dma_free_noncoherent(priv_dev->sysdev, buf->size, > buf->buf, > - buf->dma); > + buf->dma, > + buf->dir); > > list_del(&buf->list); > kfree(buf); > diff --git a/drivers/usb/cdns3/cdns3-gadget.h b/drivers/usb/cdns3/cdns3-gadget.h > index ecf9b91..c5660f2 100644 > --- a/drivers/usb/cdns3/cdns3-gadget.h > +++ b/drivers/usb/cdns3/cdns3-gadget.h > @@ -12,6 +12,7 @@ > #ifndef __LINUX_CDNS3_GADGET > #define __LINUX_CDNS3_GADGET > #include <linux/usb/gadget.h> > +#include <linux/dma-direction.h> > > /* > * USBSS-DEV register interface. > @@ -1205,6 +1206,7 @@ struct cdns3_aligned_buf { > void *buf; > dma_addr_t dma; > u32 size; > + enum dma_data_direction dir; > unsigned in_use:1; > struct list_head list; > }; > -- > 2.4.5 > -- Thanks, Peter Chen
Hi Peter, > On 21-03-17 20:13:59, Sanket Parmar wrote: > > dma_alloc_coherent() might fail on the platform with a small > > DMA region. > > > > To avoid such failure in cdns3_prepare_aligned_request_buf(), > > dma_alloc_coherent() is replaced with dma_alloc_noncoherent() > > to allocate aligned request buffer of dynamic length. > > > > Reported-by: Aswath Govindraju <a-govindraju@ti.com> > > Signed-off-by: Sanket Parmar <sparmar@cadence.com> > > --- > > > > Changelog: > > v2: > > - used dma_*_noncoherent() APIs > > - changed the commit log > > > > drivers/usb/cdns3/cdns3-gadget.c | 30 ++++++++++++++++++++++++----- > - > > drivers/usb/cdns3/cdns3-gadget.h | 2 ++ > > 2 files changed, 26 insertions(+), 6 deletions(-) > > > > diff --git a/drivers/usb/cdns3/cdns3-gadget.c b/drivers/usb/cdns3/cdns3- > gadget.c > > index 0b892a2..126087b 100644 > > --- a/drivers/usb/cdns3/cdns3-gadget.c > > +++ b/drivers/usb/cdns3/cdns3-gadget.c > > @@ -819,9 +819,15 @@ void cdns3_gadget_giveback(struct > cdns3_endpoint *priv_ep, > > priv_ep->dir); > > > > if ((priv_req->flags & REQUEST_UNALIGNED) && > > - priv_ep->dir == USB_DIR_OUT && !request->status) > > + priv_ep->dir == USB_DIR_OUT && !request->status) { > > + /* Make DMA buffer CPU accessible */ > > + dma_sync_single_for_cpu(priv_dev->sysdev, > > + priv_req->aligned_buf->dma, > > + priv_req->aligned_buf->size, > > + priv_req->aligned_buf->dir); > > memcpy(request->buf, priv_req->aligned_buf->buf, > > request->length); > > + } > > > > priv_req->flags &= ~(REQUEST_PENDING | REQUEST_UNALIGNED); > > /* All TRBs have finished, clear the counter */ > > @@ -883,8 +889,8 @@ static void cdns3_free_aligned_request_buf(struct > work_struct *work) > > * interrupts. 
> > */ > > spin_unlock_irqrestore(&priv_dev->lock, flags); > > - dma_free_coherent(priv_dev->sysdev, buf->size, > > - buf->buf, buf->dma); > > + dma_free_noncoherent(priv_dev->sysdev, buf- > >size, > > + buf->buf, buf->dma, buf->dir); > > kfree(buf); > > spin_lock_irqsave(&priv_dev->lock, flags); > > } > > @@ -911,10 +917,13 @@ static int > cdns3_prepare_aligned_request_buf(struct cdns3_request *priv_req) > > return -ENOMEM; > > > > buf->size = priv_req->request.length; > > + buf->dir = usb_endpoint_dir_in(priv_ep->endpoint.desc) ? > > + DMA_TO_DEVICE : DMA_FROM_DEVICE; > > > > - buf->buf = dma_alloc_coherent(priv_dev->sysdev, > > + buf->buf = dma_alloc_noncoherent(priv_dev->sysdev, > > buf->size, > > &buf->dma, > > + buf->dir, > > GFP_ATOMIC); > > if (!buf->buf) { > > kfree(buf); > > @@ -936,10 +945,18 @@ static int > cdns3_prepare_aligned_request_buf(struct cdns3_request *priv_req) > > } > > > > if (priv_ep->dir == USB_DIR_IN) { > > + /* Make DMA buffer CPU accessible */ > > + dma_sync_single_for_cpu(priv_dev->sysdev, > > + buf->dma, buf->size, buf->dir); > > memcpy(buf->buf, priv_req->request.buf, > > priv_req->request.length); > > } > > > > + /* Transfer DMA buffer ownership back to device */ > > + dma_sync_single_for_device(priv_dev->sysdev, > > + buf->dma, buf->size, buf->dir); > > + > > + > > One more blank line. > > Otherwise, it seems OK for me. I have remove this blank line. New patch has been posted already. 
> > > priv_req->flags |= REQUEST_UNALIGNED; > > trace_cdns3_prepare_aligned_request(priv_req); > > > > @@ -3088,9 +3105,10 @@ static void cdns3_gadget_exit(struct cdns *cdns) > > struct cdns3_aligned_buf *buf; > > > > buf = cdns3_next_align_buf(&priv_dev->aligned_buf_list); > > - dma_free_coherent(priv_dev->sysdev, buf->size, > > + dma_free_noncoherent(priv_dev->sysdev, buf->size, > > buf->buf, > > - buf->dma); > > + buf->dma, > > + buf->dir); > > > > list_del(&buf->list); > > kfree(buf); > > diff --git a/drivers/usb/cdns3/cdns3-gadget.h b/drivers/usb/cdns3/cdns3- > gadget.h > > index ecf9b91..c5660f2 100644 > > --- a/drivers/usb/cdns3/cdns3-gadget.h > > +++ b/drivers/usb/cdns3/cdns3-gadget.h > > @@ -12,6 +12,7 @@ > > #ifndef __LINUX_CDNS3_GADGET > > #define __LINUX_CDNS3_GADGET > > #include <linux/usb/gadget.h> > > +#include <linux/dma-direction.h> > > > > /* > > * USBSS-DEV register interface. > > @@ -1205,6 +1206,7 @@ struct cdns3_aligned_buf { > > void *buf; > > dma_addr_t dma; > > u32 size; > > + enum dma_data_direction dir; > > unsigned in_use:1; > > struct list_head list; > > }; > > -- > > 2.4.5 > > > > -- > > Thanks, > Peter Chen Thanks, Sanket
diff --git a/drivers/usb/cdns3/cdns3-gadget.c b/drivers/usb/cdns3/cdns3-gadget.c index 0b892a2..126087b 100644 --- a/drivers/usb/cdns3/cdns3-gadget.c +++ b/drivers/usb/cdns3/cdns3-gadget.c @@ -819,9 +819,15 @@ void cdns3_gadget_giveback(struct cdns3_endpoint *priv_ep, priv_ep->dir); if ((priv_req->flags & REQUEST_UNALIGNED) && - priv_ep->dir == USB_DIR_OUT && !request->status) + priv_ep->dir == USB_DIR_OUT && !request->status) { + /* Make DMA buffer CPU accessible */ + dma_sync_single_for_cpu(priv_dev->sysdev, + priv_req->aligned_buf->dma, + priv_req->aligned_buf->size, + priv_req->aligned_buf->dir); memcpy(request->buf, priv_req->aligned_buf->buf, request->length); + } priv_req->flags &= ~(REQUEST_PENDING | REQUEST_UNALIGNED); /* All TRBs have finished, clear the counter */ @@ -883,8 +889,8 @@ static void cdns3_free_aligned_request_buf(struct work_struct *work) * interrupts. */ spin_unlock_irqrestore(&priv_dev->lock, flags); - dma_free_coherent(priv_dev->sysdev, buf->size, - buf->buf, buf->dma); + dma_free_noncoherent(priv_dev->sysdev, buf->size, + buf->buf, buf->dma, buf->dir); kfree(buf); spin_lock_irqsave(&priv_dev->lock, flags); } @@ -911,10 +917,13 @@ static int cdns3_prepare_aligned_request_buf(struct cdns3_request *priv_req) return -ENOMEM; buf->size = priv_req->request.length; + buf->dir = usb_endpoint_dir_in(priv_ep->endpoint.desc) ? 
+ DMA_TO_DEVICE : DMA_FROM_DEVICE; - buf->buf = dma_alloc_coherent(priv_dev->sysdev, + buf->buf = dma_alloc_noncoherent(priv_dev->sysdev, buf->size, &buf->dma, + buf->dir, GFP_ATOMIC); if (!buf->buf) { kfree(buf); @@ -936,10 +945,18 @@ static int cdns3_prepare_aligned_request_buf(struct cdns3_request *priv_req) } if (priv_ep->dir == USB_DIR_IN) { + /* Make DMA buffer CPU accessible */ + dma_sync_single_for_cpu(priv_dev->sysdev, + buf->dma, buf->size, buf->dir); memcpy(buf->buf, priv_req->request.buf, priv_req->request.length); } + /* Transfer DMA buffer ownership back to device */ + dma_sync_single_for_device(priv_dev->sysdev, + buf->dma, buf->size, buf->dir); + + priv_req->flags |= REQUEST_UNALIGNED; trace_cdns3_prepare_aligned_request(priv_req); @@ -3088,9 +3105,10 @@ static void cdns3_gadget_exit(struct cdns *cdns) struct cdns3_aligned_buf *buf; buf = cdns3_next_align_buf(&priv_dev->aligned_buf_list); - dma_free_coherent(priv_dev->sysdev, buf->size, + dma_free_noncoherent(priv_dev->sysdev, buf->size, buf->buf, - buf->dma); + buf->dma, + buf->dir); list_del(&buf->list); kfree(buf); diff --git a/drivers/usb/cdns3/cdns3-gadget.h b/drivers/usb/cdns3/cdns3-gadget.h index ecf9b91..c5660f2 100644 --- a/drivers/usb/cdns3/cdns3-gadget.h +++ b/drivers/usb/cdns3/cdns3-gadget.h @@ -12,6 +12,7 @@ #ifndef __LINUX_CDNS3_GADGET #define __LINUX_CDNS3_GADGET #include <linux/usb/gadget.h> +#include <linux/dma-direction.h> /* * USBSS-DEV register interface. @@ -1205,6 +1206,7 @@ struct cdns3_aligned_buf { void *buf; dma_addr_t dma; u32 size; + enum dma_data_direction dir; unsigned in_use:1; struct list_head list; };
dma_alloc_coherent() might fail on platforms with a small DMA region. To avoid such a failure in cdns3_prepare_aligned_request_buf(), dma_alloc_coherent() is replaced with dma_alloc_noncoherent() to allocate the aligned request buffer of dynamic length. Reported-by: Aswath Govindraju <a-govindraju@ti.com> Signed-off-by: Sanket Parmar <sparmar@cadence.com> --- Changelog: v2: - used dma_*_noncoherent() APIs - changed the commit log drivers/usb/cdns3/cdns3-gadget.c | 30 ++++++++++++++++++++++++------ drivers/usb/cdns3/cdns3-gadget.h | 2 ++ 2 files changed, 26 insertions(+), 6 deletions(-)