diff mbox series

HID: intel-ish-hid: Use dma_alloc_coherent for firmware update

Message ID 20220209050947.2119465-1-gwendal@chromium.org
State Accepted
Commit f97ec5d75e9261a5da78dc28a8955b7cc0c4468b
Headers show
Series HID: intel-ish-hid: Use dma_alloc_coherent for firmware update | expand

Commit Message

Gwendal Grignou Feb. 9, 2022, 5:09 a.m. UTC
Allocating memory with kmalloc and GPF_DMA32 is not allowed, the
allocator will ignore the attribute.

Instead, use dma_alloc_coherent() API as we allocate a small amount of
memory to transfer firmware fragment to the ISH.

On Arcada chromebook, after the patch the warning:
"Unexpected gfp: 0x4 (GFP_DMA32). Fixing up to gfp: 0xcc0 (GFP_KERNEL).  Fix your code!"
is gone. The ISH firmware is loaded properly and we can interact with
the ISH:
> ectool  --name cros_ish version
...
Build info:    arcada_ish_v2.0.3661+3c1a1c1ae0 2022-02-08 05:37:47 @localhost
Tool version:  v2.0.12300-900b03ec7f 2022-02-08 10:01:48 @localhost

Fixes: commit 91b228107da3 ("HID: intel-ish-hid: ISH firmware loader client driver")
Signed-off-by: Gwendal Grignou <gwendal@chromium.org>
Cc: stable@vger.kernel.org
---
 drivers/hid/intel-ish-hid/ishtp-fw-loader.c | 29 +++------------------
 1 file changed, 3 insertions(+), 26 deletions(-)

Comments

Gwendal Grignou Feb. 10, 2022, 6:34 p.m. UTC | #1
On Wed, Feb 9, 2022 at 6:51 PM srinivas pandruvada
<srinivas.pandruvada@linux.intel.com> wrote:
>
> On Wed, 2022-02-09 at 16:59 -0800, Gwendal Grignou wrote:
> > On Wed, Feb 9, 2022 at 10:52 AM srinivas pandruvada
> > <srinivas.pandruvada@linux.intel.com> wrote:
> > >
> > > On Tue, 2022-02-08 at 21:09 -0800, Gwendal Grignou wrote:
> > > > Allocating memory with kmalloc and GPF_DMA32 is not allowed, the
> > > > allocator will ignore the attribute.
> > > >
> > > Looks like there is new error flow added here for this flag.
> > > Is this just removing GFP_DMA32 not enough?
> > It was already ignored. It is not enough and I don't know why since
> > the virtual and physical addresses are in the same range:
> >
> > With using kmalloc/dma_single_sync:
> > 5.10 (firmware not loading)
> > [    3.837996] ish-loader {C804D06A-55BD-4EA7-ADED-1E31228C76DC}:
> > kmalloc/dma_map_single: v:0xffff91531ae88000, phy:0x0000000085afe000
> > [    3.838003] ish-loader {C804D06A-55BD-4EA7-ADED-1E31228C76DC}:
> > xfer_mode=dma offset=0x00000000 size=0x4000 is_last=0
> > ddr_phys_addr=0x0000000085afe000
> > ...
> >
> > 4.19 (firmware loading)
> > [    3.878300] ish-loader {C804D06A-55BD-4EA7-ADED-1E31228C76DC}:
> > kmalloc/dma_map_single: v:0xffff88c145480000, phy:0x0000000085480000
> > [    3.878322] ish-loader {C804D06A-55BD-4EA7-ADED-1E31228C76DC}:
> > xfer_mode=dma offset=0x00000000 size=0x4000 is_last=0
> > ddr_phys_addr=0x0000000085480000
> > ...
> >
> > I also considered flushing the CPU cache before the
> > dma_sync_single_for_device call, and calling
> > dma_sync_single_for_cpu()
> > after loader_cl_send() to be allowed to write into the buffer again.
> > But these did not help either.
> >
> > But using dma_alloc_coherent() clearly works and its simpler API
> > makes
> That is OK.
>
> > it more appropriate for the task at hand.
> >
> > For reference, the same log when using dma_alloc_coherent().
> > [    3.779723] ish-loader {C804D06A-55BD-4EA7-ADED-1E31228C76DC}:
> > dma_alloc_coherent: v:0xffff9beb81048000, phy:0x0000000001048000
> > [    3.779731] ish-loader {C804D06A-55BD-4EA7-ADED-1E31228C76DC}:
> > xfer_mode=dma offset=0x00000000 size=0x4000 is_last=0
> > ddr_phys_addr=0x0000000001048000
> > ...
> >
> > >
> > > > Instead, use dma_alloc_coherent() API as we allocate a small
> > > > amount
> > > > of
> > > > memory to transfer firmware fragment to the ISH.
> > > >
> > > > On Arcada chromebook, after the patch the warning:
> > > > "Unexpected gfp: 0x4 (GFP_DMA32). Fixing up to gfp: 0xcc0
> > > > (GFP_KERNEL).  Fix your code!"
> > > > is gone. The ISH firmware is loaded properly and we can interact
> > > > with
> > > > the ISH:
> > > > > ectool  --name cros_ish version
> > > > ...
> > > > Build info:    arcada_ish_v2.0.3661+3c1a1c1ae0 2022-02-08
> > > > 05:37:47
> > > > @localhost
> > > > Tool version:  v2.0.12300-900b03ec7f 2022-02-08 10:01:48
> > > > @localhost
> > > >
> > > > Fixes: commit 91b228107da3 ("HID: intel-ish-hid: ISH firmware
> > > > loader
> > > > client driver")
> > > > Signed-off-by: Gwendal Grignou <gwendal@chromium.org>
> > > > Cc: stable@vger.kernel.org
> > > > ---
> > > >  drivers/hid/intel-ish-hid/ishtp-fw-loader.c | 29 +++------------
> > > > ----
> > > > --
> > > >  1 file changed, 3 insertions(+), 26 deletions(-)
> > > >
> > > > diff --git a/drivers/hid/intel-ish-hid/ishtp-fw-loader.c
> > > > b/drivers/hid/intel-ish-hid/ishtp-fw-loader.c
> > > > index e24988586710..16aa030af845 100644
> > > > --- a/drivers/hid/intel-ish-hid/ishtp-fw-loader.c
> > > > +++ b/drivers/hid/intel-ish-hid/ishtp-fw-loader.c
> > > > @@ -661,21 +661,12 @@ static int ish_fw_xfer_direct_dma(struct
> > > > ishtp_cl_data *client_data,
> > > >          */
> > > >         payload_max_size &= ~(L1_CACHE_BYTES - 1);
> > > >
> > > > -       dma_buf = kmalloc(payload_max_size, GFP_KERNEL |
> > > > GFP_DMA32);
> > > > +       dma_buf = dma_alloc_coherent(devc, payload_max_size,
> > > > &dma_buf_phy, GFP_KERNEL);
> > > >         if (!dma_buf) {
> > > >                 client_data->flag_retry = true;
> > > >                 return -ENOMEM;
> > > >         }
> > > >
> > > > -       dma_buf_phy = dma_map_single(devc, dma_buf,
> > > > payload_max_size,
> > > > -                                    DMA_TO_DEVICE);
> > > > -       if (dma_mapping_error(devc, dma_buf_phy)) {
> > > > -               dev_err(cl_data_to_dev(client_data), "DMA map
> > > > failed\n");
> > > > -               client_data->flag_retry = true;
> > > > -               rv = -ENOMEM;
> > > > -               goto end_err_dma_buf_release;
> > > > -       }
> > > > -
> > > >         ldr_xfer_dma_frag.fragment.hdr.command =
> > > > LOADER_CMD_XFER_FRAGMENT;
> > > >         ldr_xfer_dma_frag.fragment.xfer_mode =
> > > > LOADER_XFER_MODE_DIRECT_DMA;
> > > >         ldr_xfer_dma_frag.ddr_phys_addr = (u64)dma_buf_phy;
> > > > @@ -695,14 +686,7 @@ static int ish_fw_xfer_direct_dma(struct
> > > > ishtp_cl_data *client_data,
> > > >                 ldr_xfer_dma_frag.fragment.size = fragment_size;
> > > >                 memcpy(dma_buf, &fw->data[fragment_offset],
> > > > fragment_size);
> > > >
> > > > -               dma_sync_single_for_device(devc, dma_buf_phy,
> > > > -                                          payload_max_size,
> > > > -                                          DMA_TO_DEVICE);
> > > > -
> > > Any reason for removal of sync?
> > It is not needed since we are using dma_alloc_coherent(). From [1]:
> > """
> > void *
> > dma_alloc_coherent(struct device *dev, size_t size,
> >    dma_addr_t *dma_handle, gfp_t flag)
> >
> > Consistent memory is memory for which a write by either the device or
> > the processor can immediately be read by the processor or device
> > without having to worry about caching effects.  (You may however need
> > to make sure to flush the processor's write buffers before telling
> > devices to read that memory.)
> >
> > This routine allocates a region of <size> bytes of consistent memory.
> > """'
> >
>  I checked with some dma folks. This call may still be required for
> some device. Most likely not as this is on chip device.
> What happens if you leave this call? I worry for regression on some old
> systems.
I truly believe this call is unnecessary: From the docs, dma_sync_...
is only for streaming DMA mappings, not for coherent memory. Another
link here: http://lists.infradead.org/pipermail/linux-arm-kernel/2015-May/341712.html
Looking at older drivers, for instance exec_drive_command() in
driver/block/mtip32xx/mtip32xx.c, there are no dma_sync_ between
dma_alloc_ and dma_free_.
In a simple driver - drivers/scsi/advansys.c - that only uses coherent
DMA memory, no dma_sync calls are used.
As long as we have a memory barrier before sending the packet
downstream, we are covered. I rebooted my machine in a loop without
error overnight.

Gwendal.

>
> Thanks,
> Srinivas
>
>
>
> > > Thanks,
> > > Srinivas
> > >
> > > > -               /*
> > > > -                * Flush cache here because the
> > > > dma_sync_single_for_device()
> > > > -                * does not do for x86.
> > > > -                */
> > > > +               /* Flush cache to be sure the data is in main
> > > > memory.
> > > > */
> > > >                 clflush_cache_range(dma_buf, payload_max_size);
> > > >
> > > >                 dev_dbg(cl_data_to_dev(client_data),
> > > > @@ -725,15 +709,8 @@ static int ish_fw_xfer_direct_dma(struct
> > > > ishtp_cl_data *client_data,
> > > >                 fragment_offset += fragment_size;
> > > >         }
> > > >
> > > > -       dma_unmap_single(devc, dma_buf_phy, payload_max_size,
> > > > DMA_TO_DEVICE);
> > > > -       kfree(dma_buf);
> > > > -       return 0;
> > > > -
> > > >  end_err_resp_buf_release:
> > > > -       /* Free ISH buffer if not done already, in error case */
> > > > -       dma_unmap_single(devc, dma_buf_phy, payload_max_size,
> > > > DMA_TO_DEVICE);
> > > > -end_err_dma_buf_release:
> > > > -       kfree(dma_buf);
> > > > +       dma_free_coherent(devc, payload_max_size, dma_buf,
> > > > dma_buf_phy);
> > > >         return rv;
> > > >  }
> > > >
> > >
> >
> > [1]https://www.kernel.org/doc/Documentation/DMA-API.txt
>
srinivas pandruvada Feb. 10, 2022, 7:39 p.m. UTC | #2
On Thu, 2022-02-10 at 10:34 -0800, Gwendal Grignou wrote:
> On Wed, Feb 9, 2022 at 6:51 PM srinivas pandruvada
> <srinivas.pandruvada@linux.intel.com> wrote:
> > 
> > On Wed, 2022-02-09 at 16:59 -0800, Gwendal Grignou wrote:
> > > On Wed, Feb 9, 2022 at 10:52 AM srinivas pandruvada
> > > <srinivas.pandruvada@linux.intel.com> wrote:
> > > > 
> > > > On Tue, 2022-02-08 at 21:09 -0800, Gwendal Grignou wrote:
> > > > > Allocating memory with kmalloc and GPF_DMA32 is not allowed,
> > > > > the
> > > > > allocator will ignore the attribute.
> > > > > 
> > > > Looks like there is new error flow added here for this flag.
> > > > Is this just removing GFP_DMA32 not enough?
> > > It was already ignored. It is not enough and I don't know why
> > > since
> > > the virtual and physical addresses are in the same range:
> > > 
> > > With using kmalloc/dma_single_sync:
> > > 5.10 (firmware not loading)
> > > [    3.837996] ish-loader {C804D06A-55BD-4EA7-ADED-1E31228C76DC}:
> > > kmalloc/dma_map_single: v:0xffff91531ae88000,
> > > phy:0x0000000085afe000
> > > [    3.838003] ish-loader {C804D06A-55BD-4EA7-ADED-1E31228C76DC}:
> > > xfer_mode=dma offset=0x00000000 size=0x4000 is_last=0
> > > ddr_phys_addr=0x0000000085afe000
> > > ...
> > > 
> > > 4.19 (firmware loading)
> > > [    3.878300] ish-loader {C804D06A-55BD-4EA7-ADED-1E31228C76DC}:
> > > kmalloc/dma_map_single: v:0xffff88c145480000,
> > > phy:0x0000000085480000
> > > [    3.878322] ish-loader {C804D06A-55BD-4EA7-ADED-1E31228C76DC}:
> > > xfer_mode=dma offset=0x00000000 size=0x4000 is_last=0
> > > ddr_phys_addr=0x0000000085480000
> > > ...
> > > 
> > > I also considered flushing the CPU cache before the
> > > dma_sync_single_for_device call, and calling
> > > dma_sync_single_for_cpu()
> > > after loader_cl_send() to be allowed to write into the buffer
> > > again.
> > > But these did not help either.
> > > 
> > > But using dma_alloc_coherent() clearly works and its simpler API
> > > makes
> > That is OK.
> > 
> > > it more appropriate for the task at hand.
> > > 
> > > For reference, the same log when using dma_alloc_coherent().
> > > [    3.779723] ish-loader {C804D06A-55BD-4EA7-ADED-1E31228C76DC}:
> > > dma_alloc_coherent: v:0xffff9beb81048000, phy:0x0000000001048000
> > > [    3.779731] ish-loader {C804D06A-55BD-4EA7-ADED-1E31228C76DC}:
> > > xfer_mode=dma offset=0x00000000 size=0x4000 is_last=0
> > > ddr_phys_addr=0x0000000001048000
> > > ...
> > > 
> > > > 
> > > > > Instead, use dma_alloc_coherent() API as we allocate a small
> > > > > amount
> > > > > of
> > > > > memory to transfer firmware fragment to the ISH.
> > > > > 
> > > > > On Arcada chromebook, after the patch the warning:
> > > > > "Unexpected gfp: 0x4 (GFP_DMA32). Fixing up to gfp: 0xcc0
> > > > > (GFP_KERNEL).  Fix your code!"
> > > > > is gone. The ISH firmware is loaded properly and we can
> > > > > interact
> > > > > with
> > > > > the ISH:
> > > > > > ectool  --name cros_ish version
> > > > > ...
> > > > > Build info:    arcada_ish_v2.0.3661+3c1a1c1ae0 2022-02-08
> > > > > 05:37:47
> > > > > @localhost
> > > > > Tool version:  v2.0.12300-900b03ec7f 2022-02-08 10:01:48
> > > > > @localhost
> > > > > 
> > > > > Fixes: commit 91b228107da3 ("HID: intel-ish-hid: ISH firmware
> > > > > loader
> > > > > client driver")
> > > > > Signed-off-by: Gwendal Grignou <gwendal@chromium.org>
> > > > > Cc: stable@vger.kernel.org
> > > > > ---
> > > > >  drivers/hid/intel-ish-hid/ishtp-fw-loader.c | 29 +++--------
> > > > > ----
> > > > > ----
> > > > > --
> > > > >  1 file changed, 3 insertions(+), 26 deletions(-)
> > > > > 
> > > > > diff --git a/drivers/hid/intel-ish-hid/ishtp-fw-loader.c
> > > > > b/drivers/hid/intel-ish-hid/ishtp-fw-loader.c
> > > > > index e24988586710..16aa030af845 100644
> > > > > --- a/drivers/hid/intel-ish-hid/ishtp-fw-loader.c
> > > > > +++ b/drivers/hid/intel-ish-hid/ishtp-fw-loader.c
> > > > > @@ -661,21 +661,12 @@ static int
> > > > > ish_fw_xfer_direct_dma(struct
> > > > > ishtp_cl_data *client_data,
> > > > >          */
> > > > >         payload_max_size &= ~(L1_CACHE_BYTES - 1);
> > > > > 
> > > > > -       dma_buf = kmalloc(payload_max_size, GFP_KERNEL |
> > > > > GFP_DMA32);
> > > > > +       dma_buf = dma_alloc_coherent(devc, payload_max_size,
> > > > > &dma_buf_phy, GFP_KERNEL);
> > > > >         if (!dma_buf) {
> > > > >                 client_data->flag_retry = true;
> > > > >                 return -ENOMEM;
> > > > >         }
> > > > > 
> > > > > -       dma_buf_phy = dma_map_single(devc, dma_buf,
> > > > > payload_max_size,
> > > > > -                                    DMA_TO_DEVICE);
> > > > > -       if (dma_mapping_error(devc, dma_buf_phy)) {
> > > > > -               dev_err(cl_data_to_dev(client_data), "DMA map
> > > > > failed\n");
> > > > > -               client_data->flag_retry = true;
> > > > > -               rv = -ENOMEM;
> > > > > -               goto end_err_dma_buf_release;
> > > > > -       }
> > > > > -
> > > > >         ldr_xfer_dma_frag.fragment.hdr.command =
> > > > > LOADER_CMD_XFER_FRAGMENT;
> > > > >         ldr_xfer_dma_frag.fragment.xfer_mode =
> > > > > LOADER_XFER_MODE_DIRECT_DMA;
> > > > >         ldr_xfer_dma_frag.ddr_phys_addr = (u64)dma_buf_phy;
> > > > > @@ -695,14 +686,7 @@ static int ish_fw_xfer_direct_dma(struct
> > > > > ishtp_cl_data *client_data,
> > > > >                 ldr_xfer_dma_frag.fragment.size =
> > > > > fragment_size;
> > > > >                 memcpy(dma_buf, &fw->data[fragment_offset],
> > > > > fragment_size);
> > > > > 
> > > > > -               dma_sync_single_for_device(devc, dma_buf_phy,
> > > > > -                                          payload_max_size,
> > > > > -                                          DMA_TO_DEVICE);
> > > > > -
> > > > Any reason for removal of sync?
> > > It is not needed since we are using dma_alloc_coherent(). From
> > > [1]:
> > > """
> > > void *
> > > dma_alloc_coherent(struct device *dev, size_t size,
> > >    dma_addr_t *dma_handle, gfp_t flag)
> > > 
> > > Consistent memory is memory for which a write by either the
> > > device or
> > > the processor can immediately be read by the processor or device
> > > without having to worry about caching effects.  (You may however
> > > need
> > > to make sure to flush the processor's write buffers before
> > > telling
> > > devices to read that memory.)
> > > 
> > > This routine allocates a region of <size> bytes of consistent
> > > memory.
> > > """'
> > > 
> >  I checked with some dma folks. This call may still be required for
> > some device. Most likely not as this is on chip device.
> > What happens if you leave this call? I worry for regression on some
> > old
> > systems.
> I truly believe this call is unnecessary: From the docs, dma_sync_...
> is only for streaming DMA mappings, not for coherent memory. Another
> link here:
> http://lists.infradead.org/pipermail/linux-arm-kernel/2015-May/341712.html
> Looking at older drivers, for instance exec_drive_command() in
> driver/block/mtip32xx/mtip32xx.c, there are no dma_sync_ between
> dma_alloc_ and dma_free_.
> In a simple driver - drivers/scsi/advansys.c - that only uses
> coherent
> DMA memory, no dma_sync calls are used.
> As long as we have a memory barrier before sending the packet
> downstream, we are covered. I rebooted my machine in a loop without
> error overnight.
> 
I think sync may not be required here. But 
there is an additional step you miss if you don't call sync, 
irrespective of "(!dev_is_dma_coherent(dev))" in the call chain of this
sync.

	if (unlikely(is_swiotlb_buffer(dev, paddr)))
		swiotlb_sync_single_for_device(dev, paddr, size, dir);

So when SWIOTLB is enabled with swiotlb=force and bypass IOMMU, I don't
know the net affect of this.
So I need some further debug on this.

Thanks,
Srinivas

> Gwendal.
> 
> > 
> > Thanks,
> > Srinivas
> > 
> > 
> > 
> > > > Thanks,
> > > > Srinivas
> > > > 
> > > > > -               /*
> > > > > -                * Flush cache here because the
> > > > > dma_sync_single_for_device()
> > > > > -                * does not do for x86.
> > > > > -                */
> > > > > +               /* Flush cache to be sure the data is in main
> > > > > memory.
> > > > > */
> > > > >                 clflush_cache_range(dma_buf,
> > > > > payload_max_size);
> > > > > 
> > > > >                 dev_dbg(cl_data_to_dev(client_data),
> > > > > @@ -725,15 +709,8 @@ static int ish_fw_xfer_direct_dma(struct
> > > > > ishtp_cl_data *client_data,
> > > > >                 fragment_offset += fragment_size;
> > > > >         }
> > > > > 
> > > > > -       dma_unmap_single(devc, dma_buf_phy, payload_max_size,
> > > > > DMA_TO_DEVICE);
> > > > > -       kfree(dma_buf);
> > > > > -       return 0;
> > > > > -
> > > > >  end_err_resp_buf_release:
> > > > > -       /* Free ISH buffer if not done already, in error case
> > > > > */
> > > > > -       dma_unmap_single(devc, dma_buf_phy, payload_max_size,
> > > > > DMA_TO_DEVICE);
> > > > > -end_err_dma_buf_release:
> > > > > -       kfree(dma_buf);
> > > > > +       dma_free_coherent(devc, payload_max_size, dma_buf,
> > > > > dma_buf_phy);
> > > > >         return rv;
> > > > >  }
> > > > > 
> > > > 
> > > 
> > > [1]https://www.kernel.org/doc/Documentation/DMA-API.txt
> >
srinivas pandruvada Feb. 14, 2022, 5:11 p.m. UTC | #3
On Thu, 2022-02-10 at 11:39 -0800, srinivas pandruvada wrote:
> On Thu, 2022-02-10 at 10:34 -0800, Gwendal Grignou wrote:
> > On Wed, Feb 9, 2022 at 6:51 PM srinivas pandruvada
> > <srinivas.pandruvada@linux.intel.com> wrote:
> > > 
> > > On Wed, 2022-02-09 at 16:59 -0800, Gwendal Grignou wrote:
> > > > On Wed, Feb 9, 2022 at 10:52 AM srinivas pandruvada
> > > > <srinivas.pandruvada@linux.intel.com> wrote:
> > > > > 
> > > > > On Tue, 2022-02-08 at 21:09 -0800, Gwendal Grignou wrote:
> > > > > > Allocating memory with kmalloc and GPF_DMA32 is not
> > > > > > allowed,
> > > > > > the
> > > > > > allocator will ignore the attribute.
> > > > > > 
> > > > > Looks like there is new error flow added here for this flag.
> > > > > Is this just removing GFP_DMA32 not enough?
> > > > It was already ignored. It is not enough and I don't know why
> > > > since
> > > > the virtual and physical addresses are in the same range:
> > > > 
> > > > With using kmalloc/dma_single_sync:
> > > > 5.10 (firmware not loading)
> > > > [    3.837996] ish-loader {C804D06A-55BD-4EA7-ADED-
> > > > 1E31228C76DC}:
> > > > kmalloc/dma_map_single: v:0xffff91531ae88000,
> > > > phy:0x0000000085afe000
> > > > [    3.838003] ish-loader {C804D06A-55BD-4EA7-ADED-
> > > > 1E31228C76DC}:
> > > > xfer_mode=dma offset=0x00000000 size=0x4000 is_last=0
> > > > ddr_phys_addr=0x0000000085afe000
> > > > ...
> > > > 
> > > > 4.19 (firmware loading)
> > > > [    3.878300] ish-loader {C804D06A-55BD-4EA7-ADED-
> > > > 1E31228C76DC}:
> > > > kmalloc/dma_map_single: v:0xffff88c145480000,
> > > > phy:0x0000000085480000
> > > > [    3.878322] ish-loader {C804D06A-55BD-4EA7-ADED-
> > > > 1E31228C76DC}:
> > > > xfer_mode=dma offset=0x00000000 size=0x4000 is_last=0
> > > > ddr_phys_addr=0x0000000085480000
> > > > ...
> > > > 
> > > > I also considered flushing the CPU cache before the
> > > > dma_sync_single_for_device call, and calling
> > > > dma_sync_single_for_cpu()
> > > > after loader_cl_send() to be allowed to write into the buffer
> > > > again.
> > > > But these did not help either.
> > > > 
> > > > But using dma_alloc_coherent() clearly works and its simpler
> > > > API
> > > > makes
> > > That is OK.
> > > 
> > > > it more appropriate for the task at hand.
> > > > 
> > > > For reference, the same log when using dma_alloc_coherent().
> > > > [    3.779723] ish-loader {C804D06A-55BD-4EA7-ADED-
> > > > 1E31228C76DC}:
> > > > dma_alloc_coherent: v:0xffff9beb81048000,
> > > > phy:0x0000000001048000
> > > > [    3.779731] ish-loader {C804D06A-55BD-4EA7-ADED-
> > > > 1E31228C76DC}:
> > > > xfer_mode=dma offset=0x00000000 size=0x4000 is_last=0
> > > > ddr_phys_addr=0x0000000001048000
> > > > ...
> > > > 
> > > > > 
> > > > > > Instead, use dma_alloc_coherent() API as we allocate a
> > > > > > small
> > > > > > amount
> > > > > > of
> > > > > > memory to transfer firmware fragment to the ISH.
> > > > > > 
> > > > > > On Arcada chromebook, after the patch the warning:
> > > > > > "Unexpected gfp: 0x4 (GFP_DMA32). Fixing up to gfp: 0xcc0
> > > > > > (GFP_KERNEL).  Fix your code!"
> > > > > > is gone. The ISH firmware is loaded properly and we can
> > > > > > interact
> > > > > > with
> > > > > > the ISH:
> > > > > > > ectool  --name cros_ish version
> > > > > > ...
> > > > > > Build info:    arcada_ish_v2.0.3661+3c1a1c1ae0 2022-02-08
> > > > > > 05:37:47
> > > > > > @localhost
> > > > > > Tool version:  v2.0.12300-900b03ec7f 2022-02-08 10:01:48
> > > > > > @localhost
> > > > > > 
> > > > > > Fixes: commit 91b228107da3 ("HID: intel-ish-hid: ISH
> > > > > > firmware
> > > > > > loader
> > > > > > client driver")
> > > > > > Signed-off-by: Gwendal Grignou <gwendal@chromium.org>
> > > > > > Cc: stable@vger.kernel.org
> > > > > > ---
> > > > > >  drivers/hid/intel-ish-hid/ishtp-fw-loader.c | 29 +++------
> > > > > > --
> > > > > > ----
> > > > > > ----
> > > > > > --
> > > > > >  1 file changed, 3 insertions(+), 26 deletions(-)
> > > > > > 
> > > > > > diff --git a/drivers/hid/intel-ish-hid/ishtp-fw-loader.c
> > > > > > b/drivers/hid/intel-ish-hid/ishtp-fw-loader.c
> > > > > > index e24988586710..16aa030af845 100644
> > > > > > --- a/drivers/hid/intel-ish-hid/ishtp-fw-loader.c
> > > > > > +++ b/drivers/hid/intel-ish-hid/ishtp-fw-loader.c
> > > > > > @@ -661,21 +661,12 @@ static int
> > > > > > ish_fw_xfer_direct_dma(struct
> > > > > > ishtp_cl_data *client_data,
> > > > > >          */
> > > > > >         payload_max_size &= ~(L1_CACHE_BYTES - 1);
> > > > > > 
> > > > > > -       dma_buf = kmalloc(payload_max_size, GFP_KERNEL |
> > > > > > GFP_DMA32);
> > > > > > +       dma_buf = dma_alloc_coherent(devc,
> > > > > > payload_max_size,
> > > > > > &dma_buf_phy, GFP_KERNEL);
> > > > > >         if (!dma_buf) {
> > > > > >                 client_data->flag_retry = true;
> > > > > >                 return -ENOMEM;
> > > > > >         }
> > > > > > 
> > > > > > -       dma_buf_phy = dma_map_single(devc, dma_buf,
> > > > > > payload_max_size,
> > > > > > -                                    DMA_TO_DEVICE);
> > > > > > -       if (dma_mapping_error(devc, dma_buf_phy)) {
> > > > > > -               dev_err(cl_data_to_dev(client_data), "DMA
> > > > > > map
> > > > > > failed\n");
> > > > > > -               client_data->flag_retry = true;
> > > > > > -               rv = -ENOMEM;
> > > > > > -               goto end_err_dma_buf_release;
> > > > > > -       }
> > > > > > -
> > > > > >         ldr_xfer_dma_frag.fragment.hdr.command =
> > > > > > LOADER_CMD_XFER_FRAGMENT;
> > > > > >         ldr_xfer_dma_frag.fragment.xfer_mode =
> > > > > > LOADER_XFER_MODE_DIRECT_DMA;
> > > > > >         ldr_xfer_dma_frag.ddr_phys_addr = (u64)dma_buf_phy;
> > > > > > @@ -695,14 +686,7 @@ static int
> > > > > > ish_fw_xfer_direct_dma(struct
> > > > > > ishtp_cl_data *client_data,
> > > > > >                 ldr_xfer_dma_frag.fragment.size =
> > > > > > fragment_size;
> > > > > >                 memcpy(dma_buf, &fw->data[fragment_offset],
> > > > > > fragment_size);
> > > > > > 
> > > > > > -               dma_sync_single_for_device(devc,
> > > > > > dma_buf_phy,
> > > > > > -                                         
> > > > > > payload_max_size,
> > > > > > -                                          DMA_TO_DEVICE);
> > > > > > -
> > > > > Any reason for removal of sync?
> > > > It is not needed since we are using dma_alloc_coherent(). From
> > > > [1]:
> > > > """
> > > > void *
> > > > dma_alloc_coherent(struct device *dev, size_t size,
> > > >    dma_addr_t *dma_handle, gfp_t flag)
> > > > 
> > > > Consistent memory is memory for which a write by either the
> > > > device or
> > > > the processor can immediately be read by the processor or
> > > > device
> > > > without having to worry about caching effects.  (You may
> > > > however
> > > > need
> > > > to make sure to flush the processor's write buffers before
> > > > telling
> > > > devices to read that memory.)
> > > > 
> > > > This routine allocates a region of <size> bytes of consistent
> > > > memory.
> > > > """'
> > > > 
> > >  I checked with some dma folks. This call may still be required
> > > for
> > > some device. Most likely not as this is on chip device.
> > > What happens if you leave this call? I worry for regression on
> > > some
> > > old
> > > systems.
> > I truly believe this call is unnecessary: From the docs,
> > dma_sync_...
> > is only for streaming DMA mappings, not for coherent memory.
> > Another
> > link here:
> > http://lists.infradead.org/pipermail/linux-arm-kernel/2015-May/341712.html
> > Looking at older drivers, for instance exec_drive_command() in
> > driver/block/mtip32xx/mtip32xx.c, there are no dma_sync_ between
> > dma_alloc_ and dma_free_.
> > In a simple driver - drivers/scsi/advansys.c - that only uses
> > coherent
> > DMA memory, no dma_sync calls are used.
> > As long as we have a memory barrier before sending the packet
> > downstream, we are covered. I rebooted my machine in a loop without
> > error overnight.
> > 
> I think sync may not be required here. But 
> there is an additional step you miss if you don't call sync, 
> irrespective of "(!dev_is_dma_coherent(dev))" in the call chain of
> this
> sync.
> 
>         if (unlikely(is_swiotlb_buffer(dev, paddr)))
>                 swiotlb_sync_single_for_device(dev, paddr, size,
> dir);
> 
> So when SWIOTLB is enabled with swiotlb=force and bypass IOMMU, I
> don't
> know the net affect of this.
> So I need some further debug on this.
Looking into this, we will miss
copy the swiotlb buffer back and from original DMA location,
Please try with kernel command line option "swiotlb=force", if this can
still work we can remove this call safely.

Thanks,
Srinivas


> 
> Thanks,
> Srinivas
> 
> > Gwendal.
> > 
> > > 
> > > Thanks,
> > > Srinivas
> > > 
> > > 
> > > 
> > > > > Thanks,
> > > > > Srinivas
> > > > > 
> > > > > > -               /*
> > > > > > -                * Flush cache here because the
> > > > > > dma_sync_single_for_device()
> > > > > > -                * does not do for x86.
> > > > > > -                */
> > > > > > +               /* Flush cache to be sure the data is in
> > > > > > main
> > > > > > memory.
> > > > > > */
> > > > > >                 clflush_cache_range(dma_buf,
> > > > > > payload_max_size);
> > > > > > 
> > > > > >                 dev_dbg(cl_data_to_dev(client_data),
> > > > > > @@ -725,15 +709,8 @@ static int
> > > > > > ish_fw_xfer_direct_dma(struct
> > > > > > ishtp_cl_data *client_data,
> > > > > >                 fragment_offset += fragment_size;
> > > > > >         }
> > > > > > 
> > > > > > -       dma_unmap_single(devc, dma_buf_phy,
> > > > > > payload_max_size,
> > > > > > DMA_TO_DEVICE);
> > > > > > -       kfree(dma_buf);
> > > > > > -       return 0;
> > > > > > -
> > > > > >  end_err_resp_buf_release:
> > > > > > -       /* Free ISH buffer if not done already, in error
> > > > > > case
> > > > > > */
> > > > > > -       dma_unmap_single(devc, dma_buf_phy,
> > > > > > payload_max_size,
> > > > > > DMA_TO_DEVICE);
> > > > > > -end_err_dma_buf_release:
> > > > > > -       kfree(dma_buf);
> > > > > > +       dma_free_coherent(devc, payload_max_size, dma_buf,
> > > > > > dma_buf_phy);
> > > > > >         return rv;
> > > > > >  }
> > > > > > 
> > > > > 
> > > > 
> > > > [1]https://www.kernel.org/doc/Documentation/DMA-API.txt
> > > 
>
srinivas pandruvada March 4, 2022, 6:04 p.m. UTC | #4
On Thu, 2022-02-10 at 10:34 -0800, Gwendal Grignou wrote:
> On Wed, Feb 9, 2022 at 6:51 PM srinivas pandruvada
> <srinivas.pandruvada@linux.intel.com> wrote:
> > 
> > On Wed, 2022-02-09 at 16:59 -0800, Gwendal Grignou wrote:
> > > On Wed, Feb 9, 2022 at 10:52 AM srinivas pandruvada
> > > <srinivas.pandruvada@linux.intel.com> wrote:
> > > > 
> > > > On Tue, 2022-02-08 at 21:09 -0800, Gwendal Grignou wrote:
> > > > > Allocating memory with kmalloc and GPF_DMA32 is not allowed,
> > > > > the
> > > > > allocator will ignore the attribute.
> > > > > 
> > > > Looks like there is new error flow added here for this flag.
> > > > Is this just removing GFP_DMA32 not enough?
> > > It was already ignored. It is not enough and I don't know why
> > > since
> > > the virtual and physical addresses are in the same range:
> > > 
> > > With using kmalloc/dma_single_sync:
> > > 5.10 (firmware not loading)
> > > [    3.837996] ish-loader {C804D06A-55BD-4EA7-ADED-1E31228C76DC}:
> > > kmalloc/dma_map_single: v:0xffff91531ae88000,
> > > phy:0x0000000085afe000
> > > [    3.838003] ish-loader {C804D06A-55BD-4EA7-ADED-1E31228C76DC}:
> > > xfer_mode=dma offset=0x00000000 size=0x4000 is_last=0
> > > ddr_phys_addr=0x0000000085afe000
> > > ...
> > > 
> > > 4.19 (firmware loading)
> > > [    3.878300] ish-loader {C804D06A-55BD-4EA7-ADED-1E31228C76DC}:
> > > kmalloc/dma_map_single: v:0xffff88c145480000,
> > > phy:0x0000000085480000
> > > [    3.878322] ish-loader {C804D06A-55BD-4EA7-ADED-1E31228C76DC}:
> > > xfer_mode=dma offset=0x00000000 size=0x4000 is_last=0
> > > ddr_phys_addr=0x0000000085480000
> > > ...
> > > 
> > > I also considered flushing the CPU cache before the
> > > dma_sync_single_for_device call, and calling
> > > dma_sync_single_for_cpu()
> > > after loader_cl_send() to be allowed to write into the buffer
> > > again.
> > > But these did not help either.
> > > 
> > > But using dma_alloc_coherent() clearly works and its simpler API
> > > makes
> > That is OK.
> > 
> > > it more appropriate for the task at hand.
> > > 
> > > For reference, the same log when using dma_alloc_coherent().
> > > [    3.779723] ish-loader {C804D06A-55BD-4EA7-ADED-1E31228C76DC}:
> > > dma_alloc_coherent: v:0xffff9beb81048000, phy:0x0000000001048000
> > > [    3.779731] ish-loader {C804D06A-55BD-4EA7-ADED-1E31228C76DC}:
> > > xfer_mode=dma offset=0x00000000 size=0x4000 is_last=0
> > > ddr_phys_addr=0x0000000001048000
> > > ...
> > > 
> > > > 
> > > > > Instead, use dma_alloc_coherent() API as we allocate a small
> > > > > amount
> > > > > of
> > > > > memory to transfer firmware fragment to the ISH.
> > > > > 
> > > > > On Arcada chromebook, after the patch the warning:
> > > > > "Unexpected gfp: 0x4 (GFP_DMA32). Fixing up to gfp: 0xcc0
> > > > > (GFP_KERNEL).  Fix your code!"
> > > > > is gone. The ISH firmware is loaded properly and we can
> > > > > interact
> > > > > with
> > > > > the ISH:
> > > > > > ectool  --name cros_ish version
> > > > > ...
> > > > > Build info:    arcada_ish_v2.0.3661+3c1a1c1ae0 2022-02-08
> > > > > 05:37:47
> > > > > @localhost
> > > > > Tool version:  v2.0.12300-900b03ec7f 2022-02-08 10:01:48
> > > > > @localhost
> > > > > 
> > > > > Fixes: commit 91b228107da3 ("HID: intel-ish-hid: ISH firmware
> > > > > loader
> > > > > client driver")
> > > > > Signed-off-by: Gwendal Grignou <gwendal@chromium.org>
> > > > > Cc: stable@vger.kernel.org
> > > > > ---
> > > > >  drivers/hid/intel-ish-hid/ishtp-fw-loader.c | 29 +++--------
> > > > > ----
> > > > > ----
> > > > > --
> > > > >  1 file changed, 3 insertions(+), 26 deletions(-)
> > > > > 
> > > > > diff --git a/drivers/hid/intel-ish-hid/ishtp-fw-loader.c
> > > > > b/drivers/hid/intel-ish-hid/ishtp-fw-loader.c
> > > > > index e24988586710..16aa030af845 100644
> > > > > --- a/drivers/hid/intel-ish-hid/ishtp-fw-loader.c
> > > > > +++ b/drivers/hid/intel-ish-hid/ishtp-fw-loader.c
> > > > > @@ -661,21 +661,12 @@ static int
> > > > > ish_fw_xfer_direct_dma(struct
> > > > > ishtp_cl_data *client_data,
> > > > >          */
> > > > >         payload_max_size &= ~(L1_CACHE_BYTES - 1);
> > > > > 
> > > > > -       dma_buf = kmalloc(payload_max_size, GFP_KERNEL |
> > > > > GFP_DMA32);
> > > > > +       dma_buf = dma_alloc_coherent(devc, payload_max_size,
> > > > > &dma_buf_phy, GFP_KERNEL);
> > > > >         if (!dma_buf) {
> > > > >                 client_data->flag_retry = true;
> > > > >                 return -ENOMEM;
> > > > >         }
> > > > > 
> > > > > -       dma_buf_phy = dma_map_single(devc, dma_buf,
> > > > > payload_max_size,
> > > > > -                                    DMA_TO_DEVICE);
> > > > > -       if (dma_mapping_error(devc, dma_buf_phy)) {
> > > > > -               dev_err(cl_data_to_dev(client_data), "DMA map
> > > > > failed\n");
> > > > > -               client_data->flag_retry = true;
> > > > > -               rv = -ENOMEM;
> > > > > -               goto end_err_dma_buf_release;
> > > > > -       }
> > > > > -
> > > > >         ldr_xfer_dma_frag.fragment.hdr.command =
> > > > > LOADER_CMD_XFER_FRAGMENT;
> > > > >         ldr_xfer_dma_frag.fragment.xfer_mode =
> > > > > LOADER_XFER_MODE_DIRECT_DMA;
> > > > >         ldr_xfer_dma_frag.ddr_phys_addr = (u64)dma_buf_phy;
> > > > > @@ -695,14 +686,7 @@ static int ish_fw_xfer_direct_dma(struct
> > > > > ishtp_cl_data *client_data,
> > > > >                 ldr_xfer_dma_frag.fragment.size =
> > > > > fragment_size;
> > > > >                 memcpy(dma_buf, &fw->data[fragment_offset],
> > > > > fragment_size);
> > > > > 
> > > > > -               dma_sync_single_for_device(devc, dma_buf_phy,
> > > > > -                                          payload_max_size,
> > > > > -                                          DMA_TO_DEVICE);
> > > > > -
> > > > Any reason for removal of sync?
> > > It is not needed since we are using dma_alloc_coherent(). From
> > > [1]:
> > > """
> > > void *
> > > dma_alloc_coherent(struct device *dev, size_t size,
> > >    dma_addr_t *dma_handle, gfp_t flag)
> > > 
> > > Consistent memory is memory for which a write by either the
> > > device or
> > > the processor can immediately be read by the processor or device
> > > without having to worry about caching effects.  (You may however
> > > need
> > > to make sure to flush the processor's write buffers before
> > > telling
> > > devices to read that memory.)
> > > 
> > > This routine allocates a region of <size> bytes of consistent
> > > memory.
> > > """'
> > > 
> >  I checked with some dma folks. This call may still be required for
> > some device. Most likely not as this is on chip device.
> > What happens if you leave this call? I worry for regression on some
> > old
> > systems.
I couldn't find any Chrome device to experiment.
Please try with "swiotlb=force" and if it works, then this if fine to
take the patch as is.

Thanks,
Srinivas

> I truly believe this call is unnecessary: From the docs, dma_sync_...
> is only for streaming DMA mappings, not for coherent memory. Another
> link here:
> http://lists.infradead.org/pipermail/linux-arm-kernel/2015-May/341712.html
> Looking at older drivers, for instance exec_drive_command() in
> driver/block/mtip32xx/mtip32xx.c, there are no dma_sync_ between
> dma_alloc_ and dma_free_.
> In a simple driver - drivers/scsi/advansys.c - that only uses
> coherent
> DMA memory, no dma_sync calls are used.
> As long as we have a memory barrier before sending the packet
> downstream, we are covered. I rebooted my machine in a loop without
> error overnight.
> 
> Gwendal.
> 
> > 
> > Thanks,
> > Srinivas
> > 
> > 
> > 
> > > > Thanks,
> > > > Srinivas
> > > > 
> > > > > -               /*
> > > > > -                * Flush cache here because the
> > > > > dma_sync_single_for_device()
> > > > > -                * does not do for x86.
> > > > > -                */
> > > > > +               /* Flush cache to be sure the data is in main
> > > > > memory.
> > > > > */
> > > > >                 clflush_cache_range(dma_buf,
> > > > > payload_max_size);
> > > > > 
> > > > >                 dev_dbg(cl_data_to_dev(client_data),
> > > > > @@ -725,15 +709,8 @@ static int ish_fw_xfer_direct_dma(struct
> > > > > ishtp_cl_data *client_data,
> > > > >                 fragment_offset += fragment_size;
> > > > >         }
> > > > > 
> > > > > -       dma_unmap_single(devc, dma_buf_phy, payload_max_size,
> > > > > DMA_TO_DEVICE);
> > > > > -       kfree(dma_buf);
> > > > > -       return 0;
> > > > > -
> > > > >  end_err_resp_buf_release:
> > > > > -       /* Free ISH buffer if not done already, in error case
> > > > > */
> > > > > -       dma_unmap_single(devc, dma_buf_phy, payload_max_size,
> > > > > DMA_TO_DEVICE);
> > > > > -end_err_dma_buf_release:
> > > > > -       kfree(dma_buf);
> > > > > +       dma_free_coherent(devc, payload_max_size, dma_buf,
> > > > > dma_buf_phy);
> > > > >         return rv;
> > > > >  }
> > > > > 
> > > > 
> > > 
> > > [1]https://www.kernel.org/doc/Documentation/DMA-API.txt
> >
Gwendal Grignou March 11, 2022, 4:50 p.m. UTC | #5
On Fri, Mar 4, 2022 at 10:05 AM srinivas pandruvada
<srinivas.pandruvada@linux.intel.com> wrote:
>
> On Thu, 2022-02-10 at 10:34 -0800, Gwendal Grignou wrote:
> > On Wed, Feb 9, 2022 at 6:51 PM srinivas pandruvada
> > <srinivas.pandruvada@linux.intel.com> wrote:
> > >
> > > On Wed, 2022-02-09 at 16:59 -0800, Gwendal Grignou wrote:
> > > > On Wed, Feb 9, 2022 at 10:52 AM srinivas pandruvada
> > > > <srinivas.pandruvada@linux.intel.com> wrote:
> > > > >
> > > > > On Tue, 2022-02-08 at 21:09 -0800, Gwendal Grignou wrote:
> > > > > > Allocating memory with kmalloc and GPF_DMA32 is not allowed,
> > > > > > the
> > > > > > allocator will ignore the attribute.
> > > > > >
> > > > > Looks like there is new error flow added here for this flag.
> > > > > Is this just removing GFP_DMA32 not enough?
> > > > It was already ignored. It is not enough and I don't know why
> > > > since
> > > > the virtual and physical addresses are in the same range:
> > > >
> > > > With using kmalloc/dma_single_sync:
> > > > 5.10 (firmware not loading)
> > > > [    3.837996] ish-loader {C804D06A-55BD-4EA7-ADED-1E31228C76DC}:
> > > > kmalloc/dma_map_single: v:0xffff91531ae88000,
> > > > phy:0x0000000085afe000
> > > > [    3.838003] ish-loader {C804D06A-55BD-4EA7-ADED-1E31228C76DC}:
> > > > xfer_mode=dma offset=0x00000000 size=0x4000 is_last=0
> > > > ddr_phys_addr=0x0000000085afe000
> > > > ...
> > > >
> > > > 4.19 (firmware loading)
> > > > [    3.878300] ish-loader {C804D06A-55BD-4EA7-ADED-1E31228C76DC}:
> > > > kmalloc/dma_map_single: v:0xffff88c145480000,
> > > > phy:0x0000000085480000
> > > > [    3.878322] ish-loader {C804D06A-55BD-4EA7-ADED-1E31228C76DC}:
> > > > xfer_mode=dma offset=0x00000000 size=0x4000 is_last=0
> > > > ddr_phys_addr=0x0000000085480000
> > > > ...
> > > >
> > > > I also considered flushing the CPU cache before the
> > > > dma_sync_single_for_device call, and calling
> > > > dma_sync_single_for_cpu()
> > > > after loader_cl_send() to be allowed to write into the buffer
> > > > again.
> > > > But these did not help either.
> > > >
> > > > But using dma_alloc_coherent() clearly works and its simpler API
> > > > makes
> > > That is OK.
> > >
> > > > it more appropriate for the task at hand.
> > > >
> > > > For reference, the same log when using dma_alloc_coherent().
> > > > [    3.779723] ish-loader {C804D06A-55BD-4EA7-ADED-1E31228C76DC}:
> > > > dma_alloc_coherent: v:0xffff9beb81048000, phy:0x0000000001048000
> > > > [    3.779731] ish-loader {C804D06A-55BD-4EA7-ADED-1E31228C76DC}:
> > > > xfer_mode=dma offset=0x00000000 size=0x4000 is_last=0
> > > > ddr_phys_addr=0x0000000001048000
> > > > ...
> > > >
> > > > >
> > > > > > Instead, use dma_alloc_coherent() API as we allocate a small
> > > > > > amount
> > > > > > of
> > > > > > memory to transfer firmware fragment to the ISH.
> > > > > >
> > > > > > On Arcada chromebook, after the patch the warning:
> > > > > > "Unexpected gfp: 0x4 (GFP_DMA32). Fixing up to gfp: 0xcc0
> > > > > > (GFP_KERNEL).  Fix your code!"
> > > > > > is gone. The ISH firmware is loaded properly and we can
> > > > > > interact
> > > > > > with
> > > > > > the ISH:
> > > > > > > ectool  --name cros_ish version
> > > > > > ...
> > > > > > Build info:    arcada_ish_v2.0.3661+3c1a1c1ae0 2022-02-08
> > > > > > 05:37:47
> > > > > > @localhost
> > > > > > Tool version:  v2.0.12300-900b03ec7f 2022-02-08 10:01:48
> > > > > > @localhost
> > > > > >
> > > > > > Fixes: commit 91b228107da3 ("HID: intel-ish-hid: ISH firmware
> > > > > > loader
> > > > > > client driver")
> > > > > > Signed-off-by: Gwendal Grignou <gwendal@chromium.org>
> > > > > > Cc: stable@vger.kernel.org
> > > > > > ---
> > > > > >  drivers/hid/intel-ish-hid/ishtp-fw-loader.c | 29 +++--------
> > > > > > ----
> > > > > > ----
> > > > > > --
> > > > > >  1 file changed, 3 insertions(+), 26 deletions(-)
> > > > > >
> > > > > > diff --git a/drivers/hid/intel-ish-hid/ishtp-fw-loader.c
> > > > > > b/drivers/hid/intel-ish-hid/ishtp-fw-loader.c
> > > > > > index e24988586710..16aa030af845 100644
> > > > > > --- a/drivers/hid/intel-ish-hid/ishtp-fw-loader.c
> > > > > > +++ b/drivers/hid/intel-ish-hid/ishtp-fw-loader.c
> > > > > > @@ -661,21 +661,12 @@ static int
> > > > > > ish_fw_xfer_direct_dma(struct
> > > > > > ishtp_cl_data *client_data,
> > > > > >          */
> > > > > >         payload_max_size &= ~(L1_CACHE_BYTES - 1);
> > > > > >
> > > > > > -       dma_buf = kmalloc(payload_max_size, GFP_KERNEL |
> > > > > > GFP_DMA32);
> > > > > > +       dma_buf = dma_alloc_coherent(devc, payload_max_size,
> > > > > > &dma_buf_phy, GFP_KERNEL);
> > > > > >         if (!dma_buf) {
> > > > > >                 client_data->flag_retry = true;
> > > > > >                 return -ENOMEM;
> > > > > >         }
> > > > > >
> > > > > > -       dma_buf_phy = dma_map_single(devc, dma_buf,
> > > > > > payload_max_size,
> > > > > > -                                    DMA_TO_DEVICE);
> > > > > > -       if (dma_mapping_error(devc, dma_buf_phy)) {
> > > > > > -               dev_err(cl_data_to_dev(client_data), "DMA map
> > > > > > failed\n");
> > > > > > -               client_data->flag_retry = true;
> > > > > > -               rv = -ENOMEM;
> > > > > > -               goto end_err_dma_buf_release;
> > > > > > -       }
> > > > > > -
> > > > > >         ldr_xfer_dma_frag.fragment.hdr.command =
> > > > > > LOADER_CMD_XFER_FRAGMENT;
> > > > > >         ldr_xfer_dma_frag.fragment.xfer_mode =
> > > > > > LOADER_XFER_MODE_DIRECT_DMA;
> > > > > >         ldr_xfer_dma_frag.ddr_phys_addr = (u64)dma_buf_phy;
> > > > > > @@ -695,14 +686,7 @@ static int ish_fw_xfer_direct_dma(struct
> > > > > > ishtp_cl_data *client_data,
> > > > > >                 ldr_xfer_dma_frag.fragment.size =
> > > > > > fragment_size;
> > > > > >                 memcpy(dma_buf, &fw->data[fragment_offset],
> > > > > > fragment_size);
> > > > > >
> > > > > > -               dma_sync_single_for_device(devc, dma_buf_phy,
> > > > > > -                                          payload_max_size,
> > > > > > -                                          DMA_TO_DEVICE);
> > > > > > -
> > > > > Any reason for removal of sync?
> > > > It is not needed since we are using dma_alloc_coherent(). From
> > > > [1]:
> > > > """
> > > > void *
> > > > dma_alloc_coherent(struct device *dev, size_t size,
> > > >    dma_addr_t *dma_handle, gfp_t flag)
> > > >
> > > > Consistent memory is memory for which a write by either the
> > > > device or
> > > > the processor can immediately be read by the processor or device
> > > > without having to worry about caching effects.  (You may however
> > > > need
> > > > to make sure to flush the processor's write buffers before
> > > > telling
> > > > devices to read that memory.)
> > > >
> > > > This routine allocates a region of <size> bytes of consistent
> > > > memory.
> > > > """'
> > > >
> > >  I checked with some dma folks. This call may still be required for
> > > some device. Most likely not as this is on chip device.
> > > What happens if you leave this call? I worry for regression on some
> > > old
> > > systems.
> I couldn't find any Chrome device to experiment.
> Please try with "swiotlb=force" and if it works, then this if fine to
> take the patch as is.
It works. Used an "arcada" [from dmidecode]
System Information
        Manufacturer: Dell Inc.
        Product Name: Arcada

Added "swiotlb=force" to the kernel command line using
'/usr/share/vboot/bin/make_dev_ssd.sh -i  /dev/nvme0n1 --partitions 4
--edit_config`.
Check with 'cat /proc/cmdline' the option is added and tested multiple
reboot and shutdown/power on.

Gwendal.

>
> Thanks,
> Srinivas
>
> > I truly believe this call is unnecessary: From the docs, dma_sync_...
> > is only for streaming DMA mappings, not for coherent memory. Another
> > link here:
> > http://lists.infradead.org/pipermail/linux-arm-kernel/2015-May/341712.html
> > Looking at older drivers, for instance exec_drive_command() in
> > driver/block/mtip32xx/mtip32xx.c, there are no dma_sync_ between
> > dma_alloc_ and dma_free_.
> > In a simple driver - drivers/scsi/advansys.c - that only uses
> > coherent
> > DMA memory, no dma_sync calls are used.
> > As long as we have a memory barrier before sending the packet
> > downstream, we are covered. I rebooted my machine in a loop without
> > error overnight.
> >
> > Gwendal.
> >
> > >
> > > Thanks,
> > > Srinivas
> > >
> > >
> > >
> > > > > Thanks,
> > > > > Srinivas
> > > > >
> > > > > > -               /*
> > > > > > -                * Flush cache here because the
> > > > > > dma_sync_single_for_device()
> > > > > > -                * does not do for x86.
> > > > > > -                */
> > > > > > +               /* Flush cache to be sure the data is in main
> > > > > > memory.
> > > > > > */
> > > > > >                 clflush_cache_range(dma_buf,
> > > > > > payload_max_size);
> > > > > >
> > > > > >                 dev_dbg(cl_data_to_dev(client_data),
> > > > > > @@ -725,15 +709,8 @@ static int ish_fw_xfer_direct_dma(struct
> > > > > > ishtp_cl_data *client_data,
> > > > > >                 fragment_offset += fragment_size;
> > > > > >         }
> > > > > >
> > > > > > -       dma_unmap_single(devc, dma_buf_phy, payload_max_size,
> > > > > > DMA_TO_DEVICE);
> > > > > > -       kfree(dma_buf);
> > > > > > -       return 0;
> > > > > > -
> > > > > >  end_err_resp_buf_release:
> > > > > > -       /* Free ISH buffer if not done already, in error case
> > > > > > */
> > > > > > -       dma_unmap_single(devc, dma_buf_phy, payload_max_size,
> > > > > > DMA_TO_DEVICE);
> > > > > > -end_err_dma_buf_release:
> > > > > > -       kfree(dma_buf);
> > > > > > +       dma_free_coherent(devc, payload_max_size, dma_buf,
> > > > > > dma_buf_phy);
> > > > > >         return rv;
> > > > > >  }
> > > > > >
> > > > >
> > > >
> > > > [1]https://www.kernel.org/doc/Documentation/DMA-API.txt
> > >
>
srinivas pandruvada March 11, 2022, 9:22 p.m. UTC | #6
On Tue, 2022-02-08 at 21:09 -0800, Gwendal Grignou wrote:
> Allocating memory with kmalloc and GPF_DMA32 is not allowed, the
> allocator will ignore the attribute.
> 
> Instead, use dma_alloc_coherent() API as we allocate a small amount of
> memory to transfer firmware fragment to the ISH.
> 
> On Arcada chromebook, after the patch the warning:
> "Unexpected gfp: 0x4 (GFP_DMA32). Fixing up to gfp: 0xcc0
> (GFP_KERNEL).  Fix your code!"
> is gone. The ISH firmware is loaded properly and we can interact with
> the ISH:
> > ectool  --name cros_ish version
> ...
> Build info:    arcada_ish_v2.0.3661+3c1a1c1ae0 2022-02-08 05:37:47
> @localhost
> Tool version:  v2.0.12300-900b03ec7f 2022-02-08 10:01:48 @localhost
> 
> Fixes: commit 91b228107da3 ("HID: intel-ish-hid: ISH firmware loader
> client driver")
> Signed-off-by: Gwendal Grignou <gwendal@chromium.org>
> Cc: stable@vger.kernel.org
Acked-by: Srinivas Pandruvada <srinivas.pandruvada@linux.intel.com>

> ---
>  drivers/hid/intel-ish-hid/ishtp-fw-loader.c | 29 +++------------------
>  1 file changed, 3 insertions(+), 26 deletions(-)
> 
> diff --git a/drivers/hid/intel-ish-hid/ishtp-fw-loader.c
> b/drivers/hid/intel-ish-hid/ishtp-fw-loader.c
> index e24988586710..16aa030af845 100644
> --- a/drivers/hid/intel-ish-hid/ishtp-fw-loader.c
> +++ b/drivers/hid/intel-ish-hid/ishtp-fw-loader.c
> @@ -661,21 +661,12 @@ static int ish_fw_xfer_direct_dma(struct
> ishtp_cl_data *client_data,
>          */
>         payload_max_size &= ~(L1_CACHE_BYTES - 1);
>  
> -       dma_buf = kmalloc(payload_max_size, GFP_KERNEL | GFP_DMA32);
> +       dma_buf = dma_alloc_coherent(devc, payload_max_size,
> &dma_buf_phy, GFP_KERNEL);
>         if (!dma_buf) {
>                 client_data->flag_retry = true;
>                 return -ENOMEM;
>         }
>  
> -       dma_buf_phy = dma_map_single(devc, dma_buf, payload_max_size,
> -                                    DMA_TO_DEVICE);
> -       if (dma_mapping_error(devc, dma_buf_phy)) {
> -               dev_err(cl_data_to_dev(client_data), "DMA map
> failed\n");
> -               client_data->flag_retry = true;
> -               rv = -ENOMEM;
> -               goto end_err_dma_buf_release;
> -       }
> -
>         ldr_xfer_dma_frag.fragment.hdr.command =
> LOADER_CMD_XFER_FRAGMENT;
>         ldr_xfer_dma_frag.fragment.xfer_mode =
> LOADER_XFER_MODE_DIRECT_DMA;
>         ldr_xfer_dma_frag.ddr_phys_addr = (u64)dma_buf_phy;
> @@ -695,14 +686,7 @@ static int ish_fw_xfer_direct_dma(struct
> ishtp_cl_data *client_data,
>                 ldr_xfer_dma_frag.fragment.size = fragment_size;
>                 memcpy(dma_buf, &fw->data[fragment_offset],
> fragment_size);
>  
> -               dma_sync_single_for_device(devc, dma_buf_phy,
> -                                          payload_max_size,
> -                                          DMA_TO_DEVICE);
> -
> -               /*
> -                * Flush cache here because the
> dma_sync_single_for_device()
> -                * does not do for x86.
> -                */
> +               /* Flush cache to be sure the data is in main memory.
> */
>                 clflush_cache_range(dma_buf, payload_max_size);
>  
>                 dev_dbg(cl_data_to_dev(client_data),
> @@ -725,15 +709,8 @@ static int ish_fw_xfer_direct_dma(struct
> ishtp_cl_data *client_data,
>                 fragment_offset += fragment_size;
>         }
>  
> -       dma_unmap_single(devc, dma_buf_phy, payload_max_size,
> DMA_TO_DEVICE);
> -       kfree(dma_buf);
> -       return 0;
> -
>  end_err_resp_buf_release:
> -       /* Free ISH buffer if not done already, in error case */
> -       dma_unmap_single(devc, dma_buf_phy, payload_max_size,
> DMA_TO_DEVICE);
> -end_err_dma_buf_release:
> -       kfree(dma_buf);
> +       dma_free_coherent(devc, payload_max_size, dma_buf,
> dma_buf_phy);
>         return rv;
>  }
>
Jiri Kosina March 14, 2022, 9:39 a.m. UTC | #7
On Tue, 8 Feb 2022, Gwendal Grignou wrote:

> Allocating memory with kmalloc and GPF_DMA32 is not allowed, the
> allocator will ignore the attribute.
> 
> Instead, use dma_alloc_coherent() API as we allocate a small amount of
> memory to transfer firmware fragment to the ISH.
> 
> On Arcada chromebook, after the patch the warning:
> "Unexpected gfp: 0x4 (GFP_DMA32). Fixing up to gfp: 0xcc0 (GFP_KERNEL).  Fix your code!"
> is gone. The ISH firmware is loaded properly and we can interact with
> the ISH:
> > ectool  --name cros_ish version
> ...
> Build info:    arcada_ish_v2.0.3661+3c1a1c1ae0 2022-02-08 05:37:47 @localhost
> Tool version:  v2.0.12300-900b03ec7f 2022-02-08 10:01:48 @localhost
> 
> Fixes: commit 91b228107da3 ("HID: intel-ish-hid: ISH firmware loader client driver")
> Signed-off-by: Gwendal Grignou <gwendal@chromium.org>
> Cc: stable@vger.kernel.org

Applied, thanks.
diff mbox series

Patch

diff --git a/drivers/hid/intel-ish-hid/ishtp-fw-loader.c b/drivers/hid/intel-ish-hid/ishtp-fw-loader.c
index e24988586710..16aa030af845 100644
--- a/drivers/hid/intel-ish-hid/ishtp-fw-loader.c
+++ b/drivers/hid/intel-ish-hid/ishtp-fw-loader.c
@@ -661,21 +661,12 @@  static int ish_fw_xfer_direct_dma(struct ishtp_cl_data *client_data,
 	 */
 	payload_max_size &= ~(L1_CACHE_BYTES - 1);
 
-	dma_buf = kmalloc(payload_max_size, GFP_KERNEL | GFP_DMA32);
+	dma_buf = dma_alloc_coherent(devc, payload_max_size, &dma_buf_phy, GFP_KERNEL);
 	if (!dma_buf) {
 		client_data->flag_retry = true;
 		return -ENOMEM;
 	}
 
-	dma_buf_phy = dma_map_single(devc, dma_buf, payload_max_size,
-				     DMA_TO_DEVICE);
-	if (dma_mapping_error(devc, dma_buf_phy)) {
-		dev_err(cl_data_to_dev(client_data), "DMA map failed\n");
-		client_data->flag_retry = true;
-		rv = -ENOMEM;
-		goto end_err_dma_buf_release;
-	}
-
 	ldr_xfer_dma_frag.fragment.hdr.command = LOADER_CMD_XFER_FRAGMENT;
 	ldr_xfer_dma_frag.fragment.xfer_mode = LOADER_XFER_MODE_DIRECT_DMA;
 	ldr_xfer_dma_frag.ddr_phys_addr = (u64)dma_buf_phy;
@@ -695,14 +686,7 @@  static int ish_fw_xfer_direct_dma(struct ishtp_cl_data *client_data,
 		ldr_xfer_dma_frag.fragment.size = fragment_size;
 		memcpy(dma_buf, &fw->data[fragment_offset], fragment_size);
 
-		dma_sync_single_for_device(devc, dma_buf_phy,
-					   payload_max_size,
-					   DMA_TO_DEVICE);
-
-		/*
-		 * Flush cache here because the dma_sync_single_for_device()
-		 * does not do for x86.
-		 */
+		/* Flush cache to be sure the data is in main memory. */
 		clflush_cache_range(dma_buf, payload_max_size);
 
 		dev_dbg(cl_data_to_dev(client_data),
@@ -725,15 +709,8 @@  static int ish_fw_xfer_direct_dma(struct ishtp_cl_data *client_data,
 		fragment_offset += fragment_size;
 	}
 
-	dma_unmap_single(devc, dma_buf_phy, payload_max_size, DMA_TO_DEVICE);
-	kfree(dma_buf);
-	return 0;
-
 end_err_resp_buf_release:
-	/* Free ISH buffer if not done already, in error case */
-	dma_unmap_single(devc, dma_buf_phy, payload_max_size, DMA_TO_DEVICE);
-end_err_dma_buf_release:
-	kfree(dma_buf);
+	dma_free_coherent(devc, payload_max_size, dma_buf, dma_buf_phy);
 	return rv;
 }