Message ID: 20250513092803.2096-1-tao.wangtao@honor.com
State: New
Series: [1/2] dmabuf: add DMA_BUF_IOCTL_RW_FILE
On 5/13/25 14:30, wangtao wrote:
>> -----Original Message-----
>> From: Christian König <christian.koenig@amd.com>
>> Sent: Tuesday, May 13, 2025 7:32 PM
>> To: wangtao <tao.wangtao@honor.com>; sumit.semwal@linaro.org;
>> benjamin.gaignard@collabora.com; Brian.Starkey@arm.com;
>> jstultz@google.com; tjmercier@google.com
>> Cc: linux-media@vger.kernel.org; dri-devel@lists.freedesktop.org;
>> linaro-mm-sig@lists.linaro.org; linux-kernel@vger.kernel.org;
>> wangbintian(BintianWang) <bintian.wang@honor.com>;
>> yipengxiang <yipengxiang@honor.com>; liulu 00013167 <liulu.liu@honor.com>;
>> hanfeng 00012985 <feng.han@honor.com>
>> Subject: Re: [PATCH 2/2] dmabuf/heaps: implement
>> DMA_BUF_IOCTL_RW_FILE for system_heap
>>
>> On 5/13/25 11:28, wangtao wrote:
>>> Support direct file I/O operations for system_heap dma-buf objects.
>>> Implementation includes:
>>> 1. Convert sg_table to bio_vec
>>
>> That is usually illegal for DMA-bufs.
> [wangtao] The term 'convert' is misleading in this context. The appropriate
> phrasing should be: Construct bio_vec from sg_table.

Well, it doesn't matter what you call it. Touching the pages inside the sg_table of a DMA-buf is illegal; we even have code to actively prevent that.

Once more: this approach was already rejected multiple times! Please use udmabuf instead!

The hack you came up with here is simply not necessary.

Regards,
Christian.

> Appreciate your feedback.

[quoted patch snipped; full diff below]
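For readers unfamiliar with the suggested alternative: udmabuf wraps existing shmem (memfd) pages in a dma-buf, so a direct read can target those pages through an ordinary file read. A minimal userspace sketch of that route (error paths collapsed; path and size are placeholders, and O_DIRECT additionally imposes the filesystem's alignment rules):

#define _GNU_SOURCE
#include <fcntl.h>
#include <linux/udmabuf.h>
#include <sys/ioctl.h>
#include <sys/mman.h>
#include <unistd.h>

/* Create a udmabuf over sealed shmem pages, then O_DIRECT-read a file
 * into them. 'size' must be page-aligned. Returns the dma-buf fd. */
static int udmabuf_direct_read(const char *path, size_t size)
{
	int memfd = memfd_create("udmabuf-backing", MFD_ALLOW_SEALING);
	if (memfd < 0 || ftruncate(memfd, size) < 0)
		return -1;
	/* udmabuf requires the backing memfd to be sealed against shrinking */
	if (fcntl(memfd, F_ADD_SEALS, F_SEAL_SHRINK) < 0)
		return -1;

	struct udmabuf_create create = {
		.memfd  = memfd,
		.flags  = UDMABUF_FLAGS_CLOEXEC,
		.offset = 0,
		.size   = size,
	};
	int dev = open("/dev/udmabuf", O_RDWR);
	int dmabuf_fd = ioctl(dev, UDMABUF_CREATE, &create);
	if (dmabuf_fd < 0)
		return -1;

	/* The O_DIRECT read lands straight in the shmem pages backing the
	 * dma-buf; the block layer pins them via get/pin_user_pages. */
	void *va = mmap(NULL, size, PROT_READ | PROT_WRITE, MAP_SHARED, memfd, 0);
	int file = open(path, O_RDONLY | O_DIRECT);
	if (va == MAP_FAILED || file < 0 || pread(file, va, size, 0) < 0)
		return -1;

	munmap(va, size);
	close(file);
	close(dev);
	close(memfd);
	return dmabuf_fd;
}

Note the extra moving parts relative to the proposed ioctl: the shmem mapping, the sealing step, and the page-pinning cost that the rest of the thread argues about.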
> -----Original Message-----
> From: Christian König <christian.koenig@amd.com>
> Sent: Tuesday, May 13, 2025 9:18 PM
> Subject: Re: [PATCH 2/2] dmabuf/heaps: implement DMA_BUF_IOCTL_RW_FILE for system_heap
>
> On 5/13/25 14:30, wangtao wrote:
> > [wangtao] The term 'convert' is misleading in this context. The
> > appropriate phrasing should be: Construct bio_vec from sg_table.
>
> Well, it doesn't matter what you call it. Touching the pages inside the
> sg_table of a DMA-buf is illegal; we even have code to actively prevent that.

[wangtao] For a driver using a DMA-buf: don't touch pages in the sg_table. But the system heap exporter (the sg_table owner) should be allowed to use them.
If a driver takes ownership via dma_buf_map_attachment() or similar calls, the exporter must stop using the sg_table.
User-space programs should call DMA_BUF_IOCTL_RW_FILE only when the DMA-buf is not attached.
The exporter must check ownership (e.g., ensure no map_dma_buf/vmap is active) and block new calls while the I/O is in flight.
I'll add these checks in patch v2.

> Once more: this approach was already rejected multiple times! Please use
> udmabuf instead!
>
> The hack you came up with here is simply not necessary.

[wangtao] Many people need DMA-buf direct I/O; I tried it two years ago. My method is simpler, uses less CPU/power, and performs better:
- Speed: 3418 MB/s vs. 2073 MB/s (udmabuf) at a 1 GHz CPU clock.
- udmabuf wastes half its CPU time in __get_user_pages.
- Creating 32x32MB DMA-bufs plus reading a 1GB file takes 346 ms, vs. 1145 ms for udmabuf (its allocation step alone is roughly 10x slower) and 1503 ms for ordinary buffered DMA-buf reads.
udmabuf direct I/O is faster than buffered reads, but not by enough. Switching to udmabuf is easy for small apps but hard in complex systems, without major benefits.

> Regards,
> Christian.

[quoted patch snipped; full diff below]
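The ownership check promised for v2 could plausibly build on state system_heap already tracks. A sketch under that assumption (the attachments list and vmap_cnt fields exist in system_heap's buffer struct; treating them as the "not attached" test is this sketch's assumption, not code from the posted series):

/* Hypothetical v2 guard: refuse file I/O unless the exporter is the
 * sole owner of the pages. Called with buffer->lock held, so attach,
 * vmap and rw_file serialize against each other. */
static int system_heap_assert_exclusive(struct system_heap_buffer *buffer)
{
	lockdep_assert_held(&buffer->lock);

	/* an attachment means a device may hold a mapping of the sg_table */
	if (!list_empty(&buffer->attachments))
		return -EBUSY;

	/* a live kernel vmap is also a concurrent user of the pages */
	if (buffer->vmap_cnt)
		return -EBUSY;

	return 0;
}

system_heap_dma_buf_rw_file() would call this right after taking buffer->lock and bail out with -EBUSY instead of touching the pages.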
On 5/14/25 13:02, wangtao wrote:
> [wangtao] For a driver using a DMA-buf: don't touch pages in the sg_table.
> But the system heap exporter (the sg_table owner) should be allowed to use them.

Good point, that might be possible.

> If a driver takes ownership via dma_buf_map_attachment() or similar calls, the exporter must stop using the sg_table.
> User-space programs should call DMA_BUF_IOCTL_RW_FILE only when the DMA-buf is not attached.
> The exporter must check ownership (e.g., ensure no map_dma_buf/vmap is active) and block new calls while the I/O is in flight.
> I'll add these checks in patch v2.
>
> [wangtao] Many people need DMA-buf direct I/O; I tried it two years ago.
> My method is simpler, uses less CPU/power, and performs better:

I don't think that this is a valid argument.

> - Speed: 3418 MB/s vs. 2073 MB/s (udmabuf) at a 1 GHz CPU clock.
> - udmabuf wastes half its CPU time in __get_user_pages.
> - Creating 32x32MB DMA-bufs plus reading a 1GB file takes 346 ms, vs. 1145 ms
>   for udmabuf and 1503 ms for ordinary buffered DMA-buf reads.

Why would using udmabuf be slower here?

> udmabuf direct I/O is faster than buffered reads, but not by enough. Switching to
> udmabuf is easy for small apps but hard in complex systems, without major benefits.

Yeah, but your approach here is a rather clear hack, while using udmabuf is much cleaner and generally accepted by everybody now.

As far as I can see I have to reject your approach here.

Regards,
Christian.

[quoted patch snipped; full diff below]
> -----Original Message-----
> From: Christian König <christian.koenig@amd.com>
> Sent: Wednesday, May 14, 2025 8:00 PM
> Subject: Re: [PATCH 2/2] dmabuf/heaps: implement DMA_BUF_IOCTL_RW_FILE for system_heap
>
> On 5/14/25 13:02, wangtao wrote:
> > [wangtao] Many people need DMA-buf direct I/O; I tried it two years ago.
> > My method is simpler, uses less CPU/power, and performs better:
>
> I don't think that this is a valid argument.
>
> > - Creating 32x32MB DMA-bufs plus reading a 1GB file takes 346 ms, vs. 1145 ms
> >   for udmabuf and 1503 ms for ordinary buffered DMA-buf reads.
>
> Why would using udmabuf be slower here?
>
> Yeah, but your approach here is a rather clear hack, while using udmabuf is much
> cleaner and generally accepted by everybody now.
>
> As far as I can see I have to reject your approach here.

[wangtao] My test configuration (CPU @1GHz, average of 5 runs):

Allocation (creating 32x32MB buffers):
- dmabuf: 53 ms vs. udmabuf: 694 ms (roughly 13x slower)
- Note: the shmem allocation path takes excessively long

Reading a 1024MB file:
- dmabuf direct: 326 ms vs. udmabuf direct: 461 ms (about 40% slower)
- Note: pin_user_pages_fast consumes the majority of the CPU cycles

Key function call timing, see the call graphs below.

dmabuf direct I/O:
|- 12.39% DmaBufTest_PerfDmabufDirectIO_Test::TestBody()
|-|- 5.95% perf_dmabuf_alloc_and_io
|-|-|- 4.38% dmabuf_io_back
|-|-|-|- 3.47% ioctl
|-|-|-|-|- 3.47% __ioctl
|-|-|-|-|-|-|-|-|-|-|-|- 3.47% dma_buf_ioctl
|-|-|-|-|-|-|-|-|-|-|-|-|- 3.47% system_heap_dma_buf_rw_file
|-|-|-|-|-|-|-|-|-|-|-|-|-|- 3.46% system_heap_rw_file
|-|-|-|-|-|-|-|-|-|-|-|-|-|-|- 3.46% f2fs_file_read_iter
|-|-|-|-|-|-|-|-|-|-|-|-|-|-|-|- 3.46% __iomap_dio_rw
|-|-|- 1.33% ioctl
|-|-|-|- 1.33% __ioctl
|-|-|-|-|-|-|-|-|-|-|- 1.33% dma_heap_ioctl
|-|-|-|-|-|-|-|-|-|-|-|- 1.33% dma_heap_buffer_alloc
|-|-|-|-|-|-|-|-|-|-|-|-|- 1.33% system_heap_allocate
|-|-|-|-|-|-|-|-|-|-|-|-|-|- 1.33% system_heap_do_allocate
|-|-|-|-|-|-|-|-|-|-|-|-|-|-|- 1.28% __alloc_pages

udmabuf direct I/O:
|- 39.35% DmaBufTest_PerfDmabufUDirectIO_Test::TestBody()
|-|- 32.76% perf_dmabuf_alloc_and_io
|-|-|- 17.43% alloc_udmabuf
|-|-|-|- 13.34% ioctl
|-|-|-|-|-|-|-|-|-|-|-|- 13.34% udmabuf_ioctl
|-|-|-|-|-|-|-|-|-|-|-|-|- 13.32% udmabuf_create
|-|-|-|-|-|-|-|-|-|-|-|-|-|- 13.26% shmem_read_mapping_page_gfp
|-|-|-|-|-|-|-|-|-|-|-|-|-|-|- 11.94% shmem_read_folio_gfp
|-|-|-|-|-|-|-|-|-|-|-|-|-|-|-|- 11.28% shmem_get_folio_gfp
|-|-|- 10.81% dmabuf_io_back
|-|-|-|- 8.85% read
|-|-|-|-|-|-|-|-|-|- 8.85% __arm64_sys_read
|-|-|-|-|-|-|-|-|-|-|- 8.85% f2fs_file_read_iter
|-|-|-|-|-|-|-|-|-|-|-|- 8.84% __iomap_dio_rw
|-|-|-|-|-|-|-|-|-|-|-|-|- 7.85% iomap_dio_bio_iter
|-|-|-|-|-|-|-|-|-|-|-|-|-|- 5.61% bio_iov_iter_get_pages
|-|-|-|-|-|-|-|-|-|-|-|-|-|-|- 5.46% iov_iter_extract_pages
|-|-|-|-|-|-|-|-|-|-|-|-|-|-|-|- 5.46% pin_user_pages_fast
|-|-|-|-|-|-|-|-|-|-|-|-|-|-|-|-|- 5.46% internal_get_user_pages_fast
|-|-|-|-|-|-|-|-|-|-|-|-|-|-|-|-|-|- 5.46% __gup_longterm_locked
|-|-|-|-|-|-|-|-|-|-|-|-|-|-|-|-|-|-|- 5.36% __get_user_pages

> Regards,
> Christian.

[quoted patch snipped; full diff below]
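The two profiles diverge exactly at the page-lookup step: both converge on __iomap_dio_rw, but the udmabuf read enters it with a user-address iterator, so bio_iov_iter_get_pages() has to resolve and pin every page, while the proposed ioctl enters with a bvec iterator built from pages the exporter already holds. An illustrative kernel-context contrast of the two setups (not literal code from either path; a real caller uses one or the other):

#include <linux/uio.h>

static void contrast_dio_iters(struct iov_iter *iter, void __user *user_va,
			       struct bio_vec *bvec, int nr_segs, size_t len)
{
	/* udmabuf route: read(2) hands the DIO path a user VA, so
	 * iov_iter_extract_pages() ends up in pin_user_pages_fast(),
	 * the 5.46% hot spot in the udmabuf profile above. */
	iov_iter_ubuf(iter, ITER_DEST, user_va, len);

	/* this patch's route: the exporter builds the iterator from its
	 * own sg_table pages, so extraction is a plain array walk. */
	iov_iter_bvec(iter, ITER_DEST, bvec, nr_segs, len);
}

Both routes end in the same f2fs/iomap code; the difference is purely who supplies the pages.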
diff --git a/drivers/dma-buf/heaps/system_heap.c b/drivers/dma-buf/heaps/system_heap.c
index 26d5dc89ea16..f7b71b9843aa 100644
--- a/drivers/dma-buf/heaps/system_heap.c
+++ b/drivers/dma-buf/heaps/system_heap.c
@@ -20,6 +20,8 @@
 #include <linux/scatterlist.h>
 #include <linux/slab.h>
 #include <linux/vmalloc.h>
+#include <linux/bvec.h>
+#include <linux/uio.h>
 
 static struct dma_heap *sys_heap;
 
@@ -281,6 +283,121 @@ static void system_heap_vunmap(struct dma_buf *dmabuf, struct iosys_map *map)
 	iosys_map_clear(map);
 }
 
+static struct bio_vec *system_heap_init_bvec(struct system_heap_buffer *buffer,
+		size_t offset, size_t len, int *nr_segs)
+{
+	struct sg_table *sgt = &buffer->sg_table;
+	struct scatterlist *sg;
+	size_t length = 0;
+	unsigned int i, k = 0;
+	struct bio_vec *bvec;
+	size_t sg_left;
+	size_t sg_offset;
+	size_t sg_len;
+
+	bvec = kvcalloc(sgt->nents, sizeof(*bvec), GFP_KERNEL);
+	if (!bvec)
+		return NULL;
+
+	for_each_sg(sgt->sgl, sg, sgt->nents, i) {
+		length += sg->length;
+		if (length <= offset)
+			continue;
+
+		sg_left = length - offset;
+		sg_offset = sg->offset + sg->length - sg_left;
+		sg_len = min(sg_left, len);
+
+		bvec[k].bv_page = sg_page(sg);
+		bvec[k].bv_len = sg_len;
+		bvec[k].bv_offset = sg_offset;
+		k++;
+
+		offset += sg_len;
+		len -= sg_len;
+		if (len <= 0)
+			break;
+	}
+
+	*nr_segs = k;
+	return bvec;
+}
+
+static int system_heap_rw_file(struct system_heap_buffer *buffer, bool is_read,
+		bool direct_io, struct file *filp, loff_t file_offset,
+		size_t buf_offset, size_t len)
+{
+	struct bio_vec *bvec;
+	int nr_segs = 0;
+	struct iov_iter iter;
+	struct kiocb kiocb;
+	ssize_t ret = 0;
+
+	if (direct_io) {
+		if (!(filp->f_mode & FMODE_CAN_ODIRECT))
+			return -EINVAL;
+	}
+
+	bvec = system_heap_init_bvec(buffer, buf_offset, len, &nr_segs);
+	if (!bvec)
+		return -ENOMEM;
+
+	iov_iter_bvec(&iter, is_read ? ITER_DEST : ITER_SOURCE, bvec, nr_segs, len);
+	init_sync_kiocb(&kiocb, filp);
+	kiocb.ki_pos = file_offset;
+	if (direct_io)
+		kiocb.ki_flags |= IOCB_DIRECT;
+
+	while (kiocb.ki_pos < file_offset + len) {
+		if (is_read)
+			ret = vfs_iocb_iter_read(filp, &kiocb, &iter);
+		else
+			ret = vfs_iocb_iter_write(filp, &kiocb, &iter);
+		if (ret <= 0)
+			break;
+	}
+
+	kvfree(bvec);
+	return ret < 0 ? ret : 0;
+}
+
+static int system_heap_dma_buf_rw_file(struct dma_buf *dmabuf,
+		struct dma_buf_rw_file *back)
+{
+	struct system_heap_buffer *buffer = dmabuf->priv;
+	int ret = 0;
+	__u32 op = back->flags & DMA_BUF_RW_FLAGS_OP_MASK;
+	bool direct_io = back->flags & DMA_BUF_RW_FLAGS_DIRECT;
+	struct file *filp;
+
+	if (op != DMA_BUF_RW_FLAGS_READ && op != DMA_BUF_RW_FLAGS_WRITE)
+		return -EINVAL;
+	if (direct_io) {
+		if (!PAGE_ALIGNED(back->file_offset) ||
+		    !PAGE_ALIGNED(back->buf_offset) ||
+		    !PAGE_ALIGNED(back->buf_len))
+			return -EINVAL;
+	}
+	if (!back->buf_len || back->buf_len > dmabuf->size ||
+	    back->buf_offset >= dmabuf->size ||
+	    back->buf_offset + back->buf_len > dmabuf->size)
+		return -EINVAL;
+	if (back->file_offset + back->buf_len < back->file_offset)
+		return -EINVAL;
+
+	filp = fget(back->fd);
+	if (!filp)
+		return -EBADF;
+
+	mutex_lock(&buffer->lock);
+	ret = system_heap_rw_file(buffer, op == DMA_BUF_RW_FLAGS_READ, direct_io,
+			filp, back->file_offset, back->buf_offset, back->buf_len);
+	mutex_unlock(&buffer->lock);
+
+	fput(filp);
+	return ret;
+}
+
 static void system_heap_dma_buf_release(struct dma_buf *dmabuf)
 {
 	struct system_heap_buffer *buffer = dmabuf->priv;
@@ -308,6 +425,7 @@ static const struct dma_buf_ops system_heap_buf_ops = {
 	.mmap = system_heap_mmap,
 	.vmap = system_heap_vmap,
 	.vunmap = system_heap_vunmap,
+	.rw_file = system_heap_dma_buf_rw_file,
 	.release = system_heap_dma_buf_release,
 };
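To sanity-check the offset arithmetic in system_heap_init_bvec(), the following throwaway userspace harness replays the same walk over made-up segment sizes; plain structs stand in for scatterlist and bio_vec, so this mirrors the kernel loop rather than exercising it:

#include <stddef.h>
#include <stdio.h>

#define MIN(a, b) ((a) < (b) ? (a) : (b))

struct seg { size_t offset, length; };          /* stand-in for scatterlist */
struct vec { int seg_idx; size_t off, len; };   /* stand-in for bio_vec */

/* Replays the walk from system_heap_init_bvec(): skip whole segments
 * until the cumulative length passes 'offset', emit a partial first
 * segment, then full segments until 'len' is consumed. */
static int init_bvec(const struct seg *sgs, int nsegs, size_t offset,
		     size_t len, struct vec *out)
{
	size_t length = 0;
	int i, k = 0;

	for (i = 0; i < nsegs; i++) {
		size_t sg_left, sg_offset, sg_len;

		length += sgs[i].length;
		if (length <= offset)
			continue;

		sg_left = length - offset;
		sg_offset = sgs[i].offset + sgs[i].length - sg_left;
		sg_len = MIN(sg_left, len);

		out[k++] = (struct vec){ i, sg_offset, sg_len };

		offset += sg_len;
		len -= sg_len;
		if (len == 0)
			break;
	}
	return k;
}

int main(void)
{
	/* three hypothetical 4 KiB segments; read 6 KiB starting at 2 KiB */
	struct seg sgs[] = { {0, 4096}, {0, 4096}, {0, 4096} };
	struct vec out[3];
	int k = init_bvec(sgs, 3, 2048, 6144, out);

	for (int i = 0; i < k; i++)
		printf("seg %d: off=%zu len=%zu\n",
		       out[i].seg_idx, out[i].off, out[i].len);
	return 0;
}

Running it prints the two expected entries (seg 0: off=2048 len=2048, then seg 1: off=0 len=4096), confirming the first-segment offset math.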
Support direct file I/O operations for system_heap dma-buf objects.
Implementation includes:
1. Construct bio_vec from sg_table
2. Set IOCB_DIRECT when O_DIRECT is supported
3. Invoke vfs_iocb_iter_read()/vfs_iocb_iter_write() for actual I/O

Performance metrics (UFS 4.0 device @4GB/s, Arm64 CPU @1GHz):

| Metric             |    1MB |    8MB |    64MB |   1024MB |   3072MB |
|--------------------|-------:|-------:|--------:|---------:|---------:|
| Buffer Read (us)   |   1658 |   9028 |   69295 |  1019783 |  2978179 |
| Direct Read (us)   |    707 |   2647 |   18689 |   299627 |   937758 |
| Buffer Rate (MB/s) |    603 |    886 |     924 |     1004 |     1032 |
| Direct Rate (MB/s) |   1414 |   3022 |    3425 |     3418 |     3276 |

Signed-off-by: wangtao <tao.wangtao@honor.com>
---
 drivers/dma-buf/heaps/system_heap.c | 118 ++++++++++++++++++++++++++++
 1 file changed, 118 insertions(+)
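For reference, userspace would drive the new ioctl roughly as below. The struct and flag names are taken from their use in this patch, but the field layout is defined in patch 1/2 of the series, so treat this as an inferred sketch rather than the final uapi:

#include <sys/ioctl.h>
/* struct dma_buf_rw_file, DMA_BUF_RW_FLAGS_* and DMA_BUF_IOCTL_RW_FILE
 * would come from the uapi header added by patch 1/2. */

/* Sketch: read buf_len bytes from file_fd into the dma-buf with
 * O_DIRECT semantics. Field layout inferred, not authoritative. */
static int dmabuf_direct_read(int dmabuf_fd, int file_fd, size_t buf_len)
{
	struct dma_buf_rw_file back = {
		.fd = file_fd,
		.file_offset = 0,   /* page-aligned: DIRECT is requested */
		.buf_offset = 0,    /* page-aligned, within the dma-buf */
		.buf_len = buf_len, /* page-aligned, <= dma-buf size */
		.flags = DMA_BUF_RW_FLAGS_READ | DMA_BUF_RW_FLAGS_DIRECT,
	};

	return ioctl(dmabuf_fd, DMA_BUF_IOCTL_RW_FILE, &back);
}

Note that file_fd does not need to be opened with O_DIRECT: the heap checks FMODE_CAN_ODIRECT and sets IOCB_DIRECT on the kiocb itself.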