diff mbox series

[2/2] dmabuf/heaps: implement DMA_BUF_IOCTL_RW_FILE for system_heap

Message ID 20250513092803.2096-1-tao.wangtao@honor.com
State New
Headers show
Series [1/2] dmabuf: add DMA_BUF_IOCTL_RW_FILE | expand

Commit Message

wangtao May 13, 2025, 9:28 a.m. UTC
Support direct file I/O on system_heap dma-buf objects.
The implementation:
1. Constructs a bio_vec array from the buffer's sg_table
2. Sets IOCB_DIRECT when the file supports O_DIRECT
3. Invokes vfs_iocb_iter_read()/vfs_iocb_iter_write() for the actual I/O
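
For illustration, a minimal user-space sketch of the intended usage
(DMA_BUF_IOCTL_RW_FILE and struct dma_buf_rw_file are defined in patch
1/2, which is not shown here; the field and flag names below are taken
from the code in this patch, and the header location is an assumption):

	#include <sys/ioctl.h>
	#include <linux/dma-buf.h>	/* uapi from patch 1/2; assumed header */

	/* Read len bytes from file_fd at offset 0 straight into the
	 * dma-buf using O_DIRECT. file_offset, buf_offset and buf_len
	 * must all be PAGE_ALIGNED when DMA_BUF_RW_FLAGS_DIRECT is set,
	 * and buf_offset + buf_len must not exceed the dma-buf size. */
	static int dmabuf_direct_read(int dmabuf_fd, int file_fd, size_t len)
	{
		struct dma_buf_rw_file args = {
			.fd = file_fd,
			.flags = DMA_BUF_RW_FLAGS_READ | DMA_BUF_RW_FLAGS_DIRECT,
			.file_offset = 0,
			.buf_offset = 0,
			.buf_len = len,
		};

		return ioctl(dmabuf_fd, DMA_BUF_IOCTL_RW_FILE, &args);
	}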

Performance metrics (UFS 4.0 device @4GB/s, Arm64 CPU @1GHz):

| Metric             |    1MB |    8MB |    64MB |   1024MB |   3072MB |
|--------------------|-------:|-------:|--------:|---------:|---------:|
| Buffer Read (us)   |   1658 |   9028 |   69295 |  1019783 |  2978179 |
| Direct Read (us)   |    707 |   2647 |   18689 |   299627 |   937758 |
| Buffer Rate (MB/s) |    603 |    886 |     924 |     1004 |     1032 |
| Direct Rate (MB/s) |   1414 |   3022 |    3425 |     3418 |     3276 |
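
As a cross-check, the rates follow from the times: e.g. 1024 MB read in
299627 us gives 1024 / 0.299627 ≈ 3418 MB/s, the direct-read rate shown
in the 1024MB column.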

Signed-off-by: wangtao <tao.wangtao@honor.com>
---
 drivers/dma-buf/heaps/system_heap.c | 118 ++++++++++++++++++++++++++++
 1 file changed, 118 insertions(+)

Comments

Christian König May 13, 2025, 1:17 p.m. UTC | #1
On 5/13/25 14:30, wangtao wrote:
>> From: Christian König <christian.koenig@amd.com>
>> On 5/13/25 11:28, wangtao wrote:
>>> Support direct file I/O operations for system_heap dma-buf objects.
>>> Implementation includes:
>>> 1. Convert sg_table to bio_vec
>>
>> That is usually illegal for DMA-bufs.
> [wangtao] The term 'convert' is misleading in this context. The appropriate phrasing should be: Construct bio_vec from sg_table.

Well it doesn't matter what you call it. Touching the pages inside the sg_table of a DMA-buf is illegal; we even have code to actively prevent that.
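
For context, the code that actively prevents this is the
CONFIG_DMABUF_DEBUG page-pointer mangling in drivers/dma-buf/dma-buf.c,
which looks roughly like this:

	/* Scramble the page pointers of a mapped sg_table so importers
	 * that call sg_page() crash early instead of silently misusing
	 * pages they do not own. */
	static void mangle_sg_table(struct sg_table *sg_table)
	{
	#ifdef CONFIG_DMABUF_DEBUG
		int i;
		struct scatterlist *sg;

		/* Mix up the bits, but preserve the low bits so the
		 * struct page can be restored again on unmap. */
		for_each_sgtable_sg(sg_table, sg, i)
			sg->page_link ^= ~0xffUL;
	#endif
	}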

Once more: This approach was already rejected multiple times! Please use udmabuf instead!

The hack you came up with here is simply not necessary.

Regards,
Christian.


wangtao May 14, 2025, 11:02 a.m. UTC | #2
> From: Christian König <christian.koenig@amd.com>
> On 5/13/25 14:30, wangtao wrote:
> >> From: Christian König <christian.koenig@amd.com>
> >> On 5/13/25 11:28, wangtao wrote:
> >>> Support direct file I/O operations for system_heap dma-buf objects.
> >>> Implementation includes:
> >>> 1. Convert sg_table to bio_vec
> >>
> >> That is usually illegal for DMA-bufs.
> > [wangtao] The term 'convert' is misleading in this context. The appropriate
> phrasing should be: Construct bio_vec from sg_table.
> 
> Well it doesn't matter what you call it. Touching the page inside an sg table of
> a DMA-buf is illegal, we even have code to actively prevent that.
[wangtao] For a driver importing a DMA-buf: don't touch the pages in the sg_table. But the system heap exporter (the sg_table's owner) should be allowed to use them.
If a driver takes ownership via dma_buf_map_attachment() or similar calls, the exporter must stop using the sg_table.
User-space programs should call DMA_BUF_IOCTL_RW_FILE only while the DMA-buf is not attached.
The exporter must check ownership (e.g., ensure no map_dma_buf/vmap is active) and block new calls while the I/O is in progress.
I'll add these checks in patch v2.
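A rough sketch of what such a check could look like in the ioctl handler
(illustrative only: vmap_cnt already exists in struct system_heap_buffer,
while attach_map_count is a hypothetical counter the attach/map_dma_buf
paths would have to maintain):

	mutex_lock(&buffer->lock);
	/* Hypothetical v2 ownership check: the exporter may only touch
	 * the pages while no importer mapping or kernel vmap is live. */
	if (buffer->attach_map_count || buffer->vmap_cnt) {
		mutex_unlock(&buffer->lock);
		fput(filp);
		return -EBUSY;
	}
	ret = system_heap_rw_file(buffer, op == DMA_BUF_RW_FLAGS_READ, direct_io,
			filp, back->file_offset, back->buf_offset, back->buf_len);
	mutex_unlock(&buffer->lock);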

> 
> Once more: This approach was already rejected multiple times! Please use
> udmabuf instead!
> 
> The hack you came up here is simply not necessary.
[wangtao] Many people need DMA-buf direct I/O. I tried it 2 years ago. My method is simpler, uses less CPU/power, and performs better:
  - Speed: 3418 MB/s vs. 2073 MB/s (udmabuf) at a 1GHz CPU.
  - udmabuf wastes half its CPU time in __get_user_pages.
  - Creating 32x32MB DMA-bufs + reading a 1GB file takes 346 ms, vs. 1145 ms for udmabuf (10x slower) and 1503 ms for normal buffered DMA-buf I/O.
udmabuf is somewhat faster than normal buffered I/O, but not by enough. Switching to udmabuf is easy for small apps but hard in complex systems without major benefits.
Christian König May 14, 2025, noon UTC | #3
On 5/14/25 13:02, wangtao wrote:
>> From: Christian König <christian.koenig@amd.com>
>> On 5/13/25 14:30, wangtao wrote:
>>>> From: Christian König <christian.koenig@amd.com>
>>>> On 5/13/25 11:28, wangtao wrote:
>>>>> Support direct file I/O operations for system_heap dma-buf objects.
>>>>> Implementation includes:
>>>>> 1. Convert sg_table to bio_vec
>>>>
>>>> That is usually illegal for DMA-bufs.
>>> [wangtao] The term 'convert' is misleading in this context. The appropriate
>> phrasing should be: Construct bio_vec from sg_table.
>>
>> Well it doesn't matter what you call it. Touching the page inside an sg table of
>> a DMA-buf is illegal, we even have code to actively prevent that.
> [wangtao] For a driver using DMA-buf: Don't touch pages in the sg_table. But the system heap exporter (sg_table owner) should be allowed to use them.

Good point that might be possible.

> If a driver takes ownership via dma_buf_map_attachment or similar calls, the exporter must stop using the sg_table.
> User-space programs should call DMA_BUF_IOCTL_RW_FILE only when the DMA-buf is not attached.
> The exporter must check ownership (e.g., ensure no map_dma_buf/vmap is active) and block new calls during operations.
> I'll add these checks in patch v2.
> 
>>
>> Once more: This approach was already rejected multiple times! Please use
>> udmabuf instead!
>>
>> The hack you came up here is simply not necessary.
> [wangtao] Many people need DMA-buf direct I/O. I tried it 2 years ago. My method is simpler, uses less CPU/power, and performs better:

I don't think that this is a valid argument.

>   - Speed: 3418 MB/s vs. 2073 MB/s (udmabuf) at 1GHz CPU.
>   - udmabuf wastes half its CPU time on __get_user_pages.
>   - Creating 32x32MB DMA-bufs + reading 1GB file takes 346 ms vs. 1145 ms for udmabuf (10x slower) vs. 1503 ms for DMA-buf normal.

Why would using udmabuf be slower here?

> udmabuf is slightly faster but not enough. Switching to udmabuf is easy for small apps but hard in complex systems without major benefits.

Yeah, but your approach here is a rather clear hack. Using udmabuf is much cleaner and generally accepted by everybody now.

As far as I can see I have to reject your approach here.

Regards,
Christian.


wangtao May 15, 2025, 2:03 p.m. UTC | #4
> From: Christian König <christian.koenig@amd.com>
> On 5/14/25 13:02, wangtao wrote:
> >> From: Christian König <christian.koenig@amd.com>
> >> On 5/13/25 14:30, wangtao wrote:
> >>>> From: Christian König <christian.koenig@amd.com>
> >>>> On 5/13/25 11:28, wangtao wrote:
> >>>>> Support direct file I/O operations for system_heap dma-buf objects.
> >>>>> Implementation includes:
> >>>>> 1. Convert sg_table to bio_vec
> >>>>
> >>>> That is usually illegal for DMA-bufs.
> >>> [wangtao] The term 'convert' is misleading in this context. The
> >>> appropriate
> >> phrasing should be: Construct bio_vec from sg_table.
> >>
> >> Well it doesn't matter what you call it. Touching the page inside an
> >> sg table of a DMA-buf is illegal, we even have code to actively prevent
> that.
> > [wangtao] For a driver using DMA-buf: Don't touch pages in the sg_table.
> But the system heap exporter (sg_table owner) should be allowed to use
> them.
> 
> Good point that might be possible.
> 
> > If a driver takes ownership via dma_buf_map_attachment or similar calls,
> the exporter must stop using the sg_table.
> > User-space programs should call DMA_BUF_IOCTL_RW_FILE only when the
> DMA-buf is not attached.
> > The exporter must check ownership (e.g., ensure no map_dma_buf/vmap
> is active) and block new calls during operations.
> > I'll add these checks in patch v2.
> >
> >>
> >> Once more: This approach was already rejected multiple times! Please
> >> use udmabuf instead!
> >>
> >> The hack you came up here is simply not necessary.
> > [wangtao] Many people need DMA-buf direct I/O. I tried it 2 years ago. My
> method is simpler, uses less CPU/power, and performs better:
> 
> I don't think that this is a valid argument.
> 
> >   - Speed: 3418 MB/s vs. 2073 MB/s (udmabuf) at 1GHz CPU.
> >   - udmabuf wastes half its CPU time on __get_user_pages.
> >   - Creating 32x32MB DMA-bufs + reading 1GB file takes 346 ms vs. 1145 ms
> for udmabuf (10x slower) vs. 1503 ms for DMA-buf normal.
> 
> Why would using udmabuf be slower here?
> 
> > udmabuf is slightly faster but not enough. Switching to udmabuf is easy for
> small apps but hard in complex systems without major benefits.
> 
> Yeah, but your approach here is a rather clear hack. Using udmabuf is much
> more cleaner and generally accepted by everybody now.
> 
> As far as I can see I have to reject your approach here.
> 
[wangtao] My test configuration (CPU @1GHz, average of 5 runs):
Allocation: 32x32MB buffer creation
- dmabuf 53ms vs. udmabuf 694ms (10x slower)
- Note: shmem shows excessive allocation time

Read 1024MB file:
- dmabuf direct 326ms vs. udmabuf direct 461ms (40% slower)
- Note: pin_user_pages_fast consumes the majority of CPU cycles

Key function-call timing is shown below.

Dmabuf direct io:
|- 12.39% DmaBufTest_PerfDmabufDirectIO_Test::TestBody()
|-|- 5.95% perf_dmabuf_alloc_and_io
|-|-|- 4.38% dmabuf_io_back
|-|-|-|- 3.47% ioctl
|-|-|-|-|- 3.47% __ioctl
|-|-|-|-|-|-|-|-|-|-|-|- 3.47% dma_buf_ioctl
|-|-|-|-|-|-|-|-|-|-|-|-|- 3.47% system_heap_dma_buf_rw_file
|-|-|-|-|-|-|-|-|-|-|-|-|-|- 3.46% system_heap_rw_file
|-|-|-|-|-|-|-|-|-|-|-|-|-|-|- 3.46% f2fs_file_read_iter
|-|-|-|-|-|-|-|-|-|-|-|-|-|-|-|- 3.46% __iomap_dio_rw
|-|-|- 1.33% ioctl
|-|-|-|- 1.33% __ioctl
|-|-|-|-|-|-|-|-|-|-|- 1.33% dma_heap_ioctl
|-|-|-|-|-|-|-|-|-|-|-|- 1.33% dma_heap_buffer_alloc
|-|-|-|-|-|-|-|-|-|-|-|-|- 1.33% system_heap_allocate
|-|-|-|-|-|-|-|-|-|-|-|-|-|- 1.33% system_heap_do_allocate
|-|-|-|-|-|-|-|-|-|-|-|-|-|-|- 1.28% __alloc_pages

Udmabuf direct io:
|- 39.35% DmaBufTest_PerfDmabufUDirectIO_Test::TestBody()
|-|- 32.76% perf_dmabuf_alloc_and_io
|-|-|- 17.43% alloc_udmabuf
|-|-|-|- 13.34% ioctl
|-|-|-|-|-|-|-|-|-|-|-|- 13.34% udmabuf_ioctl
|-|-|-|-|-|-|-|-|-|-|-|-|- 13.32% udmabuf_create
|-|-|-|-|-|-|-|-|-|-|-|-|-|- 13.26% shmem_read_mapping_page_gfp
|-|-|-|-|-|-|-|-|-|-|-|-|-|-|- 11.94% shmem_read_folio_gfp
|-|-|-|-|-|-|-|-|-|-|-|-|-|-|-|- 11.28% shmem_get_folio_gfp
|-|-|- 10.81% dmabuf_io_back
|-|-|-|- 8.85% read
|-|-|-|-|-|-|-|-|-|- 8.85% __arm64_sys_read
|-|-|-|-|-|-|-|-|-|-|- 8.85% f2fs_file_read_iter
|-|-|-|-|-|-|-|-|-|-|-|- 8.84% __iomap_dio_rw
|-|-|-|-|-|-|-|-|-|-|-|-|- 7.85% iomap_dio_bio_iter
|-|-|-|-|-|-|-|-|-|-|-|-|-|- 5.61% bio_iov_iter_get_pages
|-|-|-|-|-|-|-|-|-|-|-|-|-|-|- 5.46% iov_iter_extract_pages
|-|-|-|-|-|-|-|-|-|-|-|-|-|-|-|- 5.46% pin_user_pages_fast
|-|-|-|-|-|-|-|-|-|-|-|-|-|-|-|-|- 5.46% internal_get_user_pages_fast
|-|-|-|-|-|-|-|-|-|-|-|-|-|-|-|-|-|- 5.46% __gup_longterm_locked
|-|-|-|-|-|-|-|-|-|-|-|-|-|-|-|-|-|-|- 5.36% __get_user_pages

diff mbox series

Patch

diff --git a/drivers/dma-buf/heaps/system_heap.c b/drivers/dma-buf/heaps/system_heap.c
index 26d5dc89ea16..f7b71b9843aa 100644
--- a/drivers/dma-buf/heaps/system_heap.c
+++ b/drivers/dma-buf/heaps/system_heap.c
@@ -20,6 +20,8 @@ 
 #include <linux/scatterlist.h>
 #include <linux/slab.h>
 #include <linux/vmalloc.h>
+#include <linux/bvec.h>
+#include <linux/uio.h>
 
 static struct dma_heap *sys_heap;
 
@@ -281,6 +283,121 @@  static void system_heap_vunmap(struct dma_buf *dmabuf, struct iosys_map *map)
 	iosys_map_clear(map);
 }
 
+static struct bio_vec *system_heap_init_bvec(struct system_heap_buffer *buffer,
+			size_t offset, size_t len, int *nr_segs)
+{
+	struct sg_table *sgt = &buffer->sg_table;
+	struct scatterlist *sg;
+	size_t length = 0;
+	unsigned int i, k = 0;
+	struct bio_vec *bvec;
+	size_t sg_left;
+	size_t sg_offset;
+	size_t sg_len;
+
+	bvec = kvcalloc(sgt->nents, sizeof(*bvec), GFP_KERNEL);
+	if (!bvec)
+		return NULL;
+
+	for_each_sg(sgt->sgl, sg, sgt->nents, i) {
+		length += sg->length;
+		if (length <= offset)
+			continue;
+
+		sg_left = length - offset;
+		sg_offset = sg->offset + sg->length - sg_left;
+		sg_len = min(sg_left, len);
+
+		bvec[k].bv_page = sg_page(sg);
+		bvec[k].bv_len = sg_len;
+		bvec[k].bv_offset = sg_offset;
+		k++;
+
+		offset += sg_len;
+		len -= sg_len;
+		if (!len)
+			break;
+	}
+
+	*nr_segs = k;
+	return bvec;
+}
+
+static int system_heap_rw_file(struct system_heap_buffer *buffer, bool is_read,
+		bool direct_io, struct file *filp, loff_t file_offset,
+		size_t buf_offset, size_t len)
+{
+	struct bio_vec *bvec;
+	int nr_segs = 0;
+	struct iov_iter iter;
+	struct kiocb kiocb;
+	ssize_t ret = 0;
+
+	if (direct_io) {
+		if (!(filp->f_mode & FMODE_CAN_ODIRECT))
+			return -EINVAL;
+	}
+
+	bvec = system_heap_init_bvec(buffer, buf_offset, len, &nr_segs);
+	if (!bvec)
+		return -ENOMEM;
+
+	iov_iter_bvec(&iter, is_read ? ITER_DEST : ITER_SOURCE, bvec, nr_segs, len);
+	init_sync_kiocb(&kiocb, filp);
+	kiocb.ki_pos = file_offset;
+	if (direct_io)
+		kiocb.ki_flags |= IOCB_DIRECT;
+
+	while (kiocb.ki_pos < file_offset + len) {
+		if (is_read)
+			ret = vfs_iocb_iter_read(filp, &kiocb, &iter);
+		else
+			ret = vfs_iocb_iter_write(filp, &kiocb, &iter);
+		if (ret <= 0)
+			break;
+	}
+
+	kvfree(bvec);
+	return ret < 0 ? ret : 0;
+}
+
+static int system_heap_dma_buf_rw_file(struct dma_buf *dmabuf,
+			struct dma_buf_rw_file *back)
+{
+	struct system_heap_buffer *buffer = dmabuf->priv;
+	int ret = 0;
+	__u32 op = back->flags & DMA_BUF_RW_FLAGS_OP_MASK;
+	bool direct_io = back->flags & DMA_BUF_RW_FLAGS_DIRECT;
+	struct file *filp;
+
+	if (op != DMA_BUF_RW_FLAGS_READ && op != DMA_BUF_RW_FLAGS_WRITE)
+		return -EINVAL;
+	if (direct_io) {
+		if (!PAGE_ALIGNED(back->file_offset) ||
+		    !PAGE_ALIGNED(back->buf_offset) ||
+		    !PAGE_ALIGNED(back->buf_len))
+			return -EINVAL;
+	}
+	if (!back->buf_len || back->buf_len > dmabuf->size ||
+	    back->buf_offset >= dmabuf->size ||
+	    back->buf_offset + back->buf_len > dmabuf->size)
+		return -EINVAL;
+	if (back->file_offset + back->buf_len < back->file_offset)
+		return -EINVAL;
+
+	filp = fget(back->fd);
+	if (!filp)
+		return -EBADF;
+
+	mutex_lock(&buffer->lock);
+	ret = system_heap_rw_file(buffer, op == DMA_BUF_RW_FLAGS_READ, direct_io,
+			filp, back->file_offset, back->buf_offset, back->buf_len);
+	mutex_unlock(&buffer->lock);
+
+	fput(filp);
+	return ret;
+}
+
 static void system_heap_dma_buf_release(struct dma_buf *dmabuf)
 {
 	struct system_heap_buffer *buffer = dmabuf->priv;
@@ -308,6 +425,7 @@  static const struct dma_buf_ops system_heap_buf_ops = {
 	.mmap = system_heap_mmap,
 	.vmap = system_heap_vmap,
 	.vunmap = system_heap_vunmap,
+	.rw_file = system_heap_dma_buf_rw_file,
 	.release = system_heap_dma_buf_release,
 };