diff mbox series

[v4] dma-buf: Add DmaBufTotal counter in meminfo

Message ID 20210417104032.5521-1-peter.enderborg@sony.com
State Superseded
Headers show
Series [v4] dma-buf: Add DmaBufTotal counter in meminfo | expand

Commit Message

Peter.Enderborg@sony.com April 17, 2021, 10:40 a.m. UTC
This adds a total used dma-buf memory. Details
can be found in debugfs, however it is not for everyone
and not always available. dma-buf are indirect allocated by
userspace. So with this value we can monitor and detect
userspace applications that have problems.

Signed-off-by: Peter Enderborg <peter.enderborg@sony.com>
---
 drivers/dma-buf/dma-buf.c | 13 +++++++++++++
 fs/proc/meminfo.c         |  5 ++++-
 include/linux/dma-buf.h   |  1 +
 3 files changed, 18 insertions(+), 1 deletion(-)

Comments

Christian König April 17, 2021, 10:59 a.m. UTC | #1
Am 17.04.21 um 12:40 schrieb Peter Enderborg:
> This adds a total used dma-buf memory. Details
> can be found in debugfs, however it is not for everyone
> and not always available. dma-buf are indirect allocated by
> userspace. So with this value we can monitor and detect
> userspace applications that have problems.
>
> Signed-off-by: Peter Enderborg <peter.enderborg@sony.com>

Reviewed-by: Christian König <christian.koenig@amd.com>

How do you want to upstream this?

> ---
>   drivers/dma-buf/dma-buf.c | 13 +++++++++++++
>   fs/proc/meminfo.c         |  5 ++++-
>   include/linux/dma-buf.h   |  1 +
>   3 files changed, 18 insertions(+), 1 deletion(-)
>
> diff --git a/drivers/dma-buf/dma-buf.c b/drivers/dma-buf/dma-buf.c
> index f264b70c383e..197e5c45dd26 100644
> --- a/drivers/dma-buf/dma-buf.c
> +++ b/drivers/dma-buf/dma-buf.c
> @@ -37,6 +37,7 @@ struct dma_buf_list {
>   };
>   
>   static struct dma_buf_list db_list;
> +static atomic_long_t dma_buf_global_allocated;
>   
>   static char *dmabuffs_dname(struct dentry *dentry, char *buffer, int buflen)
>   {
> @@ -79,6 +80,7 @@ static void dma_buf_release(struct dentry *dentry)
>   	if (dmabuf->resv == (struct dma_resv *)&dmabuf[1])
>   		dma_resv_fini(dmabuf->resv);
>   
> +	atomic_long_sub(dmabuf->size, &dma_buf_global_allocated);
>   	module_put(dmabuf->owner);
>   	kfree(dmabuf->name);
>   	kfree(dmabuf);
> @@ -586,6 +588,7 @@ struct dma_buf *dma_buf_export(const struct dma_buf_export_info *exp_info)
>   	mutex_lock(&db_list.lock);
>   	list_add(&dmabuf->list_node, &db_list.head);
>   	mutex_unlock(&db_list.lock);
> +	atomic_long_add(dmabuf->size, &dma_buf_global_allocated);
>   
>   	return dmabuf;
>   
> @@ -1346,6 +1349,16 @@ void dma_buf_vunmap(struct dma_buf *dmabuf, struct dma_buf_map *map)
>   }
>   EXPORT_SYMBOL_GPL(dma_buf_vunmap);
>   
> +/**
> + * dma_buf_allocated_pages - Return the used nr of pages
> + * allocated for dma-buf
> + */
> +long dma_buf_allocated_pages(void)
> +{
> +	return atomic_long_read(&dma_buf_global_allocated) >> PAGE_SHIFT;
> +}
> +EXPORT_SYMBOL_GPL(dma_buf_allocated_pages);
> +
>   #ifdef CONFIG_DEBUG_FS
>   static int dma_buf_debug_show(struct seq_file *s, void *unused)
>   {
> diff --git a/fs/proc/meminfo.c b/fs/proc/meminfo.c
> index 6fa761c9cc78..ccc7c40c8db7 100644
> --- a/fs/proc/meminfo.c
> +++ b/fs/proc/meminfo.c
> @@ -16,6 +16,7 @@
>   #ifdef CONFIG_CMA
>   #include <linux/cma.h>
>   #endif
> +#include <linux/dma-buf.h>
>   #include <asm/page.h>
>   #include "internal.h"
>   
> @@ -145,7 +146,9 @@ static int meminfo_proc_show(struct seq_file *m, void *v)
>   	show_val_kb(m, "CmaFree:        ",
>   		    global_zone_page_state(NR_FREE_CMA_PAGES));
>   #endif
> -
> +#ifdef CONFIG_DMA_SHARED_BUFFER
> +	show_val_kb(m, "DmaBufTotal:    ", dma_buf_allocated_pages());
> +#endif
>   	hugetlb_report_meminfo(m);
>   
>   	arch_report_meminfo(m);
> diff --git a/include/linux/dma-buf.h b/include/linux/dma-buf.h
> index efdc56b9d95f..5b05816bd2cd 100644
> --- a/include/linux/dma-buf.h
> +++ b/include/linux/dma-buf.h
> @@ -507,4 +507,5 @@ int dma_buf_mmap(struct dma_buf *, struct vm_area_struct *,
>   		 unsigned long);
>   int dma_buf_vmap(struct dma_buf *dmabuf, struct dma_buf_map *map);
>   void dma_buf_vunmap(struct dma_buf *dmabuf, struct dma_buf_map *map);
> +long dma_buf_allocated_pages(void);
>   #endif /* __DMA_BUF_H__ */
Christian König April 17, 2021, 11:54 a.m. UTC | #2
Am 17.04.21 um 13:20 schrieb Peter.Enderborg@sony.com:
> On 4/17/21 12:59 PM, Christian König wrote:
>> Am 17.04.21 um 12:40 schrieb Peter Enderborg:
>>> This adds a total used dma-buf memory. Details
>>> can be found in debugfs, however it is not for everyone
>>> and not always available. dma-buf are indirect allocated by
>>> userspace. So with this value we can monitor and detect
>>> userspace applications that have problems.
>>>
>>> Signed-off-by: Peter Enderborg <peter.enderborg@sony.com>
>> Reviewed-by: Christian König <christian.koenig@amd.com>
>>
>> How do you want to upstream this?
> I don't understand that question. The patch applies on Torvalds 5.12-rc7,
> but I guess 5.13 is what we work on right now.

Yeah, but how do you want to get it into Linus tree?

I can push it together with other DMA-buf patches through drm-misc-next 
and then Dave will send it to Linus for inclusion in 5.13.

But could be that you are pushing multiple changes towards Linus through 
some other branch. In this case I'm fine if you pick that way instead if 
you want to keep your patches together for some reason.

Christian.

>
>>> ---
>>>    drivers/dma-buf/dma-buf.c | 13 +++++++++++++
>>>    fs/proc/meminfo.c         |  5 ++++-
>>>    include/linux/dma-buf.h   |  1 +
>>>    3 files changed, 18 insertions(+), 1 deletion(-)
>>>
>>> diff --git a/drivers/dma-buf/dma-buf.c b/drivers/dma-buf/dma-buf.c
>>> index f264b70c383e..197e5c45dd26 100644
>>> --- a/drivers/dma-buf/dma-buf.c
>>> +++ b/drivers/dma-buf/dma-buf.c
>>> @@ -37,6 +37,7 @@ struct dma_buf_list {
>>>    };
>>>      static struct dma_buf_list db_list;
>>> +static atomic_long_t dma_buf_global_allocated;
>>>      static char *dmabuffs_dname(struct dentry *dentry, char *buffer, int buflen)
>>>    {
>>> @@ -79,6 +80,7 @@ static void dma_buf_release(struct dentry *dentry)
>>>        if (dmabuf->resv == (struct dma_resv *)&dmabuf[1])
>>>            dma_resv_fini(dmabuf->resv);
>>>    +    atomic_long_sub(dmabuf->size, &dma_buf_global_allocated);
>>>        module_put(dmabuf->owner);
>>>        kfree(dmabuf->name);
>>>        kfree(dmabuf);
>>> @@ -586,6 +588,7 @@ struct dma_buf *dma_buf_export(const struct dma_buf_export_info *exp_info)
>>>        mutex_lock(&db_list.lock);
>>>        list_add(&dmabuf->list_node, &db_list.head);
>>>        mutex_unlock(&db_list.lock);
>>> +    atomic_long_add(dmabuf->size, &dma_buf_global_allocated);
>>>          return dmabuf;
>>>    @@ -1346,6 +1349,16 @@ void dma_buf_vunmap(struct dma_buf *dmabuf, struct dma_buf_map *map)
>>>    }
>>>    EXPORT_SYMBOL_GPL(dma_buf_vunmap);
>>>    +/**
>>> + * dma_buf_allocated_pages - Return the used nr of pages
>>> + * allocated for dma-buf
>>> + */
>>> +long dma_buf_allocated_pages(void)
>>> +{
>>> +    return atomic_long_read(&dma_buf_global_allocated) >> PAGE_SHIFT;
>>> +}
>>> +EXPORT_SYMBOL_GPL(dma_buf_allocated_pages);
>>> +
>>>    #ifdef CONFIG_DEBUG_FS
>>>    static int dma_buf_debug_show(struct seq_file *s, void *unused)
>>>    {
>>> diff --git a/fs/proc/meminfo.c b/fs/proc/meminfo.c
>>> index 6fa761c9cc78..ccc7c40c8db7 100644
>>> --- a/fs/proc/meminfo.c
>>> +++ b/fs/proc/meminfo.c
>>> @@ -16,6 +16,7 @@
>>>    #ifdef CONFIG_CMA
>>>    #include <linux/cma.h>
>>>    #endif
>>> +#include <linux/dma-buf.h>
>>>    #include <asm/page.h>
>>>    #include "internal.h"
>>>    @@ -145,7 +146,9 @@ static int meminfo_proc_show(struct seq_file *m, void *v)
>>>        show_val_kb(m, "CmaFree:        ",
>>>                global_zone_page_state(NR_FREE_CMA_PAGES));
>>>    #endif
>>> -
>>> +#ifdef CONFIG_DMA_SHARED_BUFFER
>>> +    show_val_kb(m, "DmaBufTotal:    ", dma_buf_allocated_pages());
>>> +#endif
>>>        hugetlb_report_meminfo(m);
>>>          arch_report_meminfo(m);
>>> diff --git a/include/linux/dma-buf.h b/include/linux/dma-buf.h
>>> index efdc56b9d95f..5b05816bd2cd 100644
>>> --- a/include/linux/dma-buf.h
>>> +++ b/include/linux/dma-buf.h
>>> @@ -507,4 +507,5 @@ int dma_buf_mmap(struct dma_buf *, struct vm_area_struct *,
>>>             unsigned long);
>>>    int dma_buf_vmap(struct dma_buf *dmabuf, struct dma_buf_map *map);
>>>    void dma_buf_vunmap(struct dma_buf *dmabuf, struct dma_buf_map *map);
>>> +long dma_buf_allocated_pages(void);
>>>    #endif /* __DMA_BUF_H__ */
Muchun Song April 17, 2021, 1:07 p.m. UTC | #3
On Sat, Apr 17, 2021 at 6:41 PM Peter Enderborg
<peter.enderborg@sony.com> wrote:
>
> This adds a total used dma-buf memory. Details
> can be found in debugfs, however it is not for everyone
> and not always available. dma-buf are indirect allocated by
> userspace. So with this value we can monitor and detect
> userspace applications that have problems.
>
> Signed-off-by: Peter Enderborg <peter.enderborg@sony.com>
> ---
>  drivers/dma-buf/dma-buf.c | 13 +++++++++++++
>  fs/proc/meminfo.c         |  5 ++++-
>  include/linux/dma-buf.h   |  1 +
>  3 files changed, 18 insertions(+), 1 deletion(-)
>
> diff --git a/drivers/dma-buf/dma-buf.c b/drivers/dma-buf/dma-buf.c
> index f264b70c383e..197e5c45dd26 100644
> --- a/drivers/dma-buf/dma-buf.c
> +++ b/drivers/dma-buf/dma-buf.c
> @@ -37,6 +37,7 @@ struct dma_buf_list {
>  };
>
>  static struct dma_buf_list db_list;
> +static atomic_long_t dma_buf_global_allocated;
>
>  static char *dmabuffs_dname(struct dentry *dentry, char *buffer, int buflen)
>  {
> @@ -79,6 +80,7 @@ static void dma_buf_release(struct dentry *dentry)
>         if (dmabuf->resv == (struct dma_resv *)&dmabuf[1])
>                 dma_resv_fini(dmabuf->resv);
>
> +       atomic_long_sub(dmabuf->size, &dma_buf_global_allocated);
>         module_put(dmabuf->owner);
>         kfree(dmabuf->name);
>         kfree(dmabuf);
> @@ -586,6 +588,7 @@ struct dma_buf *dma_buf_export(const struct dma_buf_export_info *exp_info)
>         mutex_lock(&db_list.lock);
>         list_add(&dmabuf->list_node, &db_list.head);
>         mutex_unlock(&db_list.lock);
> +       atomic_long_add(dmabuf->size, &dma_buf_global_allocated);
>
>         return dmabuf;
>
> @@ -1346,6 +1349,16 @@ void dma_buf_vunmap(struct dma_buf *dmabuf, struct dma_buf_map *map)
>  }
>  EXPORT_SYMBOL_GPL(dma_buf_vunmap);
>
> +/**
> + * dma_buf_allocated_pages - Return the used nr of pages
> + * allocated for dma-buf
> + */
> +long dma_buf_allocated_pages(void)
> +{
> +       return atomic_long_read(&dma_buf_global_allocated) >> PAGE_SHIFT;
> +}
> +EXPORT_SYMBOL_GPL(dma_buf_allocated_pages);

dma_buf_allocated_pages is only called from fs/proc/meminfo.c.
I am confused why it should be exported. If it won't be called
from the driver module, we should not export it.

Thanks.

> +
>  #ifdef CONFIG_DEBUG_FS
>  static int dma_buf_debug_show(struct seq_file *s, void *unused)
>  {
> diff --git a/fs/proc/meminfo.c b/fs/proc/meminfo.c
> index 6fa761c9cc78..ccc7c40c8db7 100644
> --- a/fs/proc/meminfo.c
> +++ b/fs/proc/meminfo.c
> @@ -16,6 +16,7 @@
>  #ifdef CONFIG_CMA
>  #include <linux/cma.h>
>  #endif
> +#include <linux/dma-buf.h>
>  #include <asm/page.h>
>  #include "internal.h"
>
> @@ -145,7 +146,9 @@ static int meminfo_proc_show(struct seq_file *m, void *v)
>         show_val_kb(m, "CmaFree:        ",
>                     global_zone_page_state(NR_FREE_CMA_PAGES));
>  #endif
> -
> +#ifdef CONFIG_DMA_SHARED_BUFFER
> +       show_val_kb(m, "DmaBufTotal:    ", dma_buf_allocated_pages());
> +#endif
>         hugetlb_report_meminfo(m);
>
>         arch_report_meminfo(m);
> diff --git a/include/linux/dma-buf.h b/include/linux/dma-buf.h
> index efdc56b9d95f..5b05816bd2cd 100644
> --- a/include/linux/dma-buf.h
> +++ b/include/linux/dma-buf.h
> @@ -507,4 +507,5 @@ int dma_buf_mmap(struct dma_buf *, struct vm_area_struct *,
>                  unsigned long);
>  int dma_buf_vmap(struct dma_buf *dmabuf, struct dma_buf_map *map);
>  void dma_buf_vunmap(struct dma_buf *dmabuf, struct dma_buf_map *map);
> +long dma_buf_allocated_pages(void);
>  #endif /* __DMA_BUF_H__ */
> --
> 2.17.1
>
Peter.Enderborg@sony.com April 17, 2021, 1:43 p.m. UTC | #4
On 4/17/21 3:07 PM, Muchun Song wrote:
> On Sat, Apr 17, 2021 at 6:41 PM Peter Enderborg

> <peter.enderborg@sony.com> wrote:

>> This adds a total used dma-buf memory. Details

>> can be found in debugfs, however it is not for everyone

>> and not always available. dma-buf are indirect allocated by

>> userspace. So with this value we can monitor and detect

>> userspace applications that have problems.

>>

>> Signed-off-by: Peter Enderborg <peter.enderborg@sony.com>

>> ---

>>  drivers/dma-buf/dma-buf.c | 13 +++++++++++++

>>  fs/proc/meminfo.c         |  5 ++++-

>>  include/linux/dma-buf.h   |  1 +

>>  3 files changed, 18 insertions(+), 1 deletion(-)

>>

>> diff --git a/drivers/dma-buf/dma-buf.c b/drivers/dma-buf/dma-buf.c

>> index f264b70c383e..197e5c45dd26 100644

>> --- a/drivers/dma-buf/dma-buf.c

>> +++ b/drivers/dma-buf/dma-buf.c

>> @@ -37,6 +37,7 @@ struct dma_buf_list {

>>  };

>>

>>  static struct dma_buf_list db_list;

>> +static atomic_long_t dma_buf_global_allocated;

>>

>>  static char *dmabuffs_dname(struct dentry *dentry, char *buffer, int buflen)

>>  {

>> @@ -79,6 +80,7 @@ static void dma_buf_release(struct dentry *dentry)

>>         if (dmabuf->resv == (struct dma_resv *)&dmabuf[1])

>>                 dma_resv_fini(dmabuf->resv);

>>

>> +       atomic_long_sub(dmabuf->size, &dma_buf_global_allocated);

>>         module_put(dmabuf->owner);

>>         kfree(dmabuf->name);

>>         kfree(dmabuf);

>> @@ -586,6 +588,7 @@ struct dma_buf *dma_buf_export(const struct dma_buf_export_info *exp_info)

>>         mutex_lock(&db_list.lock);

>>         list_add(&dmabuf->list_node, &db_list.head);

>>         mutex_unlock(&db_list.lock);

>> +       atomic_long_add(dmabuf->size, &dma_buf_global_allocated);

>>

>>         return dmabuf;

>>

>> @@ -1346,6 +1349,16 @@ void dma_buf_vunmap(struct dma_buf *dmabuf, struct dma_buf_map *map)

>>  }

>>  EXPORT_SYMBOL_GPL(dma_buf_vunmap);

>>

>> +/**

>> + * dma_buf_allocated_pages - Return the used nr of pages

>> + * allocated for dma-buf

>> + */

>> +long dma_buf_allocated_pages(void)

>> +{

>> +       return atomic_long_read(&dma_buf_global_allocated) >> PAGE_SHIFT;

>> +}

>> +EXPORT_SYMBOL_GPL(dma_buf_allocated_pages);

> dma_buf_allocated_pages is only called from fs/proc/meminfo.c.

> I am confused why it should be exported. If it won't be called

> from the driver module, we should not export it.


Ah. I thought you did not want the GPL restriction. I don't have real
opinion about it. It's written to be following the rest of the module.
It is not needed for the usage of dma-buf in kernel module. But I
don't see any reason for hiding it either.


> Thanks.

>

>> +

>>  #ifdef CONFIG_DEBUG_FS

>>  static int dma_buf_debug_show(struct seq_file *s, void *unused)

>>  {

>> diff --git a/fs/proc/meminfo.c b/fs/proc/meminfo.c

>> index 6fa761c9cc78..ccc7c40c8db7 100644

>> --- a/fs/proc/meminfo.c

>> +++ b/fs/proc/meminfo.c

>> @@ -16,6 +16,7 @@

>>  #ifdef CONFIG_CMA

>>  #include <linux/cma.h>

>>  #endif

>> +#include <linux/dma-buf.h>

>>  #include <asm/page.h>

>>  #include "internal.h"

>>

>> @@ -145,7 +146,9 @@ static int meminfo_proc_show(struct seq_file *m, void *v)

>>         show_val_kb(m, "CmaFree:        ",

>>                     global_zone_page_state(NR_FREE_CMA_PAGES));

>>  #endif

>> -

>> +#ifdef CONFIG_DMA_SHARED_BUFFER

>> +       show_val_kb(m, "DmaBufTotal:    ", dma_buf_allocated_pages());

>> +#endif

>>         hugetlb_report_meminfo(m);

>>

>>         arch_report_meminfo(m);

>> diff --git a/include/linux/dma-buf.h b/include/linux/dma-buf.h

>> index efdc56b9d95f..5b05816bd2cd 100644

>> --- a/include/linux/dma-buf.h

>> +++ b/include/linux/dma-buf.h

>> @@ -507,4 +507,5 @@ int dma_buf_mmap(struct dma_buf *, struct vm_area_struct *,

>>                  unsigned long);

>>  int dma_buf_vmap(struct dma_buf *dmabuf, struct dma_buf_map *map);

>>  void dma_buf_vunmap(struct dma_buf *dmabuf, struct dma_buf_map *map);

>> +long dma_buf_allocated_pages(void);

>>  #endif /* __DMA_BUF_H__ */

>> --

>> 2.17.1

>>
Michal Hocko April 19, 2021, 12:16 p.m. UTC | #5
On Sat 17-04-21 12:40:32, Peter Enderborg wrote:
> This adds a total used dma-buf memory. Details

> can be found in debugfs, however it is not for everyone

> and not always available. dma-buf are indirect allocated by

> userspace. So with this value we can monitor and detect

> userspace applications that have problems.


The changelog would benefit from more background on why this is needed,
and who is the primary consumer of that value.

I cannot really comment on the dma-buf internals but I have two remarks.
Documentation/filesystems/proc.rst needs an update with the counter
explanation and secondly is this information useful for OOM situations
analysis? If yes then show_mem should dump the value as well.

From the implementation point of view, is there any reason why this
hasn't used the existing global_node_page_state infrastructure?
-- 
Michal Hocko
SUSE Labs
Michal Hocko April 19, 2021, 3 p.m. UTC | #6
On Mon 19-04-21 12:41:58, Peter.Enderborg@sony.com wrote:
> On 4/19/21 2:16 PM, Michal Hocko wrote:
> > On Sat 17-04-21 12:40:32, Peter Enderborg wrote:
> >> This adds a total used dma-buf memory. Details
> >> can be found in debugfs, however it is not for everyone
> >> and not always available. dma-buf are indirect allocated by
> >> userspace. So with this value we can monitor and detect
> >> userspace applications that have problems.
> > The changelog would benefit from more background on why this is needed,
> > and who is the primary consumer of that value.
> >
> > I cannot really comment on the dma-buf internals but I have two remarks.
> > Documentation/filesystems/proc.rst needs an update with the counter
> > explanation and secondly is this information useful for OOM situations
> > analysis? If yes then show_mem should dump the value as well.
> >
> > From the implementation point of view, is there any reason why this
> > hasn't used the existing global_node_page_state infrastructure?
> 
> I fix doc in next version.  Im not sure what you expect the commit message to include.

As I've said. Usual justification covers answers to following questions
	- Why do we need it?
	- Why the existing data is insuficient?
	- Who is supposed to use the data and for what?

I can see an answer for the first two questions (because this can be a
lot of memory and the existing infrastructure is not production suitable
- debugfs). But the changelog doesn't really explain who is going to use
the new data. Is this a monitoring to raise an early alarm when the
value grows? Is this for debugging misbehaving drivers? How is it
valuable for those?

> The function of the meminfo is: (From Documentation/filesystems/proc.rst)
> 
> "Provides information about distribution and utilization of memory."

True. Yet we do not export any random counters, do we?

> Im not the designed of dma-buf, I think  global_node_page_state as a kernel
> internal.

It provides a node specific and optimized counters. Is this a good fit
with your new counter? Or the NUMA locality is of no importance?

> dma-buf is a device driver that provides a function so I might be
> on the outside. However I also see that it might be relevant for a OOM.
> It is memory that can be freed by killing userspace processes.
> 
> The show_mem thing. Should it be a separate patch?

This is up to you but if you want to expose the counter then send it in
one series.
Peter.Enderborg@sony.com April 19, 2021, 3:19 p.m. UTC | #7
On 4/19/21 5:00 PM, Michal Hocko wrote:
> On Mon 19-04-21 12:41:58, Peter.Enderborg@sony.com wrote:

>> On 4/19/21 2:16 PM, Michal Hocko wrote:

>>> On Sat 17-04-21 12:40:32, Peter Enderborg wrote:

>>>> This adds a total used dma-buf memory. Details

>>>> can be found in debugfs, however it is not for everyone

>>>> and not always available. dma-buf are indirect allocated by

>>>> userspace. So with this value we can monitor and detect

>>>> userspace applications that have problems.

>>> The changelog would benefit from more background on why this is needed,

>>> and who is the primary consumer of that value.

>>>

>>> I cannot really comment on the dma-buf internals but I have two remarks.

>>> Documentation/filesystems/proc.rst needs an update with the counter

>>> explanation and secondly is this information useful for OOM situations

>>> analysis? If yes then show_mem should dump the value as well.

>>>

>>> From the implementation point of view, is there any reason why this

>>> hasn't used the existing global_node_page_state infrastructure?

>> I fix doc in next version.  Im not sure what you expect the commit message to include.

> As I've said. Usual justification covers answers to following questions

> 	- Why do we need it?

> 	- Why the existing data is insuficient?

> 	- Who is supposed to use the data and for what?

>

> I can see an answer for the first two questions (because this can be a

> lot of memory and the existing infrastructure is not production suitable

> - debugfs). But the changelog doesn't really explain who is going to use

> the new data. Is this a monitoring to raise an early alarm when the

> value grows? Is this for debugging misbehaving drivers? How is it

> valuable for those?

>

>> The function of the meminfo is: (From Documentation/filesystems/proc.rst)

>>

>> "Provides information about distribution and utilization of memory."

> True. Yet we do not export any random counters, do we?

>

>> Im not the designed of dma-buf, I think  global_node_page_state as a kernel

>> internal.

> It provides a node specific and optimized counters. Is this a good fit

> with your new counter? Or the NUMA locality is of no importance?


Sounds good to me, if Christian Koenig think it is good, I will use that.
It is only virtio in drivers that use the global_node_page_state if
that matters.


>

>> dma-buf is a device driver that provides a function so I might be

>> on the outside. However I also see that it might be relevant for a OOM.

>> It is memory that can be freed by killing userspace processes.

>>

>> The show_mem thing. Should it be a separate patch?

> This is up to you but if you want to expose the counter then send it in

> one series.

>
Christian König April 19, 2021, 3:44 p.m. UTC | #8
Am 19.04.21 um 17:19 schrieb Peter.Enderborg@sony.com:
> On 4/19/21 5:00 PM, Michal Hocko wrote:

>> On Mon 19-04-21 12:41:58, Peter.Enderborg@sony.com wrote:

>>> On 4/19/21 2:16 PM, Michal Hocko wrote:

>>>> On Sat 17-04-21 12:40:32, Peter Enderborg wrote:

>>>>> This adds a total used dma-buf memory. Details

>>>>> can be found in debugfs, however it is not for everyone

>>>>> and not always available. dma-buf are indirect allocated by

>>>>> userspace. So with this value we can monitor and detect

>>>>> userspace applications that have problems.

>>>> The changelog would benefit from more background on why this is needed,

>>>> and who is the primary consumer of that value.

>>>>

>>>> I cannot really comment on the dma-buf internals but I have two remarks.

>>>> Documentation/filesystems/proc.rst needs an update with the counter

>>>> explanation and secondly is this information useful for OOM situations

>>>> analysis? If yes then show_mem should dump the value as well.

>>>>

>>>>  From the implementation point of view, is there any reason why this

>>>> hasn't used the existing global_node_page_state infrastructure?

>>> I fix doc in next version.  Im not sure what you expect the commit message to include.

>> As I've said. Usual justification covers answers to following questions

>> 	- Why do we need it?

>> 	- Why the existing data is insuficient?

>> 	- Who is supposed to use the data and for what?

>>

>> I can see an answer for the first two questions (because this can be a

>> lot of memory and the existing infrastructure is not production suitable

>> - debugfs). But the changelog doesn't really explain who is going to use

>> the new data. Is this a monitoring to raise an early alarm when the

>> value grows? Is this for debugging misbehaving drivers? How is it

>> valuable for those?

>>

>>> The function of the meminfo is: (From Documentation/filesystems/proc.rst)

>>>

>>> "Provides information about distribution and utilization of memory."

>> True. Yet we do not export any random counters, do we?

>>

>>> Im not the designed of dma-buf, I think  global_node_page_state as a kernel

>>> internal.

>> It provides a node specific and optimized counters. Is this a good fit

>> with your new counter? Or the NUMA locality is of no importance?

> Sounds good to me, if Christian Koenig think it is good, I will use that.

> It is only virtio in drivers that use the global_node_page_state if

> that matters.


DMA-buf are not NUMA aware at all. On which node the pages are allocated 
(and if we use pages at all and not internal device memory) is up to the 
exporter and importer.

Christian.

>

>

>>> dma-buf is a device driver that provides a function so I might be

>>> on the outside. However I also see that it might be relevant for a OOM.

>>> It is memory that can be freed by killing userspace processes.

>>>

>>> The show_mem thing. Should it be a separate patch?

>> This is up to you but if you want to expose the counter then send it in

>> one series.

>>
Michal Hocko April 19, 2021, 4:11 p.m. UTC | #9
On Mon 19-04-21 17:44:13, Christian König wrote:
> Am 19.04.21 um 17:19 schrieb Peter.Enderborg@sony.com:

> > On 4/19/21 5:00 PM, Michal Hocko wrote:

> > > On Mon 19-04-21 12:41:58, Peter.Enderborg@sony.com wrote:

> > > > On 4/19/21 2:16 PM, Michal Hocko wrote:

> > > > > On Sat 17-04-21 12:40:32, Peter Enderborg wrote:

> > > > > > This adds a total used dma-buf memory. Details

> > > > > > can be found in debugfs, however it is not for everyone

> > > > > > and not always available. dma-buf are indirect allocated by

> > > > > > userspace. So with this value we can monitor and detect

> > > > > > userspace applications that have problems.

> > > > > The changelog would benefit from more background on why this is needed,

> > > > > and who is the primary consumer of that value.

> > > > > 

> > > > > I cannot really comment on the dma-buf internals but I have two remarks.

> > > > > Documentation/filesystems/proc.rst needs an update with the counter

> > > > > explanation and secondly is this information useful for OOM situations

> > > > > analysis? If yes then show_mem should dump the value as well.

> > > > > 

> > > > >  From the implementation point of view, is there any reason why this

> > > > > hasn't used the existing global_node_page_state infrastructure?

> > > > I fix doc in next version.  Im not sure what you expect the commit message to include.

> > > As I've said. Usual justification covers answers to following questions

> > > 	- Why do we need it?

> > > 	- Why the existing data is insuficient?

> > > 	- Who is supposed to use the data and for what?

> > > 

> > > I can see an answer for the first two questions (because this can be a

> > > lot of memory and the existing infrastructure is not production suitable

> > > - debugfs). But the changelog doesn't really explain who is going to use

> > > the new data. Is this a monitoring to raise an early alarm when the

> > > value grows? Is this for debugging misbehaving drivers? How is it

> > > valuable for those?

> > > 

> > > > The function of the meminfo is: (From Documentation/filesystems/proc.rst)

> > > > 

> > > > "Provides information about distribution and utilization of memory."

> > > True. Yet we do not export any random counters, do we?

> > > 

> > > > Im not the designed of dma-buf, I think  global_node_page_state as a kernel

> > > > internal.

> > > It provides a node specific and optimized counters. Is this a good fit

> > > with your new counter? Or the NUMA locality is of no importance?

> > Sounds good to me, if Christian Koenig think it is good, I will use that.

> > It is only virtio in drivers that use the global_node_page_state if

> > that matters.

> 

> DMA-buf are not NUMA aware at all. On which node the pages are allocated

> (and if we use pages at all and not internal device memory) is up to the

> exporter and importer.


The question is not whether it is NUMA aware but whether it is useful to
know per-numa data for the purpose the counter is supposed to serve.
-- 
Michal Hocko
SUSE Labs
Christian König April 19, 2021, 4:37 p.m. UTC | #10
Am 19.04.21 um 18:11 schrieb Michal Hocko:
> On Mon 19-04-21 17:44:13, Christian König wrote:

>> Am 19.04.21 um 17:19 schrieb Peter.Enderborg@sony.com:

>>> On 4/19/21 5:00 PM, Michal Hocko wrote:

>>>> On Mon 19-04-21 12:41:58, Peter.Enderborg@sony.com wrote:

>>>>> On 4/19/21 2:16 PM, Michal Hocko wrote:

>>>>>> On Sat 17-04-21 12:40:32, Peter Enderborg wrote:

>>>>>>> This adds a total used dma-buf memory. Details

>>>>>>> can be found in debugfs, however it is not for everyone

>>>>>>> and not always available. dma-buf are indirect allocated by

>>>>>>> userspace. So with this value we can monitor and detect

>>>>>>> userspace applications that have problems.

>>>>>> The changelog would benefit from more background on why this is needed,

>>>>>> and who is the primary consumer of that value.

>>>>>>

>>>>>> I cannot really comment on the dma-buf internals but I have two remarks.

>>>>>> Documentation/filesystems/proc.rst needs an update with the counter

>>>>>> explanation and secondly is this information useful for OOM situations

>>>>>> analysis? If yes then show_mem should dump the value as well.

>>>>>>

>>>>>>   From the implementation point of view, is there any reason why this

>>>>>> hasn't used the existing global_node_page_state infrastructure?

>>>>> I fix doc in next version.  Im not sure what you expect the commit message to include.

>>>> As I've said. Usual justification covers answers to following questions

>>>> 	- Why do we need it?

>>>> 	- Why the existing data is insuficient?

>>>> 	- Who is supposed to use the data and for what?

>>>>

>>>> I can see an answer for the first two questions (because this can be a

>>>> lot of memory and the existing infrastructure is not production suitable

>>>> - debugfs). But the changelog doesn't really explain who is going to use

>>>> the new data. Is this a monitoring to raise an early alarm when the

>>>> value grows? Is this for debugging misbehaving drivers? How is it

>>>> valuable for those?

>>>>

>>>>> The function of the meminfo is: (From Documentation/filesystems/proc.rst)

>>>>>

>>>>> "Provides information about distribution and utilization of memory."

>>>> True. Yet we do not export any random counters, do we?

>>>>

>>>>> Im not the designed of dma-buf, I think  global_node_page_state as a kernel

>>>>> internal.

>>>> It provides a node specific and optimized counters. Is this a good fit

>>>> with your new counter? Or the NUMA locality is of no importance?

>>> Sounds good to me, if Christian Koenig think it is good, I will use that.

>>> It is only virtio in drivers that use the global_node_page_state if

>>> that matters.

>> DMA-buf are not NUMA aware at all. On which node the pages are allocated

>> (and if we use pages at all and not internal device memory) is up to the

>> exporter and importer.

> The question is not whether it is NUMA aware but whether it is useful to

> know per-numa data for the purpose the counter is supposed to serve.


No, not at all. The pages of a single DMA-buf could even be from 
different NUMA nodes if the exporting driver decides that this is 
somehow useful.

Christian.
Michal Hocko April 20, 2021, 7:04 a.m. UTC | #11
On Mon 19-04-21 18:37:13, Christian König wrote:
> Am 19.04.21 um 18:11 schrieb Michal Hocko:

[...]
> > The question is not whether it is NUMA aware but whether it is useful to

> > know per-numa data for the purpose the counter is supposed to serve.

> 

> No, not at all. The pages of a single DMA-buf could even be from different

> NUMA nodes if the exporting driver decides that this is somehow useful.


As the use of the counter hasn't been explained yet I can only
speculate. One thing that I can imagine to be useful is to fill gaps in
our accounting. It is quite often that the memroy accounted in
/proc/meminfo (or oom report) doesn't add up to the overall memory
usage. In some workloads the workload can be huge! In many cases there
are other means to find out additional memory by a subsystem specific
interfaces (e.g. networking buffers). I do assume that dma-buf is just
one of those and the counter can fill the said gap at least partially
for some workloads. That is definitely useful.

What I am trying to bring up with NUMA side is that the same problem can
happen on per-node basis. Let's say that some user consumes unexpectedly
large amount of dma-buf on a certain node. This can lead to observable
performance impact on anybody on allocating from that node and even
worse cause an OOM for node bound consumers. How do I find out that it
was dma-buf that has caused the problem?

See where I am heading?
-- 
Michal Hocko
SUSE Labs
Mike Rapoport April 20, 2021, 7:20 a.m. UTC | #12
On Tue, Apr 20, 2021 at 09:04:51AM +0200, Michal Hocko wrote:
> On Mon 19-04-21 18:37:13, Christian König wrote:

> > Am 19.04.21 um 18:11 schrieb Michal Hocko:

> [...]

> > > The question is not whether it is NUMA aware but whether it is useful to

> > > know per-numa data for the purpose the counter is supposed to serve.

> > 

> > No, not at all. The pages of a single DMA-buf could even be from different

> > NUMA nodes if the exporting driver decides that this is somehow useful.

> 

> As the use of the counter hasn't been explained yet I can only

> speculate. One thing that I can imagine to be useful is to fill gaps in

> our accounting. It is quite often that the memroy accounted in

> /proc/meminfo (or oom report) doesn't add up to the overall memory

> usage. In some workloads the workload can be huge! In many cases there

> are other means to find out additional memory by a subsystem specific

> interfaces (e.g. networking buffers). I do assume that dma-buf is just

> one of those and the counter can fill the said gap at least partially

> for some workloads. That is definitely useful.


A bit off-topic.

Michal, I think it would have been nice to have an explanation like above
in Documentation/proc/meminfo, what do you say?
 
> What I am trying to bring up with NUMA side is that the same problem can

> happen on per-node basis. Let's say that some user consumes unexpectedly

> large amount of dma-buf on a certain node. This can lead to observable

> performance impact on anybody on allocating from that node and even

> worse cause an OOM for node bound consumers. How do I find out that it

> was dma-buf that has caused the problem?

> 

> See where I am heading?


-- 
Sincerely yours,
Mike.
Christian König April 20, 2021, 7:32 a.m. UTC | #13
Am 20.04.21 um 09:04 schrieb Michal Hocko:
> On Mon 19-04-21 18:37:13, Christian König wrote:
>> Am 19.04.21 um 18:11 schrieb Michal Hocko:
> [...]
>>> The question is not whether it is NUMA aware but whether it is useful to
>>> know per-numa data for the purpose the counter is supposed to serve.
>> No, not at all. The pages of a single DMA-buf could even be from different
>> NUMA nodes if the exporting driver decides that this is somehow useful.
> As the use of the counter hasn't been explained yet I can only
> speculate. One thing that I can imagine to be useful is to fill gaps in
> our accounting. It is quite often that the memroy accounted in
> /proc/meminfo (or oom report) doesn't add up to the overall memory
> usage. In some workloads the workload can be huge! In many cases there
> are other means to find out additional memory by a subsystem specific
> interfaces (e.g. networking buffers). I do assume that dma-buf is just
> one of those and the counter can fill the said gap at least partially
> for some workloads. That is definitely useful.

Yes, completely agree. I'm just not 100% sure if the DMA-buf framework 
should account for that or the individual drivers exporting DMA-bufs.

See below for a further explanation.

> What I am trying to bring up with NUMA side is that the same problem can
> happen on per-node basis. Let's say that some user consumes unexpectedly
> large amount of dma-buf on a certain node. This can lead to observable
> performance impact on anybody on allocating from that node and even
> worse cause an OOM for node bound consumers. How do I find out that it
> was dma-buf that has caused the problem?

Yes, that is the direction my thinking goes as well, but also even further.

See DMA-buf is also used to share device local memory between processes 
as well. In other words VRAM on graphics hardware.

On my test system here I have 32GB of system memory and 16GB of VRAM. I 
can use DMA-buf to allocate that 16GB of VRAM quite easily which then 
shows up under /proc/meminfo as used memory.

But that isn't really system memory at all, it's just allocated device 
memory.

> See where I am heading?

Yeah, totally. Thanks for pointing this out.

Suggestions how to handle that?

Regards,
Christian.
Michal Hocko April 20, 2021, 7:46 a.m. UTC | #14
On Tue 20-04-21 09:32:14, Christian König wrote:
> Am 20.04.21 um 09:04 schrieb Michal Hocko:

> > On Mon 19-04-21 18:37:13, Christian König wrote:

> > > Am 19.04.21 um 18:11 schrieb Michal Hocko:

[...]
> > What I am trying to bring up with NUMA side is that the same problem can

> > happen on per-node basis. Let's say that some user consumes unexpectedly

> > large amount of dma-buf on a certain node. This can lead to observable

> > performance impact on anybody on allocating from that node and even

> > worse cause an OOM for node bound consumers. How do I find out that it

> > was dma-buf that has caused the problem?

> 

> Yes, that is the direction my thinking goes as well, but also even further.

> 

> See DMA-buf is also used to share device local memory between processes as

> well. In other words VRAM on graphics hardware.

> 

> On my test system here I have 32GB of system memory and 16GB of VRAM. I can

> use DMA-buf to allocate that 16GB of VRAM quite easily which then shows up

> under /proc/meminfo as used memory.


This is something that would be really interesting in the changelog. I
mean the expected and extreme memory consumption of this memory. Ideally
with some hints on what to do when the number is really high (e.g. mount
debugfs and have a look here and there to check whether this is just too
many users or an unexpected pattern to be reported).

> But that isn't really system memory at all, it's just allocated device

> memory.


OK, that was not really clear to me. So this is not really accounted to
MemTotal? If that is really the case then reporting it into the oom
report is completely pointless and I am not even sure /proc/meminfo is
the right interface either. It would just add more confusion I am
afraid.
 
> > See where I am heading?

> 

> Yeah, totally. Thanks for pointing this out.

> 

> Suggestions how to handle that?


As I've pointed out in previous reply we do have an API to account per
node memory but now that you have brought up that this is not something
we account as a regular memory then this doesn't really fit into that
model. But maybe I am just confused.
-- 
Michal Hocko
SUSE Labs
Michal Hocko April 20, 2021, 7:47 a.m. UTC | #15
On Tue 20-04-21 10:20:43, Mike Rapoport wrote:
> On Tue, Apr 20, 2021 at 09:04:51AM +0200, Michal Hocko wrote:

> > On Mon 19-04-21 18:37:13, Christian König wrote:

> > > Am 19.04.21 um 18:11 schrieb Michal Hocko:

> > [...]

> > > > The question is not whether it is NUMA aware but whether it is useful to

> > > > know per-numa data for the purpose the counter is supposed to serve.

> > > 

> > > No, not at all. The pages of a single DMA-buf could even be from different

> > > NUMA nodes if the exporting driver decides that this is somehow useful.

> > 

> > As the use of the counter hasn't been explained yet I can only

> > speculate. One thing that I can imagine to be useful is to fill gaps in

> > our accounting. It is quite often that the memroy accounted in

> > /proc/meminfo (or oom report) doesn't add up to the overall memory

> > usage. In some workloads the workload can be huge! In many cases there

> > are other means to find out additional memory by a subsystem specific

> > interfaces (e.g. networking buffers). I do assume that dma-buf is just

> > one of those and the counter can fill the said gap at least partially

> > for some workloads. That is definitely useful.

> 

> A bit off-topic.

> 

> Michal, I think it would have been nice to have an explanation like above

> in Documentation/proc/meminfo, what do you say?


Not sure which specific parts (likely the unaccounted memory?) but sure
why not. Our /proc/meminfo is rather underdocumented. More information
cannot hurt.
-- 
Michal Hocko
SUSE Labs
Christian König April 20, 2021, 8 a.m. UTC | #16
Am 20.04.21 um 09:46 schrieb Michal Hocko:
> On Tue 20-04-21 09:32:14, Christian König wrote:

>> Am 20.04.21 um 09:04 schrieb Michal Hocko:

>>> On Mon 19-04-21 18:37:13, Christian König wrote:

>>>> Am 19.04.21 um 18:11 schrieb Michal Hocko:

> [...]

>>> What I am trying to bring up with NUMA side is that the same problem can

>>> happen on per-node basis. Let's say that some user consumes unexpectedly

>>> large amount of dma-buf on a certain node. This can lead to observable

>>> performance impact on anybody on allocating from that node and even

>>> worse cause an OOM for node bound consumers. How do I find out that it

>>> was dma-buf that has caused the problem?

>> Yes, that is the direction my thinking goes as well, but also even further.

>>

>> See DMA-buf is also used to share device local memory between processes as

>> well. In other words VRAM on graphics hardware.

>>

>> On my test system here I have 32GB of system memory and 16GB of VRAM. I can

>> use DMA-buf to allocate that 16GB of VRAM quite easily which then shows up

>> under /proc/meminfo as used memory.

> This is something that would be really interesting in the changelog. I

> mean the expected and extreme memory consumption of this memory. Ideally

> with some hints on what to do when the number is really high (e.g. mount

> debugfs and have a look here and there to check whether this is just too

> many users or an unexpected pattern to be reported).

>

>> But that isn't really system memory at all, it's just allocated device

>> memory.

> OK, that was not really clear to me. So this is not really accounted to

> MemTotal?


It depends. In a lot of embedded systems you only have system memory and 
in this case that value here is indeed really useful.

> If that is really the case then reporting it into the oom

> report is completely pointless and I am not even sure /proc/meminfo is

> the right interface either. It would just add more confusion I am

> afraid.


I kind of agree. As I said a DMA-buf could be backed by system memory or 
device memory.

In the case when it is backed by system memory it does make sense to 
report this in an OOM dump.

But only the exporting driver knows what the DMA-buf handle represents, 
the framework just provides the common ground for inter driver 
communication.

>>> See where I am heading?

>> Yeah, totally. Thanks for pointing this out.

>>

>> Suggestions how to handle that?

> As I've pointed out in previous reply we do have an API to account per

> node memory but now that you have brought up that this is not something

> we account as a regular memory then this doesn't really fit into that

> model. But maybe I am just confused.


Well does that API also has a counter for memory used by device drivers?

If yes then the device driver who exported the DMA-buf should probably 
use that API. If no we might want to create one.

I mean the author of this patch seems to have an use case where this is 
needed and I also see that we have some hole in how we account memory.

Christian.
Michal Hocko April 20, 2021, 8:28 a.m. UTC | #17
On Tue 20-04-21 10:00:07, Christian König wrote:
> Am 20.04.21 um 09:46 schrieb Michal Hocko:
> > On Tue 20-04-21 09:32:14, Christian König wrote:
> > > Am 20.04.21 um 09:04 schrieb Michal Hocko:
> > > > On Mon 19-04-21 18:37:13, Christian König wrote:
> > > > > Am 19.04.21 um 18:11 schrieb Michal Hocko:
> > [...]
> > > > What I am trying to bring up with NUMA side is that the same problem can
> > > > happen on per-node basis. Let's say that some user consumes unexpectedly
> > > > large amount of dma-buf on a certain node. This can lead to observable
> > > > performance impact on anybody on allocating from that node and even
> > > > worse cause an OOM for node bound consumers. How do I find out that it
> > > > was dma-buf that has caused the problem?
> > > Yes, that is the direction my thinking goes as well, but also even further.
> > > 
> > > See DMA-buf is also used to share device local memory between processes as
> > > well. In other words VRAM on graphics hardware.
> > > 
> > > On my test system here I have 32GB of system memory and 16GB of VRAM. I can
> > > use DMA-buf to allocate that 16GB of VRAM quite easily which then shows up
> > > under /proc/meminfo as used memory.
> > This is something that would be really interesting in the changelog. I
> > mean the expected and extreme memory consumption of this memory. Ideally
> > with some hints on what to do when the number is really high (e.g. mount
> > debugfs and have a look here and there to check whether this is just too
> > many users or an unexpected pattern to be reported).
> > 
> > > But that isn't really system memory at all, it's just allocated device
> > > memory.
> > OK, that was not really clear to me. So this is not really accounted to
> > MemTotal?
> 
> It depends. In a lot of embedded systems you only have system memory and in
> this case that value here is indeed really useful.
> 
> > If that is really the case then reporting it into the oom
> > report is completely pointless and I am not even sure /proc/meminfo is
> > the right interface either. It would just add more confusion I am
> > afraid.
> 
> I kind of agree. As I said a DMA-buf could be backed by system memory or
> device memory.
> 
> In the case when it is backed by system memory it does make sense to report
> this in an OOM dump.
> 
> But only the exporting driver knows what the DMA-buf handle represents, the
> framework just provides the common ground for inter driver communication.

Then those drivers need to account for meminfo/oom report purposes.

> > > > See where I am heading?
> > > Yeah, totally. Thanks for pointing this out.
> > > 
> > > Suggestions how to handle that?
> > As I've pointed out in previous reply we do have an API to account per
> > node memory but now that you have brought up that this is not something
> > we account as a regular memory then this doesn't really fit into that
> > model. But maybe I am just confused.
> 
> Well does that API also has a counter for memory used by device drivers?

I think that "memory used by device drivers" is immaterial. The only
important thing is to account that memory where it makes sense. So for
RAM based allocations to report them via meminfo and find other way to
report device memory allocations.
Daniel Vetter April 20, 2021, 8:39 a.m. UTC | #18
On Sat, Apr 17, 2021 at 01:54:08PM +0200, Christian König wrote:
> Am 17.04.21 um 13:20 schrieb Peter.Enderborg@sony.com:
> > On 4/17/21 12:59 PM, Christian König wrote:
> > > Am 17.04.21 um 12:40 schrieb Peter Enderborg:
> > > > This adds a total used dma-buf memory. Details
> > > > can be found in debugfs, however it is not for everyone
> > > > and not always available. dma-buf are indirect allocated by
> > > > userspace. So with this value we can monitor and detect
> > > > userspace applications that have problems.
> > > > 
> > > > Signed-off-by: Peter Enderborg <peter.enderborg@sony.com>
> > > Reviewed-by: Christian König <christian.koenig@amd.com>
> > > 
> > > How do you want to upstream this?
> > I don't understand that question. The patch applies on Torvalds 5.12-rc7,
> > but I guess 5.13 is what we work on right now.
> 
> Yeah, but how do you want to get it into Linus tree?
> 
> I can push it together with other DMA-buf patches through drm-misc-next and
> then Dave will send it to Linus for inclusion in 5.13.

Small correction, we've already frozen for the merge window so this will
land in 5.14.
-Daniel

> 
> But could be that you are pushing multiple changes towards Linus through
> some other branch. In this case I'm fine if you pick that way instead if you
> want to keep your patches together for some reason.
> 
> Christian.
> 
> > 
> > > > ---
> > > >    drivers/dma-buf/dma-buf.c | 13 +++++++++++++
> > > >    fs/proc/meminfo.c         |  5 ++++-
> > > >    include/linux/dma-buf.h   |  1 +
> > > >    3 files changed, 18 insertions(+), 1 deletion(-)
> > > > 
> > > > diff --git a/drivers/dma-buf/dma-buf.c b/drivers/dma-buf/dma-buf.c
> > > > index f264b70c383e..197e5c45dd26 100644
> > > > --- a/drivers/dma-buf/dma-buf.c
> > > > +++ b/drivers/dma-buf/dma-buf.c
> > > > @@ -37,6 +37,7 @@ struct dma_buf_list {
> > > >    };
> > > >      static struct dma_buf_list db_list;
> > > > +static atomic_long_t dma_buf_global_allocated;
> > > >      static char *dmabuffs_dname(struct dentry *dentry, char *buffer, int buflen)
> > > >    {
> > > > @@ -79,6 +80,7 @@ static void dma_buf_release(struct dentry *dentry)
> > > >        if (dmabuf->resv == (struct dma_resv *)&dmabuf[1])
> > > >            dma_resv_fini(dmabuf->resv);
> > > >    +    atomic_long_sub(dmabuf->size, &dma_buf_global_allocated);
> > > >        module_put(dmabuf->owner);
> > > >        kfree(dmabuf->name);
> > > >        kfree(dmabuf);
> > > > @@ -586,6 +588,7 @@ struct dma_buf *dma_buf_export(const struct dma_buf_export_info *exp_info)
> > > >        mutex_lock(&db_list.lock);
> > > >        list_add(&dmabuf->list_node, &db_list.head);
> > > >        mutex_unlock(&db_list.lock);
> > > > +    atomic_long_add(dmabuf->size, &dma_buf_global_allocated);
> > > >          return dmabuf;
> > > >    @@ -1346,6 +1349,16 @@ void dma_buf_vunmap(struct dma_buf *dmabuf, struct dma_buf_map *map)
> > > >    }
> > > >    EXPORT_SYMBOL_GPL(dma_buf_vunmap);
> > > >    +/**
> > > > + * dma_buf_allocated_pages - Return the used nr of pages
> > > > + * allocated for dma-buf
> > > > + */
> > > > +long dma_buf_allocated_pages(void)
> > > > +{
> > > > +    return atomic_long_read(&dma_buf_global_allocated) >> PAGE_SHIFT;
> > > > +}
> > > > +EXPORT_SYMBOL_GPL(dma_buf_allocated_pages);
> > > > +
> > > >    #ifdef CONFIG_DEBUG_FS
> > > >    static int dma_buf_debug_show(struct seq_file *s, void *unused)
> > > >    {
> > > > diff --git a/fs/proc/meminfo.c b/fs/proc/meminfo.c
> > > > index 6fa761c9cc78..ccc7c40c8db7 100644
> > > > --- a/fs/proc/meminfo.c
> > > > +++ b/fs/proc/meminfo.c
> > > > @@ -16,6 +16,7 @@
> > > >    #ifdef CONFIG_CMA
> > > >    #include <linux/cma.h>
> > > >    #endif
> > > > +#include <linux/dma-buf.h>
> > > >    #include <asm/page.h>
> > > >    #include "internal.h"
> > > >    @@ -145,7 +146,9 @@ static int meminfo_proc_show(struct seq_file *m, void *v)
> > > >        show_val_kb(m, "CmaFree:        ",
> > > >                global_zone_page_state(NR_FREE_CMA_PAGES));
> > > >    #endif
> > > > -
> > > > +#ifdef CONFIG_DMA_SHARED_BUFFER
> > > > +    show_val_kb(m, "DmaBufTotal:    ", dma_buf_allocated_pages());
> > > > +#endif
> > > >        hugetlb_report_meminfo(m);
> > > >          arch_report_meminfo(m);
> > > > diff --git a/include/linux/dma-buf.h b/include/linux/dma-buf.h
> > > > index efdc56b9d95f..5b05816bd2cd 100644
> > > > --- a/include/linux/dma-buf.h
> > > > +++ b/include/linux/dma-buf.h
> > > > @@ -507,4 +507,5 @@ int dma_buf_mmap(struct dma_buf *, struct vm_area_struct *,
> > > >             unsigned long);
> > > >    int dma_buf_vmap(struct dma_buf *dmabuf, struct dma_buf_map *map);
> > > >    void dma_buf_vunmap(struct dma_buf *dmabuf, struct dma_buf_map *map);
> > > > +long dma_buf_allocated_pages(void);
> > > >    #endif /* __DMA_BUF_H__ */
> 
> _______________________________________________
> dri-devel mailing list
> dri-devel@lists.freedesktop.org
> https://lists.freedesktop.org/mailman/listinfo/dri-devel
Peter.Enderborg@sony.com April 20, 2021, 9:02 a.m. UTC | #19
>> But that isn't really system memory at all, it's just allocated device

>> memory.

> OK, that was not really clear to me. So this is not really accounted to

> MemTotal? If that is really the case then reporting it into the oom

> report is completely pointless and I am not even sure /proc/meminfo is

> the right interface either. It would just add more confusion I am

> afraid.

>  


Why is it confusing? Documentation is quite clear:

"Provides information about distribution and utilization of memory. This
varies by architecture and compile options."

A topology with VRAM fits very well on that. The point is to have an
overview.


>>> See where I am heading?

>> Yeah, totally. Thanks for pointing this out.

>>

>> Suggestions how to handle that?

> As I've pointed out in previous reply we do have an API to account per

> node memory but now that you have brought up that this is not something

> we account as a regular memory then this doesn't really fit into that

> model. But maybe I am just confused.
Michal Hocko April 20, 2021, 9:12 a.m. UTC | #20
On Tue 20-04-21 09:02:57, Peter.Enderborg@sony.com wrote:
> 
> >> But that isn't really system memory at all, it's just allocated device
> >> memory.
> > OK, that was not really clear to me. So this is not really accounted to
> > MemTotal? If that is really the case then reporting it into the oom
> > report is completely pointless and I am not even sure /proc/meminfo is
> > the right interface either. It would just add more confusion I am
> > afraid.
> >  
> 
> Why is it confusing? Documentation is quite clear:

Because a single counter without a wider context cannot be put into any
reasonable context. There is no notion of the total amount of device
memory usable for dma-buf. As Christian explained some of it can be RAM
based. So a single number is rather pointless on its own in many cases.

Or let me just ask. What can you tell from dma-bud: $FOO kB in its
current form?
Peter.Enderborg@sony.com April 20, 2021, 9:25 a.m. UTC | #21
On 4/20/21 11:12 AM, Michal Hocko wrote:
> On Tue 20-04-21 09:02:57, Peter.Enderborg@sony.com wrote:

>>>> But that isn't really system memory at all, it's just allocated device

>>>> memory.

>>> OK, that was not really clear to me. So this is not really accounted to

>>> MemTotal? If that is really the case then reporting it into the oom

>>> report is completely pointless and I am not even sure /proc/meminfo is

>>> the right interface either. It would just add more confusion I am

>>> afraid.

>>>  

>> Why is it confusing? Documentation is quite clear:

> Because a single counter without a wider context cannot be put into any

> reasonable context. There is no notion of the total amount of device

> memory usable for dma-buf. As Christian explained some of it can be RAM

> based. So a single number is rather pointless on its own in many cases.

>

> Or let me just ask. What can you tell from dma-bud: $FOO kB in its

> current form?


It is better to be blind? The value can still be used a relative metric.
You collect the data and see how it change. And when you see
a unexpected change you start to dig in. It fore sure wont tell what line
in your application that has a bug.  But it might be an indicator that
a new game trigger a leak. And it is very well specified, it exactly the
size of mapped dma-buf. For what you use dma-buf you need to know
other parts of the system.
Michal Hocko April 20, 2021, 11:04 a.m. UTC | #22
On Tue 20-04-21 09:25:51, Peter.Enderborg@sony.com wrote:
> On 4/20/21 11:12 AM, Michal Hocko wrote:
> > On Tue 20-04-21 09:02:57, Peter.Enderborg@sony.com wrote:
> >>>> But that isn't really system memory at all, it's just allocated device
> >>>> memory.
> >>> OK, that was not really clear to me. So this is not really accounted to
> >>> MemTotal? If that is really the case then reporting it into the oom
> >>> report is completely pointless and I am not even sure /proc/meminfo is
> >>> the right interface either. It would just add more confusion I am
> >>> afraid.
> >>>  
> >> Why is it confusing? Documentation is quite clear:
> > Because a single counter without a wider context cannot be put into any
> > reasonable context. There is no notion of the total amount of device
> > memory usable for dma-buf. As Christian explained some of it can be RAM
> > based. So a single number is rather pointless on its own in many cases.
> >
> > Or let me just ask. What can you tell from dma-bud: $FOO kB in its
> > current form?
> 
> It is better to be blind?

No it is better to have a sensible counter that can be reasoned about.
So far you are only claiming that having something is better than
nothing and I would agree with you if that was a debugging one off
interface. But /proc/meminfo and other proc files have to be maintained
with future portability in mind. This is not a dumping ground for _some_
counters that might be interesting at the _current_ moment. E.g. what
happens if somebody wants to have a per device resp. memory based
dma-buf data? Are you going to change the semantic or add another
2 counters?
Peter.Enderborg@sony.com April 20, 2021, 11:24 a.m. UTC | #23
On 4/20/21 1:04 PM, Michal Hocko wrote:
> On Tue 20-04-21 09:25:51, Peter.Enderborg@sony.com wrote:

>> On 4/20/21 11:12 AM, Michal Hocko wrote:

>>> On Tue 20-04-21 09:02:57, Peter.Enderborg@sony.com wrote:

>>>>>> But that isn't really system memory at all, it's just allocated device

>>>>>> memory.

>>>>> OK, that was not really clear to me. So this is not really accounted to

>>>>> MemTotal? If that is really the case then reporting it into the oom

>>>>> report is completely pointless and I am not even sure /proc/meminfo is

>>>>> the right interface either. It would just add more confusion I am

>>>>> afraid.

>>>>>  

>>>> Why is it confusing? Documentation is quite clear:

>>> Because a single counter without a wider context cannot be put into any

>>> reasonable context. There is no notion of the total amount of device

>>> memory usable for dma-buf. As Christian explained some of it can be RAM

>>> based. So a single number is rather pointless on its own in many cases.

>>>

>>> Or let me just ask. What can you tell from dma-bud: $FOO kB in its

>>> current form?

>> It is better to be blind?

> No it is better to have a sensible counter that can be reasoned about.

> So far you are only claiming that having something is better than

> nothing and I would agree with you if that was a debugging one off

> interface. But /proc/meminfo and other proc files have to be maintained

> with future portability in mind. This is not a dumping ground for _some_

> counters that might be interesting at the _current_ moment. E.g. what

> happens if somebody wants to have a per device resp. memory based

> dma-buf data? Are you going to change the semantic or add another

> 2 counters?


This is the DmaBufTotal. It is the upper limit. If is not there is  is something else.

And when we have a better resolution on measuring it, it would make sense
to add a DmaBufVram, DmaBufMemGC or what ever we can pickup.

This is what we can measure today.
diff mbox series

Patch

diff --git a/drivers/dma-buf/dma-buf.c b/drivers/dma-buf/dma-buf.c
index f264b70c383e..197e5c45dd26 100644
--- a/drivers/dma-buf/dma-buf.c
+++ b/drivers/dma-buf/dma-buf.c
@@ -37,6 +37,7 @@  struct dma_buf_list {
 };
 
 static struct dma_buf_list db_list;
+static atomic_long_t dma_buf_global_allocated;
 
 static char *dmabuffs_dname(struct dentry *dentry, char *buffer, int buflen)
 {
@@ -79,6 +80,7 @@  static void dma_buf_release(struct dentry *dentry)
 	if (dmabuf->resv == (struct dma_resv *)&dmabuf[1])
 		dma_resv_fini(dmabuf->resv);
 
+	atomic_long_sub(dmabuf->size, &dma_buf_global_allocated);
 	module_put(dmabuf->owner);
 	kfree(dmabuf->name);
 	kfree(dmabuf);
@@ -586,6 +588,7 @@  struct dma_buf *dma_buf_export(const struct dma_buf_export_info *exp_info)
 	mutex_lock(&db_list.lock);
 	list_add(&dmabuf->list_node, &db_list.head);
 	mutex_unlock(&db_list.lock);
+	atomic_long_add(dmabuf->size, &dma_buf_global_allocated);
 
 	return dmabuf;
 
@@ -1346,6 +1349,16 @@  void dma_buf_vunmap(struct dma_buf *dmabuf, struct dma_buf_map *map)
 }
 EXPORT_SYMBOL_GPL(dma_buf_vunmap);
 
+/**
+ * dma_buf_allocated_pages - Return the used nr of pages
+ * allocated for dma-buf
+ */
+long dma_buf_allocated_pages(void)
+{
+	return atomic_long_read(&dma_buf_global_allocated) >> PAGE_SHIFT;
+}
+EXPORT_SYMBOL_GPL(dma_buf_allocated_pages);
+
 #ifdef CONFIG_DEBUG_FS
 static int dma_buf_debug_show(struct seq_file *s, void *unused)
 {
diff --git a/fs/proc/meminfo.c b/fs/proc/meminfo.c
index 6fa761c9cc78..ccc7c40c8db7 100644
--- a/fs/proc/meminfo.c
+++ b/fs/proc/meminfo.c
@@ -16,6 +16,7 @@ 
 #ifdef CONFIG_CMA
 #include <linux/cma.h>
 #endif
+#include <linux/dma-buf.h>
 #include <asm/page.h>
 #include "internal.h"
 
@@ -145,7 +146,9 @@  static int meminfo_proc_show(struct seq_file *m, void *v)
 	show_val_kb(m, "CmaFree:        ",
 		    global_zone_page_state(NR_FREE_CMA_PAGES));
 #endif
-
+#ifdef CONFIG_DMA_SHARED_BUFFER
+	show_val_kb(m, "DmaBufTotal:    ", dma_buf_allocated_pages());
+#endif
 	hugetlb_report_meminfo(m);
 
 	arch_report_meminfo(m);
diff --git a/include/linux/dma-buf.h b/include/linux/dma-buf.h
index efdc56b9d95f..5b05816bd2cd 100644
--- a/include/linux/dma-buf.h
+++ b/include/linux/dma-buf.h
@@ -507,4 +507,5 @@  int dma_buf_mmap(struct dma_buf *, struct vm_area_struct *,
 		 unsigned long);
 int dma_buf_vmap(struct dma_buf *dmabuf, struct dma_buf_map *map);
 void dma_buf_vunmap(struct dma_buf *dmabuf, struct dma_buf_map *map);
+long dma_buf_allocated_pages(void);
 #endif /* __DMA_BUF_H__ */