Message ID | 20210417104032.5521-1-peter.enderborg@sony.com |
---|---|
State | Superseded |
Headers | show |
Series | [v4] dma-buf: Add DmaBufTotal counter in meminfo | expand |
Am 17.04.21 um 12:40 schrieb Peter Enderborg: > This adds a total used dma-buf memory. Details > can be found in debugfs, however it is not for everyone > and not always available. dma-buf are indirect allocated by > userspace. So with this value we can monitor and detect > userspace applications that have problems. > > Signed-off-by: Peter Enderborg <peter.enderborg@sony.com> Reviewed-by: Christian König <christian.koenig@amd.com> How do you want to upstream this? > --- > drivers/dma-buf/dma-buf.c | 13 +++++++++++++ > fs/proc/meminfo.c | 5 ++++- > include/linux/dma-buf.h | 1 + > 3 files changed, 18 insertions(+), 1 deletion(-) > > diff --git a/drivers/dma-buf/dma-buf.c b/drivers/dma-buf/dma-buf.c > index f264b70c383e..197e5c45dd26 100644 > --- a/drivers/dma-buf/dma-buf.c > +++ b/drivers/dma-buf/dma-buf.c > @@ -37,6 +37,7 @@ struct dma_buf_list { > }; > > static struct dma_buf_list db_list; > +static atomic_long_t dma_buf_global_allocated; > > static char *dmabuffs_dname(struct dentry *dentry, char *buffer, int buflen) > { > @@ -79,6 +80,7 @@ static void dma_buf_release(struct dentry *dentry) > if (dmabuf->resv == (struct dma_resv *)&dmabuf[1]) > dma_resv_fini(dmabuf->resv); > > + atomic_long_sub(dmabuf->size, &dma_buf_global_allocated); > module_put(dmabuf->owner); > kfree(dmabuf->name); > kfree(dmabuf); > @@ -586,6 +588,7 @@ struct dma_buf *dma_buf_export(const struct dma_buf_export_info *exp_info) > mutex_lock(&db_list.lock); > list_add(&dmabuf->list_node, &db_list.head); > mutex_unlock(&db_list.lock); > + atomic_long_add(dmabuf->size, &dma_buf_global_allocated); > > return dmabuf; > > @@ -1346,6 +1349,16 @@ void dma_buf_vunmap(struct dma_buf *dmabuf, struct dma_buf_map *map) > } > EXPORT_SYMBOL_GPL(dma_buf_vunmap); > > +/** > + * dma_buf_allocated_pages - Return the used nr of pages > + * allocated for dma-buf > + */ > +long dma_buf_allocated_pages(void) > +{ > + return atomic_long_read(&dma_buf_global_allocated) >> PAGE_SHIFT; > +} > +EXPORT_SYMBOL_GPL(dma_buf_allocated_pages); > + > #ifdef CONFIG_DEBUG_FS > static int dma_buf_debug_show(struct seq_file *s, void *unused) > { > diff --git a/fs/proc/meminfo.c b/fs/proc/meminfo.c > index 6fa761c9cc78..ccc7c40c8db7 100644 > --- a/fs/proc/meminfo.c > +++ b/fs/proc/meminfo.c > @@ -16,6 +16,7 @@ > #ifdef CONFIG_CMA > #include <linux/cma.h> > #endif > +#include <linux/dma-buf.h> > #include <asm/page.h> > #include "internal.h" > > @@ -145,7 +146,9 @@ static int meminfo_proc_show(struct seq_file *m, void *v) > show_val_kb(m, "CmaFree: ", > global_zone_page_state(NR_FREE_CMA_PAGES)); > #endif > - > +#ifdef CONFIG_DMA_SHARED_BUFFER > + show_val_kb(m, "DmaBufTotal: ", dma_buf_allocated_pages()); > +#endif > hugetlb_report_meminfo(m); > > arch_report_meminfo(m); > diff --git a/include/linux/dma-buf.h b/include/linux/dma-buf.h > index efdc56b9d95f..5b05816bd2cd 100644 > --- a/include/linux/dma-buf.h > +++ b/include/linux/dma-buf.h > @@ -507,4 +507,5 @@ int dma_buf_mmap(struct dma_buf *, struct vm_area_struct *, > unsigned long); > int dma_buf_vmap(struct dma_buf *dmabuf, struct dma_buf_map *map); > void dma_buf_vunmap(struct dma_buf *dmabuf, struct dma_buf_map *map); > +long dma_buf_allocated_pages(void); > #endif /* __DMA_BUF_H__ */
Am 17.04.21 um 13:20 schrieb Peter.Enderborg@sony.com: > On 4/17/21 12:59 PM, Christian König wrote: >> Am 17.04.21 um 12:40 schrieb Peter Enderborg: >>> This adds a total used dma-buf memory. Details >>> can be found in debugfs, however it is not for everyone >>> and not always available. dma-buf are indirect allocated by >>> userspace. So with this value we can monitor and detect >>> userspace applications that have problems. >>> >>> Signed-off-by: Peter Enderborg <peter.enderborg@sony.com> >> Reviewed-by: Christian König <christian.koenig@amd.com> >> >> How do you want to upstream this? > I don't understand that question. The patch applies on Torvalds 5.12-rc7, > but I guess 5.13 is what we work on right now. Yeah, but how do you want to get it into Linus tree? I can push it together with other DMA-buf patches through drm-misc-next and then Dave will send it to Linus for inclusion in 5.13. But could be that you are pushing multiple changes towards Linus through some other branch. In this case I'm fine if you pick that way instead if you want to keep your patches together for some reason. Christian. > >>> --- >>> drivers/dma-buf/dma-buf.c | 13 +++++++++++++ >>> fs/proc/meminfo.c | 5 ++++- >>> include/linux/dma-buf.h | 1 + >>> 3 files changed, 18 insertions(+), 1 deletion(-) >>> >>> diff --git a/drivers/dma-buf/dma-buf.c b/drivers/dma-buf/dma-buf.c >>> index f264b70c383e..197e5c45dd26 100644 >>> --- a/drivers/dma-buf/dma-buf.c >>> +++ b/drivers/dma-buf/dma-buf.c >>> @@ -37,6 +37,7 @@ struct dma_buf_list { >>> }; >>> static struct dma_buf_list db_list; >>> +static atomic_long_t dma_buf_global_allocated; >>> static char *dmabuffs_dname(struct dentry *dentry, char *buffer, int buflen) >>> { >>> @@ -79,6 +80,7 @@ static void dma_buf_release(struct dentry *dentry) >>> if (dmabuf->resv == (struct dma_resv *)&dmabuf[1]) >>> dma_resv_fini(dmabuf->resv); >>> + atomic_long_sub(dmabuf->size, &dma_buf_global_allocated); >>> module_put(dmabuf->owner); >>> kfree(dmabuf->name); >>> kfree(dmabuf); >>> @@ -586,6 +588,7 @@ struct dma_buf *dma_buf_export(const struct dma_buf_export_info *exp_info) >>> mutex_lock(&db_list.lock); >>> list_add(&dmabuf->list_node, &db_list.head); >>> mutex_unlock(&db_list.lock); >>> + atomic_long_add(dmabuf->size, &dma_buf_global_allocated); >>> return dmabuf; >>> @@ -1346,6 +1349,16 @@ void dma_buf_vunmap(struct dma_buf *dmabuf, struct dma_buf_map *map) >>> } >>> EXPORT_SYMBOL_GPL(dma_buf_vunmap); >>> +/** >>> + * dma_buf_allocated_pages - Return the used nr of pages >>> + * allocated for dma-buf >>> + */ >>> +long dma_buf_allocated_pages(void) >>> +{ >>> + return atomic_long_read(&dma_buf_global_allocated) >> PAGE_SHIFT; >>> +} >>> +EXPORT_SYMBOL_GPL(dma_buf_allocated_pages); >>> + >>> #ifdef CONFIG_DEBUG_FS >>> static int dma_buf_debug_show(struct seq_file *s, void *unused) >>> { >>> diff --git a/fs/proc/meminfo.c b/fs/proc/meminfo.c >>> index 6fa761c9cc78..ccc7c40c8db7 100644 >>> --- a/fs/proc/meminfo.c >>> +++ b/fs/proc/meminfo.c >>> @@ -16,6 +16,7 @@ >>> #ifdef CONFIG_CMA >>> #include <linux/cma.h> >>> #endif >>> +#include <linux/dma-buf.h> >>> #include <asm/page.h> >>> #include "internal.h" >>> @@ -145,7 +146,9 @@ static int meminfo_proc_show(struct seq_file *m, void *v) >>> show_val_kb(m, "CmaFree: ", >>> global_zone_page_state(NR_FREE_CMA_PAGES)); >>> #endif >>> - >>> +#ifdef CONFIG_DMA_SHARED_BUFFER >>> + show_val_kb(m, "DmaBufTotal: ", dma_buf_allocated_pages()); >>> +#endif >>> hugetlb_report_meminfo(m); >>> arch_report_meminfo(m); >>> diff --git a/include/linux/dma-buf.h b/include/linux/dma-buf.h >>> index efdc56b9d95f..5b05816bd2cd 100644 >>> --- a/include/linux/dma-buf.h >>> +++ b/include/linux/dma-buf.h >>> @@ -507,4 +507,5 @@ int dma_buf_mmap(struct dma_buf *, struct vm_area_struct *, >>> unsigned long); >>> int dma_buf_vmap(struct dma_buf *dmabuf, struct dma_buf_map *map); >>> void dma_buf_vunmap(struct dma_buf *dmabuf, struct dma_buf_map *map); >>> +long dma_buf_allocated_pages(void); >>> #endif /* __DMA_BUF_H__ */
On Sat, Apr 17, 2021 at 6:41 PM Peter Enderborg <peter.enderborg@sony.com> wrote: > > This adds a total used dma-buf memory. Details > can be found in debugfs, however it is not for everyone > and not always available. dma-buf are indirect allocated by > userspace. So with this value we can monitor and detect > userspace applications that have problems. > > Signed-off-by: Peter Enderborg <peter.enderborg@sony.com> > --- > drivers/dma-buf/dma-buf.c | 13 +++++++++++++ > fs/proc/meminfo.c | 5 ++++- > include/linux/dma-buf.h | 1 + > 3 files changed, 18 insertions(+), 1 deletion(-) > > diff --git a/drivers/dma-buf/dma-buf.c b/drivers/dma-buf/dma-buf.c > index f264b70c383e..197e5c45dd26 100644 > --- a/drivers/dma-buf/dma-buf.c > +++ b/drivers/dma-buf/dma-buf.c > @@ -37,6 +37,7 @@ struct dma_buf_list { > }; > > static struct dma_buf_list db_list; > +static atomic_long_t dma_buf_global_allocated; > > static char *dmabuffs_dname(struct dentry *dentry, char *buffer, int buflen) > { > @@ -79,6 +80,7 @@ static void dma_buf_release(struct dentry *dentry) > if (dmabuf->resv == (struct dma_resv *)&dmabuf[1]) > dma_resv_fini(dmabuf->resv); > > + atomic_long_sub(dmabuf->size, &dma_buf_global_allocated); > module_put(dmabuf->owner); > kfree(dmabuf->name); > kfree(dmabuf); > @@ -586,6 +588,7 @@ struct dma_buf *dma_buf_export(const struct dma_buf_export_info *exp_info) > mutex_lock(&db_list.lock); > list_add(&dmabuf->list_node, &db_list.head); > mutex_unlock(&db_list.lock); > + atomic_long_add(dmabuf->size, &dma_buf_global_allocated); > > return dmabuf; > > @@ -1346,6 +1349,16 @@ void dma_buf_vunmap(struct dma_buf *dmabuf, struct dma_buf_map *map) > } > EXPORT_SYMBOL_GPL(dma_buf_vunmap); > > +/** > + * dma_buf_allocated_pages - Return the used nr of pages > + * allocated for dma-buf > + */ > +long dma_buf_allocated_pages(void) > +{ > + return atomic_long_read(&dma_buf_global_allocated) >> PAGE_SHIFT; > +} > +EXPORT_SYMBOL_GPL(dma_buf_allocated_pages); dma_buf_allocated_pages is only called from fs/proc/meminfo.c. I am confused why it should be exported. If it won't be called from the driver module, we should not export it. Thanks. > + > #ifdef CONFIG_DEBUG_FS > static int dma_buf_debug_show(struct seq_file *s, void *unused) > { > diff --git a/fs/proc/meminfo.c b/fs/proc/meminfo.c > index 6fa761c9cc78..ccc7c40c8db7 100644 > --- a/fs/proc/meminfo.c > +++ b/fs/proc/meminfo.c > @@ -16,6 +16,7 @@ > #ifdef CONFIG_CMA > #include <linux/cma.h> > #endif > +#include <linux/dma-buf.h> > #include <asm/page.h> > #include "internal.h" > > @@ -145,7 +146,9 @@ static int meminfo_proc_show(struct seq_file *m, void *v) > show_val_kb(m, "CmaFree: ", > global_zone_page_state(NR_FREE_CMA_PAGES)); > #endif > - > +#ifdef CONFIG_DMA_SHARED_BUFFER > + show_val_kb(m, "DmaBufTotal: ", dma_buf_allocated_pages()); > +#endif > hugetlb_report_meminfo(m); > > arch_report_meminfo(m); > diff --git a/include/linux/dma-buf.h b/include/linux/dma-buf.h > index efdc56b9d95f..5b05816bd2cd 100644 > --- a/include/linux/dma-buf.h > +++ b/include/linux/dma-buf.h > @@ -507,4 +507,5 @@ int dma_buf_mmap(struct dma_buf *, struct vm_area_struct *, > unsigned long); > int dma_buf_vmap(struct dma_buf *dmabuf, struct dma_buf_map *map); > void dma_buf_vunmap(struct dma_buf *dmabuf, struct dma_buf_map *map); > +long dma_buf_allocated_pages(void); > #endif /* __DMA_BUF_H__ */ > -- > 2.17.1 >
On 4/17/21 3:07 PM, Muchun Song wrote: > On Sat, Apr 17, 2021 at 6:41 PM Peter Enderborg > <peter.enderborg@sony.com> wrote: >> This adds a total used dma-buf memory. Details >> can be found in debugfs, however it is not for everyone >> and not always available. dma-buf are indirect allocated by >> userspace. So with this value we can monitor and detect >> userspace applications that have problems. >> >> Signed-off-by: Peter Enderborg <peter.enderborg@sony.com> >> --- >> drivers/dma-buf/dma-buf.c | 13 +++++++++++++ >> fs/proc/meminfo.c | 5 ++++- >> include/linux/dma-buf.h | 1 + >> 3 files changed, 18 insertions(+), 1 deletion(-) >> >> diff --git a/drivers/dma-buf/dma-buf.c b/drivers/dma-buf/dma-buf.c >> index f264b70c383e..197e5c45dd26 100644 >> --- a/drivers/dma-buf/dma-buf.c >> +++ b/drivers/dma-buf/dma-buf.c >> @@ -37,6 +37,7 @@ struct dma_buf_list { >> }; >> >> static struct dma_buf_list db_list; >> +static atomic_long_t dma_buf_global_allocated; >> >> static char *dmabuffs_dname(struct dentry *dentry, char *buffer, int buflen) >> { >> @@ -79,6 +80,7 @@ static void dma_buf_release(struct dentry *dentry) >> if (dmabuf->resv == (struct dma_resv *)&dmabuf[1]) >> dma_resv_fini(dmabuf->resv); >> >> + atomic_long_sub(dmabuf->size, &dma_buf_global_allocated); >> module_put(dmabuf->owner); >> kfree(dmabuf->name); >> kfree(dmabuf); >> @@ -586,6 +588,7 @@ struct dma_buf *dma_buf_export(const struct dma_buf_export_info *exp_info) >> mutex_lock(&db_list.lock); >> list_add(&dmabuf->list_node, &db_list.head); >> mutex_unlock(&db_list.lock); >> + atomic_long_add(dmabuf->size, &dma_buf_global_allocated); >> >> return dmabuf; >> >> @@ -1346,6 +1349,16 @@ void dma_buf_vunmap(struct dma_buf *dmabuf, struct dma_buf_map *map) >> } >> EXPORT_SYMBOL_GPL(dma_buf_vunmap); >> >> +/** >> + * dma_buf_allocated_pages - Return the used nr of pages >> + * allocated for dma-buf >> + */ >> +long dma_buf_allocated_pages(void) >> +{ >> + return atomic_long_read(&dma_buf_global_allocated) >> PAGE_SHIFT; >> +} >> +EXPORT_SYMBOL_GPL(dma_buf_allocated_pages); > dma_buf_allocated_pages is only called from fs/proc/meminfo.c. > I am confused why it should be exported. If it won't be called > from the driver module, we should not export it. Ah. I thought you did not want the GPL restriction. I don't have real opinion about it. It's written to be following the rest of the module. It is not needed for the usage of dma-buf in kernel module. But I don't see any reason for hiding it either. > Thanks. > >> + >> #ifdef CONFIG_DEBUG_FS >> static int dma_buf_debug_show(struct seq_file *s, void *unused) >> { >> diff --git a/fs/proc/meminfo.c b/fs/proc/meminfo.c >> index 6fa761c9cc78..ccc7c40c8db7 100644 >> --- a/fs/proc/meminfo.c >> +++ b/fs/proc/meminfo.c >> @@ -16,6 +16,7 @@ >> #ifdef CONFIG_CMA >> #include <linux/cma.h> >> #endif >> +#include <linux/dma-buf.h> >> #include <asm/page.h> >> #include "internal.h" >> >> @@ -145,7 +146,9 @@ static int meminfo_proc_show(struct seq_file *m, void *v) >> show_val_kb(m, "CmaFree: ", >> global_zone_page_state(NR_FREE_CMA_PAGES)); >> #endif >> - >> +#ifdef CONFIG_DMA_SHARED_BUFFER >> + show_val_kb(m, "DmaBufTotal: ", dma_buf_allocated_pages()); >> +#endif >> hugetlb_report_meminfo(m); >> >> arch_report_meminfo(m); >> diff --git a/include/linux/dma-buf.h b/include/linux/dma-buf.h >> index efdc56b9d95f..5b05816bd2cd 100644 >> --- a/include/linux/dma-buf.h >> +++ b/include/linux/dma-buf.h >> @@ -507,4 +507,5 @@ int dma_buf_mmap(struct dma_buf *, struct vm_area_struct *, >> unsigned long); >> int dma_buf_vmap(struct dma_buf *dmabuf, struct dma_buf_map *map); >> void dma_buf_vunmap(struct dma_buf *dmabuf, struct dma_buf_map *map); >> +long dma_buf_allocated_pages(void); >> #endif /* __DMA_BUF_H__ */ >> -- >> 2.17.1 >>
On Sat 17-04-21 12:40:32, Peter Enderborg wrote: > This adds a total used dma-buf memory. Details > can be found in debugfs, however it is not for everyone > and not always available. dma-buf are indirect allocated by > userspace. So with this value we can monitor and detect > userspace applications that have problems. The changelog would benefit from more background on why this is needed, and who is the primary consumer of that value. I cannot really comment on the dma-buf internals but I have two remarks. Documentation/filesystems/proc.rst needs an update with the counter explanation and secondly is this information useful for OOM situations analysis? If yes then show_mem should dump the value as well. From the implementation point of view, is there any reason why this hasn't used the existing global_node_page_state infrastructure? -- Michal Hocko SUSE Labs
On Mon 19-04-21 12:41:58, Peter.Enderborg@sony.com wrote: > On 4/19/21 2:16 PM, Michal Hocko wrote: > > On Sat 17-04-21 12:40:32, Peter Enderborg wrote: > >> This adds a total used dma-buf memory. Details > >> can be found in debugfs, however it is not for everyone > >> and not always available. dma-buf are indirect allocated by > >> userspace. So with this value we can monitor and detect > >> userspace applications that have problems. > > The changelog would benefit from more background on why this is needed, > > and who is the primary consumer of that value. > > > > I cannot really comment on the dma-buf internals but I have two remarks. > > Documentation/filesystems/proc.rst needs an update with the counter > > explanation and secondly is this information useful for OOM situations > > analysis? If yes then show_mem should dump the value as well. > > > > From the implementation point of view, is there any reason why this > > hasn't used the existing global_node_page_state infrastructure? > > I fix doc in next version. Im not sure what you expect the commit message to include. As I've said. Usual justification covers answers to following questions - Why do we need it? - Why the existing data is insuficient? - Who is supposed to use the data and for what? I can see an answer for the first two questions (because this can be a lot of memory and the existing infrastructure is not production suitable - debugfs). But the changelog doesn't really explain who is going to use the new data. Is this a monitoring to raise an early alarm when the value grows? Is this for debugging misbehaving drivers? How is it valuable for those? > The function of the meminfo is: (From Documentation/filesystems/proc.rst) > > "Provides information about distribution and utilization of memory." True. Yet we do not export any random counters, do we? > Im not the designed of dma-buf, I think global_node_page_state as a kernel > internal. It provides a node specific and optimized counters. Is this a good fit with your new counter? Or the NUMA locality is of no importance? > dma-buf is a device driver that provides a function so I might be > on the outside. However I also see that it might be relevant for a OOM. > It is memory that can be freed by killing userspace processes. > > The show_mem thing. Should it be a separate patch? This is up to you but if you want to expose the counter then send it in one series.
On 4/19/21 5:00 PM, Michal Hocko wrote: > On Mon 19-04-21 12:41:58, Peter.Enderborg@sony.com wrote: >> On 4/19/21 2:16 PM, Michal Hocko wrote: >>> On Sat 17-04-21 12:40:32, Peter Enderborg wrote: >>>> This adds a total used dma-buf memory. Details >>>> can be found in debugfs, however it is not for everyone >>>> and not always available. dma-buf are indirect allocated by >>>> userspace. So with this value we can monitor and detect >>>> userspace applications that have problems. >>> The changelog would benefit from more background on why this is needed, >>> and who is the primary consumer of that value. >>> >>> I cannot really comment on the dma-buf internals but I have two remarks. >>> Documentation/filesystems/proc.rst needs an update with the counter >>> explanation and secondly is this information useful for OOM situations >>> analysis? If yes then show_mem should dump the value as well. >>> >>> From the implementation point of view, is there any reason why this >>> hasn't used the existing global_node_page_state infrastructure? >> I fix doc in next version. Im not sure what you expect the commit message to include. > As I've said. Usual justification covers answers to following questions > - Why do we need it? > - Why the existing data is insuficient? > - Who is supposed to use the data and for what? > > I can see an answer for the first two questions (because this can be a > lot of memory and the existing infrastructure is not production suitable > - debugfs). But the changelog doesn't really explain who is going to use > the new data. Is this a monitoring to raise an early alarm when the > value grows? Is this for debugging misbehaving drivers? How is it > valuable for those? > >> The function of the meminfo is: (From Documentation/filesystems/proc.rst) >> >> "Provides information about distribution and utilization of memory." > True. Yet we do not export any random counters, do we? > >> Im not the designed of dma-buf, I think global_node_page_state as a kernel >> internal. > It provides a node specific and optimized counters. Is this a good fit > with your new counter? Or the NUMA locality is of no importance? Sounds good to me, if Christian Koenig think it is good, I will use that. It is only virtio in drivers that use the global_node_page_state if that matters. > >> dma-buf is a device driver that provides a function so I might be >> on the outside. However I also see that it might be relevant for a OOM. >> It is memory that can be freed by killing userspace processes. >> >> The show_mem thing. Should it be a separate patch? > This is up to you but if you want to expose the counter then send it in > one series. >
Am 19.04.21 um 17:19 schrieb Peter.Enderborg@sony.com: > On 4/19/21 5:00 PM, Michal Hocko wrote: >> On Mon 19-04-21 12:41:58, Peter.Enderborg@sony.com wrote: >>> On 4/19/21 2:16 PM, Michal Hocko wrote: >>>> On Sat 17-04-21 12:40:32, Peter Enderborg wrote: >>>>> This adds a total used dma-buf memory. Details >>>>> can be found in debugfs, however it is not for everyone >>>>> and not always available. dma-buf are indirect allocated by >>>>> userspace. So with this value we can monitor and detect >>>>> userspace applications that have problems. >>>> The changelog would benefit from more background on why this is needed, >>>> and who is the primary consumer of that value. >>>> >>>> I cannot really comment on the dma-buf internals but I have two remarks. >>>> Documentation/filesystems/proc.rst needs an update with the counter >>>> explanation and secondly is this information useful for OOM situations >>>> analysis? If yes then show_mem should dump the value as well. >>>> >>>> From the implementation point of view, is there any reason why this >>>> hasn't used the existing global_node_page_state infrastructure? >>> I fix doc in next version. Im not sure what you expect the commit message to include. >> As I've said. Usual justification covers answers to following questions >> - Why do we need it? >> - Why the existing data is insuficient? >> - Who is supposed to use the data and for what? >> >> I can see an answer for the first two questions (because this can be a >> lot of memory and the existing infrastructure is not production suitable >> - debugfs). But the changelog doesn't really explain who is going to use >> the new data. Is this a monitoring to raise an early alarm when the >> value grows? Is this for debugging misbehaving drivers? How is it >> valuable for those? >> >>> The function of the meminfo is: (From Documentation/filesystems/proc.rst) >>> >>> "Provides information about distribution and utilization of memory." >> True. Yet we do not export any random counters, do we? >> >>> Im not the designed of dma-buf, I think global_node_page_state as a kernel >>> internal. >> It provides a node specific and optimized counters. Is this a good fit >> with your new counter? Or the NUMA locality is of no importance? > Sounds good to me, if Christian Koenig think it is good, I will use that. > It is only virtio in drivers that use the global_node_page_state if > that matters. DMA-buf are not NUMA aware at all. On which node the pages are allocated (and if we use pages at all and not internal device memory) is up to the exporter and importer. Christian. > > >>> dma-buf is a device driver that provides a function so I might be >>> on the outside. However I also see that it might be relevant for a OOM. >>> It is memory that can be freed by killing userspace processes. >>> >>> The show_mem thing. Should it be a separate patch? >> This is up to you but if you want to expose the counter then send it in >> one series. >>
On Mon 19-04-21 17:44:13, Christian König wrote: > Am 19.04.21 um 17:19 schrieb Peter.Enderborg@sony.com: > > On 4/19/21 5:00 PM, Michal Hocko wrote: > > > On Mon 19-04-21 12:41:58, Peter.Enderborg@sony.com wrote: > > > > On 4/19/21 2:16 PM, Michal Hocko wrote: > > > > > On Sat 17-04-21 12:40:32, Peter Enderborg wrote: > > > > > > This adds a total used dma-buf memory. Details > > > > > > can be found in debugfs, however it is not for everyone > > > > > > and not always available. dma-buf are indirect allocated by > > > > > > userspace. So with this value we can monitor and detect > > > > > > userspace applications that have problems. > > > > > The changelog would benefit from more background on why this is needed, > > > > > and who is the primary consumer of that value. > > > > > > > > > > I cannot really comment on the dma-buf internals but I have two remarks. > > > > > Documentation/filesystems/proc.rst needs an update with the counter > > > > > explanation and secondly is this information useful for OOM situations > > > > > analysis? If yes then show_mem should dump the value as well. > > > > > > > > > > From the implementation point of view, is there any reason why this > > > > > hasn't used the existing global_node_page_state infrastructure? > > > > I fix doc in next version. Im not sure what you expect the commit message to include. > > > As I've said. Usual justification covers answers to following questions > > > - Why do we need it? > > > - Why the existing data is insuficient? > > > - Who is supposed to use the data and for what? > > > > > > I can see an answer for the first two questions (because this can be a > > > lot of memory and the existing infrastructure is not production suitable > > > - debugfs). But the changelog doesn't really explain who is going to use > > > the new data. Is this a monitoring to raise an early alarm when the > > > value grows? Is this for debugging misbehaving drivers? How is it > > > valuable for those? > > > > > > > The function of the meminfo is: (From Documentation/filesystems/proc.rst) > > > > > > > > "Provides information about distribution and utilization of memory." > > > True. Yet we do not export any random counters, do we? > > > > > > > Im not the designed of dma-buf, I think global_node_page_state as a kernel > > > > internal. > > > It provides a node specific and optimized counters. Is this a good fit > > > with your new counter? Or the NUMA locality is of no importance? > > Sounds good to me, if Christian Koenig think it is good, I will use that. > > It is only virtio in drivers that use the global_node_page_state if > > that matters. > > DMA-buf are not NUMA aware at all. On which node the pages are allocated > (and if we use pages at all and not internal device memory) is up to the > exporter and importer. The question is not whether it is NUMA aware but whether it is useful to know per-numa data for the purpose the counter is supposed to serve. -- Michal Hocko SUSE Labs
Am 19.04.21 um 18:11 schrieb Michal Hocko: > On Mon 19-04-21 17:44:13, Christian König wrote: >> Am 19.04.21 um 17:19 schrieb Peter.Enderborg@sony.com: >>> On 4/19/21 5:00 PM, Michal Hocko wrote: >>>> On Mon 19-04-21 12:41:58, Peter.Enderborg@sony.com wrote: >>>>> On 4/19/21 2:16 PM, Michal Hocko wrote: >>>>>> On Sat 17-04-21 12:40:32, Peter Enderborg wrote: >>>>>>> This adds a total used dma-buf memory. Details >>>>>>> can be found in debugfs, however it is not for everyone >>>>>>> and not always available. dma-buf are indirect allocated by >>>>>>> userspace. So with this value we can monitor and detect >>>>>>> userspace applications that have problems. >>>>>> The changelog would benefit from more background on why this is needed, >>>>>> and who is the primary consumer of that value. >>>>>> >>>>>> I cannot really comment on the dma-buf internals but I have two remarks. >>>>>> Documentation/filesystems/proc.rst needs an update with the counter >>>>>> explanation and secondly is this information useful for OOM situations >>>>>> analysis? If yes then show_mem should dump the value as well. >>>>>> >>>>>> From the implementation point of view, is there any reason why this >>>>>> hasn't used the existing global_node_page_state infrastructure? >>>>> I fix doc in next version. Im not sure what you expect the commit message to include. >>>> As I've said. Usual justification covers answers to following questions >>>> - Why do we need it? >>>> - Why the existing data is insuficient? >>>> - Who is supposed to use the data and for what? >>>> >>>> I can see an answer for the first two questions (because this can be a >>>> lot of memory and the existing infrastructure is not production suitable >>>> - debugfs). But the changelog doesn't really explain who is going to use >>>> the new data. Is this a monitoring to raise an early alarm when the >>>> value grows? Is this for debugging misbehaving drivers? How is it >>>> valuable for those? >>>> >>>>> The function of the meminfo is: (From Documentation/filesystems/proc.rst) >>>>> >>>>> "Provides information about distribution and utilization of memory." >>>> True. Yet we do not export any random counters, do we? >>>> >>>>> Im not the designed of dma-buf, I think global_node_page_state as a kernel >>>>> internal. >>>> It provides a node specific and optimized counters. Is this a good fit >>>> with your new counter? Or the NUMA locality is of no importance? >>> Sounds good to me, if Christian Koenig think it is good, I will use that. >>> It is only virtio in drivers that use the global_node_page_state if >>> that matters. >> DMA-buf are not NUMA aware at all. On which node the pages are allocated >> (and if we use pages at all and not internal device memory) is up to the >> exporter and importer. > The question is not whether it is NUMA aware but whether it is useful to > know per-numa data for the purpose the counter is supposed to serve. No, not at all. The pages of a single DMA-buf could even be from different NUMA nodes if the exporting driver decides that this is somehow useful. Christian.
On Mon 19-04-21 18:37:13, Christian König wrote: > Am 19.04.21 um 18:11 schrieb Michal Hocko: [...] > > The question is not whether it is NUMA aware but whether it is useful to > > know per-numa data for the purpose the counter is supposed to serve. > > No, not at all. The pages of a single DMA-buf could even be from different > NUMA nodes if the exporting driver decides that this is somehow useful. As the use of the counter hasn't been explained yet I can only speculate. One thing that I can imagine to be useful is to fill gaps in our accounting. It is quite often that the memroy accounted in /proc/meminfo (or oom report) doesn't add up to the overall memory usage. In some workloads the workload can be huge! In many cases there are other means to find out additional memory by a subsystem specific interfaces (e.g. networking buffers). I do assume that dma-buf is just one of those and the counter can fill the said gap at least partially for some workloads. That is definitely useful. What I am trying to bring up with NUMA side is that the same problem can happen on per-node basis. Let's say that some user consumes unexpectedly large amount of dma-buf on a certain node. This can lead to observable performance impact on anybody on allocating from that node and even worse cause an OOM for node bound consumers. How do I find out that it was dma-buf that has caused the problem? See where I am heading? -- Michal Hocko SUSE Labs
On Tue, Apr 20, 2021 at 09:04:51AM +0200, Michal Hocko wrote: > On Mon 19-04-21 18:37:13, Christian König wrote: > > Am 19.04.21 um 18:11 schrieb Michal Hocko: > [...] > > > The question is not whether it is NUMA aware but whether it is useful to > > > know per-numa data for the purpose the counter is supposed to serve. > > > > No, not at all. The pages of a single DMA-buf could even be from different > > NUMA nodes if the exporting driver decides that this is somehow useful. > > As the use of the counter hasn't been explained yet I can only > speculate. One thing that I can imagine to be useful is to fill gaps in > our accounting. It is quite often that the memroy accounted in > /proc/meminfo (or oom report) doesn't add up to the overall memory > usage. In some workloads the workload can be huge! In many cases there > are other means to find out additional memory by a subsystem specific > interfaces (e.g. networking buffers). I do assume that dma-buf is just > one of those and the counter can fill the said gap at least partially > for some workloads. That is definitely useful. A bit off-topic. Michal, I think it would have been nice to have an explanation like above in Documentation/proc/meminfo, what do you say? > What I am trying to bring up with NUMA side is that the same problem can > happen on per-node basis. Let's say that some user consumes unexpectedly > large amount of dma-buf on a certain node. This can lead to observable > performance impact on anybody on allocating from that node and even > worse cause an OOM for node bound consumers. How do I find out that it > was dma-buf that has caused the problem? > > See where I am heading? -- Sincerely yours, Mike.
Am 20.04.21 um 09:04 schrieb Michal Hocko: > On Mon 19-04-21 18:37:13, Christian König wrote: >> Am 19.04.21 um 18:11 schrieb Michal Hocko: > [...] >>> The question is not whether it is NUMA aware but whether it is useful to >>> know per-numa data for the purpose the counter is supposed to serve. >> No, not at all. The pages of a single DMA-buf could even be from different >> NUMA nodes if the exporting driver decides that this is somehow useful. > As the use of the counter hasn't been explained yet I can only > speculate. One thing that I can imagine to be useful is to fill gaps in > our accounting. It is quite often that the memroy accounted in > /proc/meminfo (or oom report) doesn't add up to the overall memory > usage. In some workloads the workload can be huge! In many cases there > are other means to find out additional memory by a subsystem specific > interfaces (e.g. networking buffers). I do assume that dma-buf is just > one of those and the counter can fill the said gap at least partially > for some workloads. That is definitely useful. Yes, completely agree. I'm just not 100% sure if the DMA-buf framework should account for that or the individual drivers exporting DMA-bufs. See below for a further explanation. > What I am trying to bring up with NUMA side is that the same problem can > happen on per-node basis. Let's say that some user consumes unexpectedly > large amount of dma-buf on a certain node. This can lead to observable > performance impact on anybody on allocating from that node and even > worse cause an OOM for node bound consumers. How do I find out that it > was dma-buf that has caused the problem? Yes, that is the direction my thinking goes as well, but also even further. See DMA-buf is also used to share device local memory between processes as well. In other words VRAM on graphics hardware. On my test system here I have 32GB of system memory and 16GB of VRAM. I can use DMA-buf to allocate that 16GB of VRAM quite easily which then shows up under /proc/meminfo as used memory. But that isn't really system memory at all, it's just allocated device memory. > See where I am heading? Yeah, totally. Thanks for pointing this out. Suggestions how to handle that? Regards, Christian.
On Tue 20-04-21 09:32:14, Christian König wrote: > Am 20.04.21 um 09:04 schrieb Michal Hocko: > > On Mon 19-04-21 18:37:13, Christian König wrote: > > > Am 19.04.21 um 18:11 schrieb Michal Hocko: [...] > > What I am trying to bring up with NUMA side is that the same problem can > > happen on per-node basis. Let's say that some user consumes unexpectedly > > large amount of dma-buf on a certain node. This can lead to observable > > performance impact on anybody on allocating from that node and even > > worse cause an OOM for node bound consumers. How do I find out that it > > was dma-buf that has caused the problem? > > Yes, that is the direction my thinking goes as well, but also even further. > > See DMA-buf is also used to share device local memory between processes as > well. In other words VRAM on graphics hardware. > > On my test system here I have 32GB of system memory and 16GB of VRAM. I can > use DMA-buf to allocate that 16GB of VRAM quite easily which then shows up > under /proc/meminfo as used memory. This is something that would be really interesting in the changelog. I mean the expected and extreme memory consumption of this memory. Ideally with some hints on what to do when the number is really high (e.g. mount debugfs and have a look here and there to check whether this is just too many users or an unexpected pattern to be reported). > But that isn't really system memory at all, it's just allocated device > memory. OK, that was not really clear to me. So this is not really accounted to MemTotal? If that is really the case then reporting it into the oom report is completely pointless and I am not even sure /proc/meminfo is the right interface either. It would just add more confusion I am afraid. > > See where I am heading? > > Yeah, totally. Thanks for pointing this out. > > Suggestions how to handle that? As I've pointed out in previous reply we do have an API to account per node memory but now that you have brought up that this is not something we account as a regular memory then this doesn't really fit into that model. But maybe I am just confused. -- Michal Hocko SUSE Labs
On Tue 20-04-21 10:20:43, Mike Rapoport wrote: > On Tue, Apr 20, 2021 at 09:04:51AM +0200, Michal Hocko wrote: > > On Mon 19-04-21 18:37:13, Christian König wrote: > > > Am 19.04.21 um 18:11 schrieb Michal Hocko: > > [...] > > > > The question is not whether it is NUMA aware but whether it is useful to > > > > know per-numa data for the purpose the counter is supposed to serve. > > > > > > No, not at all. The pages of a single DMA-buf could even be from different > > > NUMA nodes if the exporting driver decides that this is somehow useful. > > > > As the use of the counter hasn't been explained yet I can only > > speculate. One thing that I can imagine to be useful is to fill gaps in > > our accounting. It is quite often that the memroy accounted in > > /proc/meminfo (or oom report) doesn't add up to the overall memory > > usage. In some workloads the workload can be huge! In many cases there > > are other means to find out additional memory by a subsystem specific > > interfaces (e.g. networking buffers). I do assume that dma-buf is just > > one of those and the counter can fill the said gap at least partially > > for some workloads. That is definitely useful. > > A bit off-topic. > > Michal, I think it would have been nice to have an explanation like above > in Documentation/proc/meminfo, what do you say? Not sure which specific parts (likely the unaccounted memory?) but sure why not. Our /proc/meminfo is rather underdocumented. More information cannot hurt. -- Michal Hocko SUSE Labs
Am 20.04.21 um 09:46 schrieb Michal Hocko: > On Tue 20-04-21 09:32:14, Christian König wrote: >> Am 20.04.21 um 09:04 schrieb Michal Hocko: >>> On Mon 19-04-21 18:37:13, Christian König wrote: >>>> Am 19.04.21 um 18:11 schrieb Michal Hocko: > [...] >>> What I am trying to bring up with NUMA side is that the same problem can >>> happen on per-node basis. Let's say that some user consumes unexpectedly >>> large amount of dma-buf on a certain node. This can lead to observable >>> performance impact on anybody on allocating from that node and even >>> worse cause an OOM for node bound consumers. How do I find out that it >>> was dma-buf that has caused the problem? >> Yes, that is the direction my thinking goes as well, but also even further. >> >> See DMA-buf is also used to share device local memory between processes as >> well. In other words VRAM on graphics hardware. >> >> On my test system here I have 32GB of system memory and 16GB of VRAM. I can >> use DMA-buf to allocate that 16GB of VRAM quite easily which then shows up >> under /proc/meminfo as used memory. > This is something that would be really interesting in the changelog. I > mean the expected and extreme memory consumption of this memory. Ideally > with some hints on what to do when the number is really high (e.g. mount > debugfs and have a look here and there to check whether this is just too > many users or an unexpected pattern to be reported). > >> But that isn't really system memory at all, it's just allocated device >> memory. > OK, that was not really clear to me. So this is not really accounted to > MemTotal? It depends. In a lot of embedded systems you only have system memory and in this case that value here is indeed really useful. > If that is really the case then reporting it into the oom > report is completely pointless and I am not even sure /proc/meminfo is > the right interface either. It would just add more confusion I am > afraid. I kind of agree. As I said a DMA-buf could be backed by system memory or device memory. In the case when it is backed by system memory it does make sense to report this in an OOM dump. But only the exporting driver knows what the DMA-buf handle represents, the framework just provides the common ground for inter driver communication. >>> See where I am heading? >> Yeah, totally. Thanks for pointing this out. >> >> Suggestions how to handle that? > As I've pointed out in previous reply we do have an API to account per > node memory but now that you have brought up that this is not something > we account as a regular memory then this doesn't really fit into that > model. But maybe I am just confused. Well does that API also has a counter for memory used by device drivers? If yes then the device driver who exported the DMA-buf should probably use that API. If no we might want to create one. I mean the author of this patch seems to have an use case where this is needed and I also see that we have some hole in how we account memory. Christian.
On Tue 20-04-21 10:00:07, Christian König wrote: > Am 20.04.21 um 09:46 schrieb Michal Hocko: > > On Tue 20-04-21 09:32:14, Christian König wrote: > > > Am 20.04.21 um 09:04 schrieb Michal Hocko: > > > > On Mon 19-04-21 18:37:13, Christian König wrote: > > > > > Am 19.04.21 um 18:11 schrieb Michal Hocko: > > [...] > > > > What I am trying to bring up with NUMA side is that the same problem can > > > > happen on per-node basis. Let's say that some user consumes unexpectedly > > > > large amount of dma-buf on a certain node. This can lead to observable > > > > performance impact on anybody on allocating from that node and even > > > > worse cause an OOM for node bound consumers. How do I find out that it > > > > was dma-buf that has caused the problem? > > > Yes, that is the direction my thinking goes as well, but also even further. > > > > > > See DMA-buf is also used to share device local memory between processes as > > > well. In other words VRAM on graphics hardware. > > > > > > On my test system here I have 32GB of system memory and 16GB of VRAM. I can > > > use DMA-buf to allocate that 16GB of VRAM quite easily which then shows up > > > under /proc/meminfo as used memory. > > This is something that would be really interesting in the changelog. I > > mean the expected and extreme memory consumption of this memory. Ideally > > with some hints on what to do when the number is really high (e.g. mount > > debugfs and have a look here and there to check whether this is just too > > many users or an unexpected pattern to be reported). > > > > > But that isn't really system memory at all, it's just allocated device > > > memory. > > OK, that was not really clear to me. So this is not really accounted to > > MemTotal? > > It depends. In a lot of embedded systems you only have system memory and in > this case that value here is indeed really useful. > > > If that is really the case then reporting it into the oom > > report is completely pointless and I am not even sure /proc/meminfo is > > the right interface either. It would just add more confusion I am > > afraid. > > I kind of agree. As I said a DMA-buf could be backed by system memory or > device memory. > > In the case when it is backed by system memory it does make sense to report > this in an OOM dump. > > But only the exporting driver knows what the DMA-buf handle represents, the > framework just provides the common ground for inter driver communication. Then those drivers need to account for meminfo/oom report purposes. > > > > See where I am heading? > > > Yeah, totally. Thanks for pointing this out. > > > > > > Suggestions how to handle that? > > As I've pointed out in previous reply we do have an API to account per > > node memory but now that you have brought up that this is not something > > we account as a regular memory then this doesn't really fit into that > > model. But maybe I am just confused. > > Well does that API also has a counter for memory used by device drivers? I think that "memory used by device drivers" is immaterial. The only important thing is to account that memory where it makes sense. So for RAM based allocations to report them via meminfo and find other way to report device memory allocations.
On Sat, Apr 17, 2021 at 01:54:08PM +0200, Christian König wrote: > Am 17.04.21 um 13:20 schrieb Peter.Enderborg@sony.com: > > On 4/17/21 12:59 PM, Christian König wrote: > > > Am 17.04.21 um 12:40 schrieb Peter Enderborg: > > > > This adds a total used dma-buf memory. Details > > > > can be found in debugfs, however it is not for everyone > > > > and not always available. dma-buf are indirect allocated by > > > > userspace. So with this value we can monitor and detect > > > > userspace applications that have problems. > > > > > > > > Signed-off-by: Peter Enderborg <peter.enderborg@sony.com> > > > Reviewed-by: Christian König <christian.koenig@amd.com> > > > > > > How do you want to upstream this? > > I don't understand that question. The patch applies on Torvalds 5.12-rc7, > > but I guess 5.13 is what we work on right now. > > Yeah, but how do you want to get it into Linus tree? > > I can push it together with other DMA-buf patches through drm-misc-next and > then Dave will send it to Linus for inclusion in 5.13. Small correction, we've already frozen for the merge window so this will land in 5.14. -Daniel > > But could be that you are pushing multiple changes towards Linus through > some other branch. In this case I'm fine if you pick that way instead if you > want to keep your patches together for some reason. > > Christian. > > > > > > > --- > > > > drivers/dma-buf/dma-buf.c | 13 +++++++++++++ > > > > fs/proc/meminfo.c | 5 ++++- > > > > include/linux/dma-buf.h | 1 + > > > > 3 files changed, 18 insertions(+), 1 deletion(-) > > > > > > > > diff --git a/drivers/dma-buf/dma-buf.c b/drivers/dma-buf/dma-buf.c > > > > index f264b70c383e..197e5c45dd26 100644 > > > > --- a/drivers/dma-buf/dma-buf.c > > > > +++ b/drivers/dma-buf/dma-buf.c > > > > @@ -37,6 +37,7 @@ struct dma_buf_list { > > > > }; > > > > static struct dma_buf_list db_list; > > > > +static atomic_long_t dma_buf_global_allocated; > > > > static char *dmabuffs_dname(struct dentry *dentry, char *buffer, int buflen) > > > > { > > > > @@ -79,6 +80,7 @@ static void dma_buf_release(struct dentry *dentry) > > > > if (dmabuf->resv == (struct dma_resv *)&dmabuf[1]) > > > > dma_resv_fini(dmabuf->resv); > > > > + atomic_long_sub(dmabuf->size, &dma_buf_global_allocated); > > > > module_put(dmabuf->owner); > > > > kfree(dmabuf->name); > > > > kfree(dmabuf); > > > > @@ -586,6 +588,7 @@ struct dma_buf *dma_buf_export(const struct dma_buf_export_info *exp_info) > > > > mutex_lock(&db_list.lock); > > > > list_add(&dmabuf->list_node, &db_list.head); > > > > mutex_unlock(&db_list.lock); > > > > + atomic_long_add(dmabuf->size, &dma_buf_global_allocated); > > > > return dmabuf; > > > > @@ -1346,6 +1349,16 @@ void dma_buf_vunmap(struct dma_buf *dmabuf, struct dma_buf_map *map) > > > > } > > > > EXPORT_SYMBOL_GPL(dma_buf_vunmap); > > > > +/** > > > > + * dma_buf_allocated_pages - Return the used nr of pages > > > > + * allocated for dma-buf > > > > + */ > > > > +long dma_buf_allocated_pages(void) > > > > +{ > > > > + return atomic_long_read(&dma_buf_global_allocated) >> PAGE_SHIFT; > > > > +} > > > > +EXPORT_SYMBOL_GPL(dma_buf_allocated_pages); > > > > + > > > > #ifdef CONFIG_DEBUG_FS > > > > static int dma_buf_debug_show(struct seq_file *s, void *unused) > > > > { > > > > diff --git a/fs/proc/meminfo.c b/fs/proc/meminfo.c > > > > index 6fa761c9cc78..ccc7c40c8db7 100644 > > > > --- a/fs/proc/meminfo.c > > > > +++ b/fs/proc/meminfo.c > > > > @@ -16,6 +16,7 @@ > > > > #ifdef CONFIG_CMA > > > > #include <linux/cma.h> > > > > #endif > > > > +#include <linux/dma-buf.h> > > > > #include <asm/page.h> > > > > #include "internal.h" > > > > @@ -145,7 +146,9 @@ static int meminfo_proc_show(struct seq_file *m, void *v) > > > > show_val_kb(m, "CmaFree: ", > > > > global_zone_page_state(NR_FREE_CMA_PAGES)); > > > > #endif > > > > - > > > > +#ifdef CONFIG_DMA_SHARED_BUFFER > > > > + show_val_kb(m, "DmaBufTotal: ", dma_buf_allocated_pages()); > > > > +#endif > > > > hugetlb_report_meminfo(m); > > > > arch_report_meminfo(m); > > > > diff --git a/include/linux/dma-buf.h b/include/linux/dma-buf.h > > > > index efdc56b9d95f..5b05816bd2cd 100644 > > > > --- a/include/linux/dma-buf.h > > > > +++ b/include/linux/dma-buf.h > > > > @@ -507,4 +507,5 @@ int dma_buf_mmap(struct dma_buf *, struct vm_area_struct *, > > > > unsigned long); > > > > int dma_buf_vmap(struct dma_buf *dmabuf, struct dma_buf_map *map); > > > > void dma_buf_vunmap(struct dma_buf *dmabuf, struct dma_buf_map *map); > > > > +long dma_buf_allocated_pages(void); > > > > #endif /* __DMA_BUF_H__ */ > > _______________________________________________ > dri-devel mailing list > dri-devel@lists.freedesktop.org > https://lists.freedesktop.org/mailman/listinfo/dri-devel
>> But that isn't really system memory at all, it's just allocated device >> memory. > OK, that was not really clear to me. So this is not really accounted to > MemTotal? If that is really the case then reporting it into the oom > report is completely pointless and I am not even sure /proc/meminfo is > the right interface either. It would just add more confusion I am > afraid. > Why is it confusing? Documentation is quite clear: "Provides information about distribution and utilization of memory. This varies by architecture and compile options." A topology with VRAM fits very well on that. The point is to have an overview. >>> See where I am heading? >> Yeah, totally. Thanks for pointing this out. >> >> Suggestions how to handle that? > As I've pointed out in previous reply we do have an API to account per > node memory but now that you have brought up that this is not something > we account as a regular memory then this doesn't really fit into that > model. But maybe I am just confused.
On Tue 20-04-21 09:02:57, Peter.Enderborg@sony.com wrote: > > >> But that isn't really system memory at all, it's just allocated device > >> memory. > > OK, that was not really clear to me. So this is not really accounted to > > MemTotal? If that is really the case then reporting it into the oom > > report is completely pointless and I am not even sure /proc/meminfo is > > the right interface either. It would just add more confusion I am > > afraid. > > > > Why is it confusing? Documentation is quite clear: Because a single counter without a wider context cannot be put into any reasonable context. There is no notion of the total amount of device memory usable for dma-buf. As Christian explained some of it can be RAM based. So a single number is rather pointless on its own in many cases. Or let me just ask. What can you tell from dma-bud: $FOO kB in its current form?
On 4/20/21 11:12 AM, Michal Hocko wrote: > On Tue 20-04-21 09:02:57, Peter.Enderborg@sony.com wrote: >>>> But that isn't really system memory at all, it's just allocated device >>>> memory. >>> OK, that was not really clear to me. So this is not really accounted to >>> MemTotal? If that is really the case then reporting it into the oom >>> report is completely pointless and I am not even sure /proc/meminfo is >>> the right interface either. It would just add more confusion I am >>> afraid. >>> >> Why is it confusing? Documentation is quite clear: > Because a single counter without a wider context cannot be put into any > reasonable context. There is no notion of the total amount of device > memory usable for dma-buf. As Christian explained some of it can be RAM > based. So a single number is rather pointless on its own in many cases. > > Or let me just ask. What can you tell from dma-bud: $FOO kB in its > current form? It is better to be blind? The value can still be used a relative metric. You collect the data and see how it change. And when you see a unexpected change you start to dig in. It fore sure wont tell what line in your application that has a bug. But it might be an indicator that a new game trigger a leak. And it is very well specified, it exactly the size of mapped dma-buf. For what you use dma-buf you need to know other parts of the system.
On Tue 20-04-21 09:25:51, Peter.Enderborg@sony.com wrote: > On 4/20/21 11:12 AM, Michal Hocko wrote: > > On Tue 20-04-21 09:02:57, Peter.Enderborg@sony.com wrote: > >>>> But that isn't really system memory at all, it's just allocated device > >>>> memory. > >>> OK, that was not really clear to me. So this is not really accounted to > >>> MemTotal? If that is really the case then reporting it into the oom > >>> report is completely pointless and I am not even sure /proc/meminfo is > >>> the right interface either. It would just add more confusion I am > >>> afraid. > >>> > >> Why is it confusing? Documentation is quite clear: > > Because a single counter without a wider context cannot be put into any > > reasonable context. There is no notion of the total amount of device > > memory usable for dma-buf. As Christian explained some of it can be RAM > > based. So a single number is rather pointless on its own in many cases. > > > > Or let me just ask. What can you tell from dma-bud: $FOO kB in its > > current form? > > It is better to be blind? No it is better to have a sensible counter that can be reasoned about. So far you are only claiming that having something is better than nothing and I would agree with you if that was a debugging one off interface. But /proc/meminfo and other proc files have to be maintained with future portability in mind. This is not a dumping ground for _some_ counters that might be interesting at the _current_ moment. E.g. what happens if somebody wants to have a per device resp. memory based dma-buf data? Are you going to change the semantic or add another 2 counters?
On 4/20/21 1:04 PM, Michal Hocko wrote: > On Tue 20-04-21 09:25:51, Peter.Enderborg@sony.com wrote: >> On 4/20/21 11:12 AM, Michal Hocko wrote: >>> On Tue 20-04-21 09:02:57, Peter.Enderborg@sony.com wrote: >>>>>> But that isn't really system memory at all, it's just allocated device >>>>>> memory. >>>>> OK, that was not really clear to me. So this is not really accounted to >>>>> MemTotal? If that is really the case then reporting it into the oom >>>>> report is completely pointless and I am not even sure /proc/meminfo is >>>>> the right interface either. It would just add more confusion I am >>>>> afraid. >>>>> >>>> Why is it confusing? Documentation is quite clear: >>> Because a single counter without a wider context cannot be put into any >>> reasonable context. There is no notion of the total amount of device >>> memory usable for dma-buf. As Christian explained some of it can be RAM >>> based. So a single number is rather pointless on its own in many cases. >>> >>> Or let me just ask. What can you tell from dma-bud: $FOO kB in its >>> current form? >> It is better to be blind? > No it is better to have a sensible counter that can be reasoned about. > So far you are only claiming that having something is better than > nothing and I would agree with you if that was a debugging one off > interface. But /proc/meminfo and other proc files have to be maintained > with future portability in mind. This is not a dumping ground for _some_ > counters that might be interesting at the _current_ moment. E.g. what > happens if somebody wants to have a per device resp. memory based > dma-buf data? Are you going to change the semantic or add another > 2 counters? This is the DmaBufTotal. It is the upper limit. If is not there is is something else. And when we have a better resolution on measuring it, it would make sense to add a DmaBufVram, DmaBufMemGC or what ever we can pickup. This is what we can measure today.
diff --git a/drivers/dma-buf/dma-buf.c b/drivers/dma-buf/dma-buf.c index f264b70c383e..197e5c45dd26 100644 --- a/drivers/dma-buf/dma-buf.c +++ b/drivers/dma-buf/dma-buf.c @@ -37,6 +37,7 @@ struct dma_buf_list { }; static struct dma_buf_list db_list; +static atomic_long_t dma_buf_global_allocated; static char *dmabuffs_dname(struct dentry *dentry, char *buffer, int buflen) { @@ -79,6 +80,7 @@ static void dma_buf_release(struct dentry *dentry) if (dmabuf->resv == (struct dma_resv *)&dmabuf[1]) dma_resv_fini(dmabuf->resv); + atomic_long_sub(dmabuf->size, &dma_buf_global_allocated); module_put(dmabuf->owner); kfree(dmabuf->name); kfree(dmabuf); @@ -586,6 +588,7 @@ struct dma_buf *dma_buf_export(const struct dma_buf_export_info *exp_info) mutex_lock(&db_list.lock); list_add(&dmabuf->list_node, &db_list.head); mutex_unlock(&db_list.lock); + atomic_long_add(dmabuf->size, &dma_buf_global_allocated); return dmabuf; @@ -1346,6 +1349,16 @@ void dma_buf_vunmap(struct dma_buf *dmabuf, struct dma_buf_map *map) } EXPORT_SYMBOL_GPL(dma_buf_vunmap); +/** + * dma_buf_allocated_pages - Return the used nr of pages + * allocated for dma-buf + */ +long dma_buf_allocated_pages(void) +{ + return atomic_long_read(&dma_buf_global_allocated) >> PAGE_SHIFT; +} +EXPORT_SYMBOL_GPL(dma_buf_allocated_pages); + #ifdef CONFIG_DEBUG_FS static int dma_buf_debug_show(struct seq_file *s, void *unused) { diff --git a/fs/proc/meminfo.c b/fs/proc/meminfo.c index 6fa761c9cc78..ccc7c40c8db7 100644 --- a/fs/proc/meminfo.c +++ b/fs/proc/meminfo.c @@ -16,6 +16,7 @@ #ifdef CONFIG_CMA #include <linux/cma.h> #endif +#include <linux/dma-buf.h> #include <asm/page.h> #include "internal.h" @@ -145,7 +146,9 @@ static int meminfo_proc_show(struct seq_file *m, void *v) show_val_kb(m, "CmaFree: ", global_zone_page_state(NR_FREE_CMA_PAGES)); #endif - +#ifdef CONFIG_DMA_SHARED_BUFFER + show_val_kb(m, "DmaBufTotal: ", dma_buf_allocated_pages()); +#endif hugetlb_report_meminfo(m); arch_report_meminfo(m); diff --git a/include/linux/dma-buf.h b/include/linux/dma-buf.h index efdc56b9d95f..5b05816bd2cd 100644 --- a/include/linux/dma-buf.h +++ b/include/linux/dma-buf.h @@ -507,4 +507,5 @@ int dma_buf_mmap(struct dma_buf *, struct vm_area_struct *, unsigned long); int dma_buf_vmap(struct dma_buf *dmabuf, struct dma_buf_map *map); void dma_buf_vunmap(struct dma_buf *dmabuf, struct dma_buf_map *map); +long dma_buf_allocated_pages(void); #endif /* __DMA_BUF_H__ */
This adds a total used dma-buf memory. Details can be found in debugfs, however it is not for everyone and not always available. dma-buf are indirect allocated by userspace. So with this value we can monitor and detect userspace applications that have problems. Signed-off-by: Peter Enderborg <peter.enderborg@sony.com> --- drivers/dma-buf/dma-buf.c | 13 +++++++++++++ fs/proc/meminfo.c | 5 ++++- include/linux/dma-buf.h | 1 + 3 files changed, 18 insertions(+), 1 deletion(-)