[rfc,v2,0/5] add elevated refcnt support for page pool

Message ID 1625903002-31619-1-git-send-email-linyunsheng@huawei.com

Message

Yunsheng Lin July 10, 2021, 7:43 a.m. UTC
This patchset adds elevated refcnt support for page pool
and enables skb's page frag recycling based on page pool
in the hns3 driver.

RFC v2:
1. Split patch 1 into more reviewable ones.
2. Repurpose the lower 12 bits of the dma address to store the
   pagecnt_bias as suggested by Alexander.
3. Support recycling to pool->alloc for the elevated refcnt case
   too.

Yunsheng Lin (5):
  page_pool: keep pp info as long as page pool owns the page
  page_pool: add interface for getting and setting pagecnt_bias
  page_pool: add page recycling support based on elevated refcnt
  page_pool: support page frag API for page pool
  net: hns3: support skb's frag page recycling based on page pool

 drivers/net/ethernet/hisilicon/hns3/hns3_enet.c |  79 ++++++++++-
 drivers/net/ethernet/hisilicon/hns3/hns3_enet.h |   3 +
 drivers/net/ethernet/marvell/mvneta.c           |   6 +-
 drivers/net/ethernet/marvell/mvpp2/mvpp2_main.c |   2 +-
 drivers/net/ethernet/ti/cpsw.c                  |   2 +-
 drivers/net/ethernet/ti/cpsw_new.c              |   2 +-
 include/linux/skbuff.h                          |   4 +-
 include/net/page_pool.h                         |  50 +++++--
 net/core/page_pool.c                            | 172 ++++++++++++++++++++----
 9 files changed, 266 insertions(+), 54 deletions(-)
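
For background, the accounting behind "elevated refcnt" is that the pool
takes a large number of page references up front and tracks how many it
still holds in pagecnt_bias, so handing out and returning individual frags
does not need to touch the atomic page refcount. The toy program below is
a standalone illustration only, not code from the series; the names are
invented and the real patches do the bookkeeping differently:

#include <stdbool.h>
#include <stdio.h>

#define BIAS_MAX        0x1000  /* page references taken up front */

struct toy_page {
        int refcount;           /* stand-in for the struct page refcount */
        int pagecnt_bias;       /* references the pool has not yet handed out */
};

static void toy_pool_init_page(struct toy_page *p)
{
        p->refcount = BIAS_MAX;
        p->pagecnt_bias = BIAS_MAX;
}

static void toy_pool_alloc_frag(struct toy_page *p)
{
        /* handing a frag to the stack transfers one prepaid reference */
        p->pagecnt_bias--;
}

static void toy_frag_free(struct toy_page *p)
{
        /* the consumer drops its reference when the skb is freed */
        p->refcount--;
}

static bool toy_pool_can_recycle(struct toy_page *p)
{
        /* refcount - pagecnt_bias is the number of frags still in flight */
        return p->refcount == p->pagecnt_bias;
}

int main(void)
{
        struct toy_page p;

        toy_pool_init_page(&p);
        toy_pool_alloc_frag(&p);
        toy_pool_alloc_frag(&p);
        printf("recyclable after 2 allocs: %d\n", toy_pool_can_recycle(&p));
        toy_frag_free(&p);
        toy_frag_free(&p);
        printf("recyclable after 2 frees:  %d\n", toy_pool_can_recycle(&p));
        return 0;
}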

Comments

Matteo Croce July 10, 2021, 3:40 p.m. UTC | #1
On Sat, Jul 10, 2021 at 9:44 AM Yunsheng Lin <linyunsheng@huawei.com> wrote:
>
> This patchset adds elevated refcnt support for page pool
> and enables skb's page frag recycling based on page pool
> in the hns3 driver.
>
> RFC v2:
> 1. Split patch 1 into more reviewable ones.
> 2. Repurpose the lower 12 bits of the dma address to store the
>    pagecnt_bias as suggested by Alexander.
> 3. Support recycling to pool->alloc for the elevated refcnt case
>    too.
>
> Yunsheng Lin (5):
>   page_pool: keep pp info as long as page pool owns the page
>   page_pool: add interface for getting and setting pagecnt_bias
>   page_pool: add page recycling support based on elevated refcnt
>   page_pool: support page frag API for page pool
>   net: hns3: support skb's frag page recycling based on page pool
>
>  drivers/net/ethernet/hisilicon/hns3/hns3_enet.c |  79 ++++++++++-
>  drivers/net/ethernet/hisilicon/hns3/hns3_enet.h |   3 +
>  drivers/net/ethernet/marvell/mvneta.c           |   6 +-
>  drivers/net/ethernet/marvell/mvpp2/mvpp2_main.c |   2 +-
>  drivers/net/ethernet/ti/cpsw.c                  |   2 +-
>  drivers/net/ethernet/ti/cpsw_new.c              |   2 +-
>  include/linux/skbuff.h                          |   4 +-
>  include/net/page_pool.h                         |  50 +++++--
>  net/core/page_pool.c                            | 172 ++++++++++++++++++++----
>  9 files changed, 266 insertions(+), 54 deletions(-)
>
> --
> 2.7.4
>

For mvpp2:

Tested-by: Matteo Croce <mcroce@microsoft.com>

Alexander Duyck July 10, 2021, 4:55 p.m. UTC | #2
On Sat, Jul 10, 2021 at 12:44 AM Yunsheng Lin <linyunsheng@huawei.com> wrote:
>
> As suggested by Alexander, "A DMA mapping should be page
> aligned anyway so the lower 12 bits would be reserved 0",
> so it might make more sense to repurpose the lower 12 bits
> of the dma address to store the pagecnt_bias for elevated
> refcnt case in page pool.
>
> As the newly added page_pool_get_pagecnt_bias() may be called
> outside of the softirq context, annotate the access to
> page->dma_addr[0] with READ_ONCE() and WRITE_ONCE().
>
> The other three interfaces using page->dma_addr[0] are only called
> in the softirq context during normal rx processing; hopefully
> the barrier in the rx processing will ensure the correct ordering
> between getting and setting pagecnt_bias.
>
> Signed-off-by: Yunsheng Lin <linyunsheng@huawei.com>
> ---
>  include/net/page_pool.h | 24 ++++++++++++++++++++++--
>  1 file changed, 22 insertions(+), 2 deletions(-)
>
> diff --git a/include/net/page_pool.h b/include/net/page_pool.h
> index 8d7744d..5746f17 100644
> --- a/include/net/page_pool.h
> +++ b/include/net/page_pool.h
> @@ -200,7 +200,7 @@ static inline void page_pool_recycle_direct(struct page_pool *pool,
>
>  static inline dma_addr_t page_pool_get_dma_addr(struct page *page)
>  {
> -       dma_addr_t ret = page->dma_addr[0];
> +       dma_addr_t ret = READ_ONCE(page->dma_addr[0]) & PAGE_MASK;
>         if (sizeof(dma_addr_t) > sizeof(unsigned long))
>                 ret |= (dma_addr_t)page->dma_addr[1] << 16 << 16;
>         return ret;
> @@ -208,11 +208,31 @@ static inline dma_addr_t page_pool_get_dma_addr(struct page *page)
>
>  static inline void page_pool_set_dma_addr(struct page *page, dma_addr_t addr)
>  {
> -       page->dma_addr[0] = addr;
> +       unsigned long dma_addr_0 = READ_ONCE(page->dma_addr[0]);
> +
> +       dma_addr_0 &= ~PAGE_MASK;
> +       dma_addr_0 |= (addr & PAGE_MASK);

So rather than doing all this testing and clearing, it would probably
be better to add a return value to the function and do something like:

if (WARN_ON(dma_addr_0 & ~PAGE_MASK))
    return -1;

That way you could have page_pool_dma_map unmap, free the page, and
return false indicating that the DMA mapping failed with a visible
error in the event that our expectation that the dma_addr is page
aligned is ever violated.
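
A rough sketch of what that could look like on the caller side follows
(untested and purely illustrative; it assumes page_pool_dma_map() stays
the only place the address is set, and the dma_sync_for_device handling
is omitted):

static inline bool page_pool_set_dma_addr(struct page *page, dma_addr_t addr)
{
        /* the low 12 bits are reserved for pagecnt_bias, so refuse to
         * store a non page aligned mapping
         */
        if (WARN_ON(addr & ~PAGE_MASK))
                return false;

        page->dma_addr[0] = addr;
        if (sizeof(dma_addr_t) > sizeof(unsigned long))
                page->dma_addr[1] = upper_32_bits(addr);
        return true;
}

static bool page_pool_dma_map(struct page_pool *pool, struct page *page)
{
        dma_addr_t dma;

        dma = dma_map_page_attrs(pool->p.dev, page, 0,
                                 PAGE_SIZE << pool->p.order,
                                 pool->p.dma_dir, DMA_ATTR_SKIP_CPU_SYNC);
        if (dma_mapping_error(pool->p.dev, dma))
                return false;

        if (!page_pool_set_dma_addr(page, dma)) {
                /* unexpected non page aligned address: undo the mapping
                 * and let the caller free the page
                 */
                dma_unmap_page_attrs(pool->p.dev, dma,
                                     PAGE_SIZE << pool->p.order,
                                     pool->p.dma_dir, DMA_ATTR_SKIP_CPU_SYNC);
                return false;
        }
        return true;
}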

> +       WRITE_ONCE(page->dma_addr[0], dma_addr_0);
> +
>         if (sizeof(dma_addr_t) > sizeof(unsigned long))
>                 page->dma_addr[1] = upper_32_bits(addr);
>  }
>
> +static inline int page_pool_get_pagecnt_bias(struct page *page)
> +{
> +       return (READ_ONCE(page->dma_addr[0]) & ~PAGE_MASK);

You don't need the parentheses around the READ_ONCE and PAGE_MASK.

> +}
> +
> +static inline void page_pool_set_pagecnt_bias(struct page *page, int bias)
> +{
> +       unsigned long dma_addr_0 = READ_ONCE(page->dma_addr[0]);
> +
> +       dma_addr_0 &= PAGE_MASK;
> +       dma_addr_0 |= (bias & ~PAGE_MASK);
> +
> +       WRITE_ONCE(page->dma_addr[0], dma_addr_0);
> +}
> +
>  static inline bool is_page_pool_compiled_in(void)
>  {
>  #ifdef CONFIG_PAGE_POOL
> --
> 2.7.4
>

Yunsheng Lin July 12, 2021, 7:44 a.m. UTC | #3
On 2021/7/11 0:55, Alexander Duyck wrote:
> On Sat, Jul 10, 2021 at 12:44 AM Yunsheng Lin <linyunsheng@huawei.com> wrote:
>>
>> As suggested by Alexander, "A DMA mapping should be page
>> aligned anyway so the lower 12 bits would be reserved 0",
>> so it might make more sense to repurpose the lower 12 bits
>> of the dma address to store the pagecnt_bias for elevated
>> refcnt case in page pool.
>>
>> As the newly added page_pool_get_pagecnt_bias() may be called
>> outside of the softirq context, annotate the access to
>> page->dma_addr[0] with READ_ONCE() and WRITE_ONCE().
>>
>> The other three interfaces using page->dma_addr[0] are only called
>> in the softirq context during normal rx processing; hopefully
>> the barrier in the rx processing will ensure the correct ordering
>> between getting and setting pagecnt_bias.
>>
>> Signed-off-by: Yunsheng Lin <linyunsheng@huawei.com>
>> ---
>>  include/net/page_pool.h | 24 ++++++++++++++++++++++--
>>  1 file changed, 22 insertions(+), 2 deletions(-)
>>
>> diff --git a/include/net/page_pool.h b/include/net/page_pool.h
>> index 8d7744d..5746f17 100644
>> --- a/include/net/page_pool.h
>> +++ b/include/net/page_pool.h
>> @@ -200,7 +200,7 @@ static inline void page_pool_recycle_direct(struct page_pool *pool,
>>
>>  static inline dma_addr_t page_pool_get_dma_addr(struct page *page)
>>  {
>> -       dma_addr_t ret = page->dma_addr[0];
>> +       dma_addr_t ret = READ_ONCE(page->dma_addr[0]) & PAGE_MASK;
>>         if (sizeof(dma_addr_t) > sizeof(unsigned long))
>>                 ret |= (dma_addr_t)page->dma_addr[1] << 16 << 16;
>>         return ret;
>> @@ -208,11 +208,31 @@ static inline dma_addr_t page_pool_get_dma_addr(struct page *page)
>>
>>  static inline void page_pool_set_dma_addr(struct page *page, dma_addr_t addr)
>>  {
>> -       page->dma_addr[0] = addr;
>> +       unsigned long dma_addr_0 = READ_ONCE(page->dma_addr[0]);
>> +
>> +       dma_addr_0 &= ~PAGE_MASK;
>> +       dma_addr_0 |= (addr & PAGE_MASK);
>
> So rather than doing all this testing and clearing, it would probably
> be better to add a return value to the function and do something like:
>
> if (WARN_ON(dma_addr_0 & ~PAGE_MASK))
>     return -1;
>
> That way you could have page_pool_dma_map unmap, free the page, and
> return false indicating that the DMA mapping failed with a visible
> error in the event that our expectation that the dma_addr is page
> aligned is ever violated.

I suppose the above is based on page_pool_set_dma_addr() being called
only once, before page_pool_set_pagecnt_bias(), right? So we could:

static inline bool page_pool_set_dma_addr(struct page *page, dma_addr_t addr)
{
        if (WARN_ON(addr & ~PAGE_MASK))
                return false;

        page->dma_addr[0] = addr;
        if (sizeof(dma_addr_t) > sizeof(unsigned long))
                page->dma_addr[1] = upper_32_bits(addr);

        return true;
}

>
>> +       WRITE_ONCE(page->dma_addr[0], dma_addr_0);
>> +
>>         if (sizeof(dma_addr_t) > sizeof(unsigned long))
>>                 page->dma_addr[1] = upper_32_bits(addr);
>>  }
>>
>> +static inline int page_pool_get_pagecnt_bias(struct page *page)
>> +{
>> +       return (READ_ONCE(page->dma_addr[0]) & ~PAGE_MASK);
>
> You don't need the parentheses around the READ_ONCE and PAGE_MASK.

ok.

>
>> +}
>> +
>> +static inline void page_pool_set_pagecnt_bias(struct page *page, int bias)
>> +{
>> +       unsigned long dma_addr_0 = READ_ONCE(page->dma_addr[0]);
>> +
>> +       dma_addr_0 &= PAGE_MASK;
>> +       dma_addr_0 |= (bias & ~PAGE_MASK);
>> +
>> +       WRITE_ONCE(page->dma_addr[0], dma_addr_0);
>> +}
>> +
>>  static inline bool is_page_pool_compiled_in(void)
>>  {
>>  #ifdef CONFIG_PAGE_POOL
>> --
>> 2.7.4
>>
> .
>