Message ID | 20220901220138.182896-5-vishal.moola@gmail.com |
---|---|
State | Superseded |
Series | Convert to filemap_get_folios_tag() |
On Thu, Sep 01, 2022 at 03:01:19PM -0700, Vishal Moola (Oracle) wrote:
> Converted function to use folios throughout. This is in preparation for
> the removal of find_get_pages_range_tag().
>
> Signed-off-by: Vishal Moola (Oracle) <vishal.moola@gmail.com>
> ---
>  mm/page-writeback.c | 44 +++++++++++++++++++++++---------------------
>  1 file changed, 23 insertions(+), 21 deletions(-)
>
> diff --git a/mm/page-writeback.c b/mm/page-writeback.c
> index 032a7bf8d259..087165357a5a 100644
> --- a/mm/page-writeback.c
> +++ b/mm/page-writeback.c
> @@ -2285,15 +2285,15 @@ int write_cache_pages(struct address_space *mapping,
>  	int ret = 0;
>  	int done = 0;
>  	int error;
> -	struct pagevec pvec;
> -	int nr_pages;
> +	struct folio_batch fbatch;
> +	int nr_folios;
>  	pgoff_t index;
>  	pgoff_t end;		/* Inclusive */
>  	pgoff_t done_index;
>  	int range_whole = 0;
>  	xa_mark_t tag;
>
> -	pagevec_init(&pvec);
> +	folio_batch_init(&fbatch);
>  	if (wbc->range_cyclic) {
>  		index = mapping->writeback_index; /* prev offset */
>  		end = -1;
> @@ -2313,17 +2313,18 @@ int write_cache_pages(struct address_space *mapping,
>  	while (!done && (index <= end)) {
>  		int i;
>
> -		nr_pages = pagevec_lookup_range_tag(&pvec, mapping, &index, end,
> -				tag);
> -		if (nr_pages == 0)
> +		nr_folios = filemap_get_folios_tag(mapping, &index, end,
> +				tag, &fbatch);

This can find and return dirty multi-page folios if the filesystem
enables them in the mapping at instantiation time, right?

> +
> +		if (nr_folios == 0)
>  			break;
>
> -		for (i = 0; i < nr_pages; i++) {
> -			struct page *page = pvec.pages[i];
> +		for (i = 0; i < nr_folios; i++) {
> +			struct folio *folio = fbatch.folios[i];
>
> -			done_index = page->index;
> +			done_index = folio->index;
>
> -			lock_page(page);
> +			folio_lock(folio);
>
>  			/*
>  			 * Page truncated or invalidated. We can freely skip it
> @@ -2333,30 +2334,30 @@ int write_cache_pages(struct address_space *mapping,
>  			 * even if there is now a new, dirty page at the same
>  			 * pagecache address.
>  			 */
> -			if (unlikely(page->mapping != mapping)) {
> +			if (unlikely(folio->mapping != mapping)) {
>  continue_unlock:
> -				unlock_page(page);
> +				folio_unlock(folio);
>  				continue;
>  			}
>
> -			if (!PageDirty(page)) {
> +			if (!folio_test_dirty(folio)) {
>  				/* someone wrote it for us */
>  				goto continue_unlock;
>  			}
>
> -			if (PageWriteback(page)) {
> +			if (folio_test_writeback(folio)) {
>  				if (wbc->sync_mode != WB_SYNC_NONE)
> -					wait_on_page_writeback(page);
> +					folio_wait_writeback(folio);
>  				else
>  					goto continue_unlock;
>  			}
>
> -			BUG_ON(PageWriteback(page));
> -			if (!clear_page_dirty_for_io(page))
> +			BUG_ON(folio_test_writeback(folio));
> +			if (!folio_clear_dirty_for_io(folio))
>  				goto continue_unlock;
>
>  			trace_wbc_writepage(wbc, inode_to_bdi(mapping->host));
> -			error = (*writepage)(page, wbc, data);
> +			error = writepage(&folio->page, wbc, data);

Yet, IIUC, this treats all folios as if they are single page folios.
i.e. it passes the head page of a multi-page folio to a callback
that will treat it as a single PAGE_SIZE page, because that's all
the writepage callbacks are currently expected to be passed...

So won't this break writeback of dirty multipage folios?

-Dave.
On Wed, Oct 19, 2022 at 08:01:52AM +1100, Dave Chinner wrote:
> On Thu, Sep 01, 2022 at 03:01:19PM -0700, Vishal Moola (Oracle) wrote:
> > Converted function to use folios throughout. This is in preparation for
> > the removal of find_get_pages_range_tag().
> >
> > Signed-off-by: Vishal Moola (Oracle) <vishal.moola@gmail.com>

[snip the parts of the quoted patch already shown above]

> > -		nr_pages = pagevec_lookup_range_tag(&pvec, mapping, &index, end,
> > -				tag);
> > -		if (nr_pages == 0)
> > +		nr_folios = filemap_get_folios_tag(mapping, &index, end,
> > +				tag, &fbatch);
>
> This can find and return dirty multi-page folios if the filesystem
> enables them in the mapping at instantiation time, right?

Yup, it will.

> >  			trace_wbc_writepage(wbc, inode_to_bdi(mapping->host));
> > -			error = (*writepage)(page, wbc, data);
> > +			error = writepage(&folio->page, wbc, data);
>
> Yet, IIUC, this treats all folios as if they are single page folios.
> i.e. it passes the head page of a multi-page folio to a callback
> that will treat it as a single PAGE_SIZE page, because that's all
> the writepage callbacks are currently expected to be passed...
>
> So won't this break writeback of dirty multipage folios?

Yes, it appears it would. But it wouldn't, because it's already 'broken'.
The current find_get_pages_range_tag() actually has the exact same
issue. The current code to fill up the pages array is:

	pages[ret] = &folio->page;
	if (++ret == nr_pages) {
		*index = folio->index + folio_nr_pages(folio);
		goto out;

which behaves the same way as the issue you pointed out (both break
large folios).

When I spoke to Matthew about this earlier, we decided to go ahead
with replacing the function and leave it up to the callers to
fix/handle large folios when the filesystem gets to it. It's not
great to leave it 'broken', but it's something that isn't - or at
least shouldn't be - creating any problems at present. And I believe
Matthew has plans to address them at some point before they actually
become problems?
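
For what it's worth, when a caller does get converted, the fix is
mostly iteration accounting: every step needs to advance in units of
folio_nr_pages(folio) instead of 1. A rough, untested sketch of the
shape this takes (not part of this patch):

	struct folio *folio = fbatch.folios[i];

	error = writepage(&folio->page, wbc, data);
	if (unlikely(error)) {
		/* resume after the whole folio, not just its head page */
		done_index = folio->index + folio_nr_pages(folio);
	}

	/* a large folio cleans more than one page's worth of dirty data */
	wbc->nr_to_write -= folio_nr_pages(folio);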
On Fri, Nov 04, 2022 at 11:32:35AM +1100, Dave Chinner wrote:
> At minimum, it needs to be documented, though I'd much prefer that
> we explicitly duplicate write_cache_pages() as write_cache_folios()
> with a callback that takes a folio and change the code to be fully
> multi-page folio safe. Then filesystems that support folios (and
> large folios) natively can be passed folios without going through
> this crappy "folio->page, page->folio" dance because the writepage
> APIs are unaware of multi-page folio constructs.

There are a lot of places which go through the folio->page->folio
dance, and this one wasn't even close to the top of my list. That
said, it has a fairly small number of callers -- ext4, fuse, iomap,
mpage, nfs, orangefs.

So Vishal, this seems like a good project for you to take on next --
convert write_cache_pages() to write_cache_folios() and writepage_t
to write_folio_t.
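
Roughly this shape, I'd think (untested sketch; the exact signature is
up for discussion):

	typedef int (*write_folio_t)(struct folio *folio,
			struct writeback_control *wbc, void *data);

	int write_cache_folios(struct address_space *mapping,
			struct writeback_control *wbc,
			write_folio_t write_folio, void *data);

with the loop calling write_folio(folio, wbc, data) directly, so the
callback is handed the whole folio and none of the callers need the
folio->page->folio dance at all.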
diff --git a/mm/page-writeback.c b/mm/page-writeback.c
index 032a7bf8d259..087165357a5a 100644
--- a/mm/page-writeback.c
+++ b/mm/page-writeback.c
@@ -2285,15 +2285,15 @@ int write_cache_pages(struct address_space *mapping,
 	int ret = 0;
 	int done = 0;
 	int error;
-	struct pagevec pvec;
-	int nr_pages;
+	struct folio_batch fbatch;
+	int nr_folios;
 	pgoff_t index;
 	pgoff_t end;		/* Inclusive */
 	pgoff_t done_index;
 	int range_whole = 0;
 	xa_mark_t tag;
 
-	pagevec_init(&pvec);
+	folio_batch_init(&fbatch);
 	if (wbc->range_cyclic) {
 		index = mapping->writeback_index; /* prev offset */
 		end = -1;
@@ -2313,17 +2313,18 @@ int write_cache_pages(struct address_space *mapping,
 	while (!done && (index <= end)) {
 		int i;
 
-		nr_pages = pagevec_lookup_range_tag(&pvec, mapping, &index, end,
-				tag);
-		if (nr_pages == 0)
+		nr_folios = filemap_get_folios_tag(mapping, &index, end,
+				tag, &fbatch);
+
+		if (nr_folios == 0)
 			break;
 
-		for (i = 0; i < nr_pages; i++) {
-			struct page *page = pvec.pages[i];
+		for (i = 0; i < nr_folios; i++) {
+			struct folio *folio = fbatch.folios[i];
 
-			done_index = page->index;
+			done_index = folio->index;
 
-			lock_page(page);
+			folio_lock(folio);
 
 			/*
 			 * Page truncated or invalidated. We can freely skip it
@@ -2333,30 +2334,30 @@ int write_cache_pages(struct address_space *mapping,
 			 * even if there is now a new, dirty page at the same
 			 * pagecache address.
 			 */
-			if (unlikely(page->mapping != mapping)) {
+			if (unlikely(folio->mapping != mapping)) {
 continue_unlock:
-				unlock_page(page);
+				folio_unlock(folio);
 				continue;
 			}
 
-			if (!PageDirty(page)) {
+			if (!folio_test_dirty(folio)) {
 				/* someone wrote it for us */
 				goto continue_unlock;
 			}
 
-			if (PageWriteback(page)) {
+			if (folio_test_writeback(folio)) {
 				if (wbc->sync_mode != WB_SYNC_NONE)
-					wait_on_page_writeback(page);
+					folio_wait_writeback(folio);
 				else
 					goto continue_unlock;
 			}
 
-			BUG_ON(PageWriteback(page));
-			if (!clear_page_dirty_for_io(page))
+			BUG_ON(folio_test_writeback(folio));
+			if (!folio_clear_dirty_for_io(folio))
 				goto continue_unlock;
 
 			trace_wbc_writepage(wbc, inode_to_bdi(mapping->host));
-			error = (*writepage)(page, wbc, data);
+			error = writepage(&folio->page, wbc, data);
 			if (unlikely(error)) {
 				/*
 				 * Handle errors according to the type of
@@ -2371,11 +2372,12 @@ int write_cache_pages(struct address_space *mapping,
 				 * the first error.
 				 */
 				if (error == AOP_WRITEPAGE_ACTIVATE) {
-					unlock_page(page);
+					folio_unlock(folio);
 					error = 0;
 				} else if (wbc->sync_mode != WB_SYNC_ALL) {
 					ret = error;
-					done_index = page->index + 1;
+					done_index = folio->index +
+						folio_nr_pages(folio);
 					done = 1;
 					break;
 				}
@@ -2395,7 +2397,7 @@ int write_cache_pages(struct address_space *mapping,
 			break;
 		}
 	}
-	pagevec_release(&pvec);
+	folio_batch_release(&fbatch);
 	cond_resched();
 }
Converted function to use folios throughout. This is in preparation for
the removal of find_get_pages_range_tag().

Signed-off-by: Vishal Moola (Oracle) <vishal.moola@gmail.com>
---
 mm/page-writeback.c | 44 +++++++++++++++++++++++---------------------
 1 file changed, 23 insertions(+), 21 deletions(-)