diff mbox series

ceph: implement writeback livelock avoidance using page tagging

Message ID 20230308025630.276866-1-xiubli@redhat.com
State New
Headers show
Series ceph: implement writeback livelock avoidance using page tagging | expand

Commit Message

Xiubo Li March 8, 2023, 2:56 a.m. UTC
From: Xiubo Li <xiubli@redhat.com>

While the mapped IOs continue if we try to flush a file's buffer
we can see that the fsync() won't complete until the IOs finish.

This is analogous to Jan Kara's commit (f446daaea9d4 mm: implement
writeback livelock avoidance using page tagging), we will try to
avoid livelocks of writeback when some steadily creates dirty pages
in a mapping we are writing out.

Cc: stable@vger.kernel.org
Signed-off-by: Xiubo Li <xiubli@redhat.com>
---
 fs/ceph/addr.c | 11 ++++++++++-
 1 file changed, 10 insertions(+), 1 deletion(-)
diff mbox series

Patch

diff --git a/fs/ceph/addr.c b/fs/ceph/addr.c
index 5731b82bf368..caf6ac5c1390 100644
--- a/fs/ceph/addr.c
+++ b/fs/ceph/addr.c
@@ -879,6 +879,7 @@  static int ceph_writepages_start(struct address_space *mapping,
 	bool should_loop, range_whole = false;
 	bool done = false;
 	bool caching = ceph_is_cache_enabled(inode);
+	xa_mark_t tag;
 
 	if (wbc->sync_mode == WB_SYNC_NONE &&
 	    fsc->write_congested)
@@ -905,6 +906,11 @@  static int ceph_writepages_start(struct address_space *mapping,
 	start_index = wbc->range_cyclic ? mapping->writeback_index : 0;
 	index = start_index;
 
+	if (wbc->sync_mode == WB_SYNC_ALL || wbc->tagged_writepages) {
+		tag = PAGECACHE_TAG_TOWRITE;
+	} else {
+		tag = PAGECACHE_TAG_DIRTY;
+	}
 retry:
 	/* find oldest snap context with dirty data */
 	snapc = get_oldest_context(inode, &ceph_wbc, NULL);
@@ -943,6 +949,9 @@  static int ceph_writepages_start(struct address_space *mapping,
 		dout(" non-head snapc, range whole\n");
 	}
 
+	if (wbc->sync_mode == WB_SYNC_ALL || wbc->tagged_writepages)
+		tag_pages_for_writeback(mapping, index, end);
+
 	ceph_put_snap_context(last_snapc);
 	last_snapc = snapc;
 
@@ -959,7 +968,7 @@  static int ceph_writepages_start(struct address_space *mapping,
 
 get_more_pages:
 		pvec_pages = pagevec_lookup_range_tag(&pvec, mapping, &index,
-						end, PAGECACHE_TAG_DIRTY);
+						      end, tag);
 		dout("pagevec_lookup_range_tag got %d\n", pvec_pages);
 		if (!pvec_pages && !locked_pages)
 			break;