
[v5,11/12] mm: zswap: Restructure & simplify zswap_store() to make it amenable for batching.

Message ID 20241221063119.29140-12-kanchana.p.sridhar@intel.com
State New
Series zswap IAA compress batching

Commit Message

Sridhar, Kanchana P Dec. 21, 2024, 6:31 a.m. UTC
This patch introduces zswap_store_folio(), which performs, for all the
pages in a folio, the work that zswap_store_page() previously did for a
single page. This allows us to move the loop over the folio's pages from
zswap_store() to zswap_store_folio().

A distinct zswap_compress_folio() is also added; it simply calls
zswap_compress() for each page in the folio it is called with.

zswap_store_folio() starts by allocating all zswap entries required to
store the folio. Next, it calls zswap_compress_folio() and finally, adds
the entries to the xarray and LRU.

The error handling and cleanup required for all failure scenarios that can
occur while storing a folio in zswap is now consolidated to a
"store_folio_failed" label in zswap_store_folio().

These changes facilitate developing support for compress batching in
zswap_store_folio().

Signed-off-by: Kanchana P Sridhar <kanchana.p.sridhar@intel.com>
---
 mm/zswap.c | 183 +++++++++++++++++++++++++++++++++--------------------
 1 file changed, 116 insertions(+), 67 deletions(-)

Comments

Sridhar, Kanchana P Jan. 8, 2025, 3:57 a.m. UTC | #1
> -----Original Message-----
> From: Yosry Ahmed <yosryahmed@google.com>
> Sent: Monday, January 6, 2025 5:17 PM
> To: Sridhar, Kanchana P <kanchana.p.sridhar@intel.com>
> Cc: linux-kernel@vger.kernel.org; linux-mm@kvack.org;
> hannes@cmpxchg.org; nphamcs@gmail.com; chengming.zhou@linux.dev;
> usamaarif642@gmail.com; ryan.roberts@arm.com; 21cnbao@gmail.com;
> akpm@linux-foundation.org; linux-crypto@vger.kernel.org;
> herbert@gondor.apana.org.au; davem@davemloft.net;
> clabbe@baylibre.com; ardb@kernel.org; ebiggers@google.com;
> surenb@google.com; Accardi, Kristen C <kristen.c.accardi@intel.com>;
> Feghali, Wajdi K <wajdi.k.feghali@intel.com>; Gopal, Vinodh
> <vinodh.gopal@intel.com>
> Subject: Re: [PATCH v5 11/12] mm: zswap: Restructure & simplify
> zswap_store() to make it amenable for batching.
> 
> On Fri, Dec 20, 2024 at 10:31 PM Kanchana P Sridhar
> <kanchana.p.sridhar@intel.com> wrote:
> >
> > This patch introduces zswap_store_folio() that implements all the computes
> > done earlier in zswap_store_page() for a single-page, for all the pages in
> > a folio. This allows us to move the loop over the folio's pages from
> > zswap_store() to zswap_store_folio().
> >
> > A distinct zswap_compress_folio() is also added, that simply calls
> > zswap_compress() for each page in the folio it is called with.
> 
> The git diff looks funky, it may make things clearer to introduce
> zswap_compress_folio() in a separate patch.

Ok, will do so.

> 
> >
> > zswap_store_folio() starts by allocating all zswap entries required to
> > store the folio. Next, it calls zswap_compress_folio() and finally, adds
> > the entries to the xarray and LRU.
> >
> > The error handling and cleanup required for all failure scenarios that can
> > occur while storing a folio in zswap is now consolidated to a
> > "store_folio_failed" label in zswap_store_folio().
> >
> > These changes facilitate developing support for compress batching in
> > zswap_store_folio().
> >
> > Signed-off-by: Kanchana P Sridhar <kanchana.p.sridhar@intel.com>
> > ---
> >  mm/zswap.c | 183 +++++++++++++++++++++++++++++++++-----------------
> ---
> >  1 file changed, 116 insertions(+), 67 deletions(-)
> >
> > diff --git a/mm/zswap.c b/mm/zswap.c
> > index 99cd78891fd0..1be0f1807bfc 100644
> > --- a/mm/zswap.c
> > +++ b/mm/zswap.c
> > @@ -1467,77 +1467,129 @@ static void shrink_worker(struct work_struct
> *w)
> >  * main API
> >  **********************************/
> >
> > -static ssize_t zswap_store_page(struct page *page,
> > -                               struct obj_cgroup *objcg,
> > -                               struct zswap_pool *pool)
> > +static bool zswap_compress_folio(struct folio *folio,
> > +                                struct zswap_entry *entries[],
> > +                                struct zswap_pool *pool)
> >  {
> > -       swp_entry_t page_swpentry = page_swap_entry(page);
> > -       struct zswap_entry *entry, *old;
> > +       long index, nr_pages = folio_nr_pages(folio);
> >
> > -       /* allocate entry */
> > -       entry = zswap_entry_cache_alloc(GFP_KERNEL, page_to_nid(page));
> > -       if (!entry) {
> > -               zswap_reject_kmemcache_fail++;
> > -               return -EINVAL;
> > +       for (index = 0; index < nr_pages; ++index) {
> > +               struct page *page = folio_page(folio, index);
> > +
> > +               if (!zswap_compress(page, entries[index], pool))
> > +                       return false;
> >         }
> >
> > -       if (!zswap_compress(page, entry, pool))
> > -               goto compress_failed;
> > +       return true;
> > +}
> >
> > -       old = xa_store(swap_zswap_tree(page_swpentry),
> > -                      swp_offset(page_swpentry),
> > -                      entry, GFP_KERNEL);
> > -       if (xa_is_err(old)) {
> > -               int err = xa_err(old);
> > +/*
> > + * Store all pages in a folio.
> > + *
> > + * The error handling from all failure points is consolidated to the
> > + * "store_folio_failed" label, based on the initialization of the zswap
> entries'
> > + * handles to ERR_PTR(-EINVAL) at allocation time, and the fact that the
> > + * entry's handle is subsequently modified only upon a successful
> zpool_malloc()
> > + * after the page is compressed.
> > + */
> > +static ssize_t zswap_store_folio(struct folio *folio,
> > +                                struct obj_cgroup *objcg,
> > +                                struct zswap_pool *pool)
> > +{
> > +       long index, nr_pages = folio_nr_pages(folio);
> > +       struct zswap_entry **entries = NULL;
> > +       int node_id = folio_nid(folio);
> > +       size_t compressed_bytes = 0;
> >
> > -               WARN_ONCE(err != -ENOMEM, "unexpected xarray error: %d\n",
> err);
> > -               zswap_reject_alloc_fail++;
> > -               goto store_failed;
> > +       entries = kmalloc(nr_pages * sizeof(*entries), GFP_KERNEL);
> 
> We can probably use kcalloc() here.

I am a little worried about the latency penalty of kcalloc() in the reclaim path,
especially since I am not relying on zero-initialized memory for "entries".
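
For reference, a minimal sketch of the two allocation choices being discussed
(identifiers mirror the patch; this is illustration only, not proposed code):

	/*
	 * kmalloc(): no zeroing. Every slot is written by the entry
	 * allocation loop below before it is ever read, so zeroing the
	 * array would be pure overhead on the reclaim path.
	 */
	entries = kmalloc(nr_pages * sizeof(*entries), GFP_KERNEL);

	/*
	 * kcalloc(): same allocation plus a memset(0) of the array. Only
	 * worth the extra latency if some path could read a slot that was
	 * never initialized.
	 */
	entries = kcalloc(nr_pages, sizeof(*entries), GFP_KERNEL);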

> 
> > +       if (!entries)
> > +               return -ENOMEM;
> > +
> > +       /* allocate entries */
> 
> This comment can be dropped.

Sure.

> 
> > +       for (index = 0; index < nr_pages; ++index) {
> > +               entries[index] = zswap_entry_cache_alloc(GFP_KERNEL,
> node_id);
> > +
> > +               if (!entries[index]) {
> > +                       zswap_reject_kmemcache_fail++;
> > +                       nr_pages = index;
> > +                       goto store_folio_failed;
> > +               }
> > +
> > +               entries[index]->handle = (unsigned long)ERR_PTR(-EINVAL);
> >         }
> >
> > -       /*
> > -        * We may have had an existing entry that became stale when
> > -        * the folio was redirtied and now the new version is being
> > -        * swapped out. Get rid of the old.
> > -        */
> > -       if (old)
> > -               zswap_entry_free(old);
> > +       if (!zswap_compress_folio(folio, entries, pool))
> > +               goto store_folio_failed;
> >
> > -       /*
> > -        * The entry is successfully compressed and stored in the tree, there is
> > -        * no further possibility of failure. Grab refs to the pool and objcg.
> > -        * These refs will be dropped by zswap_entry_free() when the entry is
> > -        * removed from the tree.
> > -        */
> > -       zswap_pool_get(pool);
> > -       if (objcg)
> > -               obj_cgroup_get(objcg);
> > +       for (index = 0; index < nr_pages; ++index) {
> > +               swp_entry_t page_swpentry = page_swap_entry(folio_page(folio,
> index));
> > +               struct zswap_entry *old, *entry = entries[index];
> > +
> > +               old = xa_store(swap_zswap_tree(page_swpentry),
> > +                              swp_offset(page_swpentry),
> > +                              entry, GFP_KERNEL);
> > +               if (xa_is_err(old)) {
> > +                       int err = xa_err(old);
> > +
> > +                       WARN_ONCE(err != -ENOMEM, "unexpected xarray error:
> %d\n", err);
> > +                       zswap_reject_alloc_fail++;
> > +                       goto store_folio_failed;
> > +               }
> >
> > -       /*
> > -        * We finish initializing the entry while it's already in xarray.
> > -        * This is safe because:
> > -        *
> > -        * 1. Concurrent stores and invalidations are excluded by folio lock.
> > -        *
> > -        * 2. Writeback is excluded by the entry not being on the LRU yet.
> > -        *    The publishing order matters to prevent writeback from seeing
> > -        *    an incoherent entry.
> > -        */
> > -       entry->pool = pool;
> > -       entry->swpentry = page_swpentry;
> > -       entry->objcg = objcg;
> > -       entry->referenced = true;
> > -       if (entry->length) {
> > -               INIT_LIST_HEAD(&entry->lru);
> > -               zswap_lru_add(&zswap_list_lru, entry);
> > +               /*
> > +                * We may have had an existing entry that became stale when
> > +                * the folio was redirtied and now the new version is being
> > +                * swapped out. Get rid of the old.
> > +                */
> > +               if (old)
> > +                       zswap_entry_free(old);
> > +
> > +               /*
> > +                * The entry is successfully compressed and stored in the tree,
> there is
> > +                * no further possibility of failure. Grab refs to the pool and objcg.
> > +                * These refs will be dropped by zswap_entry_free() when the
> entry is
> > +                * removed from the tree.
> > +                */
> > +               zswap_pool_get(pool);
> > +               if (objcg)
> > +                       obj_cgroup_get(objcg);
> > +
> > +               /*
> > +                * We finish initializing the entry while it's already in xarray.
> > +                * This is safe because:
> > +                *
> > +                * 1. Concurrent stores and invalidations are excluded by folio
> lock.
> > +                *
> > +                * 2. Writeback is excluded by the entry not being on the LRU yet.
> > +                *    The publishing order matters to prevent writeback from seeing
> > +                *    an incoherent entry.
> > +                */
> > +               entry->pool = pool;
> > +               entry->swpentry = page_swpentry;
> > +               entry->objcg = objcg;
> > +               entry->referenced = true;
> > +               if (entry->length) {
> > +                       INIT_LIST_HEAD(&entry->lru);
> > +                       zswap_lru_add(&zswap_list_lru, entry);
> > +               }
> > +
> > +               compressed_bytes += entry->length;
> >         }
> >
> > -       return entry->length;
> > +       kfree(entries);
> > +
> > +       return compressed_bytes;
> > +
> > +store_folio_failed:
> > +       for (index = 0; index < nr_pages; ++index) {
> > +               if (!IS_ERR_VALUE(entries[index]->handle))
> > +                       zpool_free(pool->zpool, entries[index]->handle);
> > +
> > +               zswap_entry_cache_free(entries[index]);
> > +       }
> 
> If there is a failure in xa_store() halfway through the entries, this
> loop will free all the compressed objects and entries. But, some of
> the entries are already in the xarray, and zswap_store() will try to
> free them again. This seems like a bug, or did I miss something here?

Thanks, great catch! Yes, this is a bug. I have a simple fix implemented,
which I am currently testing and will include in v6.
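
For illustration only, one shape such a fix could take (a hedged sketch, not
necessarily the fix mentioned above; "store_fail_idx" is a hypothetical name):

	/*
	 * 'store_fail_idx' would stay 0 on the entry-allocation and
	 * compression failure paths, and be set to the failing 'index'
	 * when xa_store() fails, so that entries already published in the
	 * xarray (which zswap_store()'s cleanup will free) are not freed
	 * a second time here.
	 */
store_folio_failed:
	for (index = store_fail_idx; index < nr_pages; ++index) {
		if (!IS_ERR_VALUE(entries[index]->handle))
			zpool_free(pool->zpool, entries[index]->handle);

		zswap_entry_cache_free(entries[index]);
	}

	kfree(entries);
	return -EINVAL;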

> 
> > +
> > +       kfree(entries);
> >
> > -store_failed:
> > -       zpool_free(pool->zpool, entry->handle);
> > -compress_failed:
> > -       zswap_entry_cache_free(entry);
> >         return -EINVAL;
> >  }
> >
> > @@ -1549,8 +1601,8 @@ bool zswap_store(struct folio *folio)
> >         struct mem_cgroup *memcg = NULL;
> >         struct zswap_pool *pool;
> >         size_t compressed_bytes = 0;
> > +       ssize_t bytes;
> >         bool ret = false;
> > -       long index;
> >
> >         VM_WARN_ON_ONCE(!folio_test_locked(folio));
> >         VM_WARN_ON_ONCE(!folio_test_swapcache(folio));
> > @@ -1584,15 +1636,11 @@ bool zswap_store(struct folio *folio)
> >                 mem_cgroup_put(memcg);
> >         }
> >
> > -       for (index = 0; index < nr_pages; ++index) {
> > -               struct page *page = folio_page(folio, index);
> > -               ssize_t bytes;
> > +       bytes = zswap_store_folio(folio, objcg, pool);
> > +       if (bytes < 0)
> > +               goto put_pool;
> >
> > -               bytes = zswap_store_page(page, objcg, pool);
> > -               if (bytes < 0)
> > -                       goto put_pool;
> > -               compressed_bytes += bytes;
> > -       }
> > +       compressed_bytes = bytes;
> 
> What's the point of having both compressed_bytes and bytes now?

The main reason was to cleanly handle a negative error value returned in "bytes"
(declared as ssize_t), as against a true total "compressed_bytes" (declared as size_t)
for the folio to use for objcg charging. This is similar to the current mainline
code where zswap_store() calls zswap_store_page(). I was hoping to avoid potential
issues with overflow/underflow, and for maintainability. Let me know if this is Ok.

Thanks,
Kanchana

> 
> >
> >         if (objcg) {
> >                 obj_cgroup_charge_zswap(objcg, compressed_bytes);
> > @@ -1622,6 +1670,7 @@ bool zswap_store(struct folio *folio)
> >                 pgoff_t offset = swp_offset(swp);
> >                 struct zswap_entry *entry;
> >                 struct xarray *tree;
> > +               long index;
> >
> >                 for (index = 0; index < nr_pages; ++index) {
> >                         tree = swap_zswap_tree(swp_entry(type, offset + index));
> > --
> > 2.27.0
> >
Yosry Ahmed Jan. 8, 2025, 4:22 a.m. UTC | #2
[..]
> > > diff --git a/mm/zswap.c b/mm/zswap.c
> > > index 99cd78891fd0..1be0f1807bfc 100644
> > > --- a/mm/zswap.c
> > > +++ b/mm/zswap.c
> > > @@ -1467,77 +1467,129 @@ static void shrink_worker(struct work_struct
> > *w)
> > >  * main API
> > >  **********************************/
> > >
> > > -static ssize_t zswap_store_page(struct page *page,
> > > -                               struct obj_cgroup *objcg,
> > > -                               struct zswap_pool *pool)
> > > +static bool zswap_compress_folio(struct folio *folio,
> > > +                                struct zswap_entry *entries[],
> > > +                                struct zswap_pool *pool)
> > >  {
> > > -       swp_entry_t page_swpentry = page_swap_entry(page);
> > > -       struct zswap_entry *entry, *old;
> > > +       long index, nr_pages = folio_nr_pages(folio);
> > >
> > > -       /* allocate entry */
> > > -       entry = zswap_entry_cache_alloc(GFP_KERNEL, page_to_nid(page));
> > > -       if (!entry) {
> > > -               zswap_reject_kmemcache_fail++;
> > > -               return -EINVAL;
> > > +       for (index = 0; index < nr_pages; ++index) {
> > > +               struct page *page = folio_page(folio, index);
> > > +
> > > +               if (!zswap_compress(page, entries[index], pool))
> > > +                       return false;
> > >         }
> > >
> > > -       if (!zswap_compress(page, entry, pool))
> > > -               goto compress_failed;
> > > +       return true;
> > > +}
> > >
> > > -       old = xa_store(swap_zswap_tree(page_swpentry),
> > > -                      swp_offset(page_swpentry),
> > > -                      entry, GFP_KERNEL);
> > > -       if (xa_is_err(old)) {
> > > -               int err = xa_err(old);
> > > +/*
> > > + * Store all pages in a folio.
> > > + *
> > > + * The error handling from all failure points is consolidated to the
> > > + * "store_folio_failed" label, based on the initialization of the zswap
> > entries'
> > > + * handles to ERR_PTR(-EINVAL) at allocation time, and the fact that the
> > > + * entry's handle is subsequently modified only upon a successful
> > zpool_malloc()
> > > + * after the page is compressed.
> > > + */
> > > +static ssize_t zswap_store_folio(struct folio *folio,
> > > +                                struct obj_cgroup *objcg,
> > > +                                struct zswap_pool *pool)
> > > +{
> > > +       long index, nr_pages = folio_nr_pages(folio);
> > > +       struct zswap_entry **entries = NULL;
> > > +       int node_id = folio_nid(folio);
> > > +       size_t compressed_bytes = 0;
> > >
> > > -               WARN_ONCE(err != -ENOMEM, "unexpected xarray error: %d\n",
> > err);
> > > -               zswap_reject_alloc_fail++;
> > > -               goto store_failed;
> > > +       entries = kmalloc(nr_pages * sizeof(*entries), GFP_KERNEL);
> >
> > We can probably use kcalloc() here.
>
> I am a little worried about the latency penalty of kcalloc() in the reclaim path,
> especially since I am not relying on zero-initialized memory for "entries"..

Hmm, good point: for a 2M THP the entries array is 512 pointers (4KB with
8-byte pointers), so we could be allocating an entire page here.

[..]
> > > @@ -1549,8 +1601,8 @@ bool zswap_store(struct folio *folio)
> > >         struct mem_cgroup *memcg = NULL;
> > >         struct zswap_pool *pool;
> > >         size_t compressed_bytes = 0;
> > > +       ssize_t bytes;
> > >         bool ret = false;
> > > -       long index;
> > >
> > >         VM_WARN_ON_ONCE(!folio_test_locked(folio));
> > >         VM_WARN_ON_ONCE(!folio_test_swapcache(folio));
> > > @@ -1584,15 +1636,11 @@ bool zswap_store(struct folio *folio)
> > >                 mem_cgroup_put(memcg);
> > >         }
> > >
> > > -       for (index = 0; index < nr_pages; ++index) {
> > > -               struct page *page = folio_page(folio, index);
> > > -               ssize_t bytes;
> > > +       bytes = zswap_store_folio(folio, objcg, pool);
> > > +       if (bytes < 0)
> > > +               goto put_pool;
> > >
> > > -               bytes = zswap_store_page(page, objcg, pool);
> > > -               if (bytes < 0)
> > > -                       goto put_pool;
> > > -               compressed_bytes += bytes;
> > > -       }
> > > +       compressed_bytes = bytes;
> >
> > What's the point of having both compressed_bytes and bytes now?
>
> The main reason was to cleanly handle a negative error value returned in "bytes"
> (declared as ssize_t), as against a true total "compressed_bytes" (declared as size_t)
> for the folio to use for objcg charging. This is similar to the current mainline
> code where zswap_store() calls zswap_store_page(). I was hoping to avoid potential
> issues with overflow/underflow, and for maintainability. Let me know if this is Ok.

It makes sense in the current mainline because we store the return
value of each call to zswap_store_page() in 'bytes', then check if
it's an error value, then add it to 'compressed_bytes'. Now we have a
single call to zswap_store_folio() and a single return value. AFAICT,
there is currently no benefit to storing it in 'bytes', checking it,
then moving it to 'compressed_bytes'. The compiler will probably
optimize the variable away anyway, but it looks weird.
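
For illustration, a sketch of the simpler shape this implies (assumption, not
the posted code): keep a single signed variable, check it for an error, then
use it directly for the objcg charge.

	ssize_t compressed_bytes;

	compressed_bytes = zswap_store_folio(folio, objcg, pool);
	if (compressed_bytes < 0)
		goto put_pool;

	if (objcg)
		obj_cgroup_charge_zswap(objcg, compressed_bytes);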

Patch

diff --git a/mm/zswap.c b/mm/zswap.c
index 99cd78891fd0..1be0f1807bfc 100644
--- a/mm/zswap.c
+++ b/mm/zswap.c
@@ -1467,77 +1467,129 @@  static void shrink_worker(struct work_struct *w)
 * main API
 **********************************/
 
-static ssize_t zswap_store_page(struct page *page,
-				struct obj_cgroup *objcg,
-				struct zswap_pool *pool)
+static bool zswap_compress_folio(struct folio *folio,
+				 struct zswap_entry *entries[],
+				 struct zswap_pool *pool)
 {
-	swp_entry_t page_swpentry = page_swap_entry(page);
-	struct zswap_entry *entry, *old;
+	long index, nr_pages = folio_nr_pages(folio);
 
-	/* allocate entry */
-	entry = zswap_entry_cache_alloc(GFP_KERNEL, page_to_nid(page));
-	if (!entry) {
-		zswap_reject_kmemcache_fail++;
-		return -EINVAL;
+	for (index = 0; index < nr_pages; ++index) {
+		struct page *page = folio_page(folio, index);
+
+		if (!zswap_compress(page, entries[index], pool))
+			return false;
 	}
 
-	if (!zswap_compress(page, entry, pool))
-		goto compress_failed;
+	return true;
+}
 
-	old = xa_store(swap_zswap_tree(page_swpentry),
-		       swp_offset(page_swpentry),
-		       entry, GFP_KERNEL);
-	if (xa_is_err(old)) {
-		int err = xa_err(old);
+/*
+ * Store all pages in a folio.
+ *
+ * The error handling from all failure points is consolidated to the
+ * "store_folio_failed" label, based on the initialization of the zswap entries'
+ * handles to ERR_PTR(-EINVAL) at allocation time, and the fact that the
+ * entry's handle is subsequently modified only upon a successful zpool_malloc()
+ * after the page is compressed.
+ */
+static ssize_t zswap_store_folio(struct folio *folio,
+				 struct obj_cgroup *objcg,
+				 struct zswap_pool *pool)
+{
+	long index, nr_pages = folio_nr_pages(folio);
+	struct zswap_entry **entries = NULL;
+	int node_id = folio_nid(folio);
+	size_t compressed_bytes = 0;
 
-		WARN_ONCE(err != -ENOMEM, "unexpected xarray error: %d\n", err);
-		zswap_reject_alloc_fail++;
-		goto store_failed;
+	entries = kmalloc(nr_pages * sizeof(*entries), GFP_KERNEL);
+	if (!entries)
+		return -ENOMEM;
+
+	/* allocate entries */
+	for (index = 0; index < nr_pages; ++index) {
+		entries[index] = zswap_entry_cache_alloc(GFP_KERNEL, node_id);
+
+		if (!entries[index]) {
+			zswap_reject_kmemcache_fail++;
+			nr_pages = index;
+			goto store_folio_failed;
+		}
+
+		entries[index]->handle = (unsigned long)ERR_PTR(-EINVAL);
 	}
 
-	/*
-	 * We may have had an existing entry that became stale when
-	 * the folio was redirtied and now the new version is being
-	 * swapped out. Get rid of the old.
-	 */
-	if (old)
-		zswap_entry_free(old);
+	if (!zswap_compress_folio(folio, entries, pool))
+		goto store_folio_failed;
 
-	/*
-	 * The entry is successfully compressed and stored in the tree, there is
-	 * no further possibility of failure. Grab refs to the pool and objcg.
-	 * These refs will be dropped by zswap_entry_free() when the entry is
-	 * removed from the tree.
-	 */
-	zswap_pool_get(pool);
-	if (objcg)
-		obj_cgroup_get(objcg);
+	for (index = 0; index < nr_pages; ++index) {
+		swp_entry_t page_swpentry = page_swap_entry(folio_page(folio, index));
+		struct zswap_entry *old, *entry = entries[index];
+
+		old = xa_store(swap_zswap_tree(page_swpentry),
+			       swp_offset(page_swpentry),
+			       entry, GFP_KERNEL);
+		if (xa_is_err(old)) {
+			int err = xa_err(old);
+
+			WARN_ONCE(err != -ENOMEM, "unexpected xarray error: %d\n", err);
+			zswap_reject_alloc_fail++;
+			goto store_folio_failed;
+		}
 
-	/*
-	 * We finish initializing the entry while it's already in xarray.
-	 * This is safe because:
-	 *
-	 * 1. Concurrent stores and invalidations are excluded by folio lock.
-	 *
-	 * 2. Writeback is excluded by the entry not being on the LRU yet.
-	 *    The publishing order matters to prevent writeback from seeing
-	 *    an incoherent entry.
-	 */
-	entry->pool = pool;
-	entry->swpentry = page_swpentry;
-	entry->objcg = objcg;
-	entry->referenced = true;
-	if (entry->length) {
-		INIT_LIST_HEAD(&entry->lru);
-		zswap_lru_add(&zswap_list_lru, entry);
+		/*
+		 * We may have had an existing entry that became stale when
+		 * the folio was redirtied and now the new version is being
+		 * swapped out. Get rid of the old.
+		 */
+		if (old)
+			zswap_entry_free(old);
+
+		/*
+		 * The entry is successfully compressed and stored in the tree, there is
+		 * no further possibility of failure. Grab refs to the pool and objcg.
+		 * These refs will be dropped by zswap_entry_free() when the entry is
+		 * removed from the tree.
+		 */
+		zswap_pool_get(pool);
+		if (objcg)
+			obj_cgroup_get(objcg);
+
+		/*
+		 * We finish initializing the entry while it's already in xarray.
+		 * This is safe because:
+		 *
+		 * 1. Concurrent stores and invalidations are excluded by folio lock.
+		 *
+		 * 2. Writeback is excluded by the entry not being on the LRU yet.
+		 *    The publishing order matters to prevent writeback from seeing
+		 *    an incoherent entry.
+		 */
+		entry->pool = pool;
+		entry->swpentry = page_swpentry;
+		entry->objcg = objcg;
+		entry->referenced = true;
+		if (entry->length) {
+			INIT_LIST_HEAD(&entry->lru);
+			zswap_lru_add(&zswap_list_lru, entry);
+		}
+
+		compressed_bytes += entry->length;
 	}
 
-	return entry->length;
+	kfree(entries);
+
+	return compressed_bytes;
+
+store_folio_failed:
+	for (index = 0; index < nr_pages; ++index) {
+		if (!IS_ERR_VALUE(entries[index]->handle))
+			zpool_free(pool->zpool, entries[index]->handle);
+
+		zswap_entry_cache_free(entries[index]);
+	}
+
+	kfree(entries);
 
-store_failed:
-	zpool_free(pool->zpool, entry->handle);
-compress_failed:
-	zswap_entry_cache_free(entry);
 	return -EINVAL;
 }
 
@@ -1549,8 +1601,8 @@  bool zswap_store(struct folio *folio)
 	struct mem_cgroup *memcg = NULL;
 	struct zswap_pool *pool;
 	size_t compressed_bytes = 0;
+	ssize_t bytes;
 	bool ret = false;
-	long index;
 
 	VM_WARN_ON_ONCE(!folio_test_locked(folio));
 	VM_WARN_ON_ONCE(!folio_test_swapcache(folio));
@@ -1584,15 +1636,11 @@  bool zswap_store(struct folio *folio)
 		mem_cgroup_put(memcg);
 	}
 
-	for (index = 0; index < nr_pages; ++index) {
-		struct page *page = folio_page(folio, index);
-		ssize_t bytes;
+	bytes = zswap_store_folio(folio, objcg, pool);
+	if (bytes < 0)
+		goto put_pool;
 
-		bytes = zswap_store_page(page, objcg, pool);
-		if (bytes < 0)
-			goto put_pool;
-		compressed_bytes += bytes;
-	}
+	compressed_bytes = bytes;
 
 	if (objcg) {
 		obj_cgroup_charge_zswap(objcg, compressed_bytes);
@@ -1622,6 +1670,7 @@  bool zswap_store(struct folio *folio)
 		pgoff_t offset = swp_offset(swp);
 		struct zswap_entry *entry;
 		struct xarray *tree;
+		long index;
 
 		for (index = 0; index < nr_pages; ++index) {
 			tree = swap_zswap_tree(swp_entry(type, offset + index));