[RFC,04/14] mm: swap: swap cache support for virtualized swap

Message ID 20250407234223.1059191-5-nphamcs@gmail.com
Series [RFC,01/14] swapfile: rearrange functions

Commit Message

Nhat Pham April 7, 2025, 11:42 p.m. UTC
Currently, the swap cache code assumes that the swap space has a fixed
size. The virtual swap space is dynamically sized, so the existing
partitioning code cannot be easily reused. Dynamic partitioning is
planned, but for now keep the design simple and just use a flat swap
cache for vswap.

Since the vswap implementation has begun to diverge from the old
implementation, we also introduce a new build config
(CONFIG_VIRTUAL_SWAP). Users who do not select this config get the
old implementation, with no behavioral change.
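
For illustration (not part of the patch): a minimal userspace sketch of
the two cache-indexing schemes. swp_entry_t is simplified here; in the
kernel the high bits of the value also encode the swap type:

#include <stdio.h>

#define SWAP_ADDRESS_SPACE_SHIFT	14
#define SWAP_ADDRESS_SPACE_MASK		((1UL << SWAP_ADDRESS_SPACE_SHIFT) - 1)

/* Simplified stand-in for the kernel's swp_entry_t. */
typedef struct { unsigned long val; } swp_entry_t;

/*
 * Old design: one address space per 64M of swap; the cache index is
 * the offset within that 64M range.
 */
static unsigned long legacy_cache_index(unsigned long offset)
{
	return offset & SWAP_ADDRESS_SPACE_MASK;
}

/*
 * Virtual swap: one flat address space; the cache index is the raw
 * entry value (the patch defines swap_cache_index(entry) as entry.val).
 */
static unsigned long vswap_cache_index(swp_entry_t entry)
{
	return entry.val;
}

int main(void)
{
	swp_entry_t e = { .val = (1UL << SWAP_ADDRESS_SPACE_SHIFT) + 42 };

	printf("legacy index: %lu\n", legacy_cache_index(e.val));	/* 42 */
	printf("vswap index:  %lu\n", vswap_cache_index(e));		/* 16426 */
	return 0;
}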

Signed-off-by: Nhat Pham <nphamcs@gmail.com>
---
 mm/Kconfig      | 13 ++++++++++
 mm/swap.h       | 22 ++++++++++------
 mm/swap_state.c | 68 +++++++++++++++++++++++++++++++++++++++++--------
 3 files changed, 85 insertions(+), 18 deletions(-)
Patch

diff --git a/mm/Kconfig b/mm/Kconfig
index 1b501db06417..1a6acdb64333 100644
--- a/mm/Kconfig
+++ b/mm/Kconfig
@@ -22,6 +22,19 @@  menuconfig SWAP
 	  used to provide more virtual memory than the actual RAM present
 	  in your computer.  If unsure say Y.
 
+config VIRTUAL_SWAP
+	bool "Swap space virtualization"
+	depends on SWAP
+	default n
+	help
+		When this is selected, the kernel is built with the new swap
+		design. This will allow us to decouple the swap backends
+		(zswap, on-disk swapfile, etc.) and save disk space when we
+		use zswap (or the zero-filled swap page optimization).
+
+		There might be more lock contention under heavy swap use,
+		since the swap cache is no longer range partitioned.
+
 config ZSWAP
 	bool "Compressed cache for swap pages"
 	depends on SWAP
diff --git a/mm/swap.h b/mm/swap.h
index d5f8effa8015..06e20b1d79c4 100644
--- a/mm/swap.h
+++ b/mm/swap.h
@@ -22,22 +22,27 @@  void swap_write_unplug(struct swap_iocb *sio);
 int swap_writepage(struct page *page, struct writeback_control *wbc);
 void __swap_writepage(struct folio *folio, struct writeback_control *wbc);
 
-/* linux/mm/swap_state.c */
-/* One swap address space for each 64M swap space */
+/* Return the swap device position of the swap slot. */
+static inline loff_t swap_slot_pos(swp_slot_t slot)
+{
+	return ((loff_t)swp_slot_offset(slot)) << PAGE_SHIFT;
+}
+
 #define SWAP_ADDRESS_SPACE_SHIFT	14
 #define SWAP_ADDRESS_SPACE_PAGES	(1 << SWAP_ADDRESS_SPACE_SHIFT)
 #define SWAP_ADDRESS_SPACE_MASK		(SWAP_ADDRESS_SPACE_PAGES - 1)
+
+/* linux/mm/swap_state.c */
+#ifdef CONFIG_VIRTUAL_SWAP
+extern struct address_space *swap_address_space(swp_entry_t entry);
+#define swap_cache_index(entry) entry.val
+#else
+/* One swap address space for each 64M swap space */
 extern struct address_space *swapper_spaces[];
 #define swap_address_space(entry)			    \
 	(&swapper_spaces[swp_type(entry)][swp_offset(entry) \
 		>> SWAP_ADDRESS_SPACE_SHIFT])
 
-/* Return the swap device position of the swap slot. */
-static inline loff_t swap_slot_pos(swp_slot_t slot)
-{
-	return ((loff_t)swp_slot_offset(slot)) << PAGE_SHIFT;
-}
-
 /*
  * Return the swap cache index of the swap entry.
  */
@@ -46,6 +51,7 @@  static inline pgoff_t swap_cache_index(swp_entry_t entry)
 	BUILD_BUG_ON((SWP_OFFSET_MASK | SWAP_ADDRESS_SPACE_MASK) != SWP_OFFSET_MASK);
 	return swp_offset(entry) & SWAP_ADDRESS_SPACE_MASK;
 }
+#endif
 
 void show_swap_cache_info(void);
 bool add_to_swap(struct folio *folio);
diff --git a/mm/swap_state.c b/mm/swap_state.c
index 055e555d3382..268338a0ea57 100644
--- a/mm/swap_state.c
+++ b/mm/swap_state.c
@@ -38,10 +38,19 @@  static const struct address_space_operations swap_aops = {
 #endif
 };
 
+#ifdef CONFIG_VIRTUAL_SWAP
+static struct address_space swapper_space __read_mostly;
+
+struct address_space *swap_address_space(swp_entry_t entry)
+{
+	return &swapper_space;
+}
+#else
 struct address_space *swapper_spaces[MAX_SWAPFILES] __read_mostly;
 static unsigned int nr_swapper_spaces[MAX_SWAPFILES] __read_mostly;
-static bool enable_vma_readahead __read_mostly = true;
+#endif
 
+static bool enable_vma_readahead __read_mostly = true;
 #define SWAP_RA_ORDER_CEILING	5
 
 #define SWAP_RA_WIN_SHIFT	(PAGE_SHIFT / 2)
@@ -260,6 +269,28 @@  void delete_from_swap_cache(struct folio *folio)
 	folio_ref_sub(folio, folio_nr_pages(folio));
 }
 
+#ifdef CONFIG_VIRTUAL_SWAP
+void clear_shadow_from_swap_cache(int type, unsigned long begin,
+				unsigned long end)
+{
+	swp_slot_t slot = swp_slot(type, begin);
+	swp_entry_t entry = swp_slot_to_swp_entry(slot);
+	unsigned long index = swap_cache_index(entry);
+	struct address_space *address_space = swap_address_space(entry);
+	void *old;
+	XA_STATE(xas, &address_space->i_pages, index);
+
+	xas_set_update(&xas, workingset_update_node);
+
+	xa_lock_irq(&address_space->i_pages);
+	xas_for_each(&xas, old, entry.val + end - begin) {
+		if (!xa_is_value(old))
+			continue;
+		xas_store(&xas, NULL);
+	}
+	xa_unlock_irq(&address_space->i_pages);
+}
+#else
 void clear_shadow_from_swap_cache(int type, unsigned long begin,
 				unsigned long end)
 {
@@ -290,6 +321,7 @@  void clear_shadow_from_swap_cache(int type, unsigned long begin,
 			break;
 	}
 }
+#endif
 
 /*
  * If we are the only user, then try to free up the swap cache.
@@ -718,23 +750,34 @@  struct folio *swap_cluster_readahead(swp_entry_t entry, gfp_t gfp_mask,
 	return folio;
 }
 
+static void init_swapper_space(struct address_space *space)
+{
+	xa_init_flags(&space->i_pages, XA_FLAGS_LOCK_IRQ);
+	atomic_set(&space->i_mmap_writable, 0);
+	space->a_ops = &swap_aops;
+	/* swap cache doesn't use writeback related tags */
+	mapping_set_no_writeback_tags(space);
+}
+
+#ifdef CONFIG_VIRTUAL_SWAP
 int init_swap_address_space(unsigned int type, unsigned long nr_pages)
 {
-	struct address_space *spaces, *space;
+	return 0;
+}
+
+void exit_swap_address_space(unsigned int type) {}
+#else
+int init_swap_address_space(unsigned int type, unsigned long nr_pages)
+{
+	struct address_space *spaces;
 	unsigned int i, nr;
 
 	nr = DIV_ROUND_UP(nr_pages, SWAP_ADDRESS_SPACE_PAGES);
 	spaces = kvcalloc(nr, sizeof(struct address_space), GFP_KERNEL);
 	if (!spaces)
 		return -ENOMEM;
-	for (i = 0; i < nr; i++) {
-		space = spaces + i;
-		xa_init_flags(&space->i_pages, XA_FLAGS_LOCK_IRQ);
-		atomic_set(&space->i_mmap_writable, 0);
-		space->a_ops = &swap_aops;
-		/* swap cache doesn't use writeback related tags */
-		mapping_set_no_writeback_tags(space);
-	}
+	for (i = 0; i < nr; i++)
+		init_swapper_space(spaces + i);
 	nr_swapper_spaces[type] = nr;
 	swapper_spaces[type] = spaces;
 
@@ -752,6 +795,7 @@  void exit_swap_address_space(unsigned int type)
 	nr_swapper_spaces[type] = 0;
 	swapper_spaces[type] = NULL;
 }
+#endif
 
 static int swap_vma_ra_win(struct vm_fault *vmf, unsigned long *start,
 			   unsigned long *end)
@@ -930,6 +974,10 @@  static int __init swap_init_sysfs(void)
 	int err;
 	struct kobject *swap_kobj;
 
+#ifdef CONFIG_VIRTUAL_SWAP
+	init_swapper_space(&swapper_space);
+#endif
+
 	swap_kobj = kobject_create_and_add("swap", mm_kobj);
 	if (!swap_kobj) {
 		pr_err("failed to create swap kobject\n");