diff --git a/mm/Kconfig b/mm/Kconfig
--- a/mm/Kconfig
+++ b/mm/Kconfig
@@ -22,6 +22,19 @@ menuconfig SWAP
used to provide more virtual memory than the actual RAM present
in your computer. If unsure say Y.
+config VIRTUAL_SWAP
+ bool "Swap space virtualization"
+ depends on SWAP
+ default n
+ help
+ When this is selected, the kernel is built with the new swap
+ design. This will allow us to decouple the swap backends
+	  (zswap, on-disk swapfile, etc.) and save disk space when we
+ use zswap (or the zero-filled swap page optimization).
+
+	  There might be more lock contention under heavy swap use, since
+	  the swap cache is no longer range-partitioned.
+
config ZSWAP
bool "Compressed cache for swap pages"
depends on SWAP
diff --git a/mm/swap.h b/mm/swap.h
--- a/mm/swap.h
+++ b/mm/swap.h
@@ -22,22 +22,27 @@ void swap_write_unplug(struct swap_iocb *sio);
int swap_writepage(struct page *page, struct writeback_control *wbc);
void __swap_writepage(struct folio *folio, struct writeback_control *wbc);
-/* linux/mm/swap_state.c */
-/* One swap address space for each 64M swap space */
+/* Return the swap device position of the swap slot. */
+static inline loff_t swap_slot_pos(swp_slot_t slot)
+{
+ return ((loff_t)swp_slot_offset(slot)) << PAGE_SHIFT;
+}
+
#define SWAP_ADDRESS_SPACE_SHIFT 14
#define SWAP_ADDRESS_SPACE_PAGES (1 << SWAP_ADDRESS_SPACE_SHIFT)
#define SWAP_ADDRESS_SPACE_MASK (SWAP_ADDRESS_SPACE_PAGES - 1)
+
+/* linux/mm/swap_state.c */
+#ifdef CONFIG_VIRTUAL_SWAP
+extern struct address_space *swap_address_space(swp_entry_t entry);
+#define swap_cache_index(entry) ((entry).val)
+#else
+/* One swap address space for each 64M swap space */
extern struct address_space *swapper_spaces[];
#define swap_address_space(entry) \
(&swapper_spaces[swp_type(entry)][swp_offset(entry) \
>> SWAP_ADDRESS_SPACE_SHIFT])
-/* Return the swap device position of the swap slot. */
-static inline loff_t swap_slot_pos(swp_slot_t slot)
-{
- return ((loff_t)swp_slot_offset(slot)) << PAGE_SHIFT;
-}
-
/*
* Return the swap cache index of the swap entry.
*/
@@ -46,6 +51,7 @@ static inline pgoff_t swap_cache_index(swp_entry_t entry)
BUILD_BUG_ON((SWP_OFFSET_MASK | SWAP_ADDRESS_SPACE_MASK) != SWP_OFFSET_MASK);
return swp_offset(entry) & SWAP_ADDRESS_SPACE_MASK;
}
+#endif
void show_swap_cache_info(void);
bool add_to_swap(struct folio *folio);
diff --git a/mm/swap_state.c b/mm/swap_state.c
--- a/mm/swap_state.c
+++ b/mm/swap_state.c
@@ -38,10 +38,19 @@ static const struct address_space_operations swap_aops = {
#endif
};
+#ifdef CONFIG_VIRTUAL_SWAP
+static struct address_space swapper_space __read_mostly;
+
+struct address_space *swap_address_space(swp_entry_t entry)
+{
+ return &swapper_space;
+}
+#else
struct address_space *swapper_spaces[MAX_SWAPFILES] __read_mostly;
static unsigned int nr_swapper_spaces[MAX_SWAPFILES] __read_mostly;
-static bool enable_vma_readahead __read_mostly = true;
+#endif
+static bool enable_vma_readahead __read_mostly = true;
#define SWAP_RA_ORDER_CEILING 5
#define SWAP_RA_WIN_SHIFT (PAGE_SHIFT / 2)
@@ -260,6 +269,28 @@ void delete_from_swap_cache(struct folio *folio)
folio_ref_sub(folio, folio_nr_pages(folio));
}
+#ifdef CONFIG_VIRTUAL_SWAP
+void clear_shadow_from_swap_cache(int type, unsigned long begin,
+ unsigned long end)
+{
+ swp_slot_t slot = swp_slot(type, begin);
+ swp_entry_t entry = swp_slot_to_swp_entry(slot);
+ unsigned long index = swap_cache_index(entry);
+ struct address_space *address_space = swap_address_space(entry);
+ void *old;
+ XA_STATE(xas, &address_space->i_pages, index);
+
+ xas_set_update(&xas, workingset_update_node);
+
+ xa_lock_irq(&address_space->i_pages);
+ xas_for_each(&xas, old, entry.val + end - begin) {
+ if (!xa_is_value(old))
+ continue;
+ xas_store(&xas, NULL);
+ }
+ xa_unlock_irq(&address_space->i_pages);
+}
+#else
void clear_shadow_from_swap_cache(int type, unsigned long begin,
unsigned long end)
{
@@ -290,6 +321,7 @@ void clear_shadow_from_swap_cache(int type, unsigned long begin,
break;
}
}
+#endif
/*
* If we are the only user, then try to free up the swap cache.
@@ -718,23 +750,34 @@ struct folio *swap_cluster_readahead(swp_entry_t entry, gfp_t gfp_mask,
return folio;
}
+static void init_swapper_space(struct address_space *space)
+{
+ xa_init_flags(&space->i_pages, XA_FLAGS_LOCK_IRQ);
+ atomic_set(&space->i_mmap_writable, 0);
+ space->a_ops = &swap_aops;
+ /* swap cache doesn't use writeback related tags */
+ mapping_set_no_writeback_tags(space);
+}
+
+#ifdef CONFIG_VIRTUAL_SWAP
int init_swap_address_space(unsigned int type, unsigned long nr_pages)
{
- struct address_space *spaces, *space;
+ return 0;
+}
+
+void exit_swap_address_space(unsigned int type) {}
+#else
+int init_swap_address_space(unsigned int type, unsigned long nr_pages)
+{
+ struct address_space *spaces;
unsigned int i, nr;
nr = DIV_ROUND_UP(nr_pages, SWAP_ADDRESS_SPACE_PAGES);
spaces = kvcalloc(nr, sizeof(struct address_space), GFP_KERNEL);
if (!spaces)
return -ENOMEM;
- for (i = 0; i < nr; i++) {
- space = spaces + i;
- xa_init_flags(&space->i_pages, XA_FLAGS_LOCK_IRQ);
- atomic_set(&space->i_mmap_writable, 0);
- space->a_ops = &swap_aops;
- /* swap cache doesn't use writeback related tags */
- mapping_set_no_writeback_tags(space);
- }
+ for (i = 0; i < nr; i++)
+ init_swapper_space(spaces + i);
nr_swapper_spaces[type] = nr;
swapper_spaces[type] = spaces;
@@ -752,6 +795,7 @@ void exit_swap_address_space(unsigned int type)
nr_swapper_spaces[type] = 0;
swapper_spaces[type] = NULL;
}
+#endif
static int swap_vma_ra_win(struct vm_fault *vmf, unsigned long *start,
unsigned long *end)
@@ -930,6 +974,10 @@ static int __init swap_init_sysfs(void)
int err;
struct kobject *swap_kobj;
+#ifdef CONFIG_VIRTUAL_SWAP
+ init_swapper_space(&swapper_space);
+#endif
+
swap_kobj = kobject_create_and_add("swap", mm_kobj);
if (!swap_kobj) {
pr_err("failed to create swap kobject\n");
Currently, the swap cache code assumes that the swap space is of a
fixed size. The virtual swap space is dynamically sized, so the
existing partitioning code cannot be easily reused. Dynamic
partitioning is planned, but for now keep the design simple and just
use a flat swap cache for vswap.

Since vswap's implementation has begun to diverge from the old
implementation, we also introduce a new build config
(CONFIG_VIRTUAL_SWAP). Users who do not select this config will get
the old implementation, with no behavioral change.

Signed-off-by: Nhat Pham <nphamcs@gmail.com>
---
 mm/Kconfig      | 13 ++++++++++
 mm/swap.h       | 22 ++++++++++------
 mm/swap_state.c | 68 +++++++++++++++++++++++++++++++++++++++++--------
 3 files changed, 85 insertions(+), 18 deletions(-)
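
For readers comparing the two indexing schemes, below is a minimal
userspace sketch (illustrative only, not part of the patch). The toy
swp_entry_t struct, the 58-bit offset field, and the legacy_*/vswap_*
helper names are simplifying assumptions of mine, not the kernel's
exact entry layout; SWAP_ADDRESS_SPACE_SHIFT mirrors mm/swap.h.

#include <stdio.h>

#define SWAP_ADDRESS_SPACE_SHIFT 14
#define SWAP_ADDRESS_SPACE_PAGES (1ULL << SWAP_ADDRESS_SPACE_SHIFT)
#define SWAP_ADDRESS_SPACE_MASK  (SWAP_ADDRESS_SPACE_PAGES - 1)

/* Toy model of swp_entry_t: low 58 bits hold the offset (assumed). */
typedef struct { unsigned long long val; } swp_entry_t;

static unsigned long long swp_offset(swp_entry_t entry)
{
	return entry.val & ((1ULL << 58) - 1);
}

/* Legacy scheme: one address space per 64M of swap space. */
static unsigned long long legacy_space_nr(swp_entry_t entry)
{
	return swp_offset(entry) >> SWAP_ADDRESS_SPACE_SHIFT;
}

static unsigned long long legacy_cache_index(swp_entry_t entry)
{
	return swp_offset(entry) & SWAP_ADDRESS_SPACE_MASK;
}

/* CONFIG_VIRTUAL_SWAP: one flat space; the entry value is the index. */
static unsigned long long vswap_cache_index(swp_entry_t entry)
{
	return entry.val;
}

int main(void)
{
	/* An entry one page past the first 64M boundary. */
	swp_entry_t entry = { .val = SWAP_ADDRESS_SPACE_PAGES + 1 };

	printf("legacy: space %llu, index %llu\n",
	       legacy_space_nr(entry), legacy_cache_index(entry));
	printf("vswap:  single space, index %llu\n",
	       vswap_cache_index(entry));
	return 0;
}

The flat scheme trades the per-64M address spaces, each with its own
xarray and lock, for a single xarray keyed directly by entry.val; that
single shared structure is what the Kconfig help text refers to when
it warns about additional lock contention under heavy swap use.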