@@ -101,8 +101,10 @@ obj-$(CONFIG_PAGE_COUNTER) += page_counter.o
obj-$(CONFIG_MEMCG_V1) += memcontrol-v1.o
obj-$(CONFIG_MEMCG) += memcontrol.o vmpressure.o
ifdef CONFIG_SWAP
+ifndef CONFIG_VIRTUAL_SWAP
obj-$(CONFIG_MEMCG) += swap_cgroup.o
endif
+endif
obj-$(CONFIG_CGROUP_HUGETLB) += hugetlb_cgroup.o
obj-$(CONFIG_GUP_TEST) += gup_test.o
obj-$(CONFIG_DMAPOOL_TEST) += dmapool_test.o
@@ -27,10 +27,14 @@
*
* @slot: The handle to the physical swap slot backing this page.
* @rcu: The RCU head to free the descriptor with an RCU grace period.
+ * @memcgid: The memcg id of the owning memcg, if any.
*/
struct swp_desc {
swp_slot_t slot;
struct rcu_head rcu;
+#ifdef CONFIG_MEMCG
+ /* Read with atomic_read() and replaced with atomic_xchg() under rcu_read_lock(). */
+ atomic_t memcgid;
+#endif /* CONFIG_MEMCG */
};
/* Virtual swap space - swp_entry_t -> struct swp_desc */
@@ -122,8 +126,10 @@ static swp_entry_t vswap_alloc(int nr)
return (swp_entry_t){0};
}
- for (i = 0; i < nr; i++)
+ for (i = 0; i < nr; i++) {
descs[i]->slot.val = 0;
+ atomic_set(&descs[i]->memcgid, 0);
+ }
xa_lock(&vswap_map);
if (nr == 1) {
@@ -352,6 +358,70 @@ swp_entry_t swp_slot_to_swp_entry(swp_slot_t slot)
return entry ? (swp_entry_t){xa_to_value(entry)} : (swp_entry_t){0};
}
+#ifdef CONFIG_MEMCG
+/*
+ * vswap_cgroup_record - set the memcg id on a range of virtual swap entries.
+ * @entry: first virtual swap entry of the range.
+ * @memcgid: id to store in each descriptor (0 clears the recorded id).
+ * @nr_ents: number of consecutive entries, starting at @entry.
+ *
+ * Walks vswap_map under rcu_read_lock() and atomically exchanges the id held
+ * by every descriptor found in [entry, entry + nr_ents - 1].
+ *
+ * Returns the id previously stored in the last descriptor visited, or 0 if
+ * no descriptor was found in the range.
+ */
+static unsigned short vswap_cgroup_record(swp_entry_t entry,
+				unsigned short memcgid, unsigned int nr_ents)
+{
+	struct swp_desc *desc;
+	/*
+	 * xas_for_each() may visit no descriptor at all (for instance if the
+	 * entries were freed concurrently), so oldid needs a defined value
+	 * to return in that case.
+	 */
+	unsigned short oldid = 0, iter = 0;
+
+	XA_STATE(xas, &vswap_map, entry.val);
+
+	rcu_read_lock();
+	xas_for_each(&xas, desc, entry.val + nr_ents - 1) {
+		if (xas_retry(&xas, desc))
+			continue;
+
+		oldid = atomic_xchg(&desc->memcgid, memcgid);
+		/* Entries in one range are expected to carry the same old id. */
+		if (!iter)
+			iter = oldid;
+		VM_WARN_ON(iter != oldid);
+	}
+	rcu_read_unlock();
+
+	return oldid;
+}
+
+/*
+ * Record @memcgid on every virtual swap entry covered by @folio, starting
+ * at @entry. Warns (VM_WARN_ON) if the previous id reported for the range
+ * is non-zero.
+ */
+void swap_cgroup_record(struct folio *folio, unsigned short memcgid,
+			swp_entry_t entry)
+{
+	unsigned int nr_ents = folio_nr_pages(folio);
+	unsigned short prev;
+
+	/* Keep the call outside VM_WARN_ON() so it is always executed. */
+	prev = vswap_cgroup_record(entry, memcgid, nr_ents);
+	VM_WARN_ON(prev);
+}
+
+/*
+ * Reset the memcg id of @nr_ents virtual swap entries starting at @entry
+ * to 0. Returns the previous id as reported by vswap_cgroup_record().
+ */
+unsigned short swap_cgroup_clear(swp_entry_t entry, unsigned int nr_ents)
+{
+	const unsigned short no_memcg = 0;
+
+	return vswap_cgroup_record(entry, no_memcg, nr_ents);
+}
+
+/*
+ * Look up the memcg id recorded for the virtual swap entry @entry.
+ * Returns 0 when @entry has no descriptor in vswap_map.
+ */
+unsigned short lookup_swap_cgroup_id(swp_entry_t entry)
+{
+	unsigned short id = 0;
+	struct swp_desc *desc;
+
+	/*
+	 * The virtual swap slot may be freed while we look at it (for example
+	 * during mem_cgroup_swapin_charge_folio), so the whole lookup stays
+	 * inside an RCU read-side critical section and the descriptor is
+	 * checked for existence before it is dereferenced.
+	 */
+	rcu_read_lock();
+	desc = xa_load(&vswap_map, entry.val);
+	if (desc)
+		id = atomic_read(&desc->memcgid);
+	rcu_read_unlock();
+
+	return id;
+}
+
+/*
+ * Swapon hook: does no work and always returns 0. @type and @max_pages are
+ * unused; in this implementation the memcg id is kept in struct swp_desc.
+ */
+int swap_cgroup_swapon(int type, unsigned long max_pages)
+{
+ return 0;
+}
+
+/* Swapoff hook: intentionally empty, @type is unused. */
+void swap_cgroup_swapoff(int type) {}
+#endif /* CONFIG_MEMCG */
+
int vswap_init(void)
{
swp_desc_cache = KMEM_CACHE(swp_desc, 0);
Once we decouple a swap entry from its backing store via the virtual swap,
we can no longer statically allocate an array to store the swap entries'
cgroup information. Move it to the swap descriptor.

Signed-off-by: Nhat Pham <nphamcs@gmail.com>
---
 mm/Makefile |  2 ++
 mm/vswap.c  | 72 ++++++++++++++++++++++++++++++++++++++++++++++++++++-
 2 files changed, 73 insertions(+), 1 deletion(-)