@@ -318,6 +318,49 @@ static inline struct list_head *page_deferred_list(struct page *page)
return &page[2].deferred_list;
}
+static inline void thp_mapcount_seqcount_init(struct page *page)
+{
+ seqcount_init(&page[1].mapcount_seqcount);
+}
+
+static inline unsigned int thp_mapcount_read_begin(struct page *page)
+{
+ VM_BUG_ON_PAGE(PageTail(page), page);
+ return raw_read_seqcount_begin(&page[1].mapcount_seqcount);
+}
+
+static inline bool thp_mapcount_read_retry(struct page *page,
+ unsigned int seqcount)
+{
+ VM_BUG_ON_PAGE(PageTail(page), page);
+ if (!read_seqcount_retry(&page[1].mapcount_seqcount, seqcount))
+ return false;
+ /* A writer is active or raced with us; back off before the caller retries. */
+ cpu_relax();
+ return true;
+
+static inline void thp_mapcount_lock(struct page *page,
+ unsigned long *irq_flags)
+{
+ VM_BUG_ON_PAGE(PageTail(page), page);
+ /*
+ * Disable interrupts: a reader running in IRQ context on this CPU
+ * would otherwise spin forever in thp_mapcount_read_begin() while
+ * we hold the write side.
+ */
+ local_irq_save(*irq_flags);
+ bit_spin_lock(PG_locked, &page[1].flags);
+ raw_write_seqcount_begin(&page[1].mapcount_seqcount);
+}
+
+static inline void thp_mapcount_unlock(struct page *page,
+ unsigned long irq_flags)
+{
+ VM_BUG_ON_PAGE(PageTail(page), page);
+ raw_write_seqcount_end(&page[1].mapcount_seqcount);
+ bit_spin_unlock(PG_locked, &page[1].flags);
+ local_irq_restore(irq_flags);
+}
+
#else /* CONFIG_TRANSPARENT_HUGEPAGE */
#define HPAGE_PMD_SHIFT ({ BUILD_BUG(); 0; })
#define HPAGE_PMD_MASK ({ BUILD_BUG(); 0; })
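Callers turn these helpers into the usual seqcount retry loop. A minimal
sketch of the intended read side, assuming page already points at the head
page (the value read here is just an example of a mapcount-derived snapshot):

	unsigned int seqcount;
	int mapcount;

	do {
		seqcount = thp_mapcount_read_begin(page);
		/* any mapcount-derived snapshot goes here */
		mapcount = head_compound_mapcount(page);
	} while (thp_mapcount_read_retry(page, seqcount));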
@@ -467,6 +510,28 @@ static inline bool thp_migration_supported(void)
{
return false;
}
+
+static inline unsigned int thp_mapcount_read_begin(struct page *page)
+{
+ return 0;
+}
+
+static inline bool thp_mapcount_read_retry(struct page *page,
+ unsigned int seqcount)
+{
+ return false;
+}
+
+static inline void thp_mapcount_lock(struct page *page,
+ unsigned long *irq_flags)
+{
+}
+
+static inline void thp_mapcount_unlock(struct page *page,
+ unsigned long irq_flags)
+{
+}
+
#endif /* CONFIG_TRANSPARENT_HUGEPAGE */
/**
@@ -151,6 +151,15 @@ struct page {
unsigned char compound_order;
atomic_t compound_mapcount;
unsigned int compound_nr; /* 1 << compound_order */
+ /*
+ * THP only: allows the mapcount to be read atomically,
+ * for example when we might be racing with a concurrent
+ * THP split. Initialized for all THP, but the write side
+ * is so far only taken for anon THP, where such races
+ * apply. Writers are serialized via the PG_locked-based
+ * bit spinlock in the first tail page.
+ */
+ seqcount_t mapcount_seqcount;
};
struct { /* Second tail page of compound page */
unsigned long _compound_pad_1; /* compound_head */
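For readers new to seqcounts, the even/odd protocol can be modelled in plain
userspace C. This is an illustrative, simplified model only (all names are
invented, the demo is single-threaded, and a production seqlock needs more
care with memory ordering and atomic access to the protected data):

	#include <stdatomic.h>
	#include <stdio.h>

	static atomic_uint seq;	/* even: stable, odd: writer active */
	static int data;	/* value the sequence protects */

	static unsigned int model_read_begin(void)
	{
		unsigned int s;

		/* Wait until no writer is active (sequence is even). */
		while ((s = atomic_load_explicit(&seq, memory_order_acquire)) & 1)
			;
		return s;
	}

	static int model_read_retry(unsigned int s)
	{
		/* Order the data reads before re-checking the sequence. */
		atomic_thread_fence(memory_order_acquire);
		return atomic_load_explicit(&seq, memory_order_relaxed) != s;
	}

	static void model_write(int v)
	{
		atomic_fetch_add_explicit(&seq, 1, memory_order_relaxed); /* odd */
		atomic_thread_fence(memory_order_release);
		data = v;
		atomic_fetch_add_explicit(&seq, 1, memory_order_release); /* even */
	}

	int main(void)
	{
		unsigned int s;
		int v;

		model_write(42);
		do {
			s = model_read_begin();
			v = data;
		} while (model_read_retry(s));
		printf("read %d\n", v);
		return 0;
	}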
@@ -527,6 +527,7 @@ void prep_transhuge_page(struct page *page)
INIT_LIST_HEAD(page_deferred_list(page));
set_compound_page_dtor(page, TRANSHUGE_PAGE_DTOR);
+ thp_mapcount_seqcount_init(page);
}
bool is_transparent_hugepage(struct page *page)
@@ -1959,11 +1960,11 @@ static void __split_huge_pmd_locked(struct vm_area_struct *vma, pmd_t *pmd,
unsigned long haddr, bool freeze)
{
struct mm_struct *mm = vma->vm_mm;
+ unsigned long addr, irq_flags;
struct page *page;
pgtable_t pgtable;
pmd_t old_pmd, _pmd;
bool young, write, soft_dirty, pmd_migration = false, uffd_wp = false;
- unsigned long addr;
int i;
VM_BUG_ON(haddr & ~HPAGE_PMD_MASK);
@@ -2108,6 +2109,13 @@ static void __split_huge_pmd_locked(struct vm_area_struct *vma, pmd_t *pmd,
/* Sub-page mapcount accounting for above small mappings. */
int val = 1;
+ /*
+ * lock_page_memcg() is taken before thp_mapcount_lock() in the
+ * page_remove_anon_compound_rmap() path; respect the same
+ * locking order here.
+ */
+ lock_page_memcg(page);
+ thp_mapcount_lock(page, &irq_flags);
/*
* Set PG_double_map before dropping compound_mapcount to avoid
* false-negative page_mapped().
@@ -2121,7 +2129,6 @@ static void __split_huge_pmd_locked(struct vm_area_struct *vma, pmd_t *pmd,
for (i = 0; i < HPAGE_PMD_NR; i++)
atomic_add(val, &page[i]._mapcount);
- lock_page_memcg(page);
if (atomic_add_negative(-1, compound_mapcount_ptr(page))) {
/* Last compound_mapcount is gone. */
__mod_lruvec_page_state(page, NR_ANON_THPS,
@@ -2132,6 +2139,7 @@ static void __split_huge_pmd_locked(struct vm_area_struct *vma, pmd_t *pmd,
atomic_dec(&page[i]._mapcount);
}
}
+ thp_mapcount_unlock(page, irq_flags);
unlock_page_memcg(page);
}
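The write side of the protocol, condensed from the hunks above: the seqcount
write section nests inside lock_page_memcg() and runs with IRQs off.

	lock_page_memcg(page);			/* memcg lock first ... */
	thp_mapcount_lock(page, &irq_flags);	/* ... then the mapcount seqcount */
	/* transfer compound_mapcount into the per-subpage _mapcounts */
	thp_mapcount_unlock(page, irq_flags);
	unlock_page_memcg(page);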
@@ -2501,6 +2509,8 @@ static void __split_huge_page(struct page *page, struct list_head *list,
int total_mapcount(struct page *page)
{
int i, compound, nr, ret;
+ unsigned int seqcount;
+ bool double_map;
VM_BUG_ON_PAGE(PageTail(page), page);
@@ -2510,13 +2520,19 @@ int total_mapcount(struct page *page)
return head_compound_mapcount(page);
nr = compound_nr(page);
- ret = compound = head_compound_mapcount(page);
- for (i = 0; i < nr; i++)
- ret += atomic_read(&page[i]._mapcount) + 1;
+
+ do {
+ seqcount = thp_mapcount_read_begin(page);
+ ret = compound = head_compound_mapcount(page);
+ for (i = 0; i < nr; i++)
+ ret += atomic_read(&page[i]._mapcount) + 1;
+ double_map = PageDoubleMap(page);
+ } while (thp_mapcount_read_retry(page, seqcount));
+
/* File pages have compound_mapcount included in _mapcount */
if (!PageAnon(page))
return ret - compound * nr;
- if (PageDoubleMap(page))
+ if (double_map)
ret -= nr;
return ret;
}
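A worked example of the anon accounting (illustrative numbers): a THP
PMD-mapped by one process and fully PTE-mapped by a second has
compound_mapcount == 1 and, because PG_double_map offsets every subpage
_mapcount up by one, each subpage _mapcount == 1. The loop and the
correction then compute:

	ret = 1 + nr * 2;	/* compound + per-subpage (_mapcount + 1) */
	ret -= nr;		/* PG_double_map correction */
	/* = 1 + nr: one PMD mapping plus nr PTE mappings */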
@@ -2548,6 +2564,7 @@ int total_mapcount(struct page *page)
int page_trans_huge_mapcount(struct page *page, int *total_mapcount)
{
int i, ret, _total_mapcount, mapcount;
+ unsigned int seqcount;
/* hugetlbfs shouldn't call it */
VM_BUG_ON_PAGE(PageHuge(page), page);
@@ -2561,17 +2578,22 @@ int page_trans_huge_mapcount(struct page *page, int *total_mapcount)
page = compound_head(page);
- _total_mapcount = ret = 0;
- for (i = 0; i < thp_nr_pages(page); i++) {
- mapcount = atomic_read(&page[i]._mapcount) + 1;
- ret = max(ret, mapcount);
- _total_mapcount += mapcount;
- }
- if (PageDoubleMap(page)) {
- ret -= 1;
- _total_mapcount -= thp_nr_pages(page);
- }
- mapcount = compound_mapcount(page);
+ do {
+ _total_mapcount = ret = 0;
+
+ seqcount = thp_mapcount_read_begin(page);
+ for (i = 0; i < thp_nr_pages(page); i++) {
+ mapcount = atomic_read(&page[i]._mapcount) + 1;
+ ret = max(ret, mapcount);
+ _total_mapcount += mapcount;
+ }
+ if (PageDoubleMap(page)) {
+ ret -= 1;
+ _total_mapcount -= thp_nr_pages(page);
+ }
+ mapcount = compound_mapcount(page);
+ } while (thp_mapcount_read_retry(page, seqcount));
+
ret += mapcount;
_total_mapcount += mapcount;
if (total_mapcount)
@@ -1294,6 +1294,7 @@ static void page_remove_file_rmap(struct page *page, bool compound)
static void page_remove_anon_compound_rmap(struct page *page)
{
+ unsigned long irq_flags;
int i, nr;
if (!atomic_add_negative(-1, compound_mapcount_ptr(page)))
@@ -1308,23 +1309,30 @@ static void page_remove_anon_compound_rmap(struct page *page)
__mod_lruvec_page_state(page, NR_ANON_THPS, -thp_nr_pages(page));
- if (TestClearPageDoubleMap(page)) {
- /*
- * Subpages can be mapped with PTEs too. Check how many of
- * them are still mapped.
- */
- for (i = 0, nr = 0; i < thp_nr_pages(page); i++) {
- if (atomic_add_negative(-1, &page[i]._mapcount))
- nr++;
- }
+ if (PageDoubleMap(page)) {
+ thp_mapcount_lock(page, &irq_flags);
+ if (TestClearPageDoubleMap(page)) {
+ /*
+ * Subpages can be mapped with PTEs too. Check how many
+ * of them are still mapped.
+ */
+ for (i = 0, nr = 0; i < thp_nr_pages(page); i++) {
+ if (atomic_add_negative(-1, &page[i]._mapcount))
+ nr++;
+ }
+ thp_mapcount_unlock(page, irq_flags);
- /*
- * Queue the page for deferred split if at least one small
- * page of the compound page is unmapped, but at least one
- * small page is still mapped.
- */
- if (nr && nr < thp_nr_pages(page))
- deferred_split_huge_page(page);
+ /*
+ * Queue the page for deferred split if at least one
+ * small page of the compound page is unmapped, but at
+ * least one small page is still mapped.
+ */
+ if (nr && nr < thp_nr_pages(page))
+ deferred_split_huge_page(page);
+ } else {
+ thp_mapcount_unlock(page, irq_flags);
+ nr = thp_nr_pages(page);
+ }
} else {
nr = thp_nr_pages(page);
}
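The unlocked PageDoubleMap() test followed by TestClearPageDoubleMap() under
the lock is a double-checked pattern: the common !PageDoubleMap case stays
lock-free, and the locked test-and-clear makes the authoritative decision.
Schematically:

	if (PageDoubleMap(page)) {			/* cheap, racy hint */
		thp_mapcount_lock(page, &irq_flags);
		if (TestClearPageDoubleMap(page)) {	/* serialized decision */
			/* drop the per-subpage compound references */
		}
		thp_mapcount_unlock(page, irq_flags);
	}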
@@ -1610,6 +1610,7 @@ static int page_trans_huge_map_swapcount(struct page *page, int *total_mapcount,
struct swap_cluster_info *ci = NULL;
unsigned char *map = NULL;
int mapcount, swapcount = 0;
+ unsigned int seqcount;
/* hugetlbfs shouldn't call it */
VM_BUG_ON_PAGE(PageHuge(page), page);
@@ -1625,7 +1626,6 @@ static int page_trans_huge_map_swapcount(struct page *page, int *total_mapcount,
page = compound_head(page);
- _total_mapcount = _total_swapcount = map_swapcount = 0;
if (PageSwapCache(page)) {
swp_entry_t entry;
@@ -1638,21 +1638,28 @@ static int page_trans_huge_map_swapcount(struct page *page, int *total_mapcount,
}
if (map)
ci = lock_cluster(si, offset);
- for (i = 0; i < HPAGE_PMD_NR; i++) {
- mapcount = atomic_read(&page[i]._mapcount) + 1;
- _total_mapcount += mapcount;
- if (map) {
- swapcount = swap_count(map[offset + i]);
- _total_swapcount += swapcount;
+
+ do {
+ _total_mapcount = _total_swapcount = map_swapcount = 0;
+
+ seqcount = thp_mapcount_read_begin(page);
+ for (i = 0; i < HPAGE_PMD_NR; i++) {
+ mapcount = atomic_read(&page[i]._mapcount) + 1;
+ _total_mapcount += mapcount;
+ if (map) {
+ swapcount = swap_count(map[offset + i]);
+ _total_swapcount += swapcount;
+ }
+ map_swapcount = max(map_swapcount, mapcount + swapcount);
}
- map_swapcount = max(map_swapcount, mapcount + swapcount);
- }
+ if (PageDoubleMap(page)) {
+ map_swapcount -= 1;
+ _total_mapcount -= HPAGE_PMD_NR;
+ }
+ mapcount = compound_mapcount(page);
+ } while (thp_mapcount_read_retry(page, seqcount));
+
unlock_cluster(ci);
- if (PageDoubleMap(page)) {
- map_swapcount -= 1;
- _total_mapcount -= HPAGE_PMD_NR;
- }
- mapcount = compound_mapcount(page);
map_swapcount += mapcount;
_total_mapcount += mapcount;
if (total_mapcount)
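Note the resulting lock nesting: the swap cluster lock is taken once and held
across all seqcount retries rather than being dropped and retaken per
iteration. Condensed from the hunk above:

	ci = lock_cluster(si, offset);		/* held across retries */
	do {
		seqcount = thp_mapcount_read_begin(page);
		/* snapshot the _mapcounts and swap counts */
	} while (thp_mapcount_read_retry(page, seqcount));
	unlock_cluster(ci);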
@@ -730,6 +730,8 @@ EXPORT_SYMBOL(folio_mapping);
/* Slow path of page_mapcount() for compound pages */
int __page_mapcount(struct page *page)
{
+ struct page *head_page;
+ unsigned int seqcount;
int ret;
if (PageHuge(page))
@@ -741,11 +743,16 @@ int __page_mapcount(struct page *page)
if (!PageAnon(page))
return atomic_read(&page->_mapcount) + 1;
- ret = atomic_read(&page->_mapcount) + 1;
- page = compound_head(page);
- ret += head_compound_mapcount(page);
- if (PageDoubleMap(page))
- ret--;
+ /* The mapcount_seqcount is so far only required for anonymous THP. */
+ head_page = compound_head(page);
+ do {
+ seqcount = thp_mapcount_read_begin(head_page);
+ ret = atomic_read(&page->_mapcount) + 1;
+ ret += head_compound_mapcount(head_page);
+ if (PageDoubleMap(head_page))
+ ret--;
+ } while (thp_mapcount_read_retry(head_page, seqcount));
+
return ret;
}
EXPORT_SYMBOL_GPL(__page_mapcount);
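For context, __page_mapcount() is only reached from page_mapcount() for
compound pages; the fast path (sketched from memory, unchanged by this
series) remains a single atomic read:

	static inline int page_mapcount(struct page *page)
	{
		if (unlikely(PageCompound(page)))
			return __page_mapcount(page);
		return atomic_read(&page->_mapcount) + 1;
	}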