@@ -2168,6 +2168,7 @@ int btrfs_set_extent_delalloc(struct inode *inode, u64 start, u64 end,
/* see btrfs_writepage_start_hook for details on why this is required */
struct btrfs_writepage_fixup {
struct page *page;
+ struct inode *inode;
struct btrfs_work work;
};
@@ -2182,9 +2183,20 @@ static void btrfs_writepage_fixup_worker(struct btrfs_work *work)
u64 page_start;
u64 page_end;
int ret = 0;
+ bool free_delalloc_space = true;
fixup = container_of(work, struct btrfs_writepage_fixup, work);
page = fixup->page;
+ inode = fixup->inode;
+ page_start = page_offset(page);
+ page_end = page_offset(page) + PAGE_SIZE - 1;
+
+ /*
+ * This is similar to page_mkwrite, we need to reserve the space before
+ * we take the page lock.
+ */
+ ret = btrfs_delalloc_reserve_space(inode, &data_reserved, page_start,
+ PAGE_SIZE);
again:
lock_page(page);
@@ -2193,25 +2205,48 @@ again:
* page->mapping may go NULL, but it shouldn't be moved to a different
* address space.
*/
- if (!page->mapping || !PageDirty(page) || !PageChecked(page))
+ if (!page->mapping || !PageDirty(page) || !PageChecked(page)) {
+ /*
+ * Unfortunately this is a little tricky, either
+ *
+ * 1) We got here and our page had already been dealt with and
+ * we reserved our space, thus ret == 0, so we need to just
+ * drop our space reservation and bail. This can happen the
+ * first time we come into the fixup worker, or could happen
+ * while waiting for the ordered extent.
+ * 2) Our page was already dealt with, but we happened to get an
+ * ENOSPC above from the btrfs_delalloc_reserve_space. In
+ * this case we obviously don't have anything to release, but
+ * because the page was already dealt with we don't want to
+ * mark the page with an error, so make sure we're resetting
+ * ret to 0. This is why we have this check _before_ the ret
+ * check, because we do not want to have a surprise ENOSPC
+ * when the page was already properly dealt with.
+ */
+ if (!ret) {
+ btrfs_delalloc_release_extents(BTRFS_I(inode),
+ PAGE_SIZE);
+ btrfs_delalloc_release_space(inode, data_reserved,
+ page_start, PAGE_SIZE,
+ true);
+ }
+ ret = 0;
goto out_page;
+ }
/*
- * We keep the PageChecked() bit set until we're done with the
- * btrfs_start_ordered_extent() dance that we do below. That drops and
- * retakes the page lock, so we don't want new fixup workers queued for
- * this page during the churn.
+ * We can't mess with the page state unless it is locked, so now that
+ * it is locked bail if we failed to make our space reservation.
*/
- inode = page->mapping->host;
- page_start = page_offset(page);
- page_end = page_offset(page) + PAGE_SIZE - 1;
+ if (ret)
+ goto out_page;
lock_extent_bits(&BTRFS_I(inode)->io_tree, page_start, page_end,
&cached_state);
/* already ordered? We're done */
if (PagePrivate2(page))
- goto out;
+ goto out_reserved;
ordered = btrfs_lookup_ordered_range(BTRFS_I(inode), page_start,
PAGE_SIZE);
@@ -2224,11 +2259,6 @@ again:
goto again;
}
- ret = btrfs_delalloc_reserve_space(inode, &data_reserved, page_start,
- PAGE_SIZE);
- if (ret)
- goto out;
-
ret = btrfs_set_extent_delalloc(inode, page_start, page_end, 0,
&cached_state);
if (ret)
@@ -2242,12 +2272,12 @@ again:
* The page was dirty when we started, nothing should have cleaned it.
*/
BUG_ON(!PageDirty(page));
+ free_delalloc_space = false;
out_reserved:
btrfs_delalloc_release_extents(BTRFS_I(inode), PAGE_SIZE);
- if (ret)
+ if (free_delalloc_space)
btrfs_delalloc_release_space(inode, data_reserved, page_start,
PAGE_SIZE, true);
-out:
unlock_extent_cached(&BTRFS_I(inode)->io_tree, page_start, page_end,
&cached_state);
out_page:
@@ -2266,6 +2296,12 @@ out_page:
put_page(page);
kfree(fixup);
extent_changeset_free(data_reserved);
+ /*
+ * As a precaution, do a delayed iput in case it would be the last iput
+ * that could need flushing space. Recursing back to fixup worker would
+ * deadlock.
+ */
+ btrfs_add_delayed_iput(inode);
}
/*
@@ -2303,10 +2339,18 @@ int btrfs_writepage_cow_fixup(struct page *page, u64 start, u64 end)
if (!fixup)
return -EAGAIN;
+ /*
+ * We are already holding a reference to this inode from
+ * write_cache_pages. We need to hold it because the space reservation
+ * takes place outside of the page lock, and we can't trust
+ * page->mapping outside of the page lock.
+ */
+ ihold(inode);
SetPageChecked(page);
get_page(page);
btrfs_init_work(&fixup->work, btrfs_writepage_fixup_worker, NULL, NULL);
fixup->page = page;
+ fixup->inode = inode;
btrfs_queue_work(fs_info->fixup_workers, &fixup->work);
return -EAGAIN;