@@ -2048,8 +2048,17 @@ static u64 update_block_group_flags(stru
return flags;
}
-int btrfs_inc_block_group_ro(struct btrfs_block_group_cache *cache)
-
+/*
+ * Mark one block group RO, can be called several times for the same block
+ * group.
+ *
+ * @cache: the destination block group
+ * @do_chunk_alloc: whether need to do chunk pre-allocation, this is to
+ * ensure we still have some free space after marking this
+ * block group RO.
+ */
+int btrfs_inc_block_group_ro(struct btrfs_block_group_cache *cache,
+ bool do_chunk_alloc)
{
struct btrfs_fs_info *fs_info = cache->fs_info;
struct btrfs_trans_handle *trans;
@@ -2079,25 +2088,29 @@ again:
goto again;
}
- /*
- * if we are changing raid levels, try to allocate a corresponding
- * block group with the new raid level.
- */
- alloc_flags = update_block_group_flags(fs_info, cache->flags);
- if (alloc_flags != cache->flags) {
- ret = btrfs_chunk_alloc(trans, alloc_flags, CHUNK_ALLOC_FORCE);
+ if (do_chunk_alloc) {
/*
- * ENOSPC is allowed here, we may have enough space
- * already allocated at the new raid level to
- * carry on
+ * If we are changing raid levels, try to allocate a
+ * corresponding block group with the new raid level.
*/
- if (ret == -ENOSPC)
- ret = 0;
- if (ret < 0)
- goto out;
+ alloc_flags = update_block_group_flags(fs_info, cache->flags);
+ if (alloc_flags != cache->flags) {
+ ret = btrfs_chunk_alloc(trans, alloc_flags,
+ CHUNK_ALLOC_FORCE);
+ /*
+ * ENOSPC is allowed here, we may have enough space
+ * already allocated at the new raid level to carry on
+ */
+ if (ret == -ENOSPC)
+ ret = 0;
+ if (ret < 0)
+ goto out;
+ }
}
- ret = inc_block_group_ro(cache, 0);
+ ret = inc_block_group_ro(cache, !do_chunk_alloc);
+ if (!do_chunk_alloc)
+ goto unlock_out;
if (!ret)
goto out;
alloc_flags = btrfs_get_alloc_profile(fs_info, cache->space_info->flags);
@@ -2112,6 +2125,7 @@ out:
check_system_chunk(trans, alloc_flags);
mutex_unlock(&fs_info->chunk_mutex);
}
+unlock_out:
mutex_unlock(&fs_info->ro_block_group_mutex);
btrfs_end_transaction(trans);
@@ -205,7 +205,8 @@ int btrfs_read_block_groups(struct btrfs
int btrfs_make_block_group(struct btrfs_trans_handle *trans, u64 bytes_used,
u64 type, u64 chunk_offset, u64 size);
void btrfs_create_pending_block_groups(struct btrfs_trans_handle *trans);
-int btrfs_inc_block_group_ro(struct btrfs_block_group_cache *cache);
+int btrfs_inc_block_group_ro(struct btrfs_block_group_cache *cache,
+ bool do_chunk_alloc);
void btrfs_dec_block_group_ro(struct btrfs_block_group_cache *cache);
int btrfs_start_dirty_block_groups(struct btrfs_trans_handle *trans);
int btrfs_write_dirty_block_groups(struct btrfs_trans_handle *trans);
@@ -4428,7 +4428,7 @@ int btrfs_relocate_block_group(struct bt
rc->extent_root = extent_root;
rc->block_group = bg;
- ret = btrfs_inc_block_group_ro(rc->block_group);
+ ret = btrfs_inc_block_group_ro(rc->block_group, true);
if (ret) {
err = ret;
goto out;
@@ -3560,7 +3560,26 @@ int scrub_enumerate_chunks(struct scrub_
* -> btrfs_scrub_pause()
*/
scrub_pause_on(fs_info);
- ret = btrfs_inc_block_group_ro(cache);
+
+ /*
+ * Don't do chunk preallocation for scrub.
+ *
+ * This is especially important for SYSTEM bgs, or we can hit
+ * -EFBIG from btrfs_finish_chunk_alloc() like:
+ * 1. The only SYSTEM bg is marked RO.
+ * Since SYSTEM bg is small, that's pretty common.
+ * 2. New SYSTEM bg will be allocated
+ * Due to regular version will allocate new chunk.
+ * 3. New SYSTEM bg is empty and will get cleaned up
+ * Before cleanup really happens, it's marked RO again.
+ * 4. Empty SYSTEM bg get scrubbed
+ * We go back to 2.
+ *
+ * This can easily boost the amount of SYSTEM chunks if cleaner
+ * thread can't be triggered fast enough, and use up all space
+ * of btrfs_super_block::sys_chunk_array
+ */
+ ret = btrfs_inc_block_group_ro(cache, false);
if (!ret && sctx->is_dev_replace) {
/*
* If we are doing a device replace wait for any tasks