diff mbox series

[v2,07/20] block/block-copy: add ratelimit to block-copy

Message ID 20200601181118.579-8-vsementsov@virtuozzo.com
State New
Headers show
Series backup performance: block_status + async | expand

Commit Message

Vladimir Sementsov-Ogievskiy June 1, 2020, 6:11 p.m. UTC
We are going to directly use one async block-copy operation for the backup
job, so we need a rate limiter.

We want to maintain the current backup behavior: only background copying is
limited, and copy-before-write operations only participate in the limit
calculation. Therefore we need one rate limiter in the block-copy state
and a boolean flag in the block-copy call state for the actual limitation.

Note that we can't just account each chunk in the limiter after
successful copying: that would not save us from starting a lot of async
sub-requests which would exceed the limit by too much. Instead, let's use the
following scheme on sub-request creation:
1. If at the moment the limit is not exceeded, create the request and
account it immediately.
2. If at the moment the limit is already exceeded, don't create the
sub-request and handle the limit instead (by sleeping).
With this approach we'll never exceed the limit more than by one
sub-request (which pretty much matches current backup behavior).

Signed-off-by: Vladimir Sementsov-Ogievskiy <vsementsov@virtuozzo.com>
---
 include/block/block-copy.h |  8 +++++++
 block/block-copy.c         | 44 ++++++++++++++++++++++++++++++++++++++
 2 files changed, 52 insertions(+)

Comments

Vladimir Sementsov-Ogievskiy Sept. 25, 2020, 6:19 p.m. UTC | #1
22.07.2020 14:05, Max Reitz wrote:
> On 01.06.20 20:11, Vladimir Sementsov-Ogievskiy wrote:
>> We are going to directly use one async block-copy operation for backup
>> job, so we need rate limitator.
> 
> %s/limitator/limiter/g, I think.
> 
>> We want to maintain current backup behavior: only background copying is
>> limited and copy-before-write operations only participate in limit
>> calculation. Therefore we need one rate limitator for block-copy state
>> and boolean flag for block-copy call state for actual limitation.
>>
>> Note, that we can't just calculate each chunk in limitator after
>> successful copying: it will not save us from starting a lot of async
>> sub-requests which will exceed limit too much. Instead let's use the
>> following scheme on sub-request creation:
>> 1. If at the moment limit is not exceeded, create the request and
>> account it immediately.
>> 2. If at the moment limit is already exceeded, drop create sub-request
>> and handle limit instead (by sleep).
>> With this approach we'll never exceed the limit more than by one
>> sub-request (which pretty much matches current backup behavior).
> 
> Sounds reasonable.
> 
>> Signed-off-by: Vladimir Sementsov-Ogievskiy <vsementsov@virtuozzo.com>
>> ---
>>   include/block/block-copy.h |  8 +++++++
>>   block/block-copy.c         | 44 ++++++++++++++++++++++++++++++++++++++
>>   2 files changed, 52 insertions(+)
>>
>> diff --git a/include/block/block-copy.h b/include/block/block-copy.h
>> index 600984c733..d40e691123 100644
>> --- a/include/block/block-copy.h
>> +++ b/include/block/block-copy.h
>> @@ -59,6 +59,14 @@ BlockCopyCallState *block_copy_async(BlockCopyState *s,
>>                                        int64_t max_chunk,
>>                                        BlockCopyAsyncCallbackFunc cb);
>>   
>> +/*
>> + * Set speed limit for block-copy instance. All block-copy operations related to
>> + * this BlockCopyState will participate in speed calculation, but only
>> + * block_copy_async calls with @ratelimit=true will be actually limited.
>> + */
>> +void block_copy_set_speed(BlockCopyState *s, BlockCopyCallState *call_state,
>> +                          uint64_t speed);
>> +
>>   BdrvDirtyBitmap *block_copy_dirty_bitmap(BlockCopyState *s);
>>   void block_copy_set_skip_unallocated(BlockCopyState *s, bool skip);
>>   
>> diff --git a/block/block-copy.c b/block/block-copy.c
>> index 4114d1fd25..851d9c8aaf 100644
>> --- a/block/block-copy.c
>> +++ b/block/block-copy.c
>> @@ -26,6 +26,7 @@
>>   #define BLOCK_COPY_MAX_BUFFER (1 * MiB)
>>   #define BLOCK_COPY_MAX_MEM (128 * MiB)
>>   #define BLOCK_COPY_MAX_WORKERS 64
>> +#define BLOCK_COPY_SLICE_TIME 100000000ULL /* ns */
>>   
>>   static coroutine_fn int block_copy_task_entry(AioTask *task);
>>   
>> @@ -36,11 +37,13 @@ typedef struct BlockCopyCallState {
>>       int64_t bytes;
>>       int max_workers;
>>       int64_t max_chunk;
>> +    bool ratelimit;
>>       BlockCopyAsyncCallbackFunc cb;
>>   
>>       /* State */
>>       bool failed;
>>       bool finished;
>> +    QemuCoSleepState *sleep_state;
>>   
>>       /* OUT parameters */
>>       bool error_is_read;
>> @@ -103,6 +106,9 @@ typedef struct BlockCopyState {
>>       void *progress_opaque;
>>   
>>       SharedResource *mem;
>> +
>> +    uint64_t speed;
>> +    RateLimit rate_limit;
>>   } BlockCopyState;
>>   
>>   static BlockCopyTask *find_conflicting_task(BlockCopyState *s,
>> @@ -611,6 +617,21 @@ block_copy_dirty_clusters(BlockCopyCallState *call_state)
>>           }
>>           task->zeroes = ret & BDRV_BLOCK_ZERO;
>>   
>> +        if (s->speed) {
>> +            if (call_state->ratelimit) {
>> +                uint64_t ns = ratelimit_calculate_delay(&s->rate_limit, 0);
>> +                if (ns > 0) {
>> +                    block_copy_task_end(task, -EAGAIN);
>> +                    g_free(task);
>> +                    qemu_co_sleep_ns_wakeable(QEMU_CLOCK_REALTIME, ns,
>> +                                              &call_state->sleep_state);
>> +                    continue;
>> +                }
>> +            }
>> +
>> +            ratelimit_calculate_delay(&s->rate_limit, task->bytes);
>> +        }
>> +
> 
> Looks good.
> 
>>           trace_block_copy_process(s, task->offset);
>>   
>>           co_get_from_shres(s->mem, task->bytes);
>> @@ -649,6 +670,13 @@ out:
>>       return ret < 0 ? ret : found_dirty;
>>   }
>>   
>> +static void block_copy_kick(BlockCopyCallState *call_state)
>> +{
>> +    if (call_state->sleep_state) {
>> +        qemu_co_sleep_wake(call_state->sleep_state);
>> +    }
>> +}
>> +
>>   /*
>>    * block_copy_common
>>    *
>> @@ -729,6 +757,7 @@ BlockCopyCallState *block_copy_async(BlockCopyState *s,
>>           .s = s,
>>           .offset = offset,
>>           .bytes = bytes,
>> +        .ratelimit = ratelimit,
> 
> Hm, same problem/question as in patch 6: Should the @ratelimit parameter
> really be added in patch 5 if it’s used only now?
> 
>>           .cb = cb,
>>           .max_workers = max_workers ?: BLOCK_COPY_MAX_WORKERS,
>>           .max_chunk = max_chunk,
>> @@ -752,3 +781,18 @@ void block_copy_set_skip_unallocated(BlockCopyState *s, bool skip)
>>   {
>>       s->skip_unallocated = skip;
>>   }
>> +
>> +void block_copy_set_speed(BlockCopyState *s, BlockCopyCallState *call_state,
>> +                          uint64_t speed)
>> +{
>> +    uint64_t old_speed = s->speed;
>> +
>> +    s->speed = speed;
>> +    if (speed > 0) {
>> +        ratelimit_set_speed(&s->rate_limit, speed, BLOCK_COPY_SLICE_TIME);
>> +    }
>> +
>> +    if (call_state && old_speed && (speed > old_speed || speed == 0)) {
>> +        block_copy_kick(call_state);
>> +    }
>> +}
> 
> Hm.  I’m interested in seeing how this is going to be used, i.e. what
> callers will pass for @call_state.  I suppose it’s going to be the
> background operation for the whole device, but I wonder whether it
> actually makes sense to pass it.  I mean, the caller could just call
> block_copy_kick() itself (unconditionally, because it’ll never hurt, I
> think).
> 

Yes, that's weird. I think the correct approach is to have a list of call-states in the block-copy state, and kick them all on a speed update.
diff mbox series

Patch

diff --git a/include/block/block-copy.h b/include/block/block-copy.h
index 600984c733..d40e691123 100644
--- a/include/block/block-copy.h
+++ b/include/block/block-copy.h
@@ -59,6 +59,14 @@  BlockCopyCallState *block_copy_async(BlockCopyState *s,
                                      int64_t max_chunk,
                                      BlockCopyAsyncCallbackFunc cb);
 
+/*
+ * Set speed limit for block-copy instance. All block-copy operations related to
+ * this BlockCopyState will participate in speed calculation, but only
+ * block_copy_async calls with @ratelimit=true will be actually limited.
+ */
+void block_copy_set_speed(BlockCopyState *s, BlockCopyCallState *call_state,
+                          uint64_t speed);
+
 BdrvDirtyBitmap *block_copy_dirty_bitmap(BlockCopyState *s);
 void block_copy_set_skip_unallocated(BlockCopyState *s, bool skip);
 
diff --git a/block/block-copy.c b/block/block-copy.c
index 4114d1fd25..851d9c8aaf 100644
--- a/block/block-copy.c
+++ b/block/block-copy.c
@@ -26,6 +26,7 @@ 
 #define BLOCK_COPY_MAX_BUFFER (1 * MiB)
 #define BLOCK_COPY_MAX_MEM (128 * MiB)
 #define BLOCK_COPY_MAX_WORKERS 64
+#define BLOCK_COPY_SLICE_TIME 100000000ULL /* ns */
 
 static coroutine_fn int block_copy_task_entry(AioTask *task);
 
@@ -36,11 +37,13 @@  typedef struct BlockCopyCallState {
     int64_t bytes;
     int max_workers;
     int64_t max_chunk;
+    bool ratelimit;
     BlockCopyAsyncCallbackFunc cb;
 
     /* State */
     bool failed;
     bool finished;
+    QemuCoSleepState *sleep_state;
 
     /* OUT parameters */
     bool error_is_read;
@@ -103,6 +106,9 @@  typedef struct BlockCopyState {
     void *progress_opaque;
 
     SharedResource *mem;
+
+    uint64_t speed;
+    RateLimit rate_limit;
 } BlockCopyState;
 
 static BlockCopyTask *find_conflicting_task(BlockCopyState *s,
@@ -611,6 +617,21 @@  block_copy_dirty_clusters(BlockCopyCallState *call_state)
         }
         task->zeroes = ret & BDRV_BLOCK_ZERO;
 
+        if (s->speed) {
+            if (call_state->ratelimit) {
+                uint64_t ns = ratelimit_calculate_delay(&s->rate_limit, 0);
+                if (ns > 0) {
+                    block_copy_task_end(task, -EAGAIN);
+                    g_free(task);
+                    qemu_co_sleep_ns_wakeable(QEMU_CLOCK_REALTIME, ns,
+                                              &call_state->sleep_state);
+                    continue;
+                }
+            }
+
+            ratelimit_calculate_delay(&s->rate_limit, task->bytes);
+        }
+
         trace_block_copy_process(s, task->offset);
 
         co_get_from_shres(s->mem, task->bytes);
@@ -649,6 +670,13 @@  out:
     return ret < 0 ? ret : found_dirty;
 }
 
+static void block_copy_kick(BlockCopyCallState *call_state)
+{
+    if (call_state->sleep_state) {
+        qemu_co_sleep_wake(call_state->sleep_state);
+    }
+}
+
 /*
  * block_copy_common
  *
@@ -729,6 +757,7 @@  BlockCopyCallState *block_copy_async(BlockCopyState *s,
         .s = s,
         .offset = offset,
         .bytes = bytes,
+        .ratelimit = ratelimit,
         .cb = cb,
         .max_workers = max_workers ?: BLOCK_COPY_MAX_WORKERS,
         .max_chunk = max_chunk,
@@ -752,3 +781,18 @@  void block_copy_set_skip_unallocated(BlockCopyState *s, bool skip)
 {
     s->skip_unallocated = skip;
 }
+
+void block_copy_set_speed(BlockCopyState *s, BlockCopyCallState *call_state,
+                          uint64_t speed)
+{
+    uint64_t old_speed = s->speed;
+
+    s->speed = speed;
+    if (speed > 0) {
+        ratelimit_set_speed(&s->rate_limit, speed, BLOCK_COPY_SLICE_TIME);
+    }
+
+    if (call_state && old_speed && (speed > old_speed || speed == 0)) {
+        block_copy_kick(call_state);
+    }
+}