Message ID | 20250424-isolcpus-io-queues-v6-9-9a53a870ca1f@kernel.org |
---|---|
State | New |
Headers | show |
Series | blk: honor isolcpus configuration | expand |
On 4/24/25 20:19, Daniel Wagner wrote: > When isolcpus=io_queue is enabled, and the last housekeeping CPU for a > given hctx would go offline, there would be no CPU left which handles > the IOs. To prevent IO stalls, prevent offlining housekeeping CPUs which > are still severing isolated CPUs.. serving > > Signed-off-by: Daniel Wagner <wagi@kernel.org> > --- > block/blk-mq.c | 46 ++++++++++++++++++++++++++++++++++++++++++++-- > 1 file changed, 44 insertions(+), 2 deletions(-) > > diff --git a/block/blk-mq.c b/block/blk-mq.c > index c2697db591091200cdb9f6e082e472b829701e4c..aff17673b773583dfb2b01cb2f5f010c456bd834 100644 > --- a/block/blk-mq.c > +++ b/block/blk-mq.c > @@ -3627,6 +3627,48 @@ static bool blk_mq_hctx_has_requests(struct blk_mq_hw_ctx *hctx) > return data.has_rq; > } > > +static bool blk_mq_hctx_check_isolcpus_online(struct blk_mq_hw_ctx *hctx, unsigned int cpu) > +{ > + const struct cpumask *hk_mask; > + int i; > + > + if (!housekeeping_enabled(HK_TYPE_IO_QUEUE)) > + return true; > + > + hk_mask = housekeeping_cpumask(HK_TYPE_IO_QUEUE); > + > + for (i = 0; i < hctx->nr_ctx; i++) { > + struct blk_mq_ctx *ctx = hctx->ctxs[i]; > + > + if (ctx->cpu == cpu) > + continue; > + > + /* > + * Check if this context has at least one online > + * housekeeping CPU in this case the hardware context is > + * usable. > + */ > + if (cpumask_test_cpu(ctx->cpu, hk_mask) && > + cpu_online(ctx->cpu)) > + break; > + > + /* > + * The context doesn't have any online housekeeping CPUs > + * but there might be an online isolated CPU mapped to > + * it. 
> + */ > + if (cpu_is_offline(ctx->cpu)) > + continue; > + > + pr_warn("%s: trying to offline hctx%d but there is still an online isolcpu CPU %d mapped to it\n", > + hctx->queue->disk->disk_name, > + hctx->queue_num, ctx->cpu); > + return true; > + } > + > + return false; > +} > + > static bool blk_mq_hctx_has_online_cpu(struct blk_mq_hw_ctx *hctx, > unsigned int this_cpu) > { > @@ -3647,7 +3689,7 @@ static bool blk_mq_hctx_has_online_cpu(struct blk_mq_hw_ctx *hctx, > > /* this hctx has at least one online CPU */ > if (this_cpu != cpu) > - return true; > + return blk_mq_hctx_check_isolcpus_online(hctx, this_cpu); > } > > return false; > @@ -3659,7 +3701,7 @@ static int blk_mq_hctx_notify_offline(unsigned int cpu, struct hlist_node *node) > struct blk_mq_hw_ctx, cpuhp_online); > > if (blk_mq_hctx_has_online_cpu(hctx, cpu)) > - return 0; > + return -EINVAL; > > /* > * Prevent new request from being allocated on the current hctx. > Otherwise: Reviewed-by: Hannes Reinecke <hare@suse.de> Cheers, Hannes
diff --git a/block/blk-mq.c b/block/blk-mq.c index c2697db591091200cdb9f6e082e472b829701e4c..aff17673b773583dfb2b01cb2f5f010c456bd834 100644 --- a/block/blk-mq.c +++ b/block/blk-mq.c @@ -3627,6 +3627,48 @@ static bool blk_mq_hctx_has_requests(struct blk_mq_hw_ctx *hctx) return data.has_rq; } +static bool blk_mq_hctx_check_isolcpus_online(struct blk_mq_hw_ctx *hctx, unsigned int cpu) +{ + const struct cpumask *hk_mask; + int i; + + if (!housekeeping_enabled(HK_TYPE_IO_QUEUE)) + return true; + + hk_mask = housekeeping_cpumask(HK_TYPE_IO_QUEUE); + + for (i = 0; i < hctx->nr_ctx; i++) { + struct blk_mq_ctx *ctx = hctx->ctxs[i]; + + if (ctx->cpu == cpu) + continue; + + /* + * Check if this context has at least one online + * housekeeping CPU in this case the hardware context is + * usable. + */ + if (cpumask_test_cpu(ctx->cpu, hk_mask) && + cpu_online(ctx->cpu)) + break; + + /* + * The context doesn't have any online housekeeping CPUs + * but there might be an online isolated CPU mapped to + * it. + */ + if (cpu_is_offline(ctx->cpu)) + continue; + + pr_warn("%s: trying to offline hctx%d but there is still an online isolcpu CPU %d mapped to it\n", + hctx->queue->disk->disk_name, + hctx->queue_num, ctx->cpu); + return true; + } + + return false; +} + static bool blk_mq_hctx_has_online_cpu(struct blk_mq_hw_ctx *hctx, unsigned int this_cpu) { @@ -3647,7 +3689,7 @@ static bool blk_mq_hctx_has_online_cpu(struct blk_mq_hw_ctx *hctx, /* this hctx has at least one online CPU */ if (this_cpu != cpu) - return true; + return blk_mq_hctx_check_isolcpus_online(hctx, this_cpu); } return false; @@ -3659,7 +3701,7 @@ static int blk_mq_hctx_notify_offline(unsigned int cpu, struct hlist_node *node) struct blk_mq_hw_ctx, cpuhp_online); if (blk_mq_hctx_has_online_cpu(hctx, cpu)) - return 0; + return -EINVAL; /* * Prevent new request from being allocated on the current hctx.
When isolcpus=io_queue is enabled, and the last housekeeping CPU for a given hctx would go offline, there would be no CPU left which handles the IOs. To prevent IO stalls, prevent offlining housekeeping CPUs which are still serving isolated CPUs. Signed-off-by: Daniel Wagner <wagi@kernel.org> --- block/blk-mq.c | 46 ++++++++++++++++++++++++++++++++++++++++++++-- 1 file changed, 44 insertions(+), 2 deletions(-)