diff mbox series

[v6,9/9] blk-mq: prevent offlining hk CPU with associated online isolated CPUs

Message ID 20250424-isolcpus-io-queues-v6-9-9a53a870ca1f@kernel.org
State New
Headers show
Series blk: honor isolcpus configuration | expand

Commit Message

Daniel Wagner April 24, 2025, 6:19 p.m. UTC
When isolcpus=io_queue is enabled, and the last housekeeping CPU for a
given hctx would go offline, there would be no CPU left which handles
the IOs. To prevent IO stalls, prevent offlining housekeeping CPUs which
are still severing isolated CPUs..

Signed-off-by: Daniel Wagner <wagi@kernel.org>
---
 block/blk-mq.c | 46 ++++++++++++++++++++++++++++++++++++++++++++--
 1 file changed, 44 insertions(+), 2 deletions(-)

Comments

Hannes Reinecke April 25, 2025, 6:28 a.m. UTC | #1
On 4/24/25 20:19, Daniel Wagner wrote:
> When isolcpus=io_queue is enabled, and the last housekeeping CPU for a
> given hctx would go offline, there would be no CPU left which handles
> the IOs. To prevent IO stalls, prevent offlining housekeeping CPUs which
> are still severing isolated CPUs..
             serving

> 
> Signed-off-by: Daniel Wagner <wagi@kernel.org>
> ---
>   block/blk-mq.c | 46 ++++++++++++++++++++++++++++++++++++++++++++--
>   1 file changed, 44 insertions(+), 2 deletions(-)
> 
> diff --git a/block/blk-mq.c b/block/blk-mq.c
> index c2697db591091200cdb9f6e082e472b829701e4c..aff17673b773583dfb2b01cb2f5f010c456bd834 100644
> --- a/block/blk-mq.c
> +++ b/block/blk-mq.c
> @@ -3627,6 +3627,48 @@ static bool blk_mq_hctx_has_requests(struct blk_mq_hw_ctx *hctx)
>   	return data.has_rq;
>   }
>   
> +static bool blk_mq_hctx_check_isolcpus_online(struct blk_mq_hw_ctx *hctx, unsigned int cpu)
> +{
> +	const struct cpumask *hk_mask;
> +	int i;
> +
> +	if (!housekeeping_enabled(HK_TYPE_IO_QUEUE))
> +		return true;
> +
> +	hk_mask = housekeeping_cpumask(HK_TYPE_IO_QUEUE);
> +
> +	for (i = 0; i < hctx->nr_ctx; i++) {
> +		struct blk_mq_ctx *ctx = hctx->ctxs[i];
> +
> +		if (ctx->cpu == cpu)
> +			continue;
> +
> +		/*
> +		 * Check if this context has at least one online
> +		 * housekeeping CPU in this case the hardware context is
> +		 * usable.
> +		 */
> +		if (cpumask_test_cpu(ctx->cpu, hk_mask) &&
> +		    cpu_online(ctx->cpu))
> +			break;
> +
> +		/*
> +		 * The context doesn't have any online housekeeping CPUs
> +		 * but there might be an online isolated CPU mapped to
> +		 * it.
> +		 */
> +		if (cpu_is_offline(ctx->cpu))
> +			continue;
> +
> +		pr_warn("%s: trying to offline hctx%d but there is still an online isolcpu CPU %d mapped to it\n",
> +			hctx->queue->disk->disk_name,
> +			hctx->queue_num, ctx->cpu);
> +		return true;
> +	}
> +
> +	return false;
> +}
> +
>   static bool blk_mq_hctx_has_online_cpu(struct blk_mq_hw_ctx *hctx,
>   		unsigned int this_cpu)
>   {
> @@ -3647,7 +3689,7 @@ static bool blk_mq_hctx_has_online_cpu(struct blk_mq_hw_ctx *hctx,
>   
>   		/* this hctx has at least one online CPU */
>   		if (this_cpu != cpu)
> -			return true;
> +			return blk_mq_hctx_check_isolcpus_online(hctx, this_cpu);
>   	}
>   
>   	return false;
> @@ -3659,7 +3701,7 @@ static int blk_mq_hctx_notify_offline(unsigned int cpu, struct hlist_node *node)
>   			struct blk_mq_hw_ctx, cpuhp_online);
>   
>   	if (blk_mq_hctx_has_online_cpu(hctx, cpu))
> -		return 0;
> +		return -EINVAL;
>   
>   	/*
>   	 * Prevent new request from being allocated on the current hctx.
> 
Otherwise:

Reviewed-by: Hannes Reinecke <hare@suse.de>

Cheers,

Hannes
diff mbox series

Patch

diff --git a/block/blk-mq.c b/block/blk-mq.c
index c2697db591091200cdb9f6e082e472b829701e4c..aff17673b773583dfb2b01cb2f5f010c456bd834 100644
--- a/block/blk-mq.c
+++ b/block/blk-mq.c
@@ -3627,6 +3627,48 @@  static bool blk_mq_hctx_has_requests(struct blk_mq_hw_ctx *hctx)
 	return data.has_rq;
 }
 
+static bool blk_mq_hctx_check_isolcpus_online(struct blk_mq_hw_ctx *hctx, unsigned int cpu)
+{
+	const struct cpumask *hk_mask;
+	int i;
+
+	if (!housekeeping_enabled(HK_TYPE_IO_QUEUE))
+		return true;
+
+	hk_mask = housekeeping_cpumask(HK_TYPE_IO_QUEUE);
+
+	for (i = 0; i < hctx->nr_ctx; i++) {
+		struct blk_mq_ctx *ctx = hctx->ctxs[i];
+
+		if (ctx->cpu == cpu)
+			continue;
+
+		/*
+		 * Check if this context has at least one online
+		 * housekeeping CPU in this case the hardware context is
+		 * usable.
+		 */
+		if (cpumask_test_cpu(ctx->cpu, hk_mask) &&
+		    cpu_online(ctx->cpu))
+			break;
+
+		/*
+		 * The context doesn't have any online housekeeping CPUs
+		 * but there might be an online isolated CPU mapped to
+		 * it.
+		 */
+		if (cpu_is_offline(ctx->cpu))
+			continue;
+
+		pr_warn("%s: trying to offline hctx%d but there is still an online isolcpu CPU %d mapped to it\n",
+			hctx->queue->disk->disk_name,
+			hctx->queue_num, ctx->cpu);
+		return true;
+	}
+
+	return false;
+}
+
 static bool blk_mq_hctx_has_online_cpu(struct blk_mq_hw_ctx *hctx,
 		unsigned int this_cpu)
 {
@@ -3647,7 +3689,7 @@  static bool blk_mq_hctx_has_online_cpu(struct blk_mq_hw_ctx *hctx,
 
 		/* this hctx has at least one online CPU */
 		if (this_cpu != cpu)
-			return true;
+			return blk_mq_hctx_check_isolcpus_online(hctx, this_cpu);
 	}
 
 	return false;
@@ -3659,7 +3701,7 @@  static int blk_mq_hctx_notify_offline(unsigned int cpu, struct hlist_node *node)
 			struct blk_mq_hw_ctx, cpuhp_online);
 
 	if (blk_mq_hctx_has_online_cpu(hctx, cpu))
-		return 0;
+		return -EINVAL;
 
 	/*
 	 * Prevent new request from being allocated on the current hctx.