diff mbox series

scsi: core: don't start concurrent async scan on same host

Message ID 20201010032539.426615-1-ming.lei@redhat.com
State New
Headers show
Series scsi: core: don't start concurrent async scan on same host | expand

Commit Message

Ming Lei Oct. 10, 2020, 3:25 a.m. UTC
Current scsi host scan mechanism supposes to fallback into sync host
scan if async scan is in-progress. However, this rule isn't strictly
respected, because scsi_prep_async_scan() doesn't hold scan_mutex when
checking shost->async_scan. When scsi_scan_host() is called
concurrently, two async scan on same host may be started, and hang in
do_scan_async() is observed.

Fixes this issue by checking & setting shost->async_scan atomically
with shost->scan_mutex.

Cc: Christoph Hellwig <hch@lst.de>
Cc: Ewan D. Milne <emilne@redhat.com>
Cc: Hannes Reinecke <hare@suse.de>
Cc: Bart Van Assche <bvanassche@acm.org>
Signed-off-by: Ming Lei <ming.lei@redhat.com>
---
 drivers/scsi/scsi_scan.c | 7 ++++---
 1 file changed, 4 insertions(+), 3 deletions(-)

Comments

Lee Duncan Oct. 10, 2020, 5:31 p.m. UTC | #1
On 10/9/20 8:25 PM, Ming Lei wrote:
> Current scsi host scan mechanism supposes to fallback into sync host
> scan if async scan is in-progress. However, this rule isn't strictly
> respected, because scsi_prep_async_scan() doesn't hold scan_mutex when
> checking shost->async_scan. When scsi_scan_host() is called
> concurrently, two async scan on same host may be started, and hang in
> do_scan_async() is observed.
> 
> Fixes this issue by checking & setting shost->async_scan atomically
> with shost->scan_mutex.
> 
> Cc: Christoph Hellwig <hch@lst.de>
> Cc: Ewan D. Milne <emilne@redhat.com>
> Cc: Hannes Reinecke <hare@suse.de>
> Cc: Bart Van Assche <bvanassche@acm.org>
> Signed-off-by: Ming Lei <ming.lei@redhat.com>
> ---
>  drivers/scsi/scsi_scan.c | 7 ++++---
>  1 file changed, 4 insertions(+), 3 deletions(-)
> 
> diff --git a/drivers/scsi/scsi_scan.c b/drivers/scsi/scsi_scan.c
> index f2437a7570ce..9af50e6f94c4 100644
> --- a/drivers/scsi/scsi_scan.c
> +++ b/drivers/scsi/scsi_scan.c
> @@ -1714,15 +1714,16 @@ static void scsi_sysfs_add_devices(struct Scsi_Host *shost)
>   */
>  static struct async_scan_data *scsi_prep_async_scan(struct Scsi_Host *shost)
>  {
> -	struct async_scan_data *data;
> +	struct async_scan_data *data = NULL;
>  	unsigned long flags;
>  
>  	if (strncmp(scsi_scan_type, "sync", 4) == 0)
>  		return NULL;
>  
> +	mutex_lock(&shost->scan_mutex);
>  	if (shost->async_scan) {
>  		shost_printk(KERN_DEBUG, shost, "%s called twice\n", __func__);
> -		return NULL;
> +		goto err;
>  	}
>  
>  	data = kmalloc(sizeof(*data), GFP_KERNEL);
> @@ -1733,7 +1734,6 @@ static struct async_scan_data *scsi_prep_async_scan(struct Scsi_Host *shost)
>  		goto err;
>  	init_completion(&data->prev_finished);
>  
> -	mutex_lock(&shost->scan_mutex);
>  	spin_lock_irqsave(shost->host_lock, flags);
>  	shost->async_scan = 1;
>  	spin_unlock_irqrestore(shost->host_lock, flags);
> @@ -1748,6 +1748,7 @@ static struct async_scan_data *scsi_prep_async_scan(struct Scsi_Host *shost)
>  	return data;
>  
>   err:
> +	mutex_unlock(&shost->scan_mutex);
>  	kfree(data);
>  	return NULL;
>  }
> 
Reviewed-by: Lee Duncan <lduncan@suse.com>
Ming Lei Oct. 17, 2020, 6:23 a.m. UTC | #2
On Sat, Oct 10, 2020 at 4:23 PM Ming Lei <ming.lei@redhat.com> wrote:
>
> Current scsi host scan mechanism supposes to fallback into sync host
> scan if async scan is in-progress. However, this rule isn't strictly
> respected, because scsi_prep_async_scan() doesn't hold scan_mutex when
> checking shost->async_scan. When scsi_scan_host() is called
> concurrently, two async scan on same host may be started, and hang in
> do_scan_async() is observed.
>
> Fixes this issue by checking & setting shost->async_scan atomically
> with shost->scan_mutex.
>
> Cc: Christoph Hellwig <hch@lst.de>
> Cc: Ewan D. Milne <emilne@redhat.com>
> Cc: Hannes Reinecke <hare@suse.de>
> Cc: Bart Van Assche <bvanassche@acm.org>
> Signed-off-by: Ming Lei <ming.lei@redhat.com>
> ---
>  drivers/scsi/scsi_scan.c | 7 ++++---
>  1 file changed, 4 insertions(+), 3 deletions(-)
>
> diff --git a/drivers/scsi/scsi_scan.c b/drivers/scsi/scsi_scan.c
> index f2437a7570ce..9af50e6f94c4 100644
> --- a/drivers/scsi/scsi_scan.c
> +++ b/drivers/scsi/scsi_scan.c
> @@ -1714,15 +1714,16 @@ static void scsi_sysfs_add_devices(struct Scsi_Host *shost)
>   */
>  static struct async_scan_data *scsi_prep_async_scan(struct Scsi_Host *shost)
>  {
> -       struct async_scan_data *data;
> +       struct async_scan_data *data = NULL;
>         unsigned long flags;
>
>         if (strncmp(scsi_scan_type, "sync", 4) == 0)
>                 return NULL;
>
> +       mutex_lock(&shost->scan_mutex);
>         if (shost->async_scan) {
>                 shost_printk(KERN_DEBUG, shost, "%s called twice\n", __func__);
> -               return NULL;
> +               goto err;
>         }
>
>         data = kmalloc(sizeof(*data), GFP_KERNEL);
> @@ -1733,7 +1734,6 @@ static struct async_scan_data *scsi_prep_async_scan(struct Scsi_Host *shost)
>                 goto err;
>         init_completion(&data->prev_finished);
>
> -       mutex_lock(&shost->scan_mutex);
>         spin_lock_irqsave(shost->host_lock, flags);
>         shost->async_scan = 1;
>         spin_unlock_irqrestore(shost->host_lock, flags);
> @@ -1748,6 +1748,7 @@ static struct async_scan_data *scsi_prep_async_scan(struct Scsi_Host *shost)
>         return data;
>
>   err:
> +       mutex_unlock(&shost->scan_mutex);
>         kfree(data);
>         return NULL;
>  }
> --
> 2.25.2
>

Hello Guys,

Ping...

Thanks,
Ming Lei
Bart Van Assche Oct. 19, 2020, 2:17 p.m. UTC | #3
On 10/9/20 8:25 PM, Ming Lei wrote:
> Current scsi host scan mechanism supposes to fallback into sync host

> scan if async scan is in-progress. However, this rule isn't strictly

> respected, because scsi_prep_async_scan() doesn't hold scan_mutex when

> checking shost->async_scan. When scsi_scan_host() is called

> concurrently, two async scan on same host may be started, and hang in

> do_scan_async() is observed.

> 

> Fixes this issue by checking & setting shost->async_scan atomically

> with shost->scan_mutex.


Did you perhaps mean "by serializing shost->async_scan accesses with
shost->scan_mutex"?

It is not clear to me why the shost->async_scan assignment is protected
with the host lock. Can spin_lock_irqsave(shost->host_lock, flags) and
spin_unlock_irqrestore(shost->host_lock, flags) be left out from this
function?

Anyway:

Reviewed-by: Bart Van Assche <bvanassche@acm.org>
Ming Lei Oct. 19, 2020, 2:38 p.m. UTC | #4
On Mon, Oct 19, 2020 at 07:17:52AM -0700, Bart Van Assche wrote:
> On 10/9/20 8:25 PM, Ming Lei wrote:
> > Current scsi host scan mechanism supposes to fallback into sync host
> > scan if async scan is in-progress. However, this rule isn't strictly
> > respected, because scsi_prep_async_scan() doesn't hold scan_mutex when
> > checking shost->async_scan. When scsi_scan_host() is called
> > concurrently, two async scan on same host may be started, and hang in
> > do_scan_async() is observed.
> > 
> > Fixes this issue by checking & setting shost->async_scan atomically
> > with shost->scan_mutex.
> 
> Did you perhaps mean "by serializing shost->async_scan accesses with
> shost->scan_mutex"?

Specifically, the following checking & setting has to be done atomically,
so shost->scan_mutex is required, just as what scsi_finish_async_scan()
does for clearing shost->async_scan:

scsi_prep_async_scan():

	if (!shost->async_scan)
		return NULL;
	...
	shost->async_scan = 1

> 
> It is not clear to me why the shost->async_scan assignment is protected
> with the host lock. Can spin_lock_irqsave(shost->host_lock, flags) and
> spin_unlock_irqrestore(shost->host_lock, flags) be left out from this
> function?

I think it is doable to remove the ->host_lock from both scsi_prep_async_scan()
and scsi_finish_async_scan(), which can be done as one follow-up cleanup.

With this patch, all reading/writing shost->async_scan are protected by
shost->scan_mutex.

> 
> Anyway:
> 
> Reviewed-by: Bart Van Assche <bvanassche@acm.org>

Thanks!
Martin K. Petersen Oct. 26, 2020, 8:08 p.m. UTC | #5
Ming,

> Current scsi host scan mechanism supposes to fallback into sync host
> scan if async scan is in-progress. However, this rule isn't strictly
> respected, because scsi_prep_async_scan() doesn't hold scan_mutex when
> checking shost->async_scan. When scsi_scan_host() is called
> concurrently, two async scan on same host may be started, and hang in
> do_scan_async() is observed.
>
> Fixes this issue by checking & setting shost->async_scan atomically
> with shost->scan_mutex.

Applied to 5.10/scsi-fixes, thanks!
diff mbox series

Patch

diff --git a/drivers/scsi/scsi_scan.c b/drivers/scsi/scsi_scan.c
index f2437a7570ce..9af50e6f94c4 100644
--- a/drivers/scsi/scsi_scan.c
+++ b/drivers/scsi/scsi_scan.c
@@ -1714,15 +1714,16 @@  static void scsi_sysfs_add_devices(struct Scsi_Host *shost)
  */
 static struct async_scan_data *scsi_prep_async_scan(struct Scsi_Host *shost)
 {
-	struct async_scan_data *data;
+	struct async_scan_data *data = NULL;
 	unsigned long flags;
 
 	if (strncmp(scsi_scan_type, "sync", 4) == 0)
 		return NULL;
 
+	mutex_lock(&shost->scan_mutex);
 	if (shost->async_scan) {
 		shost_printk(KERN_DEBUG, shost, "%s called twice\n", __func__);
-		return NULL;
+		goto err;
 	}
 
 	data = kmalloc(sizeof(*data), GFP_KERNEL);
@@ -1733,7 +1734,6 @@  static struct async_scan_data *scsi_prep_async_scan(struct Scsi_Host *shost)
 		goto err;
 	init_completion(&data->prev_finished);
 
-	mutex_lock(&shost->scan_mutex);
 	spin_lock_irqsave(shost->host_lock, flags);
 	shost->async_scan = 1;
 	spin_unlock_irqrestore(shost->host_lock, flags);
@@ -1748,6 +1748,7 @@  static struct async_scan_data *scsi_prep_async_scan(struct Scsi_Host *shost)
 	return data;
 
  err:
+	mutex_unlock(&shost->scan_mutex);
 	kfree(data);
 	return NULL;
 }