diff mbox series

[v4,1/4] scsi: core: Make sure that targets outlive devices

Message ID 20220712221936.1199196-2-bvanassche@acm.org
State Superseded
Headers show
Series Call blk_mq_free_tag_set() earlier | expand

Commit Message

Bart Van Assche July 12, 2022, 10:19 p.m. UTC
This patch prevents the following sequence from triggering a kernel crash:
* Deletion of a SCSI device is requested via sysfs. Device removal takes
  some time because blk_cleanup_queue() is waiting for the SCSI error
  handler.
* The SCSI target associated with that SCSI device is removed.
* scsi_remove_target() returns and its caller frees the resources
  associated with the SCSI target.
* The error handler makes progress and invokes an LLD callback that
  dereferences the SCSI target pointer.

Reported-by: Mike Christie <michael.christie@oracle.com>
Cc: Christoph Hellwig <hch@lst.de>
Cc: Ming Lei <ming.lei@redhat.com>
Cc: Mike Christie <michael.christie@oracle.com>
Cc: Hannes Reinecke <hare@suse.de>
Cc: John Garry <john.garry@huawei.com>
Cc: Li Zhijian <lizhijian@fujitsu.com>
Signed-off-by: Bart Van Assche <bvanassche@acm.org>
---
 drivers/scsi/scsi_scan.c   |  2 ++
 drivers/scsi/scsi_sysfs.c  | 20 +++++++++++++++++---
 include/scsi/scsi_device.h |  2 ++
 3 files changed, 21 insertions(+), 3 deletions(-)

Comments

Ming Lei July 13, 2022, 1:33 a.m. UTC | #1
On Tue, Jul 12, 2022 at 03:19:33PM -0700, Bart Van Assche wrote:
> This patch prevents that the following sequence triggers a kernel crash:
> * Deletion of a SCSI device is requested via sysfs. Device removal takes
>   some time because blk_cleanup_queue() is waiting for the SCSI error
>   handler.
> * The SCSI target associated with that SCSI device is removed.
> * scsi_remove_target() returns and its caller frees the resources
>   associated with the SCSI target.
> * The error handler makes progress and invokes an LLD callback that
>   dereferences the SCSI target pointer.
> 
> Reported-by: Mike Christie <michael.christie@oracle.com>
> Cc: Christoph Hellwig <hch@lst.de>
> Cc: Ming Lei <ming.lei@redhat.com>
> Cc: Mike Christie <michael.christie@oracle.com>
> Cc: Hannes Reinecke <hare@suse.de>
> Cc: John Garry <john.garry@huawei.com>
> Cc: Li Zhijian <lizhijian@fujitsu.com>
> Signed-off-by: Bart Van Assche <bvanassche@acm.org>

Looks fine,

Reviewed-by: Ming Lei <ming.lei@redhat.com>

Thanks,
Ming
diff mbox series

Patch

diff --git a/drivers/scsi/scsi_scan.c b/drivers/scsi/scsi_scan.c
index 91ac901a6682..4c1efd6a3b0c 100644
--- a/drivers/scsi/scsi_scan.c
+++ b/drivers/scsi/scsi_scan.c
@@ -521,6 +521,8 @@  static struct scsi_target *scsi_alloc_target(struct device *parent,
 	starget->state = STARGET_CREATED;
 	starget->scsi_level = SCSI_2;
 	starget->max_target_blocked = SCSI_DEFAULT_TARGET_BLOCKED;
+	init_waitqueue_head(&starget->sdev_wq);
+
  retry:
 	spin_lock_irqsave(shost->host_lock, flags);
 
diff --git a/drivers/scsi/scsi_sysfs.c b/drivers/scsi/scsi_sysfs.c
index 43949798a2e4..1bc9c26fe1d4 100644
--- a/drivers/scsi/scsi_sysfs.c
+++ b/drivers/scsi/scsi_sysfs.c
@@ -443,7 +443,9 @@  static void scsi_device_cls_release(struct device *class_dev)
 
 static void scsi_device_dev_release_usercontext(struct work_struct *work)
 {
-	struct scsi_device *sdev;
+	struct scsi_device *sdev = container_of(work, struct scsi_device,
+						ew.work);
+	struct scsi_target *starget = sdev->sdev_target;
 	struct device *parent;
 	struct list_head *this, *tmp;
 	struct scsi_vpd *vpd_pg80 = NULL, *vpd_pg83 = NULL;
@@ -452,8 +454,6 @@  static void scsi_device_dev_release_usercontext(struct work_struct *work)
 	unsigned long flags;
 	struct module *mod;
 
-	sdev = container_of(work, struct scsi_device, ew.work);
-
 	mod = sdev->host->hostt->module;
 
 	scsi_dh_release_device(sdev);
@@ -516,6 +516,9 @@  static void scsi_device_dev_release_usercontext(struct work_struct *work)
 	kfree(sdev->inquiry);
 	kfree(sdev);
 
+	if (starget && atomic_dec_return(&starget->sdev_count) == 0)
+		wake_up(&starget->sdev_wq);
+
 	if (parent)
 		put_device(parent);
 	module_put(mod);
@@ -1535,6 +1538,14 @@  static void __scsi_remove_target(struct scsi_target *starget)
 		goto restart;
 	}
 	spin_unlock_irqrestore(shost->host_lock, flags);
+
+	/*
+	 * After scsi_remove_target() returns its caller can remove resources
+	 * associated with @starget, e.g. an rport or session. Wait until all
+	 * devices associated with @starget have been removed to prevent a SCSI
+	 * error handling callback function from triggering a use-after-free.
+	 */
+	wait_event(starget->sdev_wq, atomic_read(&starget->sdev_count) == 0);
 }
 
 /**
@@ -1645,6 +1656,9 @@  void scsi_sysfs_device_initialize(struct scsi_device *sdev)
 	list_add_tail(&sdev->same_target_siblings, &starget->devices);
 	list_add_tail(&sdev->siblings, &shost->__devices);
 	spin_unlock_irqrestore(shost->host_lock, flags);
+
+	atomic_inc(&starget->sdev_count);
+
 	/*
 	 * device can now only be removed via __scsi_remove_device() so hold
 	 * the target.  Target will be held in CREATED state until something
diff --git a/include/scsi/scsi_device.h b/include/scsi/scsi_device.h
index 7cf5f3b7589f..190d2081f4c6 100644
--- a/include/scsi/scsi_device.h
+++ b/include/scsi/scsi_device.h
@@ -309,6 +309,8 @@  struct scsi_target {
 	struct list_head	devices;
 	struct device		dev;
 	struct kref		reap_ref; /* last put renders target invisible */
+	atomic_t		sdev_count;
+	wait_queue_head_t	sdev_wq;
 	unsigned int		channel;
 	unsigned int		id; /* target id ... replace
 				     * scsi_device.id eventually */