diff mbox series

[v2] coresight: core: Fix coresight device probe failure issue

Message ID 20220304082350.30069-1-quic_jinlmao@quicinc.com
State New
Headers show
Series [v2] coresight: core: Fix coresight device probe failure issue | expand

Commit Message

Mao Jinlong March 4, 2022, 8:23 a.m. UTC
It is possibe that probe failure issue happens when the device
and its child_device's probe happens at the same time.
In coresight_make_links, has_conns_grp is true for parent, but
has_conns_grp is false for child device as has_conns_grp is set
to true in coresight_create_conns_sysfs_group. The probe of parent
device will fail at this condition. Add has_conns_grp check for
child device before make the links and make the process from
device_register to connection_create be atomic to avoid this
probe failure issue.

Suggested-by: Suzuki K Poulose <suzuki.poulose@arm.com>
Suggested-by: Mike Leach <mike.leach@linaro.org>
Signed-off-by: Mao Jinlong <quic_jinlmao@quicinc.com>
---
 drivers/hwtracing/coresight/coresight-core.c | 12 ++++++------
 1 file changed, 6 insertions(+), 6 deletions(-)

Comments

Suzuki K Poulose March 8, 2022, 11:56 a.m. UTC | #1
Hi Jinlong

On 04/03/2022 08:23, Mao Jinlong wrote:
> It is possibe that probe failure issue happens when the device
> and its child_device's probe happens at the same time.
> In coresight_make_links, has_conns_grp is true for parent, but
> has_conns_grp is false for child device as has_conns_grp is set
> to true in coresight_create_conns_sysfs_group. The probe of parent
> device will fail at this condition. Add has_conns_grp check for
> child device before make the links and make the process from
> device_register to connection_create be atomic to avoid this
> probe failure issue.
> 
> Suggested-by: Suzuki K Poulose <suzuki.poulose@arm.com>
> Suggested-by: Mike Leach <mike.leach@linaro.org>
> Signed-off-by: Mao Jinlong <quic_jinlmao@quicinc.com>
> ---
>   drivers/hwtracing/coresight/coresight-core.c | 12 ++++++------
>   1 file changed, 6 insertions(+), 6 deletions(-)
> 
> diff --git a/drivers/hwtracing/coresight/coresight-core.c b/drivers/hwtracing/coresight/coresight-core.c
> index 88653d1c06a4..b3e3bc59c09b 100644
> --- a/drivers/hwtracing/coresight/coresight-core.c
> +++ b/drivers/hwtracing/coresight/coresight-core.c
> @@ -1382,7 +1382,7 @@ static int coresight_fixup_device_conns(struct coresight_device *csdev)
>   			continue;
>   		conn->child_dev =
>   			coresight_find_csdev_by_fwnode(conn->child_fwnode);
> -		if (conn->child_dev) {
> +		if (conn->child_dev && conn->child_dev->has_conns_grp) {
>   			ret = coresight_make_links(csdev, conn,
>   						   conn->child_dev);
>   			if (ret)
> @@ -1594,7 +1594,8 @@ struct coresight_device *coresight_register(struct coresight_desc *desc)
>   	refcnts = kcalloc(nr_refcnts, sizeof(*refcnts), GFP_KERNEL);
>   	if (!refcnts) {
>   		ret = -ENOMEM;
> -		goto err_free_csdev;
> +		kfree(csdev);
> +		goto err_out;
>   	}
>   
>   	csdev->refcnt = refcnts;
> @@ -1619,8 +1620,10 @@ struct coresight_device *coresight_register(struct coresight_desc *desc)
>   	csdev->dev.fwnode = fwnode_handle_get(dev_fwnode(desc->dev));
>   	dev_set_name(&csdev->dev, "%s", desc->name);
>   
> +	mutex_lock(&coresight_mutex);
>   	ret = device_register(&csdev->dev);
>   	if (ret) {
> +		mutex_unlock(&coresight_mutex);
>   		put_device(&csdev->dev);
>   		/*
>   		 * All resources are free'd explicitly via
> @@ -1634,6 +1637,7 @@ struct coresight_device *coresight_register(struct coresight_desc *desc)
>   		ret = etm_perf_add_symlink_sink(csdev);
>   
>   		if (ret) {
> +			mutex_unlock(&coresight_mutex);
>   			device_unregister(&csdev->dev);
>   			/*
>   			 * As with the above, all resources are free'd
> @@ -1645,8 +1649,6 @@ struct coresight_device *coresight_register(struct coresight_desc *desc)
>   		}
>   	}
>   
> -	mutex_lock(&coresight_mutex);
> -
>   	ret = coresight_create_conns_sysfs_group(csdev);
>   	if (!ret)
>   		ret = coresight_fixup_device_conns(csdev);
> @@ -1663,8 +1665,6 @@ struct coresight_device *coresight_register(struct coresight_desc *desc)
>   
>   	return csdev;
>   
> -err_free_csdev:
> -	kfree(csdev);
>   err_out:
>   	/* Cleanup the connection information */
>   	coresight_release_platform_data(NULL, desc->pdata);

Could we consolidate the unlock sequence to a single point with 
something like this (untested):


diff --git a/drivers/hwtracing/coresight/coresight-core.c 
b/drivers/hwtracing/coresight/coresight-core.c
index af00dca8d1ac..198ee140c6e6 100644
--- a/drivers/hwtracing/coresight/coresight-core.c
+++ b/drivers/hwtracing/coresight/coresight-core.c
@@ -1571,6 +1571,7 @@ struct coresight_device *coresight_register(struct 
coresight_desc *desc)
  	int nr_refcnts = 1;
  	atomic_t *refcnts = NULL;
  	struct coresight_device *csdev;
+	bool registered = false;

  	csdev = kzalloc(sizeof(*csdev), GFP_KERNEL);
  	if (!csdev) {
@@ -1591,7 +1592,8 @@ struct coresight_device *coresight_register(struct 
coresight_desc *desc)
  	refcnts = kcalloc(nr_refcnts, sizeof(*refcnts), GFP_KERNEL);
  	if (!refcnts) {
  		ret = -ENOMEM;
-		goto err_free_csdev;
+		kfree(csdev);
+		goto err_out;
  	}

  	csdev->refcnt = refcnts;
@@ -1616,6 +1618,13 @@ struct coresight_device 
*coresight_register(struct coresight_desc *desc)
  	csdev->dev.fwnode = fwnode_handle_get(dev_fwnode(desc->dev));
  	dev_set_name(&csdev->dev, "%s", desc->name);

+	/*
+	 * Make sure the device registration and the connection fixup
+	 * are synchronised, so that we don't see uninitialised devices
+	 * on the coresight bus while trying to resolve the connections.
+	 */
+	mutex_lock(&coresight_mutex);
+
  	ret = device_register(&csdev->dev);
  	if (ret) {
  		put_device(&csdev->dev);
@@ -1623,7 +1632,7 @@ struct coresight_device *coresight_register(struct 
coresight_desc *desc)
  		 * All resources are free'd explicitly via
  		 * coresight_device_release(), triggered from put_device().
  		 */
-		goto err_out;
+		goto out_unlock;
  	}

  	if (csdev->type == CORESIGHT_DEV_TYPE_SINK ||
@@ -1638,11 +1647,11 @@ struct coresight_device 
*coresight_register(struct coresight_desc *desc)
  			 * from put_device(), which is in turn called from
  			 * function device_unregister().
  			 */
-			goto err_out;
+			goto out_unlock;
  		}
  	}
-
-	mutex_lock(&coresight_mutex);
+	/* Device is now registered */
+	registered = true;

  	ret = coresight_create_conns_sysfs_group(csdev);
  	if (!ret)
@@ -1652,16 +1661,15 @@ struct coresight_device 
*coresight_register(struct coresight_desc *desc)
  	if (!ret && cti_assoc_ops && cti_assoc_ops->add)
  		cti_assoc_ops->add(csdev);

+out_unlock:
  	mutex_unlock(&coresight_mutex);
-	if (ret) {
+	/* Success */
+	if (!ret)
+		return csdev;
+
+	/* Unregister the device if needed */
+	if (registered)
  		coresight_unregister(csdev);
-		return ERR_PTR(ret);
-	}
-
-	return csdev;
-
-err_free_csdev:
-	kfree(csdev);
  err_out:
  	/* Cleanup the connection information */
  	coresight_release_platform_data(NULL, desc->pdata);
Suzuki K Poulose March 8, 2022, 1:36 p.m. UTC | #2
On 08/03/2022 11:56, Suzuki K Poulose wrote:
> Hi Jinlong
> 
> On 04/03/2022 08:23, Mao Jinlong wrote:
>> It is possibe that probe failure issue happens when the device
>> and its child_device's probe happens at the same time.
>> In coresight_make_links, has_conns_grp is true for parent, but
>> has_conns_grp is false for child device as has_conns_grp is set
>> to true in coresight_create_conns_sysfs_group. The probe of parent
>> device will fail at this condition. Add has_conns_grp check for
>> child device before make the links and make the process from
>> device_register to connection_create be atomic to avoid this
>> probe failure issue.
>>
>> Suggested-by: Suzuki K Poulose <suzuki.poulose@arm.com>
>> Suggested-by: Mike Leach <mike.leach@linaro.org>
>> Signed-off-by: Mao Jinlong <quic_jinlmao@quicinc.com>
>> ---
>>   drivers/hwtracing/coresight/coresight-core.c | 12 ++++++------
>>   1 file changed, 6 insertions(+), 6 deletions(-)
>>
>> diff --git a/drivers/hwtracing/coresight/coresight-core.c 
>> b/drivers/hwtracing/coresight/coresight-core.c
>> index 88653d1c06a4..b3e3bc59c09b 100644
>> --- a/drivers/hwtracing/coresight/coresight-core.c
>> +++ b/drivers/hwtracing/coresight/coresight-core.c
>> @@ -1382,7 +1382,7 @@ static int coresight_fixup_device_conns(struct 
>> coresight_device *csdev)
>>               continue;
>>           conn->child_dev =
>>               coresight_find_csdev_by_fwnode(conn->child_fwnode);
>> -        if (conn->child_dev) {
>> +        if (conn->child_dev && conn->child_dev->has_conns_grp) {
>>               ret = coresight_make_links(csdev, conn,
>>                              conn->child_dev);
>>               if (ret)
>> @@ -1594,7 +1594,8 @@ struct coresight_device 
>> *coresight_register(struct coresight_desc *desc)
>>       refcnts = kcalloc(nr_refcnts, sizeof(*refcnts), GFP_KERNEL);
>>       if (!refcnts) {
>>           ret = -ENOMEM;
>> -        goto err_free_csdev;
>> +        kfree(csdev);
>> +        goto err_out;
>>       }
>>       csdev->refcnt = refcnts;
>> @@ -1619,8 +1620,10 @@ struct coresight_device 
>> *coresight_register(struct coresight_desc *desc)
>>       csdev->dev.fwnode = fwnode_handle_get(dev_fwnode(desc->dev));
>>       dev_set_name(&csdev->dev, "%s", desc->name);
>> +    mutex_lock(&coresight_mutex);
>>       ret = device_register(&csdev->dev);
>>       if (ret) {
>> +        mutex_unlock(&coresight_mutex);
>>           put_device(&csdev->dev);
>>           /*
>>            * All resources are free'd explicitly via
>> @@ -1634,6 +1637,7 @@ struct coresight_device 
>> *coresight_register(struct coresight_desc *desc)
>>           ret = etm_perf_add_symlink_sink(csdev);
>>           if (ret) {
>> +            mutex_unlock(&coresight_mutex);
>>               device_unregister(&csdev->dev);
>>               /*
>>                * As with the above, all resources are free'd
>> @@ -1645,8 +1649,6 @@ struct coresight_device 
>> *coresight_register(struct coresight_desc *desc)
>>           }
>>       }
>> -    mutex_lock(&coresight_mutex);
>> -
>>       ret = coresight_create_conns_sysfs_group(csdev);
>>       if (!ret)
>>           ret = coresight_fixup_device_conns(csdev);
>> @@ -1663,8 +1665,6 @@ struct coresight_device 
>> *coresight_register(struct coresight_desc *desc)
>>       return csdev;
>> -err_free_csdev:
>> -    kfree(csdev);
>>   err_out:
>>       /* Cleanup the connection information */
>>       coresight_release_platform_data(NULL, desc->pdata);
> 
> Could we consolidate the unlock sequence to a single point with 
> something like this (untested):
> 
> 
> diff --git a/drivers/hwtracing/coresight/coresight-core.c 
> b/drivers/hwtracing/coresight/coresight-core.c
> index af00dca8d1ac..198ee140c6e6 100644
> --- a/drivers/hwtracing/coresight/coresight-core.c
> +++ b/drivers/hwtracing/coresight/coresight-core.c
> @@ -1571,6 +1571,7 @@ struct coresight_device *coresight_register(struct 
> coresight_desc *desc)
>       int nr_refcnts = 1;
>       atomic_t *refcnts = NULL;
>       struct coresight_device *csdev;
> +    bool registered = false;
> 
>       csdev = kzalloc(sizeof(*csdev), GFP_KERNEL);
>       if (!csdev) {
> @@ -1591,7 +1592,8 @@ struct coresight_device *coresight_register(struct 
> coresight_desc *desc)
>       refcnts = kcalloc(nr_refcnts, sizeof(*refcnts), GFP_KERNEL);
>       if (!refcnts) {
>           ret = -ENOMEM;
> -        goto err_free_csdev;
> +        kfree(csdev);
> +        goto err_out;
>       }
> 
>       csdev->refcnt = refcnts;
> @@ -1616,6 +1618,13 @@ struct coresight_device 
> *coresight_register(struct coresight_desc *desc)
>       csdev->dev.fwnode = fwnode_handle_get(dev_fwnode(desc->dev));
>       dev_set_name(&csdev->dev, "%s", desc->name);
> 
> +    /*
> +     * Make sure the device registration and the connection fixup
> +     * are synchronised, so that we don't see uninitialised devices
> +     * on the coresight bus while trying to resolve the connections.
> +     */
> +    mutex_lock(&coresight_mutex);
> +
>       ret = device_register(&csdev->dev);
>       if (ret) {
>           put_device(&csdev->dev);
> @@ -1623,7 +1632,7 @@ struct coresight_device *coresight_register(struct 
> coresight_desc *desc)
>            * All resources are free'd explicitly via
>            * coresight_device_release(), triggered from put_device().
>            */
> -        goto err_out;
> +        goto out_unlock;
>       }
> 
>       if (csdev->type == CORESIGHT_DEV_TYPE_SINK ||
> @@ -1638,11 +1647,11 @@ struct coresight_device 
> *coresight_register(struct coresight_desc *desc)
>                * from put_device(), which is in turn called from
>                * function device_unregister().
>                */
> -            goto err_out;
> +            goto out_unlock;
>           }
>       }
> -
> -    mutex_lock(&coresight_mutex);
> +    /* Device is now registered */
> +    registered = true;
> 
>       ret = coresight_create_conns_sysfs_group(csdev);
>       if (!ret)
> @@ -1652,16 +1661,15 @@ struct coresight_device 
> *coresight_register(struct coresight_desc *desc)
>       if (!ret && cti_assoc_ops && cti_assoc_ops->add)
>           cti_assoc_ops->add(csdev);
> 
> +out_unlock:
>       mutex_unlock(&coresight_mutex);
> -    if (ret) {
> +    /* Success */
> +    if (!ret)
> +        return csdev;
> +
> +    /* Unregister the device if needed */
> +    if (registered)
>           coresight_unregister(csdev);
> -        return ERR_PTR(ret);
> -    }

Apologies, this should be preserved :

i.e,
	if (registered) {
		coresight_unregister(csdev);
		return ERR_PTR(ret);
	}

Thanks
Suzuki
Mao Jinlong March 9, 2022, 2:57 a.m. UTC | #3
Hi Suzuki,

On 3/8/2022 9:36 PM, Suzuki K Poulose wrote:
> On 08/03/2022 11:56, Suzuki K Poulose wrote:
>> Hi Jinlong
>>
>> On 04/03/2022 08:23, Mao Jinlong wrote:
>>> It is possibe that probe failure issue happens when the device
>>> and its child_device's probe happens at the same time.
>>> In coresight_make_links, has_conns_grp is true for parent, but
>>> has_conns_grp is false for child device as has_conns_grp is set
>>> to true in coresight_create_conns_sysfs_group. The probe of parent
>>> device will fail at this condition. Add has_conns_grp check for
>>> child device before make the links and make the process from
>>> device_register to connection_create be atomic to avoid this
>>> probe failure issue.
>>>
>>> Suggested-by: Suzuki K Poulose <suzuki.poulose@arm.com>
>>> Suggested-by: Mike Leach <mike.leach@linaro.org>
>>> Signed-off-by: Mao Jinlong <quic_jinlmao@quicinc.com>
>>> ---
>>>   drivers/hwtracing/coresight/coresight-core.c | 12 ++++++------
>>>   1 file changed, 6 insertions(+), 6 deletions(-)
>>>
>>> diff --git a/drivers/hwtracing/coresight/coresight-core.c 
>>> b/drivers/hwtracing/coresight/coresight-core.c
>>> index 88653d1c06a4..b3e3bc59c09b 100644
>>> --- a/drivers/hwtracing/coresight/coresight-core.c
>>> +++ b/drivers/hwtracing/coresight/coresight-core.c
>>> @@ -1382,7 +1382,7 @@ static int coresight_fixup_device_conns(struct 
>>> coresight_device *csdev)
>>>               continue;
>>>           conn->child_dev =
>>> coresight_find_csdev_by_fwnode(conn->child_fwnode);
>>> -        if (conn->child_dev) {
>>> +        if (conn->child_dev && conn->child_dev->has_conns_grp) {
>>>               ret = coresight_make_links(csdev, conn,
>>>                              conn->child_dev);
>>>               if (ret)
>>> @@ -1594,7 +1594,8 @@ struct coresight_device 
>>> *coresight_register(struct coresight_desc *desc)
>>>       refcnts = kcalloc(nr_refcnts, sizeof(*refcnts), GFP_KERNEL);
>>>       if (!refcnts) {
>>>           ret = -ENOMEM;
>>> -        goto err_free_csdev;
>>> +        kfree(csdev);
>>> +        goto err_out;
>>>       }
>>>       csdev->refcnt = refcnts;
>>> @@ -1619,8 +1620,10 @@ struct coresight_device 
>>> *coresight_register(struct coresight_desc *desc)
>>>       csdev->dev.fwnode = fwnode_handle_get(dev_fwnode(desc->dev));
>>>       dev_set_name(&csdev->dev, "%s", desc->name);
>>> +    mutex_lock(&coresight_mutex);
>>>       ret = device_register(&csdev->dev);
>>>       if (ret) {
>>> +        mutex_unlock(&coresight_mutex);
>>>           put_device(&csdev->dev);
>>>           /*
>>>            * All resources are free'd explicitly via
>>> @@ -1634,6 +1637,7 @@ struct coresight_device 
>>> *coresight_register(struct coresight_desc *desc)
>>>           ret = etm_perf_add_symlink_sink(csdev);
>>>           if (ret) {
>>> +            mutex_unlock(&coresight_mutex);
>>>               device_unregister(&csdev->dev);
>>>               /*
>>>                * As with the above, all resources are free'd
>>> @@ -1645,8 +1649,6 @@ struct coresight_device 
>>> *coresight_register(struct coresight_desc *desc)
>>>           }
>>>       }
>>> -    mutex_lock(&coresight_mutex);
>>> -
>>>       ret = coresight_create_conns_sysfs_group(csdev);
>>>       if (!ret)
>>>           ret = coresight_fixup_device_conns(csdev);
>>> @@ -1663,8 +1665,6 @@ struct coresight_device 
>>> *coresight_register(struct coresight_desc *desc)
>>>       return csdev;
>>> -err_free_csdev:
>>> -    kfree(csdev);
>>>   err_out:
>>>       /* Cleanup the connection information */
>>>       coresight_release_platform_data(NULL, desc->pdata);
>>
>> Could we consolidate the unlock sequence to a single point with 
>> something like this (untested):
>>
>>
>> diff --git a/drivers/hwtracing/coresight/coresight-core.c 
>> b/drivers/hwtracing/coresight/coresight-core.c
>> index af00dca8d1ac..198ee140c6e6 100644
>> --- a/drivers/hwtracing/coresight/coresight-core.c
>> +++ b/drivers/hwtracing/coresight/coresight-core.c
>> @@ -1571,6 +1571,7 @@ struct coresight_device 
>> *coresight_register(struct coresight_desc *desc)
>>       int nr_refcnts = 1;
>>       atomic_t *refcnts = NULL;
>>       struct coresight_device *csdev;
>> +    bool registered = false;
>>
>>       csdev = kzalloc(sizeof(*csdev), GFP_KERNEL);
>>       if (!csdev) {
>> @@ -1591,7 +1592,8 @@ struct coresight_device 
>> *coresight_register(struct coresight_desc *desc)
>>       refcnts = kcalloc(nr_refcnts, sizeof(*refcnts), GFP_KERNEL);
>>       if (!refcnts) {
>>           ret = -ENOMEM;
>> -        goto err_free_csdev;
>> +        kfree(csdev);
>> +        goto err_out;
>>       }
>>
>>       csdev->refcnt = refcnts;
>> @@ -1616,6 +1618,13 @@ struct coresight_device 
>> *coresight_register(struct coresight_desc *desc)
>>       csdev->dev.fwnode = fwnode_handle_get(dev_fwnode(desc->dev));
>>       dev_set_name(&csdev->dev, "%s", desc->name);
>>
>> +    /*
>> +     * Make sure the device registration and the connection fixup
>> +     * are synchronised, so that we don't see uninitialised devices
>> +     * on the coresight bus while trying to resolve the connections.
>> +     */
>> +    mutex_lock(&coresight_mutex);
>> +
>>       ret = device_register(&csdev->dev);
>>       if (ret) {
>>           put_device(&csdev->dev);
>> @@ -1623,7 +1632,7 @@ struct coresight_device 
>> *coresight_register(struct coresight_desc *desc)
>>            * All resources are free'd explicitly via
>>            * coresight_device_release(), triggered from put_device().
>>            */
>> -        goto err_out;
>> +        goto out_unlock;
>>       }
>>
>>       if (csdev->type == CORESIGHT_DEV_TYPE_SINK ||
>> @@ -1638,11 +1647,11 @@ struct coresight_device 
>> *coresight_register(struct coresight_desc *desc)
>>                * from put_device(), which is in turn called from
>>                * function device_unregister().
>>                */
>> -            goto err_out;
>> +            goto out_unlock;
>>           }
>>       }
>> -
>> -    mutex_lock(&coresight_mutex);
>> +    /* Device is now registered */
>> +    registered = true;
>>
>>       ret = coresight_create_conns_sysfs_group(csdev);
>>       if (!ret)
>> @@ -1652,16 +1661,15 @@ struct coresight_device 
>> *coresight_register(struct coresight_desc *desc)
>>       if (!ret && cti_assoc_ops && cti_assoc_ops->add)
>>           cti_assoc_ops->add(csdev);
>>
>> +out_unlock:
>>       mutex_unlock(&coresight_mutex);
>> -    if (ret) {
>> +    /* Success */
>> +    if (!ret)
>> +        return csdev;
>> +
>> +    /* Unregister the device if needed */
>> +    if (registered)
>>           coresight_unregister(csdev);
>> -        return ERR_PTR(ret);
>> -    }
>
> Apologies, this should be preserved :
>
> i.e,
>     if (registered) {
>         coresight_unregister(csdev);
>         return ERR_PTR(ret);
>     }
I will test with your changes and get back to you later.

Thanks
Jinlong Mao
>
> Thanks
> Suzuki
Mao Jinlong March 9, 2022, 2:27 p.m. UTC | #4
Hi Suzuki,

On 3/9/2022 10:57 AM, Jinlong Mao wrote:
> Hi Suzuki,
>
> On 3/8/2022 9:36 PM, Suzuki K Poulose wrote:
>> On 08/03/2022 11:56, Suzuki K Poulose wrote:
>>> Hi Jinlong
>>>
>>> On 04/03/2022 08:23, Mao Jinlong wrote:
>>>> It is possibe that probe failure issue happens when the device
>>>> and its child_device's probe happens at the same time.
>>>> In coresight_make_links, has_conns_grp is true for parent, but
>>>> has_conns_grp is false for child device as has_conns_grp is set
>>>> to true in coresight_create_conns_sysfs_group. The probe of parent
>>>> device will fail at this condition. Add has_conns_grp check for
>>>> child device before make the links and make the process from
>>>> device_register to connection_create be atomic to avoid this
>>>> probe failure issue.
>>>>
>>>> Suggested-by: Suzuki K Poulose <suzuki.poulose@arm.com>
>>>> Suggested-by: Mike Leach <mike.leach@linaro.org>
>>>> Signed-off-by: Mao Jinlong <quic_jinlmao@quicinc.com>
>>>> ---
>>>>   drivers/hwtracing/coresight/coresight-core.c | 12 ++++++------
>>>>   1 file changed, 6 insertions(+), 6 deletions(-)
>>>>
>>>> diff --git a/drivers/hwtracing/coresight/coresight-core.c 
>>>> b/drivers/hwtracing/coresight/coresight-core.c
>>>> index 88653d1c06a4..b3e3bc59c09b 100644
>>>> --- a/drivers/hwtracing/coresight/coresight-core.c
>>>> +++ b/drivers/hwtracing/coresight/coresight-core.c
>>>> @@ -1382,7 +1382,7 @@ static int 
>>>> coresight_fixup_device_conns(struct coresight_device *csdev)
>>>>               continue;
>>>>           conn->child_dev =
>>>> coresight_find_csdev_by_fwnode(conn->child_fwnode);
>>>> -        if (conn->child_dev) {
>>>> +        if (conn->child_dev && conn->child_dev->has_conns_grp) {
>>>>               ret = coresight_make_links(csdev, conn,
>>>>                              conn->child_dev);
>>>>               if (ret)
>>>> @@ -1594,7 +1594,8 @@ struct coresight_device 
>>>> *coresight_register(struct coresight_desc *desc)
>>>>       refcnts = kcalloc(nr_refcnts, sizeof(*refcnts), GFP_KERNEL);
>>>>       if (!refcnts) {
>>>>           ret = -ENOMEM;
>>>> -        goto err_free_csdev;
>>>> +        kfree(csdev);
>>>> +        goto err_out;
>>>>       }
>>>>       csdev->refcnt = refcnts;
>>>> @@ -1619,8 +1620,10 @@ struct coresight_device 
>>>> *coresight_register(struct coresight_desc *desc)
>>>>       csdev->dev.fwnode = fwnode_handle_get(dev_fwnode(desc->dev));
>>>>       dev_set_name(&csdev->dev, "%s", desc->name);
>>>> +    mutex_lock(&coresight_mutex);
>>>>       ret = device_register(&csdev->dev);
>>>>       if (ret) {
>>>> +        mutex_unlock(&coresight_mutex);
>>>>           put_device(&csdev->dev);
>>>>           /*
>>>>            * All resources are free'd explicitly via
>>>> @@ -1634,6 +1637,7 @@ struct coresight_device 
>>>> *coresight_register(struct coresight_desc *desc)
>>>>           ret = etm_perf_add_symlink_sink(csdev);
>>>>           if (ret) {
>>>> +            mutex_unlock(&coresight_mutex);
>>>>               device_unregister(&csdev->dev);
>>>>               /*
>>>>                * As with the above, all resources are free'd
>>>> @@ -1645,8 +1649,6 @@ struct coresight_device 
>>>> *coresight_register(struct coresight_desc *desc)
>>>>           }
>>>>       }
>>>> -    mutex_lock(&coresight_mutex);
>>>> -
>>>>       ret = coresight_create_conns_sysfs_group(csdev);
>>>>       if (!ret)
>>>>           ret = coresight_fixup_device_conns(csdev);
>>>> @@ -1663,8 +1665,6 @@ struct coresight_device 
>>>> *coresight_register(struct coresight_desc *desc)
>>>>       return csdev;
>>>> -err_free_csdev:
>>>> -    kfree(csdev);
>>>>   err_out:
>>>>       /* Cleanup the connection information */
>>>>       coresight_release_platform_data(NULL, desc->pdata);
>>>
>>> Could we consolidate the unlock sequence to a single point with 
>>> something like this (untested):
>>>
>>>
>>> diff --git a/drivers/hwtracing/coresight/coresight-core.c 
>>> b/drivers/hwtracing/coresight/coresight-core.c
>>> index af00dca8d1ac..198ee140c6e6 100644
>>> --- a/drivers/hwtracing/coresight/coresight-core.c
>>> +++ b/drivers/hwtracing/coresight/coresight-core.c
>>> @@ -1571,6 +1571,7 @@ struct coresight_device 
>>> *coresight_register(struct coresight_desc *desc)
>>>       int nr_refcnts = 1;
>>>       atomic_t *refcnts = NULL;
>>>       struct coresight_device *csdev;
>>> +    bool registered = false;
>>>
>>>       csdev = kzalloc(sizeof(*csdev), GFP_KERNEL);
>>>       if (!csdev) {
>>> @@ -1591,7 +1592,8 @@ struct coresight_device 
>>> *coresight_register(struct coresight_desc *desc)
>>>       refcnts = kcalloc(nr_refcnts, sizeof(*refcnts), GFP_KERNEL);
>>>       if (!refcnts) {
>>>           ret = -ENOMEM;
>>> -        goto err_free_csdev;
>>> +        kfree(csdev);
>>> +        goto err_out;
>>>       }
>>>
>>>       csdev->refcnt = refcnts;
>>> @@ -1616,6 +1618,13 @@ struct coresight_device 
>>> *coresight_register(struct coresight_desc *desc)
>>>       csdev->dev.fwnode = fwnode_handle_get(dev_fwnode(desc->dev));
>>>       dev_set_name(&csdev->dev, "%s", desc->name);
>>>
>>> +    /*
>>> +     * Make sure the device registration and the connection fixup
>>> +     * are synchronised, so that we don't see uninitialised devices
>>> +     * on the coresight bus while trying to resolve the connections.
>>> +     */
>>> +    mutex_lock(&coresight_mutex);
>>> +
>>>       ret = device_register(&csdev->dev);
>>>       if (ret) {
>>>           put_device(&csdev->dev);
>>> @@ -1623,7 +1632,7 @@ struct coresight_device 
>>> *coresight_register(struct coresight_desc *desc)
>>>            * All resources are free'd explicitly via
>>>            * coresight_device_release(), triggered from put_device().
>>>            */
>>> -        goto err_out;
>>> +        goto out_unlock;
>>>       }
>>>
>>>       if (csdev->type == CORESIGHT_DEV_TYPE_SINK ||
>>> @@ -1638,11 +1647,11 @@ struct coresight_device 
>>> *coresight_register(struct coresight_desc *desc)
>>>                * from put_device(), which is in turn called from
>>>                * function device_unregister().
>>>                */
>>> -            goto err_out;
>>> +            goto out_unlock;
>>>           }
>>>       }
>>> -
>>> -    mutex_lock(&coresight_mutex);
>>> +    /* Device is now registered */
>>> +    registered = true;
>>>
>>>       ret = coresight_create_conns_sysfs_group(csdev);
>>>       if (!ret)
>>> @@ -1652,16 +1661,15 @@ struct coresight_device 
>>> *coresight_register(struct coresight_desc *desc)
>>>       if (!ret && cti_assoc_ops && cti_assoc_ops->add)
>>>           cti_assoc_ops->add(csdev);
>>>
>>> +out_unlock:
>>>       mutex_unlock(&coresight_mutex);
>>> -    if (ret) {
>>> +    /* Success */
>>> +    if (!ret)
>>> +        return csdev;
>>> +
>>> +    /* Unregister the device if needed */
>>> +    if (registered)
>>>           coresight_unregister(csdev);
>>> -        return ERR_PTR(ret);
>>> -    }
>>
>> Apologies, this should be preserved :
>>
>> i.e,
>>     if (registered) {
>>         coresight_unregister(csdev);
>>         return ERR_PTR(ret);
>>     }

I tested with your changes.
No probe failure during reboot stress test.
I submit "[PATCH v3] coresight: core: Fix coresight device probe failure 
issue".
Please help to review.

Thanks
Jinlong Mao

> I will test with your changes and get back to you later.
>
> Thanks
> Jinlong Mao
>>
>> Thanks
>> Suzuki
diff mbox series

Patch

diff --git a/drivers/hwtracing/coresight/coresight-core.c b/drivers/hwtracing/coresight/coresight-core.c
index 88653d1c06a4..b3e3bc59c09b 100644
--- a/drivers/hwtracing/coresight/coresight-core.c
+++ b/drivers/hwtracing/coresight/coresight-core.c
@@ -1382,7 +1382,7 @@  static int coresight_fixup_device_conns(struct coresight_device *csdev)
 			continue;
 		conn->child_dev =
 			coresight_find_csdev_by_fwnode(conn->child_fwnode);
-		if (conn->child_dev) {
+		if (conn->child_dev && conn->child_dev->has_conns_grp) {
 			ret = coresight_make_links(csdev, conn,
 						   conn->child_dev);
 			if (ret)
@@ -1594,7 +1594,8 @@  struct coresight_device *coresight_register(struct coresight_desc *desc)
 	refcnts = kcalloc(nr_refcnts, sizeof(*refcnts), GFP_KERNEL);
 	if (!refcnts) {
 		ret = -ENOMEM;
-		goto err_free_csdev;
+		kfree(csdev);
+		goto err_out;
 	}
 
 	csdev->refcnt = refcnts;
@@ -1619,8 +1620,10 @@  struct coresight_device *coresight_register(struct coresight_desc *desc)
 	csdev->dev.fwnode = fwnode_handle_get(dev_fwnode(desc->dev));
 	dev_set_name(&csdev->dev, "%s", desc->name);
 
+	mutex_lock(&coresight_mutex);
 	ret = device_register(&csdev->dev);
 	if (ret) {
+		mutex_unlock(&coresight_mutex);
 		put_device(&csdev->dev);
 		/*
 		 * All resources are free'd explicitly via
@@ -1634,6 +1637,7 @@  struct coresight_device *coresight_register(struct coresight_desc *desc)
 		ret = etm_perf_add_symlink_sink(csdev);
 
 		if (ret) {
+			mutex_unlock(&coresight_mutex);
 			device_unregister(&csdev->dev);
 			/*
 			 * As with the above, all resources are free'd
@@ -1645,8 +1649,6 @@  struct coresight_device *coresight_register(struct coresight_desc *desc)
 		}
 	}
 
-	mutex_lock(&coresight_mutex);
-
 	ret = coresight_create_conns_sysfs_group(csdev);
 	if (!ret)
 		ret = coresight_fixup_device_conns(csdev);
@@ -1663,8 +1665,6 @@  struct coresight_device *coresight_register(struct coresight_desc *desc)
 
 	return csdev;
 
-err_free_csdev:
-	kfree(csdev);
 err_out:
 	/* Cleanup the connection information */
 	coresight_release_platform_data(NULL, desc->pdata);