diff mbox series

[v8,7/8] wifi: ath12k: refactor core start based on hardware group

Message ID 20240531180411.1149605-8-quic_hprem@quicinc.com
State New
Headers show
Series wifi: ath12k: Introduce device group abstraction | expand

Commit Message

Harshitha Prem May 31, 2024, 6:04 p.m. UTC
From: Karthikeyan Periyasamy <quic_periyasa@quicinc.com>

Currently, mac allocate/register and core_pdev_create are initiated
immediately when QMI firmware ready event is received for a particular
device.

With hardware device group abstraction, QMI firmware ready event can be
received simultaneously for different devices in the group and so, it
should not be registered immediately rather it has to be deferred until
all devices in the group has received QMI firmware ready.

To handle this, refactor the code of core start to move the following
apis inside a wrapper ath12k_core_hw_group_start()
        * ath12k_mac_allocate()
        * ath12k_core_pdev_create()
        * ath12k_core_rfkill_config()
        * ath12k_mac_register()
        * ath12k_hif_irq_enable()

similarly, move the corresponding destroy/unregister/disable apis
inside wrapper ath12k_core_hw_group_stop()

Add the device flags to indicate pdev created and IRQ enabled which would
be helpful for device clean up during failure cases.

Tested-on: QCN9274 hw2.0 PCI WLAN.WBE.1.0.1-00029-QCAHKSWPL_SILICONZ-1
Tested-on: WCN7850 hw2.0 PCI WLAN.HMT.1.0-03427-QCAHMTSWPL_V1.0_V2.0_SILICONZ-1.15378.4

Signed-off-by: Karthikeyan Periyasamy <quic_periyasa@quicinc.com>
Co-developed-by: Harshitha Prem <quic_hprem@quicinc.com>
Signed-off-by: Harshitha Prem <quic_hprem@quicinc.com>
---
 drivers/net/wireless/ath/ath12k/core.c | 210 +++++++++++++++++++------
 drivers/net/wireless/ath/ath12k/core.h |  32 ++++
 2 files changed, 191 insertions(+), 51 deletions(-)

Comments

Kalle Valo June 6, 2024, 1:04 p.m. UTC | #1
Harshitha Prem <quic_hprem@quicinc.com> writes:

> From: Karthikeyan Periyasamy <quic_periyasa@quicinc.com>
>
> Currently, mac allocate/register and core_pdev_create are initiated
> immediately when QMI firmware ready event is received for a particular
> device.
>
> With hardware device group abstraction, QMI firmware ready event can be
> received simultaneously for different devices in the group and so, it
> should not be registered immediately rather it has to be deferred until
> all devices in the group has received QMI firmware ready.
>
> To handle this, refactor the code of core start to move the following
> apis inside a wrapper ath12k_core_hw_group_start()
>         * ath12k_mac_allocate()
>         * ath12k_core_pdev_create()
>         * ath12k_core_rfkill_config()
>         * ath12k_mac_register()
>         * ath12k_hif_irq_enable()
>
> similarly, move the corresponding destroy/unregister/disable apis
> inside wrapper ath12k_core_hw_group_stop()
>
> Add the device flags to indicate pdev created and IRQ enabled which would
> be helpful for device clean up during failure cases.
>
> Tested-on: QCN9274 hw2.0 PCI WLAN.WBE.1.0.1-00029-QCAHKSWPL_SILICONZ-1
> Tested-on: WCN7850 hw2.0 PCI WLAN.HMT.1.0-03427-QCAHMTSWPL_V1.0_V2.0_SILICONZ-1.15378.4
>
> Signed-off-by: Karthikeyan Periyasamy <quic_periyasa@quicinc.com>
> Co-developed-by: Harshitha Prem <quic_hprem@quicinc.com>
> Signed-off-by: Harshitha Prem <quic_hprem@quicinc.com>
> ---
>  drivers/net/wireless/ath/ath12k/core.c | 210 +++++++++++++++++++------
>  drivers/net/wireless/ath/ath12k/core.h |  32 ++++
>  2 files changed, 191 insertions(+), 51 deletions(-)
>
> diff --git a/drivers/net/wireless/ath/ath12k/core.c b/drivers/net/wireless/ath/ath12k/core.c
> index ebe31cbb6435..90c70dbfc50a 100644
> --- a/drivers/net/wireless/ath/ath12k/core.c
> +++ b/drivers/net/wireless/ath/ath12k/core.c
> @@ -563,6 +563,9 @@ u32 ath12k_core_get_max_num_tids(struct ath12k_base *ab)
>  
>  static void ath12k_core_stop(struct ath12k_base *ab)
>  {
> +	clear_bit(ATH12K_FLAG_CORE_STARTED, &ab->dev_flags);
> +	ath12k_dec_num_core_started(ab);
> +
>  	if (!test_bit(ATH12K_FLAG_CRASH_FLUSH, &ab->dev_flags))
>  		ath12k_qmi_firmware_stop(ab);
>  
> @@ -689,11 +692,15 @@ static int ath12k_core_pdev_create(struct ath12k_base *ab)
>  		return ret;
>  	}
>  
> +	set_bit(ATH12K_FLAG_PDEV_CREATED, &ab->dev_flags);
> +
>  	return 0;
>  }
>  
>  static void ath12k_core_pdev_destroy(struct ath12k_base *ab)
>  {
> +	clear_bit(ATH12K_FLAG_PDEV_CREATED, &ab->dev_flags);
> +
>  	ath12k_dp_pdev_free(ab);
>  }
>  
> @@ -702,6 +709,8 @@ static int ath12k_core_start(struct ath12k_base *ab,
>  {
>  	int ret;
>  
> +	lockdep_assert_held(&ab->core_lock);
> +
>  	ret = ath12k_wmi_attach(ab);
>  	if (ret) {
>  		ath12k_err(ab, "failed to attach wmi: %d\n", ret);
> @@ -795,6 +804,12 @@ static int ath12k_core_start(struct ath12k_base *ab,
>  		/* ACPI is optional so continue in case of an error */
>  		ath12k_dbg(ab, ATH12K_DBG_BOOT, "acpi failed: %d\n", ret);
>  
> +	if (!test_bit(ATH12K_FLAG_CORE_STARTED, &ab->dev_flags)) {
> +		/* Indicate the core start in the appropriate group */
> +		ath12k_inc_num_core_started(ab);
> +		set_bit(ATH12K_FLAG_CORE_STARTED, &ab->dev_flags);
> +	}
> +
>  	return 0;
>  
>  err_reo_cleanup:
> @@ -806,6 +821,108 @@ static int ath12k_core_start(struct ath12k_base *ab,
>  	return ret;
>  }
>  
> +static void ath12k_core_device_cleanup(struct ath12k_base *ab)
> +{
> +	mutex_lock(&ab->core_lock);
> +
> +	if (test_and_clear_bit(ATH12K_FLAG_CORE_HIF_IRQ_ENABLED, &ab->dev_flags))
> +		ath12k_hif_irq_disable(ab);
> +
> +	if (test_bit(ATH12K_FLAG_PDEV_CREATED, &ab->dev_flags))
> +		ath12k_core_pdev_destroy(ab);
> +
> +	if (test_bit(ATH12K_FLAG_REGISTERED, &ab->dev_flags)) {
> +		ath12k_mac_unregister(ab);
> +		ath12k_mac_destroy(ab);
> +	}
> +
> +	mutex_unlock(&ab->core_lock);
> +}

This patch is just abusing flags and because of that we have spaghetti
code. I have been disliking use of enum ath12k_dev_flags before but this
is just looks too much. I am wondering do we need to cleanup the ath12k
architecture first, reduce the usage of flags and then revisit this
patchset?
Harshitha Prem June 7, 2024, 1:49 p.m. UTC | #2
On 6/6/2024 6:34 PM, Kalle Valo wrote:
> Harshitha Prem <quic_hprem@quicinc.com> writes:
> 
>> From: Karthikeyan Periyasamy <quic_periyasa@quicinc.com>
>>
>> Currently, mac allocate/register and core_pdev_create are initiated
>> immediately when QMI firmware ready event is received for a particular
>> device.
>>
>> With hardware device group abstraction, QMI firmware ready event can be
>> received simultaneously for different devices in the group and so, it
>> should not be registered immediately rather it has to be deferred until
>> all devices in the group has received QMI firmware ready.
>>
>> To handle this, refactor the code of core start to move the following
>> apis inside a wrapper ath12k_core_hw_group_start()
>>          * ath12k_mac_allocate()
>>          * ath12k_core_pdev_create()
>>          * ath12k_core_rfkill_config()
>>          * ath12k_mac_register()
>>          * ath12k_hif_irq_enable()
>>
>> similarly, move the corresponding destroy/unregister/disable apis
>> inside wrapper ath12k_core_hw_group_stop()
>>
>> Add the device flags to indicate pdev created and IRQ enabled which would
>> be helpful for device clean up during failure cases.
>>
>> Tested-on: QCN9274 hw2.0 PCI WLAN.WBE.1.0.1-00029-QCAHKSWPL_SILICONZ-1
>> Tested-on: WCN7850 hw2.0 PCI WLAN.HMT.1.0-03427-QCAHMTSWPL_V1.0_V2.0_SILICONZ-1.15378.4
>>
>> Signed-off-by: Karthikeyan Periyasamy <quic_periyasa@quicinc.com>
>> Co-developed-by: Harshitha Prem <quic_hprem@quicinc.com>
>> Signed-off-by: Harshitha Prem <quic_hprem@quicinc.com>
>> ---
>>   drivers/net/wireless/ath/ath12k/core.c | 210 +++++++++++++++++++------
>>   drivers/net/wireless/ath/ath12k/core.h |  32 ++++
>>   2 files changed, 191 insertions(+), 51 deletions(-)
>>
>> diff --git a/drivers/net/wireless/ath/ath12k/core.c b/drivers/net/wireless/ath/ath12k/core.c
>> index ebe31cbb6435..90c70dbfc50a 100644
>> --- a/drivers/net/wireless/ath/ath12k/core.c
>> +++ b/drivers/net/wireless/ath/ath12k/core.c
>> @@ -563,6 +563,9 @@ u32 ath12k_core_get_max_num_tids(struct ath12k_base *ab)
>>   
>>   static void ath12k_core_stop(struct ath12k_base *ab)
>>   {
>> +	clear_bit(ATH12K_FLAG_CORE_STARTED, &ab->dev_flags);
>> +	ath12k_dec_num_core_started(ab);
>> +
>>   	if (!test_bit(ATH12K_FLAG_CRASH_FLUSH, &ab->dev_flags))
>>   		ath12k_qmi_firmware_stop(ab);
>>   
>> @@ -689,11 +692,15 @@ static int ath12k_core_pdev_create(struct ath12k_base *ab)
>>   		return ret;
>>   	}
>>   
>> +	set_bit(ATH12K_FLAG_PDEV_CREATED, &ab->dev_flags);
>> +
>>   	return 0;
>>   }
>>   
>>   static void ath12k_core_pdev_destroy(struct ath12k_base *ab)
>>   {
>> +	clear_bit(ATH12K_FLAG_PDEV_CREATED, &ab->dev_flags);
>> +
>>   	ath12k_dp_pdev_free(ab);
>>   }
>>   
>> @@ -702,6 +709,8 @@ static int ath12k_core_start(struct ath12k_base *ab,
>>   {
>>   	int ret;
>>   
>> +	lockdep_assert_held(&ab->core_lock);
>> +
>>   	ret = ath12k_wmi_attach(ab);
>>   	if (ret) {
>>   		ath12k_err(ab, "failed to attach wmi: %d\n", ret);
>> @@ -795,6 +804,12 @@ static int ath12k_core_start(struct ath12k_base *ab,
>>   		/* ACPI is optional so continue in case of an error */
>>   		ath12k_dbg(ab, ATH12K_DBG_BOOT, "acpi failed: %d\n", ret);
>>   
>> +	if (!test_bit(ATH12K_FLAG_CORE_STARTED, &ab->dev_flags)) {
>> +		/* Indicate the core start in the appropriate group */
>> +		ath12k_inc_num_core_started(ab);
>> +		set_bit(ATH12K_FLAG_CORE_STARTED, &ab->dev_flags);
>> +	}
>> +
>>   	return 0;
>>   
>>   err_reo_cleanup:
>> @@ -806,6 +821,108 @@ static int ath12k_core_start(struct ath12k_base *ab,
>>   	return ret;
>>   }
>>   
>> +static void ath12k_core_device_cleanup(struct ath12k_base *ab)
>> +{
>> +	mutex_lock(&ab->core_lock);
>> +
>> +	if (test_and_clear_bit(ATH12K_FLAG_CORE_HIF_IRQ_ENABLED, &ab->dev_flags))
>> +		ath12k_hif_irq_disable(ab);
>> +
>> +	if (test_bit(ATH12K_FLAG_PDEV_CREATED, &ab->dev_flags))
>> +		ath12k_core_pdev_destroy(ab);
>> +
>> +	if (test_bit(ATH12K_FLAG_REGISTERED, &ab->dev_flags)) {
>> +		ath12k_mac_unregister(ab);
>> +		ath12k_mac_destroy(ab);
>> +	}
>> +
>> +	mutex_unlock(&ab->core_lock);
>> +}
> 
> This patch is just abusing flags and because of that we have spaghetti
> code. I have been disliking use of enum ath12k_dev_flags before but this
> is just looks too much. I am wondering do we need to cleanup the ath12k
> architecture first, reduce the usage of flags and then revisit this
> patchset?
> 
yeah., more dev flags :( but flags were needed for the race conditions 
when multiple devices where involved in a group, some devices would have 
completed till pdev create some might not. Some crashes were seen 
because hif_irq_disable was called for a device in a group but that 
device was not even at the stage of core register. Will check the 
possibility to  reduce the flag usage but it seemed necessary for 
multiple device group clean up.

Thanks,
Harshitha
Kalle Valo July 3, 2024, 4:28 p.m. UTC | #3
Harshitha Prem <quic_hprem@quicinc.com> writes:

>>>   +static void ath12k_core_device_cleanup(struct ath12k_base *ab)
>>> +{
>>> +	mutex_lock(&ab->core_lock);
>>> +
>>> +	if (test_and_clear_bit(ATH12K_FLAG_CORE_HIF_IRQ_ENABLED, &ab->dev_flags))
>>> +		ath12k_hif_irq_disable(ab);
>>> +
>>> +	if (test_bit(ATH12K_FLAG_PDEV_CREATED, &ab->dev_flags))
>>> +		ath12k_core_pdev_destroy(ab);
>>> +
>>> +	if (test_bit(ATH12K_FLAG_REGISTERED, &ab->dev_flags)) {
>>> +		ath12k_mac_unregister(ab);
>>> +		ath12k_mac_destroy(ab);
>>> +	}
>>> +
>>> +	mutex_unlock(&ab->core_lock);
>>> +}
>> This patch is just abusing flags and because of that we have
>> spaghetti
>> code. I have been disliking use of enum ath12k_dev_flags before but this
>> is just looks too much. I am wondering do we need to cleanup the ath12k
>> architecture first, reduce the usage of flags and then revisit this
>> patchset?
>> 
> yeah., more dev flags :( but flags were needed for the race conditions
> when multiple devices where involved in a group, some devices would
> have completed till pdev create some might not. Some crashes were seen
> because hif_irq_disable was called for a device in a group but that
> device was not even at the stage of core register. Will check the
> possibility to  reduce the flag usage but it seemed necessary for
> multiple device group clean up.

I think the core problem here is of mixing enum ath12k_hw_state and enum
ath12k_dev_flags, it's just a mess even before this patchset. For
example, these flags look like they should be part enum ath12k_hw_state
instead:

	ATH12K_FLAG_RECOVERY,
	ATH12K_FLAG_UNREGISTERING,
	ATH12K_FLAG_REGISTERED,
	ATH12K_FLAG_QMI_FAIL,

If we have an enum plus set of flags to handle the actual state then it
will become difficult to manage it all. Instead we should just have the
enum for tracking the state of the driver.
Harshitha Prem July 9, 2024, 10:14 a.m. UTC | #4
On 7/3/2024 9:58 PM, Kalle Valo wrote:
> Harshitha Prem <quic_hprem@quicinc.com> writes:
> 
>>>>    +static void ath12k_core_device_cleanup(struct ath12k_base *ab)
>>>> +{
>>>> +	mutex_lock(&ab->core_lock);
>>>> +
>>>> +	if (test_and_clear_bit(ATH12K_FLAG_CORE_HIF_IRQ_ENABLED, &ab->dev_flags))
>>>> +		ath12k_hif_irq_disable(ab);
>>>> +
>>>> +	if (test_bit(ATH12K_FLAG_PDEV_CREATED, &ab->dev_flags))
>>>> +		ath12k_core_pdev_destroy(ab);
>>>> +
>>>> +	if (test_bit(ATH12K_FLAG_REGISTERED, &ab->dev_flags)) {
>>>> +		ath12k_mac_unregister(ab);
>>>> +		ath12k_mac_destroy(ab);
>>>> +	}
>>>> +
>>>> +	mutex_unlock(&ab->core_lock);
>>>> +}
>>> This patch is just abusing flags and because of that we have
>>> spaghetti
>>> code. I have been disliking use of enum ath12k_dev_flags before but this
>>> is just looks too much. I am wondering do we need to cleanup the ath12k
>>> architecture first, reduce the usage of flags and then revisit this
>>> patchset?
>>>
>> yeah., more dev flags :( but flags were needed for the race conditions
>> when multiple devices where involved in a group, some devices would
>> have completed till pdev create some might not. Some crashes were seen
>> because hif_irq_disable was called for a device in a group but that
>> device was not even at the stage of core register. Will check the
>> possibility to  reduce the flag usage but it seemed necessary for
>> multiple device group clean up.
> 
> I think the core problem here is of mixing enum ath12k_hw_state and enum
> ath12k_dev_flags, it's just a mess even before this patchset. For
> example, these flags look like they should be part enum ath12k_hw_state
> instead:
> 
> 	ATH12K_FLAG_RECOVERY,
> 	ATH12K_FLAG_UNREGISTERING,
> 	ATH12K_FLAG_REGISTERED,
> 	ATH12K_FLAG_QMI_FAIL,
> 
> If we have an enum plus set of flags to handle the actual state then it
> will become difficult to manage it all. Instead we should just have the
> enum for tracking the state of the driver.
> 

ath12k_hw_state is the driver state representation which is used to 
indicate whether driver has started or in restarting from mac80211 
prespective where as ath12k_dev_flags closely related to devices and its 
q6 states.

So, ATH12K_FLAG_RECOVERY, ATH12K_FLAG_QMI_FAIL should be in 
ath12k_dev_flags because these are specific to Q6 crashes and failure. 
ATH12K_FLAG_UNREGISTERING is actually used to indicate pci_remove is 
initiated and we should not process any QMI events but may be naming is 
creating the confusion. ATH12K_FLAG_REGISTERED flag is used whether to 
recover or not with the information available in mac80211 to reconfig.

With hardware abstraction, it can be like 3 devices (ath12k_base) in one 
ath12k_hw and with ath12k_hw_states alone we might not be able to 
handle. Because, say device1 is in recovery, device2 is already till QMI 
firmware ready after device probing and these two devices are not 
registered to  mac80211 then we cannot set the ath12k_hw_state as ON/OFF 
or anything else.

Hence, we may require two distinct flags, where one holds the driver 
abstraction state and other is device states. With grouping complexity 
would increases as we have to sync between the devices and we require 
two flags. Please let me know your thoughts.


Thanks,
Harshitha
Kalle Valo Aug. 6, 2024, 6:03 a.m. UTC | #5
Harshitha Prem <quic_hprem@quicinc.com> writes:

> On 7/3/2024 9:58 PM, Kalle Valo wrote:
>> Harshitha Prem <quic_hprem@quicinc.com> writes:
>> 
>>>>>    +static void ath12k_core_device_cleanup(struct ath12k_base *ab)
>>>>> +{
>>>>> +	mutex_lock(&ab->core_lock);
>>>>> +
>>>>> +	if (test_and_clear_bit(ATH12K_FLAG_CORE_HIF_IRQ_ENABLED, &ab->dev_flags))
>>>>> +		ath12k_hif_irq_disable(ab);
>>>>> +
>>>>> +	if (test_bit(ATH12K_FLAG_PDEV_CREATED, &ab->dev_flags))
>>>>> +		ath12k_core_pdev_destroy(ab);
>>>>> +
>>>>> +	if (test_bit(ATH12K_FLAG_REGISTERED, &ab->dev_flags)) {
>>>>> +		ath12k_mac_unregister(ab);
>>>>> +		ath12k_mac_destroy(ab);
>>>>> +	}
>>>>> +
>>>>> +	mutex_unlock(&ab->core_lock);
>>>>> +}
>>>> This patch is just abusing flags and because of that we have
>>>> spaghetti
>>>> code. I have been disliking use of enum ath12k_dev_flags before but this
>>>> is just looks too much. I am wondering do we need to cleanup the ath12k
>>>> architecture first, reduce the usage of flags and then revisit this
>>>> patchset?
>>>>
>>> yeah., more dev flags :( but flags were needed for the race conditions
>>> when multiple devices where involved in a group, some devices would
>>> have completed till pdev create some might not. Some crashes were seen
>>> because hif_irq_disable was called for a device in a group but that
>>> device was not even at the stage of core register. Will check the
>>> possibility to  reduce the flag usage but it seemed necessary for
>>> multiple device group clean up.
>> I think the core problem here is of mixing enum ath12k_hw_state and
>> enum
>> ath12k_dev_flags, it's just a mess even before this patchset. For
>> example, these flags look like they should be part enum ath12k_hw_state
>> instead:
>> 	ATH12K_FLAG_RECOVERY,
>> 	ATH12K_FLAG_UNREGISTERING,
>> 	ATH12K_FLAG_REGISTERED,
>> 	ATH12K_FLAG_QMI_FAIL,
>> If we have an enum plus set of flags to handle the actual state then
>> it
>> will become difficult to manage it all. Instead we should just have the
>> enum for tracking the state of the driver.
>> 
>
> ath12k_hw_state is the driver state representation which is used to
> indicate whether driver has started or in restarting from mac80211
> prespective where as ath12k_dev_flags closely related to devices and
> its q6 states.
>
> So, ATH12K_FLAG_RECOVERY, ATH12K_FLAG_QMI_FAIL should be in
> ath12k_dev_flags because these are specific to Q6 crashes and failure.
> ATH12K_FLAG_UNREGISTERING is actually used to indicate pci_remove is
> initiated and we should not process any QMI events but may be naming
> is creating the confusion. ATH12K_FLAG_REGISTERED flag is used whether
> to recover or not with the information available in mac80211 to
> reconfig.
>
> With hardware abstraction, it can be like 3 devices (ath12k_base) in
> one ath12k_hw and with ath12k_hw_states alone we might not be able to
> handle. Because, say device1 is in recovery, device2 is already till
> QMI firmware ready after device probing and these two devices are not
> registered to  mac80211 then we cannot set the ath12k_hw_state as
> ON/OFF or anything else.
>
> Hence, we may require two distinct flags, where one holds the driver
> abstraction state and other is device states. With grouping complexity
> would increases as we have to sync between the devices and we require
> two flags. Please let me know your thoughts.

Can you elaborate, for example provide exact state machine description
for all these states? I just can't understand how using several flags in
addition of an enum as different states makes anything easier to manage.
To me that sounds just like spaghetti code.

What I'm thinking is we should cleanup ath12k_core_start(). Now it's
basically asynchronous (ie. qmi.c calls
ath12k_core_qmi_firmware_ready()) but if we change it to be synchronous
(using completions etc.) I believe we could get rid of lots of these
annoying flags. In other words, if ath12k_core_start() returns with zero
then the firmware has booted succesfully.
Harshitha Prem Aug. 6, 2024, 11:43 a.m. UTC | #6
On 8/6/2024 11:33 AM, Kalle Valo wrote:
> Harshitha Prem <quic_hprem@quicinc.com> writes:
> 
>> On 7/3/2024 9:58 PM, Kalle Valo wrote:
>>> Harshitha Prem <quic_hprem@quicinc.com> writes:
>>>
>>>>>>     +static void ath12k_core_device_cleanup(struct ath12k_base *ab)
>>>>>> +{
>>>>>> +	mutex_lock(&ab->core_lock);
>>>>>> +
>>>>>> +	if (test_and_clear_bit(ATH12K_FLAG_CORE_HIF_IRQ_ENABLED, &ab->dev_flags))
>>>>>> +		ath12k_hif_irq_disable(ab);
>>>>>> +
>>>>>> +	if (test_bit(ATH12K_FLAG_PDEV_CREATED, &ab->dev_flags))
>>>>>> +		ath12k_core_pdev_destroy(ab);
>>>>>> +
>>>>>> +	if (test_bit(ATH12K_FLAG_REGISTERED, &ab->dev_flags)) {
>>>>>> +		ath12k_mac_unregister(ab);
>>>>>> +		ath12k_mac_destroy(ab);
>>>>>> +	}
>>>>>> +
>>>>>> +	mutex_unlock(&ab->core_lock);
>>>>>> +}
>>>>> This patch is just abusing flags and because of that we have
>>>>> spaghetti
>>>>> code. I have been disliking use of enum ath12k_dev_flags before but this
>>>>> is just looks too much. I am wondering do we need to cleanup the ath12k
>>>>> architecture first, reduce the usage of flags and then revisit this
>>>>> patchset?
>>>>>
>>>> yeah., more dev flags :( but flags were needed for the race conditions
>>>> when multiple devices where involved in a group, some devices would
>>>> have completed till pdev create some might not. Some crashes were seen
>>>> because hif_irq_disable was called for a device in a group but that
>>>> device was not even at the stage of core register. Will check the
>>>> possibility to  reduce the flag usage but it seemed necessary for
>>>> multiple device group clean up.
>>> I think the core problem here is of mixing enum ath12k_hw_state and
>>> enum
>>> ath12k_dev_flags, it's just a mess even before this patchset. For
>>> example, these flags look like they should be part enum ath12k_hw_state
>>> instead:
>>> 	ATH12K_FLAG_RECOVERY,
>>> 	ATH12K_FLAG_UNREGISTERING,
>>> 	ATH12K_FLAG_REGISTERED,
>>> 	ATH12K_FLAG_QMI_FAIL,
>>> If we have an enum plus set of flags to handle the actual state then
>>> it
>>> will become difficult to manage it all. Instead we should just have the
>>> enum for tracking the state of the driver.
>>>
>>
>> ath12k_hw_state is the driver state representation which is used to
>> indicate whether driver has started or in restarting from mac80211
>> prespective where as ath12k_dev_flags closely related to devices and
>> its q6 states.
>>
>> So, ATH12K_FLAG_RECOVERY, ATH12K_FLAG_QMI_FAIL should be in
>> ath12k_dev_flags because these are specific to Q6 crashes and failure.
>> ATH12K_FLAG_UNREGISTERING is actually used to indicate pci_remove is
>> initiated and we should not process any QMI events but may be naming
>> is creating the confusion. ATH12K_FLAG_REGISTERED flag is used whether
>> to recover or not with the information available in mac80211 to
>> reconfig.
>>
>> With hardware abstraction, it can be like 3 devices (ath12k_base) in
>> one ath12k_hw and with ath12k_hw_states alone we might not be able to
>> handle. Because, say device1 is in recovery, device2 is already till
>> QMI firmware ready after device probing and these two devices are not
>> registered to  mac80211 then we cannot set the ath12k_hw_state as
>> ON/OFF or anything else.
>>
>> Hence, we may require two distinct flags, where one holds the driver
>> abstraction state and other is device states. With grouping complexity
>> would increases as we have to sync between the devices and we require
>> two flags. Please let me know your thoughts.
> 
> Can you elaborate, for example provide exact state machine description
> for all these states? I just can't understand how using several flags in
> addition of an enum as different states makes anything easier to manage.
> To me that sounds just like spaghetti code.
> 
No new flags are introduced in this patchset, I have revisited and 
removed it any new flags. But, there are some existing flags which seems 
to be necessary in case of firmware crash or QMI handshake failure 
scenario.

Please find the details of how each flags/state in device 
(ath12k_dev_flags) is used currently :

  1. ATH12K_CAC_RUNNING - This is used to indicate whether any ongoing 
CAC is in progress for the radio in device, if yes, then we should not 
handle any data packets or management packets that are received over dp 
rx or wmi event respectively during this state.
The state is set during vdev start that is initiated via 
assign_vif_chanctx or switch_vif_chanctx. here, the vif_chanctx is from 
mac80211 but the data rx or wmi event is from the device which are both 
different context. We cannot sync between them, so this flag helps in 
avoiding processing the packets.

  2. ATH12K_FLAG_CRASH_FLUSH - this is used to indicate that the QMI 
connection between the host driver and Q6 is lost. This is set as soon 
as driver receives an QMI server exit event. So, once this is set we 
have to ensure that no data packets to be queued for hardware and no WMI 
commands are posted to firmware. All the calls to send any WMI command 
or data packet would be from mac80211.

3. ATH12K_FLAG_RAW_MODE - this flag indicates that the firmware or 
hardware operates in RAW mode. Based on this we have to set the rx/tx 
decap type to indicate firmware how to handle packets from this type of 
vif. (Do we necessary need to keep in dev_flags? may be dev_mode)

4. ATH12K_FLAG_HW_CRYPTO_DISABLED - this indicates whether software 
encryption is set or not in raw mode.

5. ATH12K_FLAG_RECOVERY - This is set during QMI server exit but not 
used anywhere. (Unused. Remove?)

6. ATH12K_FLAG_UNREGISTERING - this is set during pci remove. if this 
flag is set driver should avoid handling any mhi events/QMI events 
received from the target/hardware. (May be rename to pci shutdown?)

7. ATH12K_FLAG_REGISTERED - this is set once the device is successfully 
registered to mac80211. So, later if this device encounters any firmware 
crash, it can be recovered smoothly without registering. Also, this flag 
is used during pci_suspend scenario. If device crashes even before 
registering to mac80211 we have to avoid recovering it and this flags 
helps the same.

Once firmware crash is occurred, after collecting the dumps again the 
QMI handshake for that device will start from QMI server arrive to 
ATH12K_QMI_EVENT_FW_READY and in firmware ready we should not start the 
core again rather init only few and try to reconfigure the device with 
available data from mac80211.

8. ATH12K_FLAG_QMI_FAIL - this is set when any of the QMI handshakes 
fail and tested in rmmod case to skip the cleanup process.

The following flags are added by MCC team, I am not so clear on the use 
case seems like there are few cases in suspend use case where they need 
these states.

9. ATH12K_FLAG_HTC_SUSPEND_COMPLETE - Used for htc suspend case.
10. ATH12K_FLAG_CE_IRQ_ENABLED
11. ATH12K_FLAG_EXT_IRQ_ENABLED


So, these device states are more closely related to the 
hardware/firmware in which state it is whether firmware crashed after 
mac80211 register or before and based on it whether driver can process 
the dp tx/rx or wmi tx/rx as these would be from different context. Not 
sure, how we will be able to sync among various contexts/threads without 
these flags, please let me know your thoughts?


Now, on the otherhand, we have ath12k_hw_state which is used by mac80211 
during recovery or driver start or stop.


In multiple device grouping, say if we have 3 pci devices connected  and 
grouped, each of them will be probed separately and core_init would be 
done for each. During core_init, QMI init would be called. So, now 
device1, device2 and device3 would have invoked the QMI init. Since the 
QMI handshake happens between host and Q6 processor separately for each 
device (QMI handshake would be in order for that particular device but 
is asynchronous between the devices in group), driver would not have 
control on which device (whether device1/device2/device3) will receive 
the QMI server arrive event first. Hence, we are using mutex as well as 
defer mechanism to sync the devices as some of the MLO partner device 
params has to be filled in QMI host cap. Kindly refer below patchset.

https://patchwork.kernel.org/project/linux-wireless/patch/20240520131409.676931-1-quic_hprem@quicinc.com/

> What I'm thinking is we should cleanup ath12k_core_start(). Now it's
> basically asynchronous (ie. qmi.c calls
> ath12k_core_qmi_firmware_ready()) but if we change it to be synchronous
> (using completions etc.) I believe we could get rid of lots of these
> annoying flags. In other words, if ath12k_core_start() returns with zero
> then the firmware has booted succesfully.
> 
Sorry, I could not get your question, because already 
ath12k_core_start() is called from ath12k_core_qmi_firmware_ready() 
which means firmware has booted successfully and in core_start we 
register to mac80211 and enable the irqs.

Thanks,
Harshitha
diff mbox series

Patch

diff --git a/drivers/net/wireless/ath/ath12k/core.c b/drivers/net/wireless/ath/ath12k/core.c
index ebe31cbb6435..90c70dbfc50a 100644
--- a/drivers/net/wireless/ath/ath12k/core.c
+++ b/drivers/net/wireless/ath/ath12k/core.c
@@ -563,6 +563,9 @@  u32 ath12k_core_get_max_num_tids(struct ath12k_base *ab)
 
 static void ath12k_core_stop(struct ath12k_base *ab)
 {
+	clear_bit(ATH12K_FLAG_CORE_STARTED, &ab->dev_flags);
+	ath12k_dec_num_core_started(ab);
+
 	if (!test_bit(ATH12K_FLAG_CRASH_FLUSH, &ab->dev_flags))
 		ath12k_qmi_firmware_stop(ab);
 
@@ -689,11 +692,15 @@  static int ath12k_core_pdev_create(struct ath12k_base *ab)
 		return ret;
 	}
 
+	set_bit(ATH12K_FLAG_PDEV_CREATED, &ab->dev_flags);
+
 	return 0;
 }
 
 static void ath12k_core_pdev_destroy(struct ath12k_base *ab)
 {
+	clear_bit(ATH12K_FLAG_PDEV_CREATED, &ab->dev_flags);
+
 	ath12k_dp_pdev_free(ab);
 }
 
@@ -702,6 +709,8 @@  static int ath12k_core_start(struct ath12k_base *ab,
 {
 	int ret;
 
+	lockdep_assert_held(&ab->core_lock);
+
 	ret = ath12k_wmi_attach(ab);
 	if (ret) {
 		ath12k_err(ab, "failed to attach wmi: %d\n", ret);
@@ -795,6 +804,12 @@  static int ath12k_core_start(struct ath12k_base *ab,
 		/* ACPI is optional so continue in case of an error */
 		ath12k_dbg(ab, ATH12K_DBG_BOOT, "acpi failed: %d\n", ret);
 
+	if (!test_bit(ATH12K_FLAG_CORE_STARTED, &ab->dev_flags)) {
+		/* Indicate the core start in the appropriate group */
+		ath12k_inc_num_core_started(ab);
+		set_bit(ATH12K_FLAG_CORE_STARTED, &ab->dev_flags);
+	}
+
 	return 0;
 
 err_reo_cleanup:
@@ -806,6 +821,108 @@  static int ath12k_core_start(struct ath12k_base *ab,
 	return ret;
 }
 
+static void ath12k_core_device_cleanup(struct ath12k_base *ab)
+{
+	mutex_lock(&ab->core_lock);
+
+	if (test_and_clear_bit(ATH12K_FLAG_CORE_HIF_IRQ_ENABLED, &ab->dev_flags))
+		ath12k_hif_irq_disable(ab);
+
+	if (test_bit(ATH12K_FLAG_PDEV_CREATED, &ab->dev_flags))
+		ath12k_core_pdev_destroy(ab);
+
+	if (test_bit(ATH12K_FLAG_REGISTERED, &ab->dev_flags)) {
+		ath12k_mac_unregister(ab);
+		ath12k_mac_destroy(ab);
+	}
+
+	mutex_unlock(&ab->core_lock);
+}
+
+static void ath12k_core_hw_group_stop(struct ath12k_hw_group *ag)
+{
+	struct ath12k_base *ab;
+	int i;
+
+	lockdep_assert_held(&ag->mutex_lock);
+
+	for (i = ag->num_devices - 1; i >= 0; i--) {
+		ab = ag->ab[i];
+		if (!ab)
+			continue;
+		ath12k_core_device_cleanup(ab);
+	}
+}
+
+static int ath12k_core_hw_group_start(struct ath12k_hw_group *ag)
+{
+	struct ath12k_base *ab;
+	int ret, i;
+	bool is_registered;
+
+	lockdep_assert_held(&ag->mutex_lock);
+
+	for (i = 0; i < ag->num_devices; i++) {
+		ab = ag->ab[i];
+		if (!ab)
+			continue;
+
+		mutex_lock(&ab->core_lock);
+
+		/* Check if already registered or not, since same flow
+		 * execute for HW restart case.
+		 */
+		is_registered = test_bit(ATH12K_FLAG_REGISTERED, &ab->dev_flags);
+
+		if (is_registered)
+			goto core_pdev_create;
+
+		ret = ath12k_mac_allocate(ab);
+		if (ret) {
+			ath12k_err(ab, "failed to create new hw device with mac80211 :%d\n",
+				   ret);
+			mutex_unlock(&ab->core_lock);
+			return ret;
+		}
+
+		ret = ath12k_mac_register(ab);
+		if (ret) {
+			ath12k_err(ab, "failed to register radio with mac80211: %d\n",
+				   ret);
+			mutex_unlock(&ab->core_lock);
+			goto err;
+		}
+
+core_pdev_create:
+		ret = ath12k_core_pdev_create(ab);
+		if (ret) {
+			ath12k_err(ab, "failed to create pdev core %d\n", ret);
+			mutex_unlock(&ab->core_lock);
+			goto err;
+		}
+
+		ath12k_hif_irq_enable(ab);
+		set_bit(ATH12K_FLAG_CORE_HIF_IRQ_ENABLED, &ab->dev_flags);
+
+		ret = ath12k_core_rfkill_config(ab);
+		if (ret && ret != -EOPNOTSUPP) {
+			mutex_unlock(&ab->core_lock);
+			goto err;
+		}
+
+		mutex_unlock(&ab->core_lock);
+	}
+
+	set_bit(ATH12K_GROUP_FLAG_REGISTERED, &ag->flags);
+
+	return 0;
+
+err:
+	ath12k_core_hw_group_stop(ag);
+
+	return ret;
+}
+
 static int ath12k_core_start_firmware(struct ath12k_base *ab,
 				      enum ath12k_firmware_mode mode)
 {
@@ -823,9 +940,18 @@  static int ath12k_core_start_firmware(struct ath12k_base *ab,
 	return ret;
 }
 
+static inline
+bool ath12k_core_hw_group_start_ready(struct ath12k_hw_group *ag)
+{
+	lockdep_assert_held(&ag->mutex_lock);
+
+	return (ag->num_started == ag->num_devices);
+}
+
 int ath12k_core_qmi_firmware_ready(struct ath12k_base *ab)
 {
-	int ret;
+	struct ath12k_hw_group *ag = ath12k_ab_to_ag(ab);
+	int ret, i;
 
 	ret = ath12k_core_start_firmware(ab, ATH12K_FIRMWARE_MODE_NORMAL);
 	if (ret) {
@@ -845,59 +971,48 @@  int ath12k_core_qmi_firmware_ready(struct ath12k_base *ab)
 		goto err_firmware_stop;
 	}
 
+	mutex_lock(&ag->mutex_lock);
 	mutex_lock(&ab->core_lock);
 	ret = ath12k_core_start(ab, ATH12K_FIRMWARE_MODE_NORMAL);
 	if (ret) {
 		ath12k_err(ab, "failed to start core: %d\n", ret);
 		goto err_dp_free;
 	}
+	mutex_unlock(&ab->core_lock);
 
-	ret = ath12k_mac_allocate(ab);
-	if (ret) {
-		ath12k_err(ab, "failed to create new hw device with mac80211 :%d\n",
-			   ret);
-		goto err_core_stop;
-	}
-
-	ret = ath12k_mac_register(ab);
-	if (ret) {
-		ath12k_err(ab, "failed register the radio with mac80211: %d\n", ret);
-		goto err_mac_destroy;
+	if (ath12k_core_hw_group_start_ready(ag)) {
+		ret = ath12k_core_hw_group_start(ag);
+		if (ret) {
+			ath12k_warn(ab, "unable to start hw group\n");
+			goto err_core_stop;
+		}
+		ath12k_dbg(ab, ATH12K_DBG_BOOT, "group %d started\n", ag->id);
 	}
+	mutex_unlock(&ag->mutex_lock);
 
-	ret = ath12k_core_pdev_create(ab);
-	if (ret) {
-		ath12k_err(ab, "failed to create pdev core: %d\n", ret);
-		goto err_mac_unregister;
-	}
+	return 0;
 
-	ath12k_hif_irq_enable(ab);
+err_core_stop:
+	for (i = ag->num_devices - 1; i >= 0; i--) {
+		ab = ag->ab[i];
+		if (!ab)
+			continue;
 
-	ret = ath12k_core_rfkill_config(ab);
-	if (ret && ret != -EOPNOTSUPP) {
-		ath12k_err(ab, "failed to config rfkill: %d\n", ret);
-		goto err_core_pdev_destroy;
+		mutex_lock(&ab->core_lock);
+		if (test_bit(ATH12K_FLAG_CORE_STARTED, &ab->dev_flags))
+			ath12k_core_stop(ab);
+		mutex_unlock(&ab->core_lock);
 	}
+	goto exit;
 
-	mutex_unlock(&ab->core_lock);
-
-	return 0;
-
-err_core_pdev_destroy:
-	ath12k_hif_irq_disable(ab);
-	ath12k_core_pdev_destroy(ab);
-err_mac_unregister:
-	ath12k_mac_unregister(ab);
-err_mac_destroy:
-	ath12k_mac_destroy(ab);
-err_core_stop:
-	ath12k_core_stop(ab);
 err_dp_free:
 	ath12k_dp_free(ab);
 	mutex_unlock(&ab->core_lock);
 err_firmware_stop:
 	ath12k_qmi_firmware_stop(ab);
 
+exit:
+	mutex_unlock(&ag->mutex_lock);
 	return ret;
 }
 
@@ -1258,7 +1373,7 @@  static struct ath12k_hw_group *ath12k_core_assign_hw_group(struct ath12k_base *a
 
 void ath12k_core_unassign_hw_group(struct ath12k_base *ab)
 {
-	struct ath12k_hw_group *ag = ab->ag;
+	struct ath12k_hw_group *ag = ath12k_ab_to_ag(ab);
 	u8 device_id = ab->device_id;
 	int num_probed;
 
@@ -1292,19 +1407,6 @@  void ath12k_core_unassign_hw_group(struct ath12k_base *ab)
 		ath12k_core_hw_group_free(ag);
 }
 
-static void ath12k_core_device_cleanup(struct ath12k_base *ab)
-{
-	mutex_lock(&ab->core_lock);
-
-	ath12k_hif_irq_disable(ab);
-	ath12k_core_pdev_destroy(ab);
-	ath12k_mac_unregister(ab);
-	ath12k_mac_destroy(ab);
-	ath12k_core_stop(ab);
-
-	mutex_unlock(&ab->core_lock);
-}
-
 static void ath12k_core_hw_group_destroy(struct ath12k_hw_group *ag)
 {
 	struct ath12k_base *ab;
@@ -1332,14 +1434,20 @@  static void ath12k_core_hw_group_cleanup(struct ath12k_hw_group *ag)
 		return;
 
 	mutex_lock(&ag->mutex_lock);
+	if (test_and_clear_bit(ATH12K_GROUP_FLAG_REGISTERED, &ag->flags))
+		ath12k_core_hw_group_stop(ag);
+
 	for (i = 0; i < ag->num_devices; i++) {
 		ab = ag->ab[i];
 		if (!ab)
 			continue;
 
-		if (test_bit(ATH12K_FLAG_QMI_FW_READY_COMPLETE, &ab->dev_flags))
-			ath12k_core_device_cleanup(ab);
+		mutex_lock(&ab->core_lock);
+		if (test_bit(ATH12K_FLAG_CORE_STARTED, &ab->dev_flags))
+			ath12k_core_stop(ab);
+		mutex_unlock(&ab->core_lock);
 	}
+
 	mutex_unlock(&ag->mutex_lock);
 }
 
diff --git a/drivers/net/wireless/ath/ath12k/core.h b/drivers/net/wireless/ath/ath12k/core.h
index a6b8c100ebc8..d955deb08fd4 100644
--- a/drivers/net/wireless/ath/ath12k/core.h
+++ b/drivers/net/wireless/ath/ath12k/core.h
@@ -200,6 +200,10 @@  enum ath12k_scan_state {
 	ATH12K_SCAN_ABORTING,
 };
 
+enum ath12k_hw_group_flags {
+	ATH12K_GROUP_FLAG_REGISTERED,
+};
+
 enum ath12k_dev_flags {
 	ATH12K_CAC_RUNNING,
 	ATH12K_FLAG_CRASH_FLUSH,
@@ -214,6 +218,9 @@  enum ath12k_dev_flags {
 	ATH12K_FLAG_EXT_IRQ_ENABLED,
 	ATH12K_FLAG_QMI_FW_READY_COMPLETE,
 	ATH12K_FLAG_HW_GROUP_ATTACHED,
+	ATH12K_FLAG_PDEV_CREATED,
+	ATH12K_FLAG_CORE_STARTED,
+	ATH12K_FLAG_CORE_HIF_IRQ_ENABLED,
 };
 
 struct ath12k_tx_conf {
@@ -736,6 +743,8 @@  struct ath12k_hw_group {
 	u8 id;
 	u8 num_devices;
 	u8 num_probed;
+	u8 num_started;
+	unsigned long flags;
 	struct ath12k_base *ab[ATH12K_MAX_SOCS];
 	/* To synchronize group create, assign, start, stop */
 	struct mutex mutex_lock;
@@ -1087,4 +1096,27 @@  static inline int ath12k_get_num_hw(struct ath12k_base *ab)
 {
 	return ab->num_hw;
 }
+
+static inline
+struct ath12k_hw_group *ath12k_ab_to_ag(struct ath12k_base *ab)
+{
+	return ab->ag;
+}
+
+static inline
+void ath12k_inc_num_core_started(struct ath12k_base *ab)
+{
+	lockdep_assert_held(&ab->ag->mutex_lock);
+
+	ab->ag->num_started++;
+}
+
+static inline
+void ath12k_dec_num_core_started(struct ath12k_base *ab)
+{
+	lockdep_assert_held(&ab->ag->mutex_lock);
+
+	ab->ag->num_started--;
+}
+
 #endif /* _CORE_H_ */