diff mbox series

[v4,1/3] wifi: ath12k: report station mode transmit rate

Message ID 20240419032122.7009-2-quic_lingbok@quicinc.com
State New
Headers show
Series wifi: ath12k: report station mode stats | expand

Commit Message

Lingbo Kong April 19, 2024, 3:21 a.m. UTC
Currently, the transmit rate of "iw dev xxx station dump" command
always show an invalid value.

To address this issue, ath12k parse the info of transmit complete
report from firmware and indicate the transmit rate to mac80211.

This patch affects the station mode of WCN7850 and QCN9274.

After that, "iw dev xxx station dump" show the correct transmit rate.
Such as:

Station 00:03:7f:12:03:03 (on wlo1)
        inactive time:  872 ms
        rx bytes:       219111
        rx packets:     1133
        tx bytes:       53767
        tx packets:     462
        tx retries:     51
        tx failed:      0
        beacon loss:    0
        beacon rx:      403
        rx drop misc:   74
        signal:         -95 dBm
        beacon signal avg:      -18 dBm
        tx bitrate:     1441.1 MBit/s 80MHz EHT-MCS 13 EHT-NSS 2 EHT-GI 0

Tested-on: WCN7850 hw2.0 PCI WLAN.HMT.1.0.c5-00481-QCAHMTSWPL_V1.0_V2.0_SILICONZ-3
Tested-on: QCN9274 hw2.0 PCI WLAN.WBE.1.2.1-00201-QCAHKSWPL_SILICONZ-1

Signed-off-by: Lingbo Kong <quic_lingbok@quicinc.com>
---
v4:
1.change ATH12K_EHT_MCS_MAX from 13 to 15

v3:
no change

v2:
1.change copyright

 drivers/net/wireless/ath/ath12k/core.h   |   2 +
 drivers/net/wireless/ath/ath12k/dp_rx.h  |   3 +
 drivers/net/wireless/ath/ath12k/dp_tx.c  | 147 ++++++++++++++++++++++-
 drivers/net/wireless/ath/ath12k/hal_tx.h |   9 +-
 drivers/net/wireless/ath/ath12k/mac.c    | 124 +++++++++++++++++++
 drivers/net/wireless/ath/ath12k/mac.h    |   4 +-
 6 files changed, 282 insertions(+), 7 deletions(-)

Comments

Kalle Valo April 25, 2024, 10:37 a.m. UTC | #1
Lingbo Kong <quic_lingbok@quicinc.com> writes:

> Currently, the transmit rate of "iw dev xxx station dump" command
> always show an invalid value.
>
> To address this issue, ath12k parse the info of transmit complete
> report from firmware and indicate the transmit rate to mac80211.
>
> This patch affects the station mode of WCN7850 and QCN9274.
>
> After that, "iw dev xxx station dump" show the correct transmit rate.
> Such as:
>
> Station 00:03:7f:12:03:03 (on wlo1)
>         inactive time:  872 ms
>         rx bytes:       219111
>         rx packets:     1133
>         tx bytes:       53767
>         tx packets:     462
>         tx retries:     51
>         tx failed:      0
>         beacon loss:    0
>         beacon rx:      403
>         rx drop misc:   74
>         signal:         -95 dBm
>         beacon signal avg:      -18 dBm
>         tx bitrate:     1441.1 MBit/s 80MHz EHT-MCS 13 EHT-NSS 2 EHT-GI 0
>
> Tested-on: WCN7850 hw2.0 PCI WLAN.HMT.1.0.c5-00481-QCAHMTSWPL_V1.0_V2.0_SILICONZ-3
> Tested-on: QCN9274 hw2.0 PCI WLAN.WBE.1.2.1-00201-QCAHKSWPL_SILICONZ-1
>
> Signed-off-by: Lingbo Kong <quic_lingbok@quicinc.com>

[...]

> +static void ath12k_dp_tx_update(struct ath12k *ar, struct hal_tx_status *ts)
> +{
> +	if (ar->last_ppdu_id != 0) {
> +		if (ar->last_ppdu_id == ts->ppdu_id ||
> +		    ar->cached_ppdu_id == ar->last_ppdu_id)
> +			ar->cached_ppdu_id = ar->last_ppdu_id;
> +
> +		ath12k_dp_tx_update_txcompl(ar, ts);
> +	}
> +
> +	ar->last_ppdu_id = ts->ppdu_id;
> +}

A code comment would help a lot. Why is ar->cached_ppdu_id needed here?

And if 'ar->cached_ppdu_id == ar->last_ppdu_id' is true why do then do
'ar->cached_ppdu_id = ar->last_ppdu_id'? The value of ar->cached_ppdu_id
is not changing here (unless I'm missing something).

Also I'm worried about locking. How is access to ar->last_ppdu_id and
ar->cached_ppdu_id protected?
Kalle Valo April 25, 2024, 4:54 p.m. UTC | #2
Lingbo Kong <quic_lingbok@quicinc.com> writes:

> Currently, the transmit rate of "iw dev xxx station dump" command
> always show an invalid value.
>
> To address this issue, ath12k parse the info of transmit complete
> report from firmware and indicate the transmit rate to mac80211.
>
> This patch affects the station mode of WCN7850 and QCN9274.
>
> After that, "iw dev xxx station dump" show the correct transmit rate.
> Such as:
>
> Station 00:03:7f:12:03:03 (on wlo1)
>         inactive time:  872 ms
>         rx bytes:       219111
>         rx packets:     1133
>         tx bytes:       53767
>         tx packets:     462
>         tx retries:     51
>         tx failed:      0
>         beacon loss:    0
>         beacon rx:      403
>         rx drop misc:   74
>         signal:         -95 dBm
>         beacon signal avg:      -18 dBm
>         tx bitrate:     1441.1 MBit/s 80MHz EHT-MCS 13 EHT-NSS 2 EHT-GI 0
>
> Tested-on: WCN7850 hw2.0 PCI WLAN.HMT.1.0.c5-00481-QCAHMTSWPL_V1.0_V2.0_SILICONZ-3
> Tested-on: QCN9274 hw2.0 PCI WLAN.WBE.1.2.1-00201-QCAHKSWPL_SILICONZ-1
>
> Signed-off-by: Lingbo Kong <quic_lingbok@quicinc.com>

I'm still going throught the patchset, please don't send a new version
yet. Few quick comments:

> +static void ath12k_dp_tx_update_txcompl(struct ath12k *ar, struct hal_tx_status *ts)
> +{
> +	struct ath12k_base *ab = ar->ab;
> +	struct ath12k_peer *peer;
> +	struct ath12k_sta *arsta;
> +	struct ieee80211_sta *sta;
> +	u16 rate;
> +	u8 rate_idx = 0;
> +	int ret;
> +
> +	spin_lock_bh(&ab->base_lock);

Did you analyse how this function, and especially taking the base_lock,
affects performance?

> +enum nl80211_he_ru_alloc ath12k_mac_he_ru_tones_to_nl80211_he_ru_alloc(u16 ru_tones)
> +{
> +	enum nl80211_he_ru_alloc ret;
> +
> +	switch (ru_tones) {
> +	case 26:
> +		ret = NL80211_RATE_INFO_HE_RU_ALLOC_26;
> +		break;
> +	case 52:
> +		ret = NL80211_RATE_INFO_HE_RU_ALLOC_52;
> +		break;
> +	case 106:
> +		ret = NL80211_RATE_INFO_HE_RU_ALLOC_106;
> +		break;
> +	case 242:
> +		ret = NL80211_RATE_INFO_HE_RU_ALLOC_242;
> +		break;
> +	case 484:
> +		ret = NL80211_RATE_INFO_HE_RU_ALLOC_484;
> +		break;
> +	case 996:
> +		ret = NL80211_RATE_INFO_HE_RU_ALLOC_996;
> +		break;
> +	case (996 * 2):
> +		ret = NL80211_RATE_INFO_HE_RU_ALLOC_2x996;
> +		break;
> +	default:
> +		ret = NL80211_RATE_INFO_HE_RU_ALLOC_26;
> +		break;
> +	}
> +
> +	return ret;
> +}

How does this function compare to ath12k_he_ru_tones_to_nl80211_he_ru_alloc()?

> +enum nl80211_eht_gi ath12k_mac_eht_gi_to_nl80211_eht_gi(u8 sgi)
> +{
> +	enum nl80211_eht_gi ret;
> +
> +	switch (sgi) {
> +	case RX_MSDU_START_SGI_0_8_US:
> +		ret = NL80211_RATE_INFO_EHT_GI_0_8;
> +		break;
> +	case RX_MSDU_START_SGI_1_6_US:
> +		ret = NL80211_RATE_INFO_EHT_GI_1_6;
> +		break;
> +	case RX_MSDU_START_SGI_3_2_US:
> +		ret = NL80211_RATE_INFO_EHT_GI_3_2;
> +		break;
> +	default:
> +		ret = NL80211_RATE_INFO_EHT_GI_0_8;
> +		break;
> +	}
> +
> +	return ret;
> +}

BTW the ret variable is unnessary, this could be simplified to:

switch (foo) {
case FOO1:
        return BAR1;
case FOO2:
        return BAR2;
default:
        return BAR3;
}

> +enum nl80211_eht_ru_alloc ath12k_mac_eht_ru_tones_to_nl80211_eht_ru_alloc(u16 ru_tones)
> +{
> +	enum nl80211_eht_ru_alloc ret;
> +
> +	switch (ru_tones) {
> +	case 26:
> +		ret = NL80211_RATE_INFO_EHT_RU_ALLOC_26;
> +		break;
> +	case 52:
> +		ret = NL80211_RATE_INFO_EHT_RU_ALLOC_52;
> +		break;
> +	case (52 + 26):
> +		ret = NL80211_RATE_INFO_EHT_RU_ALLOC_52P26;
> +		break;
> +	case 106:
> +		ret = NL80211_RATE_INFO_EHT_RU_ALLOC_106;
> +		break;
> +	case (106 + 26):
> +		ret = NL80211_RATE_INFO_EHT_RU_ALLOC_106P26;
> +		break;
> +	case 242:
> +		ret = NL80211_RATE_INFO_EHT_RU_ALLOC_242;
> +		break;
> +	case 484:
> +		ret = NL80211_RATE_INFO_EHT_RU_ALLOC_484;
> +		break;
> +	case (484 + 242):
> +		ret = NL80211_RATE_INFO_EHT_RU_ALLOC_484P242;
> +		break;
> +	case 996:
> +		ret = NL80211_RATE_INFO_EHT_RU_ALLOC_996;
> +		break;
> +	case (996 + 484):
> +		ret = NL80211_RATE_INFO_EHT_RU_ALLOC_996P484;
> +		break;
> +	case (996 + 484 + 242):
> +		ret = NL80211_RATE_INFO_EHT_RU_ALLOC_996P484P242;
> +		break;
> +	case (2 * 996):
> +		ret = NL80211_RATE_INFO_EHT_RU_ALLOC_2x996;
> +		break;
> +	case (2 * 996 + 484):
> +		ret = NL80211_RATE_INFO_EHT_RU_ALLOC_2x996P484;
> +		break;
> +	case (3 * 996):
> +		ret = NL80211_RATE_INFO_EHT_RU_ALLOC_3x996;
> +		break;
> +	case (3 * 996 + 484):
> +		ret = NL80211_RATE_INFO_EHT_RU_ALLOC_3x996P484;
> +		break;
> +	case (4 * 996):
> +		ret = NL80211_RATE_INFO_EHT_RU_ALLOC_4x996;
> +		break;
> +	default:
> +		ret = NL80211_RATE_INFO_EHT_RU_ALLOC_26;
> +	}
> +
> +	return ret;
> +}

Same here.
Lingbo Kong April 26, 2024, 6:41 a.m. UTC | #3
On 2024/4/26 0:54, Kalle Valo wrote:
> Lingbo Kong <quic_lingbok@quicinc.com> writes:
> 
>> Currently, the transmit rate of "iw dev xxx station dump" command
>> always show an invalid value.
>>
>> To address this issue, ath12k parse the info of transmit complete
>> report from firmware and indicate the transmit rate to mac80211.
>>
>> This patch affects the station mode of WCN7850 and QCN9274.
>>
>> After that, "iw dev xxx station dump" show the correct transmit rate.
>> Such as:
>>
>> Station 00:03:7f:12:03:03 (on wlo1)
>>          inactive time:  872 ms
>>          rx bytes:       219111
>>          rx packets:     1133
>>          tx bytes:       53767
>>          tx packets:     462
>>          tx retries:     51
>>          tx failed:      0
>>          beacon loss:    0
>>          beacon rx:      403
>>          rx drop misc:   74
>>          signal:         -95 dBm
>>          beacon signal avg:      -18 dBm
>>          tx bitrate:     1441.1 MBit/s 80MHz EHT-MCS 13 EHT-NSS 2 EHT-GI 0
>>
>> Tested-on: WCN7850 hw2.0 PCI WLAN.HMT.1.0.c5-00481-QCAHMTSWPL_V1.0_V2.0_SILICONZ-3
>> Tested-on: QCN9274 hw2.0 PCI WLAN.WBE.1.2.1-00201-QCAHKSWPL_SILICONZ-1
>>
>> Signed-off-by: Lingbo Kong <quic_lingbok@quicinc.com>
> 
> I'm still going throught the patchset, please don't send a new version
> yet. Few quick comments:
> 
>> +static void ath12k_dp_tx_update_txcompl(struct ath12k *ar, struct hal_tx_status *ts)
>> +{
>> +	struct ath12k_base *ab = ar->ab;
>> +	struct ath12k_peer *peer;
>> +	struct ath12k_sta *arsta;
>> +	struct ieee80211_sta *sta;
>> +	u16 rate;
>> +	u8 rate_idx = 0;
>> +	int ret;
>> +
>> +	spin_lock_bh(&ab->base_lock);
> 
> Did you analyse how this function, and especially taking the base_lock,
> affects performance?
> 

The base_lock is used here because of the need to look for peers based 
on the ts->peer_id when calling ath12k_peer_find_by_id() function, which 
i think might affect performance.

Do i need to run a throughput test?

>> +enum nl80211_he_ru_alloc ath12k_mac_he_ru_tones_to_nl80211_he_ru_alloc(u16 ru_tones)
>> +{
>> +	enum nl80211_he_ru_alloc ret;
>> +
>> +	switch (ru_tones) {
>> +	case 26:
>> +		ret = NL80211_RATE_INFO_HE_RU_ALLOC_26;
>> +		break;
>> +	case 52:
>> +		ret = NL80211_RATE_INFO_HE_RU_ALLOC_52;
>> +		break;
>> +	case 106:
>> +		ret = NL80211_RATE_INFO_HE_RU_ALLOC_106;
>> +		break;
>> +	case 242:
>> +		ret = NL80211_RATE_INFO_HE_RU_ALLOC_242;
>> +		break;
>> +	case 484:
>> +		ret = NL80211_RATE_INFO_HE_RU_ALLOC_484;
>> +		break;
>> +	case 996:
>> +		ret = NL80211_RATE_INFO_HE_RU_ALLOC_996;
>> +		break;
>> +	case (996 * 2):
>> +		ret = NL80211_RATE_INFO_HE_RU_ALLOC_2x996;
>> +		break;
>> +	default:
>> +		ret = NL80211_RATE_INFO_HE_RU_ALLOC_26;
>> +		break;
>> +	}
>> +
>> +	return ret;
>> +}
> 
> How does this function compare to ath12k_he_ru_tones_to_nl80211_he_ru_alloc()?
> 

ath12k_mac_he_ru_tones_to_nl80211_he_ru_alloc() is different from 
ath12k_he_ru_tones_to_nl80211_he_ru_alloc().

the logic of ath12k_he_ru_tones_to_nl80211_he_ru_alloc() is

static inline
enum nl80211_he_ru_alloc ath12k_he_ru_tones_to_nl80211_he_ru_alloc(u16 
ru_tones)
{
         enum nl80211_he_ru_alloc ret;

         switch (ru_tones) {
         case RU_52:
                 ret = NL80211_RATE_INFO_HE_RU_ALLOC_52;
                 break;
         case RU_106:
                 ret = NL80211_RATE_INFO_HE_RU_ALLOC_106;
                 break;
         case RU_242:
                 ret = NL80211_RATE_INFO_HE_RU_ALLOC_242;
                 break;
         case RU_484:
                 ret = NL80211_RATE_INFO_HE_RU_ALLOC_484;
                 break;
         case RU_996:
                 ret = NL80211_RATE_INFO_HE_RU_ALLOC_996;
                 break;
         case RU_26:
                 fallthrough;
         default:
                 ret = NL80211_RATE_INFO_HE_RU_ALLOC_26;
                 break;
         }
         return ret;
}

#define RU_26  1
#define RU_52  2
#define RU_106 4
#define RU_242 9
#define RU_484 18
#define RU_996 37


>> +enum nl80211_eht_gi ath12k_mac_eht_gi_to_nl80211_eht_gi(u8 sgi)
>> +{
>> +	enum nl80211_eht_gi ret;
>> +
>> +	switch (sgi) {
>> +	case RX_MSDU_START_SGI_0_8_US:
>> +		ret = NL80211_RATE_INFO_EHT_GI_0_8;
>> +		break;
>> +	case RX_MSDU_START_SGI_1_6_US:
>> +		ret = NL80211_RATE_INFO_EHT_GI_1_6;
>> +		break;
>> +	case RX_MSDU_START_SGI_3_2_US:
>> +		ret = NL80211_RATE_INFO_EHT_GI_3_2;
>> +		break;
>> +	default:
>> +		ret = NL80211_RATE_INFO_EHT_GI_0_8;
>> +		break;
>> +	}
>> +
>> +	return ret;
>> +}
> 
> BTW the ret variable is unnessary, this could be simplified to:
> 
> switch (foo) {
> case FOO1:
>          return BAR1;
> case FOO2:
>          return BAR2;
> default:
>          return BAR3;
> }
> 
>> +enum nl80211_eht_ru_alloc ath12k_mac_eht_ru_tones_to_nl80211_eht_ru_alloc(u16 ru_tones)
>> +{
>> +	enum nl80211_eht_ru_alloc ret;
>> +
>> +	switch (ru_tones) {
>> +	case 26:
>> +		ret = NL80211_RATE_INFO_EHT_RU_ALLOC_26;
>> +		break;
>> +	case 52:
>> +		ret = NL80211_RATE_INFO_EHT_RU_ALLOC_52;
>> +		break;
>> +	case (52 + 26):
>> +		ret = NL80211_RATE_INFO_EHT_RU_ALLOC_52P26;
>> +		break;
>> +	case 106:
>> +		ret = NL80211_RATE_INFO_EHT_RU_ALLOC_106;
>> +		break;
>> +	case (106 + 26):
>> +		ret = NL80211_RATE_INFO_EHT_RU_ALLOC_106P26;
>> +		break;
>> +	case 242:
>> +		ret = NL80211_RATE_INFO_EHT_RU_ALLOC_242;
>> +		break;
>> +	case 484:
>> +		ret = NL80211_RATE_INFO_EHT_RU_ALLOC_484;
>> +		break;
>> +	case (484 + 242):
>> +		ret = NL80211_RATE_INFO_EHT_RU_ALLOC_484P242;
>> +		break;
>> +	case 996:
>> +		ret = NL80211_RATE_INFO_EHT_RU_ALLOC_996;
>> +		break;
>> +	case (996 + 484):
>> +		ret = NL80211_RATE_INFO_EHT_RU_ALLOC_996P484;
>> +		break;
>> +	case (996 + 484 + 242):
>> +		ret = NL80211_RATE_INFO_EHT_RU_ALLOC_996P484P242;
>> +		break;
>> +	case (2 * 996):
>> +		ret = NL80211_RATE_INFO_EHT_RU_ALLOC_2x996;
>> +		break;
>> +	case (2 * 996 + 484):
>> +		ret = NL80211_RATE_INFO_EHT_RU_ALLOC_2x996P484;
>> +		break;
>> +	case (3 * 996):
>> +		ret = NL80211_RATE_INFO_EHT_RU_ALLOC_3x996;
>> +		break;
>> +	case (3 * 996 + 484):
>> +		ret = NL80211_RATE_INFO_EHT_RU_ALLOC_3x996P484;
>> +		break;
>> +	case (4 * 996):
>> +		ret = NL80211_RATE_INFO_EHT_RU_ALLOC_4x996;
>> +		break;
>> +	default:
>> +		ret = NL80211_RATE_INFO_EHT_RU_ALLOC_26;
>> +	}
>> +
>> +	return ret;
>> +}
> 
> Same here.
>
Lingbo Kong April 26, 2024, 8:01 a.m. UTC | #4
On 2024/4/25 18:37, Kalle Valo wrote:
> Lingbo Kong <quic_lingbok@quicinc.com> writes:
> 
>> Currently, the transmit rate of "iw dev xxx station dump" command
>> always show an invalid value.
>>
>> To address this issue, ath12k parse the info of transmit complete
>> report from firmware and indicate the transmit rate to mac80211.
>>
>> This patch affects the station mode of WCN7850 and QCN9274.
>>
>> After that, "iw dev xxx station dump" show the correct transmit rate.
>> Such as:
>>
>> Station 00:03:7f:12:03:03 (on wlo1)
>>          inactive time:  872 ms
>>          rx bytes:       219111
>>          rx packets:     1133
>>          tx bytes:       53767
>>          tx packets:     462
>>          tx retries:     51
>>          tx failed:      0
>>          beacon loss:    0
>>          beacon rx:      403
>>          rx drop misc:   74
>>          signal:         -95 dBm
>>          beacon signal avg:      -18 dBm
>>          tx bitrate:     1441.1 MBit/s 80MHz EHT-MCS 13 EHT-NSS 2 EHT-GI 0
>>
>> Tested-on: WCN7850 hw2.0 PCI WLAN.HMT.1.0.c5-00481-QCAHMTSWPL_V1.0_V2.0_SILICONZ-3
>> Tested-on: QCN9274 hw2.0 PCI WLAN.WBE.1.2.1-00201-QCAHKSWPL_SILICONZ-1
>>
>> Signed-off-by: Lingbo Kong <quic_lingbok@quicinc.com>
> 
> [...]
> 
>> +static void ath12k_dp_tx_update(struct ath12k *ar, struct hal_tx_status *ts)
>> +{
>> +	if (ar->last_ppdu_id != 0) {
>> +		if (ar->last_ppdu_id == ts->ppdu_id ||
>> +		    ar->cached_ppdu_id == ar->last_ppdu_id)
>> +			ar->cached_ppdu_id = ar->last_ppdu_id;
>> +
>> +		ath12k_dp_tx_update_txcompl(ar, ts);
>> +	}
>> +
>> +	ar->last_ppdu_id = ts->ppdu_id;
>> +}
> 
> A code comment would help a lot. Why is ar->cached_ppdu_id needed here?
> 
> And if 'ar->cached_ppdu_id == ar->last_ppdu_id' is true why do then do
> 'ar->cached_ppdu_id = ar->last_ppdu_id'? The value of ar->cached_ppdu_id
> is not changing here (unless I'm missing something).
> 
> Also I'm worried about locking. How is access to ar->last_ppdu_id and
> ar->cached_ppdu_id protected?
> 

Thanks for pointing to this.
you're right, the ar->cached_ppdu_id haven't used in here, so need to 
delete it.
i missed something in here.

So, change the ath12k_dp_tx_update(struct ath12k *ar, struct 
hal_tx_status *ts) to
static void ath12k_dp_tx_update(struct ath12k *ar, struct hal_tx_status *ts)
{
	if (ts->flags & HAL_TX_STATUS_FLAGS_FIRST_MSDU) {
		if (ar->last_ppdu_id != 0)
			ath12k_dp_tx_update_txcompl(ar, ts);
		ar->last_ppdu_id = ts->ppdu_id;
	}
}

best regards
Lingbo Kong
Kalle Valo April 26, 2024, 11:21 a.m. UTC | #5
Lingbo Kong <quic_lingbok@quicinc.com> writes:

> On 2024/4/26 0:54, Kalle Valo wrote:
>> Lingbo Kong <quic_lingbok@quicinc.com> writes:
>> 
>>> +static void ath12k_dp_tx_update_txcompl(struct ath12k *ar, struct
>>> hal_tx_status *ts)
>>> +{
>>> +	struct ath12k_base *ab = ar->ab;
>>> +	struct ath12k_peer *peer;
>>> +	struct ath12k_sta *arsta;
>>> +	struct ieee80211_sta *sta;
>>> +	u16 rate;
>>> +	u8 rate_idx = 0;
>>> +	int ret;
>>> +
>>> +	spin_lock_bh(&ab->base_lock);
>>
>> Did you analyse how this function, and especially taking the
>> base_lock,
>> affects performance?
>
> The base_lock is used here because of the need to look for peers based
> on the ts->peer_id when calling ath12k_peer_find_by_id() function,
> which i think might affect performance.
>
> Do i need to run a throughput test?

Ok, so to answer my question: no, you didn't do any performance
analysis. Throughput test might not be enough, for example the driver
can be used on slower systems and running the test on a fast CPU might
not reveal any problem. A proper analysis would be much better.

>>> +enum nl80211_he_ru_alloc
>>> ath12k_mac_he_ru_tones_to_nl80211_he_ru_alloc(u16 ru_tones)
>>> +{
>>> +	enum nl80211_he_ru_alloc ret;
>>> +
>>> +	switch (ru_tones) {
>>> +	case 26:
>>> +		ret = NL80211_RATE_INFO_HE_RU_ALLOC_26;
>>> +		break;
>>> +	case 52:
>>> +		ret = NL80211_RATE_INFO_HE_RU_ALLOC_52;
>>> +		break;
>>> +	case 106:
>>> +		ret = NL80211_RATE_INFO_HE_RU_ALLOC_106;
>>> +		break;
>>> +	case 242:
>>> +		ret = NL80211_RATE_INFO_HE_RU_ALLOC_242;
>>> +		break;
>>> +	case 484:
>>> +		ret = NL80211_RATE_INFO_HE_RU_ALLOC_484;
>>> +		break;
>>> +	case 996:
>>> +		ret = NL80211_RATE_INFO_HE_RU_ALLOC_996;
>>> +		break;
>>> +	case (996 * 2):
>>> +		ret = NL80211_RATE_INFO_HE_RU_ALLOC_2x996;
>>> +		break;
>>> +	default:
>>> +		ret = NL80211_RATE_INFO_HE_RU_ALLOC_26;
>>> +		break;
>>> +	}
>>> +
>>> +	return ret;
>>> +}
>> How does this function compare to
>> ath12k_he_ru_tones_to_nl80211_he_ru_alloc()?
>> 
>
> ath12k_mac_he_ru_tones_to_nl80211_he_ru_alloc() is different from
> ath12k_he_ru_tones_to_nl80211_he_ru_alloc().
>
> the logic of ath12k_he_ru_tones_to_nl80211_he_ru_alloc() is

Sure, I can read C. But _why_ do we have two very similar but still
different functions. That looks fishy to me.
Kalle Valo April 26, 2024, 11:24 a.m. UTC | #6
Lingbo Kong <quic_lingbok@quicinc.com> writes:

> On 2024/4/25 18:37, Kalle Valo wrote:
>> Lingbo Kong <quic_lingbok@quicinc.com> writes:
>> 
>>> Currently, the transmit rate of "iw dev xxx station dump" command
>>> always show an invalid value.
>>>
>>> To address this issue, ath12k parse the info of transmit complete
>>> report from firmware and indicate the transmit rate to mac80211.
>>>
>>> This patch affects the station mode of WCN7850 and QCN9274.
>>>
>>> After that, "iw dev xxx station dump" show the correct transmit rate.
>>> Such as:
>>>
>>> Station 00:03:7f:12:03:03 (on wlo1)
>>>          inactive time:  872 ms
>>>          rx bytes:       219111
>>>          rx packets:     1133
>>>          tx bytes:       53767
>>>          tx packets:     462
>>>          tx retries:     51
>>>          tx failed:      0
>>>          beacon loss:    0
>>>          beacon rx:      403
>>>          rx drop misc:   74
>>>          signal:         -95 dBm
>>>          beacon signal avg:      -18 dBm
>>>          tx bitrate:     1441.1 MBit/s 80MHz EHT-MCS 13 EHT-NSS 2 EHT-GI 0
>>>
>>> Tested-on: WCN7850 hw2.0 PCI WLAN.HMT.1.0.c5-00481-QCAHMTSWPL_V1.0_V2.0_SILICONZ-3
>>> Tested-on: QCN9274 hw2.0 PCI WLAN.WBE.1.2.1-00201-QCAHKSWPL_SILICONZ-1
>>>
>>> Signed-off-by: Lingbo Kong <quic_lingbok@quicinc.com>
>> [...]
>> 
>>> +static void ath12k_dp_tx_update(struct ath12k *ar, struct hal_tx_status *ts)
>>> +{
>>> +	if (ar->last_ppdu_id != 0) {
>>> +		if (ar->last_ppdu_id == ts->ppdu_id ||
>>> +		    ar->cached_ppdu_id == ar->last_ppdu_id)
>>> +			ar->cached_ppdu_id = ar->last_ppdu_id;
>>> +
>>> +		ath12k_dp_tx_update_txcompl(ar, ts);
>>> +	}
>>> +
>>> +	ar->last_ppdu_id = ts->ppdu_id;
>>> +}
>> A code comment would help a lot. Why is ar->cached_ppdu_id needed
>> here?
>> And if 'ar->cached_ppdu_id == ar->last_ppdu_id' is true why do then
>> do
>> 'ar->cached_ppdu_id = ar->last_ppdu_id'? The value of ar->cached_ppdu_id
>> is not changing here (unless I'm missing something).
>> Also I'm worried about locking. How is access to ar->last_ppdu_id
>> and
>> ar->cached_ppdu_id protected?
>> 
>
> Thanks for pointing to this.
> you're right, the ar->cached_ppdu_id haven't used in here, so need to
> delete it.
> i missed something in here.
>
> So, change the ath12k_dp_tx_update(struct ath12k *ar, struct
> hal_tx_status *ts) to
> static void ath12k_dp_tx_update(struct ath12k *ar, struct hal_tx_status *ts)
> {
> 	if (ts->flags & HAL_TX_STATUS_FLAGS_FIRST_MSDU) {
> 		if (ar->last_ppdu_id != 0)
> 			ath12k_dp_tx_update_txcompl(ar, ts);
> 		ar->last_ppdu_id = ts->ppdu_id;
> 	}
> }

Access to ar->last_ppdu_id still looks racy to me.

And why do we need to track last_ppdu_id? I don't have time to start
investigating that right now, a code comment explaining that would help
a lot.
Karthikeyan Periyasamy April 29, 2024, 9:11 a.m. UTC | #7
On 4/19/2024 8:51 AM, Lingbo Kong wrote:
> Currently, the transmit rate of "iw dev xxx station dump" command
> always show an invalid value.
> 
> To address this issue, ath12k parse the info of transmit complete
> report from firmware and indicate the transmit rate to mac80211.
> 
> This patch affects the station mode of WCN7850 and QCN9274.
> 
> After that, "iw dev xxx station dump" show the correct transmit rate.
> Such as:
> 
> Station 00:03:7f:12:03:03 (on wlo1)
>          inactive time:  872 ms
>          rx bytes:       219111
>          rx packets:     1133
>          tx bytes:       53767
>          tx packets:     462
>          tx retries:     51
>          tx failed:      0
>          beacon loss:    0
>          beacon rx:      403
>          rx drop misc:   74
>          signal:         -95 dBm
>          beacon signal avg:      -18 dBm
>          tx bitrate:     1441.1 MBit/s 80MHz EHT-MCS 13 EHT-NSS 2 EHT-GI 0
> 
> Tested-on: WCN7850 hw2.0 PCI WLAN.HMT.1.0.c5-00481-QCAHMTSWPL_V1.0_V2.0_SILICONZ-3
> Tested-on: QCN9274 hw2.0 PCI WLAN.WBE.1.2.1-00201-QCAHKSWPL_SILICONZ-1
> 
> Signed-off-by: Lingbo Kong <quic_lingbok@quicinc.com>
> ---
> v4:
> 1.change ATH12K_EHT_MCS_MAX from 13 to 15
> 
> v3:
> no change
> 
> v2:
> 1.change copyright
> 
>   drivers/net/wireless/ath/ath12k/core.h   |   2 +
>   drivers/net/wireless/ath/ath12k/dp_rx.h  |   3 +
>   drivers/net/wireless/ath/ath12k/dp_tx.c  | 147 ++++++++++++++++++++++-
>   drivers/net/wireless/ath/ath12k/hal_tx.h |   9 +-
>   drivers/net/wireless/ath/ath12k/mac.c    | 124 +++++++++++++++++++
>   drivers/net/wireless/ath/ath12k/mac.h    |   4 +-
>   6 files changed, 282 insertions(+), 7 deletions(-)
> 
> diff --git a/drivers/net/wireless/ath/ath12k/core.h b/drivers/net/wireless/ath/ath12k/core.h
> index 5d3c1fb632b0..b2ddd1e6fb14 100644
> --- a/drivers/net/wireless/ath/ath12k/core.h
> +++ b/drivers/net/wireless/ath/ath12k/core.h
> @@ -74,6 +74,7 @@ enum wme_ac {
>   #define ATH12K_HT_MCS_MAX	7
>   #define ATH12K_VHT_MCS_MAX	9
>   #define ATH12K_HE_MCS_MAX	11
> +#define ATH12K_EHT_MCS_MAX	15
>   
>   enum ath12k_crypt_mode {
>   	/* Only use hardware crypto engine */
> @@ -448,6 +449,7 @@ struct ath12k_sta {
>   	struct ath12k_rx_peer_stats *rx_stats;
>   	struct ath12k_wbm_tx_stats *wbm_tx_stats;
>   	u32 bw_prev;
> +	u32 peer_nss;
>   };
>   
>   #define ATH12K_MIN_5G_FREQ 4150
> diff --git a/drivers/net/wireless/ath/ath12k/dp_rx.h b/drivers/net/wireless/ath/ath12k/dp_rx.h
> index 2ff421160181..1543788c0da7 100644
> --- a/drivers/net/wireless/ath/ath12k/dp_rx.h
> +++ b/drivers/net/wireless/ath/ath12k/dp_rx.h
> @@ -79,6 +79,9 @@ static inline u32 ath12k_he_gi_to_nl80211_he_gi(u8 sgi)
>   	case RX_MSDU_START_SGI_3_2_US:
>   		ret = NL80211_RATE_INFO_HE_GI_3_2;
>   		break;
> +	default:
> +		ret = NL80211_RATE_INFO_HE_GI_0_8;
> +		break;
>   	}
>   
>   	return ret;
> diff --git a/drivers/net/wireless/ath/ath12k/dp_tx.c b/drivers/net/wireless/ath/ath12k/dp_tx.c
> index 9b6d7d72f57c..74ef4c7a72c1 100644
> --- a/drivers/net/wireless/ath/ath12k/dp_tx.c
> +++ b/drivers/net/wireless/ath/ath12k/dp_tx.c
> @@ -8,6 +8,8 @@
>   #include "dp_tx.h"
>   #include "debug.h"
>   #include "hw.h"
> +#include "peer.h"
> +#include "mac.h"
>   
>   static enum hal_tcl_encap_type
>   ath12k_dp_tx_get_encap_type(struct ath12k_vif *arvif, struct sk_buff *skb)
> @@ -443,6 +445,125 @@ ath12k_dp_tx_process_htt_tx_complete(struct ath12k_base *ab,
>   	}
>   }
>   
> +static void ath12k_dp_tx_update_txcompl(struct ath12k *ar, struct hal_tx_status *ts)
> +{
> +	struct ath12k_base *ab = ar->ab;
> +	struct ath12k_peer *peer;
> +	struct ath12k_sta *arsta;
> +	struct ieee80211_sta *sta;
> +	u16 rate;
> +	u8 rate_idx = 0;
> +	int ret;
> +
> +	spin_lock_bh(&ab->base_lock);
> +
> +	peer = ath12k_peer_find_by_id(ab, ts->peer_id);
> +	if (!peer || !peer->sta) {
> +		ath12k_dbg(ab, ATH12K_DBG_DP_TX,
> +			   "failed to find the peer by id %u\n", ts->peer_id);
> +		goto err_out;
> +	}
> +
> +	sta = peer->sta;
> +	arsta = ath12k_sta_to_arsta(sta);
> +
> +	memset(&arsta->txrate, 0, sizeof(arsta->txrate));
> +
> +	/* This is to prefer choose the real NSS value arsta->last_txrate.nss,
> +	 * if it is invalid, then choose the NSS value while assoc.
> +	 */
> +	if (arsta->last_txrate.nss)
> +		arsta->txrate.nss = arsta->last_txrate.nss;
> +	else
> +		arsta->txrate.nss = arsta->peer_nss;
> +
> +	if (ts->pkt_type == HAL_TX_RATE_STATS_PKT_TYPE_11A ||
> +	    ts->pkt_type == HAL_TX_RATE_STATS_PKT_TYPE_11B) {
> +		ret = ath12k_mac_hw_ratecode_to_legacy_rate(ts->mcs,
> +							    ts->pkt_type,
> +							    &rate_idx,
> +							    &rate);
> +		if (ret < 0) {
> +			ath12k_warn(ab, "Invalid tx legacy rate %d\n", ret);
> +			goto err_out;
> +		}
> +
> +		arsta->txrate.legacy = rate;
> +	} else if (ts->pkt_type == HAL_TX_RATE_STATS_PKT_TYPE_11N) {
> +		if (ts->mcs > ATH12K_HT_MCS_MAX) {
> +			ath12k_warn(ab, "Invalid HT mcs index %d\n", ts->mcs);
> +			goto err_out;
> +		}
> +
> +		if (arsta->txrate.nss != 0)
> +			arsta->txrate.mcs = ts->mcs + 8 * (arsta->txrate.nss - 1);
> +
> +		arsta->txrate.flags = RATE_INFO_FLAGS_MCS;
> +
> +		if (ts->sgi)
> +			arsta->txrate.flags |= RATE_INFO_FLAGS_SHORT_GI;
> +	} else if (ts->pkt_type == HAL_TX_RATE_STATS_PKT_TYPE_11AC) {
> +		if (ts->mcs > ATH12K_VHT_MCS_MAX) {
> +			ath12k_warn(ab, "Invalid VHT mcs index %d\n", ts->mcs);
> +			goto err_out;
> +		}
> +
> +		arsta->txrate.mcs = ts->mcs;
> +		arsta->txrate.flags = RATE_INFO_FLAGS_VHT_MCS;
> +
> +		if (ts->sgi)
> +			arsta->txrate.flags |= RATE_INFO_FLAGS_SHORT_GI;
> +	} else if (ts->pkt_type == HAL_TX_RATE_STATS_PKT_TYPE_11AX) {
> +		if (ts->mcs > ATH12K_HE_MCS_MAX) {
> +			ath12k_warn(ab, "Invalid HE mcs index %d\n", ts->mcs);
> +			goto err_out;
> +		}
> +
> +		arsta->txrate.mcs = ts->mcs;
> +		arsta->txrate.flags = RATE_INFO_FLAGS_HE_MCS;
> +		arsta->txrate.he_gi = ath12k_he_gi_to_nl80211_he_gi(ts->sgi);
> +	} else if (ts->pkt_type == HAL_TX_RATE_STATS_PKT_TYPE_11BE) {
> +		if (ts->mcs > ATH12K_EHT_MCS_MAX) {
> +			ath12k_warn(ab, "Invalid EHT mcs index %d\n", ts->mcs);
> +			goto err_out;
> +		}
> +
> +		arsta->txrate.mcs = ts->mcs;
> +		arsta->txrate.flags = RATE_INFO_FLAGS_EHT_MCS;
> +		arsta->txrate.eht_gi = ath12k_mac_eht_gi_to_nl80211_eht_gi(ts->sgi);
> +	}
> +
> +	arsta->txrate.bw = ath12k_mac_bw_to_mac80211_bw(ts->bw);
> +
> +	if (ts->ofdma && ts->pkt_type == HAL_TX_RATE_STATS_PKT_TYPE_11AX) {
> +		arsta->txrate.bw = RATE_INFO_BW_HE_RU;
> +		arsta->txrate.he_ru_alloc =
> +			ath12k_mac_he_ru_tones_to_nl80211_he_ru_alloc(ts->ru_tones);
> +	}
> +
> +	if (ts->ofdma && ts->pkt_type == HAL_TX_RATE_STATS_PKT_TYPE_11BE) {
> +		arsta->txrate.bw = RATE_INFO_BW_EHT_RU;
> +		arsta->txrate.eht_ru_alloc =
> +			ath12k_mac_eht_ru_tones_to_nl80211_eht_ru_alloc(ts->ru_tones);
> +	}
> +
> +err_out:
> +	spin_unlock_bh(&ab->base_lock);
> +}
> +
> +static void ath12k_dp_tx_update(struct ath12k *ar, struct hal_tx_status *ts)
> +{
> +	if (ar->last_ppdu_id != 0) {
> +		if (ar->last_ppdu_id == ts->ppdu_id ||
> +		    ar->cached_ppdu_id == ar->last_ppdu_id)
> +			ar->cached_ppdu_id = ar->last_ppdu_id;
> +
> +		ath12k_dp_tx_update_txcompl(ar, ts);
> +	}
> +
> +	ar->last_ppdu_id = ts->ppdu_id;
> +}
> +
>   static void ath12k_dp_tx_complete_msdu(struct ath12k *ar,
>   				       struct sk_buff *msdu,
>   				       struct hal_tx_status *ts)
> @@ -498,6 +619,8 @@ static void ath12k_dp_tx_complete_msdu(struct ath12k *ar,
>   	 * Might end up reporting it out-of-band from HTT stats.
>   	 */
>   
> +	ath12k_dp_tx_update(ar, ts);
> +
>   	ieee80211_tx_status_skb(ath12k_ar_to_hw(ar), msdu);
>   
>   exit:
> @@ -522,10 +645,26 @@ static void ath12k_dp_tx_status_parse(struct ath12k_base *ab,
>   
>   	ts->ppdu_id = le32_get_bits(desc->info1,
>   				    HAL_WBM_COMPL_TX_INFO1_TQM_STATUS_NUMBER);
> -	if (le32_to_cpu(desc->rate_stats.info0) & HAL_TX_RATE_STATS_INFO0_VALID)
> -		ts->rate_stats = le32_to_cpu(desc->rate_stats.info0);
> -	else
> -		ts->rate_stats = 0;
> +
> +	if (le32_to_cpu(desc->info2) & HAL_WBM_COMPL_TX_INFO2_FIRST_MSDU)
> +		ts->flags |= HAL_TX_STATUS_FLAGS_FIRST_MSDU;
> +
> +	ts->peer_id = le32_get_bits(desc->info3, HAL_WBM_COMPL_TX_INFO3_PEER_ID);
> +
> +	if (le32_to_cpu(desc->rate_stats.info0) & HAL_TX_RATE_STATS_INFO0_VALID) {
> +		ts->pkt_type = le32_get_bits(desc->rate_stats.info0,
> +					     HAL_TX_RATE_STATS_INFO0_PKT_TYPE);
> +		ts->mcs = le32_get_bits(desc->rate_stats.info0,
> +					HAL_TX_RATE_STATS_INFO0_MCS);
> +		ts->sgi = le32_get_bits(desc->rate_stats.info0,
> +					HAL_TX_RATE_STATS_INFO0_SGI);
> +		ts->bw = le32_get_bits(desc->rate_stats.info0,
> +				       HAL_TX_RATE_STATS_INFO0_BW);
> +		ts->ru_tones = le32_get_bits(desc->rate_stats.info0,
> +					     HAL_TX_RATE_STATS_INFO0_TONES_IN_RU);
> +		ts->ofdma = le32_get_bits(desc->rate_stats.info0,
> +					  HAL_TX_RATE_STATS_INFO0_OFDMA_TX);
> +	}


Why multiple read from dma mapped area say desc->rate_stats.info0 lead 
to increase in CPU cycles. Instead you do one read from dma mapped area 
desc->rate_stats.info0 and classify into your own data structure ?

And the info0 classification used within the 
ath12k_dp_tx_update_txcompl(), so you can do the classification within 
this API.
Lingbo Kong April 29, 2024, 9:29 a.m. UTC | #8
On 2024/4/29 17:11, Karthikeyan Periyasamy wrote:
> 
> 
> On 4/19/2024 8:51 AM, Lingbo Kong wrote:
>> Currently, the transmit rate of "iw dev xxx station dump" command
>> always show an invalid value.
>>
>> To address this issue, ath12k parse the info of transmit complete
>> report from firmware and indicate the transmit rate to mac80211.
>>
>> This patch affects the station mode of WCN7850 and QCN9274.
>>
>> After that, "iw dev xxx station dump" show the correct transmit rate.
>> Such as:
>>
>> Station 00:03:7f:12:03:03 (on wlo1)
>>          inactive time:  872 ms
>>          rx bytes:       219111
>>          rx packets:     1133
>>          tx bytes:       53767
>>          tx packets:     462
>>          tx retries:     51
>>          tx failed:      0
>>          beacon loss:    0
>>          beacon rx:      403
>>          rx drop misc:   74
>>          signal:         -95 dBm
>>          beacon signal avg:      -18 dBm
>>          tx bitrate:     1441.1 MBit/s 80MHz EHT-MCS 13 EHT-NSS 2 
>> EHT-GI 0
>>
>> Tested-on: WCN7850 hw2.0 PCI 
>> WLAN.HMT.1.0.c5-00481-QCAHMTSWPL_V1.0_V2.0_SILICONZ-3
>> Tested-on: QCN9274 hw2.0 PCI WLAN.WBE.1.2.1-00201-QCAHKSWPL_SILICONZ-1
>>
>> Signed-off-by: Lingbo Kong <quic_lingbok@quicinc.com>
[...]
>> @@ -522,10 +645,26 @@ static void ath12k_dp_tx_status_parse(struct 
>> ath12k_base *ab,
>>       ts->ppdu_id = le32_get_bits(desc->info1,
>>                       HAL_WBM_COMPL_TX_INFO1_TQM_STATUS_NUMBER);
>> -    if (le32_to_cpu(desc->rate_stats.info0) & 
>> HAL_TX_RATE_STATS_INFO0_VALID)
>> -        ts->rate_stats = le32_to_cpu(desc->rate_stats.info0);
>> -    else
>> -        ts->rate_stats = 0;
>> +
>> +    if (le32_to_cpu(desc->info2) & HAL_WBM_COMPL_TX_INFO2_FIRST_MSDU)
>> +        ts->flags |= HAL_TX_STATUS_FLAGS_FIRST_MSDU;
>> +
>> +    ts->peer_id = le32_get_bits(desc->info3, 
>> HAL_WBM_COMPL_TX_INFO3_PEER_ID);
>> +
>> +    if (le32_to_cpu(desc->rate_stats.info0) & 
>> HAL_TX_RATE_STATS_INFO0_VALID) {
>> +        ts->pkt_type = le32_get_bits(desc->rate_stats.info0,
>> +                         HAL_TX_RATE_STATS_INFO0_PKT_TYPE);
>> +        ts->mcs = le32_get_bits(desc->rate_stats.info0,
>> +                    HAL_TX_RATE_STATS_INFO0_MCS);
>> +        ts->sgi = le32_get_bits(desc->rate_stats.info0,
>> +                    HAL_TX_RATE_STATS_INFO0_SGI);
>> +        ts->bw = le32_get_bits(desc->rate_stats.info0,
>> +                       HAL_TX_RATE_STATS_INFO0_BW);
>> +        ts->ru_tones = le32_get_bits(desc->rate_stats.info0,
>> +                         HAL_TX_RATE_STATS_INFO0_TONES_IN_RU);
>> +        ts->ofdma = le32_get_bits(desc->rate_stats.info0,
>> +                      HAL_TX_RATE_STATS_INFO0_OFDMA_TX);
>> +    }
> 
> 
> Why multiple read from dma mapped area say desc->rate_stats.info0 lead 
> to increase in CPU cycles. Instead you do one read from dma mapped area 
> desc->rate_stats.info0 and classify into your own data structure ?
> 
> And the info0 classification used within the 
> ath12k_dp_tx_update_txcompl(), so you can do the classification within 
> this API.
> 
yes, thanks for pointing of this.
i will apply it in next version:),

Best regards
Lingbo Kong
Lingbo Kong April 30, 2024, 11:41 a.m. UTC | #9
On 2024/4/26 19:21, Kalle Valo wrote:
> Lingbo Kong <quic_lingbok@quicinc.com> writes:
> 
>> On 2024/4/26 0:54, Kalle Valo wrote:
>>> Lingbo Kong <quic_lingbok@quicinc.com> writes:
>>>
>>>> +static void ath12k_dp_tx_update_txcompl(struct ath12k *ar, struct
>>>> hal_tx_status *ts)
>>>> +{
>>>> +	struct ath12k_base *ab = ar->ab;
>>>> +	struct ath12k_peer *peer;
>>>> +	struct ath12k_sta *arsta;
>>>> +	struct ieee80211_sta *sta;
>>>> +	u16 rate;
>>>> +	u8 rate_idx = 0;
>>>> +	int ret;
>>>> +
>>>> +	spin_lock_bh(&ab->base_lock);
>>>
>>> Did you analyse how this function, and especially taking the
>>> base_lock,
>>> affects performance?
>>
>> The base_lock is used here because of the need to look for peers based
>> on the ts->peer_id when calling ath12k_peer_find_by_id() function,
>> which i think might affect performance.
>>
>> Do i need to run a throughput test?
> 
> Ok, so to answer my question: no, you didn't do any performance
> analysis. Throughput test might not be enough, for example the driver
> can be used on slower systems and running the test on a fast CPU might
> not reveal any problem. A proper analysis would be much better.
> 

hi, kalle,
i found that ab->base_lock is used in a lot of places in ath12k, so it's 
complicated to do performance analysis in here.

Do you have any suggestions? I would appreciate your suggestions:)

/lingbo kong

>>>> +enum nl80211_he_ru_alloc
>>>> ath12k_mac_he_ru_tones_to_nl80211_he_ru_alloc(u16 ru_tones)
>>>> +{
>>>> +	enum nl80211_he_ru_alloc ret;
>>>> +
>>>> +	switch (ru_tones) {
>>>> +	case 26:
>>>> +		ret = NL80211_RATE_INFO_HE_RU_ALLOC_26;
>>>> +		break;
>>>> +	case 52:
>>>> +		ret = NL80211_RATE_INFO_HE_RU_ALLOC_52;
>>>> +		break;
>>>> +	case 106:
>>>> +		ret = NL80211_RATE_INFO_HE_RU_ALLOC_106;
>>>> +		break;
>>>> +	case 242:
>>>> +		ret = NL80211_RATE_INFO_HE_RU_ALLOC_242;
>>>> +		break;
>>>> +	case 484:
>>>> +		ret = NL80211_RATE_INFO_HE_RU_ALLOC_484;
>>>> +		break;
>>>> +	case 996:
>>>> +		ret = NL80211_RATE_INFO_HE_RU_ALLOC_996;
>>>> +		break;
>>>> +	case (996 * 2):
>>>> +		ret = NL80211_RATE_INFO_HE_RU_ALLOC_2x996;
>>>> +		break;
>>>> +	default:
>>>> +		ret = NL80211_RATE_INFO_HE_RU_ALLOC_26;
>>>> +		break;
>>>> +	}
>>>> +
>>>> +	return ret;
>>>> +}
>>> How does this function compare to
>>> ath12k_he_ru_tones_to_nl80211_he_ru_alloc()?
>>>
>>
>> ath12k_mac_he_ru_tones_to_nl80211_he_ru_alloc() is different from
>> ath12k_he_ru_tones_to_nl80211_he_ru_alloc().
>>
>> the logic of ath12k_he_ru_tones_to_nl80211_he_ru_alloc() is
> 
> Sure, I can read C. But _why_ do we have two very similar but still
> different functions. That looks fishy to me.
>
Lingbo Kong June 5, 2024, 6:31 a.m. UTC | #10
On 2024/4/26 19:21, Kalle Valo wrote:
> Lingbo Kong <quic_lingbok@quicinc.com> writes:
> 
>> On 2024/4/26 0:54, Kalle Valo wrote:
>>> Lingbo Kong <quic_lingbok@quicinc.com> writes:
>>>
>>>> +static void ath12k_dp_tx_update_txcompl(struct ath12k *ar, struct
>>>> hal_tx_status *ts)
>>>> +{
>>>> +	struct ath12k_base *ab = ar->ab;
>>>> +	struct ath12k_peer *peer;
>>>> +	struct ath12k_sta *arsta;
>>>> +	struct ieee80211_sta *sta;
>>>> +	u16 rate;
>>>> +	u8 rate_idx = 0;
>>>> +	int ret;
>>>> +
>>>> +	spin_lock_bh(&ab->base_lock);
>>>
>>> Did you analyse how this function, and especially taking the
>>> base_lock,
>>> affects performance?
>>
>> The base_lock is used here because of the need to look for peers based
>> on the ts->peer_id when calling ath12k_peer_find_by_id() function,
>> which i think might affect performance.
>>
>> Do i need to run a throughput test?
> 
> Ok, so to answer my question: no, you didn't do any performance
> analysis. Throughput test might not be enough, for example the driver
> can be used on slower systems and running the test on a fast CPU might
> not reveal any problem. A proper analysis would be much better.
> 

Hi, kalle,
I did a simple performance analysis of the ath12k_dp_tx_update_txcompl() 
function on slower systems.

Firstly, i use perf tool to set dynamic tracepoints in 
ath12k_dp_tx_complete_msdu() function, and then used the command of 
"iperf -c ip address -w 4M -n 1G -i 1" to do traffic test.

During this process, use ./perf record -a -g to detect the performace of 
the system.

Finally, compare the results with and without this patch.

without this patch
./perf report output
children    self	command		symbol
7.28%	   0.08%      ksoftirqd/0 ath12k_dp_tx_complete_msdu
5.96%      0.03%      swapper     ath12k_dp_tx_complete_msdu   		

iperf output
[  1] 0.0000-62.6712 sec  1.00 GBytes   137 Mbits/sec

with this patch
children    self       command         symbol
7.42%	   0.08%      ksoftirqd/0  ath12k_dp_tx_complete_msdu
6.32%      0.03%      swapper      ath12k_dp_tx_complete_msdu

iperf output
[  1] 0.0000-62.6732 sec  1.00 GBytes   137 Mbits/sec

As can be seen from the table above, with this patch, the CPU time 
percentage will increase by 0.5%.

So, i think applying this patch will definitely have an impact on system 
performance, but the impact is not that big and i think it can be ignored:)

Best regards
Lingbo Kong
Lingbo Kong June 17, 2024, 11:50 a.m. UTC | #11
On 2024/6/5 14:31, Lingbo Kong wrote:
> 
> 
> On 2024/4/26 19:21, Kalle Valo wrote:
>> Lingbo Kong <quic_lingbok@quicinc.com> writes:
>>
>>> On 2024/4/26 0:54, Kalle Valo wrote:
>>>> Lingbo Kong <quic_lingbok@quicinc.com> writes:
>>>>
>>>>> +static void ath12k_dp_tx_update_txcompl(struct ath12k *ar, struct
>>>>> hal_tx_status *ts)
>>>>> +{
>>>>> +    struct ath12k_base *ab = ar->ab;
>>>>> +    struct ath12k_peer *peer;
>>>>> +    struct ath12k_sta *arsta;
>>>>> +    struct ieee80211_sta *sta;
>>>>> +    u16 rate;
>>>>> +    u8 rate_idx = 0;
>>>>> +    int ret;
>>>>> +
>>>>> +    spin_lock_bh(&ab->base_lock);
>>>>
>>>> Did you analyse how this function, and especially taking the
>>>> base_lock,
>>>> affects performance?
>>>
>>> The base_lock is used here because of the need to look for peers based
>>> on the ts->peer_id when calling ath12k_peer_find_by_id() function,
>>> which i think might affect performance.
>>>
>>> Do i need to run a throughput test?
>>
>> Ok, so to answer my question: no, you didn't do any performance
>> analysis. Throughput test might not be enough, for example the driver
>> can be used on slower systems and running the test on a fast CPU might
>> not reveal any problem. A proper analysis would be much better.
>>
> 
> Hi, kalle,
> I did a simple performance analysis of the ath12k_dp_tx_update_txcompl() 
> function on slower systems.
> 
> Firstly, i use perf tool to set dynamic tracepoints in 
> ath12k_dp_tx_complete_msdu() function, and then used the command of 
> "iperf -c ip address -w 4M -n 1G -i 1" to do traffic test.
> 
> During this process, use ./perf record -a -g to detect the performace of 
> the system.
> 
> Finally, compare the results with and without this patch.
> 
> without this patch
> ./perf report output
> children    self    command        symbol
> 7.28%       0.08%      ksoftirqd/0 ath12k_dp_tx_complete_msdu
> 5.96%      0.03%      swapper     ath12k_dp_tx_complete_msdu
> 
> iperf output
> [  1] 0.0000-62.6712 sec  1.00 GBytes   137 Mbits/sec
> 
> with this patch
> children    self       command         symbol
> 7.42%       0.08%      ksoftirqd/0  ath12k_dp_tx_complete_msdu
> 6.32%      0.03%      swapper      ath12k_dp_tx_complete_msdu
> 
> iperf output
> [  1] 0.0000-62.6732 sec  1.00 GBytes   137 Mbits/sec
> 
> As can be seen from the table above, with this patch, the CPU time 
> percentage will increase by 0.5%.
> 
> So, i think applying this patch will definitely have an impact on system 
> performance, but the impact is not that big and i think it can be ignored:)
> 
> Best regards
> Lingbo Kong

Hi, kalle
do you have any comments regarding the above content?:)

best regards
Lingbo Kong
diff mbox series

Patch

diff --git a/drivers/net/wireless/ath/ath12k/core.h b/drivers/net/wireless/ath/ath12k/core.h
index 5d3c1fb632b0..b2ddd1e6fb14 100644
--- a/drivers/net/wireless/ath/ath12k/core.h
+++ b/drivers/net/wireless/ath/ath12k/core.h
@@ -74,6 +74,7 @@  enum wme_ac {
 #define ATH12K_HT_MCS_MAX	7
 #define ATH12K_VHT_MCS_MAX	9
 #define ATH12K_HE_MCS_MAX	11
+#define ATH12K_EHT_MCS_MAX	15
 
 enum ath12k_crypt_mode {
 	/* Only use hardware crypto engine */
@@ -448,6 +449,7 @@  struct ath12k_sta {
 	struct ath12k_rx_peer_stats *rx_stats;
 	struct ath12k_wbm_tx_stats *wbm_tx_stats;
 	u32 bw_prev;
+	u32 peer_nss;
 };
 
 #define ATH12K_MIN_5G_FREQ 4150
diff --git a/drivers/net/wireless/ath/ath12k/dp_rx.h b/drivers/net/wireless/ath/ath12k/dp_rx.h
index 2ff421160181..1543788c0da7 100644
--- a/drivers/net/wireless/ath/ath12k/dp_rx.h
+++ b/drivers/net/wireless/ath/ath12k/dp_rx.h
@@ -79,6 +79,9 @@  static inline u32 ath12k_he_gi_to_nl80211_he_gi(u8 sgi)
 	case RX_MSDU_START_SGI_3_2_US:
 		ret = NL80211_RATE_INFO_HE_GI_3_2;
 		break;
+	default:
+		ret = NL80211_RATE_INFO_HE_GI_0_8;
+		break;
 	}
 
 	return ret;
diff --git a/drivers/net/wireless/ath/ath12k/dp_tx.c b/drivers/net/wireless/ath/ath12k/dp_tx.c
index 9b6d7d72f57c..74ef4c7a72c1 100644
--- a/drivers/net/wireless/ath/ath12k/dp_tx.c
+++ b/drivers/net/wireless/ath/ath12k/dp_tx.c
@@ -8,6 +8,8 @@ 
 #include "dp_tx.h"
 #include "debug.h"
 #include "hw.h"
+#include "peer.h"
+#include "mac.h"
 
 static enum hal_tcl_encap_type
 ath12k_dp_tx_get_encap_type(struct ath12k_vif *arvif, struct sk_buff *skb)
@@ -443,6 +445,125 @@  ath12k_dp_tx_process_htt_tx_complete(struct ath12k_base *ab,
 	}
 }
 
+static void ath12k_dp_tx_update_txcompl(struct ath12k *ar, struct hal_tx_status *ts)
+{
+	struct ath12k_base *ab = ar->ab;
+	struct ath12k_peer *peer;
+	struct ath12k_sta *arsta;
+	struct ieee80211_sta *sta;
+	u16 rate;
+	u8 rate_idx = 0;
+	int ret;
+
+	spin_lock_bh(&ab->base_lock);
+
+	peer = ath12k_peer_find_by_id(ab, ts->peer_id);
+	if (!peer || !peer->sta) {
+		ath12k_dbg(ab, ATH12K_DBG_DP_TX,
+			   "failed to find the peer by id %u\n", ts->peer_id);
+		goto err_out;
+	}
+
+	sta = peer->sta;
+	arsta = ath12k_sta_to_arsta(sta);
+
+	memset(&arsta->txrate, 0, sizeof(arsta->txrate));
+
+	/* This is to prefer choose the real NSS value arsta->last_txrate.nss,
+	 * if it is invalid, then choose the NSS value while assoc.
+	 */
+	if (arsta->last_txrate.nss)
+		arsta->txrate.nss = arsta->last_txrate.nss;
+	else
+		arsta->txrate.nss = arsta->peer_nss;
+
+	if (ts->pkt_type == HAL_TX_RATE_STATS_PKT_TYPE_11A ||
+	    ts->pkt_type == HAL_TX_RATE_STATS_PKT_TYPE_11B) {
+		ret = ath12k_mac_hw_ratecode_to_legacy_rate(ts->mcs,
+							    ts->pkt_type,
+							    &rate_idx,
+							    &rate);
+		if (ret < 0) {
+			ath12k_warn(ab, "Invalid tx legacy rate %d\n", ret);
+			goto err_out;
+		}
+
+		arsta->txrate.legacy = rate;
+	} else if (ts->pkt_type == HAL_TX_RATE_STATS_PKT_TYPE_11N) {
+		if (ts->mcs > ATH12K_HT_MCS_MAX) {
+			ath12k_warn(ab, "Invalid HT mcs index %d\n", ts->mcs);
+			goto err_out;
+		}
+
+		if (arsta->txrate.nss != 0)
+			arsta->txrate.mcs = ts->mcs + 8 * (arsta->txrate.nss - 1);
+
+		arsta->txrate.flags = RATE_INFO_FLAGS_MCS;
+
+		if (ts->sgi)
+			arsta->txrate.flags |= RATE_INFO_FLAGS_SHORT_GI;
+	} else if (ts->pkt_type == HAL_TX_RATE_STATS_PKT_TYPE_11AC) {
+		if (ts->mcs > ATH12K_VHT_MCS_MAX) {
+			ath12k_warn(ab, "Invalid VHT mcs index %d\n", ts->mcs);
+			goto err_out;
+		}
+
+		arsta->txrate.mcs = ts->mcs;
+		arsta->txrate.flags = RATE_INFO_FLAGS_VHT_MCS;
+
+		if (ts->sgi)
+			arsta->txrate.flags |= RATE_INFO_FLAGS_SHORT_GI;
+	} else if (ts->pkt_type == HAL_TX_RATE_STATS_PKT_TYPE_11AX) {
+		if (ts->mcs > ATH12K_HE_MCS_MAX) {
+			ath12k_warn(ab, "Invalid HE mcs index %d\n", ts->mcs);
+			goto err_out;
+		}
+
+		arsta->txrate.mcs = ts->mcs;
+		arsta->txrate.flags = RATE_INFO_FLAGS_HE_MCS;
+		arsta->txrate.he_gi = ath12k_he_gi_to_nl80211_he_gi(ts->sgi);
+	} else if (ts->pkt_type == HAL_TX_RATE_STATS_PKT_TYPE_11BE) {
+		if (ts->mcs > ATH12K_EHT_MCS_MAX) {
+			ath12k_warn(ab, "Invalid EHT mcs index %d\n", ts->mcs);
+			goto err_out;
+		}
+
+		arsta->txrate.mcs = ts->mcs;
+		arsta->txrate.flags = RATE_INFO_FLAGS_EHT_MCS;
+		arsta->txrate.eht_gi = ath12k_mac_eht_gi_to_nl80211_eht_gi(ts->sgi);
+	}
+
+	arsta->txrate.bw = ath12k_mac_bw_to_mac80211_bw(ts->bw);
+
+	if (ts->ofdma && ts->pkt_type == HAL_TX_RATE_STATS_PKT_TYPE_11AX) {
+		arsta->txrate.bw = RATE_INFO_BW_HE_RU;
+		arsta->txrate.he_ru_alloc =
+			ath12k_mac_he_ru_tones_to_nl80211_he_ru_alloc(ts->ru_tones);
+	}
+
+	if (ts->ofdma && ts->pkt_type == HAL_TX_RATE_STATS_PKT_TYPE_11BE) {
+		arsta->txrate.bw = RATE_INFO_BW_EHT_RU;
+		arsta->txrate.eht_ru_alloc =
+			ath12k_mac_eht_ru_tones_to_nl80211_eht_ru_alloc(ts->ru_tones);
+	}
+
+err_out:
+	spin_unlock_bh(&ab->base_lock);
+}
+
+static void ath12k_dp_tx_update(struct ath12k *ar, struct hal_tx_status *ts)
+{
+	if (ar->last_ppdu_id != 0) {
+		if (ar->last_ppdu_id == ts->ppdu_id ||
+		    ar->cached_ppdu_id == ar->last_ppdu_id)
+			ar->cached_ppdu_id = ar->last_ppdu_id;
+
+		ath12k_dp_tx_update_txcompl(ar, ts);
+	}
+
+	ar->last_ppdu_id = ts->ppdu_id;
+}
+
 static void ath12k_dp_tx_complete_msdu(struct ath12k *ar,
 				       struct sk_buff *msdu,
 				       struct hal_tx_status *ts)
@@ -498,6 +619,8 @@  static void ath12k_dp_tx_complete_msdu(struct ath12k *ar,
 	 * Might end up reporting it out-of-band from HTT stats.
 	 */
 
+	ath12k_dp_tx_update(ar, ts);
+
 	ieee80211_tx_status_skb(ath12k_ar_to_hw(ar), msdu);
 
 exit:
@@ -522,10 +645,26 @@  static void ath12k_dp_tx_status_parse(struct ath12k_base *ab,
 
 	ts->ppdu_id = le32_get_bits(desc->info1,
 				    HAL_WBM_COMPL_TX_INFO1_TQM_STATUS_NUMBER);
-	if (le32_to_cpu(desc->rate_stats.info0) & HAL_TX_RATE_STATS_INFO0_VALID)
-		ts->rate_stats = le32_to_cpu(desc->rate_stats.info0);
-	else
-		ts->rate_stats = 0;
+
+	if (le32_to_cpu(desc->info2) & HAL_WBM_COMPL_TX_INFO2_FIRST_MSDU)
+		ts->flags |= HAL_TX_STATUS_FLAGS_FIRST_MSDU;
+
+	ts->peer_id = le32_get_bits(desc->info3, HAL_WBM_COMPL_TX_INFO3_PEER_ID);
+
+	if (le32_to_cpu(desc->rate_stats.info0) & HAL_TX_RATE_STATS_INFO0_VALID) {
+		ts->pkt_type = le32_get_bits(desc->rate_stats.info0,
+					     HAL_TX_RATE_STATS_INFO0_PKT_TYPE);
+		ts->mcs = le32_get_bits(desc->rate_stats.info0,
+					HAL_TX_RATE_STATS_INFO0_MCS);
+		ts->sgi = le32_get_bits(desc->rate_stats.info0,
+					HAL_TX_RATE_STATS_INFO0_SGI);
+		ts->bw = le32_get_bits(desc->rate_stats.info0,
+				       HAL_TX_RATE_STATS_INFO0_BW);
+		ts->ru_tones = le32_get_bits(desc->rate_stats.info0,
+					     HAL_TX_RATE_STATS_INFO0_TONES_IN_RU);
+		ts->ofdma = le32_get_bits(desc->rate_stats.info0,
+					  HAL_TX_RATE_STATS_INFO0_OFDMA_TX);
+	}
 }
 
 void ath12k_dp_tx_completion_handler(struct ath12k_base *ab, int ring_id)
diff --git a/drivers/net/wireless/ath/ath12k/hal_tx.h b/drivers/net/wireless/ath/ath12k/hal_tx.h
index 7c837094a6f7..a3cf4db456e5 100644
--- a/drivers/net/wireless/ath/ath12k/hal_tx.h
+++ b/drivers/net/wireless/ath/ath12k/hal_tx.h
@@ -1,7 +1,7 @@ 
 /* SPDX-License-Identifier: BSD-3-Clause-Clear */
 /*
  * Copyright (c) 2018-2021 The Linux Foundation. All rights reserved.
- * Copyright (c) 2021-2022 Qualcomm Innovation Center, Inc. All rights reserved.
+ * Copyright (c) 2021-2022, 2024 Qualcomm Innovation Center, Inc. All rights reserved.
  */
 
 #ifndef ATH12K_HAL_TX_H
@@ -63,7 +63,12 @@  struct hal_tx_status {
 	u8 try_cnt;
 	u8 tid;
 	u16 peer_id;
-	u32 rate_stats;
+	enum hal_tx_rate_stats_pkt_type pkt_type;
+	enum hal_tx_rate_stats_sgi sgi;
+	enum ath12k_supported_bw bw;
+	u8 mcs;
+	u16 ru_tones;
+	u8 ofdma;
 };
 
 #define HAL_TX_PHY_DESC_INFO0_BF_TYPE		GENMASK(17, 16)
diff --git a/drivers/net/wireless/ath/ath12k/mac.c b/drivers/net/wireless/ath/ath12k/mac.c
index 56b1f8b6844e..cd13fa48e97d 100644
--- a/drivers/net/wireless/ath/ath12k/mac.c
+++ b/drivers/net/wireless/ath/ath12k/mac.c
@@ -329,6 +329,122 @@  static const char *ath12k_mac_phymode_str(enum wmi_phy_mode mode)
 	return "<unknown>";
 }
 
+enum nl80211_he_ru_alloc ath12k_mac_he_ru_tones_to_nl80211_he_ru_alloc(u16 ru_tones)
+{
+	enum nl80211_he_ru_alloc ret;
+
+	switch (ru_tones) {
+	case 26:
+		ret = NL80211_RATE_INFO_HE_RU_ALLOC_26;
+		break;
+	case 52:
+		ret = NL80211_RATE_INFO_HE_RU_ALLOC_52;
+		break;
+	case 106:
+		ret = NL80211_RATE_INFO_HE_RU_ALLOC_106;
+		break;
+	case 242:
+		ret = NL80211_RATE_INFO_HE_RU_ALLOC_242;
+		break;
+	case 484:
+		ret = NL80211_RATE_INFO_HE_RU_ALLOC_484;
+		break;
+	case 996:
+		ret = NL80211_RATE_INFO_HE_RU_ALLOC_996;
+		break;
+	case (996 * 2):
+		ret = NL80211_RATE_INFO_HE_RU_ALLOC_2x996;
+		break;
+	default:
+		ret = NL80211_RATE_INFO_HE_RU_ALLOC_26;
+		break;
+	}
+
+	return ret;
+}
+
+enum nl80211_eht_gi ath12k_mac_eht_gi_to_nl80211_eht_gi(u8 sgi)
+{
+	enum nl80211_eht_gi ret;
+
+	switch (sgi) {
+	case RX_MSDU_START_SGI_0_8_US:
+		ret = NL80211_RATE_INFO_EHT_GI_0_8;
+		break;
+	case RX_MSDU_START_SGI_1_6_US:
+		ret = NL80211_RATE_INFO_EHT_GI_1_6;
+		break;
+	case RX_MSDU_START_SGI_3_2_US:
+		ret = NL80211_RATE_INFO_EHT_GI_3_2;
+		break;
+	default:
+		ret = NL80211_RATE_INFO_EHT_GI_0_8;
+		break;
+	}
+
+	return ret;
+}
+
+enum nl80211_eht_ru_alloc ath12k_mac_eht_ru_tones_to_nl80211_eht_ru_alloc(u16 ru_tones)
+{
+	enum nl80211_eht_ru_alloc ret;
+
+	switch (ru_tones) {
+	case 26:
+		ret = NL80211_RATE_INFO_EHT_RU_ALLOC_26;
+		break;
+	case 52:
+		ret = NL80211_RATE_INFO_EHT_RU_ALLOC_52;
+		break;
+	case (52 + 26):
+		ret = NL80211_RATE_INFO_EHT_RU_ALLOC_52P26;
+		break;
+	case 106:
+		ret = NL80211_RATE_INFO_EHT_RU_ALLOC_106;
+		break;
+	case (106 + 26):
+		ret = NL80211_RATE_INFO_EHT_RU_ALLOC_106P26;
+		break;
+	case 242:
+		ret = NL80211_RATE_INFO_EHT_RU_ALLOC_242;
+		break;
+	case 484:
+		ret = NL80211_RATE_INFO_EHT_RU_ALLOC_484;
+		break;
+	case (484 + 242):
+		ret = NL80211_RATE_INFO_EHT_RU_ALLOC_484P242;
+		break;
+	case 996:
+		ret = NL80211_RATE_INFO_EHT_RU_ALLOC_996;
+		break;
+	case (996 + 484):
+		ret = NL80211_RATE_INFO_EHT_RU_ALLOC_996P484;
+		break;
+	case (996 + 484 + 242):
+		ret = NL80211_RATE_INFO_EHT_RU_ALLOC_996P484P242;
+		break;
+	case (2 * 996):
+		ret = NL80211_RATE_INFO_EHT_RU_ALLOC_2x996;
+		break;
+	case (2 * 996 + 484):
+		ret = NL80211_RATE_INFO_EHT_RU_ALLOC_2x996P484;
+		break;
+	case (3 * 996):
+		ret = NL80211_RATE_INFO_EHT_RU_ALLOC_3x996;
+		break;
+	case (3 * 996 + 484):
+		ret = NL80211_RATE_INFO_EHT_RU_ALLOC_3x996P484;
+		break;
+	case (4 * 996):
+		ret = NL80211_RATE_INFO_EHT_RU_ALLOC_4x996;
+		break;
+	default:
+		ret = NL80211_RATE_INFO_EHT_RU_ALLOC_26;
+	}
+
+	return ret;
+}
+
 enum rate_info_bw
 ath12k_mac_bw_to_mac80211_bw(enum ath12k_supported_bw bw)
 {
@@ -2487,8 +2603,12 @@  static void ath12k_peer_assoc_prepare(struct ath12k *ar,
 				      struct ath12k_wmi_peer_assoc_arg *arg,
 				      bool reassoc)
 {
+	struct ath12k_sta *arsta;
+
 	lockdep_assert_held(&ar->conf_mutex);
 
+	arsta = ath12k_sta_to_arsta(sta);
+
 	memset(arg, 0, sizeof(*arg));
 
 	reinit_completion(&ar->peer_assoc_done);
@@ -2505,6 +2625,8 @@  static void ath12k_peer_assoc_prepare(struct ath12k *ar,
 	ath12k_peer_assoc_h_phymode(ar, vif, sta, arg);
 	ath12k_peer_assoc_h_smps(sta, arg);
 
+	arsta->peer_nss = arg->peer_nss;
+
 	/* TODO: amsdu_disable req? */
 }
 
@@ -8073,6 +8195,8 @@  static void ath12k_mac_op_sta_statistics(struct ieee80211_hw *hw,
 		sinfo->txrate.he_gi = arsta->txrate.he_gi;
 		sinfo->txrate.he_dcm = arsta->txrate.he_dcm;
 		sinfo->txrate.he_ru_alloc = arsta->txrate.he_ru_alloc;
+		sinfo->txrate.eht_gi = arsta->txrate.eht_gi;
+		sinfo->txrate.eht_ru_alloc = arsta->txrate.eht_ru_alloc;
 	}
 	sinfo->txrate.flags = arsta->txrate.flags;
 	sinfo->filled |= BIT_ULL(NL80211_STA_INFO_TX_BITRATE);
diff --git a/drivers/net/wireless/ath/ath12k/mac.h b/drivers/net/wireless/ath/ath12k/mac.h
index 69fd282b9dd3..b22321aadc84 100644
--- a/drivers/net/wireless/ath/ath12k/mac.h
+++ b/drivers/net/wireless/ath/ath12k/mac.h
@@ -81,5 +81,7 @@  int ath12k_mac_rfkill_config(struct ath12k *ar);
 int ath12k_mac_wait_tx_complete(struct ath12k *ar);
 void ath12k_mac_handle_beacon(struct ath12k *ar, struct sk_buff *skb);
 void ath12k_mac_handle_beacon_miss(struct ath12k *ar, u32 vdev_id);
-
+enum nl80211_he_ru_alloc ath12k_mac_he_ru_tones_to_nl80211_he_ru_alloc(u16 ru_tones);
+enum nl80211_eht_ru_alloc ath12k_mac_eht_ru_tones_to_nl80211_eht_ru_alloc(u16 ru_tones);
+enum nl80211_eht_gi ath12k_mac_eht_gi_to_nl80211_eht_gi(u8 sgi);
 #endif