diff mbox series

[3/4] wifi: rtw88: usb: Support RX aggregation

Message ID a549707a-09f4-4787-8111-65cc266675d6@gmail.com
State New
Headers show
Series [1/4] wifi: rtw88: Init RX burst length for 8822cu/8822bu/8821cu | expand

Commit Message

Bitterblue Smith July 28, 2024, 7:42 p.m. UTC
The chips can be configured to aggregate several frames into a single
USB transfer. Modify rtw_usb_rx_handler() to support this case.

RX aggregation improves the RX speed on certain ARM systems, like the
NanoPi NEO Core2.

Currently none of the chips are configured to aggregate frames.

Tested with RTL8811CU and RTL8723DU.

Signed-off-by: Bitterblue Smith <rtl8821cerfe2@gmail.com>
---
 drivers/net/wireless/realtek/rtw88/usb.c | 57 +++++++++++++++---------
 1 file changed, 37 insertions(+), 20 deletions(-)

Comments

Sascha Hauer July 30, 2024, 6:39 a.m. UTC | #1
On Sun, Jul 28, 2024 at 10:42:32PM +0300, Bitterblue Smith wrote:
> The chips can be configured to aggregate several frames into a single
> USB transfer. Modify rtw_usb_rx_handler() to support this case.
> 
> RX aggregation improves the RX speed on certain ARM systems, like the
> NanoPi NEO Core2.
> 
> Currently none of the chips are configured to aggregate frames.
> 
> Tested with RTL8811CU and RTL8723DU.
> 
> Signed-off-by: Bitterblue Smith <rtl8821cerfe2@gmail.com>
> ---
>  drivers/net/wireless/realtek/rtw88/usb.c | 57 +++++++++++++++---------
>  1 file changed, 37 insertions(+), 20 deletions(-)
> 
> diff --git a/drivers/net/wireless/realtek/rtw88/usb.c b/drivers/net/wireless/realtek/rtw88/usb.c
> index 73948078068f..d61be1029a7b 100644
> --- a/drivers/net/wireless/realtek/rtw88/usb.c
> +++ b/drivers/net/wireless/realtek/rtw88/usb.c
> @@ -546,11 +546,12 @@ static void rtw_usb_rx_handler(struct work_struct *work)
>  	struct rtw_usb *rtwusb = container_of(work, struct rtw_usb, rx_work);
>  	struct rtw_dev *rtwdev = rtwusb->rtwdev;
>  	const struct rtw_chip_info *chip = rtwdev->chip;
> -	struct rtw_rx_pkt_stat pkt_stat;
> +	u32 pkt_desc_sz = chip->rx_pkt_desc_sz;
>  	struct ieee80211_rx_status rx_status;
> +	u32 pkt_offset, next_pkt, urb_len;
> +	struct rtw_rx_pkt_stat pkt_stat;
> +	struct sk_buff *next_skb = NULL;
>  	struct sk_buff *skb;
> -	u32 pkt_desc_sz = chip->rx_pkt_desc_sz;
> -	u32 pkt_offset;
>  	u8 *rx_desc;
>  	int limit;
>  
> @@ -559,29 +560,44 @@ static void rtw_usb_rx_handler(struct work_struct *work)
>  		if (!skb)
>  			break;
>  
> -		rx_desc = skb->data;
> -		chip->ops->query_rx_desc(rtwdev, rx_desc, &pkt_stat,
> -					 &rx_status);
> -		pkt_offset = pkt_desc_sz + pkt_stat.drv_info_sz +
> -			     pkt_stat.shift;
> -
> -		if (pkt_stat.is_c2h) {
> -			skb_put(skb, pkt_stat.pkt_len + pkt_offset);
> -			rtw_fw_c2h_cmd_rx_irqsafe(rtwdev, pkt_offset, skb);
> -			continue;
> -		}
> -
>  		if (skb_queue_len(&rtwusb->rx_queue) >= RTW_USB_MAX_RXQ_LEN) {
>  			dev_dbg_ratelimited(rtwdev->dev, "failed to get rx_queue, overflow\n");
>  			dev_kfree_skb_any(skb);
>  			continue;
>  		}
>  
> -		skb_put(skb, pkt_stat.pkt_len);
> -		skb_reserve(skb, pkt_offset);
> -		rtw_rx_stats(rtwdev, pkt_stat.vif, skb);
> -		memcpy(skb->cb, &rx_status, sizeof(rx_status));
> -		ieee80211_rx_irqsafe(rtwdev->hw, skb);
> +		urb_len = skb->len;
> +
> +		do {
> +			rx_desc = skb->data;
> +			chip->ops->query_rx_desc(rtwdev, rx_desc, &pkt_stat,
> +						 &rx_status);
> +			pkt_offset = pkt_desc_sz + pkt_stat.drv_info_sz +
> +				     pkt_stat.shift;
> +
> +			next_pkt = round_up(pkt_stat.pkt_len + pkt_offset, 8);
> +
> +			if (urb_len >= next_pkt + pkt_desc_sz)
> +				next_skb = skb_clone(skb, GFP_KERNEL);

You could add a:
			else
				next_skb = NULL;

here and drop the next_skb = NULL from the end of the loop. No
functional change, but easier to read.

> +
> +			if (pkt_stat.is_c2h) {
> +				skb_trim(skb, pkt_stat.pkt_len + pkt_offset);
> +				rtw_fw_c2h_cmd_rx_irqsafe(rtwdev, pkt_offset, skb);
> +			} else {
> +				skb_pull(skb, pkt_offset);
> +				skb_trim(skb, pkt_stat.pkt_len);
> +				rtw_rx_stats(rtwdev, pkt_stat.vif, skb);
> +				memcpy(skb->cb, &rx_status, sizeof(rx_status));
> +				ieee80211_rx_irqsafe(rtwdev->hw, skb);
> +			}
> +
> +			skb = next_skb;
> +			if (skb)
> +				skb_pull(next_skb, next_pkt);

You could use skb instead of next_skb here. Both are the same, so no
functional change, just makes it a bit easier to read when you use the
same variable that you just tested for validity.

> +
> +			urb_len -= next_pkt;
> +			next_skb = NULL;
> +		} while (skb && urb_len >= pkt_desc_sz);

You can drop the urb_len >= pkt_desc_sz check. It will be exactly true
when skb is non NULL as well.

Sascha
Bitterblue Smith July 31, 2024, 4:58 p.m. UTC | #2
On 30/07/2024 09:39, Sascha Hauer wrote:
> On Sun, Jul 28, 2024 at 10:42:32PM +0300, Bitterblue Smith wrote:
>> The chips can be configured to aggregate several frames into a single
>> USB transfer. Modify rtw_usb_rx_handler() to support this case.
>>
>> RX aggregation improves the RX speed on certain ARM systems, like the
>> NanoPi NEO Core2.
>>
>> Currently none of the chips are configured to aggregate frames.
>>
>> Tested with RTL8811CU and RTL8723DU.
>>
>> Signed-off-by: Bitterblue Smith <rtl8821cerfe2@gmail.com>
>> ---
>>  drivers/net/wireless/realtek/rtw88/usb.c | 57 +++++++++++++++---------
>>  1 file changed, 37 insertions(+), 20 deletions(-)
>>
>> diff --git a/drivers/net/wireless/realtek/rtw88/usb.c b/drivers/net/wireless/realtek/rtw88/usb.c
>> index 73948078068f..d61be1029a7b 100644
>> --- a/drivers/net/wireless/realtek/rtw88/usb.c
>> +++ b/drivers/net/wireless/realtek/rtw88/usb.c
>> @@ -546,11 +546,12 @@ static void rtw_usb_rx_handler(struct work_struct *work)
>>  	struct rtw_usb *rtwusb = container_of(work, struct rtw_usb, rx_work);
>>  	struct rtw_dev *rtwdev = rtwusb->rtwdev;
>>  	const struct rtw_chip_info *chip = rtwdev->chip;
>> -	struct rtw_rx_pkt_stat pkt_stat;
>> +	u32 pkt_desc_sz = chip->rx_pkt_desc_sz;
>>  	struct ieee80211_rx_status rx_status;
>> +	u32 pkt_offset, next_pkt, urb_len;
>> +	struct rtw_rx_pkt_stat pkt_stat;
>> +	struct sk_buff *next_skb = NULL;
>>  	struct sk_buff *skb;
>> -	u32 pkt_desc_sz = chip->rx_pkt_desc_sz;
>> -	u32 pkt_offset;
>>  	u8 *rx_desc;
>>  	int limit;
>>  
>> @@ -559,29 +560,44 @@ static void rtw_usb_rx_handler(struct work_struct *work)
>>  		if (!skb)
>>  			break;
>>  
>> -		rx_desc = skb->data;
>> -		chip->ops->query_rx_desc(rtwdev, rx_desc, &pkt_stat,
>> -					 &rx_status);
>> -		pkt_offset = pkt_desc_sz + pkt_stat.drv_info_sz +
>> -			     pkt_stat.shift;
>> -
>> -		if (pkt_stat.is_c2h) {
>> -			skb_put(skb, pkt_stat.pkt_len + pkt_offset);
>> -			rtw_fw_c2h_cmd_rx_irqsafe(rtwdev, pkt_offset, skb);
>> -			continue;
>> -		}
>> -
>>  		if (skb_queue_len(&rtwusb->rx_queue) >= RTW_USB_MAX_RXQ_LEN) {
>>  			dev_dbg_ratelimited(rtwdev->dev, "failed to get rx_queue, overflow\n");
>>  			dev_kfree_skb_any(skb);
>>  			continue;
>>  		}
>>  
>> -		skb_put(skb, pkt_stat.pkt_len);
>> -		skb_reserve(skb, pkt_offset);
>> -		rtw_rx_stats(rtwdev, pkt_stat.vif, skb);
>> -		memcpy(skb->cb, &rx_status, sizeof(rx_status));
>> -		ieee80211_rx_irqsafe(rtwdev->hw, skb);
>> +		urb_len = skb->len;
>> +
>> +		do {
>> +			rx_desc = skb->data;
>> +			chip->ops->query_rx_desc(rtwdev, rx_desc, &pkt_stat,
>> +						 &rx_status);
>> +			pkt_offset = pkt_desc_sz + pkt_stat.drv_info_sz +
>> +				     pkt_stat.shift;
>> +
>> +			next_pkt = round_up(pkt_stat.pkt_len + pkt_offset, 8);
>> +
>> +			if (urb_len >= next_pkt + pkt_desc_sz)
>> +				next_skb = skb_clone(skb, GFP_KERNEL);
> 
> You could add a:
> 			else
> 				next_skb = NULL;
> 
> here and drop the next_skb = NULL from the end of the loop. No
> functional change, but easier to read.
> 
>> +
>> +			if (pkt_stat.is_c2h) {
>> +				skb_trim(skb, pkt_stat.pkt_len + pkt_offset);
>> +				rtw_fw_c2h_cmd_rx_irqsafe(rtwdev, pkt_offset, skb);
>> +			} else {
>> +				skb_pull(skb, pkt_offset);
>> +				skb_trim(skb, pkt_stat.pkt_len);
>> +				rtw_rx_stats(rtwdev, pkt_stat.vif, skb);
>> +				memcpy(skb->cb, &rx_status, sizeof(rx_status));
>> +				ieee80211_rx_irqsafe(rtwdev->hw, skb);
>> +			}
>> +
>> +			skb = next_skb;
>> +			if (skb)
>> +				skb_pull(next_skb, next_pkt);
> 
> You could use skb instead of next_skb here. Both are the same, so no
> functional change, just makes it a bit easier to read when you use the
> same variable that you just tested for validity.
> 
>> +
>> +			urb_len -= next_pkt;
>> +			next_skb = NULL;
>> +		} while (skb && urb_len >= pkt_desc_sz);
> 
> You can drop the urb_len >= pkt_desc_sz check. It will be exactly true
> when skb is non NULL as well.
> 
> Sascha
> 

That all sounds correct, thank you.
Bitterblue Smith July 31, 2024, 4:58 p.m. UTC | #3
On 30/07/2024 07:33, Ping-Ke Shih wrote:
> Bitterblue Smith <rtl8821cerfe2@gmail.com> wrote:
>>
>> The chips can be configured to aggregate several frames into a single
>> USB transfer. Modify rtw_usb_rx_handler() to support this case.
>>
>> RX aggregation improves the RX speed on certain ARM systems, like the
>> NanoPi NEO Core2.
>>
>> Currently none of the chips are configured to aggregate frames.
>>
>> Tested with RTL8811CU and RTL8723DU.
>>
>> Signed-off-by: Bitterblue Smith <rtl8821cerfe2@gmail.com>
>> ---
>>  drivers/net/wireless/realtek/rtw88/usb.c | 57 +++++++++++++++---------
>>  1 file changed, 37 insertions(+), 20 deletions(-)
>>
>> diff --git a/drivers/net/wireless/realtek/rtw88/usb.c b/drivers/net/wireless/realtek/rtw88/usb.c
>> index 73948078068f..d61be1029a7b 100644
>> --- a/drivers/net/wireless/realtek/rtw88/usb.c
>> +++ b/drivers/net/wireless/realtek/rtw88/usb.c
>> @@ -546,11 +546,12 @@ static void rtw_usb_rx_handler(struct work_struct *work)
>>         struct rtw_usb *rtwusb = container_of(work, struct rtw_usb, rx_work);
>>         struct rtw_dev *rtwdev = rtwusb->rtwdev;
>>         const struct rtw_chip_info *chip = rtwdev->chip;
>> -       struct rtw_rx_pkt_stat pkt_stat;
>> +       u32 pkt_desc_sz = chip->rx_pkt_desc_sz;
>>         struct ieee80211_rx_status rx_status;
>> +       u32 pkt_offset, next_pkt, urb_len;
>> +       struct rtw_rx_pkt_stat pkt_stat;
>> +       struct sk_buff *next_skb = NULL;
>>         struct sk_buff *skb;
>> -       u32 pkt_desc_sz = chip->rx_pkt_desc_sz;
>> -       u32 pkt_offset;
>>         u8 *rx_desc;
>>         int limit;
>>
>> @@ -559,29 +560,44 @@ static void rtw_usb_rx_handler(struct work_struct *work)
>>                 if (!skb)
>>                         break;
>>
>> -               rx_desc = skb->data;
>> -               chip->ops->query_rx_desc(rtwdev, rx_desc, &pkt_stat,
>> -                                        &rx_status);
>> -               pkt_offset = pkt_desc_sz + pkt_stat.drv_info_sz +
>> -                            pkt_stat.shift;
>> -
>> -               if (pkt_stat.is_c2h) {
>> -                       skb_put(skb, pkt_stat.pkt_len + pkt_offset);
>> -                       rtw_fw_c2h_cmd_rx_irqsafe(rtwdev, pkt_offset, skb);
>> -                       continue;
>> -               }
>> -
>>                 if (skb_queue_len(&rtwusb->rx_queue) >= RTW_USB_MAX_RXQ_LEN) {
>>                         dev_dbg_ratelimited(rtwdev->dev, "failed to get rx_queue, overflow\n");
>>                         dev_kfree_skb_any(skb);
>>                         continue;
>>                 }
>>
>> -               skb_put(skb, pkt_stat.pkt_len);
>> -               skb_reserve(skb, pkt_offset);
>> -               rtw_rx_stats(rtwdev, pkt_stat.vif, skb);
>> -               memcpy(skb->cb, &rx_status, sizeof(rx_status));
>> -               ieee80211_rx_irqsafe(rtwdev->hw, skb);
>> +               urb_len = skb->len;
>> +
>> +               do {
>> +                       rx_desc = skb->data;
>> +                       chip->ops->query_rx_desc(rtwdev, rx_desc, &pkt_stat,
>> +                                                &rx_status);
>> +                       pkt_offset = pkt_desc_sz + pkt_stat.drv_info_sz +
>> +                                    pkt_stat.shift;
>> +
>> +                       next_pkt = round_up(pkt_stat.pkt_len + pkt_offset, 8);
>> +
>> +                       if (urb_len >= next_pkt + pkt_desc_sz)
>> +                               next_skb = skb_clone(skb, GFP_KERNEL);
>> +
>> +                       if (pkt_stat.is_c2h) {
>> +                               skb_trim(skb, pkt_stat.pkt_len + pkt_offset);
>> +                               rtw_fw_c2h_cmd_rx_irqsafe(rtwdev, pkt_offset, skb);
>> +                       } else {
>> +                               skb_pull(skb, pkt_offset);
>> +                               skb_trim(skb, pkt_stat.pkt_len);
>> +                               rtw_rx_stats(rtwdev, pkt_stat.vif, skb);
>> +                               memcpy(skb->cb, &rx_status, sizeof(rx_status));
>> +                               ieee80211_rx_irqsafe(rtwdev->hw, skb);
>> +                       }
>> +
>> +                       skb = next_skb;
>> +                       if (skb)
>> +                               skb_pull(next_skb, next_pkt);
>> +
>> +                       urb_len -= next_pkt;
> 
> I would like a checking to prevent underflow caused by unexpected hardware data.
> 

There is a check above: if (urb_len >= next_pkt + pkt_desc_sz)
If this check fails, skb becomes NULL and the loop stops.

>> +                       next_skb = NULL;
>> +               } while (skb && urb_len >= pkt_desc_sz);>>         }
>>  }
>>
>> @@ -625,6 +641,7 @@ static void rtw_usb_read_port_complete(struct urb *urb)
>>                         if (skb)
>>                                 dev_kfree_skb_any(skb);
>>                 } else {
>> +                       skb_put(skb, urb->actual_length);
>>                         skb_queue_tail(&rtwusb->rx_queue, skb);
>>                         queue_work(rtwusb->rxwq, &rtwusb->rx_work);
>>                 }
>> --
>> 2.45.2
>
diff mbox series

Patch

diff --git a/drivers/net/wireless/realtek/rtw88/usb.c b/drivers/net/wireless/realtek/rtw88/usb.c
index 73948078068f..d61be1029a7b 100644
--- a/drivers/net/wireless/realtek/rtw88/usb.c
+++ b/drivers/net/wireless/realtek/rtw88/usb.c
@@ -546,11 +546,12 @@  static void rtw_usb_rx_handler(struct work_struct *work)
 	struct rtw_usb *rtwusb = container_of(work, struct rtw_usb, rx_work);
 	struct rtw_dev *rtwdev = rtwusb->rtwdev;
 	const struct rtw_chip_info *chip = rtwdev->chip;
-	struct rtw_rx_pkt_stat pkt_stat;
+	u32 pkt_desc_sz = chip->rx_pkt_desc_sz;
 	struct ieee80211_rx_status rx_status;
+	u32 pkt_offset, next_pkt, urb_len;
+	struct rtw_rx_pkt_stat pkt_stat;
+	struct sk_buff *next_skb = NULL;
 	struct sk_buff *skb;
-	u32 pkt_desc_sz = chip->rx_pkt_desc_sz;
-	u32 pkt_offset;
 	u8 *rx_desc;
 	int limit;
 
@@ -559,29 +560,44 @@  static void rtw_usb_rx_handler(struct work_struct *work)
 		if (!skb)
 			break;
 
-		rx_desc = skb->data;
-		chip->ops->query_rx_desc(rtwdev, rx_desc, &pkt_stat,
-					 &rx_status);
-		pkt_offset = pkt_desc_sz + pkt_stat.drv_info_sz +
-			     pkt_stat.shift;
-
-		if (pkt_stat.is_c2h) {
-			skb_put(skb, pkt_stat.pkt_len + pkt_offset);
-			rtw_fw_c2h_cmd_rx_irqsafe(rtwdev, pkt_offset, skb);
-			continue;
-		}
-
 		if (skb_queue_len(&rtwusb->rx_queue) >= RTW_USB_MAX_RXQ_LEN) {
 			dev_dbg_ratelimited(rtwdev->dev, "failed to get rx_queue, overflow\n");
 			dev_kfree_skb_any(skb);
 			continue;
 		}
 
-		skb_put(skb, pkt_stat.pkt_len);
-		skb_reserve(skb, pkt_offset);
-		rtw_rx_stats(rtwdev, pkt_stat.vif, skb);
-		memcpy(skb->cb, &rx_status, sizeof(rx_status));
-		ieee80211_rx_irqsafe(rtwdev->hw, skb);
+		urb_len = skb->len;
+
+		do {
+			rx_desc = skb->data;
+			chip->ops->query_rx_desc(rtwdev, rx_desc, &pkt_stat,
+						 &rx_status);
+			pkt_offset = pkt_desc_sz + pkt_stat.drv_info_sz +
+				     pkt_stat.shift;
+
+			next_pkt = round_up(pkt_stat.pkt_len + pkt_offset, 8);
+
+			if (urb_len >= next_pkt + pkt_desc_sz)
+				next_skb = skb_clone(skb, GFP_KERNEL);
+
+			if (pkt_stat.is_c2h) {
+				skb_trim(skb, pkt_stat.pkt_len + pkt_offset);
+				rtw_fw_c2h_cmd_rx_irqsafe(rtwdev, pkt_offset, skb);
+			} else {
+				skb_pull(skb, pkt_offset);
+				skb_trim(skb, pkt_stat.pkt_len);
+				rtw_rx_stats(rtwdev, pkt_stat.vif, skb);
+				memcpy(skb->cb, &rx_status, sizeof(rx_status));
+				ieee80211_rx_irqsafe(rtwdev->hw, skb);
+			}
+
+			skb = next_skb;
+			if (skb)
+				skb_pull(next_skb, next_pkt);
+
+			urb_len -= next_pkt;
+			next_skb = NULL;
+		} while (skb && urb_len >= pkt_desc_sz);
 	}
 }
 
@@ -625,6 +641,7 @@  static void rtw_usb_read_port_complete(struct urb *urb)
 			if (skb)
 				dev_kfree_skb_any(skb);
 		} else {
+			skb_put(skb, urb->actual_length);
 			skb_queue_tail(&rtwusb->rx_queue, skb);
 			queue_work(rtwusb->rxwq, &rtwusb->rx_work);
 		}