diff mbox series

[v3,7/8] wifi: wfx: allow to send frames during ROC

Message ID 20231004172843.195332-8-jerome.pouiller@silabs.com
State New
Headers show
Series wfx: implement Remain On Channel | expand

Commit Message

Jérôme Pouiller Oct. 4, 2023, 5:28 p.m. UTC
Until now, all the traffic was blocked during scan operation. However,
scan operation is going to be used to implement Remain On Channel (ROC).
In this case, special frames (marked with IEEE80211_TX_CTL_TX_OFFCHAN)
must be sent during the operation.

These frames need to be sent on the virtual interface #2. Until now,
this interface was only used by the device for internal purpose. But
since API 3.9, it can be used to send data during scan operation (we
hijack the scan process to implement ROC).

Thus, we need to change a bit the way we match the frames with the
interface.

Fortunately, the frames received during the scan are marked with the
correct interface number. So there is no change to do on this part.

Signed-off-by: Jérôme Pouiller <jerome.pouiller@silabs.com>
---
 drivers/net/wireless/silabs/wfx/data_tx.c | 36 ++++++++++++++++-----
 drivers/net/wireless/silabs/wfx/data_tx.h |  2 ++
 drivers/net/wireless/silabs/wfx/queue.c   | 38 ++++++++++++++++++-----
 drivers/net/wireless/silabs/wfx/queue.h   |  1 +
 4 files changed, 62 insertions(+), 15 deletions(-)

Comments

A. Sverdlin Nov. 26, 2024, 7:27 a.m. UTC | #1
Hi Jerome,

I've just got this (with CONFIG_PROVE_LOCKING) in idle,
without any traffic on the wlan interface:

[119012.104386] INFO: trying to register non-static key.
[119012.109730] The code is fine but needs lockdep annotation, or maybe
[119012.116313] you didn't initialize this object before use?
[119012.121992] turning off the locking correctness validator.
[119012.127778] CPU: 0 UID: 0 PID: 336 Comm: kworker/0:1H Tainted: G           O       6.11.0
[119012.139802] Tainted: [O]=OOT_MODULE
[119012.148547] Workqueue: wfx_bh_wq bh_work [wfx]
[119012.153591] Call trace:
[119012.156282]  dump_backtrace+0xa0/0x128
[119012.160340]  show_stack+0x20/0x38
[119012.163939]  dump_stack_lvl+0x8c/0xd0
[119012.167890]  dump_stack+0x18/0x28
[119012.171472]  register_lock_class+0x4b0/0x4c0
[119012.176033]  __lock_acquire+0xa0/0x1f50
[119012.180148]  lock_acquire+0x1f8/0x330
[119012.184083]  _raw_spin_lock_irqsave+0x68/0x98
[119012.188731]  skb_dequeue+0x2c/0xa8
[119012.192390]  wfx_tx_queues_get+0x20c/0x5a0 [wfx]
[119012.197525]  bh_work+0x3bc/0x950 [wfx]
[119012.201749]  process_one_work+0x234/0x658
[119012.206040]  worker_thread+0x1bc/0x360
[119012.210056]  kthread+0x124/0x130
[119012.213535]  ret_from_fork+0x10/0x20

the uptime is pretty high, as you can see, it's not in startup.
But I've noticed that NetworkManeger closes and opens
the interface from time to time, which leads to 
wfx_remove_interface() of wvif->id 0 and consequent
wfx_add_interface() of wvif->id 0. And only 0, which seems to be relevant,
see below...

On Wed, 2023-10-04 at 19:28 +0200, Jérôme Pouiller wrote:
> Until now, all the traffic was blocked during scan operation. However,
> scan operation is going to be used to implement Remain On Channel (ROC).
> In this case, special frames (marked with IEEE80211_TX_CTL_TX_OFFCHAN)
> must be sent during the operation.
> 
> These frames need to be sent on the virtual interface #2. Until now,
> this interface was only used by the device for internal purpose. But
> since API 3.9, it can be used to send data during scan operation (we
> hijack the scan process to implement ROC).
> 
> Thus, we need to change a bit the way we match the frames with the
> interface.
> 
> Fortunately, the frames received during the scan are marked with the
> correct interface number. So there is no change to do on this part.
> 
> Signed-off-by: Jérôme Pouiller <jerome.pouiller@silabs.com>
> ---
>  drivers/net/wireless/silabs/wfx/data_tx.c | 36 ++++++++++++++++-----
>  drivers/net/wireless/silabs/wfx/data_tx.h |  2 ++
>  drivers/net/wireless/silabs/wfx/queue.c   | 38 ++++++++++++++++++-----
>  drivers/net/wireless/silabs/wfx/queue.h   |  1 +
>  4 files changed, 62 insertions(+), 15 deletions(-)
> 
> diff --git a/drivers/net/wireless/silabs/wfx/data_tx.c b/drivers/net/wireless/silabs/wfx/data_tx.c
> index ce2b5dcfd8d89..e8b6d41f55196 100644
> --- a/drivers/net/wireless/silabs/wfx/data_tx.c
> +++ b/drivers/net/wireless/silabs/wfx/data_tx.c
> @@ -226,6 +226,18 @@ struct wfx_hif_req_tx *wfx_skb_txreq(struct sk_buff *skb)
>  	return req;
>  }
>  
> +struct wfx_vif *wfx_skb_wvif(struct wfx_dev *wdev, struct sk_buff *skb)
> +{
> +	struct wfx_tx_priv *tx_priv = wfx_skb_tx_priv(skb);
> +	struct wfx_hif_msg *hif = (struct wfx_hif_msg *)skb->data;
> +
> +	if (tx_priv->vif_id != hif->interface && hif->interface != 2) {
> +		dev_err(wdev->dev, "corrupted skb");
> +		return wdev_to_wvif(wdev, hif->interface);
> +	}
> +	return wdev_to_wvif(wdev, tx_priv->vif_id);
> +}
> +
>  static u8 wfx_tx_get_link_id(struct wfx_vif *wvif, struct ieee80211_sta *sta,
>  			     struct ieee80211_hdr *hdr)
>  {
> @@ -352,6 +364,7 @@ static int wfx_tx_inner(struct wfx_vif *wvif, struct ieee80211_sta *sta, struct
>  	/* Fill tx_priv */
>  	tx_priv = (struct wfx_tx_priv *)tx_info->rate_driver_data;
>  	tx_priv->icv_size = wfx_tx_get_icv_len(hw_key);
> +	tx_priv->vif_id = wvif->id;
>  
>  	/* Fill hif_msg */
>  	WARN(skb_headroom(skb) < wmsg_len, "not enough space in skb");
> @@ -362,7 +375,10 @@ static int wfx_tx_inner(struct wfx_vif *wvif, struct ieee80211_sta *sta, struct
>  	hif_msg = (struct wfx_hif_msg *)skb->data;
>  	hif_msg->len = cpu_to_le16(skb->len);
>  	hif_msg->id = HIF_REQ_ID_TX;
> -	hif_msg->interface = wvif->id;
> +	if (tx_info->flags & IEEE80211_TX_CTL_TX_OFFCHAN)
> +		hif_msg->interface = 2;

I never actually see wfx_add_interface() with id 2...
Which leaves all the queues uninitialized....

> +	else
> +		hif_msg->interface = wvif->id;
>  	if (skb->len > le16_to_cpu(wvif->wdev->hw_caps.size_inp_ch_buf)) {
>  		dev_warn(wvif->wdev->dev,
>  			 "requested frame size (%d) is larger than maximum supported (%d)\n",
> @@ -383,9 +399,15 @@ static int wfx_tx_inner(struct wfx_vif *wvif, struct ieee80211_sta *sta, struct
>  	req->fc_offset = offset;
>  	/* Queue index are inverted between firmware and Linux */
>  	req->queue_id = 3 - queue_id;
> -	req->peer_sta_id = wfx_tx_get_link_id(wvif, sta, hdr);
> -	req->retry_policy_index = wfx_tx_get_retry_policy_id(wvif, tx_info);
> -	req->frame_format = wfx_tx_get_frame_format(tx_info);
> +	if (tx_info->flags & IEEE80211_TX_CTL_TX_OFFCHAN) {
> +		req->peer_sta_id = HIF_LINK_ID_NOT_ASSOCIATED;
> +		req->retry_policy_index = HIF_TX_RETRY_POLICY_INVALID;
> +		req->frame_format = HIF_FRAME_FORMAT_NON_HT;
> +	} else {
> +		req->peer_sta_id = wfx_tx_get_link_id(wvif, sta, hdr);
> +		req->retry_policy_index = wfx_tx_get_retry_policy_id(wvif, tx_info);
> +		req->frame_format = wfx_tx_get_frame_format(tx_info);
> +	}
>  	if (tx_info->driver_rates[0].flags & IEEE80211_TX_RC_SHORT_GI)
>  		req->short_gi = 1;
>  	if (tx_info->flags & IEEE80211_TX_CTL_SEND_AFTER_DTIM)
> @@ -501,7 +523,7 @@ void wfx_tx_confirm_cb(struct wfx_dev *wdev, const struct wfx_hif_cnf_tx *arg)
>  	}
>  	tx_info = IEEE80211_SKB_CB(skb);
>  	tx_priv = wfx_skb_tx_priv(skb);
> -	wvif = wdev_to_wvif(wdev, ((struct wfx_hif_msg *)skb->data)->interface);
> +	wvif = wfx_skb_wvif(wdev, skb);
>  	WARN_ON(!wvif);
>  	if (!wvif)
>  		return;
> @@ -563,7 +585,6 @@ void wfx_flush(struct ieee80211_hw *hw, struct ieee80211_vif *vif, u32 queues, b
>  	struct wfx_dev *wdev = hw->priv;
>  	struct sk_buff_head dropped;
>  	struct wfx_vif *wvif;
> -	struct wfx_hif_msg *hif;
>  	struct sk_buff *skb;
>  
>  	skb_queue_head_init(&dropped);
> @@ -579,8 +600,7 @@ void wfx_flush(struct ieee80211_hw *hw, struct ieee80211_vif *vif, u32 queues, b
>  	if (wdev->chip_frozen)
>  		wfx_pending_drop(wdev, &dropped);
>  	while ((skb = skb_dequeue(&dropped)) != NULL) {
> -		hif = (struct wfx_hif_msg *)skb->data;
> -		wvif = wdev_to_wvif(wdev, hif->interface);
> +		wvif = wfx_skb_wvif(wdev, skb);
>  		ieee80211_tx_info_clear_status(IEEE80211_SKB_CB(skb));
>  		wfx_skb_dtor(wvif, skb);
>  	}
> diff --git a/drivers/net/wireless/silabs/wfx/data_tx.h b/drivers/net/wireless/silabs/wfx/data_tx.h
> index a5b80eacce39a..0621b82103bef 100644
> --- a/drivers/net/wireless/silabs/wfx/data_tx.h
> +++ b/drivers/net/wireless/silabs/wfx/data_tx.h
> @@ -36,6 +36,7 @@ struct wfx_tx_policy_cache {
>  struct wfx_tx_priv {
>  	ktime_t xmit_timestamp;
>  	unsigned char icv_size;
> +	unsigned char vif_id;
>  };
>  
>  void wfx_tx_policy_init(struct wfx_vif *wvif);
> @@ -47,5 +48,6 @@ void wfx_flush(struct ieee80211_hw *hw, struct ieee80211_vif *vif, u32 queues, b
>  
>  struct wfx_tx_priv *wfx_skb_tx_priv(struct sk_buff *skb);
>  struct wfx_hif_req_tx *wfx_skb_txreq(struct sk_buff *skb);
> +struct wfx_vif *wfx_skb_wvif(struct wfx_dev *wdev, struct sk_buff *skb);
>  
>  #endif
> diff --git a/drivers/net/wireless/silabs/wfx/queue.c b/drivers/net/wireless/silabs/wfx/queue.c
> index 37f492e5d3bea..e61b86f211e53 100644
> --- a/drivers/net/wireless/silabs/wfx/queue.c
> +++ b/drivers/net/wireless/silabs/wfx/queue.c
> @@ -68,13 +68,16 @@ void wfx_tx_queues_init(struct wfx_vif *wvif)
>  	for (i = 0; i < IEEE80211_NUM_ACS; ++i) {
>  		skb_queue_head_init(&wvif->tx_queue[i].normal);
>  		skb_queue_head_init(&wvif->tx_queue[i].cab);
> +		skb_queue_head_init(&wvif->tx_queue[i].offchan);
>  		wvif->tx_queue[i].priority = priorities[i];
>  	}
>  }
>  
>  bool wfx_tx_queue_empty(struct wfx_vif *wvif, struct wfx_queue *queue)
>  {
> -	return skb_queue_empty_lockless(&queue->normal) && skb_queue_empty_lockless(&queue->cab);
> +	return skb_queue_empty_lockless(&queue->normal) &&
> +	       skb_queue_empty_lockless(&queue->cab) &&
> +	       skb_queue_empty_lockless(&queue->offchan);
>  }
>  
>  void wfx_tx_queues_check_empty(struct wfx_vif *wvif)
> @@ -103,8 +106,9 @@ static void __wfx_tx_queue_drop(struct wfx_vif *wvif,
>  void wfx_tx_queue_drop(struct wfx_vif *wvif, struct wfx_queue *queue,
>  		       struct sk_buff_head *dropped)
>  {
> -	__wfx_tx_queue_drop(wvif, &queue->cab, dropped);
>  	__wfx_tx_queue_drop(wvif, &queue->normal, dropped);
> +	__wfx_tx_queue_drop(wvif, &queue->cab, dropped);
> +	__wfx_tx_queue_drop(wvif, &queue->offchan, dropped);
>  	wake_up(&wvif->wdev->tx_dequeue);
>  }
>  
> @@ -113,7 +117,9 @@ void wfx_tx_queues_put(struct wfx_vif *wvif, struct sk_buff *skb)
>  	struct wfx_queue *queue = &wvif->tx_queue[skb_get_queue_mapping(skb)];
>  	struct ieee80211_tx_info *tx_info = IEEE80211_SKB_CB(skb);
>  
> -	if (tx_info->flags & IEEE80211_TX_CTL_SEND_AFTER_DTIM)
> +	if (tx_info->flags & IEEE80211_TX_CTL_TX_OFFCHAN)
> +		skb_queue_tail(&queue->offchan, skb);
> +	else if (tx_info->flags & IEEE80211_TX_CTL_SEND_AFTER_DTIM)
>  		skb_queue_tail(&queue->cab, skb);
>  	else
>  		skb_queue_tail(&queue->normal, skb);
> @@ -123,13 +129,11 @@ void wfx_pending_drop(struct wfx_dev *wdev, struct sk_buff_head *dropped)
>  {
>  	struct wfx_queue *queue;
>  	struct wfx_vif *wvif;
> -	struct wfx_hif_msg *hif;
>  	struct sk_buff *skb;
>  
>  	WARN(!wdev->chip_frozen, "%s should only be used to recover a frozen device", __func__);
>  	while ((skb = skb_dequeue(&wdev->tx_pending)) != NULL) {
> -		hif = (struct wfx_hif_msg *)skb->data;
> -		wvif = wdev_to_wvif(wdev, hif->interface);
> +		wvif = wfx_skb_wvif(wdev, skb);
>  		if (wvif) {
>  			queue = &wvif->tx_queue[skb_get_queue_mapping(skb)];
>  			WARN_ON(skb_get_queue_mapping(skb) > 3);
> @@ -155,7 +159,7 @@ struct sk_buff *wfx_pending_get(struct wfx_dev *wdev, u32 packet_id)
>  		if (req->packet_id != packet_id)
>  			continue;
>  		spin_unlock_bh(&wdev->tx_pending.lock);
> -		wvif = wdev_to_wvif(wdev, hif->interface);
> +		wvif = wfx_skb_wvif(wdev, skb);
>  		if (wvif) {
>  			queue = &wvif->tx_queue[skb_get_queue_mapping(skb)];
>  			WARN_ON(skb_get_queue_mapping(skb) > 3);
> @@ -246,6 +250,26 @@ static struct sk_buff *wfx_tx_queues_get_skb(struct wfx_dev *wdev)
>  		}
>  	}
>  
> +	wvif = NULL;
> +	while ((wvif = wvif_iterate(wdev, wvif)) != NULL) {
        ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^

Is there actually any point in iterating over wvifs here?
It has been done right before and all the queues are now sorted in
the common "queues"?

> +		for (i = 0; i < num_queues; i++) {
> +			skb = skb_dequeue(&queues[i]->offchan);
                              ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^

Nevertheless, the lockdep splat comes from here, because
wfx_tx_queues_init() has never been called for wvif->id == 2.

What was your original plan for this to happen?
Do we need an explicit analogue of wfx_add_interface() for vif->id 2 somewhere?

I still have not come with a reproducer yet, but you definitely have more
insight into this code, maybe this will ring some bells on your side...

PS. It's v6.11, even though it's exactly the same splan as in
"staging: wfx: fix potential use before init", but the patch in indeed inside.

> +			if (!skb)
> +				continue;
> +			hif = (struct wfx_hif_msg *)skb->data;
> +			/* Offchan frames are assigned to a special interface.
> +			 * The only interface allowed to send data during scan.
> +			 */
> +			WARN_ON(hif->interface != 2);
> +			atomic_inc(&queues[i]->pending_frames);
> +			trace_queues_stats(wdev, queues[i]);
> +			return skb;
> +		}
> +	}
> +
> +	if (mutex_is_locked(&wdev->scan_lock))
> +		return NULL;
> +
>  	wvif = NULL;
>  	while ((wvif = wvif_iterate(wdev, wvif)) != NULL) {
>  		if (!wvif->after_dtim_tx_allowed)
> diff --git a/drivers/net/wireless/silabs/wfx/queue.h b/drivers/net/wireless/silabs/wfx/queue.h
> index 4731debca93d2..6857fbd60fbad 100644
> --- a/drivers/net/wireless/silabs/wfx/queue.h
> +++ b/drivers/net/wireless/silabs/wfx/queue.h
> @@ -17,6 +17,7 @@ struct wfx_vif;
>  struct wfx_queue {
>  	struct sk_buff_head normal;
>  	struct sk_buff_head cab; /* Content After (DTIM) Beacon */
> +	struct sk_buff_head offchan;
>  	atomic_t            pending_frames;
>  	int                 priority;
>  };
Jérôme Pouiller Nov. 26, 2024, 2:45 p.m. UTC | #2
On Tuesday 26 November 2024 08:27:43 CET Sverdlin, Alexander wrote:
> 
> Hi Jerome,
> 
> I've just got this (with CONFIG_PROVE_LOCKING) in idle,
> without any traffic on the wlan interface:
> 
> [119012.104386] INFO: trying to register non-static key.
> [119012.109730] The code is fine but needs lockdep annotation, or maybe
> [119012.116313] you didn't initialize this object before use?
> [119012.121992] turning off the locking correctness validator.
> [119012.127778] CPU: 0 UID: 0 PID: 336 Comm: kworker/0:1H Tainted: G           O       6.11.0
> [119012.139802] Tainted: [O]=OOT_MODULE
> [119012.148547] Workqueue: wfx_bh_wq bh_work [wfx]
> [119012.153591] Call trace:
> [119012.156282]  dump_backtrace+0xa0/0x128
> [119012.160340]  show_stack+0x20/0x38
> [119012.163939]  dump_stack_lvl+0x8c/0xd0
> [119012.167890]  dump_stack+0x18/0x28
> [119012.171472]  register_lock_class+0x4b0/0x4c0
> [119012.176033]  __lock_acquire+0xa0/0x1f50
> [119012.180148]  lock_acquire+0x1f8/0x330
> [119012.184083]  _raw_spin_lock_irqsave+0x68/0x98
> [119012.188731]  skb_dequeue+0x2c/0xa8
> [119012.192390]  wfx_tx_queues_get+0x20c/0x5a0 [wfx]
> [119012.197525]  bh_work+0x3bc/0x950 [wfx]
> [119012.201749]  process_one_work+0x234/0x658
> [119012.206040]  worker_thread+0x1bc/0x360
> [119012.210056]  kthread+0x124/0x130
> [119012.213535]  ret_from_fork+0x10/0x20
> 
> the uptime is pretty high, as you can see, it's not in startup.
> But I've noticed that NetworkManeger closes and opens
> the interface from time to time, which leads to
> wfx_remove_interface() of wvif->id 0 and consequent
> wfx_add_interface() of wvif->id 0. And only 0, which seems to be relevant,
> see below...

Thank you for the report. Let's dive into this.


> On Wed, 2023-10-04 at 19:28 +0200, Jérôme Pouiller wrote:
> > Until now, all the traffic was blocked during scan operation. However,
> > scan operation is going to be used to implement Remain On Channel (ROC).
> > In this case, special frames (marked with IEEE80211_TX_CTL_TX_OFFCHAN)
> > must be sent during the operation.
> >
> > These frames need to be sent on the virtual interface #2. Until now,
> > this interface was only used by the device for internal purpose. But
> > since API 3.9, it can be used to send data during scan operation (we
> > hijack the scan process to implement ROC).
> >
> > Thus, we need to change a bit the way we match the frames with the
> > interface.
> >
> > Fortunately, the frames received during the scan are marked with the
> > correct interface number. So there is no change to do on this part.
> >
> > Signed-off-by: Jérôme Pouiller <jerome.pouiller@silabs.com>
> > ---
> >  drivers/net/wireless/silabs/wfx/data_tx.c | 36 ++++++++++++++++-----
> >  drivers/net/wireless/silabs/wfx/data_tx.h |  2 ++
> >  drivers/net/wireless/silabs/wfx/queue.c   | 38 ++++++++++++++++++-----
> >  drivers/net/wireless/silabs/wfx/queue.h   |  1 +
> >  4 files changed, 62 insertions(+), 15 deletions(-)
> >
> > diff --git a/drivers/net/wireless/silabs/wfx/data_tx.c b/drivers/net/wireless/silabs/wfx/data_tx.c
> > index ce2b5dcfd8d89..e8b6d41f55196 100644
> > --- a/drivers/net/wireless/silabs/wfx/data_tx.c
> > +++ b/drivers/net/wireless/silabs/wfx/data_tx.c
> > @@ -226,6 +226,18 @@ struct wfx_hif_req_tx *wfx_skb_txreq(struct sk_buff *skb)
> >       return req;
> >  }
> >
> > +struct wfx_vif *wfx_skb_wvif(struct wfx_dev *wdev, struct sk_buff *skb)
> > +{
> > +     struct wfx_tx_priv *tx_priv = wfx_skb_tx_priv(skb);
> > +     struct wfx_hif_msg *hif = (struct wfx_hif_msg *)skb->data;
> > +
> > +     if (tx_priv->vif_id != hif->interface && hif->interface != 2) {
> > +             dev_err(wdev->dev, "corrupted skb");
> > +             return wdev_to_wvif(wdev, hif->interface);
> > +     }
> > +     return wdev_to_wvif(wdev, tx_priv->vif_id);
> > +}
> > +
> >  static u8 wfx_tx_get_link_id(struct wfx_vif *wvif, struct ieee80211_sta *sta,
> >                            struct ieee80211_hdr *hdr)
> >  {
> > @@ -352,6 +364,7 @@ static int wfx_tx_inner(struct wfx_vif *wvif, struct ieee80211_sta *sta, struct
> >       /* Fill tx_priv */
> >       tx_priv = (struct wfx_tx_priv *)tx_info->rate_driver_data;
> >       tx_priv->icv_size = wfx_tx_get_icv_len(hw_key);
> > +     tx_priv->vif_id = wvif->id;
> >
> >       /* Fill hif_msg */
> >       WARN(skb_headroom(skb) < wmsg_len, "not enough space in skb");
> > @@ -362,7 +375,10 @@ static int wfx_tx_inner(struct wfx_vif *wvif, struct ieee80211_sta *sta, struct
> >       hif_msg = (struct wfx_hif_msg *)skb->data;
> >       hif_msg->len = cpu_to_le16(skb->len);
> >       hif_msg->id = HIF_REQ_ID_TX;
> > -     hif_msg->interface = wvif->id;
> > +     if (tx_info->flags & IEEE80211_TX_CTL_TX_OFFCHAN)
> > +             hif_msg->interface = 2;
> 
> I never actually see wfx_add_interface() with id 2...
> Which leaves all the queues uninitialized....

This is expected. Interface 2 is not a real interface. You can consider
it as a special queue (for offchannel packets) on the device.

[...]

> > @@ -246,6 +250,26 @@ static struct sk_buff *wfx_tx_queues_get_skb(struct wfx_dev *wdev)
> >               }
> >       }
> >
> > +     wvif = NULL;
> > +     while ((wvif = wvif_iterate(wdev, wvif)) != NULL) {
>         ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
> 
> Is there actually any point in iterating over wvifs here?
> It has been done right before and all the queues are now sorted in
> the common "queues"?

hmm... you're probably right.

> 
> > +             for (i = 0; i < num_queues; i++) {
> > +                     skb = skb_dequeue(&queues[i]->offchan);
>                               ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
> 
> Nevertheless, the lockdep splat comes from here, because
> wfx_tx_queues_init() has never been called for wvif->id == 2.
>
> What was your original plan for this to happen?
> Do we need an explicit analogue of wfx_add_interface() for vif->id 2 somewhere?
> 
> I still have not come with a reproducer yet, but you definitely have more
> insight into this code, maybe this will ring some bells on your side...
> 
> PS. It's v6.11, even though it's exactly the same splan as in
> "staging: wfx: fix potential use before init", but the patch in indeed inside.

Yes, probably a very similar issue to "staging: wfx: fix potential use 
before init". I don't believe the issue is related to wvif->id == 2.

You have only produced this issue once, that's it?

I wonder why this does not happen with queues[i]->normal and
queues[i]->cab. Is it because queues[i]->offchan is the first to be
checked? Or mutex_is_locked(&wdev->scan_lock) has an impact in the
process?

In wfx_add_interface(), the list of wvif is protected by conf_lock.
However, wfx_tx_queues_get_skb() is not protected by conf_lock. We
initialize struct wvif before to add it to the wvif list and we
consider it is sufficient. However, after reading memory-barriers.txt
again, it's probably a wrong assumption.


So, maybe this could fix the issue:

diff --git i/drivers/net/wireless/silabs/wfx/sta.c w/drivers/net/wireless/silabs/wfx/sta.c
index a904602f02ce..b22ea4243c0f 100644
--- i/drivers/net/wireless/silabs/wfx/sta.c
+++ w/drivers/net/wireless/silabs/wfx/sta.c
@@ -748,6 +748,7 @@ int wfx_add_interface(struct ieee80211_hw *hw, struct ieee80211_vif *vif)

        for (i = 0; i < ARRAY_SIZE(wdev->vif); i++) {
                if (!wdev->vif[i]) {
+                       smp_mb();
                        wdev->vif[i] = vif;
                        wvif->id = i;
                        break;


However, I am not confident in playing with memory barriers.
A. Sverdlin Nov. 26, 2024, 3:41 p.m. UTC | #3
Hi Jerome,

On Tue, 2024-11-26 at 15:45 +0100, Jérôme Pouiller wrote:
> In wfx_add_interface(), the list of wvif is protected by conf_lock.
> However, wfx_tx_queues_get_skb() is not protected by conf_lock. We
> initialize struct wvif before to add it to the wvif list and we
> consider it is sufficient. However, after reading memory-barriers.txt
> again, it's probably a wrong assumption.
> 
> 
> So, maybe this could fix the issue:
> 
> diff --git i/drivers/net/wireless/silabs/wfx/sta.c w/drivers/net/wireless/silabs/wfx/sta.c
> index a904602f02ce..b22ea4243c0f 100644
> --- i/drivers/net/wireless/silabs/wfx/sta.c
> +++ w/drivers/net/wireless/silabs/wfx/sta.c
> @@ -748,6 +748,7 @@ int wfx_add_interface(struct ieee80211_hw *hw, struct ieee80211_vif *vif)
> 
>         for (i = 0; i < ARRAY_SIZE(wdev->vif); i++) {
>                 if (!wdev->vif[i]) {
> +                       smp_mb();
>                         wdev->vif[i] = vif;
>                         wvif->id = i;
>                         break;
> 
> 
> However, I am not confident in playing with memory barriers.

yes, I'd consider the whole TX path very racy again VIF add/remove.
But this is a separate topic...
A. Sverdlin Nov. 26, 2024, 3:54 p.m. UTC | #4
Thanks for the quick reply Jerome,

On Tue, 2024-11-26 at 15:45 +0100, Jérôme Pouiller wrote:
> > > +             for (i = 0; i < num_queues; i++) {
> > > +                     skb = skb_dequeue(&queues[i]->offchan);
> >                                ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
> > 
> > Nevertheless, the lockdep splat comes from here, because
> > wfx_tx_queues_init() has never been called for wvif->id == 2.
> > 
> > What was your original plan for this to happen?
> > Do we need an explicit analogue of wfx_add_interface() for vif->id 2 somewhere?
> > 
> > I still have not come with a reproducer yet, but you definitely have more
> > insight into this code, maybe this will ring some bells on your side...
> > 
> > PS. It's v6.11, even though it's exactly the same splan as in
> > "staging: wfx: fix potential use before init", but the patch in indeed inside.
> 
> Yes, probably a very similar issue to "staging: wfx: fix potential use 
> before init". I don't believe the issue is related to wvif->id == 2.
> 
> You have only produced this issue once, that's it?
> 
> I wonder why this does not happen with queues[i]->normal and
> queues[i]->cab. Is it because queues[i]->offchan is the first to be
> checked? Or mutex_is_locked(&wdev->scan_lock) has an impact in the
> process?
> 
> In wfx_add_interface(), the list of wvif is protected by conf_lock.
> However, wfx_tx_queues_get_skb() is not protected by conf_lock. We
> initialize struct wvif before to add it to the wvif list and we
> consider it is sufficient. However, after reading memory-barriers.txt
> again, it's probably a wrong assumption.

I've actually disassembled the stack trace exactly to offchan processing.
I have no idea why kernel sends offchan on a non-configured idle interface,
I still need to come up with a reproducer.

But as soon as there is an offchan in the sorted list of "queues" (coming
originally from VIF 0:

void wfx_tx(struct ieee80211_hw *hw, struct ieee80211_tx_control *control, struct sk_buff *skb)
{
...
        if (tx_info->control.vif)
                wvif = (struct wfx_vif *)tx_info->control.vif->drv_priv;
        else
                wvif = wvif_iterate(wdev, NULL);
                ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^

Puts any offchan into offchan not of VIF 2, but of the only VIF 0...

I think you are right, this could only be offchan queue of VIF 0.
But then it's just a race of TX workqueue against
wfx_remove_interface()/wfx_add_interface() pair (which I see regularly).

We probably need RCU in the TX path and NetLink lock in the VIF add/remove
path similar to other network drivers...
I can try to come up with a patch for this...
diff mbox series

Patch

diff --git a/drivers/net/wireless/silabs/wfx/data_tx.c b/drivers/net/wireless/silabs/wfx/data_tx.c
index ce2b5dcfd8d89..e8b6d41f55196 100644
--- a/drivers/net/wireless/silabs/wfx/data_tx.c
+++ b/drivers/net/wireless/silabs/wfx/data_tx.c
@@ -226,6 +226,18 @@  struct wfx_hif_req_tx *wfx_skb_txreq(struct sk_buff *skb)
 	return req;
 }
 
+struct wfx_vif *wfx_skb_wvif(struct wfx_dev *wdev, struct sk_buff *skb)
+{
+	struct wfx_tx_priv *tx_priv = wfx_skb_tx_priv(skb);
+	struct wfx_hif_msg *hif = (struct wfx_hif_msg *)skb->data;
+
+	if (tx_priv->vif_id != hif->interface && hif->interface != 2) {
+		dev_err(wdev->dev, "corrupted skb");
+		return wdev_to_wvif(wdev, hif->interface);
+	}
+	return wdev_to_wvif(wdev, tx_priv->vif_id);
+}
+
 static u8 wfx_tx_get_link_id(struct wfx_vif *wvif, struct ieee80211_sta *sta,
 			     struct ieee80211_hdr *hdr)
 {
@@ -352,6 +364,7 @@  static int wfx_tx_inner(struct wfx_vif *wvif, struct ieee80211_sta *sta, struct
 	/* Fill tx_priv */
 	tx_priv = (struct wfx_tx_priv *)tx_info->rate_driver_data;
 	tx_priv->icv_size = wfx_tx_get_icv_len(hw_key);
+	tx_priv->vif_id = wvif->id;
 
 	/* Fill hif_msg */
 	WARN(skb_headroom(skb) < wmsg_len, "not enough space in skb");
@@ -362,7 +375,10 @@  static int wfx_tx_inner(struct wfx_vif *wvif, struct ieee80211_sta *sta, struct
 	hif_msg = (struct wfx_hif_msg *)skb->data;
 	hif_msg->len = cpu_to_le16(skb->len);
 	hif_msg->id = HIF_REQ_ID_TX;
-	hif_msg->interface = wvif->id;
+	if (tx_info->flags & IEEE80211_TX_CTL_TX_OFFCHAN)
+		hif_msg->interface = 2;
+	else
+		hif_msg->interface = wvif->id;
 	if (skb->len > le16_to_cpu(wvif->wdev->hw_caps.size_inp_ch_buf)) {
 		dev_warn(wvif->wdev->dev,
 			 "requested frame size (%d) is larger than maximum supported (%d)\n",
@@ -383,9 +399,15 @@  static int wfx_tx_inner(struct wfx_vif *wvif, struct ieee80211_sta *sta, struct
 	req->fc_offset = offset;
 	/* Queue index are inverted between firmware and Linux */
 	req->queue_id = 3 - queue_id;
-	req->peer_sta_id = wfx_tx_get_link_id(wvif, sta, hdr);
-	req->retry_policy_index = wfx_tx_get_retry_policy_id(wvif, tx_info);
-	req->frame_format = wfx_tx_get_frame_format(tx_info);
+	if (tx_info->flags & IEEE80211_TX_CTL_TX_OFFCHAN) {
+		req->peer_sta_id = HIF_LINK_ID_NOT_ASSOCIATED;
+		req->retry_policy_index = HIF_TX_RETRY_POLICY_INVALID;
+		req->frame_format = HIF_FRAME_FORMAT_NON_HT;
+	} else {
+		req->peer_sta_id = wfx_tx_get_link_id(wvif, sta, hdr);
+		req->retry_policy_index = wfx_tx_get_retry_policy_id(wvif, tx_info);
+		req->frame_format = wfx_tx_get_frame_format(tx_info);
+	}
 	if (tx_info->driver_rates[0].flags & IEEE80211_TX_RC_SHORT_GI)
 		req->short_gi = 1;
 	if (tx_info->flags & IEEE80211_TX_CTL_SEND_AFTER_DTIM)
@@ -501,7 +523,7 @@  void wfx_tx_confirm_cb(struct wfx_dev *wdev, const struct wfx_hif_cnf_tx *arg)
 	}
 	tx_info = IEEE80211_SKB_CB(skb);
 	tx_priv = wfx_skb_tx_priv(skb);
-	wvif = wdev_to_wvif(wdev, ((struct wfx_hif_msg *)skb->data)->interface);
+	wvif = wfx_skb_wvif(wdev, skb);
 	WARN_ON(!wvif);
 	if (!wvif)
 		return;
@@ -563,7 +585,6 @@  void wfx_flush(struct ieee80211_hw *hw, struct ieee80211_vif *vif, u32 queues, b
 	struct wfx_dev *wdev = hw->priv;
 	struct sk_buff_head dropped;
 	struct wfx_vif *wvif;
-	struct wfx_hif_msg *hif;
 	struct sk_buff *skb;
 
 	skb_queue_head_init(&dropped);
@@ -579,8 +600,7 @@  void wfx_flush(struct ieee80211_hw *hw, struct ieee80211_vif *vif, u32 queues, b
 	if (wdev->chip_frozen)
 		wfx_pending_drop(wdev, &dropped);
 	while ((skb = skb_dequeue(&dropped)) != NULL) {
-		hif = (struct wfx_hif_msg *)skb->data;
-		wvif = wdev_to_wvif(wdev, hif->interface);
+		wvif = wfx_skb_wvif(wdev, skb);
 		ieee80211_tx_info_clear_status(IEEE80211_SKB_CB(skb));
 		wfx_skb_dtor(wvif, skb);
 	}
diff --git a/drivers/net/wireless/silabs/wfx/data_tx.h b/drivers/net/wireless/silabs/wfx/data_tx.h
index a5b80eacce39a..0621b82103bef 100644
--- a/drivers/net/wireless/silabs/wfx/data_tx.h
+++ b/drivers/net/wireless/silabs/wfx/data_tx.h
@@ -36,6 +36,7 @@  struct wfx_tx_policy_cache {
 struct wfx_tx_priv {
 	ktime_t xmit_timestamp;
 	unsigned char icv_size;
+	unsigned char vif_id;
 };
 
 void wfx_tx_policy_init(struct wfx_vif *wvif);
@@ -47,5 +48,6 @@  void wfx_flush(struct ieee80211_hw *hw, struct ieee80211_vif *vif, u32 queues, b
 
 struct wfx_tx_priv *wfx_skb_tx_priv(struct sk_buff *skb);
 struct wfx_hif_req_tx *wfx_skb_txreq(struct sk_buff *skb);
+struct wfx_vif *wfx_skb_wvif(struct wfx_dev *wdev, struct sk_buff *skb);
 
 #endif
diff --git a/drivers/net/wireless/silabs/wfx/queue.c b/drivers/net/wireless/silabs/wfx/queue.c
index 37f492e5d3bea..e61b86f211e53 100644
--- a/drivers/net/wireless/silabs/wfx/queue.c
+++ b/drivers/net/wireless/silabs/wfx/queue.c
@@ -68,13 +68,16 @@  void wfx_tx_queues_init(struct wfx_vif *wvif)
 	for (i = 0; i < IEEE80211_NUM_ACS; ++i) {
 		skb_queue_head_init(&wvif->tx_queue[i].normal);
 		skb_queue_head_init(&wvif->tx_queue[i].cab);
+		skb_queue_head_init(&wvif->tx_queue[i].offchan);
 		wvif->tx_queue[i].priority = priorities[i];
 	}
 }
 
 bool wfx_tx_queue_empty(struct wfx_vif *wvif, struct wfx_queue *queue)
 {
-	return skb_queue_empty_lockless(&queue->normal) && skb_queue_empty_lockless(&queue->cab);
+	return skb_queue_empty_lockless(&queue->normal) &&
+	       skb_queue_empty_lockless(&queue->cab) &&
+	       skb_queue_empty_lockless(&queue->offchan);
 }
 
 void wfx_tx_queues_check_empty(struct wfx_vif *wvif)
@@ -103,8 +106,9 @@  static void __wfx_tx_queue_drop(struct wfx_vif *wvif,
 void wfx_tx_queue_drop(struct wfx_vif *wvif, struct wfx_queue *queue,
 		       struct sk_buff_head *dropped)
 {
-	__wfx_tx_queue_drop(wvif, &queue->cab, dropped);
 	__wfx_tx_queue_drop(wvif, &queue->normal, dropped);
+	__wfx_tx_queue_drop(wvif, &queue->cab, dropped);
+	__wfx_tx_queue_drop(wvif, &queue->offchan, dropped);
 	wake_up(&wvif->wdev->tx_dequeue);
 }
 
@@ -113,7 +117,9 @@  void wfx_tx_queues_put(struct wfx_vif *wvif, struct sk_buff *skb)
 	struct wfx_queue *queue = &wvif->tx_queue[skb_get_queue_mapping(skb)];
 	struct ieee80211_tx_info *tx_info = IEEE80211_SKB_CB(skb);
 
-	if (tx_info->flags & IEEE80211_TX_CTL_SEND_AFTER_DTIM)
+	if (tx_info->flags & IEEE80211_TX_CTL_TX_OFFCHAN)
+		skb_queue_tail(&queue->offchan, skb);
+	else if (tx_info->flags & IEEE80211_TX_CTL_SEND_AFTER_DTIM)
 		skb_queue_tail(&queue->cab, skb);
 	else
 		skb_queue_tail(&queue->normal, skb);
@@ -123,13 +129,11 @@  void wfx_pending_drop(struct wfx_dev *wdev, struct sk_buff_head *dropped)
 {
 	struct wfx_queue *queue;
 	struct wfx_vif *wvif;
-	struct wfx_hif_msg *hif;
 	struct sk_buff *skb;
 
 	WARN(!wdev->chip_frozen, "%s should only be used to recover a frozen device", __func__);
 	while ((skb = skb_dequeue(&wdev->tx_pending)) != NULL) {
-		hif = (struct wfx_hif_msg *)skb->data;
-		wvif = wdev_to_wvif(wdev, hif->interface);
+		wvif = wfx_skb_wvif(wdev, skb);
 		if (wvif) {
 			queue = &wvif->tx_queue[skb_get_queue_mapping(skb)];
 			WARN_ON(skb_get_queue_mapping(skb) > 3);
@@ -155,7 +159,7 @@  struct sk_buff *wfx_pending_get(struct wfx_dev *wdev, u32 packet_id)
 		if (req->packet_id != packet_id)
 			continue;
 		spin_unlock_bh(&wdev->tx_pending.lock);
-		wvif = wdev_to_wvif(wdev, hif->interface);
+		wvif = wfx_skb_wvif(wdev, skb);
 		if (wvif) {
 			queue = &wvif->tx_queue[skb_get_queue_mapping(skb)];
 			WARN_ON(skb_get_queue_mapping(skb) > 3);
@@ -246,6 +250,26 @@  static struct sk_buff *wfx_tx_queues_get_skb(struct wfx_dev *wdev)
 		}
 	}
 
+	wvif = NULL;
+	while ((wvif = wvif_iterate(wdev, wvif)) != NULL) {
+		for (i = 0; i < num_queues; i++) {
+			skb = skb_dequeue(&queues[i]->offchan);
+			if (!skb)
+				continue;
+			hif = (struct wfx_hif_msg *)skb->data;
+			/* Offchan frames are assigned to a special interface.
+			 * The only interface allowed to send data during scan.
+			 */
+			WARN_ON(hif->interface != 2);
+			atomic_inc(&queues[i]->pending_frames);
+			trace_queues_stats(wdev, queues[i]);
+			return skb;
+		}
+	}
+
+	if (mutex_is_locked(&wdev->scan_lock))
+		return NULL;
+
 	wvif = NULL;
 	while ((wvif = wvif_iterate(wdev, wvif)) != NULL) {
 		if (!wvif->after_dtim_tx_allowed)
diff --git a/drivers/net/wireless/silabs/wfx/queue.h b/drivers/net/wireless/silabs/wfx/queue.h
index 4731debca93d2..6857fbd60fbad 100644
--- a/drivers/net/wireless/silabs/wfx/queue.h
+++ b/drivers/net/wireless/silabs/wfx/queue.h
@@ -17,6 +17,7 @@  struct wfx_vif;
 struct wfx_queue {
 	struct sk_buff_head normal;
 	struct sk_buff_head cab; /* Content After (DTIM) Beacon */
+	struct sk_buff_head offchan;
 	atomic_t            pending_frames;
 	int                 priority;
 };