diff mbox series

[2/3] wifi: ath9k: fix races between ath9k_wmi_cmd and ath9k_wmi_ctrl_rx

Message ID 20230315202112.163012-3-pchelkin@ispras.ru
State New
Headers show
Series wifi: ath9k: deal with uninit memory | expand

Commit Message

Fedor Pchelkin March 15, 2023, 8:21 p.m. UTC
Currently, the synchronization between ath9k_wmi_cmd() and
ath9k_wmi_ctrl_rx() is exposed to races which can result, for example, not
only in wmi->cmd_rsp_buf being incorrectly initialized, but in overall
invalid behaviour of ath9k_wmi_cmd().

Consider the following scenario:

CPU0					CPU1

ath9k_wmi_cmd(...)
  mutex_lock(&wmi->op_mutex)
  ath9k_wmi_cmd_issue(...)
  wait_for_completion_timeout(...)
  ---
  timeout
  ---
  mutex_unlock(&wmi->op_mutex)
  return -ETIMEDOUT

ath9k_wmi_cmd(...)
  mutex_lock(&wmi->op_mutex)
  ath9k_wmi_cmd_issue(...)
  wait_for_completion_timeout(...)
					/* the one left after the first
					 * ath9k_wmi_cmd call
					 */
					ath9k_wmi_rsp_callback(...)
					  memcpy(...)
					  complete(&wmi->cmd_wait);
  /* Awakened by the bogus callback
   * => invalid return
   */
  mutex_unlock(&wmi->op_mutex)
  return 0

This may occur even on uniprocessor machines, and in SMP case the races
may be even more intricate.

To fix this, move the contents of ath9k_wmi_rsp_callback() under wmi_lock
inside ath9k_wmi_ctrl_rx() so that the wmi->cmd_wait can be completed only
for initially designated wmi_cmd call, otherwise the path would be
rejected with last_seq_id check.

Also move recording the rsp buffer and length into ath9k_wmi_cmd_issue()
under the same wmi_lock with last_seq_id update to avoid their racy
changes.

Found by Linux Verification Center (linuxtesting.org) with Syzkaller.

Fixes: fb9987d0f748 ("ath9k_htc: Support for AR9271 chipset.")
Reported-and-tested-by: syzbot+df61b36319e045c00a08@syzkaller.appspotmail.com
Signed-off-by: Fedor Pchelkin <pchelkin@ispras.ru>
---
This patch depends on [PATCH 1/3] wifi: ath9k: avoid referencing uninit
memory in ath9k_wmi_ctrl_rx

 drivers/net/wireless/ath/ath9k/wmi.c | 48 ++++++++++++++--------------
 1 file changed, 24 insertions(+), 24 deletions(-)

Comments

Fedor Pchelkin April 24, 2023, 7:11 p.m. UTC | #1
This problem is realy subtle, I suppose. In the v2 commit info, which I'll
send in the next mail, the race condition is described which can lead to
invalid behaviour.

Couldn't reproduce that particular problem on real hardware, but if
force timeouts to wmi cmd completions, local KMSan catches some uninit
values.

The synchronization between ath9k_wmi_cmd and ath9k_wmi_ctrl_rx on
timeouts is good, especially after 8a2f35b98306 ("wifi: ath9k: Fix
potential stack-out-of-bounds write in ath9k_wmi_rsp_callback()").

And I think the only place where the fuzzer can provoke failure is when
wmi->last_seq_id in callback is checked before it is assigned zero inside
ath9k_wmi_cmd() during timeout exit. This scenario is more thoroughly
described in patch v2.

Well, the issue seems to be rare and I don't know how to properly test it
on real hardware.

I've made some checks on a basic driver workflow, and there weren't any
stalls or explicit failures, and the patch seems to close that tiny race
condition window. But, anyway, it requires more discussion.
diff mbox series

Patch

diff --git a/drivers/net/wireless/ath/ath9k/wmi.c b/drivers/net/wireless/ath/ath9k/wmi.c
index 2e7c361b62f5..99a91bbaace9 100644
--- a/drivers/net/wireless/ath/ath9k/wmi.c
+++ b/drivers/net/wireless/ath/ath9k/wmi.c
@@ -200,20 +200,6 @@  void ath9k_fatal_work(struct work_struct *work)
 	ath9k_htc_reset(priv);
 }
 
-static void ath9k_wmi_rsp_callback(struct wmi *wmi, struct sk_buff *skb)
-{
-	skb_pull(skb, sizeof(struct wmi_cmd_hdr));
-
-	/* Once again validate the SKB. */
-	if (unlikely(skb->len < wmi->cmd_rsp_len))
-		return;
-
-	if (wmi->cmd_rsp_buf != NULL && wmi->cmd_rsp_len != 0)
-		memcpy(wmi->cmd_rsp_buf, skb->data, wmi->cmd_rsp_len);
-
-	complete(&wmi->cmd_wait);
-}
-
 static void ath9k_wmi_ctrl_rx(void *priv, struct sk_buff *skb,
 			      enum htc_endpoint_id epid)
 {
@@ -242,14 +228,26 @@  static void ath9k_wmi_ctrl_rx(void *priv, struct sk_buff *skb,
 
 	/* Check if there has been a timeout. */
 	spin_lock_irqsave(&wmi->wmi_lock, flags);
-	if (be16_to_cpu(hdr->seq_no) != wmi->last_seq_id) {
+	if (be16_to_cpu(hdr->seq_no) != wmi->last_seq_id ||
+	    be16_to_cpu(hdr->seq_no) == 0) {
+		spin_unlock_irqrestore(&wmi->wmi_lock, flags);
+		goto free_skb;
+	}
+
+	/* Next, process WMI command response */
+	skb_pull(skb, sizeof(struct wmi_cmd_hdr));
+
+	/* Once again validate the SKB. */
+	if (unlikely(skb->len < wmi->cmd_rsp_len)) {
 		spin_unlock_irqrestore(&wmi->wmi_lock, flags);
 		goto free_skb;
 	}
-	spin_unlock_irqrestore(&wmi->wmi_lock, flags);
 
-	/* WMI command response */
-	ath9k_wmi_rsp_callback(wmi, skb);
+	if (wmi->cmd_rsp_buf != NULL && wmi->cmd_rsp_len != 0)
+		memcpy(wmi->cmd_rsp_buf, skb->data, wmi->cmd_rsp_len);
+
+	complete(&wmi->cmd_wait);
+	spin_unlock_irqrestore(&wmi->wmi_lock, flags);
 
 free_skb:
 	kfree_skb(skb);
@@ -287,7 +285,8 @@  int ath9k_wmi_connect(struct htc_target *htc, struct wmi *wmi,
 
 static int ath9k_wmi_cmd_issue(struct wmi *wmi,
 			       struct sk_buff *skb,
-			       enum wmi_cmd_id cmd, u16 len)
+			       enum wmi_cmd_id cmd, u16 len,
+			       u8 *rsp_buf, u32 rsp_len)
 {
 	struct wmi_cmd_hdr *hdr;
 	unsigned long flags;
@@ -297,6 +296,11 @@  static int ath9k_wmi_cmd_issue(struct wmi *wmi,
 	hdr->seq_no = cpu_to_be16(++wmi->tx_seq_id);
 
 	spin_lock_irqsave(&wmi->wmi_lock, flags);
+
+	/* record the rsp buffer and length */
+	wmi->cmd_rsp_buf = rsp_buf;
+	wmi->cmd_rsp_len = rsp_len;
+
 	wmi->last_seq_id = wmi->tx_seq_id;
 	spin_unlock_irqrestore(&wmi->wmi_lock, flags);
 
@@ -337,11 +341,7 @@  int ath9k_wmi_cmd(struct wmi *wmi, enum wmi_cmd_id cmd_id,
 		goto out;
 	}
 
-	/* record the rsp buffer and length */
-	wmi->cmd_rsp_buf = rsp_buf;
-	wmi->cmd_rsp_len = rsp_len;
-
-	ret = ath9k_wmi_cmd_issue(wmi, skb, cmd_id, cmd_len);
+	ret = ath9k_wmi_cmd_issue(wmi, skb, cmd_id, cmd_len, rsp_buf, rsp_len);
 	if (ret)
 		goto out;