@@ -602,7 +602,6 @@ mt76_dma_rx_poll(struct napi_struct *napi, int budget)
dev = container_of(napi->dev, struct mt76_dev, napi_dev);
qid = napi - dev->napi;
- local_bh_disable();
rcu_read_lock();
do {
@@ -612,7 +611,6 @@ mt76_dma_rx_poll(struct napi_struct *napi, int budget)
} while (cur && done < budget);
rcu_read_unlock();
- local_bh_enable();
if (done < budget && napi_complete(napi))
dev->drv->rx_poll_complete(dev, qid);
@@ -626,6 +624,10 @@ mt76_dma_init(struct mt76_dev *dev)
int i;
init_dummy_netdev(&dev->napi_dev);
+ init_dummy_netdev(&dev->tx_napi_dev);
+ snprintf(dev->napi_dev.name, sizeof(dev->napi_dev.name), "%s",
+ wiphy_name(dev->hw->wiphy));
+ dev->napi_dev.threaded = 1;
mt76_for_each_q_rx(dev, i) {
netif_napi_add(&dev->napi_dev, &dev->napi[i], mt76_dma_rx_poll,
@@ -631,6 +631,7 @@ struct mt76_dev {
struct mt76_mcu mcu;
struct net_device napi_dev;
+ struct net_device tx_napi_dev;
spinlock_t rx_lock;
struct napi_struct napi[__MT_RXQ_MAX];
struct sk_buff_head rx_skb[__MT_RXQ_MAX];
@@ -223,7 +223,7 @@ int mt7603_dma_init(struct mt7603_dev *dev)
if (ret)
return ret;
- netif_tx_napi_add(&dev->mt76.napi_dev, &dev->mt76.tx_napi,
+ netif_tx_napi_add(&dev->mt76.tx_napi_dev, &dev->mt76.tx_napi,
mt7603_poll_tx, NAPI_POLL_WEIGHT);
napi_enable(&dev->mt76.tx_napi);
@@ -245,7 +245,7 @@ int mt7615_dma_init(struct mt7615_dev *dev)
if (ret < 0)
return ret;
- netif_tx_napi_add(&dev->mt76.napi_dev, &dev->mt76.tx_napi,
+ netif_tx_napi_add(&dev->mt76.tx_napi_dev, &dev->mt76.tx_napi,
mt7615_poll_tx, NAPI_POLL_WEIGHT);
napi_enable(&dev->mt76.tx_napi);
@@ -230,7 +230,7 @@ int mt76x02_dma_init(struct mt76x02_dev *dev)
if (ret)
return ret;
- netif_tx_napi_add(&dev->mt76.napi_dev, &dev->mt76.tx_napi,
+ netif_tx_napi_add(&dev->mt76.tx_napi_dev, &dev->mt76.tx_napi,
mt76x02_poll_tx, NAPI_POLL_WEIGHT);
napi_enable(&dev->mt76.tx_napi);
@@ -325,7 +325,7 @@ int mt7915_dma_init(struct mt7915_dev *dev)
if (ret < 0)
return ret;
- netif_tx_napi_add(&dev->mt76.napi_dev, &dev->mt76.tx_napi,
+ netif_tx_napi_add(&dev->mt76.tx_napi_dev, &dev->mt76.tx_napi,
mt7915_poll_tx, NAPI_POLL_WEIGHT);
napi_enable(&dev->mt76.tx_napi);
@@ -299,7 +299,7 @@ int mt7921_dma_init(struct mt7921_dev *dev)
if (ret < 0)
return ret;
- netif_tx_napi_add(&dev->mt76.napi_dev, &dev->mt76.tx_napi,
+ netif_tx_napi_add(&dev->mt76.tx_napi_dev, &dev->mt76.tx_napi,
mt7921_poll_tx, NAPI_POLL_WEIGHT);
napi_enable(&dev->mt76.tx_napi);
With threaded NAPI, the rx handler function is no longer bound to the CPU that fired the interrupt, which significantly helps to spread the workload over multiple CPUs, especially when multiple devices are using threaded NAPI at the same time. Exclude the tx handler from threaded NAPI by using a separate dummy netdev. The work is small and short-lived enough that it makes more sense to run it in softirq instead of creating a dedicated thread Signed-off-by: Felix Fietkau <nbd@nbd.name> --- drivers/net/wireless/mediatek/mt76/dma.c | 6 ++++-- drivers/net/wireless/mediatek/mt76/mt76.h | 1 + drivers/net/wireless/mediatek/mt76/mt7603/dma.c | 2 +- drivers/net/wireless/mediatek/mt76/mt7615/dma.c | 2 +- drivers/net/wireless/mediatek/mt76/mt76x02_mmio.c | 2 +- drivers/net/wireless/mediatek/mt76/mt7915/dma.c | 2 +- drivers/net/wireless/mediatek/mt76/mt7921/dma.c | 2 +- 7 files changed, 10 insertions(+), 7 deletions(-)