Message ID | 1544777941-24083-2-git-send-email-ilias.apalodimas@linaro.org |
---|---|
State | New |
Headers | show |
Series | [net-next,1/2] net: socionext: correctly recover txq after being full | expand |
On Fri, 14 Dec 2018 at 09:59, Ilias Apalodimas <ilias.apalodimas@linaro.org> wrote: > > Currently the driver issues 2 mmio reads to figure out the number of > transmitted packets and clean them. We can get rid of the expensive > reads since BIT 31 of the Tx descriptor can be used for that. > We can also remove the budget counting of Tx completions since all of > the descriptors are not deliberately processed. > This last sentence reflects what we discussed off-line, right? That reaping used Tx descriptors shouldn't be counted against the budget, but should only occur if the budget is > 0 after processing the Rx queue? </ilias.apalodimas@linaro.org> > Performance numbers using pktgen are: > size pre-patch(pps) post-patch(pps) > 64 362483 427916 > 128 358315 411686 > 256 352725 389683 > 512 215675 216464 > 1024 113812 114442 > > Signed-off-by: Ilias Apalodimas <ilias.apalodimas@linaro.org> > --- > drivers/net/ethernet/socionext/netsec.c | 97 ++++++++++++++----------- > 1 file changed, 54 insertions(+), 43 deletions(-) > > diff --git a/drivers/net/ethernet/socionext/netsec.c b/drivers/net/ethernet/socionext/netsec.c > index 584a6b3f6542..05a0948ad929 100644 > --- a/drivers/net/ethernet/socionext/netsec.c > +++ b/drivers/net/ethernet/socionext/netsec.c > @@ -257,7 +257,6 @@ struct netsec_desc_ring { > dma_addr_t desc_dma; > struct netsec_desc *desc; > void *vaddr; > - u16 pkt_cnt; > u16 head, tail; > }; > > @@ -598,33 +597,26 @@ static void netsec_set_rx_de(struct netsec_priv *priv, > dring->desc[idx].len = desc->len; > } > > -static int netsec_clean_tx_dring(struct netsec_priv *priv, int budget) > +static bool netsec_clean_tx_dring(struct netsec_priv *priv) > { > struct netsec_desc_ring *dring = &priv->desc_ring[NETSEC_RING_TX]; > unsigned int pkts, bytes; > - > - dring->pkt_cnt += netsec_read(priv, NETSEC_REG_NRM_TX_DONE_PKTCNT); > - > - if (dring->pkt_cnt < budget) > - budget = dring->pkt_cnt; > + struct netsec_de *entry; > + int tail = dring->tail; > + int cnt = 0; > > pkts = 0; > bytes = 0; > + entry = dring->vaddr + DESC_SZ * tail; > > - while (pkts < budget) { > + while (!(entry->attr & (1U << NETSEC_TX_SHIFT_OWN_FIELD)) && > + cnt < DESC_NUM) { > struct netsec_desc *desc; > - struct netsec_de *entry; > - int tail, eop; > - > - tail = dring->tail; > - > - /* move tail ahead */ > - dring->tail = (tail + 1) % DESC_NUM; > + int eop; > > desc = &dring->desc[tail]; > - entry = dring->vaddr + DESC_SZ * tail; > - > eop = (entry->attr >> NETSEC_TX_LAST) & 1; > + dma_rmb(); > > dma_unmap_single(priv->dev, desc->dma_addr, desc->len, > DMA_TO_DEVICE); > @@ -633,38 +625,51 @@ static int netsec_clean_tx_dring(struct netsec_priv *priv, int budget) > bytes += desc->skb->len; > dev_kfree_skb(desc->skb); > } > + /* clean up so netsec_uninit_pkt_dring() won't free the skb > + * again > + */ > *desc = (struct netsec_desc){}; > + > + /* entry->attr is not going to be accessed by the NIC until > + * netsec_set_tx_de() is called. No need for a dma_wmb() here > + */ > + entry->attr = 1U << NETSEC_TX_SHIFT_OWN_FIELD; > + /* move tail ahead */ > + dring->tail = (tail + 1) % DESC_NUM; > + > + tail = dring->tail; > + entry = dring->vaddr + DESC_SZ * tail; > + cnt++; > } > - dring->pkt_cnt -= budget; > > - priv->ndev->stats.tx_packets += budget; > + if (!cnt) > + return false; > + > + /* reading the register clears the irq */ > + netsec_read(priv, NETSEC_REG_NRM_TX_DONE_PKTCNT); > + > + priv->ndev->stats.tx_packets += cnt; > priv->ndev->stats.tx_bytes += bytes; > > - netdev_completed_queue(priv->ndev, budget, bytes); > + netdev_completed_queue(priv->ndev, cnt, bytes); > > - return budget; > + return true; > } > > -static int netsec_process_tx(struct netsec_priv *priv, int budget) > +static void netsec_process_tx(struct netsec_priv *priv) > { > struct net_device *ndev = priv->ndev; > - int new, done = 0; > + bool cleaned; > > - do { > - new = netsec_clean_tx_dring(priv, budget); > - done += new; > - budget -= new; > - } while (new); > + cleaned = netsec_clean_tx_dring(priv); > > - if (done && netif_queue_stopped(ndev)) { > + if (cleaned && netif_queue_stopped(ndev)) { > /* Make sure we update the value, anyone stopping the queue > * after this will read the proper consumer idx > */ > smp_wmb(); > netif_wake_queue(ndev); > } > - > - return done; > } > > static void *netsec_alloc_rx_data(struct netsec_priv *priv, > @@ -813,24 +818,17 @@ static int netsec_process_rx(struct netsec_priv *priv, int budget) > static int netsec_napi_poll(struct napi_struct *napi, int budget) > { > struct netsec_priv *priv; > - int tx, rx, done, todo; > + int rx, done, todo; > > priv = container_of(napi, struct netsec_priv, napi); > > + netsec_process_tx(priv); > + So shouldn't we do this *after* processing Rx, and only if there is budget left? > todo = budget; > do { > - if (!todo) > - break; > - > - tx = netsec_process_tx(priv, todo); > - todo -= tx; > - > - if (!todo) > - break; > - > rx = netsec_process_rx(priv, todo); > todo -= rx; > - } while (rx || tx); > + } while (rx); > > done = budget - todo; > > @@ -1007,7 +1005,6 @@ static void netsec_uninit_pkt_dring(struct netsec_priv *priv, int id) > > dring->head = 0; > dring->tail = 0; > - dring->pkt_cnt = 0; > > if (id == NETSEC_RING_TX) > netdev_reset_queue(priv->ndev); > @@ -1030,6 +1027,7 @@ static void netsec_free_dring(struct netsec_priv *priv, int id) > static int netsec_alloc_dring(struct netsec_priv *priv, enum ring_id id) > { > struct netsec_desc_ring *dring = &priv->desc_ring[id]; > + int i; > > dring->vaddr = dma_zalloc_coherent(priv->dev, DESC_SZ * DESC_NUM, > &dring->desc_dma, GFP_KERNEL); > @@ -1040,6 +1038,19 @@ static int netsec_alloc_dring(struct netsec_priv *priv, enum ring_id id) > if (!dring->desc) > goto err; > > + if (id == NETSEC_RING_TX) { > + for (i = 0; i < DESC_NUM; i++) { > + struct netsec_de *de; > + > + de = dring->vaddr + (DESC_SZ * i); > + /* de->attr is not going to be accessed by the NIC > + * until netsec_set_tx_de() is called. > + * No need for a dma_wmb() here > + */ > + de->attr = 1U << NETSEC_TX_SHIFT_OWN_FIELD; > + } > + } > + > return 0; > err: > netsec_free_dring(priv, id); > -- > 2.19.1 >
On Fri, Dec 14, 2018 at 11:59:00AM +0100, Ard Biesheuvel wrote: > On Fri, 14 Dec 2018 at 09:59, Ilias Apalodimas > <ilias.apalodimas@linaro.org> wrote: > > > > Currently the driver issues 2 mmio reads to figure out the number of > > transmitted packets and clean them. We can get rid of the expensive > > reads since BIT 31 of the Tx descriptor can be used for that. > > We can also remove the budget counting of Tx completions since all of > > the descriptors are not deliberately processed. > > > > This last sentence reflects what we discussed off-line, right? That > reaping used Tx descriptors shouldn't be counted against the budget, > but should only occur if the budget is > 0 after processing the Rx > queue? yes > > Performance numbers using pktgen are: > > size pre-patch(pps) post-patch(pps) > > 64 362483 427916 > > 128 358315 411686 > > 256 352725 389683 > > 512 215675 216464 > > 1024 113812 114442 > > > > Signed-off-by: Ilias Apalodimas <ilias.apalodimas@linaro.org> > > --- > > drivers/net/ethernet/socionext/netsec.c | 97 ++++++++++++++----------- > > 1 file changed, 54 insertions(+), 43 deletions(-) > > > > diff --git a/drivers/net/ethernet/socionext/netsec.c b/drivers/net/ethernet/socionext/netsec.c > > index 584a6b3f6542..05a0948ad929 100644 > > --- a/drivers/net/ethernet/socionext/netsec.c > > +++ b/drivers/net/ethernet/socionext/netsec.c > > @@ -257,7 +257,6 @@ struct netsec_desc_ring { > > dma_addr_t desc_dma; > > struct netsec_desc *desc; > > void *vaddr; > > - u16 pkt_cnt; > > u16 head, tail; > > }; > > > > @@ -598,33 +597,26 @@ static void netsec_set_rx_de(struct netsec_priv *priv, > > dring->desc[idx].len = desc->len; > > } > > > > -static int netsec_clean_tx_dring(struct netsec_priv *priv, int budget) > > +static bool netsec_clean_tx_dring(struct netsec_priv *priv) > > { > > struct netsec_desc_ring *dring = &priv->desc_ring[NETSEC_RING_TX]; > > unsigned int pkts, bytes; > > - > > - dring->pkt_cnt += netsec_read(priv, NETSEC_REG_NRM_TX_DONE_PKTCNT); > > - > > - if (dring->pkt_cnt < budget) > > - budget = dring->pkt_cnt; > > + struct netsec_de *entry; > > + int tail = dring->tail; > > + int cnt = 0; > > > > pkts = 0; > > bytes = 0; > > + entry = dring->vaddr + DESC_SZ * tail; > > > > - while (pkts < budget) { > > + while (!(entry->attr & (1U << NETSEC_TX_SHIFT_OWN_FIELD)) && > > + cnt < DESC_NUM) { > > struct netsec_desc *desc; > > - struct netsec_de *entry; > > - int tail, eop; > > - > > - tail = dring->tail; > > - > > - /* move tail ahead */ > > - dring->tail = (tail + 1) % DESC_NUM; > > + int eop; > > > > desc = &dring->desc[tail]; > > - entry = dring->vaddr + DESC_SZ * tail; > > - > > eop = (entry->attr >> NETSEC_TX_LAST) & 1; > > + dma_rmb(); > > > > dma_unmap_single(priv->dev, desc->dma_addr, desc->len, > > DMA_TO_DEVICE); > > @@ -633,38 +625,51 @@ static int netsec_clean_tx_dring(struct netsec_priv *priv, int budget) > > bytes += desc->skb->len; > > dev_kfree_skb(desc->skb); > > } > > + /* clean up so netsec_uninit_pkt_dring() won't free the skb > > + * again > > + */ > > *desc = (struct netsec_desc){}; > > + > > + /* entry->attr is not going to be accessed by the NIC until > > + * netsec_set_tx_de() is called. No need for a dma_wmb() here > > + */ > > + entry->attr = 1U << NETSEC_TX_SHIFT_OWN_FIELD; > > + /* move tail ahead */ > > + dring->tail = (tail + 1) % DESC_NUM; > > + > > + tail = dring->tail; > > + entry = dring->vaddr + DESC_SZ * tail; > > + cnt++; > > } > > - dring->pkt_cnt -= budget; > > > > - priv->ndev->stats.tx_packets += budget; > > + if (!cnt) > > + return false; > > + > > + /* reading the register clears the irq */ > > + netsec_read(priv, NETSEC_REG_NRM_TX_DONE_PKTCNT); > > + > > + priv->ndev->stats.tx_packets += cnt; > > priv->ndev->stats.tx_bytes += bytes; > > > > - netdev_completed_queue(priv->ndev, budget, bytes); > > + netdev_completed_queue(priv->ndev, cnt, bytes); > > > > - return budget; > > + return true; > > } > > > > -static int netsec_process_tx(struct netsec_priv *priv, int budget) > > +static void netsec_process_tx(struct netsec_priv *priv) > > { > > struct net_device *ndev = priv->ndev; > > - int new, done = 0; > > + bool cleaned; > > > > - do { > > - new = netsec_clean_tx_dring(priv, budget); > > - done += new; > > - budget -= new; > > - } while (new); > > + cleaned = netsec_clean_tx_dring(priv); > > > > - if (done && netif_queue_stopped(ndev)) { > > + if (cleaned && netif_queue_stopped(ndev)) { > > /* Make sure we update the value, anyone stopping the queue > > * after this will read the proper consumer idx > > */ > > smp_wmb(); > > netif_wake_queue(ndev); > > } > > - > > - return done; > > } > > > > static void *netsec_alloc_rx_data(struct netsec_priv *priv, > > @@ -813,24 +818,17 @@ static int netsec_process_rx(struct netsec_priv *priv, int budget) > > static int netsec_napi_poll(struct napi_struct *napi, int budget) > > { > > struct netsec_priv *priv; > > - int tx, rx, done, todo; > > + int rx, done, todo; > > > > priv = container_of(napi, struct netsec_priv, napi); > > > > + netsec_process_tx(priv); > > + > > So shouldn't we do this *after* processing Rx, and only if there is budget left? I am not really sure, this would drown Tx processing if you had a bunch of received packets that exhausted the budget. Intel 1gbit drivers are doing something similar. They reclaim Tx prior to processing Rx. The only thing that could be checked here i guess is keep the napi poll running if *all* Tx descriptors were processed at some point instead of re-enabling irqs. > > > todo = budget; > > do { > > - if (!todo) > > - break; > > - > > - tx = netsec_process_tx(priv, todo); > > - todo -= tx; > > - > > - if (!todo) > > - break; > > - > > rx = netsec_process_rx(priv, todo); > > todo -= rx; > > - } while (rx || tx); > > + } while (rx); > > > > done = budget - todo; > > > > @@ -1007,7 +1005,6 @@ static void netsec_uninit_pkt_dring(struct netsec_priv *priv, int id) > > > > dring->head = 0; > > dring->tail = 0; > > - dring->pkt_cnt = 0; > > > > if (id == NETSEC_RING_TX) > > netdev_reset_queue(priv->ndev); > > @@ -1030,6 +1027,7 @@ static void netsec_free_dring(struct netsec_priv *priv, int id) > > static int netsec_alloc_dring(struct netsec_priv *priv, enum ring_id id) > > { > > struct netsec_desc_ring *dring = &priv->desc_ring[id]; > > + int i; > > > > dring->vaddr = dma_zalloc_coherent(priv->dev, DESC_SZ * DESC_NUM, > > &dring->desc_dma, GFP_KERNEL); > > @@ -1040,6 +1038,19 @@ static int netsec_alloc_dring(struct netsec_priv *priv, enum ring_id id) > > if (!dring->desc) > > goto err; > > > > + if (id == NETSEC_RING_TX) { > > + for (i = 0; i < DESC_NUM; i++) { > > + struct netsec_de *de; > > + > > + de = dring->vaddr + (DESC_SZ * i); > > + /* de->attr is not going to be accessed by the NIC > > + * until netsec_set_tx_de() is called. > > + * No need for a dma_wmb() here > > + */ > > + de->attr = 1U << NETSEC_TX_SHIFT_OWN_FIELD; > > + } > > + } > > + > > return 0; > > err: > > netsec_free_dring(priv, id); > > -- > > 2.19.1 > >
On Fri, 14 Dec 2018 at 12:18, Ilias Apalodimas <ilias.apalodimas@linaro.org> wrote: > > On Fri, Dec 14, 2018 at 11:59:00AM +0100, Ard Biesheuvel wrote: > > On Fri, 14 Dec 2018 at 09:59, Ilias Apalodimas > > <ilias.apalodimas@linaro.org> wrote: > > > > > > Currently the driver issues 2 mmio reads to figure out the number of > > > transmitted packets and clean them. We can get rid of the expensive > > > reads since BIT 31 of the Tx descriptor can be used for that. > > > We can also remove the budget counting of Tx completions since all of > > > the descriptors are not deliberately processed. > > > > > > > This last sentence reflects what we discussed off-line, right? That > > reaping used Tx descriptors shouldn't be counted against the budget, > > but should only occur if the budget is > 0 after processing the Rx > > queue? > > yes > > > > Performance numbers using pktgen are: > > > size pre-patch(pps) post-patch(pps) > > > 64 362483 427916 > > > 128 358315 411686 > > > 256 352725 389683 > > > 512 215675 216464 > > > 1024 113812 114442 > > > > > > Signed-off-by: Ilias Apalodimas <ilias.apalodimas@linaro.org> > > > --- > > > drivers/net/ethernet/socionext/netsec.c | 97 ++++++++++++++----------- > > > 1 file changed, 54 insertions(+), 43 deletions(-) > > > > > > diff --git a/drivers/net/ethernet/socionext/netsec.c b/drivers/net/ethernet/socionext/netsec.c > > > index 584a6b3f6542..05a0948ad929 100644 > > > --- a/drivers/net/ethernet/socionext/netsec.c > > > +++ b/drivers/net/ethernet/socionext/netsec.c > > > @@ -257,7 +257,6 @@ struct netsec_desc_ring { > > > dma_addr_t desc_dma; > > > struct netsec_desc *desc; > > > void *vaddr; > > > - u16 pkt_cnt; > > > u16 head, tail; > > > }; > > > > > > @@ -598,33 +597,26 @@ static void netsec_set_rx_de(struct netsec_priv *priv, > > > dring->desc[idx].len = desc->len; > > > } > > > > > > -static int netsec_clean_tx_dring(struct netsec_priv *priv, int budget) > > > +static bool netsec_clean_tx_dring(struct netsec_priv *priv) > > > { > > > struct netsec_desc_ring *dring = &priv->desc_ring[NETSEC_RING_TX]; > > > unsigned int pkts, bytes; > > > - > > > - dring->pkt_cnt += netsec_read(priv, NETSEC_REG_NRM_TX_DONE_PKTCNT); > > > - > > > - if (dring->pkt_cnt < budget) > > > - budget = dring->pkt_cnt; > > > + struct netsec_de *entry; > > > + int tail = dring->tail; > > > + int cnt = 0; > > > > > > pkts = 0; > > > bytes = 0; > > > + entry = dring->vaddr + DESC_SZ * tail; > > > > > > - while (pkts < budget) { > > > + while (!(entry->attr & (1U << NETSEC_TX_SHIFT_OWN_FIELD)) && > > > + cnt < DESC_NUM) { > > > struct netsec_desc *desc; > > > - struct netsec_de *entry; > > > - int tail, eop; > > > - > > > - tail = dring->tail; > > > - > > > - /* move tail ahead */ > > > - dring->tail = (tail + 1) % DESC_NUM; > > > + int eop; > > > > > > desc = &dring->desc[tail]; > > > - entry = dring->vaddr + DESC_SZ * tail; > > > - > > > eop = (entry->attr >> NETSEC_TX_LAST) & 1; > > > + dma_rmb(); > > > > > > dma_unmap_single(priv->dev, desc->dma_addr, desc->len, > > > DMA_TO_DEVICE); > > > @@ -633,38 +625,51 @@ static int netsec_clean_tx_dring(struct netsec_priv *priv, int budget) > > > bytes += desc->skb->len; > > > dev_kfree_skb(desc->skb); > > > } > > > + /* clean up so netsec_uninit_pkt_dring() won't free the skb > > > + * again > > > + */ > > > *desc = (struct netsec_desc){}; > > > + > > > + /* entry->attr is not going to be accessed by the NIC until > > > + * netsec_set_tx_de() is called. No need for a dma_wmb() here > > > + */ > > > + entry->attr = 1U << NETSEC_TX_SHIFT_OWN_FIELD; > > > + /* move tail ahead */ > > > + dring->tail = (tail + 1) % DESC_NUM; > > > + > > > + tail = dring->tail; > > > + entry = dring->vaddr + DESC_SZ * tail; > > > + cnt++; > > > } > > > - dring->pkt_cnt -= budget; > > > > > > - priv->ndev->stats.tx_packets += budget; > > > + if (!cnt) > > > + return false; > > > + > > > + /* reading the register clears the irq */ > > > + netsec_read(priv, NETSEC_REG_NRM_TX_DONE_PKTCNT); > > > + > > > + priv->ndev->stats.tx_packets += cnt; > > > priv->ndev->stats.tx_bytes += bytes; > > > > > > - netdev_completed_queue(priv->ndev, budget, bytes); > > > + netdev_completed_queue(priv->ndev, cnt, bytes); > > > > > > - return budget; > > > + return true; > > > } > > > > > > -static int netsec_process_tx(struct netsec_priv *priv, int budget) > > > +static void netsec_process_tx(struct netsec_priv *priv) > > > { > > > struct net_device *ndev = priv->ndev; > > > - int new, done = 0; > > > + bool cleaned; > > > > > > - do { > > > - new = netsec_clean_tx_dring(priv, budget); > > > - done += new; > > > - budget -= new; > > > - } while (new); > > > + cleaned = netsec_clean_tx_dring(priv); > > > > > > - if (done && netif_queue_stopped(ndev)) { > > > + if (cleaned && netif_queue_stopped(ndev)) { > > > /* Make sure we update the value, anyone stopping the queue > > > * after this will read the proper consumer idx > > > */ > > > smp_wmb(); > > > netif_wake_queue(ndev); > > > } > > > - > > > - return done; > > > } > > > > > > static void *netsec_alloc_rx_data(struct netsec_priv *priv, > > > @@ -813,24 +818,17 @@ static int netsec_process_rx(struct netsec_priv *priv, int budget) > > > static int netsec_napi_poll(struct napi_struct *napi, int budget) > > > { > > > struct netsec_priv *priv; > > > - int tx, rx, done, todo; > > > + int rx, done, todo; > > > > > > priv = container_of(napi, struct netsec_priv, napi); > > > > > > + netsec_process_tx(priv); > > > + > > > > So shouldn't we do this *after* processing Rx, and only if there is budget left? > > I am not really sure, this would drown Tx processing if you had a bunch of > received packets that exhausted the budget. > Intel 1gbit drivers are doing something similar. They reclaim Tx prior to > processing Rx. The only thing that could be checked here i guess is keep the > napi poll running if *all* Tx descriptors were processed at some point instead > of re-enabling irqs. > The reason I suggest it is because you quoted it from the documentation :-) But if reality deviates from that, then sure, let's follow the examples of others.
Hi Ard, > > > > > > So shouldn't we do this *after* processing Rx, and only if there is budget left? > > > > I am not really sure, this would drown Tx processing if you had a bunch of > > received packets that exhausted the budget. > > Intel 1gbit drivers are doing something similar. They reclaim Tx prior to > > processing Rx. The only thing that could be checked here i guess is keep the > > napi poll running if *all* Tx descriptors were processed at some point instead > > of re-enabling irqs. > > > > The reason I suggest it is because you quoted it from the documentation :-) Yes i understand. I had my doubts as well. That's hy i tried following the example of another driver. > > But if reality deviates from that, then sure, let's follow the > examples of others. Agree. Unless someone has any objections i am fine with the current patches. Thanks! /Ilias
On Fri, 14 Dec 2018 at 08:33, Ilias Apalodimas <ilias.apalodimas@linaro.org> wrote: > > Hi Ard, > > > > > > > > So shouldn't we do this *after* processing Rx, and only if there is budget left? > > > > > > I am not really sure, this would drown Tx processing if you had a bunch of > > > received packets that exhausted the budget. > > > Intel 1gbit drivers are doing something similar. They reclaim Tx prior to > > > processing Rx. The only thing that could be checked here i guess is keep the > > > napi poll running if *all* Tx descriptors were processed at some point instead > > > of re-enabling irqs. > > > > > > > The reason I suggest it is because you quoted it from the documentation :-) > > Yes i understand. I had my doubts as well. > That's hy i tried following the example of another driver. > > > > > But if reality deviates from that, then sure, let's follow the > > examples of others. > > Agree. Unless someone has any objections i am fine with the current patches. > The idea was to do as much as we can in one irq and irqs disabled for most of the time. But the numbers are more important, so I am fine too. Thanks.
From: Ilias Apalodimas <ilias.apalodimas@linaro.org> Date: Fri, 14 Dec 2018 10:59:01 +0200 > Currently the driver issues 2 mmio reads to figure out the number of > transmitted packets and clean them. We can get rid of the expensive > reads since BIT 31 of the Tx descriptor can be used for that. > We can also remove the budget counting of Tx completions since all of > the descriptors are not deliberately processed. > > Performance numbers using pktgen are: > size pre-patch(pps) post-patch(pps) > 64 362483 427916 > 128 358315 411686 > 256 352725 389683 > 512 215675 216464 > 1024 113812 114442 > > Signed-off-by: Ilias Apalodimas <ilias.apalodimas@linaro.org> Applied.
diff --git a/drivers/net/ethernet/socionext/netsec.c b/drivers/net/ethernet/socionext/netsec.c index 584a6b3f6542..05a0948ad929 100644 --- a/drivers/net/ethernet/socionext/netsec.c +++ b/drivers/net/ethernet/socionext/netsec.c @@ -257,7 +257,6 @@ struct netsec_desc_ring { dma_addr_t desc_dma; struct netsec_desc *desc; void *vaddr; - u16 pkt_cnt; u16 head, tail; }; @@ -598,33 +597,26 @@ static void netsec_set_rx_de(struct netsec_priv *priv, dring->desc[idx].len = desc->len; } -static int netsec_clean_tx_dring(struct netsec_priv *priv, int budget) +static bool netsec_clean_tx_dring(struct netsec_priv *priv) { struct netsec_desc_ring *dring = &priv->desc_ring[NETSEC_RING_TX]; unsigned int pkts, bytes; - - dring->pkt_cnt += netsec_read(priv, NETSEC_REG_NRM_TX_DONE_PKTCNT); - - if (dring->pkt_cnt < budget) - budget = dring->pkt_cnt; + struct netsec_de *entry; + int tail = dring->tail; + int cnt = 0; pkts = 0; bytes = 0; + entry = dring->vaddr + DESC_SZ * tail; - while (pkts < budget) { + while (!(entry->attr & (1U << NETSEC_TX_SHIFT_OWN_FIELD)) && + cnt < DESC_NUM) { struct netsec_desc *desc; - struct netsec_de *entry; - int tail, eop; - - tail = dring->tail; - - /* move tail ahead */ - dring->tail = (tail + 1) % DESC_NUM; + int eop; desc = &dring->desc[tail]; - entry = dring->vaddr + DESC_SZ * tail; - eop = (entry->attr >> NETSEC_TX_LAST) & 1; + dma_rmb(); dma_unmap_single(priv->dev, desc->dma_addr, desc->len, DMA_TO_DEVICE); @@ -633,38 +625,51 @@ static int netsec_clean_tx_dring(struct netsec_priv *priv, int budget) bytes += desc->skb->len; dev_kfree_skb(desc->skb); } + /* clean up so netsec_uninit_pkt_dring() won't free the skb + * again + */ *desc = (struct netsec_desc){}; + + /* entry->attr is not going to be accessed by the NIC until + * netsec_set_tx_de() is called. No need for a dma_wmb() here + */ + entry->attr = 1U << NETSEC_TX_SHIFT_OWN_FIELD; + /* move tail ahead */ + dring->tail = (tail + 1) % DESC_NUM; + + tail = dring->tail; + entry = dring->vaddr + DESC_SZ * tail; + cnt++; } - dring->pkt_cnt -= budget; - priv->ndev->stats.tx_packets += budget; + if (!cnt) + return false; + + /* reading the register clears the irq */ + netsec_read(priv, NETSEC_REG_NRM_TX_DONE_PKTCNT); + + priv->ndev->stats.tx_packets += cnt; priv->ndev->stats.tx_bytes += bytes; - netdev_completed_queue(priv->ndev, budget, bytes); + netdev_completed_queue(priv->ndev, cnt, bytes); - return budget; + return true; } -static int netsec_process_tx(struct netsec_priv *priv, int budget) +static void netsec_process_tx(struct netsec_priv *priv) { struct net_device *ndev = priv->ndev; - int new, done = 0; + bool cleaned; - do { - new = netsec_clean_tx_dring(priv, budget); - done += new; - budget -= new; - } while (new); + cleaned = netsec_clean_tx_dring(priv); - if (done && netif_queue_stopped(ndev)) { + if (cleaned && netif_queue_stopped(ndev)) { /* Make sure we update the value, anyone stopping the queue * after this will read the proper consumer idx */ smp_wmb(); netif_wake_queue(ndev); } - - return done; } static void *netsec_alloc_rx_data(struct netsec_priv *priv, @@ -813,24 +818,17 @@ static int netsec_process_rx(struct netsec_priv *priv, int budget) static int netsec_napi_poll(struct napi_struct *napi, int budget) { struct netsec_priv *priv; - int tx, rx, done, todo; + int rx, done, todo; priv = container_of(napi, struct netsec_priv, napi); + netsec_process_tx(priv); + todo = budget; do { - if (!todo) - break; - - tx = netsec_process_tx(priv, todo); - todo -= tx; - - if (!todo) - break; - rx = netsec_process_rx(priv, todo); todo -= rx; - } while (rx || tx); + } while (rx); done = budget - todo; @@ -1007,7 +1005,6 @@ static void netsec_uninit_pkt_dring(struct netsec_priv *priv, int id) dring->head = 0; dring->tail = 0; - dring->pkt_cnt = 0; if (id == NETSEC_RING_TX) netdev_reset_queue(priv->ndev); @@ -1030,6 +1027,7 @@ static void netsec_free_dring(struct netsec_priv *priv, int id) static int netsec_alloc_dring(struct netsec_priv *priv, enum ring_id id) { struct netsec_desc_ring *dring = &priv->desc_ring[id]; + int i; dring->vaddr = dma_zalloc_coherent(priv->dev, DESC_SZ * DESC_NUM, &dring->desc_dma, GFP_KERNEL); @@ -1040,6 +1038,19 @@ static int netsec_alloc_dring(struct netsec_priv *priv, enum ring_id id) if (!dring->desc) goto err; + if (id == NETSEC_RING_TX) { + for (i = 0; i < DESC_NUM; i++) { + struct netsec_de *de; + + de = dring->vaddr + (DESC_SZ * i); + /* de->attr is not going to be accessed by the NIC + * until netsec_set_tx_de() is called. + * No need for a dma_wmb() here + */ + de->attr = 1U << NETSEC_TX_SHIFT_OWN_FIELD; + } + } + return 0; err: netsec_free_dring(priv, id);
Currently the driver issues 2 mmio reads to figure out the number of transmitted packets and clean them. We can get rid of the expensive reads since BIT 31 of the Tx descriptor can be used for that. We can also remove the budget counting of Tx completions since all of the descriptors are not deliberately processed. Performance numbers using pktgen are: size pre-patch(pps) post-patch(pps) 64 362483 427916 128 358315 411686 256 352725 389683 512 215675 216464 1024 113812 114442 Signed-off-by: Ilias Apalodimas <ilias.apalodimas@linaro.org> --- drivers/net/ethernet/socionext/netsec.c | 97 ++++++++++++++----------- 1 file changed, 54 insertions(+), 43 deletions(-) -- 2.19.1