Message ID | 1604498942-24274-5-git-send-email-magnus.karlsson@gmail.com |
---|---|
State | Superseded |
Headers | show |
Series | xsk: i40e: Tx performance improvements | expand |
Magnus Karlsson wrote: > From: Magnus Karlsson <magnus.karlsson@intel.com> > > Introduce one cache line's worth of padding between the consumer pointer > and the flags field as well as between the flags field and the start > of the descriptors in all the lockless rings. This is so that the x86 HW > adjacency prefetcher will not prefetch the adjacent pointer/field when > only one pointer/field is going to be used. This improves throughput > performance for the l2fwd sample app by 1% on my machine with HW > prefetching turned on in the BIOS. > > Signed-off-by: Magnus Karlsson <magnus.karlsson@intel.com> > --- Acked-by: John Fastabend <john.fastabend@gmail.com> > net/xdp/xsk_queue.h | 4 +++- > 1 file changed, 3 insertions(+), 1 deletion(-) > > diff --git a/net/xdp/xsk_queue.h b/net/xdp/xsk_queue.h > index cdb9cf3..74fac80 100644 > --- a/net/xdp/xsk_queue.h > +++ b/net/xdp/xsk_queue.h > @@ -18,9 +18,11 @@ struct xdp_ring { > /* Hinder the adjacent cache prefetcher to prefetch the consumer > * pointer if the producer pointer is touched and vice versa. > */ > - u32 pad ____cacheline_aligned_in_smp; > + u32 pad1 ____cacheline_aligned_in_smp; > u32 consumer ____cacheline_aligned_in_smp; > + u32 pad2 ____cacheline_aligned_in_smp; > u32 flags; > + u32 pad3 ____cacheline_aligned_in_smp; > }; > > /* Used for the RX and TX queues for packets */ > -- > 2.7.4 > > _______________________________________________ > Intel-wired-lan mailing list > Intel-wired-lan@osuosl.org > https://lists.osuosl.org/mailman/listinfo/intel-wired-lan
diff --git a/net/xdp/xsk_queue.h b/net/xdp/xsk_queue.h index cdb9cf3..74fac80 100644 --- a/net/xdp/xsk_queue.h +++ b/net/xdp/xsk_queue.h @@ -18,9 +18,11 @@ struct xdp_ring { /* Hinder the adjacent cache prefetcher to prefetch the consumer * pointer if the producer pointer is touched and vice versa. */ - u32 pad ____cacheline_aligned_in_smp; + u32 pad1 ____cacheline_aligned_in_smp; u32 consumer ____cacheline_aligned_in_smp; + u32 pad2 ____cacheline_aligned_in_smp; u32 flags; + u32 pad3 ____cacheline_aligned_in_smp; }; /* Used for the RX and TX queues for packets */