Message ID | 20210711190308.8476-1-xiyou.wangcong@gmail.com |
---|---|
State | New |
Headers | show |
Series | [net-next,v2] net_sched: introduce tracepoint trace_qdisc_enqueue() | expand |
On Mon, Jul 12, 2021 at 3:03 AM Cong Wang <xiyou.wangcong@gmail.com> wrote: > > From: Qitao Xu <qitao.xu@bytedance.com> > > Tracepoint trace_qdisc_enqueue() is introduced to trace skb at > the entrance of TC layer on TX side. This is kinda symmetric to > trace_qdisc_dequeue(), and together they can be used to calculate > the packet queueing latency. It is more accurate than > trace_net_dev_queue(), because we already successfully enqueue > the packet at that point. > > Note, trace ring buffer is only accessible to privileged users, > it is safe to use %px to print a real kernel address here. > > Reviewed-by: Cong Wang <cong.wang@bytedance.com> > Cc: Jamal Hadi Salim <jhs@mojatatu.com> > Cc: Jiri Pirko <jiri@resnulli.us> > Signed-off-by: Qitao Xu <qitao.xu@bytedance.com> > --- > include/trace/events/qdisc.h | 26 ++++++++++++++++++++++++++ > net/core/dev.c | 9 +++++++++ > 2 files changed, 35 insertions(+) > > diff --git a/include/trace/events/qdisc.h b/include/trace/events/qdisc.h > index 58209557cb3a..c3006c6b4a87 100644 > --- a/include/trace/events/qdisc.h > +++ b/include/trace/events/qdisc.h > @@ -46,6 +46,32 @@ TRACE_EVENT(qdisc_dequeue, > __entry->txq_state, __entry->packets, __entry->skbaddr ) > ); > > +TRACE_EVENT(qdisc_enqueue, > + > + TP_PROTO(struct Qdisc *qdisc, const struct netdev_queue *txq, struct sk_buff *skb), > + > + TP_ARGS(qdisc, txq, skb), > + > + TP_STRUCT__entry( > + __field(struct Qdisc *, qdisc) > + __field(void *, skbaddr) > + __field(int, ifindex) > + __field(u32, handle) > + __field(u32, parent) > + ), > + > + TP_fast_assign( > + __entry->qdisc = qdisc; > + __entry->skbaddr = skb; > + __entry->ifindex = txq->dev ? txq->dev->ifindex : 0; > + __entry->handle = qdisc->handle; > + __entry->parent = qdisc->parent; > + ), Hi qitao, cong Why not support the txq, we get more info from txq. and we should take care of the return value of q->enqueue, because we can know what happens in the qdisc queue(not necessary to work with qdisc:dequeue). and we can use a tracepoint filter for the return value too. we should introduce a new function to instead of now codes, that may make the codes clean. Please review my patch for more info. https://patchwork.kernel.org/project/netdevbpf/patch/20210711050007.1200-1-xiangxia.m.yue@gmail.com/ > + TP_printk("enqueue ifindex=%d qdisc handle=0x%X parent=0x%X skbaddr=%px", > + __entry->ifindex, __entry->handle, __entry->parent, __entry->skbaddr) > +); > + > TRACE_EVENT(qdisc_reset, > > TP_PROTO(struct Qdisc *q), > diff --git a/net/core/dev.c b/net/core/dev.c > index c253c2aafe97..20b9376de301 100644 > --- a/net/core/dev.c > +++ b/net/core/dev.c > @@ -131,6 +131,7 @@ > #include <trace/events/napi.h> > #include <trace/events/net.h> > #include <trace/events/skb.h> > +#include <trace/events/qdisc.h> > #include <linux/inetdevice.h> > #include <linux/cpu_rmap.h> > #include <linux/static_key.h> > @@ -3864,6 +3865,8 @@ static inline int __dev_xmit_skb(struct sk_buff *skb, struct Qdisc *q, > if (unlikely(!nolock_qdisc_is_empty(q))) { > rc = q->enqueue(skb, q, &to_free) & > NET_XMIT_MASK; > + if (rc == NET_XMIT_SUCCESS) > + trace_qdisc_enqueue(q, txq, skb); > __qdisc_run(q); > qdisc_run_end(q); > > @@ -3880,6 +3883,9 @@ static inline int __dev_xmit_skb(struct sk_buff *skb, struct Qdisc *q, > } > > rc = q->enqueue(skb, q, &to_free) & NET_XMIT_MASK; > + if (rc == NET_XMIT_SUCCESS) > + trace_qdisc_enqueue(q, txq, skb); > + > qdisc_run(q); > > no_lock_out: > @@ -3924,6 +3930,9 @@ static inline int __dev_xmit_skb(struct sk_buff *skb, struct Qdisc *q, > rc = NET_XMIT_SUCCESS; > } else { > rc = q->enqueue(skb, q, &to_free) & NET_XMIT_MASK; > + if (rc == NET_XMIT_SUCCESS) > + trace_qdisc_enqueue(q, txq, skb); > + > if (qdisc_run_begin(q)) { > if (unlikely(contended)) { > spin_unlock(&q->busylock); > -- > 2.27.0 > -- Best regards, Tonghao
On Sun, Jul 11, 2021 at 5:50 PM Tonghao Zhang <xiangxia.m.yue@gmail.com> wrote: > > On Mon, Jul 12, 2021 at 3:03 AM Cong Wang <xiyou.wangcong@gmail.com> wrote: > > > > From: Qitao Xu <qitao.xu@bytedance.com> > > > > Tracepoint trace_qdisc_enqueue() is introduced to trace skb at > > the entrance of TC layer on TX side. This is kinda symmetric to > > trace_qdisc_dequeue(), and together they can be used to calculate > > the packet queueing latency. It is more accurate than > > trace_net_dev_queue(), because we already successfully enqueue > > the packet at that point. > > > > Note, trace ring buffer is only accessible to privileged users, > > it is safe to use %px to print a real kernel address here. > > > > Reviewed-by: Cong Wang <cong.wang@bytedance.com> > > Cc: Jamal Hadi Salim <jhs@mojatatu.com> > > Cc: Jiri Pirko <jiri@resnulli.us> > > Signed-off-by: Qitao Xu <qitao.xu@bytedance.com> > > --- > > include/trace/events/qdisc.h | 26 ++++++++++++++++++++++++++ > > net/core/dev.c | 9 +++++++++ > > 2 files changed, 35 insertions(+) > > > > diff --git a/include/trace/events/qdisc.h b/include/trace/events/qdisc.h > > index 58209557cb3a..c3006c6b4a87 100644 > > --- a/include/trace/events/qdisc.h > > +++ b/include/trace/events/qdisc.h > > @@ -46,6 +46,32 @@ TRACE_EVENT(qdisc_dequeue, > > __entry->txq_state, __entry->packets, __entry->skbaddr ) > > ); > > > > +TRACE_EVENT(qdisc_enqueue, > > + > > + TP_PROTO(struct Qdisc *qdisc, const struct netdev_queue *txq, struct sk_buff *skb), > > + > > + TP_ARGS(qdisc, txq, skb), > > + > > + TP_STRUCT__entry( > > + __field(struct Qdisc *, qdisc) > > + __field(void *, skbaddr) > > + __field(int, ifindex) > > + __field(u32, handle) > > + __field(u32, parent) > > + ), > > + > > + TP_fast_assign( > > + __entry->qdisc = qdisc; > > + __entry->skbaddr = skb; > > + __entry->ifindex = txq->dev ? txq->dev->ifindex : 0; > > + __entry->handle = qdisc->handle; > > + __entry->parent = qdisc->parent; > > + ), > Hi qitao, cong > Why not support the txq, we get more info from txq. Because we only want to calculate queueing latency, not anything else. If you need it, you are welcome to add anything reasonable in the future, it won't break ABI (see 3dd344ea84e122f791ab). > and we should take care of the return value of q->enqueue, because we > can know what happens in the qdisc queue(not necessary to work with > qdisc:dequeue). > and we can use a tracepoint filter for the return value too. Disagree. Because we really have no interest in dropped packets. Even if we really do, we could trace kfree_skb(), not really here. > we should introduce a new function to instead of now codes, that may > make the codes clean. Please review my patch for more info. Just 3 lines of code, it is totally personal taste. > https://patchwork.kernel.org/project/netdevbpf/patch/20210711050007.1200-1-xiangxia.m.yue@gmail.com/ I did review it. Like I said, %p does not work. Have you tested your patches? ;) Thanks.
On 2021/7/12 3:03, Cong Wang wrote: > From: Qitao Xu <qitao.xu@bytedance.com> > > Tracepoint trace_qdisc_enqueue() is introduced to trace skb at > the entrance of TC layer on TX side. This is kinda symmetric to > trace_qdisc_dequeue(), and together they can be used to calculate > the packet queueing latency. It is more accurate than > trace_net_dev_queue(), because we already successfully enqueue > the packet at that point. > > Note, trace ring buffer is only accessible to privileged users, > it is safe to use %px to print a real kernel address here. > > Reviewed-by: Cong Wang <cong.wang@bytedance.com> > Cc: Jamal Hadi Salim <jhs@mojatatu.com> > Cc: Jiri Pirko <jiri@resnulli.us> > Signed-off-by: Qitao Xu <qitao.xu@bytedance.com> > --- > include/trace/events/qdisc.h | 26 ++++++++++++++++++++++++++ > net/core/dev.c | 9 +++++++++ > 2 files changed, 35 insertions(+) > > diff --git a/include/trace/events/qdisc.h b/include/trace/events/qdisc.h > index 58209557cb3a..c3006c6b4a87 100644 > --- a/include/trace/events/qdisc.h > +++ b/include/trace/events/qdisc.h > @@ -46,6 +46,32 @@ TRACE_EVENT(qdisc_dequeue, > __entry->txq_state, __entry->packets, __entry->skbaddr ) > ); > > +TRACE_EVENT(qdisc_enqueue, > + > + TP_PROTO(struct Qdisc *qdisc, const struct netdev_queue *txq, struct sk_buff *skb), > + > + TP_ARGS(qdisc, txq, skb), > + > + TP_STRUCT__entry( > + __field(struct Qdisc *, qdisc) > + __field(void *, skbaddr) > + __field(int, ifindex) > + __field(u32, handle) > + __field(u32, parent) > + ), > + > + TP_fast_assign( > + __entry->qdisc = qdisc; > + __entry->skbaddr = skb; > + __entry->ifindex = txq->dev ? txq->dev->ifindex : 0; > + __entry->handle = qdisc->handle; > + __entry->parent = qdisc->parent; > + ), > + > + TP_printk("enqueue ifindex=%d qdisc handle=0x%X parent=0x%X skbaddr=%px", > + __entry->ifindex, __entry->handle, __entry->parent, __entry->skbaddr) > +); > + > TRACE_EVENT(qdisc_reset, > > TP_PROTO(struct Qdisc *q), > diff --git a/net/core/dev.c b/net/core/dev.c > index c253c2aafe97..20b9376de301 100644 > --- a/net/core/dev.c > +++ b/net/core/dev.c > @@ -131,6 +131,7 @@ > #include <trace/events/napi.h> > #include <trace/events/net.h> > #include <trace/events/skb.h> > +#include <trace/events/qdisc.h> > #include <linux/inetdevice.h> > #include <linux/cpu_rmap.h> > #include <linux/static_key.h> > @@ -3864,6 +3865,8 @@ static inline int __dev_xmit_skb(struct sk_buff *skb, struct Qdisc *q, > if (unlikely(!nolock_qdisc_is_empty(q))) { > rc = q->enqueue(skb, q, &to_free) & > NET_XMIT_MASK; > + if (rc == NET_XMIT_SUCCESS) If NET_XMIT_CN is returned, the skb seems to be enqueued too? Also instead of checking the rc before calling the trace_*, maybe it make more sense to add the rc to the tracepoint, so that the checking is avoided, and we are able to tell the enqueuing result of a specific skb from that tracepoint too. > + trace_qdisc_enqueue(q, txq, skb); Does it make sense to wrap the about to something like: int sch_enqueue(....) { rc = q->enqueue(skb, q, &to_free).. .... trace_qdisc_enqueue(q, txq, skb); } So that the below code can reuse that wrapper too. > __qdisc_run(q); > qdisc_run_end(q); > > @@ -3880,6 +3883,9 @@ static inline int __dev_xmit_skb(struct sk_buff *skb, struct Qdisc *q, > } > > rc = q->enqueue(skb, q, &to_free) & NET_XMIT_MASK; > + if (rc == NET_XMIT_SUCCESS) > + trace_qdisc_enqueue(q, txq, skb); > + > qdisc_run(q); > > no_lock_out: > @@ -3924,6 +3930,9 @@ static inline int __dev_xmit_skb(struct sk_buff *skb, struct Qdisc *q, > rc = NET_XMIT_SUCCESS; > } else { > rc = q->enqueue(skb, q, &to_free) & NET_XMIT_MASK; > + if (rc == NET_XMIT_SUCCESS) > + trace_qdisc_enqueue(q, txq, skb); > + > if (qdisc_run_begin(q)) { > if (unlikely(contended)) { > spin_unlock(&q->busylock); >
On Mon, Jul 12, 2021 at 9:47 AM Cong Wang <xiyou.wangcong@gmail.com> wrote: > > On Sun, Jul 11, 2021 at 5:50 PM Tonghao Zhang <xiangxia.m.yue@gmail.com> wrote: > > > > On Mon, Jul 12, 2021 at 3:03 AM Cong Wang <xiyou.wangcong@gmail.com> wrote: > > > > > > From: Qitao Xu <qitao.xu@bytedance.com> > > > > > > Tracepoint trace_qdisc_enqueue() is introduced to trace skb at > > > the entrance of TC layer on TX side. This is kinda symmetric to > > > trace_qdisc_dequeue(), and together they can be used to calculate > > > the packet queueing latency. It is more accurate than > > > trace_net_dev_queue(), because we already successfully enqueue > > > the packet at that point. > > > > > > Note, trace ring buffer is only accessible to privileged users, > > > it is safe to use %px to print a real kernel address here. > > > > > > Reviewed-by: Cong Wang <cong.wang@bytedance.com> > > > Cc: Jamal Hadi Salim <jhs@mojatatu.com> > > > Cc: Jiri Pirko <jiri@resnulli.us> > > > Signed-off-by: Qitao Xu <qitao.xu@bytedance.com> > > > --- > > > include/trace/events/qdisc.h | 26 ++++++++++++++++++++++++++ > > > net/core/dev.c | 9 +++++++++ > > > 2 files changed, 35 insertions(+) > > > > > > diff --git a/include/trace/events/qdisc.h b/include/trace/events/qdisc.h > > > index 58209557cb3a..c3006c6b4a87 100644 > > > --- a/include/trace/events/qdisc.h > > > +++ b/include/trace/events/qdisc.h > > > @@ -46,6 +46,32 @@ TRACE_EVENT(qdisc_dequeue, > > > __entry->txq_state, __entry->packets, __entry->skbaddr ) > > > ); > > > > > > +TRACE_EVENT(qdisc_enqueue, > > > + > > > + TP_PROTO(struct Qdisc *qdisc, const struct netdev_queue *txq, struct sk_buff *skb), > > > + > > > + TP_ARGS(qdisc, txq, skb), > > > + > > > + TP_STRUCT__entry( > > > + __field(struct Qdisc *, qdisc) > > > + __field(void *, skbaddr) > > > + __field(int, ifindex) > > > + __field(u32, handle) > > > + __field(u32, parent) > > > + ), > > > + > > > + TP_fast_assign( > > > + __entry->qdisc = qdisc; > > > + __entry->skbaddr = skb; > > > + __entry->ifindex = txq->dev ? txq->dev->ifindex : 0; > > > + __entry->handle = qdisc->handle; > > > + __entry->parent = qdisc->parent; > > > + ), > > Hi qitao, cong > > Why not support the txq, we get more info from txq. > > Because we only want to calculate queueing latency, not anything > else. If you need it, you are welcome to add anything reasonable > in the future, it won't break ABI (see 3dd344ea84e122f791ab). Thanks! > > and we should take care of the return value of q->enqueue, because we > > can know what happens in the qdisc queue(not necessary to work with > > qdisc:dequeue). > > and we can use a tracepoint filter for the return value too. > > Disagree. Because we really have no interest in dropped packets. > Even if we really do, we could trace kfree_skb(), not really here. The qdisc returns not only the NET_XMIT_DROP, right ? skbprio_enqueue, sfq_enqueue and red_enqueue may return the NET_XMIT_CN. > > we should introduce a new function to instead of now codes, that may > > make the codes clean. Please review my patch for more info. > > Just 3 lines of code, it is totally personal taste. > > > https://patchwork.kernel.org/project/netdevbpf/patch/20210711050007.1200-1-xiangxia.m.yue@gmail.com/ > > I did review it. Like I said, %p does not work. Have you tested your > patches? ;) Yes, I tested them, I didn't find the error. my patch is based on commit id 89212e160b81e778f829b89743570665810e3b13 > Thanks. -- Best regards, Tonghao
On Mon, Jul 12, 2021 at 11:01 AM Yunsheng Lin <linyunsheng@huawei.com> wrote: > > On 2021/7/12 3:03, Cong Wang wrote: > > From: Qitao Xu <qitao.xu@bytedance.com> > > > > Tracepoint trace_qdisc_enqueue() is introduced to trace skb at > > the entrance of TC layer on TX side. This is kinda symmetric to > > trace_qdisc_dequeue(), and together they can be used to calculate > > the packet queueing latency. It is more accurate than > > trace_net_dev_queue(), because we already successfully enqueue > > the packet at that point. > > > > Note, trace ring buffer is only accessible to privileged users, > > it is safe to use %px to print a real kernel address here. > > > > Reviewed-by: Cong Wang <cong.wang@bytedance.com> > > Cc: Jamal Hadi Salim <jhs@mojatatu.com> > > Cc: Jiri Pirko <jiri@resnulli.us> > > Signed-off-by: Qitao Xu <qitao.xu@bytedance.com> > > --- > > include/trace/events/qdisc.h | 26 ++++++++++++++++++++++++++ > > net/core/dev.c | 9 +++++++++ > > 2 files changed, 35 insertions(+) > > > > diff --git a/include/trace/events/qdisc.h b/include/trace/events/qdisc.h > > index 58209557cb3a..c3006c6b4a87 100644 > > --- a/include/trace/events/qdisc.h > > +++ b/include/trace/events/qdisc.h > > @@ -46,6 +46,32 @@ TRACE_EVENT(qdisc_dequeue, > > __entry->txq_state, __entry->packets, __entry->skbaddr ) > > ); > > > > +TRACE_EVENT(qdisc_enqueue, > > + > > + TP_PROTO(struct Qdisc *qdisc, const struct netdev_queue *txq, struct sk_buff *skb), > > + > > + TP_ARGS(qdisc, txq, skb), > > + > > + TP_STRUCT__entry( > > + __field(struct Qdisc *, qdisc) > > + __field(void *, skbaddr) > > + __field(int, ifindex) > > + __field(u32, handle) > > + __field(u32, parent) > > + ), > > + > > + TP_fast_assign( > > + __entry->qdisc = qdisc; > > + __entry->skbaddr = skb; > > + __entry->ifindex = txq->dev ? txq->dev->ifindex : 0; > > + __entry->handle = qdisc->handle; > > + __entry->parent = qdisc->parent; > > + ), > > + > > + TP_printk("enqueue ifindex=%d qdisc handle=0x%X parent=0x%X skbaddr=%px", > > + __entry->ifindex, __entry->handle, __entry->parent, __entry->skbaddr) > > +); > > + > > TRACE_EVENT(qdisc_reset, > > > > TP_PROTO(struct Qdisc *q), > > diff --git a/net/core/dev.c b/net/core/dev.c > > index c253c2aafe97..20b9376de301 100644 > > --- a/net/core/dev.c > > +++ b/net/core/dev.c > > @@ -131,6 +131,7 @@ > > #include <trace/events/napi.h> > > #include <trace/events/net.h> > > #include <trace/events/skb.h> > > +#include <trace/events/qdisc.h> > > #include <linux/inetdevice.h> > > #include <linux/cpu_rmap.h> > > #include <linux/static_key.h> > > @@ -3864,6 +3865,8 @@ static inline int __dev_xmit_skb(struct sk_buff *skb, struct Qdisc *q, > > if (unlikely(!nolock_qdisc_is_empty(q))) { > > rc = q->enqueue(skb, q, &to_free) & > > NET_XMIT_MASK; > > + if (rc == NET_XMIT_SUCCESS) > > If NET_XMIT_CN is returned, the skb seems to be enqueued too? > > Also instead of checking the rc before calling the trace_*, maybe > it make more sense to add the rc to the tracepoint, so that the checking > is avoided, and we are able to tell the enqueuing result of a specific skb > from that tracepoint too. Yes, I will fix it. > > + trace_qdisc_enqueue(q, txq, skb); > > Does it make sense to wrap the about to something like: > > int sch_enqueue(....) > { > rc = q->enqueue(skb, q, &to_free).. > .... > trace_qdisc_enqueue(q, txq, skb); > } Yes, I agree, my patch uses qdisc_enqueue_skb, because __dev_xmit_skb invoke the qdisc_xxx api. https://patchwork.kernel.org/project/netdevbpf/patch/20210711050007.1200-1-xiangxia.m.yue@gmail.com/ > So that the below code can reuse that wrapper too. > > > __qdisc_run(q); > > qdisc_run_end(q); > > > > @@ -3880,6 +3883,9 @@ static inline int __dev_xmit_skb(struct sk_buff *skb, struct Qdisc *q, > > } > > > > rc = q->enqueue(skb, q, &to_free) & NET_XMIT_MASK; > > + if (rc == NET_XMIT_SUCCESS) > > + trace_qdisc_enqueue(q, txq, skb); > > + > > qdisc_run(q); > > > > no_lock_out: > > @@ -3924,6 +3930,9 @@ static inline int __dev_xmit_skb(struct sk_buff *skb, struct Qdisc *q, > > rc = NET_XMIT_SUCCESS; > > } else { > > rc = q->enqueue(skb, q, &to_free) & NET_XMIT_MASK; > > + if (rc == NET_XMIT_SUCCESS) > > + trace_qdisc_enqueue(q, txq, skb); > > + > > if (qdisc_run_begin(q)) { > > if (unlikely(contended)) { > > spin_unlock(&q->busylock); > > -- Best regards, Tonghao
On Sun, Jul 11, 2021 at 8:02 PM Tonghao Zhang <xiangxia.m.yue@gmail.com> wrote: > > On Mon, Jul 12, 2021 at 9:47 AM Cong Wang <xiyou.wangcong@gmail.com> wrote: > > > > On Sun, Jul 11, 2021 at 5:50 PM Tonghao Zhang <xiangxia.m.yue@gmail.com> wrote: > > > > > > On Mon, Jul 12, 2021 at 3:03 AM Cong Wang <xiyou.wangcong@gmail.com> wrote: > > > > > > > > From: Qitao Xu <qitao.xu@bytedance.com> > > > > > > > > Tracepoint trace_qdisc_enqueue() is introduced to trace skb at > > > > the entrance of TC layer on TX side. This is kinda symmetric to > > > > trace_qdisc_dequeue(), and together they can be used to calculate > > > > the packet queueing latency. It is more accurate than > > > > trace_net_dev_queue(), because we already successfully enqueue > > > > the packet at that point. > > > > > > > > Note, trace ring buffer is only accessible to privileged users, > > > > it is safe to use %px to print a real kernel address here. > > > > > > > > Reviewed-by: Cong Wang <cong.wang@bytedance.com> > > > > Cc: Jamal Hadi Salim <jhs@mojatatu.com> > > > > Cc: Jiri Pirko <jiri@resnulli.us> > > > > Signed-off-by: Qitao Xu <qitao.xu@bytedance.com> > > > > --- > > > > include/trace/events/qdisc.h | 26 ++++++++++++++++++++++++++ > > > > net/core/dev.c | 9 +++++++++ > > > > 2 files changed, 35 insertions(+) > > > > > > > > diff --git a/include/trace/events/qdisc.h b/include/trace/events/qdisc.h > > > > index 58209557cb3a..c3006c6b4a87 100644 > > > > --- a/include/trace/events/qdisc.h > > > > +++ b/include/trace/events/qdisc.h > > > > @@ -46,6 +46,32 @@ TRACE_EVENT(qdisc_dequeue, > > > > __entry->txq_state, __entry->packets, __entry->skbaddr ) > > > > ); > > > > > > > > +TRACE_EVENT(qdisc_enqueue, > > > > + > > > > + TP_PROTO(struct Qdisc *qdisc, const struct netdev_queue *txq, struct sk_buff *skb), > > > > + > > > > + TP_ARGS(qdisc, txq, skb), > > > > + > > > > + TP_STRUCT__entry( > > > > + __field(struct Qdisc *, qdisc) > > > > + __field(void *, skbaddr) > > > > + __field(int, ifindex) > > > > + __field(u32, handle) > > > > + __field(u32, parent) > > > > + ), > > > > + > > > > + TP_fast_assign( > > > > + __entry->qdisc = qdisc; > > > > + __entry->skbaddr = skb; > > > > + __entry->ifindex = txq->dev ? txq->dev->ifindex : 0; > > > > + __entry->handle = qdisc->handle; > > > > + __entry->parent = qdisc->parent; > > > > + ), > > > Hi qitao, cong > > > Why not support the txq, we get more info from txq. > > > > Because we only want to calculate queueing latency, not anything > > else. If you need it, you are welcome to add anything reasonable > > in the future, it won't break ABI (see 3dd344ea84e122f791ab). > Thanks! > > > and we should take care of the return value of q->enqueue, because we > > > can know what happens in the qdisc queue(not necessary to work with > > > qdisc:dequeue). > > > and we can use a tracepoint filter for the return value too. > > > > Disagree. Because we really have no interest in dropped packets. > > Even if we really do, we could trace kfree_skb(), not really here. > The qdisc returns not only the NET_XMIT_DROP, right ? > skbprio_enqueue, sfq_enqueue and red_enqueue may return the NET_XMIT_CN. > Sure, in that case a different packet is dropped, once again you can trace it with kfree_skb() if you want. What's the problem? > > > > we should introduce a new function to instead of now codes, that may > > > make the codes clean. Please review my patch for more info. > > > > Just 3 lines of code, it is totally personal taste. > > > > > https://patchwork.kernel.org/project/netdevbpf/patch/20210711050007.1200-1-xiangxia.m.yue@gmail.com/ > > > > I did review it. Like I said, %p does not work. Have you tested your > > patches? ;) > Yes, I tested them, I didn't find the error. my patch is based on > commit id 89212e160b81e778f829b89743570665810e3b13 I seriously doubt it, because we actually used %p in the beginning too and got the same address for two different packets, this is why we have to move to %px. It is 100% reproducible, so it probably means you didn't test it at all. Thanks.
On Sun, Jul 11, 2021 at 8:01 PM Yunsheng Lin <linyunsheng@huawei.com> wrote: > > On 2021/7/12 3:03, Cong Wang wrote: > > From: Qitao Xu <qitao.xu@bytedance.com> > > > > Tracepoint trace_qdisc_enqueue() is introduced to trace skb at > > the entrance of TC layer on TX side. This is kinda symmetric to > > trace_qdisc_dequeue(), and together they can be used to calculate > > the packet queueing latency. It is more accurate than > > trace_net_dev_queue(), because we already successfully enqueue > > the packet at that point. > > > > Note, trace ring buffer is only accessible to privileged users, > > it is safe to use %px to print a real kernel address here. > > > > Reviewed-by: Cong Wang <cong.wang@bytedance.com> > > Cc: Jamal Hadi Salim <jhs@mojatatu.com> > > Cc: Jiri Pirko <jiri@resnulli.us> > > Signed-off-by: Qitao Xu <qitao.xu@bytedance.com> > > --- > > include/trace/events/qdisc.h | 26 ++++++++++++++++++++++++++ > > net/core/dev.c | 9 +++++++++ > > 2 files changed, 35 insertions(+) > > > > diff --git a/include/trace/events/qdisc.h b/include/trace/events/qdisc.h > > index 58209557cb3a..c3006c6b4a87 100644 > > --- a/include/trace/events/qdisc.h > > +++ b/include/trace/events/qdisc.h > > @@ -46,6 +46,32 @@ TRACE_EVENT(qdisc_dequeue, > > __entry->txq_state, __entry->packets, __entry->skbaddr ) > > ); > > > > +TRACE_EVENT(qdisc_enqueue, > > + > > + TP_PROTO(struct Qdisc *qdisc, const struct netdev_queue *txq, struct sk_buff *skb), > > + > > + TP_ARGS(qdisc, txq, skb), > > + > > + TP_STRUCT__entry( > > + __field(struct Qdisc *, qdisc) > > + __field(void *, skbaddr) > > + __field(int, ifindex) > > + __field(u32, handle) > > + __field(u32, parent) > > + ), > > + > > + TP_fast_assign( > > + __entry->qdisc = qdisc; > > + __entry->skbaddr = skb; > > + __entry->ifindex = txq->dev ? txq->dev->ifindex : 0; > > + __entry->handle = qdisc->handle; > > + __entry->parent = qdisc->parent; > > + ), > > + > > + TP_printk("enqueue ifindex=%d qdisc handle=0x%X parent=0x%X skbaddr=%px", > > + __entry->ifindex, __entry->handle, __entry->parent, __entry->skbaddr) > > +); > > + > > TRACE_EVENT(qdisc_reset, > > > > TP_PROTO(struct Qdisc *q), > > diff --git a/net/core/dev.c b/net/core/dev.c > > index c253c2aafe97..20b9376de301 100644 > > --- a/net/core/dev.c > > +++ b/net/core/dev.c > > @@ -131,6 +131,7 @@ > > #include <trace/events/napi.h> > > #include <trace/events/net.h> > > #include <trace/events/skb.h> > > +#include <trace/events/qdisc.h> > > #include <linux/inetdevice.h> > > #include <linux/cpu_rmap.h> > > #include <linux/static_key.h> > > @@ -3864,6 +3865,8 @@ static inline int __dev_xmit_skb(struct sk_buff *skb, struct Qdisc *q, > > if (unlikely(!nolock_qdisc_is_empty(q))) { > > rc = q->enqueue(skb, q, &to_free) & > > NET_XMIT_MASK; > > + if (rc == NET_XMIT_SUCCESS) > > If NET_XMIT_CN is returned, the skb seems to be enqueued too? Sure. See the other reply from on why dropped packets are not interesting here. > > Also instead of checking the rc before calling the trace_*, maybe > it make more sense to add the rc to the tracepoint, so that the checking > is avoided, and we are able to tell the enqueuing result of a specific skb > from that tracepoint too. Totally disagree, because trace_qdisc_dequeue() is only called for successful cases too (see dequeue_skb()), it does not make sense to let them be different. > > > + trace_qdisc_enqueue(q, txq, skb); > > Does it make sense to wrap the about to something like: Nope. Because ->enqueue() is called by lower layer qdisc's too, but here we only want to track root, aka, entrance of TC. I know this may be confusing, please blame trace_qdisc_dequeue() which only tracks the exit. ;) Thanks.
On Mon, Jul 12, 2021 at 11:23 AM Cong Wang <xiyou.wangcong@gmail.com> wrote: > > On Sun, Jul 11, 2021 at 8:02 PM Tonghao Zhang <xiangxia.m.yue@gmail.com> wrote: > > > > On Mon, Jul 12, 2021 at 9:47 AM Cong Wang <xiyou.wangcong@gmail.com> wrote: > > > > > > On Sun, Jul 11, 2021 at 5:50 PM Tonghao Zhang <xiangxia.m.yue@gmail.com> wrote: > > > > > > > > On Mon, Jul 12, 2021 at 3:03 AM Cong Wang <xiyou.wangcong@gmail.com> wrote: > > > > > > > > > > From: Qitao Xu <qitao.xu@bytedance.com> > > > > > > > > > > Tracepoint trace_qdisc_enqueue() is introduced to trace skb at > > > > > the entrance of TC layer on TX side. This is kinda symmetric to > > > > > trace_qdisc_dequeue(), and together they can be used to calculate > > > > > the packet queueing latency. It is more accurate than > > > > > trace_net_dev_queue(), because we already successfully enqueue > > > > > the packet at that point. > > > > > > > > > > Note, trace ring buffer is only accessible to privileged users, > > > > > it is safe to use %px to print a real kernel address here. > > > > > > > > > > Reviewed-by: Cong Wang <cong.wang@bytedance.com> > > > > > Cc: Jamal Hadi Salim <jhs@mojatatu.com> > > > > > Cc: Jiri Pirko <jiri@resnulli.us> > > > > > Signed-off-by: Qitao Xu <qitao.xu@bytedance.com> > > > > > --- > > > > > include/trace/events/qdisc.h | 26 ++++++++++++++++++++++++++ > > > > > net/core/dev.c | 9 +++++++++ > > > > > 2 files changed, 35 insertions(+) > > > > > > > > > > diff --git a/include/trace/events/qdisc.h b/include/trace/events/qdisc.h > > > > > index 58209557cb3a..c3006c6b4a87 100644 > > > > > --- a/include/trace/events/qdisc.h > > > > > +++ b/include/trace/events/qdisc.h > > > > > @@ -46,6 +46,32 @@ TRACE_EVENT(qdisc_dequeue, > > > > > __entry->txq_state, __entry->packets, __entry->skbaddr ) > > > > > ); > > > > > > > > > > +TRACE_EVENT(qdisc_enqueue, > > > > > + > > > > > + TP_PROTO(struct Qdisc *qdisc, const struct netdev_queue *txq, struct sk_buff *skb), > > > > > + > > > > > + TP_ARGS(qdisc, txq, skb), > > > > > + > > > > > + TP_STRUCT__entry( > > > > > + __field(struct Qdisc *, qdisc) > > > > > + __field(void *, skbaddr) > > > > > + __field(int, ifindex) > > > > > + __field(u32, handle) > > > > > + __field(u32, parent) > > > > > + ), > > > > > + > > > > > + TP_fast_assign( > > > > > + __entry->qdisc = qdisc; > > > > > + __entry->skbaddr = skb; > > > > > + __entry->ifindex = txq->dev ? txq->dev->ifindex : 0; > > > > > + __entry->handle = qdisc->handle; > > > > > + __entry->parent = qdisc->parent; > > > > > + ), > > > > Hi qitao, cong > > > > Why not support the txq, we get more info from txq. > > > > > > Because we only want to calculate queueing latency, not anything > > > else. If you need it, you are welcome to add anything reasonable > > > in the future, it won't break ABI (see 3dd344ea84e122f791ab). > > Thanks! > > > > and we should take care of the return value of q->enqueue, because we > > > > can know what happens in the qdisc queue(not necessary to work with > > > > qdisc:dequeue). > > > > and we can use a tracepoint filter for the return value too. > > > > > > Disagree. Because we really have no interest in dropped packets. > > > Even if we really do, we could trace kfree_skb(), not really here. > > The qdisc returns not only the NET_XMIT_DROP, right ? > > skbprio_enqueue, sfq_enqueue and red_enqueue may return the NET_XMIT_CN. > > > > Sure, in that case a different packet is dropped, once again you > can trace it with kfree_skb() if you want. What's the problem? It's ok, but we can make it better. Yunsheng Lin may have explained why? > > > > > > we should introduce a new function to instead of now codes, that may > > > > make the codes clean. Please review my patch for more info. > > > > > > Just 3 lines of code, it is totally personal taste. > > > > > > > https://patchwork.kernel.org/project/netdevbpf/patch/20210711050007.1200-1-xiangxia.m.yue@gmail.com/ > > > > > > I did review it. Like I said, %p does not work. Have you tested your > > > patches? ;) > > Yes, I tested them, I didn't find the error. my patch is based on > > commit id 89212e160b81e778f829b89743570665810e3b13 > > I seriously doubt it, because we actually used %p in the beginning > too and got the same address for two different packets, this is why > we have to move to %px. It is 100% reproducible, so it probably > means you didn't test it at all. I use iperf to send the packets, not found the same address really <idle>-0 [043] ..s. 62.493634: qdisc_enqueue: enqueue ifindex=2 qdisc handle=0x0 parent=0x2C skbaddr=00000000a40f93fb ret=0 <idle>-0 [043] ..s. 62.494641: qdisc_enqueue: enqueue ifindex=2 qdisc handle=0x0 parent=0x20 skbaddr=00000000c3c53e95 ret=0 <idle>-0 [014] ..s. 64.473877: qdisc_enqueue: enqueue ifindex=2 qdisc handle=0x0 parent=0x20 skbaddr=00000000ad610424 ret=0 <idle>-0 [014] ..s. 64.473896: qdisc_enqueue: enqueue ifindex=2 qdisc handle=0x0 parent=0x20 skbaddr=00000000112a562d ret=0 > > Thanks. -- Best regards, Tonghao
On Sun, Jul 11, 2021 at 8:36 PM Tonghao Zhang <xiangxia.m.yue@gmail.com> wrote: > > On Mon, Jul 12, 2021 at 11:23 AM Cong Wang <xiyou.wangcong@gmail.com> wrote: > > Sure, in that case a different packet is dropped, once again you > > can trace it with kfree_skb() if you want. What's the problem? > It's ok, but we can make it better. Yunsheng Lin may have explained why? Why it is better to trace dropped packets both in enqueue and in kfree_skb()? I fail to see it. You are just asking for duplications. If you do not see it by yourself, it means you don't understand or need it at all. ;) Thanks.
On Mon, Jul 12, 2021 at 11:39 AM Cong Wang <xiyou.wangcong@gmail.com> wrote: > > On Sun, Jul 11, 2021 at 8:36 PM Tonghao Zhang <xiangxia.m.yue@gmail.com> wrote: > > > > On Mon, Jul 12, 2021 at 11:23 AM Cong Wang <xiyou.wangcong@gmail.com> wrote: > > > Sure, in that case a different packet is dropped, once again you > > > can trace it with kfree_skb() if you want. What's the problem? > > It's ok, but we can make it better. Yunsheng Lin may have explained why? > > Why it is better to trace dropped packets both in enqueue and in kfree_skb()? I mean we can use one tracepoint to know what happened in the queue, not necessary to trace enqueue and kfree_skb() If so, we must match when the packet is dropped and what packets. if we use the return value in trace_qdisc_requeue. It is easy to know what happened(when, where, what packets were dropped ). > I fail to see it. You are just asking for duplications. If you do not see it by > yourself, it means you don't understand or need it at all. ;) I added the tracepoint in centos 8 4.18 kernel version in our servers for a long time. > Thanks. -- Best regards, Tonghao
On Sun, Jul 11, 2021 at 8:49 PM Tonghao Zhang <xiangxia.m.yue@gmail.com> wrote: > > On Mon, Jul 12, 2021 at 11:39 AM Cong Wang <xiyou.wangcong@gmail.com> wrote: > > > > On Sun, Jul 11, 2021 at 8:36 PM Tonghao Zhang <xiangxia.m.yue@gmail.com> wrote: > > > > > > On Mon, Jul 12, 2021 at 11:23 AM Cong Wang <xiyou.wangcong@gmail.com> wrote: > > > > Sure, in that case a different packet is dropped, once again you > > > > can trace it with kfree_skb() if you want. What's the problem? > > > It's ok, but we can make it better. Yunsheng Lin may have explained why? > > > > Why it is better to trace dropped packets both in enqueue and in kfree_skb()? > I mean we can use one tracepoint to know what happened in the queue, > not necessary to trace enqueue and kfree_skb() This is wrong, packets can be dropped for other reasons too, tracing enqueue is clearly not sufficient even if you trace it unconditionally. For a quick example, codel drops packets at dequeue rather than enqueue. ;) > If so, we must match when the packet is dropped and what packets. if > we use the return value in trace_qdisc_requeue. It > is easy to know what happened(when, where, what packets were dropped ). I am afraid you have to watch the dropped packets. Thanks.
On Mon, Jul 12, 2021 at 12:02 PM Cong Wang <xiyou.wangcong@gmail.com> wrote: > > On Sun, Jul 11, 2021 at 8:49 PM Tonghao Zhang <xiangxia.m.yue@gmail.com> wrote: > > > > On Mon, Jul 12, 2021 at 11:39 AM Cong Wang <xiyou.wangcong@gmail.com> wrote: > > > > > > On Sun, Jul 11, 2021 at 8:36 PM Tonghao Zhang <xiangxia.m.yue@gmail.com> wrote: > > > > > > > > On Mon, Jul 12, 2021 at 11:23 AM Cong Wang <xiyou.wangcong@gmail.com> wrote: > > > > > Sure, in that case a different packet is dropped, once again you > > > > > can trace it with kfree_skb() if you want. What's the problem? > > > > It's ok, but we can make it better. Yunsheng Lin may have explained why? > > > > > > Why it is better to trace dropped packets both in enqueue and in kfree_skb()? > > I mean we can use one tracepoint to know what happened in the queue, > > not necessary to trace enqueue and kfree_skb() > > This is wrong, packets can be dropped for other reasons too, tracing no matter where the packet is dropped, we should allow user to know whether dropped in the enqueue. and the what the return value. > enqueue is clearly not sufficient even if you trace it unconditionally. > For a quick example, codel drops packets at dequeue rather than > enqueue. ;) > > > If so, we must match when the packet is dropped and what packets. if > > we use the return value in trace_qdisc_requeue. It > > is easy to know what happened(when, where, what packets were dropped ). > > I am afraid you have to watch the dropped packets. > > Thanks. -- Best regards, Tonghao
On Sun, Jul 11, 2021 at 9:12 PM Tonghao Zhang <xiangxia.m.yue@gmail.com> wrote: > > On Mon, Jul 12, 2021 at 12:02 PM Cong Wang <xiyou.wangcong@gmail.com> wrote: > > > > On Sun, Jul 11, 2021 at 8:49 PM Tonghao Zhang <xiangxia.m.yue@gmail.com> wrote: > > > > > > On Mon, Jul 12, 2021 at 11:39 AM Cong Wang <xiyou.wangcong@gmail.com> wrote: > > > > > > > > On Sun, Jul 11, 2021 at 8:36 PM Tonghao Zhang <xiangxia.m.yue@gmail.com> wrote: > > > > > > > > > > On Mon, Jul 12, 2021 at 11:23 AM Cong Wang <xiyou.wangcong@gmail.com> wrote: > > > > > > Sure, in that case a different packet is dropped, once again you > > > > > > can trace it with kfree_skb() if you want. What's the problem? > > > > > It's ok, but we can make it better. Yunsheng Lin may have explained why? > > > > > > > > Why it is better to trace dropped packets both in enqueue and in kfree_skb()? > > > I mean we can use one tracepoint to know what happened in the queue, > > > not necessary to trace enqueue and kfree_skb() > > > > This is wrong, packets can be dropped for other reasons too, tracing > no matter where the packet is dropped, we should allow user to know > whether dropped in the enqueue. and the > what the return value. Again you can know it by kfree_skb(). And you can not avoid kfree_skb() no matter how you change enqueue. So, I don't see your argument of saving kfree_skb() makes any sense here. Thanks.
On Mon, Jul 12, 2021 at 12:20 PM Cong Wang <xiyou.wangcong@gmail.com> wrote: > > On Sun, Jul 11, 2021 at 9:12 PM Tonghao Zhang <xiangxia.m.yue@gmail.com> wrote: > > > > On Mon, Jul 12, 2021 at 12:02 PM Cong Wang <xiyou.wangcong@gmail.com> wrote: > > > > > > On Sun, Jul 11, 2021 at 8:49 PM Tonghao Zhang <xiangxia.m.yue@gmail.com> wrote: > > > > > > > > On Mon, Jul 12, 2021 at 11:39 AM Cong Wang <xiyou.wangcong@gmail.com> wrote: > > > > > > > > > > On Sun, Jul 11, 2021 at 8:36 PM Tonghao Zhang <xiangxia.m.yue@gmail.com> wrote: > > > > > > > > > > > > On Mon, Jul 12, 2021 at 11:23 AM Cong Wang <xiyou.wangcong@gmail.com> wrote: > > > > > > > Sure, in that case a different packet is dropped, once again you > > > > > > > can trace it with kfree_skb() if you want. What's the problem? > > > > > > It's ok, but we can make it better. Yunsheng Lin may have explained why? > > > > > > > > > > Why it is better to trace dropped packets both in enqueue and in kfree_skb()? > > > > I mean we can use one tracepoint to know what happened in the queue, > > > > not necessary to trace enqueue and kfree_skb() > > > > > > This is wrong, packets can be dropped for other reasons too, tracing > > no matter where the packet is dropped, we should allow user to know > > whether dropped in the enqueue. and the > > what the return value. > > Again you can know it by kfree_skb(). And you can not avoid > kfree_skb() no matter how you change enqueue. So, I don't see your No, If I know what value returned for specified qdisc , I can know what happened, not necessarily kfree_skb() > argument of saving kfree_skb() makes any sense here. > > Thanks. -- Best regards, Tonghao
On Sun, Jul 11, 2021 at 9:41 PM Tonghao Zhang <xiangxia.m.yue@gmail.com> wrote: > > On Mon, Jul 12, 2021 at 12:20 PM Cong Wang <xiyou.wangcong@gmail.com> wrote: > > > > On Sun, Jul 11, 2021 at 9:12 PM Tonghao Zhang <xiangxia.m.yue@gmail.com> wrote: > > > > > > On Mon, Jul 12, 2021 at 12:02 PM Cong Wang <xiyou.wangcong@gmail.com> wrote: > > > > > > > > On Sun, Jul 11, 2021 at 8:49 PM Tonghao Zhang <xiangxia.m.yue@gmail.com> wrote: > > > > > > > > > > On Mon, Jul 12, 2021 at 11:39 AM Cong Wang <xiyou.wangcong@gmail.com> wrote: > > > > > > > > > > > > On Sun, Jul 11, 2021 at 8:36 PM Tonghao Zhang <xiangxia.m.yue@gmail.com> wrote: > > > > > > > > > > > > > > On Mon, Jul 12, 2021 at 11:23 AM Cong Wang <xiyou.wangcong@gmail.com> wrote: > > > > > > > > Sure, in that case a different packet is dropped, once again you > > > > > > > > can trace it with kfree_skb() if you want. What's the problem? > > > > > > > It's ok, but we can make it better. Yunsheng Lin may have explained why? > > > > > > > > > > > > Why it is better to trace dropped packets both in enqueue and in kfree_skb()? > > > > > I mean we can use one tracepoint to know what happened in the queue, > > > > > not necessary to trace enqueue and kfree_skb() > > > > > > > > This is wrong, packets can be dropped for other reasons too, tracing > > > no matter where the packet is dropped, we should allow user to know > > > whether dropped in the enqueue. and the > > > what the return value. > > > > Again you can know it by kfree_skb(). And you can not avoid > > kfree_skb() no matter how you change enqueue. So, I don't see your > No, If I know what value returned for specified qdisc , I can know > what happened, not necessarily kfree_skb() This is wrong. You have to trace dropped packets because you need to know when to delete the key (skb address) from the hashtable you use to calculate the latency. You save the key on enqueue and remove it on both dequeue and kfree_skb, the only difference is you only need to calculate the timestamp difference for the former. Thanks.
On Mon, Jul 12, 2021 at 12:45 PM Cong Wang <xiyou.wangcong@gmail.com> wrote: > > On Sun, Jul 11, 2021 at 9:41 PM Tonghao Zhang <xiangxia.m.yue@gmail.com> wrote: > > > > On Mon, Jul 12, 2021 at 12:20 PM Cong Wang <xiyou.wangcong@gmail.com> wrote: > > > > > > On Sun, Jul 11, 2021 at 9:12 PM Tonghao Zhang <xiangxia.m.yue@gmail.com> wrote: > > > > > > > > On Mon, Jul 12, 2021 at 12:02 PM Cong Wang <xiyou.wangcong@gmail.com> wrote: > > > > > > > > > > On Sun, Jul 11, 2021 at 8:49 PM Tonghao Zhang <xiangxia.m.yue@gmail.com> wrote: > > > > > > > > > > > > On Mon, Jul 12, 2021 at 11:39 AM Cong Wang <xiyou.wangcong@gmail.com> wrote: > > > > > > > > > > > > > > On Sun, Jul 11, 2021 at 8:36 PM Tonghao Zhang <xiangxia.m.yue@gmail.com> wrote: > > > > > > > > > > > > > > > > On Mon, Jul 12, 2021 at 11:23 AM Cong Wang <xiyou.wangcong@gmail.com> wrote: > > > > > > > > > Sure, in that case a different packet is dropped, once again you > > > > > > > > > can trace it with kfree_skb() if you want. What's the problem? > > > > > > > > It's ok, but we can make it better. Yunsheng Lin may have explained why? > > > > > > > > > > > > > > Why it is better to trace dropped packets both in enqueue and in kfree_skb()? > > > > > > I mean we can use one tracepoint to know what happened in the queue, > > > > > > not necessary to trace enqueue and kfree_skb() > > > > > > > > > > This is wrong, packets can be dropped for other reasons too, tracing > > > > no matter where the packet is dropped, we should allow user to know > > > > whether dropped in the enqueue. and the > > > > what the return value. > > > > > > Again you can know it by kfree_skb(). And you can not avoid > > > kfree_skb() no matter how you change enqueue. So, I don't see your > > No, If I know what value returned for specified qdisc , I can know > > what happened, not necessarily kfree_skb() > > This is wrong. You have to trace dropped packets because you > need to know when to delete the key (skb address) from the hashtable > you use to calculate the latency. You save the key on enqueue and > remove it on both dequeue and kfree_skb, the only difference is you No, we can set the timestamp or cb in skb in enqueue and check them in dequeue. we may not use the hashtable. If we use hashtable, we still can check the return value, save them to hashtable or not. > only need to calculate the timestamp difference for the former. > > Thanks. -- Best regards, Tonghao
On Mon, Jul 12, 2021 at 6:57 PM Tonghao Zhang <xiangxia.m.yue@gmail.com> wrote: > > On Mon, Jul 12, 2021 at 12:45 PM Cong Wang <xiyou.wangcong@gmail.com> wrote: > > > > On Sun, Jul 11, 2021 at 9:41 PM Tonghao Zhang <xiangxia.m.yue@gmail.com> wrote: > > > > > > On Mon, Jul 12, 2021 at 12:20 PM Cong Wang <xiyou.wangcong@gmail.com> wrote: > > > > > > > > On Sun, Jul 11, 2021 at 9:12 PM Tonghao Zhang <xiangxia.m.yue@gmail.com> wrote: > > > > > > > > > > On Mon, Jul 12, 2021 at 12:02 PM Cong Wang <xiyou.wangcong@gmail.com> wrote: > > > > > > > > > > > > On Sun, Jul 11, 2021 at 8:49 PM Tonghao Zhang <xiangxia.m.yue@gmail.com> wrote: > > > > > > > > > > > > > > On Mon, Jul 12, 2021 at 11:39 AM Cong Wang <xiyou.wangcong@gmail.com> wrote: > > > > > > > > > > > > > > > > On Sun, Jul 11, 2021 at 8:36 PM Tonghao Zhang <xiangxia.m.yue@gmail.com> wrote: > > > > > > > > > > > > > > > > > > On Mon, Jul 12, 2021 at 11:23 AM Cong Wang <xiyou.wangcong@gmail.com> wrote: > > > > > > > > > > Sure, in that case a different packet is dropped, once again you > > > > > > > > > > can trace it with kfree_skb() if you want. What's the problem? > > > > > > > > > It's ok, but we can make it better. Yunsheng Lin may have explained why? > > > > > > > > > > > > > > > > Why it is better to trace dropped packets both in enqueue and in kfree_skb()? > > > > > > > I mean we can use one tracepoint to know what happened in the queue, > > > > > > > not necessary to trace enqueue and kfree_skb() > > > > > > > > > > > > This is wrong, packets can be dropped for other reasons too, tracing > > > > > no matter where the packet is dropped, we should allow user to know > > > > > whether dropped in the enqueue. and the > > > > > what the return value. > > > > > > > > Again you can know it by kfree_skb(). And you can not avoid > > > > kfree_skb() no matter how you change enqueue. So, I don't see your > > > No, If I know what value returned for specified qdisc , I can know > > > what happened, not necessarily kfree_skb() > > > > This is wrong. You have to trace dropped packets because you > > need to know when to delete the key (skb address) from the hashtable > > you use to calculate the latency. You save the key on enqueue and > > remove it on both dequeue and kfree_skb, the only difference is you > No, we can set the timestamp or cb in skb in enqueue and check them in dequeue. > we may not use the hashtable. Are you sure this is safe?? How do you ensure we have enough space in skb->cb[]? More importantly, why do we even modify skb in tracepoint? > If we use hashtable, we still can check the return value, save them to > hashtable or not. How many times do I have to repeat the return value of enqueue is not sufficient even if you trace it? Can't you just look at codel_dequeue() and tell me how a drop at dequeue can be reflected to enqueue? Thanks.
diff --git a/include/trace/events/qdisc.h b/include/trace/events/qdisc.h index 58209557cb3a..c3006c6b4a87 100644 --- a/include/trace/events/qdisc.h +++ b/include/trace/events/qdisc.h @@ -46,6 +46,32 @@ TRACE_EVENT(qdisc_dequeue, __entry->txq_state, __entry->packets, __entry->skbaddr ) ); +TRACE_EVENT(qdisc_enqueue, + + TP_PROTO(struct Qdisc *qdisc, const struct netdev_queue *txq, struct sk_buff *skb), + + TP_ARGS(qdisc, txq, skb), + + TP_STRUCT__entry( + __field(struct Qdisc *, qdisc) + __field(void *, skbaddr) + __field(int, ifindex) + __field(u32, handle) + __field(u32, parent) + ), + + TP_fast_assign( + __entry->qdisc = qdisc; + __entry->skbaddr = skb; + __entry->ifindex = txq->dev ? txq->dev->ifindex : 0; + __entry->handle = qdisc->handle; + __entry->parent = qdisc->parent; + ), + + TP_printk("enqueue ifindex=%d qdisc handle=0x%X parent=0x%X skbaddr=%px", + __entry->ifindex, __entry->handle, __entry->parent, __entry->skbaddr) +); + TRACE_EVENT(qdisc_reset, TP_PROTO(struct Qdisc *q), diff --git a/net/core/dev.c b/net/core/dev.c index c253c2aafe97..20b9376de301 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -131,6 +131,7 @@ #include <trace/events/napi.h> #include <trace/events/net.h> #include <trace/events/skb.h> +#include <trace/events/qdisc.h> #include <linux/inetdevice.h> #include <linux/cpu_rmap.h> #include <linux/static_key.h> @@ -3864,6 +3865,8 @@ static inline int __dev_xmit_skb(struct sk_buff *skb, struct Qdisc *q, if (unlikely(!nolock_qdisc_is_empty(q))) { rc = q->enqueue(skb, q, &to_free) & NET_XMIT_MASK; + if (rc == NET_XMIT_SUCCESS) + trace_qdisc_enqueue(q, txq, skb); __qdisc_run(q); qdisc_run_end(q); @@ -3880,6 +3883,9 @@ static inline int __dev_xmit_skb(struct sk_buff *skb, struct Qdisc *q, } rc = q->enqueue(skb, q, &to_free) & NET_XMIT_MASK; + if (rc == NET_XMIT_SUCCESS) + trace_qdisc_enqueue(q, txq, skb); + qdisc_run(q); no_lock_out: @@ -3924,6 +3930,9 @@ static inline int __dev_xmit_skb(struct sk_buff *skb, struct Qdisc *q, rc = NET_XMIT_SUCCESS; } else { rc = q->enqueue(skb, q, &to_free) & NET_XMIT_MASK; + if (rc == NET_XMIT_SUCCESS) + trace_qdisc_enqueue(q, txq, skb); + if (qdisc_run_begin(q)) { if (unlikely(contended)) { spin_unlock(&q->busylock);