diff mbox series

[5/7] Traffic control using high-resolution timer issue

Message ID 20201001205141.8885-6-erez.geva.ext@siemens.com
State New
Headers show
Series TC-ETF support PTP clocks series | expand

Commit Message

Geva, Erez Oct. 1, 2020, 8:51 p.m. UTC
- Add new schedule function for Qdisc watchdog.
    The function avoids reprogram if watchdog already expire
    before new expire.

  - Use new schedule function in ETF.

  - Add ETF range value to kernel configuration.
    as the value is characteristic to Hardware.

Signed-off-by: Erez Geva <erez.geva.ext@siemens.com>
---
 include/net/pkt_sched.h |  2 ++
 net/sched/Kconfig       |  8 ++++++++
 net/sched/sch_api.c     | 33 +++++++++++++++++++++++++++++++++
 net/sched/sch_etf.c     | 10 ++++++++--
 4 files changed, 51 insertions(+), 2 deletions(-)

Comments

Thomas Gleixner Oct. 1, 2020, 11:07 p.m. UTC | #1
On Thu, Oct 01 2020 at 22:51, Erez Geva wrote:

Issue? You again fail to decribe the problem.

>   - Add new schedule function for Qdisc watchdog.
>     The function avoids reprogram if watchdog already expire
>     before new expire.

Why can't the existing function not be changed to do that?

>   - Use new schedule function in ETF.
>
>   - Add ETF range value to kernel configuration.
>     as the value is characteristic to Hardware.

No. That's completely wrong. Hardware properties need to be established
at boot/runtime otherwise you can't build a kernel which runs on
different platforms.

> +void qdisc_watchdog_schedule_soon_ns(struct qdisc_watchdog *wd, u64 expires,
> +				     u64 delta_ns)

schedule soon? That sounds like schedule it sooner than later, but I
don't care.

> +{
> +	if (test_bit(__QDISC_STATE_DEACTIVATED,
> +		     &qdisc_root_sleeping(wd->qdisc)->state))
> +		return;
> +
> +	if (wd->last_expires == expires)
> +		return;

How is this supposed to be valid without checking whether the timer is
queued in the first place?

Maybe the function name should be schedule_soon_or_not()

> +	/**

Can you please use proper comment style? This is neither network comment
style nor the regular sane kernel comment style. It's kerneldoc comment
style which is reserved for function and struct documentation.

> +	 * If expires is in [0, now + delta_ns],
> +	 * do not program it.

This range info is just confusing. Just use plain english.

> +	 */
> +	if (expires <= ktime_to_ns(hrtimer_cb_get_time(&wd->timer)) + delta_ns)
> +		return;

So if the watchdog is NOT queued and expiry < now + delta_ns then this
returns and nothing starts the timer ever.

That might all be correct, but without any useful explanation it looks
completely bogus.

> +	/**
> +	 * If timer is already set in [0, expires + delta_ns],
> +	 * do not reprogram it.
> +	 */
> +	if (hrtimer_is_queued(&wd->timer) &&
> +	    wd->last_expires <= expires + delta_ns)
> +		return;
> +
> +	wd->last_expires = expires;
> +	hrtimer_start_range_ns(&wd->timer,
> +			       ns_to_ktime(expires),
> +			       delta_ns,
> +			       HRTIMER_MODE_ABS_PINNED);
> +}
> +EXPORT_SYMBOL(qdisc_watchdog_schedule_soon_ns);
> +
>  void qdisc_watchdog_cancel(struct qdisc_watchdog *wd)
>  {
>  	hrtimer_cancel(&wd->timer);
> diff --git a/net/sched/sch_etf.c b/net/sched/sch_etf.c
> index c48f91075b5c..48b2868c4672 100644
> --- a/net/sched/sch_etf.c
> +++ b/net/sched/sch_etf.c
> @@ -20,6 +20,11 @@
>  #include <net/pkt_sched.h>
>  #include <net/sock.h>
>  
> +#ifdef CONFIG_NET_SCH_ETF_TIMER_RANGE
> +#define NET_SCH_ETF_TIMER_RANGE CONFIG_NET_SCH_ETF_TIMER_RANGE
> +#else
> +#define NET_SCH_ETF_TIMER_RANGE (5 * NSEC_PER_USEC)
> +#endif
>  #define DEADLINE_MODE_IS_ON(x) ((x)->flags & TC_ETF_DEADLINE_MODE_ON)
>  #define OFFLOAD_IS_ON(x) ((x)->flags & TC_ETF_OFFLOAD_ON)
>  #define SKIP_SOCK_CHECK_IS_SET(x) ((x)->flags & TC_ETF_SKIP_SOCK_CHECK)
> @@ -128,8 +133,9 @@ static void reset_watchdog(struct Qdisc *sch)
>  		return;
>  	}
>  
> -	next = ktime_sub_ns(skb->tstamp, q->delta);
> -	qdisc_watchdog_schedule_ns(&q->watchdog, ktime_to_ns(next));
> +	next = ktime_sub_ns(skb->tstamp, q->delta + NET_SCH_ETF_TIMER_RANGE);
> +	qdisc_watchdog_schedule_soon_ns(&q->watchdog, ktime_to_ns(next),
> +					NET_SCH_ETF_TIMER_RANGE);

This is changing 5 things at once. That's just wrong.

patch 1: Add the new function and explain why it's required

patch 2: Make reset_watchdog() use it

patch 3: Add a mechanism to retrieve the magic hardware range from the
         underlying hardware driver and add that value to q->delta or
         set q->delta to it. Whatever makes sense. The current solution
         makes no sense at all

Thanks,

        tglx
diff mbox series

Patch

diff --git a/include/net/pkt_sched.h b/include/net/pkt_sched.h
index ac8c890a2657..4306c2773778 100644
--- a/include/net/pkt_sched.h
+++ b/include/net/pkt_sched.h
@@ -78,6 +78,8 @@  void qdisc_watchdog_init(struct qdisc_watchdog *wd, struct Qdisc *qdisc);
 
 void qdisc_watchdog_schedule_range_ns(struct qdisc_watchdog *wd, u64 expires,
 				      u64 delta_ns);
+void qdisc_watchdog_schedule_soon_ns(struct qdisc_watchdog *wd, u64 expires,
+				     u64 delta_ns);
 
 static inline void qdisc_watchdog_schedule_ns(struct qdisc_watchdog *wd,
 					      u64 expires)
diff --git a/net/sched/Kconfig b/net/sched/Kconfig
index a3b37d88800e..0f5261ee9e1b 100644
--- a/net/sched/Kconfig
+++ b/net/sched/Kconfig
@@ -195,6 +195,14 @@  config NET_SCH_ETF
 	  To compile this code as a module, choose M here: the
 	  module will be called sch_etf.
 
+config NET_SCH_ETF_TIMER_RANGE
+	int "ETF Watchdog time range delta in nano seconds"
+	depends on NET_SCH_ETF
+	default 5000
+	help
+	  Specify the time range delta for ETF watchdog
+	  Default is 5 microsecond
+
 config NET_SCH_TAPRIO
 	tristate "Time Aware Priority (taprio) Scheduler"
 	help
diff --git a/net/sched/sch_api.c b/net/sched/sch_api.c
index ebf59ca1faab..80bd09555f5e 100644
--- a/net/sched/sch_api.c
+++ b/net/sched/sch_api.c
@@ -645,6 +645,39 @@  void qdisc_watchdog_schedule_range_ns(struct qdisc_watchdog *wd, u64 expires,
 }
 EXPORT_SYMBOL(qdisc_watchdog_schedule_range_ns);
 
+void qdisc_watchdog_schedule_soon_ns(struct qdisc_watchdog *wd, u64 expires,
+				     u64 delta_ns)
+{
+	if (test_bit(__QDISC_STATE_DEACTIVATED,
+		     &qdisc_root_sleeping(wd->qdisc)->state))
+		return;
+
+	if (wd->last_expires == expires)
+		return;
+
+	/**
+	 * If expires is in [0, now + delta_ns],
+	 * do not program it.
+	 */
+	if (expires <= ktime_to_ns(hrtimer_cb_get_time(&wd->timer)) + delta_ns)
+		return;
+
+	/**
+	 * If timer is already set in [0, expires + delta_ns],
+	 * do not reprogram it.
+	 */
+	if (hrtimer_is_queued(&wd->timer) &&
+	    wd->last_expires <= expires + delta_ns)
+		return;
+
+	wd->last_expires = expires;
+	hrtimer_start_range_ns(&wd->timer,
+			       ns_to_ktime(expires),
+			       delta_ns,
+			       HRTIMER_MODE_ABS_PINNED);
+}
+EXPORT_SYMBOL(qdisc_watchdog_schedule_soon_ns);
+
 void qdisc_watchdog_cancel(struct qdisc_watchdog *wd)
 {
 	hrtimer_cancel(&wd->timer);
diff --git a/net/sched/sch_etf.c b/net/sched/sch_etf.c
index c48f91075b5c..48b2868c4672 100644
--- a/net/sched/sch_etf.c
+++ b/net/sched/sch_etf.c
@@ -20,6 +20,11 @@ 
 #include <net/pkt_sched.h>
 #include <net/sock.h>
 
+#ifdef CONFIG_NET_SCH_ETF_TIMER_RANGE
+#define NET_SCH_ETF_TIMER_RANGE CONFIG_NET_SCH_ETF_TIMER_RANGE
+#else
+#define NET_SCH_ETF_TIMER_RANGE (5 * NSEC_PER_USEC)
+#endif
 #define DEADLINE_MODE_IS_ON(x) ((x)->flags & TC_ETF_DEADLINE_MODE_ON)
 #define OFFLOAD_IS_ON(x) ((x)->flags & TC_ETF_OFFLOAD_ON)
 #define SKIP_SOCK_CHECK_IS_SET(x) ((x)->flags & TC_ETF_SKIP_SOCK_CHECK)
@@ -128,8 +133,9 @@  static void reset_watchdog(struct Qdisc *sch)
 		return;
 	}
 
-	next = ktime_sub_ns(skb->tstamp, q->delta);
-	qdisc_watchdog_schedule_ns(&q->watchdog, ktime_to_ns(next));
+	next = ktime_sub_ns(skb->tstamp, q->delta + NET_SCH_ETF_TIMER_RANGE);
+	qdisc_watchdog_schedule_soon_ns(&q->watchdog, ktime_to_ns(next),
+					NET_SCH_ETF_TIMER_RANGE);
 }
 
 static void report_sock_error(struct sk_buff *skb, u32 err, u8 code)