new file mode 100644
@@ -0,0 +1,42 @@
+#undef TRACE_SYSTEM
+#define TRACE_SYSTEM latency
+
+#if !defined(_TRACE_HIST_H) || defined(TRACE_HEADER_MULTI_READ)
+#define _TRACE_HIST_H
+
+#include <linux/tracepoint.h>
+
+DECLARE_EVENT_CLASS(latency_template,
+ TP_PROTO(int cpu, cycles_t latency),
+
+ TP_ARGS(cpu, latency),
+
+ TP_STRUCT__entry(
+ __field(int, cpu)
+ __field(cycles_t, latency)
+ ),
+
+ TP_fast_assign(
+ __entry->cpu = cpu;
+ __entry->latency = latency;
+ ),
+
+ TP_printk("cpu=%d, latency=%lu", __entry->cpu, __entry->latency)
+);
+
+DEFINE_EVENT(latency_template, latency_irqs,
+ TP_PROTO(int cpu, cycles_t latency),
+ TP_ARGS(cpu, latency));
+
+DEFINE_EVENT(latency_template, latency_preempt,
+ TP_PROTO(int cpu, cycles_t latency),
+ TP_ARGS(cpu, latency));
+
+DEFINE_EVENT(latency_template, latency_critical_timings,
+ TP_PROTO(int cpu, cycles_t latency),
+ TP_ARGS(cpu, latency));
+
+#endif /* _TRACE_HIST_H */
+
+/* This part must be outside protection */
+#include <trace/define_trace.h>
@@ -14,13 +14,20 @@
#include <linux/module.h>
#include <linux/ftrace.h>
+#include <trace/events/sched.h>
+
#include "trace.h"
+#define CREATE_TRACE_POINTS
+#include <trace/events/latency.h>
+
static struct trace_array *irqsoff_trace __read_mostly;
static int tracer_enabled __read_mostly;
static DEFINE_PER_CPU(int, tracing_cpu);
-
+static DEFINE_PER_CPU(cycle_t, ts_irqs);
+static DEFINE_PER_CPU(cycle_t, ts_preempt);
+static DEFINE_PER_CPU(cycle_t, ts_critical_timings);
static DEFINE_RAW_SPINLOCK(max_trace_lock);
enum {
@@ -419,9 +426,22 @@ stop_critical_timing(unsigned long ip, unsigned long parent_ip)
atomic_dec(&data->disabled);
}
+static inline void mark_timestamp(cycle_t __percpu *ts)
+{
+ int cpu = raw_smp_processor_id();
+ per_cpu(*ts, cpu) = ftrace_now(cpu);
+}
+
+static inline cycle_t get_delta(int cpu, cycle_t __percpu *ts)
+{
+ return ftrace_now(cpu) - per_cpu(*ts, cpu);
+}
/* start and stop critical timings used to for stoppage (in idle) */
void start_critical_timings(void)
{
+ if (trace_latency_critical_timings_enabled())
+ mark_timestamp(&ts_critical_timings);
+
if (preempt_trace() || irq_trace())
start_critical_timing(CALLER_ADDR0, CALLER_ADDR1);
}
@@ -431,6 +451,13 @@ void stop_critical_timings(void)
{
if (preempt_trace() || irq_trace())
stop_critical_timing(CALLER_ADDR0, CALLER_ADDR1);
+
+ if (trace_latency_critical_timings_enabled()) {
+ int cpu = raw_smp_processor_id();
+ trace_latency_critical_timings(cpu,
+ get_delta(cpu, &ts_critical_timings));
+ }
+
}
EXPORT_SYMBOL_GPL(stop_critical_timings);
@@ -438,6 +465,10 @@ EXPORT_SYMBOL_GPL(stop_critical_timings);
#ifdef CONFIG_PROVE_LOCKING
void time_hardirqs_on(unsigned long a0, unsigned long a1)
{
+ if (trace_latency_irqs_enabled()) {
+ int cpu = raw_smp_processor_id();
+ trace_latency_irqs(cpu, get_delta(cpu, &ts_irqs));
+ }
if (!preempt_trace() && irq_trace())
stop_critical_timing(a0, a1);
}
@@ -446,6 +477,10 @@ void time_hardirqs_off(unsigned long a0, unsigned long a1)
{
if (!preempt_trace() && irq_trace())
start_critical_timing(a0, a1);
+
+ if (trace_latency_irqs_enabled()) {
+ mark_timestamp(&ts_irqs);
+ }
}
#else /* !CONFIG_PROVE_LOCKING */
@@ -500,9 +535,16 @@ EXPORT_SYMBOL(trace_hardirqs_off_caller);
#endif /* CONFIG_PROVE_LOCKING */
#endif /* CONFIG_IRQSOFF_TRACER */
+int count = 0;
+
#ifdef CONFIG_PREEMPT_TRACER
void trace_preempt_on(unsigned long a0, unsigned long a1)
{
+ if (trace_latency_preempt_enabled()) {
+ int cpu = raw_smp_processor_id();
+ trace_latency_preempt(cpu, get_delta(cpu, &ts_preempt));
+ }
+
if (preempt_trace() && !irq_trace())
stop_critical_timing(a0, a1);
}
@@ -511,6 +553,10 @@ void trace_preempt_off(unsigned long a0, unsigned long a1)
{
if (preempt_trace() && !irq_trace())
start_critical_timing(a0, a1);
+
+ if (trace_latency_preempt_enabled()) {
+ mark_timestamp(&ts_preempt);
+ }
}
#endif /* CONFIG_PREEMPT_TRACER */
This work is based on work by Daniel Wagner. A few tracepoints are added at the end of the critical section. With the hist trigger in place, the hist trigger plots may be generated, with per-cpu breakdown of events captured. It is based on linux kernel's event infrastructure. The following filter(s) may be used 'hist:key=latency.log2:val=hitcount:sort=latency' 'hist:key=cpu,latency:val=hitcount:sort=latency if cpu==1' 'hist:key=common_pid.execname' This captures only the latencies introduced by disabled irqs and preemption. Additional per process data has to be captured to calculate the effective latencies introduced for individual processes. Initial work - latency.patch [1] https://git.kernel.org/cgit/linux/kernel/git/rt/linux-stable-rt.git/commit/?h=v3.14-rt-rebase&id=56d50cc34943bbba12b8c5942ee1ae3b29f73acb Signed-off-by: Binoy Jayan <binoy.jayan@linaro.org> --- include/trace/events/latency.h | 42 ++++++++++++++++++++++++++++++++++++ kernel/trace/trace_irqsoff.c | 48 +++++++++++++++++++++++++++++++++++++++++- 2 files changed, 89 insertions(+), 1 deletion(-) create mode 100644 include/trace/events/latency.h -- The Qualcomm Innovation Center, Inc. is a member of the Code Aurora Forum, a Linux Foundation Collaborative Project