
[V2,1/2] bpf: control the trace data output on current cpu when perf sampling

Message ID 1444826277-94060-2-git-send-email-xiakaixu@huawei.com
State New

Commit Message

Kaixu Xia Oct. 14, 2015, 12:37 p.m. UTC
This patch adds the flag sample_disable to control trace data output
during perf sampling. By setting this flag and toggling it from an
eBPF program, we can control the output and collect only the samples
we are most interested in.

The bpf helper bpf_perf_event_sample_control() can control the
perf_event on the current CPU.
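
A usage sketch (illustrative only, not part of this patch; it assumes
a samples/bpf-style helper declaration and a loader that stores one
perf event per CPU at index == cpu):

	/* declare the new helper, samples/bpf style */
	static u64 (*bpf_perf_event_sample_control)(void *map, u64 index, u64 flag) =
		(void *) BPF_FUNC_perf_event_sample_control;

	struct bpf_map_def SEC("maps") pmu_map = {
		.type = BPF_MAP_TYPE_PERF_EVENT_ARRAY,
		.key_size = sizeof(int),
		.value_size = sizeof(u32),
		.max_entries = 32,
	};

	SEC("kprobe/sys_write")
	int gate_samples(struct pt_regs *ctx)
	{
		/* flag != 0 enables output on this CPU's event,
		 * flag == 0 disables it
		 */
		bpf_perf_event_sample_control(&pmu_map,
					      bpf_get_smp_processor_id(), 1);
		return 0;
	}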

Signed-off-by: Kaixu Xia <xiakaixu@huawei.com>
---
 include/linux/perf_event.h      |  1 +
 include/uapi/linux/bpf.h        |  5 +++++
 include/uapi/linux/perf_event.h |  3 ++-
 kernel/bpf/verifier.c           |  3 ++-
 kernel/events/core.c            | 13 +++++++++++++
 kernel/trace/bpf_trace.c        | 32 ++++++++++++++++++++++++++++++++
 6 files changed, 55 insertions(+), 2 deletions(-)

Comments

kernel test robot Oct. 14, 2015, 1:49 p.m. UTC | #1
Hi Kaixu,

[auto build test WARNING on net-next/master -- if this is an inappropriate base, please suggest rules for selecting a more suitable one]

url:    https://github.com/0day-ci/linux/commits/Kaixu-Xia/bpf-enable-disable-events-stored-in-PERF_EVENT_ARRAY-maps-trace-data-output-when-perf-sampling/20151014-204158
reproduce:
        # apt-get install sparse
        make ARCH=x86_64 allmodconfig
        make C=1 CF=-D__CHECK_ENDIAN__


sparse warnings: (new ones prefixed by >>)

>> kernel/trace/bpf_trace.c:239:29: sparse: symbol 'bpf_perf_event_sample_control_proto' was not declared. Should it be static?

Please review and, if appropriate, fold in the follow-up patch.
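
The follow-up is presumably just the missing qualifier, i.e. something
like:

	-const struct bpf_func_proto bpf_perf_event_sample_control_proto = {
	+static const struct bpf_func_proto bpf_perf_event_sample_control_proto = {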

Alexei Starovoitov Oct. 14, 2015, 9:21 p.m. UTC | #2
On 10/14/15 5:37 AM, Kaixu Xia wrote:
> This patch adds the flag sample_disable to control trace data output
> during perf sampling. By setting this flag and toggling it from an
> eBPF program, we can control the output and collect only the samples
> we are most interested in.
>
> The bpf helper bpf_perf_event_sample_control() can control the
> perf_event on the current CPU.
>
> Signed-off-by: Kaixu Xia <xiakaixu@huawei.com>
...
> --- a/kernel/events/core.c
> +++ b/kernel/events/core.c
> @@ -6337,6 +6337,9 @@ static int __perf_event_overflow(struct perf_event *event,
>   		irq_work_queue(&event->pending);
>   	}
>
> +	if (!atomic_read(&event->sample_disable))
> +		return ret;
> +

The condition check and the name are inconsistent. It should be either
if (!enabled) return
or
if (disabled) return
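
e.g. a sketch with the field renamed (hypothetical 'sample_enable'),
so the check and the name agree:

	/* 1 = output samples, 0 = suppress them */
	if (!atomic_read(&event->sample_enable))
		return ret;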

>   	if (event->overflow_handler)
>   		event->overflow_handler(event, data, regs);
>   	else
> @@ -7709,6 +7712,14 @@ static void account_event(struct perf_event *event)
>   	account_event_cpu(event, event->cpu);
>   }
>
> +static void perf_event_check_sample_flag(struct perf_event *event)
> +{
> +	if (event->attr.sample_disable == 1)
> +		atomic_set(&event->sample_disable, 0);
> +	else
> +		atomic_set(&event->sample_disable, 1);
> +}

Why introduce a new attribute for this? We already have the
'disabled' flag.
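
For reference, the existing bit in struct perf_event_attr:

	disabled       :  1, /* off by default        */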

> +static u64 bpf_perf_event_sample_control(u64 r1, u64 index, u64 flag, u64 r4, u64 r5)
> +{
> +	struct bpf_map *map = (struct bpf_map *) (unsigned long) r1;
> +	struct bpf_array *array = container_of(map, struct bpf_array, map);
> +	struct perf_event *event;
> +
> +	if (unlikely(index >= array->map.max_entries))
> +		return -E2BIG;
> +
> +	event = (struct perf_event *)array->ptrs[index];
> +	if (!event)
> +		return -ENOENT;
> +
> +	if (flag)

Please check only bit 0, and check that all other bits are zero as
well, for future extensibility.
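
i.e. something like:

	if (flag & ~1ULL)	/* bits 63..1 are reserved for the future */
		return -EINVAL;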

> +		atomic_dec(&event->sample_disable);

It should be atomic_dec_if_positive();

> +	else
> +		atomic_inc(&event->sample_disable);

And atomic_add_unless(), to make sure the count doesn't wrap on
either side.
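
i.e. a sketch of the wrap-safe version:

	if (flag)
		atomic_dec_if_positive(&event->sample_disable);
	else
		atomic_add_unless(&event->sample_disable, 1, INT_MAX);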

> +const struct bpf_func_proto bpf_perf_event_sample_control_proto = {

static.


Patch

diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h
index 092a0e8..dcbf7d5 100644
--- a/include/linux/perf_event.h
+++ b/include/linux/perf_event.h
@@ -472,6 +472,7 @@  struct perf_event {
 	struct irq_work			pending;
 
 	atomic_t			event_limit;
+	atomic_t			sample_disable;
 
 	void (*destroy)(struct perf_event *);
 	struct rcu_head			rcu_head;
diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h
index 564f1f0..e2c99c6 100644
--- a/include/uapi/linux/bpf.h
+++ b/include/uapi/linux/bpf.h
@@ -287,6 +287,11 @@  enum bpf_func_id {
 	 * Return: realm if != 0
 	 */
 	BPF_FUNC_get_route_realm,
+
+	/**
+	 * u64 bpf_perf_event_sample_control(&map, index, flag)
+	 */
+	BPF_FUNC_perf_event_sample_control,
 	__BPF_FUNC_MAX_ID,
 };
 
diff --git a/include/uapi/linux/perf_event.h b/include/uapi/linux/perf_event.h
index 2881145..a2b9dd7 100644
--- a/include/uapi/linux/perf_event.h
+++ b/include/uapi/linux/perf_event.h
@@ -331,7 +331,8 @@  struct perf_event_attr {
 				comm_exec      :  1, /* flag comm events that are due to an exec */
 				use_clockid    :  1, /* use @clockid for time fields */
 				context_switch :  1, /* context switch data */
-				__reserved_1   : 37;
+				sample_disable :  1, /* don't output data on samples */
+				__reserved_1   : 36;
 
 	union {
 		__u32		wakeup_events;	  /* wakeup every n events */
diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c
index 1d6b97b..3ffe630 100644
--- a/kernel/bpf/verifier.c
+++ b/kernel/bpf/verifier.c
@@ -245,6 +245,7 @@  static const struct {
 } func_limit[] = {
 	{BPF_MAP_TYPE_PROG_ARRAY, BPF_FUNC_tail_call},
 	{BPF_MAP_TYPE_PERF_EVENT_ARRAY, BPF_FUNC_perf_event_read},
+	{BPF_MAP_TYPE_PERF_EVENT_ARRAY, BPF_FUNC_perf_event_sample_control},
 };
 
 static void print_verifier_state(struct verifier_env *env)
@@ -910,7 +911,7 @@  static int check_map_func_compatibility(struct bpf_map *map, int func_id)
 		 * don't allow any other map type to be passed into
 		 * the special func;
 		 */
-		if (bool_map != bool_func)
+		if (bool_func && bool_map != bool_func)
 			return -EINVAL;
 	}
 
diff --git a/kernel/events/core.c b/kernel/events/core.c
index b11756f..942351c 100644
--- a/kernel/events/core.c
+++ b/kernel/events/core.c
@@ -6337,6 +6337,9 @@  static int __perf_event_overflow(struct perf_event *event,
 		irq_work_queue(&event->pending);
 	}
 
+	if (!atomic_read(&event->sample_disable))
+		return ret;
+
 	if (event->overflow_handler)
 		event->overflow_handler(event, data, regs);
 	else
@@ -7709,6 +7712,14 @@  static void account_event(struct perf_event *event)
 	account_event_cpu(event, event->cpu);
 }
 
+static void perf_event_check_sample_flag(struct perf_event *event)
+{
+	if (event->attr.sample_disable == 1)
+		atomic_set(&event->sample_disable, 0);
+	else
+		atomic_set(&event->sample_disable, 1);
+}
+
 /*
  * Allocate and initialize a event structure
  */
@@ -7840,6 +7851,8 @@  perf_event_alloc(struct perf_event_attr *attr, int cpu,
 		}
 	}
 
+	perf_event_check_sample_flag(event);
+
 	return event;
 
 err_per_task:
diff --git a/kernel/trace/bpf_trace.c b/kernel/trace/bpf_trace.c
index 0fe96c7..f261333 100644
--- a/kernel/trace/bpf_trace.c
+++ b/kernel/trace/bpf_trace.c
@@ -215,6 +215,36 @@  const struct bpf_func_proto bpf_perf_event_read_proto = {
 	.arg2_type	= ARG_ANYTHING,
 };
 
+static u64 bpf_perf_event_sample_control(u64 r1, u64 index, u64 flag, u64 r4, u64 r5)
+{
+	struct bpf_map *map = (struct bpf_map *) (unsigned long) r1;
+	struct bpf_array *array = container_of(map, struct bpf_array, map);
+	struct perf_event *event;
+
+	if (unlikely(index >= array->map.max_entries))
+		return -E2BIG;
+
+	event = (struct perf_event *)array->ptrs[index];
+	if (!event)
+		return -ENOENT;
+
+	if (flag)
+		atomic_dec(&event->sample_disable);
+	else
+		atomic_inc(&event->sample_disable);
+
+	return 0;
+}
+
+const struct bpf_func_proto bpf_perf_event_sample_control_proto = {
+	.func		= bpf_perf_event_sample_control,
+	.gpl_only	= false,
+	.ret_type	= RET_INTEGER,
+	.arg1_type	= ARG_CONST_MAP_PTR,
+	.arg2_type	= ARG_ANYTHING,
+	.arg3_type	= ARG_ANYTHING,
+};
+
 static const struct bpf_func_proto *kprobe_prog_func_proto(enum bpf_func_id func_id)
 {
 	switch (func_id) {
@@ -242,6 +272,8 @@  static const struct bpf_func_proto *kprobe_prog_func_proto(enum bpf_func_id func
 		return &bpf_get_smp_processor_id_proto;
 	case BPF_FUNC_perf_event_read:
 		return &bpf_perf_event_read_proto;
+	case BPF_FUNC_perf_event_sample_control:
+		return &bpf_perf_event_sample_control_proto;
 	default:
 		return NULL;
 	}