Message ID: 20250324173121.1275209-6-mizhang@google.com
State:      New
Series:     [v4,01/38] perf: Support get/put mediated PMU interfaces
On Fri, Apr 25, 2025, Peter Zijlstra wrote:
> On Mon, Mar 24, 2025 at 05:30:45PM +0000, Mingwei Zhang wrote:
> 
> > @@ -6040,6 +6041,71 @@ void perf_put_mediated_pmu(void)
> >  }
> >  EXPORT_SYMBOL_GPL(perf_put_mediated_pmu);
> >  
> > +static inline void perf_host_exit(struct perf_cpu_context *cpuctx)
> > +{
> > +        perf_ctx_disable(&cpuctx->ctx, EVENT_GUEST);
> > +        ctx_sched_out(&cpuctx->ctx, NULL, EVENT_GUEST);
> > +        perf_ctx_enable(&cpuctx->ctx, EVENT_GUEST);
> > +        if (cpuctx->task_ctx) {
> > +                perf_ctx_disable(cpuctx->task_ctx, EVENT_GUEST);
> > +                task_ctx_sched_out(cpuctx->task_ctx, NULL, EVENT_GUEST);
> > +                perf_ctx_enable(cpuctx->task_ctx, EVENT_GUEST);
> > +        }
> > +}
> > +
> > +/* When entering a guest, schedule out all exclude_guest events. */
> > +void perf_guest_enter(void)
> > +{
> > +        struct perf_cpu_context *cpuctx = this_cpu_ptr(&perf_cpu_context);
> > +
> > +        lockdep_assert_irqs_disabled();
> > +
> > +        perf_ctx_lock(cpuctx, cpuctx->task_ctx);
> > +
> > +        if (WARN_ON_ONCE(__this_cpu_read(perf_in_guest)))
> > +                goto unlock;
> > +
> > +        perf_host_exit(cpuctx);
> > +
> > +        __this_cpu_write(perf_in_guest, true);
> > +
> > +unlock:
> > +        perf_ctx_unlock(cpuctx, cpuctx->task_ctx);
> > +}
> > +EXPORT_SYMBOL_GPL(perf_guest_enter);
> > +
> > +static inline void perf_host_enter(struct perf_cpu_context *cpuctx)
> > +{
> > +        perf_ctx_disable(&cpuctx->ctx, EVENT_GUEST);
> > +        if (cpuctx->task_ctx)
> > +                perf_ctx_disable(cpuctx->task_ctx, EVENT_GUEST);
> > +
> > +        perf_event_sched_in(cpuctx, cpuctx->task_ctx, NULL, EVENT_GUEST);
> > +
> > +        if (cpuctx->task_ctx)
> > +                perf_ctx_enable(cpuctx->task_ctx, EVENT_GUEST);
> > +        perf_ctx_enable(&cpuctx->ctx, EVENT_GUEST);
> > +}
> > +
> > +void perf_guest_exit(void)
> > +{
> > +        struct perf_cpu_context *cpuctx = this_cpu_ptr(&perf_cpu_context);
> > +
> > +        lockdep_assert_irqs_disabled();
> > +
> > +        perf_ctx_lock(cpuctx, cpuctx->task_ctx);
> > +
> > +        if (WARN_ON_ONCE(!__this_cpu_read(perf_in_guest)))
> > +                goto unlock;
> > +
> > +        perf_host_enter(cpuctx);
> > +
> > +        __this_cpu_write(perf_in_guest, false);
> > +unlock:
> > +        perf_ctx_unlock(cpuctx, cpuctx->task_ctx);
> > +}
> > +EXPORT_SYMBOL_GPL(perf_guest_exit);
> 
> This naming is confusing on purpose? Pick either guest/host and stick
> with it.

+1.  I also think the inner perf_host_{enter,exit}() helpers are superfluous.
These flows

After a bit of hacking, and with a few spoilers, this is what I ended up with
(not anywhere near fully tested).  I like following KVM's kvm_xxx_{load,put}()
nomenclature to tie everything together, so I went with "guest" instead of
"host" even though the majority of the work being done is to schedule out/in
host context.

/* When loading a guest's mediated PMU, schedule out all exclude_guest events. */
void perf_load_guest_context(unsigned long data)
{
        struct perf_cpu_context *cpuctx = this_cpu_ptr(&perf_cpu_context);

        lockdep_assert_irqs_disabled();

        perf_ctx_lock(cpuctx, cpuctx->task_ctx);

        if (WARN_ON_ONCE(__this_cpu_read(guest_ctx_loaded)))
                goto unlock;

        perf_ctx_disable(&cpuctx->ctx, EVENT_GUEST);
        ctx_sched_out(&cpuctx->ctx, NULL, EVENT_GUEST);
        perf_ctx_enable(&cpuctx->ctx, EVENT_GUEST);
        if (cpuctx->task_ctx) {
                perf_ctx_disable(cpuctx->task_ctx, EVENT_GUEST);
                task_ctx_sched_out(cpuctx->task_ctx, NULL, EVENT_GUEST);
                perf_ctx_enable(cpuctx->task_ctx, EVENT_GUEST);
        }

        arch_perf_load_guest_context(data);

        __this_cpu_write(guest_ctx_loaded, true);

unlock:
        perf_ctx_unlock(cpuctx, cpuctx->task_ctx);
}
EXPORT_SYMBOL_GPL(perf_load_guest_context);

void perf_put_guest_context(void)
{
        struct perf_cpu_context *cpuctx = this_cpu_ptr(&perf_cpu_context);

        lockdep_assert_irqs_disabled();

        perf_ctx_lock(cpuctx, cpuctx->task_ctx);

        if (WARN_ON_ONCE(!__this_cpu_read(guest_ctx_loaded)))
                goto unlock;

        arch_perf_put_guest_context();

        perf_ctx_disable(&cpuctx->ctx, EVENT_GUEST);
        if (cpuctx->task_ctx)
                perf_ctx_disable(cpuctx->task_ctx, EVENT_GUEST);

        perf_event_sched_in(cpuctx, cpuctx->task_ctx, NULL, EVENT_GUEST);

        if (cpuctx->task_ctx)
                perf_ctx_enable(cpuctx->task_ctx, EVENT_GUEST);
        perf_ctx_enable(&cpuctx->ctx, EVENT_GUEST);

        __this_cpu_write(guest_ctx_loaded, false);
unlock:
        perf_ctx_unlock(cpuctx, cpuctx->task_ctx);
}
EXPORT_SYMBOL_GPL(perf_put_guest_context);
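[Note: Sean's proposal above calls two arch hooks, arch_perf_load_guest_context() and arch_perf_put_guest_context(), whose implementation is not shown in this excerpt. The stub below is an illustration only of where those hooks would sit, assuming a __weak default so that architectures without a mediated PMU can ignore them; it is not code from the series.]

/*
 * Illustration only, not from the series: a possible __weak default for the
 * arch hooks referenced in the proposal above.  An architecture with a
 * mediated PMU (e.g. x86) would override these to swap host and guest PMU
 * state; everyone else gets a no-op.
 */
void __weak arch_perf_load_guest_context(unsigned long data)
{
        /* arch code would save host PMU state and program the guest's here */
}

void __weak arch_perf_put_guest_context(void)
{
        /* arch code would tear down guest PMU state and restore the host's here */
}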
On 2025-05-14 7:19 p.m., Sean Christopherson wrote:
> On Fri, Apr 25, 2025, Peter Zijlstra wrote:
>> This naming is confusing on purpose? Pick either guest/host and stick
>> with it.
> 
> +1.  I also think the inner perf_host_{enter,exit}() helpers are superfluous.
> 
> [...]
> 
> void perf_put_guest_context(void)
> {
>         struct perf_cpu_context *cpuctx = this_cpu_ptr(&perf_cpu_context);
> 
>         lockdep_assert_irqs_disabled();
> 
>         perf_ctx_lock(cpuctx, cpuctx->task_ctx);
> 
>         if (WARN_ON_ONCE(!__this_cpu_read(guest_ctx_loaded)))
>                 goto unlock;
> 
>         arch_perf_put_guest_context();

It will set the guest_ctx_loaded to false.
The update_context_time() invoked in the perf_event_sched_in() will not
get a chance to update the guest time.

I think something like the below should work.

- Disable everything in the PMU (disable global control)
- Schedule in the host counters (they are not running yet, since the PMU's
  global control is still disabled)
- arch_perf_put_guest_context()
- Enable everything in the PMU (enable global control; the host counters
  start now)

void perf_put_guest_context(void)
{
        struct perf_cpu_context *cpuctx = this_cpu_ptr(&perf_cpu_context);

        lockdep_assert_irqs_disabled();

        perf_ctx_lock(cpuctx, cpuctx->task_ctx);

        if (WARN_ON_ONCE(!__this_cpu_read(guest_ctx_loaded)))
                goto unlock;

        perf_ctx_disable(&cpuctx->ctx, EVENT_GUEST);
        if (cpuctx->task_ctx)
                perf_ctx_disable(cpuctx->task_ctx, EVENT_GUEST);

        perf_event_sched_in(cpuctx, cpuctx->task_ctx, NULL, EVENT_GUEST);

        arch_perf_put_guest_context();

        if (cpuctx->task_ctx)
                perf_ctx_enable(cpuctx->task_ctx, EVENT_GUEST);
        perf_ctx_enable(&cpuctx->ctx, EVENT_GUEST);

unlock:
        perf_ctx_unlock(cpuctx, cpuctx->task_ctx);
}

Similarly for perf_load_guest_context():

- Disable everything in the PMU (disable global control)
- Schedule out all the host counters
- arch_perf_load_guest_context()
- Enable everything in the PMU (enable global control)

void perf_load_guest_context(unsigned long data)
{
        struct perf_cpu_context *cpuctx = this_cpu_ptr(&perf_cpu_context);

        lockdep_assert_irqs_disabled();

        perf_ctx_lock(cpuctx, cpuctx->task_ctx);

        if (WARN_ON_ONCE(__this_cpu_read(guest_ctx_loaded)))
                goto unlock;

        perf_ctx_disable(&cpuctx->ctx, EVENT_GUEST);
        ctx_sched_out(&cpuctx->ctx, NULL, EVENT_GUEST);
        if (cpuctx->task_ctx) {
                perf_ctx_disable(cpuctx->task_ctx, EVENT_GUEST);
                task_ctx_sched_out(cpuctx->task_ctx, NULL, EVENT_GUEST);
        }

        arch_perf_load_guest_context(data);

        perf_ctx_enable(&cpuctx->ctx, EVENT_GUEST);
        if (cpuctx->task_ctx)
                perf_ctx_enable(cpuctx->task_ctx, EVENT_GUEST);

unlock:
        perf_ctx_unlock(cpuctx, cpuctx->task_ctx);
}

Thanks,
Kan
On Thu, May 15, 2025, Kan Liang wrote:
> On 2025-05-14 7:19 p.m., Sean Christopherson wrote:
> > [...]
> > void perf_put_guest_context(void)
> > {
> >         [...]
> >         arch_perf_put_guest_context();
> 
> It will set the guest_ctx_loaded to false.
> The update_context_time() invoked in the perf_event_sched_in() will not
> get a chance to update the guest time.

The guest_ctx_loaded in arch/x86/events/core.c is a different variable, it
just happens to have the same name.

It's completely gross, but exposing guest_ctx_loaded outside of
kernel/events/core.c didn't seem like a great alternative.  If we wanted to
use a single variable, then the writes in arch_perf_{load,put}_guest_context()
can simply go away.
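[Note on the name collision Sean describes: because both flags are file-local (static), the identical names never refer to the same storage. The sketch below is illustrative only; the comments are assumptions about each flag's role based on the discussion above.]

/* Illustrative sketch, not the actual patches. */
#include <linux/percpu.h>

/* kernel/events/core.c */
static DEFINE_PER_CPU(bool, guest_ctx_loaded);  /* generic perf: exclude_guest events swapped out */

/* arch/x86/events/core.c */
static DEFINE_PER_CPU(bool, guest_ctx_loaded);  /* x86 PMU driver: guest PMU state currently loaded */

Folding the two into a single shared variable is what would let the writes in arch_perf_{load,put}_guest_context() disappear, as Sean notes.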
On 2025-05-21 3:55 p.m., Namhyung Kim wrote:
> On Mon, Mar 24, 2025 at 05:30:45PM +0000, Mingwei Zhang wrote:
>> From: Kan Liang <kan.liang@linux.intel.com>
>>
>> Only KVM knows the exact time when a guest is entering/exiting. Expose
>> two interfaces to KVM to switch the ownership of the PMU resources.
>>
>> All the pinned events must be scheduled in first. Extend the
>> perf_event_sched_in() helper to support an extra flag, e.g., EVENT_GUEST.
>>
>> Signed-off-by: Kan Liang <kan.liang@linux.intel.com>
>> Signed-off-by: Mingwei Zhang <mizhang@google.com>
>> ---
>>  include/linux/perf_event.h |  4 ++
>>  kernel/events/core.c       | 80 ++++++++++++++++++++++++++++++++++----
>>  2 files changed, 77 insertions(+), 7 deletions(-)
>>
>> [...]
>>
>> +static inline void perf_host_exit(struct perf_cpu_context *cpuctx)
>> +{
>> +        perf_ctx_disable(&cpuctx->ctx, EVENT_GUEST);
>> +        ctx_sched_out(&cpuctx->ctx, NULL, EVENT_GUEST);
>> +        perf_ctx_enable(&cpuctx->ctx, EVENT_GUEST);
>> +        if (cpuctx->task_ctx) {
>> +                perf_ctx_disable(cpuctx->task_ctx, EVENT_GUEST);
>> +                task_ctx_sched_out(cpuctx->task_ctx, NULL, EVENT_GUEST);
>> +                perf_ctx_enable(cpuctx->task_ctx, EVENT_GUEST);
>> +        }
>> +}
> 
> Cpu context and task context may have events in the same PMU.
> How about this?
> 
>         perf_ctx_disable(&cpuctx->ctx, EVENT_GUEST);
>         if (cpuctx->task_ctx)
>                 perf_ctx_disable(cpuctx->task_ctx, EVENT_GUEST);
> 
>         ctx_sched_out(&cpuctx->ctx, NULL, EVENT_GUEST);
>         if (cpuctx->task_ctx)
>                 task_ctx_sched_out(cpuctx->task_ctx, NULL, EVENT_GUEST);
> 
>         if (cpuctx->task_ctx)
>                 perf_ctx_enable(cpuctx->task_ctx, EVENT_GUEST);
>         perf_ctx_enable(&cpuctx->ctx, EVENT_GUEST);
> 

Yes, this is what has to be fixed in V4.

Whether loading or putting the guest context, I think the steps below have to
be followed:

- Disable both cpuctx->ctx and cpuctx->task_ctx
- Schedule in/out the host counters and load/put the guest context
- Enable both cpuctx->ctx and cpuctx->task_ctx

A similar proposal can be found at
https://lore.kernel.org/lkml/4aaf67ab-aa5c-41e6-bced-3cb000172c52@linux.intel.com/

Thanks,
Kan
diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h
index 7bda1e20be12..37187ee8e226 100644
--- a/include/linux/perf_event.h
+++ b/include/linux/perf_event.h
@@ -1822,6 +1822,8 @@ extern int perf_event_period(struct perf_event *event, u64 value);
 extern u64 perf_event_pause(struct perf_event *event, bool reset);
 int perf_get_mediated_pmu(void);
 void perf_put_mediated_pmu(void);
+void perf_guest_enter(void);
+void perf_guest_exit(void);
 #else /* !CONFIG_PERF_EVENTS: */
 static inline void *
 perf_aux_output_begin(struct perf_output_handle *handle,
@@ -1919,6 +1921,8 @@ static inline int perf_get_mediated_pmu(void)
 }
 
 static inline void perf_put_mediated_pmu(void)			{ }
+static inline void perf_guest_enter(void)			{ }
+static inline void perf_guest_exit(void)			{ }
 #endif
 
 #if defined(CONFIG_PERF_EVENTS) && defined(CONFIG_CPU_SUP_INTEL)
diff --git a/kernel/events/core.c b/kernel/events/core.c
index 7a2115b2c5c1..d05487d465c9 100644
--- a/kernel/events/core.c
+++ b/kernel/events/core.c
@@ -2827,14 +2827,15 @@ static void task_ctx_sched_out(struct perf_event_context *ctx,
 
 static void perf_event_sched_in(struct perf_cpu_context *cpuctx,
 				struct perf_event_context *ctx,
-				struct pmu *pmu)
+				struct pmu *pmu,
+				enum event_type_t event_type)
 {
-	ctx_sched_in(&cpuctx->ctx, pmu, EVENT_PINNED);
+	ctx_sched_in(&cpuctx->ctx, pmu, EVENT_PINNED | event_type);
 	if (ctx)
-		ctx_sched_in(ctx, pmu, EVENT_PINNED);
-	ctx_sched_in(&cpuctx->ctx, pmu, EVENT_FLEXIBLE);
+		ctx_sched_in(ctx, pmu, EVENT_PINNED | event_type);
+	ctx_sched_in(&cpuctx->ctx, pmu, EVENT_FLEXIBLE | event_type);
 	if (ctx)
-		ctx_sched_in(ctx, pmu, EVENT_FLEXIBLE);
+		ctx_sched_in(ctx, pmu, EVENT_FLEXIBLE | event_type);
 }
 
 /*
@@ -2890,7 +2891,7 @@ static void ctx_resched(struct perf_cpu_context *cpuctx,
 	else if (event_type & EVENT_PINNED)
 		ctx_sched_out(&cpuctx->ctx, pmu, EVENT_FLEXIBLE);
 
-	perf_event_sched_in(cpuctx, task_ctx, pmu);
+	perf_event_sched_in(cpuctx, task_ctx, pmu, 0);
 
 	for_each_epc(epc, &cpuctx->ctx, pmu, 0)
 		perf_pmu_enable(epc->pmu);
@@ -4188,7 +4189,7 @@ static void perf_event_context_sched_in(struct task_struct *task)
 		ctx_sched_out(&cpuctx->ctx, NULL, EVENT_FLEXIBLE);
 	}
 
-	perf_event_sched_in(cpuctx, ctx, NULL);
+	perf_event_sched_in(cpuctx, ctx, NULL, 0);
 
 	perf_ctx_sched_task_cb(cpuctx->task_ctx, true);
 
@@ -6040,6 +6041,71 @@ void perf_put_mediated_pmu(void)
 }
 EXPORT_SYMBOL_GPL(perf_put_mediated_pmu);
 
+static inline void perf_host_exit(struct perf_cpu_context *cpuctx)
+{
+	perf_ctx_disable(&cpuctx->ctx, EVENT_GUEST);
+	ctx_sched_out(&cpuctx->ctx, NULL, EVENT_GUEST);
+	perf_ctx_enable(&cpuctx->ctx, EVENT_GUEST);
+	if (cpuctx->task_ctx) {
+		perf_ctx_disable(cpuctx->task_ctx, EVENT_GUEST);
+		task_ctx_sched_out(cpuctx->task_ctx, NULL, EVENT_GUEST);
+		perf_ctx_enable(cpuctx->task_ctx, EVENT_GUEST);
+	}
+}
+
+/* When entering a guest, schedule out all exclude_guest events. */
+void perf_guest_enter(void)
+{
+	struct perf_cpu_context *cpuctx = this_cpu_ptr(&perf_cpu_context);
+
+	lockdep_assert_irqs_disabled();
+
+	perf_ctx_lock(cpuctx, cpuctx->task_ctx);
+
+	if (WARN_ON_ONCE(__this_cpu_read(perf_in_guest)))
+		goto unlock;
+
+	perf_host_exit(cpuctx);
+
+	__this_cpu_write(perf_in_guest, true);
+
+unlock:
+	perf_ctx_unlock(cpuctx, cpuctx->task_ctx);
+}
+EXPORT_SYMBOL_GPL(perf_guest_enter);
+
+static inline void perf_host_enter(struct perf_cpu_context *cpuctx)
+{
+	perf_ctx_disable(&cpuctx->ctx, EVENT_GUEST);
+	if (cpuctx->task_ctx)
+		perf_ctx_disable(cpuctx->task_ctx, EVENT_GUEST);
+
+	perf_event_sched_in(cpuctx, cpuctx->task_ctx, NULL, EVENT_GUEST);
+
+	if (cpuctx->task_ctx)
+		perf_ctx_enable(cpuctx->task_ctx, EVENT_GUEST);
+	perf_ctx_enable(&cpuctx->ctx, EVENT_GUEST);
+}
+
+void perf_guest_exit(void)
+{
+	struct perf_cpu_context *cpuctx = this_cpu_ptr(&perf_cpu_context);
+
+	lockdep_assert_irqs_disabled();
+
+	perf_ctx_lock(cpuctx, cpuctx->task_ctx);
+
+	if (WARN_ON_ONCE(!__this_cpu_read(perf_in_guest)))
+		goto unlock;
+
+	perf_host_enter(cpuctx);
+
+	__this_cpu_write(perf_in_guest, false);
+unlock:
+	perf_ctx_unlock(cpuctx, cpuctx->task_ctx);
+}
+EXPORT_SYMBOL_GPL(perf_guest_exit);
+
 /*
  * Holding the top-level event's child_mutex means that any
  * descendant process that has inherited this event will block
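[Note on intended usage: the commit message says only KVM knows exactly when a guest enters and exits, so the expected caller of the two exported interfaces is KVM's vcpu run path, with interrupts disabled as required by lockdep_assert_irqs_disabled(). The sketch below is a hypothetical illustration of that pairing, not code from KVM or from this series; the helper name is made up.]

#include <linux/kvm_host.h>
#include <linux/perf_event.h>

/* Hypothetical KVM-side usage sketch. */
static void vcpu_run_with_mediated_pmu(struct kvm_vcpu *vcpu)
{
        local_irq_disable();

        /* Hand the PMU to the guest: all exclude_guest host events are scheduled out. */
        perf_guest_enter();

        /* ... enter the guest and run @vcpu; the guest owns the PMU here ... */

        /* Take the PMU back: the host's exclude_guest events are scheduled back in. */
        perf_guest_exit();

        local_irq_enable();
}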