Message ID | 1411724805-6304-2-git-send-email-yogesh.tillu@linaro.org |
---|---|
State | New |
Headers | show |
To be consistent with ODP naming conventions, I'd suggest changing the names of these routines: s/odph_perf_open_counter/odph_perf_counter_open/ s/odph_perf_read_counter/odph_perf_counter_read/ s/odph_perf_write_counter/odph_perf_counter_write/ s/odph_perf_close_counter/odph_perf_counter_close/ Also, is the intent here to only support access to a single perf counter (PMCNTENSET_EL0)? I'd expect these routines to include an argument that references the specific counter of interest rather than a void argument. Bill On Fri, Sep 26, 2014 at 4:46 AM, Yogesh Tillu <yogesh.tillu@linaro.org> wrote: > Signed-off-by: Yogesh Tillu <yogesh.tillu@linaro.org> > --- > helper/include/odph_perf.h | 118 > +++++++++++++++++++++++++++++++++++++++++++++ > 1 file changed, 118 insertions(+) > create mode 100644 helper/include/odph_perf.h > > diff --git a/helper/include/odph_perf.h b/helper/include/odph_perf.h > new file mode 100644 > index 0000000..3560924 > --- /dev/null > +++ b/helper/include/odph_perf.h > @@ -0,0 +1,118 @@ > +/* Copyright (c) 2014, Linaro Limited > + * All rights reserved. > + * > + * SPDX-License-Identifier: BSD-3-Clause > + */ > + > + > +/** > + * @file > + * > + * ODP Performance Counter Direct access from Userspace Header > + */ > + > +#ifndef ODPH_PERF_H_ > +#define ODPH_PERF_H_ > +#ifdef __cplusplus > +extern "C" { > +#endif > + > +#if __aarch64__ /**< Check for ArmV8 */ > +#define ARMV8_PMCNTENSET_EL0_ENABLE (1<<31) /**< Enable Perf count reg */ > +#endif > + > +/** > + * Open Performance counter > + * > + * @note api to enable performance counters in system, this function does > + * enable sequence for respective arm versions > + * > + * @param void > + * > + * @return 0 if open successfully, otherwise -1 > + */ > +static inline int odph_perf_open_counter(void) > +{ > +#if __aarch64__ > +/* Performance Monitors Count Enable Set register bit 31:0 disable, > +1 enable */ > +asm volatile("msr pmcntenset_el0, %0" : : "r" > (ARMV8_PMCNTENSET_EL0_ENABLE)); > +return 0; > +#elif defined(__ARM_ARCH_7A__) > + return 0; > +#else > + #error Unsupported Architecture > + return -1; > +#endif > +} > + > +/** > + * Read Performance counter > + * > + * @note api to read performance cycle counters in system > + * > + * @param void > + * > + * @return cycle counter value if read successfully, otherwise -1 > + */ > +static inline uint64_t > +odph_perf_read_counter(void) > +{ > +uint64_t ret; > +#if defined __aarch64__ > + asm volatile("mrs %0, pmccntr_el0" : "=r" (ret)); > + return ret; > +#elif defined(__ARM_ARCH_7A__) > + asm volatile("mrc p15, 0, %0, c9, c13, 0" : "=r"(ret)); > + return ret; > +#else > + #error Unsupported architecture/compiler! > + return -1; > +#endif > +} > + > +/** > + * Write Performance counter > + * > + * @note api to write value to Performance counter, > + * NA for now > + * > + * @param void > + * > + * @return 0 if written successfully, otherwise -1 > + */ > +static inline int odph_perf_write_counter(void) > +{ > +/* TODO: Stub, might be populated based on requirnment */ > +} > + > +/** > + * Close Performance counter > + * > + * @note api to perform close sequnce for cycle counters in system > + * > + * @param void > + * > + * @return 0 if close successfully, otherwise -1 > + */ > +static inline int odph_perf_close_counter(void) > +{ > +#if defined __aarch64__ > +/* Performance Monitors Count Enable Set register bit 31:0 disable, > +1 enable */ > + asm volatile("msr pmcntenset_el0, %0" : : "r" (0<<31)); > + /* Note above statement does not really clearing register. TODO */ > + return 0; > +#elif defined(__ARM_ARCH_7A__) > + return 0; > +#else > + #error Unsupported architecture/compiler! > + return -1; > +#endif > +} > + > +#ifdef __cplusplus > +} > +#endif > + > +#endif /* ODPH_PERF_H_ */ > -- > 1.9.1 > > > _______________________________________________ > lng-odp mailing list > lng-odp@lists.linaro.org > http://lists.linaro.org/mailman/listinfo/lng-odp >
I assume that this is an initial prototype,. The open/close semantics suggest to me that I should open the name of the counter I want and it returns a handle that I can read and write to access the counter. This also addresses Bills point that we need to be able to access any one of potentially hundreds of counters in a specific SoC fastpath HW. Mike On 26 September 2014 06:31, Bill Fischofer <bill.fischofer@linaro.org> wrote: > To be consistent with ODP naming conventions, I'd suggest changing the > names of these routines: > > s/odph_perf_open_counter/odph_perf_counter_open/ > s/odph_perf_read_counter/odph_perf_counter_read/ > s/odph_perf_write_counter/odph_perf_counter_write/ > s/odph_perf_close_counter/odph_perf_counter_close/ > > Also, is the intent here to only support access to a single perf counter > (PMCNTENSET_EL0)? I'd expect these routines to include an argument that > references the specific counter of interest rather than a void argument. > > Bill > > > On Fri, Sep 26, 2014 at 4:46 AM, Yogesh Tillu <yogesh.tillu@linaro.org> > wrote: > >> Signed-off-by: Yogesh Tillu <yogesh.tillu@linaro.org> >> --- >> helper/include/odph_perf.h | 118 >> +++++++++++++++++++++++++++++++++++++++++++++ >> 1 file changed, 118 insertions(+) >> create mode 100644 helper/include/odph_perf.h >> >> diff --git a/helper/include/odph_perf.h b/helper/include/odph_perf.h >> new file mode 100644 >> index 0000000..3560924 >> --- /dev/null >> +++ b/helper/include/odph_perf.h >> @@ -0,0 +1,118 @@ >> +/* Copyright (c) 2014, Linaro Limited >> + * All rights reserved. >> + * >> + * SPDX-License-Identifier: BSD-3-Clause >> + */ >> + >> + >> +/** >> + * @file >> + * >> + * ODP Performance Counter Direct access from Userspace Header >> + */ >> + >> +#ifndef ODPH_PERF_H_ >> +#define ODPH_PERF_H_ >> +#ifdef __cplusplus >> +extern "C" { >> +#endif >> + >> +#if __aarch64__ /**< Check for ArmV8 */ >> +#define ARMV8_PMCNTENSET_EL0_ENABLE (1<<31) /**< Enable Perf count reg */ >> +#endif >> + >> +/** >> + * Open Performance counter >> + * >> + * @note api to enable performance counters in system, this function does >> + * enable sequence for respective arm versions >> + * >> + * @param void >> + * >> + * @return 0 if open successfully, otherwise -1 >> + */ >> +static inline int odph_perf_open_counter(void) >> +{ >> +#if __aarch64__ >> +/* Performance Monitors Count Enable Set register bit 31:0 disable, >> +1 enable */ >> +asm volatile("msr pmcntenset_el0, %0" : : "r" >> (ARMV8_PMCNTENSET_EL0_ENABLE)); >> +return 0; >> +#elif defined(__ARM_ARCH_7A__) >> + return 0; >> +#else >> + #error Unsupported Architecture >> + return -1; >> +#endif >> +} >> + >> +/** >> + * Read Performance counter >> + * >> + * @note api to read performance cycle counters in system >> + * >> + * @param void >> + * >> + * @return cycle counter value if read successfully, otherwise -1 >> + */ >> +static inline uint64_t >> +odph_perf_read_counter(void) >> +{ >> +uint64_t ret; >> +#if defined __aarch64__ >> + asm volatile("mrs %0, pmccntr_el0" : "=r" (ret)); >> + return ret; >> +#elif defined(__ARM_ARCH_7A__) >> + asm volatile("mrc p15, 0, %0, c9, c13, 0" : "=r"(ret)); >> + return ret; >> +#else >> + #error Unsupported architecture/compiler! >> + return -1; >> +#endif >> +} >> + >> +/** >> + * Write Performance counter >> + * >> + * @note api to write value to Performance counter, >> + * NA for now >> + * >> + * @param void >> + * >> + * @return 0 if written successfully, otherwise -1 >> + */ >> +static inline int odph_perf_write_counter(void) >> +{ >> +/* TODO: Stub, might be populated based on requirnment */ >> +} >> + >> +/** >> + * Close Performance counter >> + * >> + * @note api to perform close sequnce for cycle counters in system >> + * >> + * @param void >> + * >> + * @return 0 if close successfully, otherwise -1 >> + */ >> +static inline int odph_perf_close_counter(void) >> +{ >> +#if defined __aarch64__ >> +/* Performance Monitors Count Enable Set register bit 31:0 disable, >> +1 enable */ >> + asm volatile("msr pmcntenset_el0, %0" : : "r" (0<<31)); >> + /* Note above statement does not really clearing register. TODO */ >> + return 0; >> +#elif defined(__ARM_ARCH_7A__) >> + return 0; >> +#else >> + #error Unsupported architecture/compiler! >> + return -1; >> +#endif >> +} >> + >> +#ifdef __cplusplus >> +} >> +#endif >> + >> +#endif /* ODPH_PERF_H_ */ >> -- >> 1.9.1 >> >> >> _______________________________________________ >> lng-odp mailing list >> lng-odp@lists.linaro.org >> http://lists.linaro.org/mailman/listinfo/lng-odp >> > > > _______________________________________________ > lng-odp mailing list > lng-odp@lists.linaro.org > http://lists.linaro.org/mailman/listinfo/lng-odp > >
Use cases, prerequisites, requirements, limitations, proposed design... We don't have to write a twenty page document for this but we need to have some clarity and agreement here. Application running in user space wants to measure how many HW events (such as cache misses or CPU cycles) have occurred between different points in the execution. This is useful for profiling specific code segments (e.g. from ODP dispatch to return) and is not used only for optimizing throughput but also in order to verify real-time (latency and jitter) characteristics as ODP in bare-metal-like environments may be used to replace dedicated real-time executives. Describing the use cases is important as Linux beliebers always will have problems understanding why the current solutions don't work for us and will be sceptical to accepting our patches. Must be able to enable user space access to PMU counters (as is needed on ARM). This is probably separate from the user space library. Preferably done without a kernel module, I can't understand why the kernel should not have this feature built in. Must be able to allocate and configure one *or several* HW PMU counters with CPU architecture-defined or CPU implementation specific event types. OK to be limited by physical resources (event types, number of PMU counters). Must be able to read those counters from user space with minimal SW overhead (i.e. no system calls as this introduces too much overhead as has been demonstrated by Yogesh). Expect application threads to be CPU affine and not be subject to CPU migration. No need to support migration of PMU configuration or counter values between CPU's. CPU migration of a thread to some other CPU which does not have user space access enable will lead to a crash (on ARM). Perhaps the library should automatically configure CPU affinity or warn if the thread is allowed to be migrated? Probably PMU setup is per thread and not per process. Different threads (running on different CPU's) could be doing event counting (with different configurations) simultaneously. Or possible the setup is per-process (i.e. shared between threads) but each thread/CPU has its own set of counters? Discuss. Stop counting and free resources, allowing other applications or threads to reuse the HW resources. We probably want the kernel to manage the HW resources (as the kernel is also a user of these resources), the user space library will have to allocate (or pre-allocate) PMU counters from the kernel. This can be done using perf_event_open() but has problems today since the kernel just allocates a virtual counter for you, not a physical counter. The virtual counter may be remapped to different physical counters. If we want to support PMU's outside of the CPU (e.g. on the interconnect or memory controller or accelerators), we probably want a thread-agnostic API as these PMU's have nothing to do with specific threads. I am not sure we even need a "fastpath" user space API for such PMU's, the kernel support and API might be sufficient. On 26 September 2014 12:52, Mike Holmes <mike.holmes@linaro.org> wrote: > I assume that this is an initial prototype,. > > The open/close semantics suggest to me that I should open the name of the > counter I want and it returns a handle that I can read and write to access > the counter. > This also addresses Bills point that we need to be able to access any one > of potentially hundreds of counters in a specific SoC fastpath HW. > > Mike > > On 26 September 2014 06:31, Bill Fischofer <bill.fischofer@linaro.org> > wrote: > >> To be consistent with ODP naming conventions, I'd suggest changing the >> names of these routines: >> >> s/odph_perf_open_counter/odph_perf_counter_open/ >> s/odph_perf_read_counter/odph_perf_counter_read/ >> s/odph_perf_write_counter/odph_perf_counter_write/ >> s/odph_perf_close_counter/odph_perf_counter_close/ >> >> Also, is the intent here to only support access to a single perf counter >> (PMCNTENSET_EL0)? I'd expect these routines to include an argument that >> references the specific counter of interest rather than a void argument. >> >> Bill >> >> >> On Fri, Sep 26, 2014 at 4:46 AM, Yogesh Tillu <yogesh.tillu@linaro.org> >> wrote: >> >>> Signed-off-by: Yogesh Tillu <yogesh.tillu@linaro.org> >>> --- >>> helper/include/odph_perf.h | 118 >>> +++++++++++++++++++++++++++++++++++++++++++++ >>> 1 file changed, 118 insertions(+) >>> create mode 100644 helper/include/odph_perf.h >>> >>> diff --git a/helper/include/odph_perf.h b/helper/include/odph_perf.h >>> new file mode 100644 >>> index 0000000..3560924 >>> --- /dev/null >>> +++ b/helper/include/odph_perf.h >>> @@ -0,0 +1,118 @@ >>> +/* Copyright (c) 2014, Linaro Limited >>> + * All rights reserved. >>> + * >>> + * SPDX-License-Identifier: BSD-3-Clause >>> + */ >>> + >>> + >>> +/** >>> + * @file >>> + * >>> + * ODP Performance Counter Direct access from Userspace Header >>> + */ >>> + >>> +#ifndef ODPH_PERF_H_ >>> +#define ODPH_PERF_H_ >>> +#ifdef __cplusplus >>> +extern "C" { >>> +#endif >>> + >>> +#if __aarch64__ /**< Check for ArmV8 */ >>> +#define ARMV8_PMCNTENSET_EL0_ENABLE (1<<31) /**< Enable Perf count reg >>> */ >>> +#endif >>> + >>> +/** >>> + * Open Performance counter >>> + * >>> + * @note api to enable performance counters in system, this function >>> does >>> + * enable sequence for respective arm versions >>> + * >>> + * @param void >>> + * >>> + * @return 0 if open successfully, otherwise -1 >>> + */ >>> +static inline int odph_perf_open_counter(void) >>> +{ >>> +#if __aarch64__ >>> +/* Performance Monitors Count Enable Set register bit 31:0 disable, >>> +1 enable */ >>> +asm volatile("msr pmcntenset_el0, %0" : : "r" >>> (ARMV8_PMCNTENSET_EL0_ENABLE)); >>> +return 0; >>> +#elif defined(__ARM_ARCH_7A__) >>> + return 0; >>> +#else >>> + #error Unsupported Architecture >>> + return -1; >>> +#endif >>> +} >>> + >>> +/** >>> + * Read Performance counter >>> + * >>> + * @note api to read performance cycle counters in system >>> + * >>> + * @param void >>> + * >>> + * @return cycle counter value if read successfully, otherwise -1 >>> + */ >>> +static inline uint64_t >>> +odph_perf_read_counter(void) >>> +{ >>> +uint64_t ret; >>> +#if defined __aarch64__ >>> + asm volatile("mrs %0, pmccntr_el0" : "=r" (ret)); >>> + return ret; >>> +#elif defined(__ARM_ARCH_7A__) >>> + asm volatile("mrc p15, 0, %0, c9, c13, 0" : "=r"(ret)); >>> + return ret; >>> +#else >>> + #error Unsupported architecture/compiler! >>> + return -1; >>> +#endif >>> +} >>> + >>> +/** >>> + * Write Performance counter >>> + * >>> + * @note api to write value to Performance counter, >>> + * NA for now >>> + * >>> + * @param void >>> + * >>> + * @return 0 if written successfully, otherwise -1 >>> + */ >>> +static inline int odph_perf_write_counter(void) >>> +{ >>> +/* TODO: Stub, might be populated based on requirnment */ >>> +} >>> + >>> +/** >>> + * Close Performance counter >>> + * >>> + * @note api to perform close sequnce for cycle counters in system >>> + * >>> + * @param void >>> + * >>> + * @return 0 if close successfully, otherwise -1 >>> + */ >>> +static inline int odph_perf_close_counter(void) >>> +{ >>> +#if defined __aarch64__ >>> +/* Performance Monitors Count Enable Set register bit 31:0 disable, >>> +1 enable */ >>> + asm volatile("msr pmcntenset_el0, %0" : : "r" (0<<31)); >>> + /* Note above statement does not really clearing register. TODO >>> */ >>> + return 0; >>> +#elif defined(__ARM_ARCH_7A__) >>> + return 0; >>> +#else >>> + #error Unsupported architecture/compiler! >>> + return -1; >>> +#endif >>> +} >>> + >>> +#ifdef __cplusplus >>> +} >>> +#endif >>> + >>> +#endif /* ODPH_PERF_H_ */ >>> -- >>> 1.9.1 >>> >>> >>> _______________________________________________ >>> lng-odp mailing list >>> lng-odp@lists.linaro.org >>> http://lists.linaro.org/mailman/listinfo/lng-odp >>> >> >> >> _______________________________________________ >> lng-odp mailing list >> lng-odp@lists.linaro.org >> http://lists.linaro.org/mailman/listinfo/lng-odp >> >> > > > -- > *Mike Holmes* > Linaro Technical Manager / Lead > LNG - ODP > > _______________________________________________ > lng-odp mailing list > lng-odp@lists.linaro.org > http://lists.linaro.org/mailman/listinfo/lng-odp > >
diff --git a/helper/include/odph_perf.h b/helper/include/odph_perf.h new file mode 100644 index 0000000..3560924 --- /dev/null +++ b/helper/include/odph_perf.h @@ -0,0 +1,118 @@ +/* Copyright (c) 2014, Linaro Limited + * All rights reserved. + * + * SPDX-License-Identifier: BSD-3-Clause + */ + + +/** + * @file + * + * ODP Performance Counter Direct access from Userspace Header + */ + +#ifndef ODPH_PERF_H_ +#define ODPH_PERF_H_ +#ifdef __cplusplus +extern "C" { +#endif + +#if __aarch64__ /**< Check for ArmV8 */ +#define ARMV8_PMCNTENSET_EL0_ENABLE (1<<31) /**< Enable Perf count reg */ +#endif + +/** + * Open Performance counter + * + * @note api to enable performance counters in system, this function does + * enable sequence for respective arm versions + * + * @param void + * + * @return 0 if open successfully, otherwise -1 + */ +static inline int odph_perf_open_counter(void) +{ +#if __aarch64__ +/* Performance Monitors Count Enable Set register bit 31:0 disable, +1 enable */ +asm volatile("msr pmcntenset_el0, %0" : : "r" (ARMV8_PMCNTENSET_EL0_ENABLE)); +return 0; +#elif defined(__ARM_ARCH_7A__) + return 0; +#else + #error Unsupported Architecture + return -1; +#endif +} + +/** + * Read Performance counter + * + * @note api to read performance cycle counters in system + * + * @param void + * + * @return cycle counter value if read successfully, otherwise -1 + */ +static inline uint64_t +odph_perf_read_counter(void) +{ +uint64_t ret; +#if defined __aarch64__ + asm volatile("mrs %0, pmccntr_el0" : "=r" (ret)); + return ret; +#elif defined(__ARM_ARCH_7A__) + asm volatile("mrc p15, 0, %0, c9, c13, 0" : "=r"(ret)); + return ret; +#else + #error Unsupported architecture/compiler! + return -1; +#endif +} + +/** + * Write Performance counter + * + * @note api to write value to Performance counter, + * NA for now + * + * @param void + * + * @return 0 if written successfully, otherwise -1 + */ +static inline int odph_perf_write_counter(void) +{ +/* TODO: Stub, might be populated based on requirnment */ +} + +/** + * Close Performance counter + * + * @note api to perform close sequnce for cycle counters in system + * + * @param void + * + * @return 0 if close successfully, otherwise -1 + */ +static inline int odph_perf_close_counter(void) +{ +#if defined __aarch64__ +/* Performance Monitors Count Enable Set register bit 31:0 disable, +1 enable */ + asm volatile("msr pmcntenset_el0, %0" : : "r" (0<<31)); + /* Note above statement does not really clearing register. TODO */ + return 0; +#elif defined(__ARM_ARCH_7A__) + return 0; +#else + #error Unsupported architecture/compiler! + return -1; +#endif +} + +#ifdef __cplusplus +} +#endif + +#endif /* ODPH_PERF_H_ */
Signed-off-by: Yogesh Tillu <yogesh.tillu@linaro.org> --- helper/include/odph_perf.h | 118 +++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 118 insertions(+) create mode 100644 helper/include/odph_perf.h